; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; Codegen checks for xnor-shaped IR: an xor whose result, or one of whose
; operands, is additionally xor'ed with -1. Every function is compiled with
; -global-isel for gfx700, gfx801, gfx900, gfx906 and gfx1010, and the
; expected assembly is pinned instruction by instruction. Because the check
; lines are autogenerated, they should be regenerated with the script named on
; the first line rather than edited by hand.

; i32 xor of two inreg arguments followed by xor with -1; the inner xor has a
; single use. Every target is expected to emit one s_xnor_b32.
define amdgpu_ps i32 @scalar_xnor_i32_one_use(i32 inreg %a, i32 inreg %b) {
; GCN-LABEL: scalar_xnor_i32_one_use:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_xnor_b32 s0, s0, s1
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_xnor_b32 s0, s0, s1
; GFX10-NEXT: ; return to shader part epilog
entry:
  %xor = xor i32 %a, %b
  %r0.val = xor i32 %xor, -1
  ret i32 %r0.val
}

; <2 x i16> form of the same pattern, result bitcast to i32. No target is
; expected to produce an xnor instruction here: gfx900, gfx906 and gfx1010
; emit s_xor_b32 followed by s_xor_b32 with -1, while gfx700 and gfx801 emit
; longer sequences that also shift/mask/or the 16-bit halves.
; FIXME: fails to match
define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) {
; GFX7-LABEL: scalar_xnor_v2i16_one_use:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_mov_b32 s4, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_and_b32 s0, s0, s4
; GFX7-NEXT: s_or_b32 s0, s1, s0
; GFX7-NEXT: s_lshl_b32 s1, s3, 16
; GFX7-NEXT: s_and_b32 s2, s2, s4
; GFX7-NEXT: s_or_b32 s1, s1, s2
; GFX7-NEXT: s_xor_b32 s0, s0, s1
; GFX7-NEXT: s_xor_b32 s0, s0, -1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: scalar_xnor_v2i16_one_use:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_mov_b32 s2, 0xffff
; GFX8-NEXT: s_xor_b32 s0, s0, s1
; GFX8-NEXT: s_mov_b32 s3, s2
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, s2
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, s2
; GFX8-NEXT: s_or_b32 s0, s1, s0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: scalar_xnor_v2i16_one_use:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_xor_b32 s0, s0, s1
; GFX900-NEXT: s_xor_b32 s0, s0, -1
; GFX900-NEXT: ; return to shader part epilog
;
; GFX906-LABEL: scalar_xnor_v2i16_one_use:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_xor_b32 s0, s0, s1
; GFX906-NEXT: s_xor_b32 s0, s0, -1
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_v2i16_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_xor_b32 s0, s0, s1
; GFX10-NEXT: s_xor_b32 s0, s0, -1
; GFX10-NEXT: ; return to shader part epilog
entry:
  %xor = xor <2 x i16> %a, %b
  %r0.val = xor <2 x i16> %xor, <i16 -1, i16 -1>
  %cast = bitcast <2 x i16> %r0.val to i32
  ret i32 %cast
}

; i32 case where the inner xor feeds both the xor with -1 and an add. The
; expected output keeps a separate s_xor_b32 and s_not_b32 instead of an
; s_xnor_b32, and returns both values.
define amdgpu_ps <2 x i32> @scalar_xnor_i32_mul_use(i32 inreg %a, i32 inreg %b) {
; GCN-LABEL: scalar_xnor_i32_mul_use:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_xor_b32 s1, s0, s1
; GCN-NEXT: s_not_b32 s2, s1
; GCN-NEXT: s_add_i32 s1, s1, s0
; GCN-NEXT: s_mov_b32 s0, s2
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_i32_mul_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_xor_b32 s1, s0, s1
; GFX10-NEXT: s_not_b32 s2, s1
; GFX10-NEXT: s_add_i32 s1, s1, s0
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: ; return to shader part epilog
entry:
  %xor = xor i32 %a, %b
  %r0.val = xor i32 %xor, -1
  %r1.val = add i32 %xor, %a
  %ins0 = insertelement <2 x i32> undef, i32 %r0.val, i32 0
  %ins1 = insertelement <2 x i32> %ins0, i32 %r1.val, i32 1
  ret <2 x i32> %ins1
}

; i64 xor of two inreg arguments followed by xor with -1, single use. Every
; target is expected to emit one s_xnor_b64.
define amdgpu_ps i64 @scalar_xnor_i64_one_use(i64 inreg %a, i64 inreg %b) {
; GCN-LABEL: scalar_xnor_i64_one_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_xnor_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_i64_one_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_xnor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT: ; return to shader part epilog
  %xor = xor i64 %a, %b
  %r0.val = xor i64 %xor, -1
  ret i64 %r0.val
}

; <4 x i16> form of the pattern, result bitcast to i64. No target is expected
; to produce an xnor instruction: gfx900, gfx906 and gfx1010 materialize -1 in
; a register pair and emit two s_xor_b64, while gfx700 and gfx801 emit longer
; sequences that also shift/mask/or the 16-bit elements.
; FIXME: fails to match
define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) {
; GFX7-LABEL: scalar_xnor_v4i16_one_use:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_mov_b32 s8, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_and_b32 s0, s0, s8
; GFX7-NEXT: s_or_b32 s0, s1, s0
; GFX7-NEXT: s_lshl_b32 s1, s3, 16
; GFX7-NEXT: s_and_b32 s2, s2, s8
; GFX7-NEXT: s_or_b32 s1, s1, s2
; GFX7-NEXT: s_lshl_b32 s2, s5, 16
; GFX7-NEXT: s_and_b32 s3, s4, s8
; GFX7-NEXT: s_or_b32 s2, s2, s3
; GFX7-NEXT: s_lshl_b32 s3, s7, 16
; GFX7-NEXT: s_and_b32 s4, s6, s8
; GFX7-NEXT: s_or_b32 s3, s3, s4
; GFX7-NEXT: s_mov_b32 s4, -1
; GFX7-NEXT: s_mov_b32 s5, s4
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: scalar_xnor_v4i16_one_use:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_mov_b32 s4, 0xffff
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT: s_mov_b32 s5, s4
; GFX8-NEXT: s_lshr_b32 s3, s0, 16
; GFX8-NEXT: s_and_b32 s2, s0, s4
; GFX8-NEXT: s_lshr_b32 s7, s1, 16
; GFX8-NEXT: s_and_b32 s6, s1, s4
; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[4:5]
; GFX8-NEXT: s_xor_b64 s[2:3], s[6:7], s[4:5]
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, s4
; GFX8-NEXT: s_or_b32 s0, s1, s0
; GFX8-NEXT: s_lshl_b32 s1, s3, 16
; GFX8-NEXT: s_and_b32 s2, s2, s4
; GFX8-NEXT: s_or_b32 s1, s1, s2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: scalar_xnor_v4i16_one_use:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_mov_b32 s4, -1
; GFX900-NEXT: s_mov_b32 s5, s4
; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX900-NEXT: ; return to shader part epilog
;
; GFX906-LABEL: scalar_xnor_v4i16_one_use:
; GFX906: ; %bb.0:
; GFX906-NEXT: s_mov_b32 s4, -1
; GFX906-NEXT: s_mov_b32 s5, s4
; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_v4i16_one_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s4, -1
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT: s_mov_b32 s5, s4
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX10-NEXT: ; return to shader part epilog
  %xor = xor <4 x i16> %a, %b
  %ret = xor <4 x i16> %xor, <i16 -1, i16 -1, i16 -1, i16 -1>
  %cast = bitcast <4 x i16> %ret to i64
  ret i64 %cast
}

; i64 case where the inner xor feeds both the xor with -1 and an add. The
; expected output keeps a separate s_xor_b64 and s_not_b64, and the 64-bit add
; is expected as an s_add_u32 / s_addc_u32 pair.
define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b) {
; GCN-LABEL: scalar_xnor_i64_mul_use:
; GCN: ; %bb.0:
; GCN-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
; GCN-NEXT: s_not_b64 s[4:5], s[2:3]
; GCN-NEXT: s_add_u32 s2, s2, s0
; GCN-NEXT: s_cselect_b32 s0, 1, 0
; GCN-NEXT: s_and_b32 s0, s0, 1
; GCN-NEXT: s_cmp_lg_u32 s0, 0
; GCN-NEXT: s_addc_u32 s3, s3, s1
; GCN-NEXT: s_mov_b32 s0, s4
; GCN-NEXT: s_mov_b32 s1, s5
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_i64_mul_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_xor_b64 s[2:3], s[0:1], s[2:3]
; GFX10-NEXT: s_not_b64 s[4:5], s[2:3]
; GFX10-NEXT: s_add_u32 s2, s2, s0
; GFX10-NEXT: s_cselect_b32 s0, 1, 0
; GFX10-NEXT: s_and_b32 s0, s0, 1
; GFX10-NEXT: s_cmp_lg_u32 s0, 0
; GFX10-NEXT: s_mov_b32 s0, s4
; GFX10-NEXT: s_addc_u32 s3, s3, s1
; GFX10-NEXT: s_mov_b32 s1, s5
; GFX10-NEXT: ; return to shader part epilog
  %xor = xor i64 %a, %b
  %r0.val = xor i64 %xor, -1
  %r1.val = add i64 %xor, %a
  %ins0 = insertelement <2 x i64> undef, i64 %r0.val, i32 0
  %ins1 = insertelement <2 x i64> %ins0, i64 %r1.val, i32 1
  ret <2 x i64> %ins1
}

; i32 pattern with both operands passed as ordinary (non-inreg) arguments; the
; expected code operates on v0/v1. gfx906 is expected to emit one v_xnor_b32,
; gfx1010 one v_xor3_b32 with -1 as the third source, and gfx700, gfx801 and
; gfx900 two v_xor_b32.
define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
; GFX7-LABEL: vector_xnor_i32_one_use:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: vector_xnor_i32_one_use:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: vector_xnor_i32_one_use:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: vector_xnor_i32_one_use:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xnor_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor3_b32 v0, v0, v1, -1
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %xor = xor i32 %a, %b
  %r = xor i32 %xor, -1
  ret i32 %r
}

; i64 pattern with ordinary (non-inreg) arguments. Every target, including
; gfx906 and gfx1010, is expected to emit four v_xor_b32: one pair for the
; inner xor and one pair for the xor with -1, per 32-bit half.
define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) {
; GCN-LABEL: vector_xnor_i64_one_use:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v0, v0, v2
; GCN-NEXT: v_xor_b32_e32 v1, v1, v3
; GCN-NEXT: v_xor_b32_e32 v0, -1, v0
; GCN-NEXT: v_xor_b32_e32 v1, -1, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xnor_i64_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor_b32_e32 v0, v0, v2
; GFX10-NEXT: v_xor_b32_e32 v1, v1, v3
; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX10-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %xor = xor i64 %a, %b
  %r = xor i64 %xor, -1
  ret i64 %r
}

; i32 pattern mixing one inreg argument (%s) with one ordinary argument (%v),
; written as xor %s, %v. gfx906 is expected to emit v_xnor_b32_e32 with s0 as
; the first source, gfx1010 one v_xor3_b32, and the older targets two
; v_xor_b32.
define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
; GFX7-LABEL: xnor_s_v_i32_one_use:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: xnor_s_v_i32_one_use:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: xnor_s_v_i32_one_use:
; GFX900: ; %bb.0:
; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT: ; return to shader part epilog
;
; GFX906-LABEL: xnor_s_v_i32_one_use:
; GFX906: ; %bb.0:
; GFX906-NEXT: v_xnor_b32_e32 v0, s0, v0
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_s_v_i32_one_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xor3_b32 v0, s0, v0, -1
; GFX10-NEXT: ; return to shader part epilog
  %xor = xor i32 %s, %v
  %d = xor i32 %xor, -1
  %cast = bitcast i32 %d to float
  ret float %cast
}

; Same as xnor_s_v_i32_one_use but with the IR operands swapped (xor %v, %s).
; gfx906 is expected to use the _e64 encoding with the operands in v0, s0
; order, and gfx1010 a v_xor3_b32 with v0 before s0.
define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) {
; GFX7-LABEL: xnor_v_s_i32_one_use:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: xnor_v_s_i32_one_use:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: xnor_v_s_i32_one_use:
; GFX900: ; %bb.0:
; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT: ; return to shader part epilog
;
; GFX906-LABEL: xnor_v_s_i32_one_use:
; GFX906: ; %bb.0:
; GFX906-NEXT: v_xnor_b32_e64 v0, v0, s0
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_v_s_i32_one_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_xor3_b32 v0, v0, s0, -1
; GFX10-NEXT: ; return to shader part epilog
  %xor = xor i32 %v, %s
  %d = xor i32 %xor, -1
  %cast = bitcast i32 %d to float
  ret float %cast
}

; i64 pattern with an inreg %a and an ordinary %b64 that is first shifted left
; by 29, written as xor %a, %b. After the 64-bit shift, every target is
; expected to emit four v_xor_b32 (no xnor or xor3).
define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) {
; GFX7-LABEL: xnor_i64_s_v_one_use:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 29
; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1
; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: xnor_i64_s_v_one_use:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: xnor_i64_s_v_one_use:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT: v_xor_b32_e32 v1, s1, v1
; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX900-NEXT: ; return to shader part epilog
;
; GFX906-LABEL: xnor_i64_s_v_one_use:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
; GFX906-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX906-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_i64_s_v_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX10-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX10-NEXT: ; return to shader part epilog
entry:
  %b = shl i64 %b64, 29
  %xor = xor i64 %a, %b
  %r0.val = xor i64 %xor, -1
  %cast = bitcast i64 %r0.val to <2 x float>
  ret <2 x float> %cast
}

; Same as xnor_i64_s_v_one_use but with the IR operands swapped (xor %b, %a).
; The expected instructions are the same shift plus four v_xor_b32 on every
; target.
define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) {
; GFX7-LABEL: xnor_i64_v_s_one_use:
; GFX7: ; %bb.0:
; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 29
; GFX7-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT: v_xor_b32_e32 v1, s1, v1
; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX7-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: xnor_i64_v_s_one_use:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX8-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT: v_xor_b32_e32 v1, s1, v1
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX900-LABEL: xnor_i64_v_s_one_use:
; GFX900: ; %bb.0:
; GFX900-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX900-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT: v_xor_b32_e32 v1, s1, v1
; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX900-NEXT: ; return to shader part epilog
;
; GFX906-LABEL: xnor_i64_v_s_one_use:
; GFX906: ; %bb.0:
; GFX906-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX906-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX906-NEXT: v_xor_b32_e32 v1, s1, v1
; GFX906-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX906-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX906-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: xnor_i64_v_s_one_use:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX10-NEXT: v_xor_b32_e32 v0, s0, v0
; GFX10-NEXT: v_xor_b32_e32 v1, s1, v1
; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX10-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX10-NEXT: ; return to shader part epilog
  %b = shl i64 %b64, 29
  %xor = xor i64 %b, %a
  %r0.val = xor i64 %xor, -1
  %cast = bitcast i64 %r0.val to <2 x float>
  ret <2 x float> %cast
}

; i32 form where the xor with -1 is applied to %a before the xor with %b.
; gfx906 is still expected to emit one v_xnor_b32 and gfx1010 one v_xor3_b32
; (with -1 as the middle source); the older targets emit two v_xor_b32.
define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
; GFX7-LABEL: vector_xor_na_b_i32_one_use:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: vector_xor_na_b_i32_one_use:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: vector_xor_na_b_i32_one_use:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: vector_xor_na_b_i32_one_use:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_xnor_b32_e32 v0, v0, v1
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xor_na_b_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor3_b32 v0, v0, -1, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %na = xor i32 %a, -1
  %r = xor i32 %na, %b
  ret i32 %r
}

; i32 form where the xor with -1 is applied to %b before the xor with %a.
; gfx906 is expected to emit one v_xnor_b32 with v1 as the first source and
; gfx1010 one v_xor3_b32 starting from v1; the older targets emit two
; v_xor_b32.
define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
; GFX7-LABEL: vector_xor_a_nb_i32_one_use:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX7-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: vector_xor_a_nb_i32_one_use:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX8-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-LABEL: vector_xor_a_nb_i32_one_use:
; GFX900: ; %bb.0: ; %entry
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: v_xor_b32_e32 v1, -1, v1
; GFX900-NEXT: v_xor_b32_e32 v0, v0, v1
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX906-LABEL: vector_xor_a_nb_i32_one_use:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_xnor_b32_e32 v0, v1, v0
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xor_a_nb_i32_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_xor3_b32 v0, v1, -1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
  %nb = xor i32 %b, -1
  %r = xor i32 %a, %nb
  ret i32 %r
}

; i64 inreg form where the xor with -1 is applied to %b before the xor with
; %a. Every target is expected to emit s_not_b64 followed by s_xor_b64 rather
; than a single s_xnor_b64.
define amdgpu_ps <2 x i32> @scalar_xor_a_nb_i64_one_use(i64 inreg %a, i64 inreg %b) {
; GCN-LABEL: scalar_xor_a_nb_i64_one_use:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_not_b64 s[2:3], s[2:3]
; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: scalar_xor_a_nb_i64_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_not_b64 s[2:3], s[2:3]
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT: ; return to shader part epilog
entry:
  %nb = xor i64 %b, -1
  %r0.val = xor i64 %a, %nb
  %cast = bitcast i64 %r0.val to <2 x i32>
  ret <2 x i32> %cast
}

; i64 inreg form where the xor with -1 is applied to %a before the xor with
; %b. Every target is expected to emit s_not_b64 followed by s_xor_b64 rather
; than a single s_xnor_b64.
define amdgpu_ps <2 x i32> @scalar_xor_na_b_i64_one_use(i64 inreg %a, i64 inreg %b) {
; GCN-LABEL: scalar_xor_na_b_i64_one_use:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_not_b64 s[0:1], s[0:1]
; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: scalar_xor_na_b_i64_one_use:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_not_b64 s[0:1], s[0:1]
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT: ; return to shader part epilog
entry:
  %na = xor i64 %a, -1
  %r0.val = xor i64 %na, %b
  %cast = bitcast i64 %r0.val to <2 x i32>
  ret <2 x i32> %cast
}