; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX900 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX906 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

define amdgpu_ps i32 @scalar_xnor_i32_one_use(i32 inreg %a, i32 inreg %b) {
; GCN-LABEL: scalar_xnor_i32_one_use:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_xnor_b32 s0, s0, s1
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_i32_one_use:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_xnor_b32 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
entry:
  %xor = xor i32 %a, %b
  %r0.val = xor i32 %xor, -1
  ret i32 %r0.val
}

; FIXME: fails to match
define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) {
; GFX7-LABEL: scalar_xnor_v2i16_one_use:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_mov_b32 s4, 0xffff
; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
; GFX7-NEXT:    s_and_b32 s0, s0, s4
; GFX7-NEXT:    s_or_b32 s0, s1, s0
; GFX7-NEXT:    s_lshl_b32 s1, s3, 16
; GFX7-NEXT:    s_and_b32 s2, s2, s4
; GFX7-NEXT:    s_or_b32 s1, s1, s2
; GFX7-NEXT:    s_xor_b32 s0, s0, s1
; GFX7-NEXT:    s_xor_b32 s0, s0, -1
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: scalar_xnor_v2i16_one_use:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_mov_b32 s2, 0xffff
; GFX8-NEXT:    s_xor_b32 s0, s0, s1
; GFX8-NEXT:    s_mov_b32 s3, s2
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s2
; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s2
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: scalar_xnor_v2i16_one_use:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_xor_b32 s0, s0, s1
; GFX900-NEXT:    s_xor_b32 s0, s0, -1
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX906-LABEL: scalar_xnor_v2i16_one_use:
; GFX906:       ; %bb.0: ; %entry
; GFX906-NEXT:    s_xor_b32 s0, s0, s1
; GFX906-NEXT:    s_xor_b32 s0, s0, -1
; GFX906-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_v2i16_one_use:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_xor_b32 s0, s0, s1
; GFX10-NEXT:    s_xor_b32 s0, s0, -1
; GFX10-NEXT:    ; return to shader part epilog
entry:
  %xor = xor <2 x i16> %a, %b
  %r0.val = xor <2 x i16> %xor, <i16 -1, i16 -1>
  %cast = bitcast <2 x i16> %r0.val to i32
  ret i32 %cast
}

define amdgpu_ps <2 x i32> @scalar_xnor_i32_mul_use(i32 inreg %a, i32 inreg %b) {
; GCN-LABEL: scalar_xnor_i32_mul_use:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_xor_b32 s1, s0, s1
; GCN-NEXT:    s_not_b32 s2, s1
; GCN-NEXT:    s_add_i32 s1, s1, s0
; GCN-NEXT:    s_mov_b32 s0, s2
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_i32_mul_use:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_xor_b32 s1, s0, s1
; GFX10-NEXT:    s_not_b32 s2, s1
; GFX10-NEXT:    s_add_i32 s1, s1, s0
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    ; return to shader part epilog
entry:
  %xor = xor i32 %a, %b
  %r0.val = xor i32 %xor, -1
  %r1.val = add i32 %xor, %a
  %ins0 = insertelement <2 x i32> undef, i32 %r0.val, i32 0
  %ins1 = insertelement <2 x i32> %ins0, i32 %r1.val, i32 1
  ret <2 x i32> %ins1
}

define amdgpu_ps i64 @scalar_xnor_i64_one_use(i64 inreg %a, i64 inreg %b) {
; GCN-LABEL: scalar_xnor_i64_one_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_xnor_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_i64_one_use:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_xnor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT:    ; return to shader part epilog
  %xor = xor i64 %a, %b
  %r0.val = xor i64 %xor, -1
  ret i64 %r0.val
}

; FIXME: fails to match
define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) {
; GFX7-LABEL: scalar_xnor_v4i16_one_use:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_mov_b32 s8, 0xffff
; GFX7-NEXT:    s_lshl_b32 s1, s1, 16
; GFX7-NEXT:    s_and_b32 s0, s0, s8
; GFX7-NEXT:    s_or_b32 s0, s1, s0
; GFX7-NEXT:    s_lshl_b32 s1, s3, 16
; GFX7-NEXT:    s_and_b32 s2, s2, s8
; GFX7-NEXT:    s_or_b32 s1, s1, s2
; GFX7-NEXT:    s_lshl_b32 s2, s5, 16
; GFX7-NEXT:    s_and_b32 s3, s4, s8
; GFX7-NEXT:    s_or_b32 s2, s2, s3
; GFX7-NEXT:    s_lshl_b32 s3, s7, 16
; GFX7-NEXT:    s_and_b32 s4, s6, s8
; GFX7-NEXT:    s_or_b32 s3, s3, s4
; GFX7-NEXT:    s_mov_b32 s4, -1
; GFX7-NEXT:    s_mov_b32 s5, s4
; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX7-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: scalar_xnor_v4i16_one_use:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_mov_b32 s4, 0xffff
; GFX8-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT:    s_mov_b32 s5, s4
; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
; GFX8-NEXT:    s_and_b32 s2, s0, s4
; GFX8-NEXT:    s_lshr_b32 s7, s1, 16
; GFX8-NEXT:    s_and_b32 s6, s1, s4
; GFX8-NEXT:    s_xor_b64 s[0:1], s[2:3], s[4:5]
; GFX8-NEXT:    s_xor_b64 s[2:3], s[6:7], s[4:5]
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, s4
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    s_lshl_b32 s1, s3, 16
; GFX8-NEXT:    s_and_b32 s2, s2, s4
; GFX8-NEXT:    s_or_b32 s1, s1, s2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: scalar_xnor_v4i16_one_use:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_mov_b32 s4, -1
; GFX900-NEXT:    s_mov_b32 s5, s4
; GFX900-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX900-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX906-LABEL: scalar_xnor_v4i16_one_use:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    s_mov_b32 s4, -1
; GFX906-NEXT:    s_mov_b32 s5, s4
; GFX906-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX906-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX906-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_v4i16_one_use:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s4, -1
; GFX10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT:    s_mov_b32 s5, s4
; GFX10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[4:5]
; GFX10-NEXT:    ; return to shader part epilog
  %xor = xor <4 x i16> %a, %b
  %ret = xor <4 x i16> %xor, <i16 -1, i16 -1, i16 -1, i16 -1>
  %cast = bitcast <4 x i16> %ret to i64
  ret i64 %cast
}

define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b) {
; GCN-LABEL: scalar_xnor_i64_mul_use:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_xor_b64 s[2:3], s[0:1], s[2:3]
; GCN-NEXT:    s_not_b64 s[4:5], s[2:3]
; GCN-NEXT:    s_add_u32 s2, s2, s0
; GCN-NEXT:    s_cselect_b32 s0, 1, 0
; GCN-NEXT:    s_and_b32 s0, s0, 1
; GCN-NEXT:    s_cmp_lg_u32 s0, 0
; GCN-NEXT:    s_addc_u32 s3, s3, s1
; GCN-NEXT:    s_mov_b32 s0, s4
; GCN-NEXT:    s_mov_b32 s1, s5
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: scalar_xnor_i64_mul_use:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_xor_b64 s[2:3], s[0:1], s[2:3]
; GFX10-NEXT:    s_not_b64 s[4:5], s[2:3]
; GFX10-NEXT:    s_add_u32 s2, s2, s0
; GFX10-NEXT:    s_cselect_b32 s0, 1, 0
; GFX10-NEXT:    s_and_b32 s0, s0, 1
; GFX10-NEXT:    s_cmp_lg_u32 s0, 0
; GFX10-NEXT:    s_mov_b32 s0, s4
; GFX10-NEXT:    s_addc_u32 s3, s3, s1
; GFX10-NEXT:    s_mov_b32 s1, s5
; GFX10-NEXT:    ; return to shader part epilog
  %xor = xor i64 %a, %b
  %r0.val = xor i64 %xor, -1
  %r1.val = add i64 %xor, %a
  %ins0 = insertelement <2 x i64> undef, i64 %r0.val, i32 0
  %ins1 = insertelement <2 x i64> %ins0, i64 %r1.val, i32 1
  ret <2 x i64> %ins1
}

define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
; GFX7-LABEL: vector_xnor_i32_one_use:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: vector_xnor_i32_one_use:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX900-LABEL: vector_xnor_i32_one_use:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX900-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX906-LABEL: vector_xnor_i32_one_use:
; GFX906:       ; %bb.0: ; %entry
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_xnor_b32_e32 v0, v0, v1
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xnor_i32_one_use:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor3_b32 v0, v0, v1, -1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %xor = xor i32 %a, %b
  %r = xor i32 %xor, -1
  ret i32 %r
}

define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) {
; GCN-LABEL: vector_xnor_i64_one_use:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_xor_b32_e32 v0, v0, v2
; GCN-NEXT:    v_xor_b32_e32 v1, v1, v3
; GCN-NEXT:    v_xor_b32_e32 v0, -1, v0
; GCN-NEXT:    v_xor_b32_e32 v1, -1, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xnor_i64_one_use:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v2
; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
; GFX10-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %xor = xor i64 %a, %b
  %r = xor i64 %xor, -1
  ret i64 %r
}

define amdgpu_ps float @xnor_s_v_i32_one_use(i32 inreg %s, i32 %v) {
; GFX7-LABEL: xnor_s_v_i32_one_use:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: xnor_s_v_i32_one_use:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: xnor_s_v_i32_one_use:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX906-LABEL: xnor_s_v_i32_one_use:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    v_xnor_b32_e32 v0, s0, v0
; GFX906-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xnor_s_v_i32_one_use:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xor3_b32 v0, s0, v0, -1
; GFX10-NEXT:    ; return to shader part epilog
  %xor = xor i32 %s, %v
  %d = xor i32 %xor, -1
  %cast = bitcast i32 %d to float
  ret float %cast
}

define amdgpu_ps float @xnor_v_s_i32_one_use(i32 inreg %s, i32 %v) {
; GFX7-LABEL: xnor_v_s_i32_one_use:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: xnor_v_s_i32_one_use:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: xnor_v_s_i32_one_use:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX906-LABEL: xnor_v_s_i32_one_use:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    v_xnor_b32_e64 v0, v0, s0
; GFX906-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xnor_v_s_i32_one_use:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xor3_b32 v0, v0, s0, -1
; GFX10-NEXT:    ; return to shader part epilog
  %xor = xor i32 %v, %s
  %d = xor i32 %xor, -1
  %cast = bitcast i32 %d to float
  ret float %cast
}

define amdgpu_ps <2 x float> @xnor_i64_s_v_one_use(i64 inreg %a, i64 %b64) {
; GFX7-LABEL: xnor_i64_s_v_one_use:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 29
; GFX7-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT:    v_xor_b32_e32 v1, s1, v1
; GFX7-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: xnor_i64_s_v_one_use:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, s1, v1
; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: xnor_i64_s_v_one_use:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX900-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT:    v_xor_b32_e32 v1, s1, v1
; GFX900-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX906-LABEL: xnor_i64_s_v_one_use:
; GFX906:       ; %bb.0: ; %entry
; GFX906-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX906-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX906-NEXT:    v_xor_b32_e32 v1, s1, v1
; GFX906-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX906-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX906-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xnor_i64_s_v_one_use:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX10-NEXT:    v_xor_b32_e32 v1, s1, v1
; GFX10-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX10-NEXT:    ; return to shader part epilog
entry:
  %b = shl i64 %b64, 29
  %xor = xor i64 %a, %b
  %r0.val = xor i64 %xor, -1
  %cast = bitcast i64 %r0.val to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x float> @xnor_i64_v_s_one_use(i64 inreg %a, i64 %b64) {
; GFX7-LABEL: xnor_i64_v_s_one_use:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_lshl_b64 v[0:1], v[0:1], 29
; GFX7-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX7-NEXT:    v_xor_b32_e32 v1, s1, v1
; GFX7-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: xnor_i64_v_s_one_use:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, s1, v1
; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX900-LABEL: xnor_i64_v_s_one_use:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX900-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX900-NEXT:    v_xor_b32_e32 v1, s1, v1
; GFX900-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX900-NEXT:    ; return to shader part epilog
;
; GFX906-LABEL: xnor_i64_v_s_one_use:
; GFX906:       ; %bb.0:
; GFX906-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX906-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX906-NEXT:    v_xor_b32_e32 v1, s1, v1
; GFX906-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX906-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX906-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xnor_i64_v_s_one_use:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 29, v[0:1]
; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
; GFX10-NEXT:    v_xor_b32_e32 v1, s1, v1
; GFX10-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX10-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX10-NEXT:    ; return to shader part epilog
  %b = shl i64 %b64, 29
  %xor = xor i64 %b, %a
  %r0.val = xor i64 %xor, -1
  %cast = bitcast i64 %r0.val to <2 x float>
  ret <2 x float> %cast
}

define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
; GFX7-LABEL: vector_xor_na_b_i32_one_use:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX7-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: vector_xor_na_b_i32_one_use:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX900-LABEL: vector_xor_na_b_i32_one_use:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_xor_b32_e32 v0, -1, v0
; GFX900-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX906-LABEL: vector_xor_na_b_i32_one_use:
; GFX906:       ; %bb.0: ; %entry
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_xnor_b32_e32 v0, v0, v1
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xor_na_b_i32_one_use:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor3_b32 v0, v0, -1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %na = xor i32 %a, -1
  %r = xor i32 %na, %b
  ret i32 %r
}

define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
; GFX7-LABEL: vector_xor_a_nb_i32_one_use:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX7-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: vector_xor_a_nb_i32_one_use:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX900-LABEL: vector_xor_a_nb_i32_one_use:
; GFX900:       ; %bb.0: ; %entry
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_xor_b32_e32 v1, -1, v1
; GFX900-NEXT:    v_xor_b32_e32 v0, v0, v1
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX906-LABEL: vector_xor_a_nb_i32_one_use:
; GFX906:       ; %bb.0: ; %entry
; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT:    v_xnor_b32_e32 v0, v1, v0
; GFX906-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: vector_xor_a_nb_i32_one_use:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor3_b32 v0, v1, -1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %nb = xor i32 %b, -1
  %r = xor i32 %a, %nb
  ret i32 %r
}

define amdgpu_ps <2 x i32> @scalar_xor_a_nb_i64_one_use(i64 inreg %a, i64 inreg %b) {
; GCN-LABEL: scalar_xor_a_nb_i64_one_use:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_not_b64 s[2:3], s[2:3]
; GCN-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: scalar_xor_a_nb_i64_one_use:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_not_b64 s[2:3], s[2:3]
; GFX10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT:    ; return to shader part epilog
entry:
  %nb = xor i64 %b, -1
  %r0.val = xor i64 %a, %nb
  %cast = bitcast i64 %r0.val to <2 x i32>
  ret <2 x i32> %cast
}

define amdgpu_ps <2 x i32> @scalar_xor_na_b_i64_one_use(i64 inreg %a, i64 inreg %b) {
; GCN-LABEL: scalar_xor_na_b_i64_one_use:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_not_b64 s[0:1], s[0:1]
; GCN-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: scalar_xor_na_b_i64_one_use:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_not_b64 s[0:1], s[0:1]
; GFX10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT:    ; return to shader part epilog
entry:
  %na = xor i64 %a, -1
  %r0.val = xor i64 %na, %b
  %cast = bitcast i64 %r0.val to <2 x i32>
  ret <2 x i32> %cast
}
