1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK,SSE2
3; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK,SSE41
4
; Returns lane 1 of (sext(A <s B) == 0), widened to 0 / -1.
; A sign-extended i1 lane is either 0 or -1, so comparing it for equality
; with zero is the negation of the original compare. The expected assembly
; is therefore one pcmpgtd, a lane-1 extract and a scalar notl.
define i32 @test_eq_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_eq_1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    notl %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_eq_1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE41-NEXT:    pextrd $1, %xmm1, %eax
; SSE41-NEXT:    notl %eax
; SSE41-NEXT:    retq
  %lt = icmp slt <4 x i32> %A, %B
  %mask = sext <4 x i1> %lt to <4 x i32>
  %mask.eq0 = icmp eq <4 x i32> %mask, zeroinitializer
  %lane1 = extractelement <4 x i1> %mask.eq0, i32 1
  %lane1.wide = sext i1 %lane1 to i32
  ret i32 %lane1.wide
}
27
; Returns lane 1 of (sext(A <s B) != 0), widened to 0 / -1.
; A sign-extended i1 lane is non-zero exactly when the original compare was
; true, so the expected assembly is just pcmpgtd plus a lane-1 extract.
define i32 @test_ne_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ne_1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_ne_1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE41-NEXT:    pextrd $1, %xmm1, %eax
; SSE41-NEXT:    retq
  %lt = icmp slt <4 x i32> %A, %B
  %mask = sext <4 x i1> %lt to <4 x i32>
  %mask.ne0 = icmp ne <4 x i32> %mask, zeroinitializer
  %lane1 = extractelement <4 x i1> %mask.ne0, i32 1
  %lane1.wide = sext i1 %lane1 to i32
  ret i32 %lane1.wide
}
48
; Returns lane 1 of (sext(A <s B) <=s 0), widened to 0 / -1.
; Both possible lane values (0 and -1) are <= 0, so the whole computation is
; expected to fold to the constant -1.
define i32 @test_le_1(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_le_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl $-1, %eax
; CHECK-NEXT:    retq
  %lt = icmp slt <4 x i32> %A, %B
  %mask = sext <4 x i1> %lt to <4 x i32>
  %mask.le0 = icmp sle <4 x i32> %mask, zeroinitializer
  %lane1 = extractelement <4 x i1> %mask.le0, i32 1
  %lane1.wide = sext i1 %lane1 to i32
  ret i32 %lane1.wide
}
61
; Returns lane 1 of (sext(A <s B) >=s 0), widened to 0 / -1.
; Of the two possible lane values only 0 is >= 0, so this is the negation of
; the original compare: pcmpgtd, lane-1 extract, scalar notl.
define i32 @test_ge_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ge_1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    notl %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_ge_1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE41-NEXT:    pextrd $1, %xmm1, %eax
; SSE41-NEXT:    notl %eax
; SSE41-NEXT:    retq
  %lt = icmp slt <4 x i32> %A, %B
  %mask = sext <4 x i1> %lt to <4 x i32>
  %mask.ge0 = icmp sge <4 x i32> %mask, zeroinitializer
  %lane1 = extractelement <4 x i1> %mask.ge0, i32 1
  %lane1.wide = sext i1 %lane1 to i32
  ret i32 %lane1.wide
}
84
; Returns lane 1 of (sext(A <s B) <s 0), widened to 0 / -1.
; A sign-extended i1 lane is negative exactly when the original compare was
; true, so the second compare must fold back to the first one and the
; expected assembly is just pcmpgtd plus a lane-1 extract.
;
; The extract has to read %cmp1 (the compare against zero). Reading %cmp
; instead leaves %cmp1 dead, and the fold this function exists to exercise
; is never reached.
define i32 @test_lt_1(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_lt_1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_lt_1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE41-NEXT:    pextrd $1, %xmm1, %eax
; SSE41-NEXT:    retq
  %cmp = icmp slt <4 x i32> %A, %B
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp slt <4 x i32> %sext, zeroinitializer
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}
105
; Returns lane 1 of (sext(A <s B) >s 0), widened to 0 / -1.
; Neither possible lane value (0 or -1) is > 0, so the whole computation is
; expected to fold to the constant 0.
define i32 @test_gt_1(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_gt_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %lt = icmp slt <4 x i32> %A, %B
  %mask = sext <4 x i1> %lt to <4 x i32>
  %mask.gt0 = icmp sgt <4 x i32> %mask, zeroinitializer
  %lane1 = extractelement <4 x i1> %mask.gt0, i32 1
  %lane1.wide = sext i1 %lane1 to i32
  ret i32 %lane1.wide
}
118
; Returns lane 1 of (sext(B <s A) == 0), widened to 0 / -1.
; The inner compare has its operands swapped relative to the parameter
; order (B on the left). Equality of a 0 / -1 lane with zero is the negated
; compare, so the expected assembly is pcmpgtd, lane-1 extract, notl.
define i32 @test_eq_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_eq_2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    notl %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_eq_2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    pextrd $1, %xmm0, %eax
; SSE41-NEXT:    notl %eax
; SSE41-NEXT:    retq
  %gt = icmp slt <4 x i32> %B, %A
  %mask = sext <4 x i1> %gt to <4 x i32>
  %mask.eq0 = icmp eq <4 x i32> %mask, zeroinitializer
  %lane1 = extractelement <4 x i1> %mask.eq0, i32 1
  %lane1.wide = sext i1 %lane1 to i32
  ret i32 %lane1.wide
}
141
; Returns lane 1 of (sext(B <s A) != 0), widened to 0 / -1.
; The inner compare has B on the left. A 0 / -1 lane is non-zero exactly
; when that compare was true, so only pcmpgtd and a lane-1 extract are
; expected.
define i32 @test_ne_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_ne_2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_ne_2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    pextrd $1, %xmm0, %eax
; SSE41-NEXT:    retq
  %gt = icmp slt <4 x i32> %B, %A
  %mask = sext <4 x i1> %gt to <4 x i32>
  %mask.ne0 = icmp ne <4 x i32> %mask, zeroinitializer
  %lane1 = extractelement <4 x i1> %mask.ne0, i32 1
  %lane1.wide = sext i1 %lane1 to i32
  ret i32 %lane1.wide
}
162
; Returns lane 1 of (0 <=s sext(B <s A)), widened to 0 / -1.
; The zero vector is the LEFT operand of the outer compare here. Of the two
; possible lane values only 0 satisfies 0 <= lane, so the result is the
; negated inner compare: pcmpgtd, lane-1 extract, notl.
define i32 @test_le_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_le_2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    notl %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_le_2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    pextrd $1, %xmm0, %eax
; SSE41-NEXT:    notl %eax
; SSE41-NEXT:    retq
  %gt = icmp slt <4 x i32> %B, %A
  %mask = sext <4 x i1> %gt to <4 x i32>
  %zero.le.mask = icmp sle <4 x i32> zeroinitializer, %mask
  %lane1 = extractelement <4 x i1> %zero.le.mask, i32 1
  %lane1.wide = sext i1 %lane1 to i32
  ret i32 %lane1.wide
}
185
; Returns lane 1 of (0 >=s sext(B <s A)), widened to 0 / -1.
; The zero vector is the LEFT operand of the outer compare. Both possible
; lane values (0 and -1) satisfy 0 >= lane, so the result is expected to
; fold to the constant -1.
define i32 @test_ge_2(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_ge_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl $-1, %eax
; CHECK-NEXT:    retq
  %gt = icmp slt <4 x i32> %B, %A
  %mask = sext <4 x i1> %gt to <4 x i32>
  %zero.ge.mask = icmp sge <4 x i32> zeroinitializer, %mask
  %lane1 = extractelement <4 x i1> %zero.ge.mask, i32 1
  %lane1.wide = sext i1 %lane1 to i32
  ret i32 %lane1.wide
}
198
; Returns lane 1 of (0 <s sext(B <s A)), widened to 0 / -1.
; The zero vector is the LEFT operand of the outer compare. Neither possible
; lane value (0 or -1) is strictly greater than zero, so the result is
; expected to fold to the constant 0.
;
; The extract has to read %cmp1 (the compare against zero). Reading %cmp
; instead leaves %cmp1 dead, and the function merely re-tests the plain
; B <s A compare.
; NOTE(review): the assertions below were rewritten by hand to the
; constant-zero form; re-run utils/update_llc_test_checks.py to confirm.
define i32 @test_lt_2(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_lt_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    retq
  %cmp = icmp slt <4 x i32> %B, %A
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %cmp1 = icmp slt <4 x i32> zeroinitializer, %sext
  %t0 = extractelement <4 x i1> %cmp1, i32 1
  %t1 = sext i1 %t0 to i32
  ret i32 %t1
}
219
; Returns lane 1 of (0 >s sext(B <s A)), widened to 0 / -1.
; The zero vector is the LEFT operand of the outer compare. A 0 / -1 lane is
; below zero exactly when the inner compare was true, so only pcmpgtd and a
; lane-1 extract are expected.
define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) {
; SSE2-LABEL: test_gt_2:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_gt_2:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pcmpgtd %xmm1, %xmm0
; SSE41-NEXT:    pextrd $1, %xmm0, %eax
; SSE41-NEXT:    retq
  %gt = icmp slt <4 x i32> %B, %A
  %mask = sext <4 x i1> %gt to <4 x i32>
  %zero.gt.mask = icmp sgt <4 x i32> zeroinitializer, %mask
  %lane1 = extractelement <4 x i1> %zero.gt.mask, i32 1
  %lane1.wide = sext i1 %lane1 to i32
  ret i32 %lane1.wide
}
240
241; (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
242; Don't combine with i1 - out of range constant
; Loads an i1 from %A2 and derives two predicates from it (value != true,
; value == false). It then compares those predicates with an unsigned
; greater-than and uses that i1 result as the GEP index for storing the
; first predicate back relative to %A2. All arithmetic stays on i1.
define void @test_i1_uge(i1 *%A2) {
; CHECK-LABEL: test_i1_uge:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movb (%rdi), %al
; CHECK-NEXT:    movl %eax, %ecx
; CHECK-NEXT:    xorb $1, %cl
; CHECK-NEXT:    andb %cl, %al
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    negq %rax
; CHECK-NEXT:    andb $1, %cl
; CHECK-NEXT:    movb %cl, (%rdi,%rax)
; CHECK-NEXT:    retq
  %bit = load i1, i1* %A2
  %not.true = icmp ne i1 %bit, true
  %is.false = icmp eq i1 %bit, false
  %idx = icmp ugt i1 %not.true, %is.false
  %slot = getelementptr i1, i1* %A2, i1 %idx
  store i1 %not.true, i1* %slot
  ret void
}
264
265; This should not get folded to 0.
266
; i1 arithmetic chain (sub with wrap flags, udiv by an always-true compare)
; whose result is zero-extended to i64. The expected assembly still computes
; a data-dependent low bit from both arguments (notb / addb / and with 1)
; instead of returning a constant.
define i64 @PR40657(i8 %var2, i8 %var9) {
; CHECK-LABEL: PR40657:
; CHECK:       # %bb.0:
; CHECK-NEXT:    notb %sil
; CHECK-NEXT:    addb %dil, %sil
; CHECK-NEXT:    movzbl %sil, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
  %bit9 = trunc i8 %var9 to i1
  %const.bit = trunc i8 175 to i1
  %diff = sub nsw i1 %bit9, %const.bit
  %always = icmp eq i64 1114591064, 1114591064
  %quot = udiv i1 %diff, %always
  %bit2 = trunc i8 %var2 to i1
  %out = sub nsw nuw i1 %bit2, %quot
  %out.wide = zext i1 %out to i64
  ret i64 %out.wide
}
285
286; This should not get folded to 0.
287
; Chain of i1 subtractions (with assorted nsw/nuw flags) on the truncated
; low bits of three i8 arguments, followed by an i1 inequality and a final
; flagged subtraction whose result is zero-extended to i64. The expected
; assembly keeps the full byte-wide sub/xor sequence and masks the low bit
; at the end; it does not collapse to a constant.
define i64 @PR40657_commute(i8 %var7, i8 %var8, i8 %var9) {
; CHECK-LABEL: PR40657_commute:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subb %dil, %sil
; CHECK-NEXT:    subb %sil, %dl
; CHECK-NEXT:    subb %dl, %sil
; CHECK-NEXT:    xorb %dl, %sil
; CHECK-NEXT:    subb %sil, %dl
; CHECK-NEXT:    movzbl %dl, %eax
; CHECK-NEXT:    andl $1, %eax
; CHECK-NEXT:    retq
  %bit9 = trunc i8 %var9 to i1
  %bit8 = trunc i8 %var8 to i1
  %bit7 = trunc i8 %var7 to i1
  %d87 = sub nsw nuw i1 %bit8, %bit7
  %lhs = sub nuw i1 %bit9, %d87
  %rhs = sub i1 %d87, %lhs
  %differs = icmp ne i1 %lhs, %rhs
  %out = sub nsw nuw i1 %lhs, %differs
  %out.wide = zext i1 %out to i64
  ret i64 %out.wide
}
310
; Selects %s1 when (x - y) == y, otherwise %s2.
; The expected assembly evaluates the condition as x == y + y: it doubles y
; with addl, compares against x and picks the result with a cmov.
define i64 @sub_to_shift_to_add(i32 %x, i32 %y, i64 %s1, i64 %s2) {
; CHECK-LABEL: sub_to_shift_to_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdx, %rax
; CHECK-NEXT:    addl %esi, %esi
; CHECK-NEXT:    cmpl %esi, %edi
; CHECK-NEXT:    cmovneq %rcx, %rax
; CHECK-NEXT:    retq
  %diff = sub i32 %x, %y
  %diff.is.y = icmp eq i32 %diff, %y
  %picked = select i1 %diff.is.y, i64 %s1, i64 %s2
  ret i64 %picked
}
324
; Per-lane select of %s1 where (x - y) == y, otherwise %s2.
; The expected assembly evaluates the condition as x == y + y (paddd then
; pcmpeqd). The select itself is a pand / pandn / por sequence in one
; configuration and a single blendvps in the other.
define <4 x float> @sub_to_shift_to_add_vec(<4 x i32> %x, <4 x i32> %y, <4 x float> %s1, <4 x float> %s2) {
; SSE2-LABEL: sub_to_shift_to_add_vec:
; SSE2:       # %bb.0:
; SSE2-NEXT:    paddd %xmm1, %xmm1
; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pandn %xmm3, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: sub_to_shift_to_add_vec:
; SSE41:       # %bb.0:
; SSE41-NEXT:    paddd %xmm1, %xmm1
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE41-NEXT:    blendvps %xmm0, %xmm2, %xmm3
; SSE41-NEXT:    movaps %xmm3, %xmm0
; SSE41-NEXT:    retq
  %diff = sub <4 x i32> %x, %y
  %diff.is.y = icmp eq <4 x i32> %diff, %y
  %picked = select <4 x i1> %diff.is.y, <4 x float> %s1, <4 x float> %s2
  ret <4 x float> %picked
}
348
; Selects %s1 when (42 - x) == x, otherwise %s2.
; The expected assembly evaluates the condition as x + x == 42: it doubles x
; with addl, compares against the immediate 42 and picks with a cmov.
define i64 @sub_constant_to_shift_to_add(i32 %x, i64 %s1, i64 %s2) {
; CHECK-LABEL: sub_constant_to_shift_to_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rsi, %rax
; CHECK-NEXT:    addl %edi, %edi
; CHECK-NEXT:    cmpl $42, %edi
; CHECK-NEXT:    cmovneq %rdx, %rax
; CHECK-NEXT:    retq
  %diff = sub i32 42, %x
  %diff.is.x = icmp eq i32 %diff, %x
  %picked = select i1 %diff.is.x, i64 %s1, i64 %s2
  ret i64 %picked
}
362
363