; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse2 | FileCheck %s -check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse4.1 | FileCheck %s -check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx | FileCheck %s -check-prefix=AVX

; The file is compiled three times, once per feature level (sse2, sse4.1, avx),
; and each run is matched against its own check prefix.

; Unsigned >= on <16 x i8>, with the i1 result sign-extended to a byte mask.
; Every configuration is expected to use an unsigned byte max followed by a
; byte equality compare (x >= y exactly when max(x, y) == x).
define <16 x i8> @v16i8_icmp_uge(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
  %1 = icmp uge <16 x i8> %a, %b
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
; SSE2-LABEL: v16i8_icmp_uge:
; SSE2: pmaxub  %xmm0, %xmm1
; SSE2: pcmpeqb %xmm1, %xmm0

; SSE41-LABEL: v16i8_icmp_uge:
; SSE41: pmaxub  %xmm0, %xmm1
; SSE41: pcmpeqb %xmm1, %xmm0

; AVX-LABEL: v16i8_icmp_uge:
; AVX: vpmaxub  %xmm1, %xmm0, %xmm1
; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0
}

; Unsigned <= on <16 x i8>, with the i1 result sign-extended to a byte mask.
; Every configuration is expected to use an unsigned byte min followed by a
; byte equality compare (x <= y exactly when min(x, y) == x).
define <16 x i8> @v16i8_icmp_ule(<16 x i8> %a, <16 x i8> %b) nounwind readnone ssp uwtable {
  %1 = icmp ule <16 x i8> %a, %b
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
; SSE2-LABEL: v16i8_icmp_ule:
; SSE2: pminub  %xmm0, %xmm1
; SSE2: pcmpeqb %xmm1, %xmm0

; SSE41-LABEL: v16i8_icmp_ule:
; SSE41: pminub  %xmm0, %xmm1
; SSE41: pcmpeqb %xmm1, %xmm0

; AVX-LABEL: v16i8_icmp_ule:
; AVX: vpminub  %xmm1, %xmm0, %xmm1
; AVX: vpcmpeqb %xmm1, %xmm0, %xmm0
}


; Unsigned >= on <8 x i16>, with the i1 result sign-extended to a word mask.
; The sse4.1 and avx runs are expected to use an unsigned word max followed by
; a word equality compare. The sse2 run is expected to produce a longer
; sequence instead: a RIP-relative constant load, an XOR of each operand with
; that constant, a signed word greater-than compare, and an inversion of the
; result against an all-ones register (pcmpeqd of a register with itself).
; NOTE(review): the loaded constant is presumably the per-lane sign-bit mask
; that maps unsigned order onto signed order; its value is not checked here.
define <8 x i16> @v8i16_icmp_uge(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
  %1 = icmp uge <8 x i16> %a, %b
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
; SSE2-LABEL: v8i16_icmp_uge:
; SSE2: movdqa  {{.*}}(%rip), %xmm2
; SSE2: pxor    %xmm2, %xmm0
; SSE2: pxor    %xmm1, %xmm2
; SSE2: pcmpgtw %xmm0, %xmm2
; SSE2: pcmpeqd %xmm0, %xmm0
; SSE2: pxor    %xmm2, %xmm0

; SSE41-LABEL: v8i16_icmp_uge:
; SSE41: pmaxuw  %xmm0, %xmm1
; SSE41: pcmpeqw %xmm1, %xmm0

; AVX-LABEL: v8i16_icmp_uge:
; AVX: vpmaxuw  %xmm1, %xmm0, %xmm1
; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0
}

; Unsigned <= on <8 x i16>, with the i1 result sign-extended to a word mask.
; The sse4.1 and avx runs are expected to use an unsigned word min followed by
; a word equality compare. The sse2 run is expected to load a RIP-relative
; constant, XOR both operands with it, do a signed word greater-than compare,
; invert the result against an all-ones register, and copy it into %xmm0.
; NOTE(review): the loaded constant is presumably the per-lane sign-bit mask;
; its value is not checked here.
define <8 x i16> @v8i16_icmp_ule(<8 x i16> %a, <8 x i16> %b) nounwind readnone ssp uwtable {
  %1 = icmp ule <8 x i16> %a, %b
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
; SSE2-LABEL: v8i16_icmp_ule:
; SSE2: movdqa  {{.*}}(%rip), %xmm2
; SSE2: pxor    %xmm2, %xmm1
; SSE2: pxor    %xmm2, %xmm0
; SSE2: pcmpgtw %xmm1, %xmm0
; SSE2: pcmpeqd %xmm1, %xmm1
; SSE2: pxor    %xmm0, %xmm1
; SSE2: movdqa  %xmm1, %xmm0

; SSE41-LABEL: v8i16_icmp_ule:
; SSE41: pminuw  %xmm0, %xmm1
; SSE41: pcmpeqw %xmm1, %xmm0

; AVX-LABEL: v8i16_icmp_ule:
; AVX: vpminuw  %xmm1, %xmm0, %xmm1
; AVX: vpcmpeqw %xmm1, %xmm0, %xmm0
}


; Unsigned >= on <4 x i32>, with the i1 result sign-extended to a dword mask.
; The sse4.1 and avx runs are expected to use an unsigned dword max followed by
; a dword equality compare. The sse2 run is expected to load a RIP-relative
; constant, XOR each operand with it, do a signed dword greater-than compare,
; and invert the result against an all-ones register.
; NOTE(review): the loaded constant is presumably the per-lane sign-bit mask;
; its value is not checked here.
define <4 x i32> @v4i32_icmp_uge(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
  %1 = icmp uge <4 x i32> %a, %b
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
; SSE2-LABEL: v4i32_icmp_uge:
; SSE2: movdqa  {{.*}}(%rip), %xmm2
; SSE2: pxor    %xmm2, %xmm0
; SSE2: pxor    %xmm1, %xmm2
; SSE2: pcmpgtd %xmm0, %xmm2
; SSE2: pcmpeqd %xmm0, %xmm0
; SSE2: pxor    %xmm2, %xmm0

; SSE41-LABEL: v4i32_icmp_uge:
; SSE41: pmaxud  %xmm0, %xmm1
; SSE41: pcmpeqd %xmm1, %xmm0

; AVX-LABEL: v4i32_icmp_uge:
; AVX: vpmaxud  %xmm1, %xmm0, %xmm1
; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0
}

; Unsigned <= on <4 x i32>, with the i1 result sign-extended to a dword mask.
; The sse4.1 and avx runs are expected to use an unsigned dword min followed by
; a dword equality compare (VEX-encoded, three-operand forms under avx). The
; sse2 run is expected to load a RIP-relative constant, XOR both operands with
; it, do a signed dword greater-than compare, invert the result against an
; all-ones register, and copy it into %xmm0.
; NOTE(review): the loaded constant is presumably the per-lane sign-bit mask;
; its value is not checked here.
define <4 x i32> @v4i32_icmp_ule(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp uwtable {
  %1 = icmp ule <4 x i32> %a, %b
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
; SSE2-LABEL: v4i32_icmp_ule:
; SSE2: movdqa  {{.*}}(%rip), %xmm2
; SSE2: pxor    %xmm2, %xmm1
; SSE2: pxor    %xmm2, %xmm0
; SSE2: pcmpgtd %xmm1, %xmm0
; SSE2: pcmpeqd %xmm1, %xmm1
; SSE2: pxor    %xmm0, %xmm1
; SSE2: movdqa  %xmm1, %xmm0

; SSE41-LABEL: v4i32_icmp_ule:
; SSE41: pminud  %xmm0, %xmm1
; SSE41: pcmpeqd %xmm1, %xmm0

; AVX-LABEL: v4i32_icmp_ule:
; AVX: vpminud  %xmm1, %xmm0, %xmm1
; AVX: vpcmpeqd %xmm1, %xmm0, %xmm0
}

; At one point we were incorrectly constant-folding a setcc to 0x1 instead of
; 0xff, leading to a constant-pool load. The instruction doesn't matter here,
; but it should set all bits to 1.
define <16 x i8> @test_setcc_constfold_vi8(<16 x i8> %l, <16 x i8> %r) {
  %test1 = icmp eq <16 x i8> %l, %r
  %mask1 = sext <16 x i1> %test1 to <16 x i8>

  %test2 = icmp ne <16 x i8> %l, %r
  %mask2 = sext <16 x i1> %test2 to <16 x i8>

  %res = or <16 x i8> %mask1, %mask2
  ret <16 x i8> %res
; The eq and ne masks are complementary, so their OR is all-ones in every
; lane. Each run should materialize that by comparing a register with itself.
; SSE2-LABEL: test_setcc_constfold_vi8:
; SSE2: pcmpeqd %xmm0, %xmm0

; SSE41-LABEL: test_setcc_constfold_vi8:
; SSE41: pcmpeqd %xmm0, %xmm0

; AVX-LABEL: test_setcc_constfold_vi8:
; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
}

; Make sure sensible results also come from doing the extension afterwards.
define <16 x i8> @test_setcc_constfold_vi1(<16 x i8> %l, <16 x i8> %r) {
  %test1 = icmp eq <16 x i8> %l, %r
  %test2 = icmp ne <16 x i8> %l, %r

  %res = or <16 x i1> %test1, %test2
  %mask = sext <16 x i1> %res to <16 x i8>
  ret <16 x i8> %mask
; Here the OR is done on the <16 x i1> compare results and only the combined
; value is sign-extended. The result is still all-ones in every lane, so each
; run should materialize it by comparing a register with itself.
; SSE2-LABEL: test_setcc_constfold_vi1:
; SSE2: pcmpeqd %xmm0, %xmm0

; SSE41-LABEL: test_setcc_constfold_vi1:
; SSE41: pcmpeqd %xmm0, %xmm0

; AVX-LABEL: test_setcc_constfold_vi1:
; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
}


; The 64-bit case is also particularly important, as the constant "-1" is
; probably just 32 bits wide.
define <2 x i64> @test_setcc_constfold_vi64(<2 x i64> %l, <2 x i64> %r) {
  %test1 = icmp eq <2 x i64> %l, %r
  %mask1 = sext <2 x i1> %test1 to <2 x i64>

  %test2 = icmp ne <2 x i64> %l, %r
  %mask2 = sext <2 x i1> %test2 to <2 x i64>

  %res = or <2 x i64> %mask1, %mask2
  ret <2 x i64> %res
; The eq and ne masks are complementary, so their OR is all-ones across both
; 64-bit lanes. Each run should materialize that by comparing a register with
; itself rather than by loading a constant.
; SSE2-LABEL: test_setcc_constfold_vi64:
; SSE2: pcmpeqd %xmm0, %xmm0

; SSE41-LABEL: test_setcc_constfold_vi64:
; SSE41: pcmpeqd %xmm0, %xmm0

; AVX-LABEL: test_setcc_constfold_vi64:
; AVX: vpcmpeqd %xmm0, %xmm0, %xmm0
}
