; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)

; Use widest possible vector for movmsk comparisons (PR37087)

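; The 'none of' (mask == 0) test of a movmskps fed by a bitcast <2 x double>
; compare should narrow to movmskpd, so only the two valid sign bits are tested.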
define i1 @movmskps_noneof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_noneof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_noneof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

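; The matching 'all of' test should compare the narrowed movmskpd result
; against 3 (both sign bits set) rather than 15.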
define i1 @movmskps_allof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_allof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    cmpl $3, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    cmpl $3, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}

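; pmovmskb of a bitcast <2 x i64> sign-compare only needs the two i64 sign
; bits: SSE42/AVX should use movmskpd directly, while SSE2 shuffles the high
; dwords into place for movmskps.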
define i1 @pmovmskb_noneof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    testl %eax, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    testl %eax, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

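; 'all of' variant: SSE42/AVX compare the movmskpd result against 3, and SSE2
; compares the duplicated-dword movmskps result against 15.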
define i1 @pmovmskb_allof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    cmpl $15, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    cmpl $3, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    cmpl $3, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}

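; pmovmskb of a bitcast <4 x float> compare should reduce to movmskps of the
; compare result plus a zero test.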
define i1 @pmovmskb_noneof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_noneof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

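; 'all of' variant of the above: the movmskps result is compared against 15.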
define i1 @pmovmskb_allof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_allof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}

; MOVMSK(ICMP_SGT(X,-1)) -> NOT(MOVMSK(X))
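; A sgt comparison against all-ones is true exactly when the sign bit is clear,
; so the vector compare should be dropped and the scalar mask inverted with XOR
; instead.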
define i1 @movmskps_allof_v4i32_positive(<4 x i32> %a0) {
; SSE-LABEL: movmskps_allof_v4i32_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    xorl $15, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_v4i32_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    xorl $15, %eax
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}

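; 'none of' form of the same fold: NOT(pmovmskb(X)) == 0, i.e. every byte's
; sign bit was set, with no vector compare emitted.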
define i1 @pmovmskb_noneof_v16i8_positive(<16 x i8> %a0) {
; SSE-LABEL: pmovmskb_noneof_v16i8_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_v16i8_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = icmp eq i32 %3, 0
  ret i1 %4
}

; AND(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(AND(X,Y))
; XOR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(XOR(X,Y))
; OR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(OR(X,Y))
; if the elements are the same width.

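; The AND should be performed in the vector domain ahead of a single movmskpd;
; the sgt-zero compare of %a1 adds nothing beyond %a1's own sign bits, so it
; can be elided.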
define i32 @and_movmskpd_movmskpd(<2 x double> %a0, <2 x i64> %a1) {
; SSE-LABEL: and_movmskpd_movmskpd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm2, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    andpd %xmm1, %xmm2
; SSE-NEXT:    movmskpd %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: and_movmskpd_movmskpd:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <2 x double>
  %4 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %3)
  %5 = icmp sgt <2 x i64> zeroinitializer, %a1
  %6 = bitcast <2 x i1> %5 to i2
  %7 = zext i2 %6 to i32
  %8 = and i32 %4, %7
  ret i32 %8
}

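; The ashr-by-31 splat of %a1 has the same sign bits as %a1 itself, so the XOR
; should happen in the vector domain followed by one movmskps.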
define i32 @xor_movmskps_movmskps(<4 x float> %a0, <4 x i32> %a1) {
; SSE-LABEL: xor_movmskps_movmskps:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    xorps %xmm1, %xmm2
; SSE-NEXT:    movmskps %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_movmskps_movmskps:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqps %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> zeroinitializer, %a0
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = ashr <4 x i32> %a1, <i32 31, i32 31, i32 31, i32 31>
  %6 = bitcast <4 x i32> %5 to <4 x float>
  %7 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %6)
  %8 = xor i32 %4, %7
  ret i32 %8
}

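; Both masks come from pmovmskb, so the OR should be done with por ahead of a
; single pmovmskb; the psraw $15 splat remains so that each word's sign bit
; covers both of its bytes.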
define i32 @or_pmovmskb_pmovmskb(<16 x i8> %a0, <8 x i16> %a1) {
; SSE-LABEL: or_pmovmskb_pmovmskb:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqb %xmm0, %xmm2
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    pmovmskb %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: or_pmovmskb_pmovmskb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $15, %xmm1, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    retq
  %1 = icmp eq <16 x i8> zeroinitializer, %a0
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %5 = bitcast <8 x i16> %4 to <16 x i8>
  %6 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %5)
  %7 = or i32 %3, %6
  ret i32 %7
}