; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)

; Use widest possible vector for movmsk comparisons (PR37087)
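; A movmsk of a bitcast compare result should use the movmsk variant matching
; the compare's element width: the sign-extended compare replicates each lane's
; sign bit, so e.g. a movmskps of a bitcast v2f64 compare lowers to movmskpd
; in the checks below.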

define i1 @movmskps_noneof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_noneof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_noneof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @movmskps_allof_bitcast_v2f64(<2 x double> %a0) {
; SSE-LABEL: movmskps_allof_bitcast_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm1, %xmm1
; SSE-NEXT:    cmpeqpd %xmm0, %xmm1
; SSE-NEXT:    movmskpd %xmm1, %eax
; SSE-NEXT:    cmpl $3, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_bitcast_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    cmpl $3, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}

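; Note: pcmpgtq is only available from SSE4.2, so on SSE2 the v2i64 sign
; compare below is emulated with pcmpgtd plus a pshufd that picks out the
; high-dword results; on SSE42/AVX the compare against zero folds away and
; movmskpd reads the sign bits directly.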
define i1 @pmovmskb_noneof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    testl %eax, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_noneof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    testl %eax, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @pmovmskb_allof_bitcast_v2i64(<2 x i64> %a0) {
; SSE2-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    pcmpgtd %xmm0, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE2-NEXT:    movmskps %xmm0, %eax
; SSE2-NEXT:    cmpl $15, %eax
; SSE2-NEXT:    sete %al
; SSE2-NEXT:    retq
;
; SSE42-LABEL: pmovmskb_allof_bitcast_v2i64:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movmskpd %xmm0, %eax
; SSE42-NEXT:    cmpl $3, %eax
; SSE42-NEXT:    sete %al
; SSE42-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    cmpl $3, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <2 x i64> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}

define i1 @pmovmskb_noneof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_noneof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @pmovmskb_allof_bitcast_v4f32(<4 x float> %a0) {
; SSE-LABEL: pmovmskb_allof_bitcast_v4f32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cmpeqps %xmm0, %xmm1
; SSE-NEXT:    movmskps %xmm1, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_allof_bitcast_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vcmpeqps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <16 x i8>
  %4 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %3)
  %5 = icmp eq i32 %4, 65535
  ret i1 %5
}

; MOVMSK(ICMP_SGT(X,-1)) -> NOT(MOVMSK(X))
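; ICMP_SGT(X,-1) is true exactly in the lanes whose sign bit is clear, so its
; sign mask is the complement of MOVMSK(X): the vector compare is dropped and
; the scalar mask is inverted instead (the xorl in the checks below).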
define i1 @movmskps_allof_v4i32_positive(<4 x i32> %a0) {
; SSE-LABEL: movmskps_allof_v4i32_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    movmskps %xmm0, %eax
; SSE-NEXT:    xorl $15, %eax
; SSE-NEXT:    cmpl $15, %eax
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: movmskps_allof_v4i32_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    xorl $15, %eax
; AVX-NEXT:    cmpl $15, %eax
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <4 x i32> %a0, <i32 -1, i32 -1, i32 -1, i32 -1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = icmp eq i32 %4, 15
  ret i1 %5
}

define i1 @pmovmskb_noneof_v16i8_positive(<16 x i8> %a0) {
; SSE-LABEL: pmovmskb_noneof_v16i8_positive:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovmskb %xmm0, %eax
; SSE-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; SSE-NEXT:    sete %al
; SSE-NEXT:    retq
;
; AVX-LABEL: pmovmskb_noneof_v16i8_positive:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    xorl $65535, %eax # imm = 0xFFFF
; AVX-NEXT:    sete %al
; AVX-NEXT:    retq
  %1 = icmp sgt <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = icmp eq i32 %3, 0
  ret i1 %4
}

; AND(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(AND(X,Y))
; XOR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(XOR(X,Y))
; OR(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(OR(X,Y))
; if the elements are the same width.
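; When the widths differ, as with the <8 x i16> operand of
; or_pmovmskb_pmovmskb below, the ashr sign-splat is rematerialized as a psraw
; so the OR can still be performed in the vector domain ahead of a single
; pmovmskb. Note also that and_movmskpd_movmskpd builds its second mask with a
; <2 x i1> bitcast to i2 plus zext rather than a movmsk intrinsic; the
; combiner handles both forms the same way here.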

define i32 @and_movmskpd_movmskpd(<2 x double> %a0, <2 x i64> %a1) {
; SSE-LABEL: and_movmskpd_movmskpd:
; SSE:       # %bb.0:
; SSE-NEXT:    xorpd %xmm2, %xmm2
; SSE-NEXT:    cmpeqpd %xmm0, %xmm2
; SSE-NEXT:    andpd %xmm1, %xmm2
; SSE-NEXT:    movmskpd %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: and_movmskpd_movmskpd:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqpd %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vandpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskpd %xmm0, %eax
; AVX-NEXT:    retq
  %1 = fcmp oeq <2 x double> zeroinitializer, %a0
  %2 = sext <2 x i1> %1 to <2 x i64>
  %3 = bitcast <2 x i64> %2 to <2 x double>
  %4 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %3)
  %5 = icmp sgt <2 x i64> zeroinitializer, %a1
  %6 = bitcast <2 x i1> %5 to i2
  %7 = zext i2 %6 to i32
  %8 = and i32 %4, %7
  ret i32 %8
}

define i32 @xor_movmskps_movmskps(<4 x float> %a0, <4 x i32> %a1) {
; SSE-LABEL: xor_movmskps_movmskps:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    cmpeqps %xmm0, %xmm2
; SSE-NEXT:    xorps %xmm1, %xmm2
; SSE-NEXT:    movmskps %xmm2, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_movmskps_movmskps:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vcmpeqps %xmm0, %xmm2, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    retq
  %1 = fcmp oeq <4 x float> zeroinitializer, %a0
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %4 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %3)
  %5 = ashr <4 x i32> %a1, <i32 31, i32 31, i32 31, i32 31>
  %6 = bitcast <4 x i32> %5 to <4 x float>
  %7 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %6)
  %8 = xor i32 %4, %7
  ret i32 %8
}

define i32 @or_pmovmskb_pmovmskb(<16 x i8> %a0, <8 x i16> %a1) {
; SSE-LABEL: or_pmovmskb_pmovmskb:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm2, %xmm2
; SSE-NEXT:    pcmpeqb %xmm0, %xmm2
; SSE-NEXT:    psraw $15, %xmm1
; SSE-NEXT:    por %xmm2, %xmm1
; SSE-NEXT:    pmovmskb %xmm1, %eax
; SSE-NEXT:    retq
;
; AVX-LABEL: or_pmovmskb_pmovmskb:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $15, %xmm1, %xmm1
; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpmovmskb %xmm0, %eax
; AVX-NEXT:    retq
  %1 = icmp eq <16 x i8> zeroinitializer, %a0
  %2 = sext <16 x i1> %1 to <16 x i8>
  %3 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %2)
  %4 = ashr <8 x i16> %a1, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %5 = bitcast <8 x i16> %4 to <16 x i8>
  %6 = tail call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %5)
  %7 = or i32 %3, %6
  ret i32 %7
}