; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2

declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)

; Use widest possible vector for movmsk comparisons (PR37087)
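; The v4f64 compare result below is bitcast to v8f32 before calling movmsk.ps.256;
; matching the mask to the original v4f64 width lets llc emit vmovmskpd instead,
; so the all-of/none-of constants shrink from 8 bits (255/0) to 4 bits (15/0).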

define i1 @movmskps_noneof_bitcast_v4f64(<4 x double> %a0) {
; CHECK-LABEL: movmskps_noneof_bitcast_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vmovmskpd %ymm0, %eax
; CHECK-NEXT:    testl %eax, %eax
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %1 = fcmp oeq <4 x double> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i64>
  %3 = bitcast <4 x i64> %2 to <8 x float>
  %4 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %3)
  %5 = icmp eq i32 %4, 0
  ret i1 %5
}

define i1 @movmskps_allof_bitcast_v4f64(<4 x double> %a0) {
; CHECK-LABEL: movmskps_allof_bitcast_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vmovmskpd %ymm0, %eax
; CHECK-NEXT:    cmpl $15, %eax
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %1 = fcmp oeq <4 x double> %a0, zeroinitializer
  %2 = sext <4 x i1> %1 to <4 x i64>
  %3 = bitcast <4 x i64> %2 to <8 x float>
  %4 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %3)
  %5 = icmp eq i32 %4, 255
  ret i1 %5
}

;
; TODO - Avoid sign extension ops when just extracting the sign bits.
;
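; movmskps/movmskpd only read the per-lane sign bit, so the sext / ashr-by-31
; feeding the bitcast is redundant. With AVX2 the cmpgt and ashr cases below
; already fold to a bare vmovmskpd/vmovmskps, but the sext tests still emit
; vpmovsxdq/vpmovsxwd.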

define i32 @movmskpd_cmpgt_v4i64(<4 x i64> %a0) {
; AVX1-LABEL: movmskpd_cmpgt_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
; AVX1-NEXT:    vmovmskpd %ymm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: movmskpd_cmpgt_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovmskpd %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %1 = icmp sgt <4 x i64> zeroinitializer, %a0
  %2 = sext <4 x i1> %1 to <4 x i64>
  %3 = bitcast <4 x i64> %2 to <4 x double>
  %4 = tail call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %3)
  ret i32 %4
}

define i32 @movmskps_ashr_v8i32(<8 x i32> %a0)  {
; AVX1-LABEL: movmskps_ashr_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: movmskps_ashr_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %1 = ashr <8 x i32> %a0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %2 = bitcast <8 x i32> %1 to <8 x float>
  %3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2)
  ret i32 %3
}

define i32 @movmskps_sext_v4i64(<4 x i32> %a0)  {
; AVX1-LABEL: movmskps_sext_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovmskpd %ymm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: movmskps_sext_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT:    vmovmskpd %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %1 = sext <4 x i32> %a0 to <4 x i64>
  %2 = bitcast <4 x i64> %1 to <4 x double>
  %3 = tail call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %2)
  ret i32 %3
}

define i32 @movmskps_sext_v8i32(<8 x i16> %a0)  {
; AVX1-LABEL: movmskps_sext_v8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: movmskps_sext_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %1 = sext <8 x i16> %a0 to <8 x i32>
  %2 = bitcast <8 x i32> %1 to <8 x float>
  %3 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %2)
  ret i32 %3
}