; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl | FileCheck %s --check-prefixes=AVX512VL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512dq,avx512bw | FileCheck %s --check-prefixes=AVX512VLDQBW

; This test makes sure we don't use movmsk instructions when masked compares
; would be better. The use of the getmant intrinsic introduces a conversion
; from scalar to vXi1 late, after movmsk has been formed, requiring it to be
; reversed.

declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8)
declare <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double>, i32, <4 x double>, i8)
declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

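; Test a <2 x i64> sign-bit compare feeding the intrinsic's i8 mask operand.
; Without DQ the compare is lowered directly into a mask register with
; vpxor+vpcmpgtq; with DQ a single vpmovq2m extracts the sign bits. Neither
; should go through movmskpd.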
define <2 x double> @movmsk2(<2 x double> %x0, <2 x double> %x2, <2 x i64> %mask) {
; AVX512VL-LABEL: movmsk2:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1}
; AVX512VL-NEXT:    vmovapd %xmm1, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk2:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovq2m %xmm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %xmm1, %xmm0
; AVX512VLDQBW-NEXT:    retq
  %a = icmp slt <2 x i64> %mask, zeroinitializer
  %b = bitcast <2 x i1> %a to i2
  %c = zext i2 %b to i8
  %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %c)
  ret <2 x double> %res
}

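; Same pattern with a <4 x i32> condition masking a 256-bit getmant: vpcmpgtd
; against zero into k1 on plain VL, a single vpmovd2m with DQ.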
define <4 x double> @movmsk4(<4 x double> %x0, <4 x double> %x2, <4 x i32> %mask) {
; AVX512VL-LABEL: movmsk4:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1}
; AVX512VL-NEXT:    vmovapd %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk4:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovd2m %xmm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %ymm1, %ymm0
; AVX512VLDQBW-NEXT:    retq
  %a = icmp slt <4 x i32> %mask, zeroinitializer
  %b = bitcast <4 x i1> %a to i4
  %c = zext i4 %b to i8
  %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %c)
  ret <4 x double> %res
}

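; <8 x i32> condition masking a 512-bit getmant. Here the <8 x i1> compare
; result bitcasts straight to the i8 mask operand, so no zext is needed in
; the IR.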
define <8 x double> @movmsk8(<8 x double> %x0, <8 x double> %x2, <8 x i32> %mask) {
; AVX512VL-LABEL: movmsk8:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %zmm0, %zmm1 {%k1}
; AVX512VL-NEXT:    vmovapd %zmm1, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk8:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovd2m %ymm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %zmm0, %zmm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %zmm1, %zmm0
; AVX512VLDQBW-NEXT:    retq
  %a = icmp slt <8 x i32> %mask, zeroinitializer
  %b = bitcast <8 x i1> %a to i8
  %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %b, i32 4)
  ret <8 x double> %res
}

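; <16 x i8> condition. Without BW there is no byte compare into a mask
; register, so the AVX512VL lowering falls back to vpmovmskb plus kmovw to
; materialize k1; with BW a single vpmovb2m does it.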
define <16 x float> @movmsk16(<16 x float> %x0, <16 x float> %x2, <16 x i8> %mask) {
; AVX512VL-LABEL: movmsk16:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpmovmskb %xmm2, %eax
; AVX512VL-NEXT:    kmovw %eax, %k1
; AVX512VL-NEXT:    vgetmantps $11, %zmm0, %zmm1 {%k1}
; AVX512VL-NEXT:    vmovaps %zmm1, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk16:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovb2m %xmm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantps $11, %zmm0, %zmm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovaps %zmm1, %zmm0
; AVX512VLDQBW-NEXT:    retq
  %a = icmp slt <16 x i8> %mask, zeroinitializer
  %b = bitcast <16 x i1> %a to i16
  %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %b, i32 4)
  ret <16 x float> %res
}

; Similar to the tests above, but with FP types bitcast to integers for the slt.
define <2 x double> @movmsk2_fp(<2 x double> %x0, <2 x double> %x2, <2 x double> %mask) {
; AVX512VL-LABEL: movmsk2_fp:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtq %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1}
; AVX512VL-NEXT:    vmovapd %xmm1, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk2_fp:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovq2m %xmm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %xmm0, %xmm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %xmm1, %xmm0
; AVX512VLDQBW-NEXT:    retq
  %q = bitcast <2 x double> %mask to <2 x i64>
  %a = icmp slt <2 x i64> %q, zeroinitializer
  %b = bitcast <2 x i1> %a to i2
  %c = zext i2 %b to i8
  %res = call <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double> %x0, i32 11, <2 x double> %x2, i8 %c)
  ret <2 x double> %res
}

define <4 x double> @movmsk4_fp(<4 x double> %x0, <4 x double> %x2, <4 x float> %mask) {
; AVX512VL-LABEL: movmsk4_fp:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %xmm2, %xmm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1}
; AVX512VL-NEXT:    vmovapd %ymm1, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk4_fp:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovd2m %xmm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %ymm0, %ymm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %ymm1, %ymm0
; AVX512VLDQBW-NEXT:    retq
  %q = bitcast <4 x float> %mask to <4 x i32>
  %a = icmp slt <4 x i32> %q, zeroinitializer
  %b = bitcast <4 x i1> %a to i4
  %c = zext i4 %b to i8
  %res = call <4 x double> @llvm.x86.avx512.mask.getmant.pd.256(<4 x double> %x0, i32 11, <4 x double> %x2, i8 %c)
  ret <4 x double> %res
}

define <8 x double> @movmsk8_fp(<8 x double> %x0, <8 x double> %x2, <8 x float> %mask) {
; AVX512VL-LABEL: movmsk8_fp:
; AVX512VL:       ## %bb.0:
; AVX512VL-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX512VL-NEXT:    vpcmpgtd %ymm2, %ymm3, %k1
; AVX512VL-NEXT:    vgetmantpd $11, %zmm0, %zmm1 {%k1}
; AVX512VL-NEXT:    vmovapd %zmm1, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512VLDQBW-LABEL: movmsk8_fp:
; AVX512VLDQBW:       ## %bb.0:
; AVX512VLDQBW-NEXT:    vpmovd2m %ymm2, %k1
; AVX512VLDQBW-NEXT:    vgetmantpd $11, %zmm0, %zmm1 {%k1}
; AVX512VLDQBW-NEXT:    vmovapd %zmm1, %zmm0
; AVX512VLDQBW-NEXT:    retq
  %q = bitcast <8 x float> %mask to <8 x i32>
  %a = icmp slt <8 x i32> %q, zeroinitializer
  %b = bitcast <8 x i1> %a to i8
  %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %b, i32 4)
  ret <8 x double> %res
}
