; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
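; This file tests the AVX512DQ+VL packed conversions between floating-point
; and 64-bit integer elements (vcvt[t]pd2[u]qq, vcvt[t]ps2[u]qq and
; vcvt[u]qq2ps) at 128-bit and 256-bit widths, checking instruction
; selection and the exact MC encodings on both 32-bit and 64-bit targets.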

declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_pd2qq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7b,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

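; In the masked tests the i8 mask is passed on the stack on X86 and loaded
; directly into a mask register with kmovb, while on X64 it arrives in a GPR
; and is moved with kmovw.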
define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7b,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_pd2qq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7b,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2qq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7b,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_pd2uqq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x79,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x79,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_pd2uqq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x79,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_cvt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2uqq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x79,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2qq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

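; The *_load tests check that a 64-bit load of <2 x float>, widened to
; <4 x float> by a shufflevector, is folded into the memory operand of the
; conversion instruction.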
define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}


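; The _load_2 variants widen with an undef high half instead of zeroes, and
; the _load_3 variants load a full <4 x float> directly; all three forms
; should fold to the same memory-operand form of the instruction.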
define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load_2(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvt_ps2qq_128_load_3(<4 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2qq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2qq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3(<4 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2qq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7b,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2qq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7b,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2qq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7b,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2uqq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load_2(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvt_ps2uqq_128_load_3(<4 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvt_ps2uqq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3(<4 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvtps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvt_ps2uqq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvtps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x79,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvtps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2uqq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x79,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_cvt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2uqq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x79,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvtps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

declare <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_qq2ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_cvt_qq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_qq2ps_128_zext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x08,0x5b,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res3
}

define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128_zext:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_128_zext:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x09,0x5b,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res1
}


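; The 256-bit qq2ps conversions are expressed with a generic sitofp plus an
; explicit mask select rather than through the target intrinsic; both forms
; are expected to select vcvtqq2ps.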
621define <4 x float>@test_int_x86_avx512_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1) {
622; CHECK-LABEL: test_int_x86_avx512_cvt_qq2ps_256:
623; CHECK:       # %bb.0:
624; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x5b,0xc0]
625; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
626; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
627  %cvt = sitofp <4 x i64> %x0 to <4 x float>
628  ret <4 x float> %cvt
629}
630
631define <4 x float>@test_int_x86_avx512_mask_cvt_qq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
632; X86-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
633; X86:       # %bb.0:
634; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
635; X86-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
636; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
637; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
638; X86-NEXT:    retl # encoding: [0xc3]
639;
640; X64-LABEL: test_int_x86_avx512_mask_cvt_qq2ps_256:
641; X64:       # %bb.0:
642; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
643; X64-NEXT:    vcvtqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x5b,0xc8]
644; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
645; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
646; X64-NEXT:    retq # encoding: [0xc3]
647  %cvt1 = sitofp <4 x i64> %x0 to <4 x float>
648  %1 = bitcast i8 %x2 to <8 x i1>
649  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
650  %2 = select <4 x i1> %extract, <4 x float> %cvt1, <4 x float> %x1
651  ret <4 x float> %2
652}
653
654declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)
655
656define <2 x i64>@test_int_x86_avx512_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1) {
657; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2qq_128:
658; CHECK:       # %bb.0:
659; CHECK-NEXT:    vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
660; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
661  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
662  ret <2 x i64> %res
663}
664
665define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
666; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
667; X86:       # %bb.0:
668; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
669; X86-NEXT:    vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
670; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
671; X86-NEXT:    retl # encoding: [0xc3]
672;
673; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
674; X64:       # %bb.0:
675; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
676; X64-NEXT:    vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
677; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
678; X64-NEXT:    retq # encoding: [0xc3]
679  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
680  ret <2 x i64> %res
681}
682
683declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)
684
685define <4 x i64>@test_int_x86_avx512_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1) {
686; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2qq_256:
687; CHECK:       # %bb.0:
688; CHECK-NEXT:    vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
689; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
690  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
691  ret <4 x i64> %res
692}
693
694define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
695; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
696; X86:       # %bb.0:
697; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
698; X86-NEXT:    vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
699; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
700; X86-NEXT:    retl # encoding: [0xc3]
701;
702; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
703; X64:       # %bb.0:
704; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
705; X64-NEXT:    vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
706; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
707; X64-NEXT:    retq # encoding: [0xc3]
708  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
709  ret <4 x i64> %res
710}
711
712declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)
713
714define <2 x i64>@test_int_x86_avx512_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1) {
715; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2uqq_128:
716; CHECK:       # %bb.0:
717; CHECK-NEXT:    vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
718; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
719  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 -1)
720  ret <2 x i64> %res
721}
722
723define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
724; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
725; X86:       # %bb.0:
726; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
727; X86-NEXT:    vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
728; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
729; X86-NEXT:    retl # encoding: [0xc3]
730;
731; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
732; X64:       # %bb.0:
733; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
734; X64-NEXT:    vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
735; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
736; X64-NEXT:    retq # encoding: [0xc3]
737  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
738  ret <2 x i64> %res
739}
740
741declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)
742
743define <4 x i64>@test_int_x86_avx512_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1) {
744; CHECK-LABEL: test_int_x86_avx512_cvtt_pd2uqq_256:
745; CHECK:       # %bb.0:
746; CHECK-NEXT:    vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
747; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
748  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 -1)
749  ret <4 x i64> %res
750}
751
752define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
753; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
754; X86:       # %bb.0:
755; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
756; X86-NEXT:    vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
757; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
758; X86-NEXT:    retl # encoding: [0xc3]
759;
760; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
761; X64:       # %bb.0:
762; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
763; X64-NEXT:    vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
764; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
765; X64-NEXT:    retq # encoding: [0xc3]
766  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
767  ret <4 x i64> %res
768}
769
770declare <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float>, <2 x i64>, i8)
771
772define <2 x i64>@test_int_x86_avx512_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1) {
773; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2qq_128:
774; CHECK:       # %bb.0:
775; CHECK-NEXT:    vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
776; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
777  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
778  ret <2 x i64> %res
779}
780
781define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
782; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
783; X86:       # %bb.0:
784; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
785; X86-NEXT:    vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
786; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
787; X86-NEXT:    retl # encoding: [0xc3]
788;
789; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
790; X64:       # %bb.0:
791; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
792; X64-NEXT:    vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
793; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
794; X64-NEXT:    retq # encoding: [0xc3]
795  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
796  ret <2 x i64> %res
797}
798
799define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load(<2 x float>* %p) {
800; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load:
801; X86:       # %bb.0:
802; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
803; X86-NEXT:    vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
804; X86-NEXT:    retl # encoding: [0xc3]
805;
806; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load:
807; X64:       # %bb.0:
808; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
809; X64-NEXT:    retq # encoding: [0xc3]
810  %x0 = load <2 x float>, <2 x float>* %p
811  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
812  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
813  ret <2 x i64> %res
814}
815
816define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
817; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
818; X86:       # %bb.0:
819; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
820; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
821; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
822; X86-NEXT:    retl # encoding: [0xc3]
823;
824; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
825; X64:       # %bb.0:
826; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
827; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
828; X64-NEXT:    retq # encoding: [0xc3]
829  %x0 = load <2 x float>, <2 x float>* %p
830  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
831  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
832  ret <2 x i64> %res
833}
834
835define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load(<2 x float>* %p, i8 %mask) {
836; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
837; X86:       # %bb.0:
838; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
839; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
840; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
841; X86-NEXT:    retl # encoding: [0xc3]
842;
843; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
844; X64:       # %bb.0:
845; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
846; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
847; X64-NEXT:    retq # encoding: [0xc3]
848  %x0 = load <2 x float>, <2 x float>* %p
849  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
850  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
851  ret <2 x i64> %res
852}
853
854
855define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load_2(<2 x float>* %p) {
856; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2:
857; X86:       # %bb.0:
858; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
859; X86-NEXT:    vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
860; X86-NEXT:    retl # encoding: [0xc3]
861;
862; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_2:
863; X64:       # %bb.0:
864; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
865; X64-NEXT:    retq # encoding: [0xc3]
866  %x0 = load <2 x float>, <2 x float>* %p
867  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
868  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
869  ret <2 x i64> %res
870}
871
872define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
873; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
874; X86:       # %bb.0:
875; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
876; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
877; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
878; X86-NEXT:    retl # encoding: [0xc3]
879;
880; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
881; X64:       # %bb.0:
882; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
883; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
884; X64-NEXT:    retq # encoding: [0xc3]
885  %x0 = load <2 x float>, <2 x float>* %p
886  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
887  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
888  ret <2 x i64> %res
889}
890
891define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2(<2 x float>* %p, i8 %mask) {
892; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
893; X86:       # %bb.0:
894; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
895; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
896; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
897; X86-NEXT:    retl # encoding: [0xc3]
898;
899; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
900; X64:       # %bb.0:
901; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
902; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
903; X64-NEXT:    retq # encoding: [0xc3]
904  %x0 = load <2 x float>, <2 x float>* %p
905  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
906  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
907  ret <2 x i64> %res
908}
909
910define <2 x i64> @test_int_x86_avx512_cvtt_ps2qq_128_load_3(<4 x float>* %p) {
911; X86-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_3:
912; X86:       # %bb.0:
913; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
914; X86-NEXT:    vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
915; X86-NEXT:    retl # encoding: [0xc3]
916;
917; X64-LABEL: test_int_x86_avx512_cvtt_ps2qq_128_load_3:
918; X64:       # %bb.0:
919; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
920; X64-NEXT:    retq # encoding: [0xc3]
921  %x0 = load <4 x float>, <4 x float>* %p
922  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
923  ret <2 x i64> %res
924}
925
926define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
927; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3:
928; X86:       # %bb.0:
929; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
930; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
931; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
932; X86-NEXT:    retl # encoding: [0xc3]
933;
934; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3:
935; X64:       # %bb.0:
936; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
937; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
938; X64-NEXT:    retq # encoding: [0xc3]
939  %x0 = load <4 x float>, <4 x float>* %p
940  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
941  ret <2 x i64> %res
942}
943
944define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3(<4 x float>* %p, i8 %mask) {
945; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3:
946; X86:       # %bb.0:
947; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
948; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
949; X86-NEXT:    vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
950; X86-NEXT:    retl # encoding: [0xc3]
951;
952; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3:
953; X64:       # %bb.0:
954; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
955; X64-NEXT:    vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
956; X64-NEXT:    retq # encoding: [0xc3]
957  %x0 = load <4 x float>, <4 x float>* %p
958  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
959  ret <2 x i64> %res
960}
961
962declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)
963
964define <4 x i64>@test_int_x86_avx512_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1) {
965; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2qq_256:
966; CHECK:       # %bb.0:
967; CHECK-NEXT:    vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
968; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
969  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
970  ret <4 x i64> %res
971}
972
973define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
974; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
975; X86:       # %bb.0:
976; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
977; X86-NEXT:    vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
978; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
979; X86-NEXT:    retl # encoding: [0xc3]
980;
981; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
982; X64:       # %bb.0:
983; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
984; X64-NEXT:    vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
985; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
986; X64-NEXT:    retq # encoding: [0xc3]
987  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
988  ret <4 x i64> %res
989}
990
declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

define <4 x float>@test_int_x86_avx512_cvt_uqq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2ps_128_zext:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ps %xmm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x08,0x7a,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res2 = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 -1)
  %res3 = shufflevector <4 x float> %res2, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res3
}

define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_128_zext(<2 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128_zext:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_128_zext:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7a,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.128(<2 x i64> %x0, <4 x float> %x1, i8 %x2)
  %res1 = shufflevector <4 x float> %res, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res1
}

declare <4 x float> @llvm.x86.avx512.mask.cvtuqq2ps.256(<4 x i64>, <4 x float>, i8)

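; The 256-bit tests use generic uitofp IR (with a bitcast+shufflevector+
; select idiom for masking) rather than the intrinsic; they should still
; select vcvtuqq2ps, and vzeroupper is expected since the ymm source
; narrows to an xmm result.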
define <4 x float>@test_int_x86_avx512_cvt_uqq2ps_256(<4 x i64> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_uqq2ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0 # encoding: [0x62,0xf1,0xff,0x28,0x7a,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %cvt = uitofp <4 x i64> %x0 to <4 x float>
  ret <4 x float> %cvt
}

define <4 x float>@test_int_x86_avx512_mask_cvt_uqq2ps_256(<4 x i64> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_uqq2ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtuqq2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7a,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %cvt = uitofp <4 x i64> %x0 to <4 x float>
  %1 = bitcast i8 %x2 to <8 x i1>
  %extract = shufflevector <8 x i1> %1, <8 x i1> %1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = select <4 x i1> %extract, <4 x float> %cvt, <4 x float> %x1
  ret <4 x float> %2
}

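; VCVTTPS2UQQ performs a truncating (round-toward-zero) conversion of
; packed floats to packed unsigned 64-bit integers.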
declare <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
  ret <2 x i64> %res
}

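; The _load tests check that the float load is folded into the memory
; operand of vcvttps2uqq, whether the loaded <2 x float> is widened with
; zeroinitializer or undef, or a full <4 x float> is loaded directly.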
define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load_2(<2 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2(<2 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2(<2 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <2 x float>, <2 x float>* %p
  %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_cvtt_ps2uqq_128_load_3(<4 x float>* %p) {
; X86-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_cvtt_ps2uqq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> undef, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3(<4 x float>* %p, <2 x i64> %passthru, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3(<4 x float>* %p, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %p
  %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

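; 256-bit form: vcvttps2uqq widens four floats in an xmm source to four
; unsigned quadwords in a ymm destination.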
declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ps2uqq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
; X86-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
; X64-NEXT:    vmovaps %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
  ret <4 x i64> %res
}

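; VREDUCE* computes the reduced argument, src minus src rounded to a fixed
; number of fraction bits; the immediate encodes the number of fraction
; bits to keep and the rounding mode. Each test adds a masked and an
; unmasked result so both encodings are covered.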
declare <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_reduce_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreducepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
; X86-NEXT:    vreducepd $8, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreducepd $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x56,0xc8,0x04]
; X64-NEXT:    vreducepd $8, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x56,0xc0,0x08]
; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 4, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.reduce.pd.128(<2 x double> %x0, i32 8, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_reduce_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreducepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
; X86-NEXT:    vreducepd $0, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreducepd $4, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x56,0xc8,0x04]
; X64-NEXT:    vreducepd $0, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x56,0xc0,0x00]
; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 4, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.reduce.pd.256(<4 x double> %x0, i32 0, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_reduce_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreduceps $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
; X86-NEXT:    vreduceps $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreduceps $4, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x56,0xc8,0x04]
; X64-NEXT:    vreduceps $88, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x56,0xc0,0x58]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 4, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.reduce.ps.128(<4 x float> %x0, i32 88, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_reduce_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vreduceps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
; X86-NEXT:    vreduceps $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0c]
; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_reduce_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vreduceps $11, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x56,0xc8,0x0b]
; X64-NEXT:    vreduceps $12, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x56,0xc0,0x0c]
; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 11, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.reduce.ps.256(<8 x float> %x0, i32 12, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

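; VRANGE* computes a range operation on each element pair; the immediate
; selects the operation (min/max variants) and the sign control. As above,
; each test combines a masked and an unmasked form with an add.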
declare <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_range_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
; X86-NEXT:    vrangepd $8, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
; X86-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangepd $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x50,0xd1,0x04]
; X64-NEXT:    vrangepd $8, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x50,0xc1,0x08]
; X64-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 4, <2 x double> %x3, i8 %x4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.range.pd.128(<2 x double> %x0, <2 x double> %x1, i32 8, <2 x double> %x3, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_range_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
; X86-NEXT:    vrangepd $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
; X86-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangepd $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x50,0xd1,0x04]
; X64-NEXT:    vrangepd $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0xfd,0x28,0x50,0xc1,0x58]
; X64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 4, <4 x double> %x3, i8 %x4)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.range.pd.256(<4 x double> %x0, <4 x double> %x1, i32 88, <4 x double> %x3, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_range_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
; X86-NEXT:    vrangeps $88, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
; X86-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangeps $4, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x50,0xd1,0x04]
; X64-NEXT:    vrangeps $88, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x50,0xc1,0x58]
; X64-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 4, <4 x float> %x3, i8 %x4)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.range.ps.128(<4 x float> %x0, <4 x float> %x1, i32 88, <4 x float> %x3, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_range_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
; X86-NEXT:    vrangeps $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
; X86-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_range_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vrangeps $4, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x50,0xd1,0x04]
; X64-NEXT:    vrangeps $88, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x50,0xc1,0x58]
; X64-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 4, <8 x float> %x3, i8 %x4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.range.ps.256(<8 x float> %x0, <8 x float> %x1, i32 88, <8 x float> %x3, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

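; VFPCLASS tests each element against the set of FP categories named by
; the immediate bitmask and yields a mask register. The IR ands two class
; checks; codegen implements the and by using the first vfpclass result
; as the write mask of the second.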
declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)

define i8 @test_int_x86_avx512_fpclass_ps_128(<4 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 2)
  %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float> %x0, i32 4)
  %1 = and <4 x i1> %res1, %res
  %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = bitcast <8 x i1> %2 to i8
  ret i8 %3
}

declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)

define i8 @test_int_x86_avx512_fpclass_ps_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 2)
  %res1 = call <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float> %x0, i32 4)
  %1 = and <8 x i1> %res1, %res
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)

define i8 @test_int_x86_avx512_fpclass_pd_128(<2 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 4)
  %res1 = call <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double> %x0, i32 2)
  %1 = and <2 x i1> %res1, %res
  %2 = shufflevector <2 x i1> %1, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %3 = bitcast <8 x i1> %2 to i8
  ret i8 %3
}

declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)

define i8 @test_int_x86_avx512_fpclass_pd_256(<4 x double> %x0) {
; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 2)
  %res1 = call <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double> %x0, i32 4)
  %1 = and <4 x i1> %res1, %res
  %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = bitcast <8 x i1> %2 to i8
  ret i8 %3
}
