; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
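; Exercises AVX-512 intrinsic lowering on KNL; MC encodings are matched where
; the exact EVEX bytes matter.
;
; Mask-register (k-register) ops come first: kortestw sets ZF when the OR of
; the two masks is zero and CF when it is all ones; the sete/sbbl patterns
; below pick those flags up as the i32 results.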

declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
; CHECK-LABEL: test_kortestz
; CHECK: kortestw
; CHECK: sete
define i32 @test_kortestz(i16 %a0, i16 %a1) {
  %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1)
  ret i32 %res
}

declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
; CHECK-LABEL: test_kortestc
; CHECK: kortestw
; CHECK: sbbl
define i32 @test_kortestc(i16 %a0, i16 %a1) {
  %res = call i32 @llvm.x86.avx512.kortestc.w(i16 %a0, i16 %a1)
  ret i32 %res
}

declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
; CHECK-LABEL: test_kand
; CHECK: kandw
; CHECK: kandw
define i16 @test_kand(i16 %a0, i16 %a1) {
  %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
  %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
  ret i16 %t2
}

declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
; CHECK-LABEL: test_knot
; CHECK: knotw
define i16 @test_knot(i16 %a0) {
  %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

; CHECK-LABEL: unpckbw_test
; CHECK: kunpckbw
; CHECK: ret
define i16 @unpckbw_test(i16 %a0, i16 %a1) {
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}

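; Reciprocal, rounding and square-root intrinsics. An all-ones mask (i16/i8 -1)
; enables every lane, so these should select the plain EVEX forms whose
; encodings are matched below.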
define <16 x float> @test_rcp_ps_512(<16 x float> %a0) {
  ; CHECK: vrcp14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4c,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
  ; CHECK: vrcp14pd {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x4c,0xc0]
  %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone

declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)

define <8 x double> @test7(<8 x double> %a) {
; CHECK: vrndscalepd {{.*}}encoding: [0x62,0xf3,0xfd,0x48,0x09,0xc0,0x0b]
  %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)

define <16 x float> @test8(<16 x float> %a) {
; CHECK: vrndscaleps {{.*}}encoding: [0x62,0xf3,0x7d,0x48,0x08,0xc0,0x0b]
  %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
  ; CHECK: vrsqrt14ps {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x4e,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone

define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
  ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
  %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
  ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
  %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
  ; CHECK: vsqrtpd
  %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone

define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
  ; CHECK: vsqrtps
  %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone

define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
  ; CHECK: vsqrtss {{.*}}encoding: [0x62
  %res = call <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.sqrt.ss(<4 x float>, <4 x float>) nounwind readnone

define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1) {
  ; CHECK: vsqrtsd {{.*}}encoding: [0x62
  %res = call <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.sqrt.sd(<2 x double>, <2 x double>) nounwind readnone

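; The scalar conversions below only match the first encoding byte, 0x62: that
; is the EVEX prefix, so the match proves the AVX-512 form was selected over
; the VEX-encoded SSE/AVX one.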
define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
  ; CHECK: vcvtsd2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone

define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
  ; CHECK: vcvtsi2sdq {{.*}}encoding: [0x62
  %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone

define <2 x double> @test_x86_avx512_cvtusi642sd(<2 x double> %a0, i64 %a1) {
  ; CHECK: vcvtusi2sdq {{.*}}encoding: [0x62
  %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double>, i64) nounwind readnone

define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
  ; CHECK: vcvttsd2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone


define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
  ; CHECK: vcvtss2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone


define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
  ; CHECK: vcvtsi2ssq {{.*}}encoding: [0x62
  %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone


define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
  ; CHECK: vcvttss2si {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone

define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) {
  ; CHECK: vcvtsd2usi {{.*}}encoding: [0x62
  %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1]
  ret i64 %res
}
declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone

define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) {
  ; CHECK: vcvtph2ps %ymm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x13,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.vcvtph2ps.512(<16 x i16>, <16 x float>, i16, i32) nounwind readonly


define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0) {
  ; CHECK: vcvtps2ph $2, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1d,0xc0,0x02]
  %res = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
  ret <16 x i16> %res
}

declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly

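; Broadcasts, in both the from-memory (i8*) and the from-vector/GPR forms.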
define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
  ; CHECK: vbroadcastss
  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
  ; CHECK: vbroadcastsd
  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0) {
  ; CHECK: vbroadcastss
  %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float> %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.ps.512(<4 x float>) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) {
  ; CHECK: vbroadcastsd
  %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double> %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly

define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) {
  ; CHECK: vpbroadcastd
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly

define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) {
  ; CHECK: vpbroadcastd
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly

define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) {
  ; CHECK: vpbroadcastq
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly

define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) {
  ; CHECK: vpbroadcastq
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly

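; Conflict-detection (CDI) instructions. With a constant -1 mask and a zero
; passthrough, the mov $-1 materializes the all-ones mask and the vpxor the
; zero vector before the masked op.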
define <16 x i32> @test_conflict_d(<16 x i32> %a) {
  ; CHECK: movw $-1, %ax
  ; CHECK: vpxor
  ; CHECK: vpconflictd
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_conflict_q(<8 x i64> %a) {
  ; CHECK: movb $-1, %al
  ; CHECK: vpxor
  ; CHECK: vpconflictq
  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly

define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) {
  ; CHECK: vpconflictd
  %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
  ; CHECK: vpconflictq
  %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret <8 x i64> %res
}

define <16 x i32> @test_lzcnt_d(<16 x i32> %a) {
  ; CHECK: movw $-1, %ax
  ; CHECK: vpxor
  ; CHECK: vplzcntd
  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_lzcnt_q(<8 x i64> %a) {
  ; CHECK: movb $-1, %al
  ; CHECK: vpxor
  ; CHECK: vplzcntq
  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly


define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
  ; CHECK: vplzcntd
  %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret <16 x i32> %res
}

define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
  ; CHECK: vplzcntq
  %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret <8 x i64> %res
}

define <16 x i32> @test_ctlz_d(<16 x i32> %a) {
  ; CHECK-LABEL: test_ctlz_d
  ; CHECK: vplzcntd
  %res = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %a, i1 false)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1) nounwind readonly

define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
  ; CHECK-LABEL: test_ctlz_q
  ; CHECK: vplzcntq
  %res = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %a, i1 false)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly

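; Masked blends: the mask operand picks per-lane between the two vector sources.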
define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
  ; CHECK: vblendmps %zmm1, %zmm0
  %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly

define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
  ; CHECK: vblendmpd %zmm1, %zmm0
  %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}

define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
  ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
  ; CHECK: vblendmpd (%
  %b = load <8 x double>* %ptr
  %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double>, <8 x double>, i8) nounwind readonly

define <16 x i32> @test_x86_mask_blend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) {
  ; CHECK: vpblendmd
  %res = call <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32> %a1, <16 x i32> %a2, i16 %a0) ; <<16 x i32>> [#uses=1]
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.blend.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly

define <8 x i64> @test_x86_mask_blend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
  ; CHECK: vpblendmq
  %res = call <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64> %a1, <8 x i64> %a2, i8 %a0) ; <<8 x i64>> [#uses=1]
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.blend.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly

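; Rounding-mode operand as used by the tests below: 1 = {rd-sae}, 2 = {ru-sae},
; 4 = current direction (no static rounding), 8 = {sae} only.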
define <8 x i32> @test_cvtpd2udq(<8 x double> %a) {
; CHECK: vcvtpd2udq {ru-sae}{{.*}}encoding: [0x62,0xf1,0xfc,0x58,0x79,0xc0]
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %a, <8 x i32> zeroinitializer, i8 -1, i32 2)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)

define <16 x i32> @test_cvtps2udq(<16 x float> %a) {
; CHECK: vcvtps2udq {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x79,0xc0]
  %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %a, <16 x i32> zeroinitializer, i16 -1, i32 1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)

define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
; CHECK: vcmpleps {sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
  %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32)

define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
; CHECK: vcmpneqpd %zmm{{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i8, i32)

; cvt intrinsics
define <16 x float> @test_cvtdq2ps(<16 x i32> %a) {
; CHECK: vcvtdq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7c,0x38,0x5b,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32> %a, <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.cvtdq2ps.512(<16 x i32>, <16 x float>, i16, i32)

define <16 x float> @test_cvtudq2ps(<16 x i32> %a) {
; CHECK: vcvtudq2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0x7f,0x38,0x7a,0xc0]
  %res = call <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32> %a, <16 x float> zeroinitializer, i16 -1, i32 1)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.cvtudq2ps.512(<16 x i32>, <16 x float>, i16, i32)

define <8 x double> @test_cvtdq2pd(<8 x i32> %a) {
; CHECK: vcvtdq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xc0]
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %a, <8 x double> zeroinitializer, i8 -1)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)

define <8 x double> @test_cvtudq2pd(<8 x i32> %a) {
; CHECK: vcvtudq2pd {{.*}}encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xc0]
  %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %a, <8 x double> zeroinitializer, i8 -1)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)

; fp min/max
define <16 x float> @test_vmaxps(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK: vmaxps
  %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>,
                    <16 x float>, i16, i32)

define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) {
  ; CHECK: vmaxpd
  %res = call <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.max.pd.512(<8 x double>, <8 x double>,
                    <8 x double>, i8, i32)

define <16 x float> @test_vminps(<16 x float> %a0, <16 x float> %a1) {
  ; CHECK: vminps
  %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1,
                    <16 x float> zeroinitializer, i16 -1, i32 4)
  ret <16 x float> %res
}
declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>,
                    <16 x float>, i16, i32)

define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) {
  ; CHECK: vminpd
  %res = call <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double> %a0, <8 x double> %a1,
                    <8 x double> zeroinitializer, i8 -1, i32 4)
  ret <8 x double> %res
}
declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
                    <8 x double>, i8, i32)

define <8 x float> @test_cvtpd2ps(<8 x double> %a) {
; CHECK: vcvtpd2ps {rd-sae}{{.*}}encoding: [0x62,0xf1,0xfd,0x38,0x5a,0xc0]
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %a, <8 x float> zeroinitializer, i8 -1, i32 1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)

define <16 x i32> @test_pabsd(<16 x i32> %a) {
; CHECK: vpabsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xc0]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)

define <8 x i64> @test_pabsq(<8 x i64> %a) {
; CHECK: vpabsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xc0]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)

define <8 x i64> @test_vpmaxq(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK: vpmaxsq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %a0, <8 x i64> %a1,
                    <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <16 x i32> @test_vpminud(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vpminud {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xc1]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %a0, <16 x i32> %a1,
                    <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @test_vpmaxsd(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vpmaxsd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xc1]
  %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %a0, <16 x i32> %a1,
                    <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}
declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <8 x i64> @test_vpmuludq(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vpmuludq {{.*}}encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
  %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a0, <16 x i32> %a1,
                    <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}
declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)

define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK: vptestmq {{.*}}encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
  %res = call i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.ptestm.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK: vptestmd {{.*}}encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
  %res = call i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
  ret i16 %res
}
declare i16 @llvm.x86.avx512.mask.ptestm.d.512(<16 x i32>, <16 x i32>, i16)

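; Unaligned masked stores; the trailing 0x07 in each encoding is the ModRM
; byte for a (%rdi) memory operand.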
define void @test_store1(<16 x float> %data, i8* %ptr, i16 %mask) {
; CHECK: vmovups {{.*}}encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16)

define void @test_store2(<8 x double> %data, i8* %ptr, i8 %mask) {
; CHECK: vmovupd {{.*}}encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)

define <16 x float> @test_vpermt2ps(<16 x float> %x, <16 x float> %y, <16 x i32> %perm) {
; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32> %perm, <16 x float> %x, <16 x float> %y, i16 -1)
  ret <16 x float> %res
}

define <16 x float> @test_vpermt2ps_mask(<16 x float> %x, <16 x float> %y, <16 x i32> %perm, i16 %mask) {
; CHECK-LABEL: test_vpermt2ps_mask:
; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1]
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32> %perm, <16 x float> %x, <16 x float> %y, i16 %mask)
  ret <16 x float> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)

define <8 x i64> @test_vmovntdqa(i8* %x) {
; CHECK-LABEL: test_vmovntdqa:
; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
  %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)

define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_valign_q:
; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
; CHECK-LABEL: test_mask_valign_q:
; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)

define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_maskz_valign_d:
; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
  %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)

define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
; CHECK-LABEL: test_mask_store_ss
; CHECK: vmovss %xmm0, (%rdi) {%k1}     ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
  call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8)

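; Integer compare-to-mask. The cmp/ucmp intrinsics take a predicate immediate;
; each of the eight predicates (0-7) is exercised once and the resulting masks
; are packed into a single vector.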
define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d
; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d
; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)

define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q
; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q
; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)

define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d
; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
  ret i16 %res
}

define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d
; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
  ret i16 %res
}

declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)

define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q
; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q
; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)

define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_512
; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_mask_cmp_d_512
; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone

define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_512
; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
; CHECK-LABEL: test_mask_ucmp_d_512
; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
  ret <8 x i16> %vec7
}

declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone

define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_512
; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_q_512
; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_512
; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_q_512
; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone

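; Subvector extracts with merge ({%k1}), zeroing ({%k1} {z}) and unmasked forms.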
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextractf32x4:
; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)

define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextracti64x4:
; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
  %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)

define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: test_maskz_vextracti32x4:
; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
  %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)

define <4 x double> @test_vextractf64x4(<8 x double> %a) {
; CHECK-LABEL: test_vextractf64x4:
; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
  %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)

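; Vector shifts, three variants each: unmasked (mask -1), merge-masked into a
; separate destination, and zero-masked ({z}).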
define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
  ; CHECK-LABEL: test_x86_avx512_pslli_d
  ; CHECK: vpslld
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_pslli_d
  ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
  ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
  ; CHECK-LABEL: test_x86_avx512_pslli_q
  ; CHECK: vpsllq
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
  ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
  ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
  ; CHECK-LABEL: test_x86_avx512_psrli_d
  ; CHECK: vpsrld
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
  ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
  ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
  ; CHECK-LABEL: test_x86_avx512_psrli_q
  ; CHECK: vpsrlq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
  ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
  ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
  ; CHECK-LABEL: test_x86_avx512_psrai_d
  ; CHECK: vpsrad
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
  ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
  ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
  ; CHECK-LABEL: test_x86_avx512_psrai_q
  ; CHECK: vpsraq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
  ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
  ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psll_d
  ; CHECK: vpslld
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psll_d
  ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
  ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psll_q
  ; CHECK: vpsllq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psll_q
  ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
  ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrl_d
  ; CHECK: vpsrld
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
  ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
  ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrl_q
  ; CHECK: vpsrlq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
  ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
  ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psra_d
  ; CHECK: vpsrad
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psra_d
  ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
  ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psra_q
  ; CHECK: vpsraq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psra_q
  ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
  ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone

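; The variable-shift tests below differ from the tests above in that the
; intrinsics take a full 512-bit vector of per-element shift counts (a zmm
; operand) rather than a single count in an xmm register; the masking
; pattern is otherwise the same.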
define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psllv_d
  ; CHECK: vpsllvd
  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
  ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
  ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psllv_q
  ; CHECK: vpsllvq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
  ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
  ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrav_d
  ; CHECK: vpsravd
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
  ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
  ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrav_q
  ; CHECK: vpsravq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
  ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
  ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrlv_d
  ; CHECK: vpsrlvd
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
  ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
  ret <16 x i32> %res
}

define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
  ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
  ret <16 x i32> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone

define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
  ; CHECK-LABEL: test_x86_avx512_psrlv_q
  ; CHECK: vpsrlvq
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
  ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
  ret <8 x i64> %res
}

define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
  ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
  ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
  ret <8 x i64> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone

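; Check that the variable-shift count vector can be folded directly from a
; memory operand.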
define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
  ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
  ; CHECK: vpsrlvq (%
  %b = load <8 x i64>* %ptr
  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
  ret <8 x i64> %res
}