; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vbmi2-builtins.c

define <8 x i64> @test_mm512_mask_compress_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_compress_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define void @test_mm512_mask_compressstoreu_epi16(i8* %__P, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, i16* %1, <32 x i1> %2)
  ret void
}

define void @test_mm512_mask_compressstoreu_epi8(i8* %__P, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rsi, %k1
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %0, i8* %__P, <64 x i1> %1)
  ret void
}

define <8 x i64> @test_mm512_mask_expand_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expand_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expandloadu_epi16(<8 x i64> %__S, i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> %0)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i16*
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %0, <32 x i1> %1, <32 x i16> zeroinitializer)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  %2 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %1, <64 x i8> %0)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i64 %__U to <64 x i1>
  %1 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %0, <64 x i8> zeroinitializer)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shldi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shldi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shrdi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shrdi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shldv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)