1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
4
5; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c
6
; _mm_test_epi8_mask: AND the two 128-bit inputs, compare each of the 16 bytes
; against zero (ne), and return the resulting 16-bit mask (expects vptestmb).
define zeroext i16 @test_mm_test_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_test_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmb %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp ne <16 x i8> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}
21
; _mm_mask_test_epi8_mask: as above, but the per-byte ne-zero result is ANDed
; with the caller mask %__U (expects vptestmb with a {%k1} write-mask).
define zeroext i16 @test_mm_mask_test_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_test_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestmb %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_test_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmb %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp ne <16 x i8> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
47
; _mm256_test_epi8_mask: 256-bit version — 32 bytes tested for (A & B) != 0,
; returned as an i32 mask (expects vptestmb on ymm registers).
define i32 @test_mm256_test_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_test_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmb %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp ne <32 x i8> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}
62
; _mm256_mask_test_epi8_mask: masked 256-bit byte test; the 32-bit result is
; ANDed with %__U (expects masked vptestmb on ymm registers).
define i32 @test_mm256_mask_test_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_test_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestmb %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_test_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmb %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp ne <32 x i8> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
88
; _mm_test_epi16_mask: AND the inputs, test each of the 8 words for != 0,
; return the 8-bit mask (expects vptestmw).
define zeroext i8 @test_mm_test_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_test_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmw %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp ne <8 x i16> %0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}
103
; _mm_mask_test_epi16_mask: masked word test; the 8-bit ne-zero mask is ANDed
; with %__U (expects vptestmw with a {%k1} write-mask).
define zeroext i8 @test_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_test_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vptestmw %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_test_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmw %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp ne <8 x i16> %0, zeroinitializer
  %2 = bitcast i8 %__U to <8 x i1>
  %3 = and <8 x i1> %1, %2
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
130
; _mm256_test_epi16_mask: 256-bit word test — 16 words checked for (A & B) != 0,
; returned as an i16 mask (expects vptestmw on ymm registers).
define zeroext i16 @test_mm256_test_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_test_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmw %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp ne <16 x i16> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}
146
; _mm256_mask_test_epi16_mask: masked 256-bit word test; result ANDed with %__U
; (expects masked vptestmw on ymm registers).
define zeroext i16 @test_mm256_mask_test_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_test_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestmw %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_test_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmw %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp ne <16 x i16> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
174
; _mm_testn_epi8_mask: the negated test — bytes where (A & B) == 0 produce a
; set mask bit (icmp eq; expects vptestnmb).
define zeroext i16 @test_mm_testn_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_testn_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmb %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp eq <16 x i8> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}
189
; _mm_mask_testn_epi8_mask: masked negated byte test; eq-zero mask ANDed with
; %__U (expects vptestnmb with a {%k1} write-mask).
define zeroext i16 @test_mm_mask_testn_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_testn_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestnmb %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_testn_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmb %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp eq <16 x i8> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
215
; _mm256_testn_epi8_mask: 256-bit negated byte test — 32 bits, one per byte
; where (A & B) == 0 (expects vptestnmb on ymm registers).
define i32 @test_mm256_testn_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_testn_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmb %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp eq <32 x i8> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}
230
; _mm256_mask_testn_epi8_mask: masked 256-bit negated byte test; result ANDed
; with %__U (expects masked vptestnmb on ymm registers).
define i32 @test_mm256_mask_testn_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_testn_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestnmb %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_testn_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmb %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp eq <32 x i8> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}
256
; _mm_testn_epi16_mask: negated word test — 8 bits, one per word where
; (A & B) == 0 (expects vptestnmw).
define zeroext i8 @test_mm_testn_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_testn_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmw %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp eq <8 x i16> %0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}
271
; _mm_mask_testn_epi16_mask: masked negated word test; eq-zero mask ANDed with
; %__U (expects vptestnmw with a {%k1} write-mask).
define zeroext i8 @test_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_testn_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vptestnmw %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_testn_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmw %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp eq <8 x i16> %0, zeroinitializer
  %2 = bitcast i8 %__U to <8 x i1>
  %3 = and <8 x i1> %1, %2
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}
298
; _mm256_testn_epi16_mask: 256-bit negated word test — 16 bits, one per word
; where (A & B) == 0 (expects vptestnmw on ymm registers).
define zeroext i16 @test_mm256_testn_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_testn_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmw %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp eq <16 x i16> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}
314
; _mm256_mask_testn_epi16_mask: masked 256-bit negated word test; result ANDed
; with %__U (expects masked vptestnmw on ymm registers).
define zeroext i16 @test_mm256_mask_testn_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_testn_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestnmw %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_testn_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmw %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp eq <16 x i16> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
342
; _mm_mask_set1_epi8: broadcast the scalar byte %__A to all 16 lanes, then
; merge with %__O under mask %__M (expects merge-masked vpbroadcastb from GPR).
define <2 x i64> @test_mm_mask_set1_epi8(<2 x i64> %__O, i16 zeroext %__M, i8 signext %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm_mask_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i8> %vecinit.i.i, <16 x i8> undef, <16 x i32> zeroinitializer
  %0 = bitcast <2 x i64> %__O to <16 x i8>
  %1 = bitcast i16 %__M to <16 x i1>
  %2 = select <16 x i1> %1, <16 x i8> %vecinit15.i.i, <16 x i8> %0
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}
365
; _mm_maskz_set1_epi8: zero-masking variant — unselected lanes become zero
; (expects vpbroadcastb with {%k1} {z}).
define <2 x i64> @test_mm_maskz_set1_epi8(i16 zeroext %__M, i8 signext %__A)  {
; X86-LABEL: test_mm_maskz_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i8> %vecinit.i.i, <16 x i8> undef, <16 x i32> zeroinitializer
  %0 = bitcast i16 %__M to <16 x i1>
  %1 = select <16 x i1> %0, <16 x i8> %vecinit15.i.i, <16 x i8> zeroinitializer
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  ret <2 x i64> %2
}
387
; _mm256_mask_set1_epi8: broadcast the scalar byte to 32 lanes, merge with
; %__O under the i32 mask %__M (expects merge-masked vpbroadcastb to ymm).
define <4 x i64> @test_mm256_mask_set1_epi8(<4 x i64> %__O, i32 %__M, i8 signext %__A){
; X86-LABEL: test_mm256_mask_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0
  %vecinit31.i.i = shufflevector <32 x i8> %vecinit.i.i, <32 x i8> undef, <32 x i32> zeroinitializer
  %0 = bitcast <4 x i64> %__O to <32 x i8>
  %1 = bitcast i32 %__M to <32 x i1>
  %2 = select <32 x i1> %1, <32 x i8> %vecinit31.i.i, <32 x i8> %0
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}
410
; _mm256_maskz_set1_epi8: zero-masking 256-bit byte broadcast
; (expects vpbroadcastb to ymm with {%k1} {z}).
define <4 x i64> @test_mm256_maskz_set1_epi8(i32 %__M, i8 signext %__A)  {
; X86-LABEL: test_mm256_maskz_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0
  %vecinit31.i.i = shufflevector <32 x i8> %vecinit.i.i, <32 x i8> undef, <32 x i32> zeroinitializer
  %0 = bitcast i32 %__M to <32 x i1>
  %1 = select <32 x i1> %0, <32 x i8> %vecinit31.i.i, <32 x i8> zeroinitializer
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
}
432
; _mm256_mask_set1_epi16: broadcast the scalar word to 16 lanes, merge with
; %__O under mask %__M (expects merge-masked vpbroadcastw to ymm).
define <4 x i64> @test_mm256_mask_set1_epi16(<4 x i64> %__O, i16 zeroext %__M, i16 signext %__A)  {
; X86-LABEL: test_mm256_mask_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %eax, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i16> %vecinit.i.i, <16 x i16> undef, <16 x i32> zeroinitializer
  %0 = bitcast <4 x i64> %__O to <16 x i16>
  %1 = bitcast i16 %__M to <16 x i1>
  %2 = select <16 x i1> %1, <16 x i16> %vecinit15.i.i, <16 x i16> %0
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}
455
; _mm256_maskz_set1_epi16: zero-masking 256-bit word broadcast
; (expects vpbroadcastw to ymm with {%k1} {z}).
define <4 x i64> @test_mm256_maskz_set1_epi16(i16 zeroext %__M, i16 signext %__A) {
; X86-LABEL: test_mm256_maskz_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %eax, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i16> %vecinit.i.i, <16 x i16> undef, <16 x i32> zeroinitializer
  %0 = bitcast i16 %__M to <16 x i1>
  %1 = select <16 x i1> %0, <16 x i16> %vecinit15.i.i, <16 x i16> zeroinitializer
  %2 = bitcast <16 x i16> %1 to <4 x i64>
  ret <4 x i64> %2
}
477
; _mm_mask_set1_epi16: broadcast the scalar word to 8 lanes, merge with %__O
; under the i8 mask %__M (expects merge-masked vpbroadcastw to xmm).
define <2 x i64> @test_mm_mask_set1_epi16(<2 x i64> %__O, i8 zeroext %__M, i16 signext %__A) {
; X86-LABEL: test_mm_mask_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovd %ecx, %k1
; X86-NEXT:    vpbroadcastw %eax, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0
  %vecinit7.i.i = shufflevector <8 x i16> %vecinit.i.i, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = bitcast <2 x i64> %__O to <8 x i16>
  %1 = bitcast i8 %__M to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i16> %vecinit7.i.i, <8 x i16> %0
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}
501
; _mm_maskz_set1_epi16: zero-masking 128-bit word broadcast
; (expects vpbroadcastw to xmm with {%k1} {z}).
define <2 x i64> @test_mm_maskz_set1_epi16(i8 zeroext %__M, i16 signext %__A) {
; X86-LABEL: test_mm_maskz_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovd %ecx, %k1
; X86-NEXT:    vpbroadcastw %eax, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0
  %vecinit7.i.i = shufflevector <8 x i16> %vecinit.i.i, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = bitcast i8 %__M to <8 x i1>
  %1 = select <8 x i1> %0, <8 x i16> %vecinit7.i.i, <8 x i16> zeroinitializer
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  ret <2 x i64> %2
}
524
525
; _mm_broadcastb_epi8: splat byte 0 of the input across all 16 lanes via an
; all-zero shuffle mask (expects reg-reg vpbroadcastb).
define <2 x i64> @test_mm_broadcastb_epi8(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastb_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = bitcast <16 x i8> %res0 to <2 x i64>
  ret <2 x i64> %res1
}
536
; _mm_mask_broadcastb_epi8: splat byte 0 of %a2, merging with %a0 under mask
; %a1 (expects merge-masked vpbroadcastb).
define <2 x i64> @test_mm_mask_broadcastb_epi8(<2 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm_mask_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x i8> %res0, <16 x i8> %arg0
  %res2 = bitcast <16 x i8> %res1 to <2 x i64>
  ret <2 x i64> %res2
}
557
; _mm_maskz_broadcastb_epi8: zero-masking byte splat
; (expects vpbroadcastb with {%k1} {z}).
define <2 x i64> @test_mm_maskz_broadcastb_epi8(i16 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_maskz_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x i8> %res0, <16 x i8> zeroinitializer
  %res2 = bitcast <16 x i8> %res1 to <2 x i64>
  ret <2 x i64> %res2
}
577
; _mm256_broadcastb_epi8: splat byte 0 of the 128-bit input across all 32
; lanes of a 256-bit result (expects xmm->ymm vpbroadcastb).
define <4 x i64> @test_mm256_broadcastb_epi8(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_broadcastb_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = bitcast <32 x i8> %res0 to <4 x i64>
  ret <4 x i64> %res1
}
588
; _mm256_mask_broadcastb_epi8: 256-bit merge-masked byte splat of %a2's byte 0
; into %a0 under mask %a1 (expects masked xmm->ymm vpbroadcastb).
define <4 x i64> @test_mm256_mask_broadcastb_epi8(<4 x i64> %a0, i32 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm256_mask_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm1, %ymm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast i32 %a1 to <32 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg1, <32 x i8> %res0, <32 x i8> %arg0
  %res2 = bitcast <32 x i8> %res1 to <4 x i64>
  ret <4 x i64> %res2
}
609
; _mm256_maskz_broadcastb_epi8: 256-bit zero-masking byte splat
; (expects xmm->ymm vpbroadcastb with {%k1} {z}).
define <4 x i64> @test_mm256_maskz_broadcastb_epi8(i32 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm256_maskz_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i32 %a0 to <32 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg0, <32 x i8> %res0, <32 x i8> zeroinitializer
  %res2 = bitcast <32 x i8> %res1 to <4 x i64>
  ret <4 x i64> %res2
}
629
; _mm_broadcastw_epi16: splat word 0 of the input across all 8 lanes
; (expects reg-reg vpbroadcastw).
define <2 x i64> @test_mm_broadcastw_epi16(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastw_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = bitcast <8 x i16> %res0 to <2 x i64>
  ret <2 x i64> %res1
}
640
; _mm_mask_broadcastw_epi16: splat word 0 of %a2, merging with %a0 under mask
; %a1 (expects merge-masked vpbroadcastw).
define <2 x i64> @test_mm_mask_broadcastw_epi16(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm_mask_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpbroadcastw %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg1, <8 x i16> %res0, <8 x i16> %arg0
  %res2 = bitcast <8 x i16> %res1 to <2 x i64>
  ret <2 x i64> %res2
}
662
; _mm_maskz_broadcastw_epi16: zero-masking word splat
; (expects vpbroadcastw with {%k1} {z}).
define <2 x i64> @test_mm_maskz_broadcastw_epi16(i8 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_maskz_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg0, <8 x i16> %res0, <8 x i16> zeroinitializer
  %res2 = bitcast <8 x i16> %res1 to <2 x i64>
  ret <2 x i64> %res2
}
683
; _mm256_broadcastw_epi16: splat word 0 of the 128-bit input across all 16
; lanes of a 256-bit result (expects xmm->ymm vpbroadcastw).
define <4 x i64> @test_mm256_broadcastw_epi16(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_broadcastw_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = bitcast <16 x i16> %res0 to <4 x i64>
  ret <4 x i64> %res1
}
694
; _mm256_mask_broadcastw_epi16: 256-bit merge-masked word splat of %a2's word 0
; into %a0 under mask %a1 (expects masked xmm->ymm vpbroadcastw).
define <4 x i64> @test_mm256_mask_broadcastw_epi16(<4 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm256_mask_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %xmm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm1, %ymm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x i16> %res0, <16 x i16> %arg0
  %res2 = bitcast <16 x i16> %res1 to <4 x i64>
  ret <4 x i64> %res2
}
715
; _mm256_maskz_broadcastw_epi16: 256-bit zero-masking word splat
; (expects xmm->ymm vpbroadcastw with {%k1} {z}).
define <4 x i64> @test_mm256_maskz_broadcastw_epi16(i16 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm256_maskz_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x i16> %res0, <16 x i16> zeroinitializer
  %res2 = bitcast <16 x i16> %res1 to <4 x i64>
  ret <4 x i64> %res2
}
735
; _mm_cvtepi16_epi8: truncate 8 words to 8 bytes, zero-filling the upper half
; of the 128-bit result via the concat shuffle (expects vpmovwb xmm->xmm).
define <2 x i64> @test_mm_cvtepi16_epi8(<2 x i64> %__A) {
; CHECK-LABEL: test_mm_cvtepi16_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmovwb %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %conv.i = trunc <8 x i16> %0 to <8 x i8>
  %shuf.i = shufflevector <8 x i8> %conv.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast <16 x i8> %shuf.i to <2 x i64>
  ret <2 x i64> %1
}
748
; _mm256_cvtepi16_epi8: truncate 16 words (256-bit) to 16 bytes (128-bit)
; (expects vpmovwb ymm->xmm).
define <2 x i64> @test_mm256_cvtepi16_epi8(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepi16_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmovwb %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast <16 x i8> %conv.i to <2 x i64>
  ret <2 x i64> %1
}
761
; _mm256_mask_cvtepi16_epi8: truncate words to bytes, merging with %__O under
; mask %__M (expects merge-masked vpmovwb).
define <2 x i64> @test_mm256_mask_cvtepi16_epi8(<2 x i64> %__O, i16 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %ymm1, %xmm0 {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %ymm1, %xmm0 {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast <2 x i64> %__O to <16 x i8>
  %2 = bitcast i16 %__M to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %conv.i.i, <16 x i8> %1
  %4 = bitcast <16 x i8> %3 to <2 x i64>
  ret <2 x i64> %4
}
785
; _mm256_maskz_cvtepi16_epi8: truncate words to bytes with zero-masking
; (expects vpmovwb with {%k1} {z}).
define <2 x i64> @test_mm256_maskz_cvtepi16_epi8(i16 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast i16 %__M to <16 x i1>
  %2 = select <16 x i1> %1, <16 x i8> %conv.i.i, <16 x i8> zeroinitializer
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}
808
; _mm_mask2_permutex2var_epi16: two-source word permute; where the mask bit is
; clear the result keeps the INDEX operand %__I (hence vpermi2w, which writes
; over the index register, plus the final vmovdqa to move it into xmm0).
define <2 x i64> @test_mm_mask2_permutex2var_epi16(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask2_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %1
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}
834
; _mm256_mask2_permutex2var_epi16: 256-bit variant of the mask2 permute —
; masked-off lanes keep the index operand %__I (vpermi2w on ymm registers).
define <4 x i64> @test_mm256_mask2_permutex2var_epi16(<4 x i64> %__A, <4 x i64> %__I, i16 zeroext %__U, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask2_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %1
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}
859
; Unmasked _mm_permutex2var_epi16: plain two-source word permute; the backend
; is expected to select the overwrite-source form (vpermt2w) since the result
; overwrites %__A rather than the index register.
define <2 x i64> @test_mm_permutex2var_epi16(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_permutex2var_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}
873
; Merge-masked _mm_mask_permutex2var_epi16: unlike the mask2 variant, lanes
; whose mask bit is 0 keep the first data operand %__A (select fallback is %0).
define <2 x i64> @test_mm_mask_permutex2var_epi16(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>   ; data operand; also the masked-off fallback
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}
897
; Zero-masked _mm_maskz_permutex2var_epi16: lanes whose mask bit is 0 are
; zeroed (select fallback is zeroinitializer), matching the {z} form below.
define <2 x i64> @test_mm_maskz_permutex2var_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}
921
; Unmasked 256-bit _mm256_permutex2var_epi16: same shape as the 128-bit
; unmasked test above, expected to select the vpermt2w (overwrite-source) form.
define <4 x i64> @test_mm256_permutex2var_epi16(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_permutex2var_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}
935
; Merge-masked 256-bit _mm256_mask_permutex2var_epi16: masked-off lanes keep
; the first data operand %__A (select fallback is %0); i16 mask, 16 word lanes.
define <4 x i64> @test_mm256_mask_permutex2var_epi16(<4 x i64> %__A, i16 zeroext %__U, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>  ; data operand; also the masked-off fallback
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}
958
; Zero-masked 256-bit _mm256_maskz_permutex2var_epi16: masked-off lanes are
; zeroed (select fallback is zeroinitializer), matching the {z} form below.
define <4 x i64> @test_mm256_maskz_permutex2var_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}
981
982declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>)
983declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>)
984
985!0 = !{i32 1}
986
987