; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=cannonlake | FileCheck %s

; These test cases exercise situations where vpermt2/vpermi2 benefit from being commuted.

declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; Unmasked (mask = -1) vpermi2var.d.512 whose third operand is loaded from
; %x2p. The expected asm is the commuted vpermt2d form with the load folded
; into the memory operand.
define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2d (%rdi), %zmm1, %zmm0
; CHECK-NEXT:    retq
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)

; Unmasked (mask = -1) vpermi2var.pd.512 with all operands in registers.
; The expected asm is a single commuted vpermt2pd followed directly by retq.
define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2pd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)

; Unmasked (mask = -1) vpermi2var.ps.512 with all operands in registers.
; The expected asm is a single commuted vpermt2ps followed directly by retq.
define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
  ret <16 x float> %res
}

declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; Unmasked (mask = -1) vpermi2var.q.512 with all operands in registers.
; The expected asm is a single commuted vpermt2q followed directly by retq.
define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2q %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; Zero-masked vpermt2var.d.512 (mask %x3) whose third operand is loaded from
; %x2p. The expected asm is the commuted vpermi2d form with the load folded
; and zero-masking ({%k1} {z}) applied.
define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2d (%rdi), %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  ret <16 x i32> %res
}

declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)

; Zero-masked vpermt2var.pd.512 (mask %x3) whose third operand is a scalar
; double loaded from %x2ptr and splatted to all 8 lanes. The expected asm is
; the commuted vpermi2pd form using an embedded {1to8} broadcast memory operand.
define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2pd (%rdi){1to8}, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2s = load double, double* %x2ptr
  %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
  %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
  %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
  ret <8 x double> %res
}

declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)

; Zero-masked vpermt2var.ps.512 (mask %x3) with all vector operands in
; registers. The expected asm is the commuted vpermi2ps form with {%k1} {z}.
define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2ps %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
  ret <16 x float> %res
}


declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

; Zero-masked vpermt2var.q.512 (mask %x3) with all vector operands in
; registers. The expected asm is the commuted vpermi2q form with {%k1} {z}.
define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2q %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  ret <8 x i64> %res
}

declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

; Unmasked (mask = -1) vpermt2var.d.512 with all operands in registers.
; The expected asm is a single commuted vpermi2d followed directly by retq.
define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2d %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
  ret <16 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; Unmasked (mask = -1) 128-bit vpermt2var.d with all operands in registers.
; The expected asm is a single commuted vpermi2d on xmm registers.
define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

; Zero-masked 128-bit vpermt2var.d (mask %x3) with all vector operands in
; registers. The expected asm is the commuted vpermi2d form with {%k1} {z}.
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2d %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

; Zero-masked 128-bit vpermt2var.d (mask %x3) whose third operand is a scalar
; i32 loaded from %x2ptr and splatted to all 4 lanes. The expected asm is the
; commuted vpermi2d form using an embedded {1to4} broadcast memory operand.
define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128_broadcast(<4 x i32> %x0, <4 x i32> %x1, i32* %x2ptr, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2d (%rdi){1to4}, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2s = load i32, i32* %x2ptr
  %x2ins = insertelement <4 x i32> undef, i32 %x2s, i32 0
  %x2 = shufflevector <4 x i32> %x2ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; Unmasked (mask = -1) 256-bit vpermt2var.d with all operands in registers.
; The expected asm is a single commuted vpermi2d on ymm registers.
define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

; Zero-masked 256-bit vpermt2var.d (mask %x3) with all vector operands in
; registers. The expected asm is the commuted vpermi2d form with {%k1} {z}.
define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  ret <8 x i32> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)

; Unmasked (mask = -1) 128-bit vpermi2var.pd with all operands in registers.
; The expected asm is a single commuted vpermt2pd on xmm registers.
define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)

; Unmasked (mask = -1) 256-bit vpermi2var.pd with all operands in registers.
; The expected asm is a single commuted vpermt2pd on ymm registers.
define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)

; Unmasked (mask = -1) 128-bit vpermi2var.ps with all operands in registers.
; The expected asm is a single commuted vpermt2ps on xmm registers.
define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)

; Unmasked (mask = -1) 256-bit vpermi2var.ps with all operands in registers.
; The expected asm is a single commuted vpermt2ps on ymm registers.
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

; Unmasked (mask = -1) 256-bit vpermi2var.ps whose third operand is loaded
; from %x2p. The expected asm is the commuted vpermt2ps form with the load
; folded into the memory operand.
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256_load(<8 x float> %x0, <8 x i32> %x1, <8 x float>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps (%rdi), %ymm1, %ymm0
; CHECK-NEXT:    retq
  %x2 = load <8 x float>, <8 x float>* %x2p
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

; Unmasked (mask = -1) 256-bit vpermi2var.ps whose third operand is a scalar
; float loaded from %x2ptr and splatted to all 8 lanes. The expected asm is the
; commuted vpermt2ps form using an embedded {1to8} broadcast memory operand.
define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256_broadcast(<8 x float> %x0, <8 x i32> %x1, float* %x2ptr) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256_broadcast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2ps (%rdi){1to8}, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %x2s = load float, float* %x2ptr
  %x2ins = insertelement <8 x float> undef, float %x2s, i32 0
  %x2 = shufflevector <8 x float> %x2ins, <8 x float> undef, <8 x i32> zeroinitializer
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  ret <8 x float> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; Unmasked (mask = -1) 128-bit byte vpermi2var.qi with all operands in
; registers. The expected asm is a single commuted vpermt2b on xmm registers.
define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; Unmasked (mask = -1) 256-bit byte vpermi2var.qi with all operands in
; registers. The expected asm is a single commuted vpermt2b on ymm registers.
define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; Unmasked (mask = -1) 128-bit byte vpermt2var.qi with all operands in
; registers. The expected asm is a single commuted vpermi2b on xmm registers.
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2b %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

; Unmasked (mask = -1) 128-bit byte vpermt2var.qi whose third operand is
; loaded from %x2p. The expected asm is the commuted vpermi2b form with the
; load folded into the memory operand.
define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128_load(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>* %x2p) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2b (%rdi), %xmm1, %xmm0
; CHECK-NEXT:    retq
  %x2 = load <16 x i8>, <16 x i8>* %x2p
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; Unmasked (mask = -1) 256-bit byte vpermt2var.qi with all operands in
; registers. The expected asm is a single commuted vpermi2b on ymm registers.
define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

declare <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

; Zero-masked 128-bit byte vpermt2var.qi (mask %x3) with all vector operands
; in registers. The expected asm is the commuted vpermi2b form with {%k1} {z}.
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

; Zero-masked 128-bit byte vpermt2var.qi (mask %x3) whose third operand is
; loaded from %x2p. The expected asm is the commuted vpermi2b form with the
; load folded and zero-masking ({%k1} {z}) applied.
define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128_load(<16 x i8> %x0, <16 x i8> %x1, <16 x i8>* %x2p, i16 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2b (%rdi), %xmm1, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2 = load <16 x i8>, <16 x i8>* %x2p
  %res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

; Zero-masked 256-bit byte vpermt2var.qi (mask %x3) with all vector operands
; in registers. The expected asm is the commuted vpermi2b form with {%k1} {z}.
define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}

; Zero-masked 256-bit byte vpermt2var.qi (mask %x3) whose third operand is
; loaded from %x2p. The expected asm is the commuted vpermi2b form with the
; load folded and zero-masking ({%k1} {z}) applied.
define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256_load(<32 x i8> %x0, <32 x i8> %x1, <32 x i8>* %x2p, i32 %x3) {
; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256_load:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vpermi2b (%rdi), %ymm1, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %x2 = load <32 x i8>, <32 x i8>* %x2p
  %res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}
