; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

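; VPERMB (permvar.qi): permute the bytes of the first source using the byte
; indices in the second source. Each width is exercised unmasked (all-ones
; mask), merge-masked ({%k1}) and zero-masked ({%k1} {z}).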
declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_qi_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermb %xmm0, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x8d,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermb %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermb %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x8d,0xd0]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_maskz_permvar_qi_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermb %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x8d,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_permvar_qi_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermb %ymm0, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x8d,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermb %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermb %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x8d,0xd0]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_maskz_permvar_qi_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_permvar_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_permvar_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermb %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x8d,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3)
  ret <32 x i8> %res
}

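; VPMULTISHIFTQB (pmultishift.qb): each result byte is an unaligned 8-bit
; field extracted from the corresponding quadword of the second intrinsic
; operand, at the bit offset given by the matching control byte of the first.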
declare <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_pmultishift_qb_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmultishift_qb_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x83,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_pmultishift_qb_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_maskz_pmultishift_qb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pmultishift_qb_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x83,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmultishift_qb_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmultishiftqb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x83,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_pmultishift_qb_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_pmultishift_qb_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x83,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_pmultishift_qb_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmultishift_qb_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmultishift_qb_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_maskz_pmultishift_qb_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_pmultishift_qb_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pmultishift_qb_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmultishiftqb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3)
  ret <32 x i8> %res
}

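; VPERMI2B (vpermi2var.qi): two-source byte permute. The indices select bytes
; from the concatenation of both data operands, and the instruction overwrites
; its index register. When no masking ties the destination down, the backend
; is free to emit the equivalent VPERMT2B form instead, as in the unmasked
; tests below.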
declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_qi_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2b %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x7d,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x75,0xca]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_vpermi2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermi2var_qi_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermt2b %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x7d,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_vpermi2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x75,0xca]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}

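; VPERMT2B (vpermt2var.qi): the same two-source permute with the destination
; roles swapped, overwriting the first table operand rather than the indices.
; Again, the unmasked form may legally be emitted as VPERMI2B.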
declare <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_qi_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermi2b %xmm2, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x08,0x75,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
  ret <16 x i8> %res
}

define <16 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermt2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xca]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermt2b %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7d,0xca]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2) {
; CHECK-LABEL: test_int_x86_avx512_vpermt2var_qi_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0 # encoding: [0x62,0xf2,0x75,0x28,0x75,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
  ret <32 x i8> %res
}

define <32 x i8>@test_int_x86_avx512_mask_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermt2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xca]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermt2b %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7d,0xca]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.mask.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}

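; Zero-masking variants of the two-source permute: result bytes whose mask bit
; is clear are zeroed rather than merged from a passthru operand.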
declare <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)

define <16 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2b %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x75,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
  ret <16 x i8> %res
}

declare <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)

define <32 x i8>@test_int_x86_avx512_maskz_vpermt2var_qi_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x75,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermi2b %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x75,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx512.maskz.vpermt2var.qi.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
  ret <32 x i8> %res
}