; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=ALL,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512BW

;
; Variable Shifts
;

define <8 x i64> @var_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: var_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, %b
  ret <8 x i64> %shift
}

define <16 x i32> @var_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: var_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, %b
  ret <16 x i32> %shift
}

define <32 x i16> @var_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm3 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm2, %zmm3, %zmm2
; AVX512DQ-NEXT:    vpmovdw %zmm2, %ymm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512DQ-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, %b
  ret <32 x i16> %shift
}

define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: var_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpsllw $4, %ymm2, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm5
; AVX512DQ-NEXT:    vpsllw $5, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm5, %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $2, %ymm2, %ymm3
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm6, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm5, %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm2, %ymm2, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm5, %ymm5, %ymm5
; AVX512DQ-NEXT:    vpblendvb %ymm5, %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpand %ymm4, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpsllw $5, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpand %ymm6, %ymm3, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm3
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: var_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpsllw $5, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, %b
  ret <64 x i8> %shift
}

;
; Uniform Variable Shifts
;

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, <8 x i64> %b) nounwind {
; ALL-LABEL: splatvar_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllq %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %shift = shl <8 x i64> %a, %splat
  ret <8 x i64> %shift
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, <16 x i32> %b) nounwind {
; ALL-LABEL: splatvar_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; ALL-NEXT:    vpslld %xmm1, %zmm0, %zmm0
; ALL-NEXT:    retq
  %splat = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
  %shift = shl <16 x i32> %a, %splat
  ret <16 x i32> %shift
}

define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <32 x i16> %b, <32 x i16> undef, <32 x i32> zeroinitializer
  %shift = shl <32 x i16> %a, %splat
  ret <32 x i16> %shift
}

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512DQ-LABEL: splatvar_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512DQ-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; AVX512DQ-NEXT:    vpbroadcastb %xmm1, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
; AVX512BW-NEXT:    vpsllw %xmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsllw %xmm1, %xmm2, %xmm1
; AVX512BW-NEXT:    vpbroadcastb %xmm1, %zmm1
; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
  %shift = shl <64 x i8> %a, %splat
  ret <64 x i8> %shift
}

;
; Constant Shifts
;

define <8 x i64> @constant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: constant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 1, i64 7, i64 31, i64 62, i64 1, i64 7, i64 31, i64 62>
  ret <8 x i64> %shift
}

define <16 x i32> @constant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: constant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 7>
  ret <16 x i32> %shift
}

define <32 x i16> @constant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpmullw %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  ret <32 x i16> %shift
}

define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: constant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
; AVX512DQ-NEXT:    vpsllw $4, %ymm1, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vbroadcasti128 {{.*#+}} ymm4 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512DQ-NEXT:    # ymm4 = mem[0,1,0,1]
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $2, %ymm1, %ymm2
; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm4, %ymm4, %ymm6
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpaddb %ymm1, %ymm1, %ymm2
; AVX512DQ-NEXT:    vpaddb %ymm6, %ymm6, %ymm7
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $4, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpand %ymm3, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm4, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $2, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpand %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm6, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpaddb %ymm0, %ymm0, %ymm2
; AVX512DQ-NEXT:    vpblendvb %ymm7, %ymm2, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: constant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm1 = [8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32,8192,24640,41088,57536,49376,32928,16480,32]
; AVX512BW-NEXT:    # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpsllw $4, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpsllw $2, %zmm0, %zmm2
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu8 %zmm2, %zmm0 {%k1}
; AVX512BW-NEXT:    vpaddb %zmm1, %zmm1, %zmm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vpaddb %zmm0, %zmm0, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>
  ret <64 x i8> %shift
}

;
; Uniform Constant Shifts
;

define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v8i64:
; ALL:       # %bb.0:
; ALL-NEXT:    vpsllq $7, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
  ret <8 x i64> %shift
}

define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) nounwind {
; ALL-LABEL: splatconstant_shift_v16i32:
; ALL:       # %bb.0:
; ALL-NEXT:    vpslld $5, %zmm0, %zmm0
; ALL-NEXT:    retq
  %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i32> %shift
}

define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v32i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <32 x i16> %shift
}

define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
; AVX512DQ-LABEL: splatconstant_shift_v64i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpsllw $3, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_shift_v64i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsllw $3, %zmm0, %zmm0
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
  %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <64 x i8> %shift
}