; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,NOBW,NOVBMI,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX512,NOVBMI,AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vbmi | FileCheck %s --check-prefixes=AVX512,AVX512BW,VBMI

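; Each test builds a variable shuffle out of per-element extracts with dynamic
; indices followed by inserts. For <8 x i64> every AVX-512 run folds the whole
; pattern into a single 512-bit variable permute, as the checks below show.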
define <8 x i64> @var_shuffle_v8i64(<8 x i64> %v, <8 x i64> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %index0 = extractelement <8 x i64> %indices, i32 0
  %index1 = extractelement <8 x i64> %indices, i32 1
  %index2 = extractelement <8 x i64> %indices, i32 2
  %index3 = extractelement <8 x i64> %indices, i32 3
  %index4 = extractelement <8 x i64> %indices, i32 4
  %index5 = extractelement <8 x i64> %indices, i32 5
  %index6 = extractelement <8 x i64> %indices, i32 6
  %index7 = extractelement <8 x i64> %indices, i32 7
  %v0 = extractelement <8 x i64> %v, i64 %index0
  %v1 = extractelement <8 x i64> %v, i64 %index1
  %v2 = extractelement <8 x i64> %v, i64 %index2
  %v3 = extractelement <8 x i64> %v, i64 %index3
  %v4 = extractelement <8 x i64> %v, i64 %index4
  %v5 = extractelement <8 x i64> %v, i64 %index5
  %v6 = extractelement <8 x i64> %v, i64 %index6
  %v7 = extractelement <8 x i64> %v, i64 %index7
  %ret0 = insertelement <8 x i64> undef, i64 %v0, i32 0
  %ret1 = insertelement <8 x i64> %ret0, i64 %v1, i32 1
  %ret2 = insertelement <8 x i64> %ret1, i64 %v2, i32 2
  %ret3 = insertelement <8 x i64> %ret2, i64 %v3, i32 3
  %ret4 = insertelement <8 x i64> %ret3, i64 %v4, i32 4
  %ret5 = insertelement <8 x i64> %ret4, i64 %v5, i32 5
  %ret6 = insertelement <8 x i64> %ret5, i64 %v6, i32 6
  %ret7 = insertelement <8 x i64> %ret6, i64 %v7, i32 7
  ret <8 x i64> %ret7
}

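; The <16 x i32> version likewise collapses to a single 512-bit variable
; permute on every AVX-512 configuration.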
define <16 x i32> @var_shuffle_v16i32(<16 x i32> %v, <16 x i32> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %index0 = extractelement <16 x i32> %indices, i32 0
  %index1 = extractelement <16 x i32> %indices, i32 1
  %index2 = extractelement <16 x i32> %indices, i32 2
  %index3 = extractelement <16 x i32> %indices, i32 3
  %index4 = extractelement <16 x i32> %indices, i32 4
  %index5 = extractelement <16 x i32> %indices, i32 5
  %index6 = extractelement <16 x i32> %indices, i32 6
  %index7 = extractelement <16 x i32> %indices, i32 7
  %index8 = extractelement <16 x i32> %indices, i32 8
  %index9 = extractelement <16 x i32> %indices, i32 9
  %index10 = extractelement <16 x i32> %indices, i32 10
  %index11 = extractelement <16 x i32> %indices, i32 11
  %index12 = extractelement <16 x i32> %indices, i32 12
  %index13 = extractelement <16 x i32> %indices, i32 13
  %index14 = extractelement <16 x i32> %indices, i32 14
  %index15 = extractelement <16 x i32> %indices, i32 15
  %v0 = extractelement <16 x i32> %v, i32 %index0
  %v1 = extractelement <16 x i32> %v, i32 %index1
  %v2 = extractelement <16 x i32> %v, i32 %index2
  %v3 = extractelement <16 x i32> %v, i32 %index3
  %v4 = extractelement <16 x i32> %v, i32 %index4
  %v5 = extractelement <16 x i32> %v, i32 %index5
  %v6 = extractelement <16 x i32> %v, i32 %index6
  %v7 = extractelement <16 x i32> %v, i32 %index7
  %v8 = extractelement <16 x i32> %v, i32 %index8
  %v9 = extractelement <16 x i32> %v, i32 %index9
  %v10 = extractelement <16 x i32> %v, i32 %index10
  %v11 = extractelement <16 x i32> %v, i32 %index11
  %v12 = extractelement <16 x i32> %v, i32 %index12
  %v13 = extractelement <16 x i32> %v, i32 %index13
  %v14 = extractelement <16 x i32> %v, i32 %index14
  %v15 = extractelement <16 x i32> %v, i32 %index15
  %ret0 = insertelement <16 x i32> undef, i32 %v0, i32 0
  %ret1 = insertelement <16 x i32> %ret0, i32 %v1, i32 1
  %ret2 = insertelement <16 x i32> %ret1, i32 %v2, i32 2
  %ret3 = insertelement <16 x i32> %ret2, i32 %v3, i32 3
  %ret4 = insertelement <16 x i32> %ret3, i32 %v4, i32 4
  %ret5 = insertelement <16 x i32> %ret4, i32 %v5, i32 5
  %ret6 = insertelement <16 x i32> %ret5, i32 %v6, i32 6
  %ret7 = insertelement <16 x i32> %ret6, i32 %v7, i32 7
  %ret8 = insertelement <16 x i32> %ret7, i32 %v8, i32 8
  %ret9 = insertelement <16 x i32> %ret8, i32 %v9, i32 9
  %ret10 = insertelement <16 x i32> %ret9, i32 %v10, i32 10
  %ret11 = insertelement <16 x i32> %ret10, i32 %v11, i32 11
  %ret12 = insertelement <16 x i32> %ret11, i32 %v12, i32 12
  %ret13 = insertelement <16 x i32> %ret12, i32 %v13, i32 13
  %ret14 = insertelement <16 x i32> %ret13, i32 %v14, i32 14
  %ret15 = insertelement <16 x i32> %ret14, i32 %v15, i32 15
  ret <16 x i32> %ret15
}

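; A single-instruction <32 x i16> permute (vpermw) needs AVX512BW. The AVX512F
; run instead splits the operands into ymm halves, spills one copy per element
; to the stack, and rebuilds the result with vpextrw/vpinsrw.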
define <32 x i16> @var_shuffle_v32i16(<32 x i16> %v, <32 x i16> %indices) nounwind {
; NOBW-LABEL: var_shuffle_v32i16:
; NOBW:       # %bb.0:
; NOBW-NEXT:    pushq %rbp
; NOBW-NEXT:    movq %rsp, %rbp
; NOBW-NEXT:    andq $-64, %rsp
; NOBW-NEXT:    subq $2112, %rsp # imm = 0x840
; NOBW-NEXT:    vextracti128 $1, %ymm2, %xmm4
; NOBW-NEXT:    vmovd %xmm4, %eax
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, (%rsp)
; NOBW-NEXT:    movzwl 1472(%rsp,%rax,2), %eax
; NOBW-NEXT:    vmovd %eax, %xmm0
; NOBW-NEXT:    vpextrw $1, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $1, 1408(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $2, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $2, 1344(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $3, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $3, 1280(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $4, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $4, 1216(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $5, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $5, 1152(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $6, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $6, 1088(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vpextrw $7, %xmm4, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $7, 1024(%rsp,%rax,2), %xmm0, %xmm0
; NOBW-NEXT:    vmovd %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    movzwl 1984(%rsp,%rax,2), %eax
; NOBW-NEXT:    vmovd %eax, %xmm1
; NOBW-NEXT:    vpextrw $1, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $1, 1920(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $2, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $2, 1856(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $3, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $3, 1792(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $4, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $4, 1728(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $5, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $5, 1664(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $6, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $6, 1600(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vpextrw $7, %xmm2, %eax
; NOBW-NEXT:    vextracti128 $1, %ymm3, %xmm2
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $7, 1536(%rsp,%rax,2), %xmm1, %xmm1
; NOBW-NEXT:    vmovd %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    movzwl 448(%rsp,%rax,2), %eax
; NOBW-NEXT:    vmovd %eax, %xmm4
; NOBW-NEXT:    vpextrw $1, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $1, 384(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $2, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $2, 320(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $3, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $3, 256(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $4, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $4, 192(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $5, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $5, 128(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $6, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $6, 64(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $7, %xmm2, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm4, %xmm2
; NOBW-NEXT:    vmovd %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    movzwl 960(%rsp,%rax,2), %eax
; NOBW-NEXT:    vmovd %eax, %xmm4
; NOBW-NEXT:    vpextrw $1, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $1, 896(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $2, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $2, 832(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $3, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $3, 768(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $4, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $4, 704(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $5, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $5, 640(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $6, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $6, 576(%rsp,%rax,2), %xmm4, %xmm4
; NOBW-NEXT:    vpextrw $7, %xmm3, %eax
; NOBW-NEXT:    andl $31, %eax
; NOBW-NEXT:    vpinsrw $7, 512(%rsp,%rax,2), %xmm4, %xmm3
; NOBW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; NOBW-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm1
; NOBW-NEXT:    movq %rbp, %rsp
; NOBW-NEXT:    popq %rbp
; NOBW-NEXT:    retq
;
; AVX512BW-LABEL: var_shuffle_v32i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpermw %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    retq
  %index0 = extractelement <32 x i16> %indices, i32 0
  %index1 = extractelement <32 x i16> %indices, i32 1
  %index2 = extractelement <32 x i16> %indices, i32 2
  %index3 = extractelement <32 x i16> %indices, i32 3
  %index4 = extractelement <32 x i16> %indices, i32 4
  %index5 = extractelement <32 x i16> %indices, i32 5
  %index6 = extractelement <32 x i16> %indices, i32 6
  %index7 = extractelement <32 x i16> %indices, i32 7
  %index8 = extractelement <32 x i16> %indices, i32 8
  %index9 = extractelement <32 x i16> %indices, i32 9
  %index10 = extractelement <32 x i16> %indices, i32 10
  %index11 = extractelement <32 x i16> %indices, i32 11
  %index12 = extractelement <32 x i16> %indices, i32 12
  %index13 = extractelement <32 x i16> %indices, i32 13
  %index14 = extractelement <32 x i16> %indices, i32 14
  %index15 = extractelement <32 x i16> %indices, i32 15
  %index16 = extractelement <32 x i16> %indices, i32 16
  %index17 = extractelement <32 x i16> %indices, i32 17
  %index18 = extractelement <32 x i16> %indices, i32 18
  %index19 = extractelement <32 x i16> %indices, i32 19
  %index20 = extractelement <32 x i16> %indices, i32 20
  %index21 = extractelement <32 x i16> %indices, i32 21
  %index22 = extractelement <32 x i16> %indices, i32 22
  %index23 = extractelement <32 x i16> %indices, i32 23
  %index24 = extractelement <32 x i16> %indices, i32 24
  %index25 = extractelement <32 x i16> %indices, i32 25
  %index26 = extractelement <32 x i16> %indices, i32 26
  %index27 = extractelement <32 x i16> %indices, i32 27
  %index28 = extractelement <32 x i16> %indices, i32 28
  %index29 = extractelement <32 x i16> %indices, i32 29
  %index30 = extractelement <32 x i16> %indices, i32 30
  %index31 = extractelement <32 x i16> %indices, i32 31
  %v0 = extractelement <32 x i16> %v, i16 %index0
  %v1 = extractelement <32 x i16> %v, i16 %index1
  %v2 = extractelement <32 x i16> %v, i16 %index2
  %v3 = extractelement <32 x i16> %v, i16 %index3
  %v4 = extractelement <32 x i16> %v, i16 %index4
  %v5 = extractelement <32 x i16> %v, i16 %index5
  %v6 = extractelement <32 x i16> %v, i16 %index6
  %v7 = extractelement <32 x i16> %v, i16 %index7
  %v8 = extractelement <32 x i16> %v, i16 %index8
  %v9 = extractelement <32 x i16> %v, i16 %index9
  %v10 = extractelement <32 x i16> %v, i16 %index10
  %v11 = extractelement <32 x i16> %v, i16 %index11
  %v12 = extractelement <32 x i16> %v, i16 %index12
  %v13 = extractelement <32 x i16> %v, i16 %index13
  %v14 = extractelement <32 x i16> %v, i16 %index14
  %v15 = extractelement <32 x i16> %v, i16 %index15
  %v16 = extractelement <32 x i16> %v, i16 %index16
  %v17 = extractelement <32 x i16> %v, i16 %index17
  %v18 = extractelement <32 x i16> %v, i16 %index18
  %v19 = extractelement <32 x i16> %v, i16 %index19
  %v20 = extractelement <32 x i16> %v, i16 %index20
  %v21 = extractelement <32 x i16> %v, i16 %index21
  %v22 = extractelement <32 x i16> %v, i16 %index22
  %v23 = extractelement <32 x i16> %v, i16 %index23
  %v24 = extractelement <32 x i16> %v, i16 %index24
  %v25 = extractelement <32 x i16> %v, i16 %index25
  %v26 = extractelement <32 x i16> %v, i16 %index26
  %v27 = extractelement <32 x i16> %v, i16 %index27
  %v28 = extractelement <32 x i16> %v, i16 %index28
  %v29 = extractelement <32 x i16> %v, i16 %index29
  %v30 = extractelement <32 x i16> %v, i16 %index30
  %v31 = extractelement <32 x i16> %v, i16 %index31
  %ret0 = insertelement <32 x i16> undef, i16 %v0, i32 0
  %ret1 = insertelement <32 x i16> %ret0, i16 %v1, i32 1
  %ret2 = insertelement <32 x i16> %ret1, i16 %v2, i32 2
  %ret3 = insertelement <32 x i16> %ret2, i16 %v3, i32 3
  %ret4 = insertelement <32 x i16> %ret3, i16 %v4, i32 4
  %ret5 = insertelement <32 x i16> %ret4, i16 %v5, i32 5
  %ret6 = insertelement <32 x i16> %ret5, i16 %v6, i32 6
  %ret7 = insertelement <32 x i16> %ret6, i16 %v7, i32 7
  %ret8 = insertelement <32 x i16> %ret7, i16 %v8, i32 8
  %ret9 = insertelement <32 x i16> %ret8, i16 %v9, i32 9
  %ret10 = insertelement <32 x i16> %ret9, i16 %v10, i32 10
  %ret11 = insertelement <32 x i16> %ret10, i16 %v11, i32 11
  %ret12 = insertelement <32 x i16> %ret11, i16 %v12, i32 12
  %ret13 = insertelement <32 x i16> %ret12, i16 %v13, i32 13
  %ret14 = insertelement <32 x i16> %ret13, i16 %v14, i32 14
  %ret15 = insertelement <32 x i16> %ret14, i16 %v15, i32 15
  %ret16 = insertelement <32 x i16> %ret15, i16 %v16, i32 16
  %ret17 = insertelement <32 x i16> %ret16, i16 %v17, i32 17
  %ret18 = insertelement <32 x i16> %ret17, i16 %v18, i32 18
  %ret19 = insertelement <32 x i16> %ret18, i16 %v19, i32 19
  %ret20 = insertelement <32 x i16> %ret19, i16 %v20, i32 20
  %ret21 = insertelement <32 x i16> %ret20, i16 %v21, i32 21
  %ret22 = insertelement <32 x i16> %ret21, i16 %v22, i32 22
  %ret23 = insertelement <32 x i16> %ret22, i16 %v23, i32 23
  %ret24 = insertelement <32 x i16> %ret23, i16 %v24, i32 24
  %ret25 = insertelement <32 x i16> %ret24, i16 %v25, i32 25
  %ret26 = insertelement <32 x i16> %ret25, i16 %v26, i32 26
  %ret27 = insertelement <32 x i16> %ret26, i16 %v27, i32 27
  %ret28 = insertelement <32 x i16> %ret27, i16 %v28, i32 28
  %ret29 = insertelement <32 x i16> %ret28, i16 %v29, i32 29
  %ret30 = insertelement <32 x i16> %ret29, i16 %v30, i32 30
  %ret31 = insertelement <32 x i16> %ret30, i16 %v31, i32 31
  ret <32 x i16> %ret31
}

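; The <64 x i8> case follows the same split: with only AVX512F it goes through
; the stack using vpextrb/vpinsrb, while AVX512VBMI reduces it to one vpermb.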
define <64 x i8> @var_shuffle_v64i8(<64 x i8> %v, <64 x i8> %indices) nounwind {
; NOBW-LABEL: var_shuffle_v64i8:
; NOBW:       # %bb.0:
; NOBW-NEXT:    pushq %rbp
; NOBW-NEXT:    movq %rsp, %rbp
; NOBW-NEXT:    andq $-64, %rsp
; NOBW-NEXT:    subq $4160, %rsp # imm = 0x1040
; NOBW-NEXT:    vextracti128 $1, %ymm2, %xmm4
; NOBW-NEXT:    vpextrb $0, %xmm4, %eax
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
; NOBW-NEXT:    vmovaps %ymm0, (%rsp)
; NOBW-NEXT:    movzbl 3008(%rsp,%rax), %eax
; NOBW-NEXT:    vmovd %eax, %xmm0
; NOBW-NEXT:    vpextrb $1, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $1, 2944(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $2, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $2, 2880(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $3, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $3, 2816(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $4, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $4, 2752(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $5, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $5, 2688(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $6, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $6, 2624(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $7, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $7, 2560(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $8, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $8, 2496(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $9, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $9, 2432(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $10, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $10, 2368(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $11, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $11, 2304(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $12, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $12, 2240(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $13, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $13, 2176(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $14, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $14, 2112(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $15, %xmm4, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $15, 2048(%rsp,%rax), %xmm0, %xmm0
; NOBW-NEXT:    vpextrb $0, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    movzbl 4032(%rsp,%rax), %eax
; NOBW-NEXT:    vmovd %eax, %xmm1
; NOBW-NEXT:    vpextrb $1, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $1, 3968(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $2, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $2, 3904(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $3, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $3, 3840(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $4, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $4, 3776(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $5, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $5, 3712(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $6, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $6, 3648(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $7, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $7, 3584(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $8, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $8, 3520(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $9, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $9, 3456(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $10, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $10, 3392(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $11, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $11, 3328(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $12, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $12, 3264(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $13, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $13, 3200(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $14, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $14, 3136(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $15, %xmm2, %eax
; NOBW-NEXT:    vextracti128 $1, %ymm3, %xmm2
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $15, 3072(%rsp,%rax), %xmm1, %xmm1
; NOBW-NEXT:    vpextrb $0, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    movzbl 960(%rsp,%rax), %eax
; NOBW-NEXT:    vmovd %eax, %xmm4
; NOBW-NEXT:    vpextrb $1, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $1, 896(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $2, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $2, 832(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $3, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $3, 768(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $4, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $4, 704(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $5, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $5, 640(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $6, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $6, 576(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $7, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $7, 512(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $8, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $8, 448(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $9, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $9, 384(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $10, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $10, 320(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $11, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $11, 256(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $12, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $12, 192(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $13, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $13, 128(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $14, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $14, 64(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $15, %xmm2, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $15, (%rsp,%rax), %xmm4, %xmm2
; NOBW-NEXT:    vpextrb $0, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    movzbl 1984(%rsp,%rax), %eax
; NOBW-NEXT:    vmovd %eax, %xmm4
; NOBW-NEXT:    vpextrb $1, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $1, 1920(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $2, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $2, 1856(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $3, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $3, 1792(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $4, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $4, 1728(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $5, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $5, 1664(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $6, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $6, 1600(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $7, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $7, 1536(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $8, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $8, 1472(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $9, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $9, 1408(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $10, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $10, 1344(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $11, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $11, 1280(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $12, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $12, 1216(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $13, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $13, 1152(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $14, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $14, 1088(%rsp,%rax), %xmm4, %xmm4
; NOBW-NEXT:    vpextrb $15, %xmm3, %eax
; NOBW-NEXT:    andl $63, %eax
; NOBW-NEXT:    vpinsrb $15, 1024(%rsp,%rax), %xmm4, %xmm3
; NOBW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; NOBW-NEXT:    vinserti128 $1, %xmm2, %ymm3, %ymm1
; NOBW-NEXT:    movq %rbp, %rsp
; NOBW-NEXT:    popq %rbp
; NOBW-NEXT:    retq
;
; VBMI-LABEL: var_shuffle_v64i8:
; VBMI:       # %bb.0:
; VBMI-NEXT:    vpermb %zmm0, %zmm1, %zmm0
; VBMI-NEXT:    retq
  %index0 = extractelement <64 x i8> %indices, i32 0
  %index1 = extractelement <64 x i8> %indices, i32 1
  %index2 = extractelement <64 x i8> %indices, i32 2
  %index3 = extractelement <64 x i8> %indices, i32 3
  %index4 = extractelement <64 x i8> %indices, i32 4
  %index5 = extractelement <64 x i8> %indices, i32 5
  %index6 = extractelement <64 x i8> %indices, i32 6
  %index7 = extractelement <64 x i8> %indices, i32 7
  %index8 = extractelement <64 x i8> %indices, i32 8
  %index9 = extractelement <64 x i8> %indices, i32 9
  %index10 = extractelement <64 x i8> %indices, i32 10
  %index11 = extractelement <64 x i8> %indices, i32 11
  %index12 = extractelement <64 x i8> %indices, i32 12
  %index13 = extractelement <64 x i8> %indices, i32 13
  %index14 = extractelement <64 x i8> %indices, i32 14
  %index15 = extractelement <64 x i8> %indices, i32 15
  %index16 = extractelement <64 x i8> %indices, i32 16
  %index17 = extractelement <64 x i8> %indices, i32 17
  %index18 = extractelement <64 x i8> %indices, i32 18
  %index19 = extractelement <64 x i8> %indices, i32 19
  %index20 = extractelement <64 x i8> %indices, i32 20
  %index21 = extractelement <64 x i8> %indices, i32 21
  %index22 = extractelement <64 x i8> %indices, i32 22
  %index23 = extractelement <64 x i8> %indices, i32 23
  %index24 = extractelement <64 x i8> %indices, i32 24
  %index25 = extractelement <64 x i8> %indices, i32 25
  %index26 = extractelement <64 x i8> %indices, i32 26
  %index27 = extractelement <64 x i8> %indices, i32 27
  %index28 = extractelement <64 x i8> %indices, i32 28
  %index29 = extractelement <64 x i8> %indices, i32 29
  %index30 = extractelement <64 x i8> %indices, i32 30
  %index31 = extractelement <64 x i8> %indices, i32 31
  %index32 = extractelement <64 x i8> %indices, i32 32
  %index33 = extractelement <64 x i8> %indices, i32 33
  %index34 = extractelement <64 x i8> %indices, i32 34
  %index35 = extractelement <64 x i8> %indices, i32 35
  %index36 = extractelement <64 x i8> %indices, i32 36
  %index37 = extractelement <64 x i8> %indices, i32 37
  %index38 = extractelement <64 x i8> %indices, i32 38
  %index39 = extractelement <64 x i8> %indices, i32 39
  %index40 = extractelement <64 x i8> %indices, i32 40
  %index41 = extractelement <64 x i8> %indices, i32 41
  %index42 = extractelement <64 x i8> %indices, i32 42
  %index43 = extractelement <64 x i8> %indices, i32 43
  %index44 = extractelement <64 x i8> %indices, i32 44
  %index45 = extractelement <64 x i8> %indices, i32 45
  %index46 = extractelement <64 x i8> %indices, i32 46
  %index47 = extractelement <64 x i8> %indices, i32 47
  %index48 = extractelement <64 x i8> %indices, i32 48
  %index49 = extractelement <64 x i8> %indices, i32 49
  %index50 = extractelement <64 x i8> %indices, i32 50
  %index51 = extractelement <64 x i8> %indices, i32 51
  %index52 = extractelement <64 x i8> %indices, i32 52
  %index53 = extractelement <64 x i8> %indices, i32 53
  %index54 = extractelement <64 x i8> %indices, i32 54
  %index55 = extractelement <64 x i8> %indices, i32 55
  %index56 = extractelement <64 x i8> %indices, i32 56
  %index57 = extractelement <64 x i8> %indices, i32 57
  %index58 = extractelement <64 x i8> %indices, i32 58
  %index59 = extractelement <64 x i8> %indices, i32 59
  %index60 = extractelement <64 x i8> %indices, i32 60
  %index61 = extractelement <64 x i8> %indices, i32 61
  %index62 = extractelement <64 x i8> %indices, i32 62
  %index63 = extractelement <64 x i8> %indices, i32 63
  %v0 = extractelement <64 x i8> %v, i8 %index0
  %v1 = extractelement <64 x i8> %v, i8 %index1
  %v2 = extractelement <64 x i8> %v, i8 %index2
  %v3 = extractelement <64 x i8> %v, i8 %index3
  %v4 = extractelement <64 x i8> %v, i8 %index4
  %v5 = extractelement <64 x i8> %v, i8 %index5
  %v6 = extractelement <64 x i8> %v, i8 %index6
  %v7 = extractelement <64 x i8> %v, i8 %index7
  %v8 = extractelement <64 x i8> %v, i8 %index8
  %v9 = extractelement <64 x i8> %v, i8 %index9
  %v10 = extractelement <64 x i8> %v, i8 %index10
  %v11 = extractelement <64 x i8> %v, i8 %index11
  %v12 = extractelement <64 x i8> %v, i8 %index12
  %v13 = extractelement <64 x i8> %v, i8 %index13
  %v14 = extractelement <64 x i8> %v, i8 %index14
  %v15 = extractelement <64 x i8> %v, i8 %index15
  %v16 = extractelement <64 x i8> %v, i8 %index16
  %v17 = extractelement <64 x i8> %v, i8 %index17
  %v18 = extractelement <64 x i8> %v, i8 %index18
  %v19 = extractelement <64 x i8> %v, i8 %index19
  %v20 = extractelement <64 x i8> %v, i8 %index20
  %v21 = extractelement <64 x i8> %v, i8 %index21
  %v22 = extractelement <64 x i8> %v, i8 %index22
  %v23 = extractelement <64 x i8> %v, i8 %index23
  %v24 = extractelement <64 x i8> %v, i8 %index24
  %v25 = extractelement <64 x i8> %v, i8 %index25
  %v26 = extractelement <64 x i8> %v, i8 %index26
  %v27 = extractelement <64 x i8> %v, i8 %index27
  %v28 = extractelement <64 x i8> %v, i8 %index28
  %v29 = extractelement <64 x i8> %v, i8 %index29
  %v30 = extractelement <64 x i8> %v, i8 %index30
  %v31 = extractelement <64 x i8> %v, i8 %index31
  %v32 = extractelement <64 x i8> %v, i8 %index32
  %v33 = extractelement <64 x i8> %v, i8 %index33
  %v34 = extractelement <64 x i8> %v, i8 %index34
  %v35 = extractelement <64 x i8> %v, i8 %index35
  %v36 = extractelement <64 x i8> %v, i8 %index36
  %v37 = extractelement <64 x i8> %v, i8 %index37
  %v38 = extractelement <64 x i8> %v, i8 %index38
  %v39 = extractelement <64 x i8> %v, i8 %index39
  %v40 = extractelement <64 x i8> %v, i8 %index40
  %v41 = extractelement <64 x i8> %v, i8 %index41
  %v42 = extractelement <64 x i8> %v, i8 %index42
  %v43 = extractelement <64 x i8> %v, i8 %index43
  %v44 = extractelement <64 x i8> %v, i8 %index44
  %v45 = extractelement <64 x i8> %v, i8 %index45
  %v46 = extractelement <64 x i8> %v, i8 %index46
  %v47 = extractelement <64 x i8> %v, i8 %index47
  %v48 = extractelement <64 x i8> %v, i8 %index48
  %v49 = extractelement <64 x i8> %v, i8 %index49
  %v50 = extractelement <64 x i8> %v, i8 %index50
  %v51 = extractelement <64 x i8> %v, i8 %index51
  %v52 = extractelement <64 x i8> %v, i8 %index52
  %v53 = extractelement <64 x i8> %v, i8 %index53
  %v54 = extractelement <64 x i8> %v, i8 %index54
  %v55 = extractelement <64 x i8> %v, i8 %index55
  %v56 = extractelement <64 x i8> %v, i8 %index56
  %v57 = extractelement <64 x i8> %v, i8 %index57
  %v58 = extractelement <64 x i8> %v, i8 %index58
  %v59 = extractelement <64 x i8> %v, i8 %index59
  %v60 = extractelement <64 x i8> %v, i8 %index60
  %v61 = extractelement <64 x i8> %v, i8 %index61
  %v62 = extractelement <64 x i8> %v, i8 %index62
  %v63 = extractelement <64 x i8> %v, i8 %index63
  %ret0 = insertelement <64 x i8> undef, i8 %v0, i32 0
  %ret1 = insertelement <64 x i8> %ret0, i8 %v1, i32 1
  %ret2 = insertelement <64 x i8> %ret1, i8 %v2, i32 2
  %ret3 = insertelement <64 x i8> %ret2, i8 %v3, i32 3
  %ret4 = insertelement <64 x i8> %ret3, i8 %v4, i32 4
  %ret5 = insertelement <64 x i8> %ret4, i8 %v5, i32 5
  %ret6 = insertelement <64 x i8> %ret5, i8 %v6, i32 6
  %ret7 = insertelement <64 x i8> %ret6, i8 %v7, i32 7
  %ret8 = insertelement <64 x i8> %ret7, i8 %v8, i32 8
  %ret9 = insertelement <64 x i8> %ret8, i8 %v9, i32 9
  %ret10 = insertelement <64 x i8> %ret9, i8 %v10, i32 10
  %ret11 = insertelement <64 x i8> %ret10, i8 %v11, i32 11
  %ret12 = insertelement <64 x i8> %ret11, i8 %v12, i32 12
  %ret13 = insertelement <64 x i8> %ret12, i8 %v13, i32 13
  %ret14 = insertelement <64 x i8> %ret13, i8 %v14, i32 14
  %ret15 = insertelement <64 x i8> %ret14, i8 %v15, i32 15
  %ret16 = insertelement <64 x i8> %ret15, i8 %v16, i32 16
  %ret17 = insertelement <64 x i8> %ret16, i8 %v17, i32 17
  %ret18 = insertelement <64 x i8> %ret17, i8 %v18, i32 18
  %ret19 = insertelement <64 x i8> %ret18, i8 %v19, i32 19
  %ret20 = insertelement <64 x i8> %ret19, i8 %v20, i32 20
  %ret21 = insertelement <64 x i8> %ret20, i8 %v21, i32 21
  %ret22 = insertelement <64 x i8> %ret21, i8 %v22, i32 22
  %ret23 = insertelement <64 x i8> %ret22, i8 %v23, i32 23
  %ret24 = insertelement <64 x i8> %ret23, i8 %v24, i32 24
  %ret25 = insertelement <64 x i8> %ret24, i8 %v25, i32 25
  %ret26 = insertelement <64 x i8> %ret25, i8 %v26, i32 26
  %ret27 = insertelement <64 x i8> %ret26, i8 %v27, i32 27
  %ret28 = insertelement <64 x i8> %ret27, i8 %v28, i32 28
  %ret29 = insertelement <64 x i8> %ret28, i8 %v29, i32 29
  %ret30 = insertelement <64 x i8> %ret29, i8 %v30, i32 30
  %ret31 = insertelement <64 x i8> %ret30, i8 %v31, i32 31
  %ret32 = insertelement <64 x i8> %ret31, i8 %v32, i32 32
  %ret33 = insertelement <64 x i8> %ret32, i8 %v33, i32 33
  %ret34 = insertelement <64 x i8> %ret33, i8 %v34, i32 34
  %ret35 = insertelement <64 x i8> %ret34, i8 %v35, i32 35
  %ret36 = insertelement <64 x i8> %ret35, i8 %v36, i32 36
  %ret37 = insertelement <64 x i8> %ret36, i8 %v37, i32 37
  %ret38 = insertelement <64 x i8> %ret37, i8 %v38, i32 38
  %ret39 = insertelement <64 x i8> %ret38, i8 %v39, i32 39
  %ret40 = insertelement <64 x i8> %ret39, i8 %v40, i32 40
  %ret41 = insertelement <64 x i8> %ret40, i8 %v41, i32 41
  %ret42 = insertelement <64 x i8> %ret41, i8 %v42, i32 42
  %ret43 = insertelement <64 x i8> %ret42, i8 %v43, i32 43
  %ret44 = insertelement <64 x i8> %ret43, i8 %v44, i32 44
  %ret45 = insertelement <64 x i8> %ret44, i8 %v45, i32 45
  %ret46 = insertelement <64 x i8> %ret45, i8 %v46, i32 46
  %ret47 = insertelement <64 x i8> %ret46, i8 %v47, i32 47
  %ret48 = insertelement <64 x i8> %ret47, i8 %v48, i32 48
  %ret49 = insertelement <64 x i8> %ret48, i8 %v49, i32 49
  %ret50 = insertelement <64 x i8> %ret49, i8 %v50, i32 50
  %ret51 = insertelement <64 x i8> %ret50, i8 %v51, i32 51
  %ret52 = insertelement <64 x i8> %ret51, i8 %v52, i32 52
  %ret53 = insertelement <64 x i8> %ret52, i8 %v53, i32 53
  %ret54 = insertelement <64 x i8> %ret53, i8 %v54, i32 54
  %ret55 = insertelement <64 x i8> %ret54, i8 %v55, i32 55
  %ret56 = insertelement <64 x i8> %ret55, i8 %v56, i32 56
  %ret57 = insertelement <64 x i8> %ret56, i8 %v57, i32 57
  %ret58 = insertelement <64 x i8> %ret57, i8 %v58, i32 58
  %ret59 = insertelement <64 x i8> %ret58, i8 %v59, i32 59
  %ret60 = insertelement <64 x i8> %ret59, i8 %v60, i32 60
  %ret61 = insertelement <64 x i8> %ret60, i8 %v61, i32 61
  %ret62 = insertelement <64 x i8> %ret61, i8 %v62, i32 62
  %ret63 = insertelement <64 x i8> %ret62, i8 %v63, i32 63
  ret <64 x i8> %ret63
}

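; Floating-point counterpart of var_shuffle_v8i64; expected to select a single
; vpermpd on all AVX-512 runs.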
define <8 x double> @var_shuffle_v8f64(<8 x double> %v, <8 x i64> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %index0 = extractelement <8 x i64> %indices, i32 0
  %index1 = extractelement <8 x i64> %indices, i32 1
  %index2 = extractelement <8 x i64> %indices, i32 2
  %index3 = extractelement <8 x i64> %indices, i32 3
  %index4 = extractelement <8 x i64> %indices, i32 4
  %index5 = extractelement <8 x i64> %indices, i32 5
  %index6 = extractelement <8 x i64> %indices, i32 6
  %index7 = extractelement <8 x i64> %indices, i32 7
  %v0 = extractelement <8 x double> %v, i64 %index0
  %v1 = extractelement <8 x double> %v, i64 %index1
  %v2 = extractelement <8 x double> %v, i64 %index2
  %v3 = extractelement <8 x double> %v, i64 %index3
  %v4 = extractelement <8 x double> %v, i64 %index4
  %v5 = extractelement <8 x double> %v, i64 %index5
  %v6 = extractelement <8 x double> %v, i64 %index6
  %v7 = extractelement <8 x double> %v, i64 %index7
  %ret0 = insertelement <8 x double> undef, double %v0, i32 0
  %ret1 = insertelement <8 x double> %ret0, double %v1, i32 1
  %ret2 = insertelement <8 x double> %ret1, double %v2, i32 2
  %ret3 = insertelement <8 x double> %ret2, double %v3, i32 3
  %ret4 = insertelement <8 x double> %ret3, double %v4, i32 4
  %ret5 = insertelement <8 x double> %ret4, double %v5, i32 5
  %ret6 = insertelement <8 x double> %ret5, double %v6, i32 6
  %ret7 = insertelement <8 x double> %ret6, double %v7, i32 7
  ret <8 x double> %ret7
}

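; Floating-point counterpart of var_shuffle_v16i32; expected to select a single
; vpermps on all AVX-512 runs.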
define <16 x float> @var_shuffle_v16f32(<16 x float> %v, <16 x i32> %indices) nounwind {
; AVX512-LABEL: var_shuffle_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermps %zmm0, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %index0 = extractelement <16 x i32> %indices, i32 0
  %index1 = extractelement <16 x i32> %indices, i32 1
  %index2 = extractelement <16 x i32> %indices, i32 2
  %index3 = extractelement <16 x i32> %indices, i32 3
  %index4 = extractelement <16 x i32> %indices, i32 4
  %index5 = extractelement <16 x i32> %indices, i32 5
  %index6 = extractelement <16 x i32> %indices, i32 6
  %index7 = extractelement <16 x i32> %indices, i32 7
  %index8 = extractelement <16 x i32> %indices, i32 8
  %index9 = extractelement <16 x i32> %indices, i32 9
  %index10 = extractelement <16 x i32> %indices, i32 10
  %index11 = extractelement <16 x i32> %indices, i32 11
  %index12 = extractelement <16 x i32> %indices, i32 12
  %index13 = extractelement <16 x i32> %indices, i32 13
  %index14 = extractelement <16 x i32> %indices, i32 14
  %index15 = extractelement <16 x i32> %indices, i32 15
  %v0 = extractelement <16 x float> %v, i32 %index0
  %v1 = extractelement <16 x float> %v, i32 %index1
  %v2 = extractelement <16 x float> %v, i32 %index2
  %v3 = extractelement <16 x float> %v, i32 %index3
  %v4 = extractelement <16 x float> %v, i32 %index4
  %v5 = extractelement <16 x float> %v, i32 %index5
  %v6 = extractelement <16 x float> %v, i32 %index6
  %v7 = extractelement <16 x float> %v, i32 %index7
  %v8 = extractelement <16 x float> %v, i32 %index8
  %v9 = extractelement <16 x float> %v, i32 %index9
  %v10 = extractelement <16 x float> %v, i32 %index10
  %v11 = extractelement <16 x float> %v, i32 %index11
  %v12 = extractelement <16 x float> %v, i32 %index12
  %v13 = extractelement <16 x float> %v, i32 %index13
  %v14 = extractelement <16 x float> %v, i32 %index14
  %v15 = extractelement <16 x float> %v, i32 %index15
  %ret0 = insertelement <16 x float> undef, float %v0, i32 0
  %ret1 = insertelement <16 x float> %ret0, float %v1, i32 1
  %ret2 = insertelement <16 x float> %ret1, float %v2, i32 2
  %ret3 = insertelement <16 x float> %ret2, float %v3, i32 3
  %ret4 = insertelement <16 x float> %ret3, float %v4, i32 4
  %ret5 = insertelement <16 x float> %ret4, float %v5, i32 5
  %ret6 = insertelement <16 x float> %ret5, float %v6, i32 6
  %ret7 = insertelement <16 x float> %ret6, float %v7, i32 7
  %ret8 = insertelement <16 x float> %ret7, float %v8, i32 8
  %ret9 = insertelement <16 x float> %ret8, float %v9, i32 9
  %ret10 = insertelement <16 x float> %ret9, float %v10, i32 10
  %ret11 = insertelement <16 x float> %ret10, float %v11, i32 11
  %ret12 = insertelement <16 x float> %ret11, float %v12, i32 12
  %ret13 = insertelement <16 x float> %ret12, float %v13, i32 13
  %ret14 = insertelement <16 x float> %ret13, float %v14, i32 14
  %ret15 = insertelement <16 x float> %ret14, float %v15, i32 15
  ret <16 x float> %ret15
}