; Vector-shuffle lowering tests for <4 x double> / <4 x i64> on AVX and AVX2.
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

target triple = "x86_64-unknown-unknown"

define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0001:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0001:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0020:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0020:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0300:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0300:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_1000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2200:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_2200:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3330:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,1,2,2]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3330:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0023:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0022:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1133:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1023:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1022:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0423:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0423:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0462:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0426:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1537:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4062:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5173:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5163:
; ALL:       # BB#0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0527:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4163:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0145:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4501:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0167:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1054:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_3254:
; ALL:       # BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_3276:
; ALL:       # BB#0:
; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1076:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0415:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0415:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x double> %shuffle
}

define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0001:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0020:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0020:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0112:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0112:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0300:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0300:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2200:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_2200:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3330:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,1,2,2]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3330:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0124:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0124:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0142:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0142:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0412:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0412:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4012:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4012:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_0145:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0451:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0451:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_4501:
; ALL:       # BB#0:
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4015:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4015:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2u35:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_2u35:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1251:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1251:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1054:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1054:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3254:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3254:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3276:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3276:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1076:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1076:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0415:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0415:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: stress_test1:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm0[1,0,3,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: stress_test1:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm1[3,1,1,0]
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[3,3,1,3]
; AVX2-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; AVX2-NEXT:    retq
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
  %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
  %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
  %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>

  ret <4 x i64> %f
}

define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; AVX1-LABEL: insert_reg_and_zero_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_reg_and_zero_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    retq
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_mem_and_zero_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovq (%rdi), %xmm0
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_mem_and_zero_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovq (%rdi), %xmm0
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT:    retq
  %a = load i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: insert_reg_and_zero_v4f64:
; ALL:       # BB#0:
; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
; ALL-NEXT:    retq
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v4f64:
; ALL:       # BB#0:
; ALL-NEXT:    vmovsd (%rdi), %xmm0
; ALL-NEXT:    retq
  %a = load double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @splat_mem_v4f64(double* %ptr) {
; ALL-LABEL: splat_mem_v4f64:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %a = load double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: splat_mem_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup (%rdi), %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_mem_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
  %a = load i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat_mem_v4f64_2(double* %p) {
; ALL-LABEL: splat_mem_v4f64_2:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %1 = load double* %p
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %3
}

define <4 x double> @splat_v4f64(<2 x double> %r) {
; AVX1-LABEL: splat_v4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_v4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %1
}
