; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefixes=AVX12,AVX2,AVX2-FAST
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+fast-variable-shuffle | FileCheck %s --check-prefix=AVX512

;
; 128-bit vectors
;

; Sign-extend a 2-bit mask (bitcast to <2 x i1>) into <2 x i64> all-ones/all-zeros lanes.
define <2 x i64> @ext_i2_2i64(i2 %a0) {
; SSE2-SSSE3-LABEL: ext_i2_2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i2_2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i2_2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i2 %a0 to <2 x i1>
  %2 = sext <2 x i1> %1 to <2 x i64>
  ret <2 x i64> %2
}

; Sign-extend a 4-bit mask (bitcast to <4 x i1>) into <4 x i32> all-ones/all-zeros lanes.
define <4 x i32> @ext_i4_4i32(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i32>
  ret <4 x i32> %2
}

; Sign-extend an 8-bit mask (bitcast to <8 x i1>) into <8 x i16> all-ones/all-zeros lanes.
define <8 x i16> @ext_i8_8i16(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i16>
  ret <8 x i16> %2
}

; Sign-extend a 16-bit mask (bitcast to <16 x i1>) into <16 x i8> all-ones/all-zeros lanes.
define <16 x i8> @ext_i16_16i8(i16 %a0) {
; SSE2-LABEL: ext_i16_16i8:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: ext_i16_16i8:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSSE3-NEXT:    pand %xmm1, %xmm0
; SSSE3-NEXT:    pcmpeqb %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX1-NEXT:    # xmm1 = mem[0,0]
; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %xmm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i8>
  ret <16 x i8> %2
}

;
; 256-bit vectors
;

; Sign-extend a 4-bit mask (bitcast to <4 x i1>) into <4 x i64> all-ones/all-zeros lanes.
define <4 x i64> @ext_i4_4i64(i4 %a0) {
; SSE2-SSSE3-LABEL: ext_i4_4i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i4_4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i4_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i4 %a0 to <4 x i1>
  %2 = sext <4 x i1> %1 to <4 x i64>
  ret <4 x i64> %2
}

; Sign-extend an 8-bit mask (bitcast to <8 x i1>) into <8 x i32> all-ones/all-zeros lanes.
define <8 x i32> @ext_i8_8i32(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i32>
  ret <8 x i32> %2
}

; Sign-extend a 16-bit mask (bitcast to <16 x i1>) into <16 x i16> all-ones/all-zeros lanes.
define <16 x i16> @ext_i16_16i16(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i16>
  ret <16 x i16> %2
}

; Sign-extend a 32-bit mask (bitcast to <32 x i1>) into <32 x i8> all-ones/all-zeros lanes.
define <32 x i8> @ext_i32_32i8(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm1
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpxor %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %ymm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i8>
  ret <32 x i8> %2
}

;
; 512-bit vectors
;

; Sign-extend an 8-bit mask (bitcast to <8 x i1>) into <8 x i64> all-ones/all-zeros lanes.
define <8 x i64> @ext_i8_8i64(i8 %a0) {
; SSE2-SSSE3-LABEL: ext_i8_8i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[0,1,0,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm0 = [1,2]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm0, %xmm1
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm2
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32]
; SSE2-SSSE3-NEXT:    movdqa %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm3
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [64,128]
; SSE2-SSSE3-NEXT:    pand %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm3, %xmm4
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm4[1,0,3,2]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i8_8i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i8_8i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqq %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [16,32,64,128]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i8_8i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i8 %a0 to <8 x i1>
  %2 = sext <8 x i1> %1 to <8 x i64>
  ret <8 x i64> %2
}

; Sign-extend a 16-bit mask (bitcast to <16 x i1>) into <16 x i32> all-ones/all-zeros lanes.
define <16 x i32> @ext_i16_16i32(i16 %a0) {
; SSE2-SSSE3-LABEL: ext_i16_16i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm2 = [16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm1
; SSE2-SSSE3-NEXT:    pand %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [256,512,1024,2048]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqd %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i16_16i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i16_16i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm1
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm2
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i16_16i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k1
; AVX512-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = bitcast i16 %a0 to <16 x i1>
  %2 = sext <16 x i1> %1 to <16 x i32>
  ret <16 x i32> %2
}

; Sign-extend a 32-bit mask (bitcast to <32 x i1>) into <32 x i16> all-ones/all-zeros lanes.
define <32 x i16> @ext_i32_32i16(i32 %a0) {
; SSE2-SSSE3-LABEL: ext_i32_32i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movd %edi, %xmm2
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm2[0,0,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    movdqa %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [256,512,1024,2048,4096,8192,16384,32768]
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,1,2,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,0,0]
; SSE2-SSSE3-NEXT:    movdqa %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pand %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqw %xmm5, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i32_32i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm1
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[1,1,2,3,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i32_32i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768]
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    shrl $16, %edi
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastw %xmm2, %ymm2
; AVX2-NEXT:    vpand %ymm1, %ymm2, %ymm2
; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i32_32i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovd %edi, %k0
; AVX512-NEXT:    vpmovm2w %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i32 %a0 to <32 x i1>
  %2 = sext <32 x i1> %1 to <32 x i16>
  ret <32 x i16> %2
}

; Sign-extend a 64-bit mask (bitcast to <64 x i1>) into <64 x i8> all-ones/all-zeros lanes.
define <64 x i8> @ext_i64_64i8(i64 %a0) {
; SSE2-SSSE3-LABEL: ext_i64_64i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movq %rdi, %xmm3
; SSE2-SSSE3-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm3[0,0,1,1,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; SSE2-SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm0
; SSE2-SSSE3-NEXT:    pshuflw {{.*#+}} xmm1 = xmm3[2,2,3,3,4,5,6,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm1
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,4,5,5]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm2
; SSE2-SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,6,6,7,7]
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[2,2,3,3]
; SSE2-SSSE3-NEXT:    pand %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    pcmpeqb %xmm4, %xmm3
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: ext_i64_64i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7]
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm1[2,2,3,3,4,5,6,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT:    vpxor %xmm5, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpxor %xmm5, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm3 = xmm1[0,1,2,3,4,4,5,5]
; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,7,7]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm3, %ymm1
; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[2,2,3,3,6,6,7,7]
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm2, %xmm2
; AVX1-NEXT:    vpxor %xmm5, %xmm2, %xmm2
; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: ext_i64_64i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745]
; AVX2-NEXT:    vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
; AVX2-NEXT:    vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ext_i64_64i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    kmovq %rdi, %k0
; AVX512-NEXT:    vpmovm2b %k0, %zmm0
; AVX512-NEXT:    retq
  %1 = bitcast i64 %a0 to <64 x i1>
  %2 = sext <64 x i1> %1 to <64 x i8>
  ret <64 x i8> %2
}
