1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL
8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
9; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW
10; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2
11; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2
12; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
13; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2
14
15; Just one 32-bit run to make sure we do reasonable things for i64 cases.
16; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2
17
18declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
19declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
20declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
21declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
22
23;
24; Variable Shifts
25;
26
27define <2 x i64> @var_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
; fshl with both value operands equal to %x is a per-lane rotate-left by
; %amt (taken modulo 64). The assertions below are autogenerated by
; update_llc_test_checks.py -- regenerate rather than hand-edit.
; Pre-AVX512/XOP targets lower this as (x << (amt & 63)) | (x >> ((-amt) & 63));
; AVX512 uses vprolvq and XOP uses vprotq directly.
28; SSE2-LABEL: var_funnnel_v2i64:
29; SSE2:       # %bb.0:
30; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [63,63]
31; SSE2-NEXT:    pxor %xmm3, %xmm3
32; SSE2-NEXT:    psubq %xmm1, %xmm3
33; SSE2-NEXT:    pand %xmm2, %xmm1
34; SSE2-NEXT:    movdqa %xmm0, %xmm4
35; SSE2-NEXT:    psllq %xmm1, %xmm4
36; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
37; SSE2-NEXT:    movdqa %xmm0, %xmm5
38; SSE2-NEXT:    psllq %xmm1, %xmm5
39; SSE2-NEXT:    movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
40; SSE2-NEXT:    pand %xmm2, %xmm3
41; SSE2-NEXT:    movdqa %xmm0, %xmm1
42; SSE2-NEXT:    psrlq %xmm3, %xmm1
43; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
44; SSE2-NEXT:    psrlq %xmm2, %xmm0
45; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
46; SSE2-NEXT:    orpd %xmm5, %xmm0
47; SSE2-NEXT:    retq
48;
49; SSE41-LABEL: var_funnnel_v2i64:
50; SSE41:       # %bb.0:
51; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [63,63]
52; SSE41-NEXT:    pxor %xmm3, %xmm3
53; SSE41-NEXT:    psubq %xmm1, %xmm3
54; SSE41-NEXT:    pand %xmm2, %xmm1
55; SSE41-NEXT:    movdqa %xmm0, %xmm4
56; SSE41-NEXT:    psllq %xmm1, %xmm4
57; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
58; SSE41-NEXT:    movdqa %xmm0, %xmm5
59; SSE41-NEXT:    psllq %xmm1, %xmm5
60; SSE41-NEXT:    pblendw {{.*#+}} xmm5 = xmm4[0,1,2,3],xmm5[4,5,6,7]
61; SSE41-NEXT:    pand %xmm2, %xmm3
62; SSE41-NEXT:    movdqa %xmm0, %xmm1
63; SSE41-NEXT:    psrlq %xmm3, %xmm1
64; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
65; SSE41-NEXT:    psrlq %xmm2, %xmm0
66; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
67; SSE41-NEXT:    por %xmm5, %xmm0
68; SSE41-NEXT:    retq
69;
70; AVX1-LABEL: var_funnnel_v2i64:
71; AVX1:       # %bb.0:
72; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
73; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm3
74; AVX1-NEXT:    vpsllq %xmm3, %xmm0, %xmm4
75; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
76; AVX1-NEXT:    vpsllq %xmm3, %xmm0, %xmm3
77; AVX1-NEXT:    vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
78; AVX1-NEXT:    vpxor %xmm4, %xmm4, %xmm4
79; AVX1-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
80; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
81; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm2
82; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
83; AVX1-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
84; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
85; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
86; AVX1-NEXT:    retq
87;
88; AVX2-LABEL: var_funnnel_v2i64:
89; AVX2:       # %bb.0:
90; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
91; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm3
92; AVX2-NEXT:    vpsllvq %xmm3, %xmm0, %xmm3
93; AVX2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
94; AVX2-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
95; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
96; AVX2-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
97; AVX2-NEXT:    vpor %xmm0, %xmm3, %xmm0
98; AVX2-NEXT:    retq
99;
100; AVX512F-LABEL: var_funnnel_v2i64:
101; AVX512F:       # %bb.0:
102; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
103; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
104; AVX512F-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
105; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
106; AVX512F-NEXT:    vzeroupper
107; AVX512F-NEXT:    retq
108;
109; AVX512VL-LABEL: var_funnnel_v2i64:
110; AVX512VL:       # %bb.0:
111; AVX512VL-NEXT:    vprolvq %xmm1, %xmm0, %xmm0
112; AVX512VL-NEXT:    retq
113;
114; AVX512BW-LABEL: var_funnnel_v2i64:
115; AVX512BW:       # %bb.0:
116; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
117; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
118; AVX512BW-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
119; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
120; AVX512BW-NEXT:    vzeroupper
121; AVX512BW-NEXT:    retq
122;
123; AVX512VLBW-LABEL: var_funnnel_v2i64:
124; AVX512VLBW:       # %bb.0:
125; AVX512VLBW-NEXT:    vprolvq %xmm1, %xmm0, %xmm0
126; AVX512VLBW-NEXT:    retq
127;
128; AVX512VBMI2-LABEL: var_funnnel_v2i64:
129; AVX512VBMI2:       # %bb.0:
130; AVX512VBMI2-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
131; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
132; AVX512VBMI2-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
133; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
134; AVX512VBMI2-NEXT:    vzeroupper
135; AVX512VBMI2-NEXT:    retq
136;
137; AVX512VLVBMI2-LABEL: var_funnnel_v2i64:
138; AVX512VLVBMI2:       # %bb.0:
139; AVX512VLVBMI2-NEXT:    vprolvq %xmm1, %xmm0, %xmm0
140; AVX512VLVBMI2-NEXT:    retq
141;
142; XOP-LABEL: var_funnnel_v2i64:
143; XOP:       # %bb.0:
144; XOP-NEXT:    vprotq %xmm1, %xmm0, %xmm0
145; XOP-NEXT:    retq
146;
147; X86-SSE2-LABEL: var_funnnel_v2i64:
148; X86-SSE2:       # %bb.0:
149; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [63,0,63,0]
150; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
151; X86-SSE2-NEXT:    psubq %xmm1, %xmm3
152; X86-SSE2-NEXT:    pand %xmm2, %xmm1
153; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
154; X86-SSE2-NEXT:    psllq %xmm1, %xmm4
155; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
156; X86-SSE2-NEXT:    movdqa %xmm0, %xmm5
157; X86-SSE2-NEXT:    psllq %xmm1, %xmm5
158; X86-SSE2-NEXT:    movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
159; X86-SSE2-NEXT:    pand %xmm2, %xmm3
160; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
161; X86-SSE2-NEXT:    psrlq %xmm3, %xmm1
162; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
163; X86-SSE2-NEXT:    psrlq %xmm2, %xmm0
164; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
165; X86-SSE2-NEXT:    orpd %xmm5, %xmm0
166; X86-SSE2-NEXT:    retl
; Identical first and second operands make this fshl a rotate-left of %x.
167  %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> %amt)
168  ret <2 x i64> %res
169}
170
171define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
; Per-lane rotate-left of <4 x i32> by a variable amount (amt & 31).
; Assertions autogenerated by update_llc_test_checks.py -- regenerate, don't
; hand-edit. SSE2/SSE41/AVX1 build 2^amt via pslld $23 + float-bias trick and
; multiply (pmuludq) to get both shifted halves; AVX2 uses vpsllvd/vpsrlvd;
; AVX512 lowers straight to vprolvd and XOP to vprotd.
172; SSE2-LABEL: var_funnnel_v4i32:
173; SSE2:       # %bb.0:
174; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
175; SSE2-NEXT:    pslld $23, %xmm1
176; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
177; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
178; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
179; SSE2-NEXT:    pmuludq %xmm1, %xmm0
180; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
181; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
182; SSE2-NEXT:    pmuludq %xmm2, %xmm1
183; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
184; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
185; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
186; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
187; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
188; SSE2-NEXT:    por %xmm3, %xmm0
189; SSE2-NEXT:    retq
190;
191; SSE41-LABEL: var_funnnel_v4i32:
192; SSE41:       # %bb.0:
193; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
194; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
195; SSE41-NEXT:    pslld $23, %xmm1
196; SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
197; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
198; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
199; SSE41-NEXT:    pmuludq %xmm2, %xmm3
200; SSE41-NEXT:    pmuludq %xmm1, %xmm0
201; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
202; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
203; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
204; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
205; SSE41-NEXT:    por %xmm1, %xmm0
206; SSE41-NEXT:    retq
207;
208; AVX1-LABEL: var_funnnel_v4i32:
209; AVX1:       # %bb.0:
210; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
211; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
212; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
213; AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
214; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
215; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
216; AVX1-NEXT:    vpmuludq %xmm3, %xmm2, %xmm2
217; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
218; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
219; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
220; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
221; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
222; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
223; AVX1-NEXT:    retq
224;
225; AVX2-LABEL: var_funnnel_v4i32:
226; AVX2:       # %bb.0:
227; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
228; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
229; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
230; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
231; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
232; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
233; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
234; AVX2-NEXT:    retq
235;
236; AVX512F-LABEL: var_funnnel_v4i32:
237; AVX512F:       # %bb.0:
238; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
239; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
240; AVX512F-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
241; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
242; AVX512F-NEXT:    vzeroupper
243; AVX512F-NEXT:    retq
244;
245; AVX512VL-LABEL: var_funnnel_v4i32:
246; AVX512VL:       # %bb.0:
247; AVX512VL-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
248; AVX512VL-NEXT:    retq
249;
250; AVX512BW-LABEL: var_funnnel_v4i32:
251; AVX512BW:       # %bb.0:
252; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
253; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
254; AVX512BW-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
255; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
256; AVX512BW-NEXT:    vzeroupper
257; AVX512BW-NEXT:    retq
258;
259; AVX512VLBW-LABEL: var_funnnel_v4i32:
260; AVX512VLBW:       # %bb.0:
261; AVX512VLBW-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
262; AVX512VLBW-NEXT:    retq
263;
264; AVX512VBMI2-LABEL: var_funnnel_v4i32:
265; AVX512VBMI2:       # %bb.0:
266; AVX512VBMI2-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
267; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
268; AVX512VBMI2-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
269; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
270; AVX512VBMI2-NEXT:    vzeroupper
271; AVX512VBMI2-NEXT:    retq
272;
273; AVX512VLVBMI2-LABEL: var_funnnel_v4i32:
274; AVX512VLVBMI2:       # %bb.0:
275; AVX512VLVBMI2-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
276; AVX512VLVBMI2-NEXT:    retq
277;
278; XOP-LABEL: var_funnnel_v4i32:
279; XOP:       # %bb.0:
280; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
281; XOP-NEXT:    retq
282;
283; X86-SSE2-LABEL: var_funnnel_v4i32:
284; X86-SSE2:       # %bb.0:
285; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
286; X86-SSE2-NEXT:    pslld $23, %xmm1
287; X86-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
288; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
289; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
290; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
291; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
292; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
293; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
294; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
295; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
296; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
297; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
298; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
299; X86-SSE2-NEXT:    por %xmm3, %xmm0
300; X86-SSE2-NEXT:    retl
; Identical first and second operands make this fshl a rotate-left of %x.
301  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %amt)
302  ret <4 x i32> %res
303}
304
305define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
; Per-lane rotate-left of <8 x i16> by a variable amount (amt & 15).
; Assertions autogenerated by update_llc_test_checks.py -- regenerate, don't
; hand-edit. SSE targets synthesize 2^amt per lane (pslld $23 + float-bias +
; cvttps2dq) and combine pmullw (low half) with pmulhuw (high half); AVX2/AVX512
; widen to 32-bit lanes for variable shifts; AVX512VBMI2 uses vpshldvw and XOP
; uses vprotw directly.
306; SSE2-LABEL: var_funnnel_v8i16:
307; SSE2:       # %bb.0:
308; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
309; SSE2-NEXT:    movdqa %xmm1, %xmm2
310; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
311; SSE2-NEXT:    pslld $23, %xmm2
312; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
313; SSE2-NEXT:    paddd %xmm3, %xmm2
314; SSE2-NEXT:    cvttps2dq %xmm2, %xmm2
315; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
316; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
317; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
318; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
319; SSE2-NEXT:    pslld $23, %xmm1
320; SSE2-NEXT:    paddd %xmm3, %xmm1
321; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
322; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
323; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
324; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
325; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
326; SSE2-NEXT:    movdqa %xmm0, %xmm2
327; SSE2-NEXT:    pmulhuw %xmm1, %xmm2
328; SSE2-NEXT:    pmullw %xmm1, %xmm0
329; SSE2-NEXT:    por %xmm2, %xmm0
330; SSE2-NEXT:    retq
331;
332; SSE41-LABEL: var_funnnel_v8i16:
333; SSE41:       # %bb.0:
334; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
335; SSE41-NEXT:    pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
336; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
337; SSE41-NEXT:    pslld $23, %xmm1
338; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
339; SSE41-NEXT:    paddd %xmm3, %xmm1
340; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
341; SSE41-NEXT:    pslld $23, %xmm2
342; SSE41-NEXT:    paddd %xmm3, %xmm2
343; SSE41-NEXT:    cvttps2dq %xmm2, %xmm2
344; SSE41-NEXT:    packusdw %xmm1, %xmm2
345; SSE41-NEXT:    movdqa %xmm0, %xmm1
346; SSE41-NEXT:    pmulhuw %xmm2, %xmm1
347; SSE41-NEXT:    pmullw %xmm2, %xmm0
348; SSE41-NEXT:    por %xmm1, %xmm0
349; SSE41-NEXT:    retq
350;
351; AVX1-LABEL: var_funnnel_v8i16:
352; AVX1:       # %bb.0:
353; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
354; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm1[4,4,5,5,6,6,7,7]
355; AVX1-NEXT:    vpslld $23, %xmm2, %xmm2
356; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
357; AVX1-NEXT:    vpaddd %xmm3, %xmm2, %xmm2
358; AVX1-NEXT:    vcvttps2dq %xmm2, %xmm2
359; AVX1-NEXT:    vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
360; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
361; AVX1-NEXT:    vpaddd %xmm3, %xmm1, %xmm1
362; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
363; AVX1-NEXT:    vpackusdw %xmm2, %xmm1, %xmm1
364; AVX1-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2
365; AVX1-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
366; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
367; AVX1-NEXT:    retq
368;
369; AVX2-LABEL: var_funnnel_v8i16:
370; AVX2:       # %bb.0:
371; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
372; AVX2-NEXT:    vmovdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
373; AVX2-NEXT:    vpsubw %xmm1, %xmm2, %xmm2
374; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
375; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
376; AVX2-NEXT:    vpsrlvd %ymm2, %ymm0, %ymm2
377; AVX2-NEXT:    vextracti128 $1, %ymm2, %xmm3
378; AVX2-NEXT:    vpackusdw %xmm3, %xmm2, %xmm2
379; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
380; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0
381; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
382; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
383; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
384; AVX2-NEXT:    vzeroupper
385; AVX2-NEXT:    retq
386;
387; AVX512F-LABEL: var_funnnel_v8i16:
388; AVX512F:       # %bb.0:
389; AVX512F-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
390; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
391; AVX512F-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
392; AVX512F-NEXT:    vpsllvd %ymm2, %ymm0, %ymm2
393; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
394; AVX512F-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
395; AVX512F-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
396; AVX512F-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
397; AVX512F-NEXT:    vpor %ymm0, %ymm2, %ymm0
398; AVX512F-NEXT:    vpmovdw %zmm0, %ymm0
399; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
400; AVX512F-NEXT:    vzeroupper
401; AVX512F-NEXT:    retq
402;
403; AVX512VL-LABEL: var_funnnel_v8i16:
404; AVX512VL:       # %bb.0:
405; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
406; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
407; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
408; AVX512VL-NEXT:    vpsllvd %ymm2, %ymm0, %ymm2
409; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
410; AVX512VL-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
411; AVX512VL-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
412; AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0
413; AVX512VL-NEXT:    vpor %ymm0, %ymm2, %ymm0
414; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
415; AVX512VL-NEXT:    vzeroupper
416; AVX512VL-NEXT:    retq
417;
418; AVX512BW-LABEL: var_funnnel_v8i16:
419; AVX512BW:       # %bb.0:
420; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
421; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
422; AVX512BW-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2
423; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
424; AVX512BW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
425; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
426; AVX512BW-NEXT:    vpor %xmm0, %xmm2, %xmm0
427; AVX512BW-NEXT:    vzeroupper
428; AVX512BW-NEXT:    retq
429;
430; AVX512VLBW-LABEL: var_funnnel_v8i16:
431; AVX512VLBW:       # %bb.0:
432; AVX512VLBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
433; AVX512VLBW-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2
434; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
435; AVX512VLBW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
436; AVX512VLBW-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0
437; AVX512VLBW-NEXT:    vpor %xmm0, %xmm2, %xmm0
438; AVX512VLBW-NEXT:    retq
439;
440; AVX512VBMI2-LABEL: var_funnnel_v8i16:
441; AVX512VBMI2:       # %bb.0:
442; AVX512VBMI2-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
443; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
444; AVX512VBMI2-NEXT:    vpshldvw %zmm1, %zmm0, %zmm0
445; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
446; AVX512VBMI2-NEXT:    vzeroupper
447; AVX512VBMI2-NEXT:    retq
448;
449; AVX512VLVBMI2-LABEL: var_funnnel_v8i16:
450; AVX512VLVBMI2:       # %bb.0:
451; AVX512VLVBMI2-NEXT:    vpshldvw %xmm1, %xmm0, %xmm0
452; AVX512VLVBMI2-NEXT:    retq
453;
454; XOP-LABEL: var_funnnel_v8i16:
455; XOP:       # %bb.0:
456; XOP-NEXT:    vprotw %xmm1, %xmm0, %xmm0
457; XOP-NEXT:    retq
458;
459; X86-SSE2-LABEL: var_funnnel_v8i16:
460; X86-SSE2:       # %bb.0:
461; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
462; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
463; X86-SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
464; X86-SSE2-NEXT:    pslld $23, %xmm2
465; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [1065353216,1065353216,1065353216,1065353216]
466; X86-SSE2-NEXT:    paddd %xmm3, %xmm2
467; X86-SSE2-NEXT:    cvttps2dq %xmm2, %xmm2
468; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
469; X86-SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,4,6,6,7]
470; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
471; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
472; X86-SSE2-NEXT:    pslld $23, %xmm1
473; X86-SSE2-NEXT:    paddd %xmm3, %xmm1
474; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
475; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
476; X86-SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
477; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
478; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
479; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
480; X86-SSE2-NEXT:    pmulhuw %xmm1, %xmm2
481; X86-SSE2-NEXT:    pmullw %xmm1, %xmm0
482; X86-SSE2-NEXT:    por %xmm2, %xmm0
483; X86-SSE2-NEXT:    retl
; Identical first and second operands make this fshl a rotate-left of %x.
484  %res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x, <8 x i16> %x, <8 x i16> %amt)
485  ret <8 x i16> %res
486}
487
488define <16 x i8> @var_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
489; SSE2-LABEL: var_funnnel_v16i8:
490; SSE2:       # %bb.0:
491; SSE2-NEXT:    movdqa %xmm0, %xmm2
492; SSE2-NEXT:    psllw $5, %xmm1
493; SSE2-NEXT:    pxor %xmm0, %xmm0
494; SSE2-NEXT:    pxor %xmm3, %xmm3
495; SSE2-NEXT:    pcmpgtb %xmm1, %xmm3
496; SSE2-NEXT:    movdqa %xmm2, %xmm4
497; SSE2-NEXT:    psrlw $4, %xmm4
498; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
499; SSE2-NEXT:    movdqa %xmm2, %xmm5
500; SSE2-NEXT:    psllw $4, %xmm5
501; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5
502; SSE2-NEXT:    por %xmm4, %xmm5
503; SSE2-NEXT:    pand %xmm3, %xmm5
504; SSE2-NEXT:    pandn %xmm2, %xmm3
505; SSE2-NEXT:    por %xmm5, %xmm3
506; SSE2-NEXT:    movdqa %xmm3, %xmm2
507; SSE2-NEXT:    psrlw $6, %xmm2
508; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
509; SSE2-NEXT:    movdqa %xmm3, %xmm4
510; SSE2-NEXT:    psllw $2, %xmm4
511; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
512; SSE2-NEXT:    por %xmm2, %xmm4
513; SSE2-NEXT:    paddb %xmm1, %xmm1
514; SSE2-NEXT:    pxor %xmm2, %xmm2
515; SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
516; SSE2-NEXT:    pand %xmm2, %xmm4
517; SSE2-NEXT:    pandn %xmm3, %xmm2
518; SSE2-NEXT:    por %xmm4, %xmm2
519; SSE2-NEXT:    movdqa %xmm2, %xmm3
520; SSE2-NEXT:    paddb %xmm2, %xmm3
521; SSE2-NEXT:    movdqa %xmm2, %xmm4
522; SSE2-NEXT:    psrlw $7, %xmm4
523; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4
524; SSE2-NEXT:    por %xmm3, %xmm4
525; SSE2-NEXT:    paddb %xmm1, %xmm1
526; SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
527; SSE2-NEXT:    pand %xmm0, %xmm4
528; SSE2-NEXT:    pandn %xmm2, %xmm0
529; SSE2-NEXT:    por %xmm4, %xmm0
530; SSE2-NEXT:    retq
531;
532; SSE41-LABEL: var_funnnel_v16i8:
533; SSE41:       # %bb.0:
534; SSE41-NEXT:    movdqa %xmm1, %xmm2
535; SSE41-NEXT:    movdqa %xmm0, %xmm1
536; SSE41-NEXT:    psrlw $4, %xmm0
537; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
538; SSE41-NEXT:    movdqa %xmm1, %xmm3
539; SSE41-NEXT:    psllw $4, %xmm3
540; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
541; SSE41-NEXT:    por %xmm0, %xmm3
542; SSE41-NEXT:    psllw $5, %xmm2
543; SSE41-NEXT:    movdqa %xmm2, %xmm0
544; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
545; SSE41-NEXT:    movdqa %xmm1, %xmm0
546; SSE41-NEXT:    psrlw $6, %xmm0
547; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
548; SSE41-NEXT:    movdqa %xmm1, %xmm3
549; SSE41-NEXT:    psllw $2, %xmm3
550; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
551; SSE41-NEXT:    por %xmm0, %xmm3
552; SSE41-NEXT:    paddb %xmm2, %xmm2
553; SSE41-NEXT:    movdqa %xmm2, %xmm0
554; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
555; SSE41-NEXT:    movdqa %xmm1, %xmm0
556; SSE41-NEXT:    paddb %xmm1, %xmm0
557; SSE41-NEXT:    movdqa %xmm1, %xmm3
558; SSE41-NEXT:    psrlw $7, %xmm3
559; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
560; SSE41-NEXT:    por %xmm0, %xmm3
561; SSE41-NEXT:    paddb %xmm2, %xmm2
562; SSE41-NEXT:    movdqa %xmm2, %xmm0
563; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
564; SSE41-NEXT:    movdqa %xmm1, %xmm0
565; SSE41-NEXT:    retq
566;
567; AVX-LABEL: var_funnnel_v16i8:
568; AVX:       # %bb.0:
569; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm2
570; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
571; AVX-NEXT:    vpsllw $4, %xmm0, %xmm3
572; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
573; AVX-NEXT:    vpor %xmm2, %xmm3, %xmm2
574; AVX-NEXT:    vpsllw $5, %xmm1, %xmm1
575; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
576; AVX-NEXT:    vpsrlw $6, %xmm0, %xmm2
577; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
578; AVX-NEXT:    vpsllw $2, %xmm0, %xmm3
579; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
580; AVX-NEXT:    vpor %xmm2, %xmm3, %xmm2
581; AVX-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
582; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
583; AVX-NEXT:    vpaddb %xmm0, %xmm0, %xmm2
584; AVX-NEXT:    vpsrlw $7, %xmm0, %xmm3
585; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
586; AVX-NEXT:    vpor %xmm3, %xmm2, %xmm2
587; AVX-NEXT:    vpaddb %xmm1, %xmm1, %xmm1
588; AVX-NEXT:    vpblendvb %xmm1, %xmm2, %xmm0, %xmm0
589; AVX-NEXT:    retq
590;
591; AVX512F-LABEL: var_funnnel_v16i8:
592; AVX512F:       # %bb.0:
593; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
594; AVX512F-NEXT:    vpand %xmm2, %xmm1, %xmm3
595; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
596; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
597; AVX512F-NEXT:    vpsllvd %zmm3, %zmm0, %zmm3
598; AVX512F-NEXT:    vpxor %xmm4, %xmm4, %xmm4
599; AVX512F-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
600; AVX512F-NEXT:    vpand %xmm2, %xmm1, %xmm1
601; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
602; AVX512F-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
603; AVX512F-NEXT:    vpord %zmm0, %zmm3, %zmm0
604; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
605; AVX512F-NEXT:    vzeroupper
606; AVX512F-NEXT:    retq
607;
608; AVX512VL-LABEL: var_funnnel_v16i8:
609; AVX512VL:       # %bb.0:
610; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
611; AVX512VL-NEXT:    vpand %xmm2, %xmm1, %xmm3
612; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero,xmm3[8],zero,zero,zero,xmm3[9],zero,zero,zero,xmm3[10],zero,zero,zero,xmm3[11],zero,zero,zero,xmm3[12],zero,zero,zero,xmm3[13],zero,zero,zero,xmm3[14],zero,zero,zero,xmm3[15],zero,zero,zero
613; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
614; AVX512VL-NEXT:    vpsllvd %zmm3, %zmm0, %zmm3
615; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
616; AVX512VL-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
617; AVX512VL-NEXT:    vpand %xmm2, %xmm1, %xmm1
618; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
619; AVX512VL-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0
620; AVX512VL-NEXT:    vpord %zmm0, %zmm3, %zmm0
621; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
622; AVX512VL-NEXT:    vzeroupper
623; AVX512VL-NEXT:    retq
624;
625; AVX512BW-LABEL: var_funnnel_v16i8:
626; AVX512BW:       # %bb.0:
627; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
628; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
629; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm3
630; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
631; AVX512BW-NEXT:    vpsllvw %zmm3, %zmm0, %zmm3
632; AVX512BW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
633; AVX512BW-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
634; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm1
635; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
636; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
637; AVX512BW-NEXT:    vpor %ymm0, %ymm3, %ymm0
638; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
639; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
640; AVX512BW-NEXT:    vzeroupper
641; AVX512BW-NEXT:    retq
642;
643; AVX512VLBW-LABEL: var_funnnel_v16i8:
644; AVX512VLBW:       # %bb.0:
645; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
646; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm3
647; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
648; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
649; AVX512VLBW-NEXT:    vpsllvw %ymm3, %ymm0, %ymm3
650; AVX512VLBW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
651; AVX512VLBW-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
652; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm1
653; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
654; AVX512VLBW-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0
655; AVX512VLBW-NEXT:    vpor %ymm0, %ymm3, %ymm0
656; AVX512VLBW-NEXT:    vpmovwb %ymm0, %xmm0
657; AVX512VLBW-NEXT:    vzeroupper
658; AVX512VLBW-NEXT:    retq
659;
660; AVX512VBMI2-LABEL: var_funnnel_v16i8:
661; AVX512VBMI2:       # %bb.0:
662; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
663; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
664; AVX512VBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm3
665; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
666; AVX512VBMI2-NEXT:    vpsllvw %zmm3, %zmm0, %zmm3
667; AVX512VBMI2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
668; AVX512VBMI2-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
669; AVX512VBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm1
670; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
671; AVX512VBMI2-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0
672; AVX512VBMI2-NEXT:    vpor %ymm0, %ymm3, %ymm0
673; AVX512VBMI2-NEXT:    vpmovwb %zmm0, %ymm0
674; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
675; AVX512VBMI2-NEXT:    vzeroupper
676; AVX512VBMI2-NEXT:    retq
677;
678; AVX512VLVBMI2-LABEL: var_funnnel_v16i8:
679; AVX512VLVBMI2:       # %bb.0:
680; AVX512VLVBMI2-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
681; AVX512VLVBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm3
682; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
683; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
684; AVX512VLVBMI2-NEXT:    vpsllvw %ymm3, %ymm0, %ymm3
685; AVX512VLVBMI2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
686; AVX512VLVBMI2-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
687; AVX512VLVBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm1
688; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
689; AVX512VLVBMI2-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0
690; AVX512VLVBMI2-NEXT:    vpor %ymm0, %ymm3, %ymm0
691; AVX512VLVBMI2-NEXT:    vpmovwb %ymm0, %xmm0
692; AVX512VLVBMI2-NEXT:    vzeroupper
693; AVX512VLVBMI2-NEXT:    retq
694;
695; XOP-LABEL: var_funnnel_v16i8:
696; XOP:       # %bb.0:
697; XOP-NEXT:    vprotb %xmm1, %xmm0, %xmm0
698; XOP-NEXT:    retq
699;
700; X86-SSE2-LABEL: var_funnnel_v16i8:
701; X86-SSE2:       # %bb.0:
702; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
703; X86-SSE2-NEXT:    psllw $5, %xmm1
704; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
705; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
706; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm3
707; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
708; X86-SSE2-NEXT:    psrlw $4, %xmm4
709; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm4
710; X86-SSE2-NEXT:    movdqa %xmm2, %xmm5
711; X86-SSE2-NEXT:    psllw $4, %xmm5
712; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm5
713; X86-SSE2-NEXT:    por %xmm4, %xmm5
714; X86-SSE2-NEXT:    pand %xmm3, %xmm5
715; X86-SSE2-NEXT:    pandn %xmm2, %xmm3
716; X86-SSE2-NEXT:    por %xmm5, %xmm3
717; X86-SSE2-NEXT:    movdqa %xmm3, %xmm2
718; X86-SSE2-NEXT:    psrlw $6, %xmm2
719; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
720; X86-SSE2-NEXT:    movdqa %xmm3, %xmm4
721; X86-SSE2-NEXT:    psllw $2, %xmm4
722; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm4
723; X86-SSE2-NEXT:    por %xmm2, %xmm4
724; X86-SSE2-NEXT:    paddb %xmm1, %xmm1
725; X86-SSE2-NEXT:    pxor %xmm2, %xmm2
726; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm2
727; X86-SSE2-NEXT:    pand %xmm2, %xmm4
728; X86-SSE2-NEXT:    pandn %xmm3, %xmm2
729; X86-SSE2-NEXT:    por %xmm4, %xmm2
730; X86-SSE2-NEXT:    movdqa %xmm2, %xmm3
731; X86-SSE2-NEXT:    paddb %xmm2, %xmm3
732; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
733; X86-SSE2-NEXT:    psrlw $7, %xmm4
734; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm4
735; X86-SSE2-NEXT:    por %xmm3, %xmm4
736; X86-SSE2-NEXT:    paddb %xmm1, %xmm1
737; X86-SSE2-NEXT:    pcmpgtb %xmm1, %xmm0
738; X86-SSE2-NEXT:    pand %xmm0, %xmm4
739; X86-SSE2-NEXT:    pandn %xmm2, %xmm0
740; X86-SSE2-NEXT:    por %xmm4, %xmm0
741; X86-SSE2-NEXT:    retl
742  %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %x, <16 x i8> %x, <16 x i8> %amt)
743  ret <16 x i8> %res
744}
745
746;
747; Uniform Variable Shifts
748;
749
750define <2 x i64> @splatvar_funnnel_v2i64(<2 x i64> %x, <2 x i64> %amt) nounwind {
751; SSE-LABEL: splatvar_funnnel_v2i64:
752; SSE:       # %bb.0:
753; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [63,63]
754; SSE-NEXT:    pxor %xmm3, %xmm3
755; SSE-NEXT:    psubq %xmm1, %xmm3
756; SSE-NEXT:    pand %xmm2, %xmm1
757; SSE-NEXT:    movdqa %xmm0, %xmm4
758; SSE-NEXT:    psllq %xmm1, %xmm4
759; SSE-NEXT:    pand %xmm2, %xmm3
760; SSE-NEXT:    psrlq %xmm3, %xmm0
761; SSE-NEXT:    por %xmm4, %xmm0
762; SSE-NEXT:    retq
763;
764; AVX-LABEL: splatvar_funnnel_v2i64:
765; AVX:       # %bb.0:
766; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [63,63]
767; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm3
768; AVX-NEXT:    vpsllq %xmm3, %xmm0, %xmm3
769; AVX-NEXT:    vpxor %xmm4, %xmm4, %xmm4
770; AVX-NEXT:    vpsubq %xmm1, %xmm4, %xmm1
771; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
772; AVX-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0
773; AVX-NEXT:    vpor %xmm0, %xmm3, %xmm0
774; AVX-NEXT:    retq
775;
776; AVX512F-LABEL: splatvar_funnnel_v2i64:
777; AVX512F:       # %bb.0:
778; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
779; AVX512F-NEXT:    vpbroadcastq %xmm1, %xmm1
780; AVX512F-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
781; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
782; AVX512F-NEXT:    vzeroupper
783; AVX512F-NEXT:    retq
784;
785; AVX512VL-LABEL: splatvar_funnnel_v2i64:
786; AVX512VL:       # %bb.0:
787; AVX512VL-NEXT:    vpbroadcastq %xmm1, %xmm1
788; AVX512VL-NEXT:    vprolvq %xmm1, %xmm0, %xmm0
789; AVX512VL-NEXT:    retq
790;
791; AVX512BW-LABEL: splatvar_funnnel_v2i64:
792; AVX512BW:       # %bb.0:
793; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
794; AVX512BW-NEXT:    vpbroadcastq %xmm1, %xmm1
795; AVX512BW-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
796; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
797; AVX512BW-NEXT:    vzeroupper
798; AVX512BW-NEXT:    retq
799;
800; AVX512VLBW-LABEL: splatvar_funnnel_v2i64:
801; AVX512VLBW:       # %bb.0:
802; AVX512VLBW-NEXT:    vpbroadcastq %xmm1, %xmm1
803; AVX512VLBW-NEXT:    vprolvq %xmm1, %xmm0, %xmm0
804; AVX512VLBW-NEXT:    retq
805;
806; AVX512VBMI2-LABEL: splatvar_funnnel_v2i64:
807; AVX512VBMI2:       # %bb.0:
808; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
809; AVX512VBMI2-NEXT:    vpbroadcastq %xmm1, %xmm1
810; AVX512VBMI2-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
811; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
812; AVX512VBMI2-NEXT:    vzeroupper
813; AVX512VBMI2-NEXT:    retq
814;
815; AVX512VLVBMI2-LABEL: splatvar_funnnel_v2i64:
816; AVX512VLVBMI2:       # %bb.0:
817; AVX512VLVBMI2-NEXT:    vpbroadcastq %xmm1, %xmm1
818; AVX512VLVBMI2-NEXT:    vprolvq %xmm1, %xmm0, %xmm0
819; AVX512VLVBMI2-NEXT:    retq
820;
821; XOPAVX1-LABEL: splatvar_funnnel_v2i64:
822; XOPAVX1:       # %bb.0:
823; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
824; XOPAVX1-NEXT:    vprotq %xmm1, %xmm0, %xmm0
825; XOPAVX1-NEXT:    retq
826;
827; XOPAVX2-LABEL: splatvar_funnnel_v2i64:
828; XOPAVX2:       # %bb.0:
829; XOPAVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
830; XOPAVX2-NEXT:    vprotq %xmm1, %xmm0, %xmm0
831; XOPAVX2-NEXT:    retq
832;
833; X86-SSE2-LABEL: splatvar_funnnel_v2i64:
834; X86-SSE2:       # %bb.0:
835; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
836; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [63,0,63,0]
837; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
838; X86-SSE2-NEXT:    psubq %xmm1, %xmm3
839; X86-SSE2-NEXT:    pand %xmm2, %xmm1
840; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
841; X86-SSE2-NEXT:    psllq %xmm1, %xmm4
842; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
843; X86-SSE2-NEXT:    movdqa %xmm0, %xmm5
844; X86-SSE2-NEXT:    psllq %xmm1, %xmm5
845; X86-SSE2-NEXT:    movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
846; X86-SSE2-NEXT:    pand %xmm2, %xmm3
847; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
848; X86-SSE2-NEXT:    psrlq %xmm3, %xmm1
849; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
850; X86-SSE2-NEXT:    psrlq %xmm2, %xmm0
851; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
852; X86-SSE2-NEXT:    orpd %xmm5, %xmm0
853; X86-SSE2-NEXT:    retl
; Splat lane 0 of %amt to both elements, then fshl(x, x, amt) -- i.e. a
; rotate-left by a uniform amount. Targets with a native rotate select a
; single instruction (AVX512 vprolvq, XOP vprotq); the SSE/AVX runs above
; expand to shift-left + shift-right-by-(64-amt & 63) + or.
854  %splat = shufflevector <2 x i64> %amt, <2 x i64> undef, <2 x i32> zeroinitializer
855  %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> %splat)
856  ret <2 x i64> %res
857}
858
859define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind {
860; SSE2-LABEL: splatvar_funnnel_v4i32:
861; SSE2:       # %bb.0:
862; SSE2-NEXT:    movd %xmm1, %eax
863; SSE2-NEXT:    andl $31, %eax
864; SSE2-NEXT:    movd %eax, %xmm1
865; SSE2-NEXT:    movdqa %xmm0, %xmm2
866; SSE2-NEXT:    pslld %xmm1, %xmm2
867; SSE2-NEXT:    movl $32, %ecx
868; SSE2-NEXT:    subl %eax, %ecx
869; SSE2-NEXT:    movd %ecx, %xmm1
870; SSE2-NEXT:    psrld %xmm1, %xmm0
871; SSE2-NEXT:    por %xmm2, %xmm0
872; SSE2-NEXT:    retq
873;
874; SSE41-LABEL: splatvar_funnnel_v4i32:
875; SSE41:       # %bb.0:
876; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
877; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
878; SSE41-NEXT:    movdqa %xmm0, %xmm3
879; SSE41-NEXT:    pslld %xmm2, %xmm3
880; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32]
881; SSE41-NEXT:    psubd %xmm1, %xmm2
882; SSE41-NEXT:    pmovzxdq {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero
883; SSE41-NEXT:    psrld %xmm1, %xmm0
884; SSE41-NEXT:    por %xmm3, %xmm0
885; SSE41-NEXT:    retq
886;
887; AVX1-LABEL: splatvar_funnnel_v4i32:
888; AVX1:       # %bb.0:
889; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
890; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
891; AVX1-NEXT:    vpslld %xmm2, %xmm0, %xmm2
892; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32]
893; AVX1-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
894; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
895; AVX1-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
896; AVX1-NEXT:    vpor %xmm0, %xmm2, %xmm0
897; AVX1-NEXT:    retq
898;
899; AVX2-LABEL: splatvar_funnnel_v4i32:
900; AVX2:       # %bb.0:
901; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
902; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
903; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
904; AVX2-NEXT:    vpslld %xmm2, %xmm0, %xmm2
905; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
906; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
907; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
908; AVX2-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
909; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
910; AVX2-NEXT:    retq
911;
912; AVX512F-LABEL: splatvar_funnnel_v4i32:
913; AVX512F:       # %bb.0:
914; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
915; AVX512F-NEXT:    vpbroadcastd %xmm1, %xmm1
916; AVX512F-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
917; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
918; AVX512F-NEXT:    vzeroupper
919; AVX512F-NEXT:    retq
920;
921; AVX512VL-LABEL: splatvar_funnnel_v4i32:
922; AVX512VL:       # %bb.0:
923; AVX512VL-NEXT:    vpbroadcastd %xmm1, %xmm1
924; AVX512VL-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
925; AVX512VL-NEXT:    retq
926;
927; AVX512BW-LABEL: splatvar_funnnel_v4i32:
928; AVX512BW:       # %bb.0:
929; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
930; AVX512BW-NEXT:    vpbroadcastd %xmm1, %xmm1
931; AVX512BW-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
932; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
933; AVX512BW-NEXT:    vzeroupper
934; AVX512BW-NEXT:    retq
935;
936; AVX512VLBW-LABEL: splatvar_funnnel_v4i32:
937; AVX512VLBW:       # %bb.0:
938; AVX512VLBW-NEXT:    vpbroadcastd %xmm1, %xmm1
939; AVX512VLBW-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
940; AVX512VLBW-NEXT:    retq
941;
942; AVX512VBMI2-LABEL: splatvar_funnnel_v4i32:
943; AVX512VBMI2:       # %bb.0:
944; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
945; AVX512VBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
946; AVX512VBMI2-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
947; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
948; AVX512VBMI2-NEXT:    vzeroupper
949; AVX512VBMI2-NEXT:    retq
950;
951; AVX512VLVBMI2-LABEL: splatvar_funnnel_v4i32:
952; AVX512VLVBMI2:       # %bb.0:
953; AVX512VLVBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
954; AVX512VLVBMI2-NEXT:    vprolvd %xmm1, %xmm0, %xmm0
955; AVX512VLVBMI2-NEXT:    retq
956;
957; XOPAVX1-LABEL: splatvar_funnnel_v4i32:
958; XOPAVX1:       # %bb.0:
959; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
960; XOPAVX1-NEXT:    vprotd %xmm1, %xmm0, %xmm0
961; XOPAVX1-NEXT:    retq
962;
963; XOPAVX2-LABEL: splatvar_funnnel_v4i32:
964; XOPAVX2:       # %bb.0:
965; XOPAVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
966; XOPAVX2-NEXT:    vprotd %xmm1, %xmm0, %xmm0
967; XOPAVX2-NEXT:    retq
968;
969; X86-SSE2-LABEL: splatvar_funnnel_v4i32:
970; X86-SSE2:       # %bb.0:
971; X86-SSE2-NEXT:    movd %xmm1, %eax
972; X86-SSE2-NEXT:    andl $31, %eax
973; X86-SSE2-NEXT:    movd %eax, %xmm1
974; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
975; X86-SSE2-NEXT:    pslld %xmm1, %xmm2
976; X86-SSE2-NEXT:    movl $32, %ecx
977; X86-SSE2-NEXT:    subl %eax, %ecx
978; X86-SSE2-NEXT:    movd %ecx, %xmm1
979; X86-SSE2-NEXT:    psrld %xmm1, %xmm0
980; X86-SSE2-NEXT:    por %xmm2, %xmm0
981; X86-SSE2-NEXT:    retl
; Splat lane 0 of %amt to all four elements, then fshl(x, x, amt) -- a
; rotate-left by a uniform amount. AVX512 selects vprolvd and XOP selects
; vprotd; SSE2 extracts the scalar amount (movd/andl $31) and uses
; pslld/psrld-by-(32-amt) + por.
982  %splat = shufflevector <4 x i32> %amt, <4 x i32> undef, <4 x i32> zeroinitializer
983  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %splat)
984  ret <4 x i32> %res
985}
986
987define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind {
988; SSE2-LABEL: splatvar_funnnel_v8i16:
989; SSE2:       # %bb.0:
990; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
991; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
992; SSE2-NEXT:    psubw %xmm1, %xmm2
993; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
994; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
995; SSE2-NEXT:    movdqa %xmm0, %xmm3
996; SSE2-NEXT:    psllw %xmm1, %xmm3
997; SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
998; SSE2-NEXT:    psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
999; SSE2-NEXT:    psrlw %xmm2, %xmm0
1000; SSE2-NEXT:    por %xmm3, %xmm0
1001; SSE2-NEXT:    retq
1002;
1003; SSE41-LABEL: splatvar_funnnel_v8i16:
1004; SSE41:       # %bb.0:
1005; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1006; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1007; SSE41-NEXT:    movdqa %xmm0, %xmm3
1008; SSE41-NEXT:    psllw %xmm2, %xmm3
1009; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
1010; SSE41-NEXT:    psubw %xmm1, %xmm2
1011; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
1012; SSE41-NEXT:    psrlw %xmm1, %xmm0
1013; SSE41-NEXT:    por %xmm3, %xmm0
1014; SSE41-NEXT:    retq
1015;
1016; AVX-LABEL: splatvar_funnnel_v8i16:
1017; AVX:       # %bb.0:
1018; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1019; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1020; AVX-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
1021; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
1022; AVX-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
1023; AVX-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1024; AVX-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
1025; AVX-NEXT:    vpor %xmm0, %xmm2, %xmm0
1026; AVX-NEXT:    retq
1027;
1028; AVX512F-LABEL: splatvar_funnnel_v8i16:
1029; AVX512F:       # %bb.0:
1030; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1031; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1032; AVX512F-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
1033; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
1034; AVX512F-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
1035; AVX512F-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1036; AVX512F-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
1037; AVX512F-NEXT:    vpor %xmm0, %xmm2, %xmm0
1038; AVX512F-NEXT:    retq
1039;
1040; AVX512VL-LABEL: splatvar_funnnel_v8i16:
1041; AVX512VL:       # %bb.0:
1042; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1043; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1044; AVX512VL-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
1045; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
1046; AVX512VL-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
1047; AVX512VL-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1048; AVX512VL-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
1049; AVX512VL-NEXT:    vpor %xmm0, %xmm2, %xmm0
1050; AVX512VL-NEXT:    retq
1051;
1052; AVX512BW-LABEL: splatvar_funnnel_v8i16:
1053; AVX512BW:       # %bb.0:
1054; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1055; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1056; AVX512BW-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
1057; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
1058; AVX512BW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
1059; AVX512BW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1060; AVX512BW-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
1061; AVX512BW-NEXT:    vpor %xmm0, %xmm2, %xmm0
1062; AVX512BW-NEXT:    retq
1063;
1064; AVX512VLBW-LABEL: splatvar_funnnel_v8i16:
1065; AVX512VLBW:       # %bb.0:
1066; AVX512VLBW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1067; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1068; AVX512VLBW-NEXT:    vpsllw %xmm2, %xmm0, %xmm2
1069; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16]
1070; AVX512VLBW-NEXT:    vpsubw %xmm1, %xmm3, %xmm1
1071; AVX512VLBW-NEXT:    vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
1072; AVX512VLBW-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
1073; AVX512VLBW-NEXT:    vpor %xmm0, %xmm2, %xmm0
1074; AVX512VLBW-NEXT:    retq
1075;
1076; AVX512VBMI2-LABEL: splatvar_funnnel_v8i16:
1077; AVX512VBMI2:       # %bb.0:
1078; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1079; AVX512VBMI2-NEXT:    vpbroadcastw %xmm1, %xmm1
1080; AVX512VBMI2-NEXT:    vpshldvw %zmm1, %zmm0, %zmm0
1081; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1082; AVX512VBMI2-NEXT:    vzeroupper
1083; AVX512VBMI2-NEXT:    retq
1084;
1085; AVX512VLVBMI2-LABEL: splatvar_funnnel_v8i16:
1086; AVX512VLVBMI2:       # %bb.0:
1087; AVX512VLVBMI2-NEXT:    vpbroadcastw %xmm1, %xmm1
1088; AVX512VLVBMI2-NEXT:    vpshldvw %xmm1, %xmm0, %xmm0
1089; AVX512VLVBMI2-NEXT:    retq
1090;
1091; XOPAVX1-LABEL: splatvar_funnnel_v8i16:
1092; XOPAVX1:       # %bb.0:
1093; XOPAVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1094; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
1095; XOPAVX1-NEXT:    vprotw %xmm1, %xmm0, %xmm0
1096; XOPAVX1-NEXT:    retq
1097;
1098; XOPAVX2-LABEL: splatvar_funnnel_v8i16:
1099; XOPAVX2:       # %bb.0:
1100; XOPAVX2-NEXT:    vpsllw %xmm1, %xmm3
1101; XOPAVX2-NEXT:    vprotw %xmm1, %xmm0, %xmm0
1102; XOPAVX2-NEXT:    retq
1103;
1104; X86-SSE2-LABEL: splatvar_funnnel_v8i16:
1105; X86-SSE2:       # %bb.0:
1106; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
1107; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16]
1108; X86-SSE2-NEXT:    psubw %xmm1, %xmm2
1109; X86-SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
1110; X86-SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1111; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
1112; X86-SSE2-NEXT:    psllw %xmm1, %xmm3
1113; X86-SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1]
1114; X86-SSE2-NEXT:    psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1115; X86-SSE2-NEXT:    psrlw %xmm2, %xmm0
1116; X86-SSE2-NEXT:    psrlw %xmm2, %xmm0
1117; X86-SSE2-NEXT:    por %xmm3, %xmm0
1118  %splat = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> zeroinitializer
1119  %res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x, <8 x i16> %x, <8 x i16> %splat)
1120  ret <8 x i16> %res
1121}
1122
1123define <16 x i8> @splatvar_funnnel_v16i8(<16 x i8> %x, <16 x i8> %amt) nounwind {
1124; SSE2-LABEL: splatvar_funnnel_v16i8:
1125; SSE2:       # %bb.0:
1126; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1127; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1128; SSE2-NEXT:    psubb %xmm1, %xmm2
1129; SSE2-NEXT:    movdqa %xmm1, %xmm3
1130; SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0]
1131; SSE2-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1132; SSE2-NEXT:    movdqa %xmm0, %xmm1
1133; SSE2-NEXT:    psllw %xmm3, %xmm1
1134; SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
1135; SSE2-NEXT:    pcmpeqd %xmm5, %xmm5
1136; SSE2-NEXT:    psllw %xmm3, %xmm5
1137; SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1138; SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm5[0,0,0,0,4,5,6,7]
1139; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
1140; SSE2-NEXT:    pand %xmm3, %xmm1
1141; SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
1142; SSE2-NEXT:    psrldq {{.*#+}} xmm2 = xmm2[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1143; SSE2-NEXT:    psrlw %xmm2, %xmm0
1144; SSE2-NEXT:    psrlw %xmm2, %xmm4
1145; SSE2-NEXT:    psrlw $8, %xmm4
1146; SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1147; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm4[0,0,0,0,4,5,6,7]
1148; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
1149; SSE2-NEXT:    pand %xmm0, %xmm2
1150; SSE2-NEXT:    por %xmm2, %xmm1
1151; SSE2-NEXT:    movdqa %xmm1, %xmm0
1152; SSE2-NEXT:    retq
1153;
1154; SSE41-LABEL: splatvar_funnnel_v16i8:
1155; SSE41:       # %bb.0:
1156; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1157; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1158; SSE41-NEXT:    movdqa %xmm0, %xmm2
1159; SSE41-NEXT:    psllw %xmm3, %xmm2
1160; SSE41-NEXT:    pcmpeqd %xmm4, %xmm4
1161; SSE41-NEXT:    pcmpeqd %xmm5, %xmm5
1162; SSE41-NEXT:    psllw %xmm3, %xmm5
1163; SSE41-NEXT:    pxor %xmm3, %xmm3
1164; SSE41-NEXT:    pshufb %xmm3, %xmm5
1165; SSE41-NEXT:    pand %xmm5, %xmm2
1166; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1167; SSE41-NEXT:    psubb %xmm1, %xmm3
1168; SSE41-NEXT:    pmovzxbq {{.*#+}} xmm1 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1169; SSE41-NEXT:    psrlw %xmm1, %xmm0
1170; SSE41-NEXT:    psrlw %xmm1, %xmm4
1171; SSE41-NEXT:    pshufb {{.*#+}} xmm4 = xmm4[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1172; SSE41-NEXT:    pand %xmm0, %xmm4
1173; SSE41-NEXT:    por %xmm4, %xmm2
1174; SSE41-NEXT:    movdqa %xmm2, %xmm0
1175; SSE41-NEXT:    retq
1176;
1177; AVX1-LABEL: splatvar_funnnel_v16i8:
1178; AVX1:       # %bb.0:
1179; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1180; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1181; AVX1-NEXT:    vpsllw %xmm2, %xmm0, %xmm3
1182; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
1183; AVX1-NEXT:    vpsllw %xmm2, %xmm4, %xmm2
1184; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
1185; AVX1-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
1186; AVX1-NEXT:    vpand %xmm2, %xmm3, %xmm2
1187; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1188; AVX1-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
1189; AVX1-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1190; AVX1-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
1191; AVX1-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1
1192; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
1193; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
1194; AVX1-NEXT:    vpor %xmm0, %xmm2, %xmm0
1195; AVX1-NEXT:    retq
1196;
1197; AVX2-LABEL: splatvar_funnnel_v16i8:
1198; AVX2:       # %bb.0:
1199; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1200; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1201; AVX2-NEXT:    vpsllw %xmm2, %xmm0, %xmm3
1202; AVX2-NEXT:    vpcmpeqd %xmm4, %xmm4, %xmm4
1203; AVX2-NEXT:    vpsllw %xmm2, %xmm4, %xmm2
1204; AVX2-NEXT:    vpbroadcastb %xmm2, %xmm2
1205; AVX2-NEXT:    vpand %xmm2, %xmm3, %xmm2
1206; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1207; AVX2-NEXT:    vpsubb %xmm1, %xmm3, %xmm1
1208; AVX2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1209; AVX2-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0
1210; AVX2-NEXT:    vpsrlw %xmm1, %xmm4, %xmm1
1211; AVX2-NEXT:    vpsrlw $8, %xmm1, %xmm1
1212; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
1213; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
1214; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
1215; AVX2-NEXT:    retq
1216;
1217; AVX512F-LABEL: splatvar_funnnel_v16i8:
1218; AVX512F:       # %bb.0:
1219; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1220; AVX512F-NEXT:    vpand %xmm2, %xmm1, %xmm3
1221; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1222; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1223; AVX512F-NEXT:    vpslld %xmm3, %zmm0, %zmm3
1224; AVX512F-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1225; AVX512F-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1226; AVX512F-NEXT:    vpand %xmm2, %xmm1, %xmm1
1227; AVX512F-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1228; AVX512F-NEXT:    vpsrld %xmm1, %zmm0, %zmm0
1229; AVX512F-NEXT:    vpord %zmm0, %zmm3, %zmm0
1230; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
1231; AVX512F-NEXT:    vzeroupper
1232; AVX512F-NEXT:    retq
1233;
1234; AVX512VL-LABEL: splatvar_funnnel_v16i8:
1235; AVX512VL:       # %bb.0:
1236; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1237; AVX512VL-NEXT:    vpand %xmm2, %xmm1, %xmm3
1238; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1239; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1240; AVX512VL-NEXT:    vpslld %xmm3, %zmm0, %zmm3
1241; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1242; AVX512VL-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1243; AVX512VL-NEXT:    vpand %xmm2, %xmm1, %xmm1
1244; AVX512VL-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1245; AVX512VL-NEXT:    vpsrld %xmm1, %zmm0, %zmm0
1246; AVX512VL-NEXT:    vpord %zmm0, %zmm3, %zmm0
1247; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
1248; AVX512VL-NEXT:    vzeroupper
1249; AVX512VL-NEXT:    retq
1250;
1251; AVX512BW-LABEL: splatvar_funnnel_v16i8:
1252; AVX512BW:       # %bb.0:
1253; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1254; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm3
1255; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1256; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1257; AVX512BW-NEXT:    vpsllw %xmm3, %ymm0, %ymm3
1258; AVX512BW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1259; AVX512BW-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1260; AVX512BW-NEXT:    vpand %xmm2, %xmm1, %xmm1
1261; AVX512BW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1262; AVX512BW-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
1263; AVX512BW-NEXT:    vpor %ymm0, %ymm3, %ymm0
1264; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
1265; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1266; AVX512BW-NEXT:    vzeroupper
1267; AVX512BW-NEXT:    retq
1268;
1269; AVX512VLBW-LABEL: splatvar_funnnel_v16i8:
1270; AVX512VLBW:       # %bb.0:
1271; AVX512VLBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1272; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm3
1273; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1274; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1275; AVX512VLBW-NEXT:    vpsllw %xmm3, %ymm0, %ymm3
1276; AVX512VLBW-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1277; AVX512VLBW-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1278; AVX512VLBW-NEXT:    vpand %xmm2, %xmm1, %xmm1
1279; AVX512VLBW-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1280; AVX512VLBW-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
1281; AVX512VLBW-NEXT:    vpor %ymm0, %ymm3, %ymm0
1282; AVX512VLBW-NEXT:    vpmovwb %ymm0, %xmm0
1283; AVX512VLBW-NEXT:    vzeroupper
1284; AVX512VLBW-NEXT:    retq
1285;
1286; AVX512VBMI2-LABEL: splatvar_funnnel_v16i8:
1287; AVX512VBMI2:       # %bb.0:
1288; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1289; AVX512VBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm3
1290; AVX512VBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1291; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1292; AVX512VBMI2-NEXT:    vpsllw %xmm3, %ymm0, %ymm3
1293; AVX512VBMI2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1294; AVX512VBMI2-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1295; AVX512VBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm1
1296; AVX512VBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1297; AVX512VBMI2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
1298; AVX512VBMI2-NEXT:    vpor %ymm0, %ymm3, %ymm0
1299; AVX512VBMI2-NEXT:    vpmovwb %zmm0, %ymm0
1300; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1301; AVX512VBMI2-NEXT:    vzeroupper
1302; AVX512VBMI2-NEXT:    retq
1303;
1304; AVX512VLVBMI2-LABEL: splatvar_funnnel_v16i8:
1305; AVX512VLVBMI2:       # %bb.0:
1306; AVX512VLVBMI2-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
1307; AVX512VLVBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm3
1308; AVX512VLVBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero
1309; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1310; AVX512VLVBMI2-NEXT:    vpsllw %xmm3, %ymm0, %ymm3
1311; AVX512VLVBMI2-NEXT:    vpxor %xmm4, %xmm4, %xmm4
1312; AVX512VLVBMI2-NEXT:    vpsubb %xmm1, %xmm4, %xmm1
1313; AVX512VLVBMI2-NEXT:    vpand %xmm2, %xmm1, %xmm1
1314; AVX512VLVBMI2-NEXT:    vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
1315; AVX512VLVBMI2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0
1316; AVX512VLVBMI2-NEXT:    vpor %ymm0, %ymm3, %ymm0
1317; AVX512VLVBMI2-NEXT:    vpmovwb %ymm0, %xmm0
1318; AVX512VLVBMI2-NEXT:    vzeroupper
1319; AVX512VLVBMI2-NEXT:    retq
1320;
1321; XOPAVX1-LABEL: splatvar_funnnel_v16i8:
1322; XOPAVX1:       # %bb.0:
1323; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1324; XOPAVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
1325; XOPAVX1-NEXT:    vprotb %xmm1, %xmm0, %xmm0
1326; XOPAVX1-NEXT:    retq
1327;
1328; XOPAVX2-LABEL: splatvar_funnnel_v16i8:
1329; XOPAVX2:       # %bb.0:
1330; XOPAVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
1331; XOPAVX2-NEXT:    vprotb %xmm1, %xmm0, %xmm0
1332; XOPAVX2-NEXT:    retq
1333;
1334; X86-SSE2-LABEL: splatvar_funnnel_v16i8:
1335; X86-SSE2:       # %bb.0:
1336; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
1337; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
1338; X86-SSE2-NEXT:    psubb %xmm1, %xmm2
1339; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
1340; X86-SSE2-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0]
1341; X86-SSE2-NEXT:    psrldq {{.*#+}} xmm3 = xmm3[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1342; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1343; X86-SSE2-NEXT:    psllw %xmm3, %xmm1
1344; X86-SSE2-NEXT:    pcmpeqd %xmm4, %xmm4
1345; X86-SSE2-NEXT:    pcmpeqd %xmm5, %xmm5
1346; X86-SSE2-NEXT:    psllw %xmm3, %xmm5
1347; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm5 = xmm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1348; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm3 = xmm5[0,0,0,0,4,5,6,7]
1349; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
1350; X86-SSE2-NEXT:    pand %xmm3, %xmm1
1351; X86-SSE2-NEXT:    pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
1352; X86-SSE2-NEXT:    psrldq {{.*#+}} xmm2 = xmm2[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
1353; X86-SSE2-NEXT:    psrlw %xmm2, %xmm0
1354; X86-SSE2-NEXT:    psrlw %xmm2, %xmm4
1355; X86-SSE2-NEXT:    psrlw $8, %xmm4
1356; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1357; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm4[0,0,0,0,4,5,6,7]
1358; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
1359; X86-SSE2-NEXT:    pand %xmm0, %xmm2
1360; X86-SSE2-NEXT:    por %xmm2, %xmm1
1361; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
1362; X86-SSE2-NEXT:    retl
1363  %splat = shufflevector <16 x i8> %amt, <16 x i8> undef, <16 x i32> zeroinitializer
1364  %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %x, <16 x i8> %x, <16 x i8> %splat)
1365  ret <16 x i8> %res
1366}
1367
1368;
1369; Constant Shifts
1370;
1371
; Funnel-shift-left of %x with itself by constant amounts <4,14>.
; Per the LLVM LangRef, fshl(x, x, amt) is a rotate-left, so targets with
; native rotates (AVX512 vprolvq, XOP vprotq) can lower this to one
; instruction; others expand to shl/lshr/or pairs.
1372define <2 x i64> @constant_funnnel_v2i64(<2 x i64> %x) nounwind {
1373; SSE2-LABEL: constant_funnnel_v2i64:
1374; SSE2:       # %bb.0:
1375; SSE2-NEXT:    movdqa %xmm0, %xmm1
1376; SSE2-NEXT:    psrlq $60, %xmm1
1377; SSE2-NEXT:    movdqa %xmm0, %xmm2
1378; SSE2-NEXT:    psrlq $50, %xmm2
1379; SSE2-NEXT:    movsd {{.*#+}} xmm2 = xmm1[0],xmm2[1]
1380; SSE2-NEXT:    movdqa %xmm0, %xmm1
1381; SSE2-NEXT:    psllq $4, %xmm1
1382; SSE2-NEXT:    psllq $14, %xmm0
1383; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1384; SSE2-NEXT:    orpd %xmm2, %xmm0
1385; SSE2-NEXT:    retq
1386;
1387; SSE41-LABEL: constant_funnnel_v2i64:
1388; SSE41:       # %bb.0:
1389; SSE41-NEXT:    movdqa %xmm0, %xmm1
1390; SSE41-NEXT:    psrlq $50, %xmm1
1391; SSE41-NEXT:    movdqa %xmm0, %xmm2
1392; SSE41-NEXT:    psrlq $60, %xmm2
1393; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
1394; SSE41-NEXT:    movdqa %xmm0, %xmm1
1395; SSE41-NEXT:    psllq $14, %xmm1
1396; SSE41-NEXT:    psllq $4, %xmm0
1397; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
1398; SSE41-NEXT:    por %xmm2, %xmm0
1399; SSE41-NEXT:    retq
1400;
1401; AVX1-LABEL: constant_funnnel_v2i64:
1402; AVX1:       # %bb.0:
1403; AVX1-NEXT:    vpsrlq $50, %xmm0, %xmm1
1404; AVX1-NEXT:    vpsrlq $60, %xmm0, %xmm2
1405; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
1406; AVX1-NEXT:    vpsllq $14, %xmm0, %xmm2
1407; AVX1-NEXT:    vpsllq $4, %xmm0, %xmm0
1408; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
1409; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
1410; AVX1-NEXT:    retq
1411;
1412; AVX2-LABEL: constant_funnnel_v2i64:
1413; AVX2:       # %bb.0:
1414; AVX2-NEXT:    vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1415; AVX2-NEXT:    vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1416; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1417; AVX2-NEXT:    retq
1418;
1419; AVX512F-LABEL: constant_funnnel_v2i64:
1420; AVX512F:       # %bb.0:
1421; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1422; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,14]
1423; AVX512F-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
1424; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1425; AVX512F-NEXT:    vzeroupper
1426; AVX512F-NEXT:    retq
1427;
1428; AVX512VL-LABEL: constant_funnnel_v2i64:
1429; AVX512VL:       # %bb.0:
1430; AVX512VL-NEXT:    vprolvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1431; AVX512VL-NEXT:    retq
1432;
1433; AVX512BW-LABEL: constant_funnnel_v2i64:
1434; AVX512BW:       # %bb.0:
1435; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1436; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,14]
1437; AVX512BW-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
1438; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1439; AVX512BW-NEXT:    vzeroupper
1440; AVX512BW-NEXT:    retq
1441;
1442; AVX512VLBW-LABEL: constant_funnnel_v2i64:
1443; AVX512VLBW:       # %bb.0:
1444; AVX512VLBW-NEXT:    vprolvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1445; AVX512VLBW-NEXT:    retq
1446;
1447; AVX512VBMI2-LABEL: constant_funnnel_v2i64:
1448; AVX512VBMI2:       # %bb.0:
1449; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1450; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,14]
1451; AVX512VBMI2-NEXT:    vprolvq %zmm1, %zmm0, %zmm0
1452; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1453; AVX512VBMI2-NEXT:    vzeroupper
1454; AVX512VBMI2-NEXT:    retq
1455;
1456; AVX512VLVBMI2-LABEL: constant_funnnel_v2i64:
1457; AVX512VLVBMI2:       # %bb.0:
1458; AVX512VLVBMI2-NEXT:    vprolvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1459; AVX512VLVBMI2-NEXT:    retq
1460;
1461; XOP-LABEL: constant_funnnel_v2i64:
1462; XOP:       # %bb.0:
1463; XOP-NEXT:    vprotq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1464; XOP-NEXT:    retq
1465;
1466; X86-SSE2-LABEL: constant_funnnel_v2i64:
1467; X86-SSE2:       # %bb.0:
1468; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [63,0,63,0]
1469; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = <4,u,14,u>
1470; X86-SSE2-NEXT:    pxor %xmm3, %xmm3
1471; X86-SSE2-NEXT:    psubq %xmm2, %xmm3
1472; X86-SSE2-NEXT:    pand %xmm1, %xmm2
1473; X86-SSE2-NEXT:    movdqa %xmm0, %xmm4
1474; X86-SSE2-NEXT:    psllq %xmm2, %xmm4
1475; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3]
1476; X86-SSE2-NEXT:    movdqa %xmm0, %xmm5
1477; X86-SSE2-NEXT:    psllq %xmm2, %xmm5
1478; X86-SSE2-NEXT:    movsd {{.*#+}} xmm5 = xmm4[0],xmm5[1]
1479; X86-SSE2-NEXT:    pand %xmm1, %xmm3
1480; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1481; X86-SSE2-NEXT:    psrlq %xmm3, %xmm1
1482; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
1483; X86-SSE2-NEXT:    psrlq %xmm2, %xmm0
1484; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
1485; X86-SSE2-NEXT:    orpd %xmm5, %xmm0
1486; X86-SSE2-NEXT:    retl
1487  %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 4, i64 14>)
1488  ret <2 x i64> %res
1489}
1490
; Rotate-left of each i32 lane by constants <4,5,6,7> via fshl(x, x, amt).
; Without variable shifts (SSE2/SSE41/AVX1) this is lowered through pmuludq
; multiplies by powers of two ([16,32,64,128] = 1<<amt); AVX512/XOP use a
; single variable-rotate instruction.
1491define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x) nounwind {
1492; SSE2-LABEL: constant_funnnel_v4i32:
1493; SSE2:       # %bb.0:
1494; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,64,128]
1495; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1496; SSE2-NEXT:    pmuludq %xmm1, %xmm0
1497; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
1498; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1499; SSE2-NEXT:    pmuludq %xmm2, %xmm1
1500; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1501; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
1502; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1503; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1504; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1505; SSE2-NEXT:    por %xmm3, %xmm0
1506; SSE2-NEXT:    retq
1507;
1508; SSE41-LABEL: constant_funnnel_v4i32:
1509; SSE41:       # %bb.0:
1510; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,64,128]
1511; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1512; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1513; SSE41-NEXT:    pmuludq %xmm2, %xmm3
1514; SSE41-NEXT:    pmuludq %xmm1, %xmm0
1515; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1516; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1517; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
1518; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
1519; SSE41-NEXT:    por %xmm1, %xmm0
1520; SSE41-NEXT:    retq
1521;
1522; AVX1-LABEL: constant_funnnel_v4i32:
1523; AVX1:       # %bb.0:
1524; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,32,64,128]
1525; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1526; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1527; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
1528; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
1529; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1530; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1531; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
1532; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
1533; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
1534; AVX1-NEXT:    retq
1535;
1536; AVX2-LABEL: constant_funnnel_v4i32:
1537; AVX2:       # %bb.0:
1538; AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1539; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1540; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1541; AVX2-NEXT:    retq
1542;
1543; AVX512F-LABEL: constant_funnnel_v4i32:
1544; AVX512F:       # %bb.0:
1545; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1546; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,5,6,7]
1547; AVX512F-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
1548; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1549; AVX512F-NEXT:    vzeroupper
1550; AVX512F-NEXT:    retq
1551;
1552; AVX512VL-LABEL: constant_funnnel_v4i32:
1553; AVX512VL:       # %bb.0:
1554; AVX512VL-NEXT:    vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1555; AVX512VL-NEXT:    retq
1556;
1557; AVX512BW-LABEL: constant_funnnel_v4i32:
1558; AVX512BW:       # %bb.0:
1559; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1560; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,5,6,7]
1561; AVX512BW-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
1562; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1563; AVX512BW-NEXT:    vzeroupper
1564; AVX512BW-NEXT:    retq
1565;
1566; AVX512VLBW-LABEL: constant_funnnel_v4i32:
1567; AVX512VLBW:       # %bb.0:
1568; AVX512VLBW-NEXT:    vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1569; AVX512VLBW-NEXT:    retq
1570;
1571; AVX512VBMI2-LABEL: constant_funnnel_v4i32:
1572; AVX512VBMI2:       # %bb.0:
1573; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1574; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm1 = [4,5,6,7]
1575; AVX512VBMI2-NEXT:    vprolvd %zmm1, %zmm0, %zmm0
1576; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1577; AVX512VBMI2-NEXT:    vzeroupper
1578; AVX512VBMI2-NEXT:    retq
1579;
1580; AVX512VLVBMI2-LABEL: constant_funnnel_v4i32:
1581; AVX512VLVBMI2:       # %bb.0:
1582; AVX512VLVBMI2-NEXT:    vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1583; AVX512VLVBMI2-NEXT:    retq
1584;
1585; XOP-LABEL: constant_funnnel_v4i32:
1586; XOP:       # %bb.0:
1587; XOP-NEXT:    vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1588; XOP-NEXT:    retq
1589;
1590; X86-SSE2-LABEL: constant_funnnel_v4i32:
1591; X86-SSE2:       # %bb.0:
1592; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [16,32,64,128]
1593; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1594; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
1595; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
1596; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1597; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
1598; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1599; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
1600; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1601; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1602; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1603; X86-SSE2-NEXT:    por %xmm3, %xmm0
1604; X86-SSE2-NEXT:    retl
1605  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 4, i32 5, i32 6, i32 7>)
1606  ret <4 x i32> %res
1607}
1608
; Rotate-left of each i16 lane by constants <0..7> (amount 0 covers the
; no-op rotate edge case). SSE/AVX lower via pmullw/pmulhuw against
; [1,2,4,8,16,32,64,128] (low/high halves of x * (1<<amt)); VBMI2 targets
; use vpshldvw directly.
1609define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x) nounwind {
1610; SSE-LABEL: constant_funnnel_v8i16:
1611; SSE:       # %bb.0:
1612; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
1613; SSE-NEXT:    movdqa %xmm0, %xmm2
1614; SSE-NEXT:    pmulhuw %xmm1, %xmm2
1615; SSE-NEXT:    pmullw %xmm1, %xmm0
1616; SSE-NEXT:    por %xmm2, %xmm0
1617; SSE-NEXT:    retq
1618;
1619; AVX-LABEL: constant_funnnel_v8i16:
1620; AVX:       # %bb.0:
1621; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
1622; AVX-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2
1623; AVX-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
1624; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
1625; AVX-NEXT:    retq
1626;
1627; AVX512F-LABEL: constant_funnnel_v8i16:
1628; AVX512F:       # %bb.0:
1629; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
1630; AVX512F-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2
1631; AVX512F-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
1632; AVX512F-NEXT:    vpor %xmm2, %xmm0, %xmm0
1633; AVX512F-NEXT:    retq
1634;
1635; AVX512VL-LABEL: constant_funnnel_v8i16:
1636; AVX512VL:       # %bb.0:
1637; AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
1638; AVX512VL-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2
1639; AVX512VL-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
1640; AVX512VL-NEXT:    vpor %xmm2, %xmm0, %xmm0
1641; AVX512VL-NEXT:    retq
1642;
1643; AVX512BW-LABEL: constant_funnnel_v8i16:
1644; AVX512BW:       # %bb.0:
1645; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1646; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = [16,15,14,13,12,11,10,9]
1647; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm1
1648; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
1649; AVX512BW-NEXT:    vpsllvw %zmm2, %zmm0, %zmm0
1650; AVX512BW-NEXT:    vpor %xmm1, %xmm0, %xmm0
1651; AVX512BW-NEXT:    vzeroupper
1652; AVX512BW-NEXT:    retq
1653;
1654; AVX512VLBW-LABEL: constant_funnnel_v8i16:
1655; AVX512VLBW:       # %bb.0:
1656; AVX512VLBW-NEXT:    vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
1657; AVX512VLBW-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1658; AVX512VLBW-NEXT:    vpor %xmm1, %xmm0, %xmm0
1659; AVX512VLBW-NEXT:    retq
1660;
1661; AVX512VBMI2-LABEL: constant_funnnel_v8i16:
1662; AVX512VBMI2:       # %bb.0:
1663; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1664; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,2,3,4,5,6,7]
1665; AVX512VBMI2-NEXT:    vpshldvw %zmm1, %zmm0, %zmm0
1666; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1667; AVX512VBMI2-NEXT:    vzeroupper
1668; AVX512VBMI2-NEXT:    retq
1669;
1670; AVX512VLVBMI2-LABEL: constant_funnnel_v8i16:
1671; AVX512VLVBMI2:       # %bb.0:
1672; AVX512VLVBMI2-NEXT:    vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1673; AVX512VLVBMI2-NEXT:    retq
1674;
1675; XOP-LABEL: constant_funnnel_v8i16:
1676; XOP:       # %bb.0:
1677; XOP-NEXT:    vprotw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1678; XOP-NEXT:    retq
1679;
1680; X86-SSE2-LABEL: constant_funnnel_v8i16:
1681; X86-SSE2:       # %bb.0:
1682; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128]
1683; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
1684; X86-SSE2-NEXT:    pmulhuw %xmm1, %xmm2
1685; X86-SSE2-NEXT:    pmullw %xmm1, %xmm0
1686; X86-SSE2-NEXT:    por %xmm2, %xmm0
1687; X86-SSE2-NEXT:    retl
1688  %res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x, <8 x i16> %x, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
1689  ret <8 x i16> %res
1690}
1691
; Rotate-left of each i8 lane by non-uniform constants <0,1..7,8,7..1>.
; Note the amount 8 equals the i8 bit width; per the LLVM LangRef, fshl
; amounts are taken modulo the bit width, so lane 8 rotates by 0.
; x86 has no vector i8 shifts, so non-XOP targets widen to i16/i32 lanes
; (unpack or zero-extend), multiply/shift there, and repack with packuswb
; or vpmovwb/vpmovdb.
1692define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind {
1693; SSE2-LABEL: constant_funnnel_v16i8:
1694; SSE2:       # %bb.0:
1695; SSE2-NEXT:    pxor %xmm1, %xmm1
1696; SSE2-NEXT:    movdqa %xmm0, %xmm2
1697; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
1698; SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
1699; SSE2-NEXT:    psrlw $8, %xmm2
1700; SSE2-NEXT:    movdqa %xmm0, %xmm3
1701; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
1702; SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
1703; SSE2-NEXT:    psrlw $8, %xmm3
1704; SSE2-NEXT:    packuswb %xmm2, %xmm3
1705; SSE2-NEXT:    movdqa %xmm0, %xmm1
1706; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1707; SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1708; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1709; SSE2-NEXT:    pand %xmm2, %xmm1
1710; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1711; SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1712; SSE2-NEXT:    pand %xmm2, %xmm0
1713; SSE2-NEXT:    packuswb %xmm1, %xmm0
1714; SSE2-NEXT:    por %xmm3, %xmm0
1715; SSE2-NEXT:    retq
1716;
1717; SSE41-LABEL: constant_funnnel_v16i8:
1718; SSE41:       # %bb.0:
1719; SSE41-NEXT:    movdqa %xmm0, %xmm2
1720; SSE41-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1721; SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
1722; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
1723; SSE41-NEXT:    pand %xmm3, %xmm2
1724; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1725; SSE41-NEXT:    movdqa {{.*#+}} xmm4 = [1,2,4,8,16,32,64,128]
1726; SSE41-NEXT:    pmullw %xmm1, %xmm4
1727; SSE41-NEXT:    pand %xmm3, %xmm4
1728; SSE41-NEXT:    packuswb %xmm2, %xmm4
1729; SSE41-NEXT:    pxor %xmm2, %xmm2
1730; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
1731; SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1732; SSE41-NEXT:    psrlw $8, %xmm0
1733; SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1734; SSE41-NEXT:    psrlw $8, %xmm1
1735; SSE41-NEXT:    packuswb %xmm0, %xmm1
1736; SSE41-NEXT:    por %xmm4, %xmm1
1737; SSE41-NEXT:    movdqa %xmm1, %xmm0
1738; SSE41-NEXT:    retq
1739;
1740; AVX1-LABEL: constant_funnnel_v16i8:
1741; AVX1:       # %bb.0:
1742; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1743; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
1744; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1745; AVX1-NEXT:    vpand %xmm2, %xmm1, %xmm1
1746; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
1747; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm4
1748; AVX1-NEXT:    vpand %xmm2, %xmm4, %xmm2
1749; AVX1-NEXT:    vpackuswb %xmm1, %xmm2, %xmm1
1750; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
1751; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15]
1752; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1753; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
1754; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm2
1755; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
1756; AVX1-NEXT:    vpackuswb %xmm0, %xmm2, %xmm0
1757; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
1758; AVX1-NEXT:    retq
1759;
1760; AVX2-LABEL: constant_funnnel_v16i8:
1761; AVX2:       # %bb.0:
1762; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1763; AVX2-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1764; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
1765; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
1766; AVX2-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
1767; AVX2-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1768; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1769; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
1770; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
1771; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
1772; AVX2-NEXT:    vzeroupper
1773; AVX2-NEXT:    retq
1774;
1775; AVX512F-LABEL: constant_funnnel_v16i8:
1776; AVX512F:       # %bb.0:
1777; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1778; AVX512F-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
1779; AVX512F-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1780; AVX512F-NEXT:    vpord %zmm1, %zmm0, %zmm0
1781; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
1782; AVX512F-NEXT:    vzeroupper
1783; AVX512F-NEXT:    retq
1784;
1785; AVX512VL-LABEL: constant_funnnel_v16i8:
1786; AVX512VL:       # %bb.0:
1787; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
1788; AVX512VL-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
1789; AVX512VL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
1790; AVX512VL-NEXT:    vpord %zmm1, %zmm0, %zmm0
1791; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
1792; AVX512VL-NEXT:    vzeroupper
1793; AVX512VL-NEXT:    retq
1794;
1795; AVX512BW-LABEL: constant_funnnel_v16i8:
1796; AVX512BW:       # %bb.0:
1797; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
1798; AVX512BW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1799; AVX512BW-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm1
1800; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1]
1801; AVX512BW-NEXT:    vpsllvw %zmm2, %zmm0, %zmm0
1802; AVX512BW-NEXT:    vpor %ymm1, %ymm0, %ymm0
1803; AVX512BW-NEXT:    vpmovwb %zmm0, %ymm0
1804; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1805; AVX512BW-NEXT:    vzeroupper
1806; AVX512BW-NEXT:    retq
1807;
1808; AVX512VLBW-LABEL: constant_funnnel_v16i8:
1809; AVX512VLBW:       # %bb.0:
1810; AVX512VLBW-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1811; AVX512VLBW-NEXT:    vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1812; AVX512VLBW-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1813; AVX512VLBW-NEXT:    vpor %ymm1, %ymm0, %ymm0
1814; AVX512VLBW-NEXT:    vpmovwb %ymm0, %xmm0
1815; AVX512VLBW-NEXT:    vzeroupper
1816; AVX512VLBW-NEXT:    retq
1817;
1818; AVX512VBMI2-LABEL: constant_funnnel_v16i8:
1819; AVX512VBMI2:       # %bb.0:
1820; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,7,6,5,4,3,2,1,0,1,2,3,4,5,6,7]
1821; AVX512VBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1822; AVX512VBMI2-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm1
1823; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,7,6,5,4,3,2,1]
1824; AVX512VBMI2-NEXT:    vpsllvw %zmm2, %zmm0, %zmm0
1825; AVX512VBMI2-NEXT:    vpor %ymm1, %ymm0, %ymm0
1826; AVX512VBMI2-NEXT:    vpmovwb %zmm0, %ymm0
1827; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
1828; AVX512VBMI2-NEXT:    vzeroupper
1829; AVX512VBMI2-NEXT:    retq
1830;
1831; AVX512VLVBMI2-LABEL: constant_funnnel_v16i8:
1832; AVX512VLVBMI2:       # %bb.0:
1833; AVX512VLVBMI2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
1834; AVX512VLVBMI2-NEXT:    vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1
1835; AVX512VLVBMI2-NEXT:    vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
1836; AVX512VLVBMI2-NEXT:    vpor %ymm1, %ymm0, %ymm0
1837; AVX512VLVBMI2-NEXT:    vpmovwb %ymm0, %xmm0
1838; AVX512VLVBMI2-NEXT:    vzeroupper
1839; AVX512VLVBMI2-NEXT:    retq
1840;
1841; XOP-LABEL: constant_funnnel_v16i8:
1842; XOP:       # %bb.0:
1843; XOP-NEXT:    vprotb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
1844; XOP-NEXT:    retq
1845;
1846; X86-SSE2-LABEL: constant_funnnel_v16i8:
1847; X86-SSE2:       # %bb.0:
1848; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
1849; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
1850; X86-SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
1851; X86-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
1852; X86-SSE2-NEXT:    psrlw $8, %xmm2
1853; X86-SSE2-NEXT:    movdqa %xmm0, %xmm3
1854; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
1855; X86-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
1856; X86-SSE2-NEXT:    psrlw $8, %xmm3
1857; X86-SSE2-NEXT:    packuswb %xmm2, %xmm3
1858; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1859; X86-SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
1860; X86-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
1861; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
1862; X86-SSE2-NEXT:    pand %xmm2, %xmm1
1863; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
1864; X86-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1865; X86-SSE2-NEXT:    pand %xmm2, %xmm0
1866; X86-SSE2-NEXT:    packuswb %xmm1, %xmm0
1867; X86-SSE2-NEXT:    por %xmm3, %xmm0
1868; X86-SSE2-NEXT:    retl
1869  %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %x, <16 x i8> %x, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
1870  ret <16 x i8> %res
1871}
1872
1873;
1874; Uniform Constant Shifts
1875;
1876
; fshl(x, x, 14) with identical data operands is a rotate-left of each i64
; lane by 14 bits. Targets with AVX512F (optionally VL) emit a single vprolq
; (widening through zmm0 without VL), XOP emits vprotq, and plain SSE/AVX
; synthesize the rotate as (x << 14) | (x >> 50).
1877define <2 x i64> @splatconstant_funnnel_v2i64(<2 x i64> %x) nounwind {
1878; SSE-LABEL: splatconstant_funnnel_v2i64:
1879; SSE:       # %bb.0:
1880; SSE-NEXT:    movdqa %xmm0, %xmm1
1881; SSE-NEXT:    psrlq $50, %xmm1
1882; SSE-NEXT:    psllq $14, %xmm0
1883; SSE-NEXT:    por %xmm1, %xmm0
1884; SSE-NEXT:    retq
1885;
1886; AVX-LABEL: splatconstant_funnnel_v2i64:
1887; AVX:       # %bb.0:
1888; AVX-NEXT:    vpsrlq $50, %xmm0, %xmm1
1889; AVX-NEXT:    vpsllq $14, %xmm0, %xmm0
1890; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
1891; AVX-NEXT:    retq
1892;
1893; AVX512F-LABEL: splatconstant_funnnel_v2i64:
1894; AVX512F:       # %bb.0:
1895; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1896; AVX512F-NEXT:    vprolq $14, %zmm0, %zmm0
1897; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1898; AVX512F-NEXT:    vzeroupper
1899; AVX512F-NEXT:    retq
1900;
1901; AVX512VL-LABEL: splatconstant_funnnel_v2i64:
1902; AVX512VL:       # %bb.0:
1903; AVX512VL-NEXT:    vprolq $14, %xmm0, %xmm0
1904; AVX512VL-NEXT:    retq
1905;
1906; AVX512BW-LABEL: splatconstant_funnnel_v2i64:
1907; AVX512BW:       # %bb.0:
1908; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1909; AVX512BW-NEXT:    vprolq $14, %zmm0, %zmm0
1910; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1911; AVX512BW-NEXT:    vzeroupper
1912; AVX512BW-NEXT:    retq
1913;
1914; AVX512VLBW-LABEL: splatconstant_funnnel_v2i64:
1915; AVX512VLBW:       # %bb.0:
1916; AVX512VLBW-NEXT:    vprolq $14, %xmm0, %xmm0
1917; AVX512VLBW-NEXT:    retq
1918;
1919; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i64:
1920; AVX512VBMI2:       # %bb.0:
1921; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1922; AVX512VBMI2-NEXT:    vprolq $14, %zmm0, %zmm0
1923; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1924; AVX512VBMI2-NEXT:    vzeroupper
1925; AVX512VBMI2-NEXT:    retq
1926;
1927; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i64:
1928; AVX512VLVBMI2:       # %bb.0:
1929; AVX512VLVBMI2-NEXT:    vprolq $14, %xmm0, %xmm0
1930; AVX512VLVBMI2-NEXT:    retq
1931;
1932; XOP-LABEL: splatconstant_funnnel_v2i64:
1933; XOP:       # %bb.0:
1934; XOP-NEXT:    vprotq $14, %xmm0, %xmm0
1935; XOP-NEXT:    retq
1936;
1937; X86-SSE2-LABEL: splatconstant_funnnel_v2i64:
1938; X86-SSE2:       # %bb.0:
1939; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
1940; X86-SSE2-NEXT:    psrlq $50, %xmm1
1941; X86-SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm1[0,1]
1942; X86-SSE2-NEXT:    psllq $14, %xmm0
1943; X86-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm0[0,1]
1944; X86-SSE2-NEXT:    orpd %xmm1, %xmm0
1945; X86-SSE2-NEXT:    retl
1946  %res = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %x, <2 x i64> %x, <2 x i64> <i64 14, i64 14>)
1947  ret <2 x i64> %res
1948}
1949
; fshl(x, x, 4) is a rotate-left of each i32 lane by 4 bits. AVX512F/VL use a
; single vprold (via zmm0 without VL), XOP uses vprotd, and SSE/AVX build it
; from (x << 4) | (x >> 28).
1950define <4 x i32> @splatconstant_funnnel_v4i32(<4 x i32> %x) nounwind {
1951; SSE-LABEL: splatconstant_funnnel_v4i32:
1952; SSE:       # %bb.0:
1953; SSE-NEXT:    movdqa %xmm0, %xmm1
1954; SSE-NEXT:    psrld $28, %xmm1
1955; SSE-NEXT:    pslld $4, %xmm0
1956; SSE-NEXT:    por %xmm1, %xmm0
1957; SSE-NEXT:    retq
1958;
1959; AVX-LABEL: splatconstant_funnnel_v4i32:
1960; AVX:       # %bb.0:
1961; AVX-NEXT:    vpsrld $28, %xmm0, %xmm1
1962; AVX-NEXT:    vpslld $4, %xmm0, %xmm0
1963; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
1964; AVX-NEXT:    retq
1965;
1966; AVX512F-LABEL: splatconstant_funnnel_v4i32:
1967; AVX512F:       # %bb.0:
1968; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1969; AVX512F-NEXT:    vprold $4, %zmm0, %zmm0
1970; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1971; AVX512F-NEXT:    vzeroupper
1972; AVX512F-NEXT:    retq
1973;
1974; AVX512VL-LABEL: splatconstant_funnnel_v4i32:
1975; AVX512VL:       # %bb.0:
1976; AVX512VL-NEXT:    vprold $4, %xmm0, %xmm0
1977; AVX512VL-NEXT:    retq
1978;
1979; AVX512BW-LABEL: splatconstant_funnnel_v4i32:
1980; AVX512BW:       # %bb.0:
1981; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1982; AVX512BW-NEXT:    vprold $4, %zmm0, %zmm0
1983; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1984; AVX512BW-NEXT:    vzeroupper
1985; AVX512BW-NEXT:    retq
1986;
1987; AVX512VLBW-LABEL: splatconstant_funnnel_v4i32:
1988; AVX512VLBW:       # %bb.0:
1989; AVX512VLBW-NEXT:    vprold $4, %xmm0, %xmm0
1990; AVX512VLBW-NEXT:    retq
1991;
1992; AVX512VBMI2-LABEL: splatconstant_funnnel_v4i32:
1993; AVX512VBMI2:       # %bb.0:
1994; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
1995; AVX512VBMI2-NEXT:    vprold $4, %zmm0, %zmm0
1996; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
1997; AVX512VBMI2-NEXT:    vzeroupper
1998; AVX512VBMI2-NEXT:    retq
1999;
2000; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v4i32:
2001; AVX512VLVBMI2:       # %bb.0:
2002; AVX512VLVBMI2-NEXT:    vprold $4, %xmm0, %xmm0
2003; AVX512VLVBMI2-NEXT:    retq
2004;
2005; XOP-LABEL: splatconstant_funnnel_v4i32:
2006; XOP:       # %bb.0:
2007; XOP-NEXT:    vprotd $4, %xmm0, %xmm0
2008; XOP-NEXT:    retq
2009;
2010; X86-SSE2-LABEL: splatconstant_funnnel_v4i32:
2011; X86-SSE2:       # %bb.0:
2012; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
2013; X86-SSE2-NEXT:    psrld $28, %xmm1
2014; X86-SSE2-NEXT:    pslld $4, %xmm0
2015; X86-SSE2-NEXT:    por %xmm1, %xmm0
2016; X86-SSE2-NEXT:    retl
2017  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 4, i32 4, i32 4, i32 4>)
2018  ret <4 x i32> %res
2019}
2020
; fshl(x, x, 7) is a rotate-left of each i16 lane by 7 bits. There is no
; 16-bit vector rotate instruction even in AVX512F/VL/BW, so those targets
; fall back to (x << 7) | (x >> 9) like SSE/AVX; VBMI2 expresses the rotate
; with the concat-shift vpshldw $7 (x:x shifted left by 7), and XOP has a
; native vprotw.
2021define <8 x i16> @splatconstant_funnnel_v8i16(<8 x i16> %x) nounwind {
2022; SSE-LABEL: splatconstant_funnnel_v8i16:
2023; SSE:       # %bb.0:
2024; SSE-NEXT:    movdqa %xmm0, %xmm1
2025; SSE-NEXT:    psrlw $9, %xmm1
2026; SSE-NEXT:    psllw $7, %xmm0
2027; SSE-NEXT:    por %xmm1, %xmm0
2028; SSE-NEXT:    retq
2029;
2030; AVX-LABEL: splatconstant_funnnel_v8i16:
2031; AVX:       # %bb.0:
2032; AVX-NEXT:    vpsrlw $9, %xmm0, %xmm1
2033; AVX-NEXT:    vpsllw $7, %xmm0, %xmm0
2034; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
2035; AVX-NEXT:    retq
2036;
2037; AVX512F-LABEL: splatconstant_funnnel_v8i16:
2038; AVX512F:       # %bb.0:
2039; AVX512F-NEXT:    vpsrlw $9, %xmm0, %xmm1
2040; AVX512F-NEXT:    vpsllw $7, %xmm0, %xmm0
2041; AVX512F-NEXT:    vpor %xmm1, %xmm0, %xmm0
2042; AVX512F-NEXT:    retq
2043;
2044; AVX512VL-LABEL: splatconstant_funnnel_v8i16:
2045; AVX512VL:       # %bb.0:
2046; AVX512VL-NEXT:    vpsrlw $9, %xmm0, %xmm1
2047; AVX512VL-NEXT:    vpsllw $7, %xmm0, %xmm0
2048; AVX512VL-NEXT:    vpor %xmm1, %xmm0, %xmm0
2049; AVX512VL-NEXT:    retq
2050;
2051; AVX512BW-LABEL: splatconstant_funnnel_v8i16:
2052; AVX512BW:       # %bb.0:
2053; AVX512BW-NEXT:    vpsrlw $9, %xmm0, %xmm1
2054; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
2055; AVX512BW-NEXT:    vpor %xmm1, %xmm0, %xmm0
2056; AVX512BW-NEXT:    retq
2057;
2058; AVX512VLBW-LABEL: splatconstant_funnnel_v8i16:
2059; AVX512VLBW:       # %bb.0:
2060; AVX512VLBW-NEXT:    vpsrlw $9, %xmm0, %xmm1
2061; AVX512VLBW-NEXT:    vpsllw $7, %xmm0, %xmm0
2062; AVX512VLBW-NEXT:    vpor %xmm1, %xmm0, %xmm0
2063; AVX512VLBW-NEXT:    retq
2064;
2065; AVX512VBMI2-LABEL: splatconstant_funnnel_v8i16:
2066; AVX512VBMI2:       # %bb.0:
2067; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
2068; AVX512VBMI2-NEXT:    vpshldw $7, %zmm0, %zmm0, %zmm0
2069; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
2070; AVX512VBMI2-NEXT:    vzeroupper
2071; AVX512VBMI2-NEXT:    retq
2072;
2073; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v8i16:
2074; AVX512VLVBMI2:       # %bb.0:
2075; AVX512VLVBMI2-NEXT:    vpshldw $7, %xmm0, %xmm0, %xmm0
2076; AVX512VLVBMI2-NEXT:    retq
2077;
2078; XOP-LABEL: splatconstant_funnnel_v8i16:
2079; XOP:       # %bb.0:
2080; XOP-NEXT:    vprotw $7, %xmm0, %xmm0
2081; XOP-NEXT:    retq
2082;
2083; X86-SSE2-LABEL: splatconstant_funnnel_v8i16:
2084; X86-SSE2:       # %bb.0:
2085; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
2086; X86-SSE2-NEXT:    psrlw $9, %xmm1
2087; X86-SSE2-NEXT:    psllw $7, %xmm0
2088; X86-SSE2-NEXT:    por %xmm1, %xmm0
2089; X86-SSE2-NEXT:    retl
2090  %res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x, <8 x i16> %x, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
2091  ret <8 x i16> %res
2092}
2093
; fshl(x, x, 4) is a rotate-left of each i8 lane by 4 bits (a nibble swap).
; x86 has no 8-bit vector shifts, so the shifts are done at i16 width
; (psllw/psrlw) and pand masks clear the bits that leaked across byte
; boundaries before the halves are or'd together. AVX512VL targets fold the
; two masks and the or into a single vpternlogq; XOP has a native vprotb.
2094define <16 x i8> @splatconstant_funnnel_v16i8(<16 x i8> %x) nounwind {
2095; SSE-LABEL: splatconstant_funnnel_v16i8:
2096; SSE:       # %bb.0:
2097; SSE-NEXT:    movdqa %xmm0, %xmm1
2098; SSE-NEXT:    psrlw $4, %xmm1
2099; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
2100; SSE-NEXT:    psllw $4, %xmm0
2101; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
2102; SSE-NEXT:    por %xmm1, %xmm0
2103; SSE-NEXT:    retq
2104;
2105; AVX-LABEL: splatconstant_funnnel_v16i8:
2106; AVX:       # %bb.0:
2107; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm1
2108; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
2109; AVX-NEXT:    vpsllw $4, %xmm0, %xmm0
2110; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2111; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
2112; AVX-NEXT:    retq
2113;
2114; AVX512F-LABEL: splatconstant_funnnel_v16i8:
2115; AVX512F:       # %bb.0:
2116; AVX512F-NEXT:    vpsrlw $4, %xmm0, %xmm1
2117; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
2118; AVX512F-NEXT:    vpsllw $4, %xmm0, %xmm0
2119; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2120; AVX512F-NEXT:    vpor %xmm1, %xmm0, %xmm0
2121; AVX512F-NEXT:    retq
2122;
2123; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
2124; AVX512VL:       # %bb.0:
2125; AVX512VL-NEXT:    vpsllw $4, %xmm0, %xmm1
2126; AVX512VL-NEXT:    vpsrlw $4, %xmm0, %xmm0
2127; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
2128; AVX512VL-NEXT:    retq
2129;
2130; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
2131; AVX512BW:       # %bb.0:
2132; AVX512BW-NEXT:    vpsrlw $4, %xmm0, %xmm1
2133; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
2134; AVX512BW-NEXT:    vpsllw $4, %xmm0, %xmm0
2135; AVX512BW-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2136; AVX512BW-NEXT:    vpor %xmm1, %xmm0, %xmm0
2137; AVX512BW-NEXT:    retq
2138;
2139; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
2140; AVX512VLBW:       # %bb.0:
2141; AVX512VLBW-NEXT:    vpsllw $4, %xmm0, %xmm1
2142; AVX512VLBW-NEXT:    vpsrlw $4, %xmm0, %xmm0
2143; AVX512VLBW-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
2144; AVX512VLBW-NEXT:    retq
2145;
2146; AVX512VBMI2-LABEL: splatconstant_funnnel_v16i8:
2147; AVX512VBMI2:       # %bb.0:
2148; AVX512VBMI2-NEXT:    vpsrlw $4, %xmm0, %xmm1
2149; AVX512VBMI2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
2150; AVX512VBMI2-NEXT:    vpsllw $4, %xmm0, %xmm0
2151; AVX512VBMI2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2152; AVX512VBMI2-NEXT:    vpor %xmm1, %xmm0, %xmm0
2153; AVX512VBMI2-NEXT:    retq
2154;
2155; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
2156; AVX512VLVBMI2:       # %bb.0:
2157; AVX512VLVBMI2-NEXT:    vpsllw $4, %xmm0, %xmm1
2158; AVX512VLVBMI2-NEXT:    vpsrlw $4, %xmm0, %xmm0
2159; AVX512VLVBMI2-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
2160; AVX512VLVBMI2-NEXT:    retq
2161;
2162; XOP-LABEL: splatconstant_funnnel_v16i8:
2163; XOP:       # %bb.0:
2164; XOP-NEXT:    vprotb $4, %xmm0, %xmm0
2165; XOP-NEXT:    retq
2166;
2167; X86-SSE2-LABEL: splatconstant_funnnel_v16i8:
2168; X86-SSE2:       # %bb.0:
2169; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
2170; X86-SSE2-NEXT:    psrlw $4, %xmm1
2171; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
2172; X86-SSE2-NEXT:    psllw $4, %xmm0
2173; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
2174; X86-SSE2-NEXT:    por %xmm1, %xmm0
2175; X86-SSE2-NEXT:    retl
2176  %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %x, <16 x i8> %x, <16 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
2177  ret <16 x i8> %res
2178}
2179