; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=AVX512VLBW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefix=AVX512VBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefix=AVX512VLVBMI2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx | FileCheck %s --check-prefixes=XOP,XOPAVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+xop,+avx2 | FileCheck %s --check-prefixes=XOP,XOPAVX2

; Just one 32-bit run to make sure we do reasonable things on a 32-bit target too.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2

declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
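; Both value operands of each call below are the same, so fshr(x, x, amt) is a
; rotate right by amt modulo the 32-bit element width:
;   rotr(x, amt) = (x >> (amt & 31)) | (x << ((32 - amt) & 31))
; All of the checks therefore expect rotate-style lowerings.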

;
; Variable Shifts
;

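; Fully variable per-element amounts. From the checks: AVX512 targets rotate
; directly with vprorvd (widened to zmm when AVX512VL is unavailable), XOP
; negates the amount and uses vprotd, AVX2 splits the rotate into a
; vpsllvd/vpsrlvd pair, and plain SSE negates and masks the amount,
; materializes 2^amt with the pslld $23 + paddd + cvttps2dq exponent trick,
; then ORs the low and high halves of the pmuludq products to form the rotate.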
define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: var_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm2
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    pslld $23, %xmm2
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    cvttps2dq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: var_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    psubd %xmm1, %xmm2
; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    pslld $23, %xmm2
; SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    cvttps2dq %xmm2, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: var_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: var_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: var_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: var_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: var_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: var_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: var_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: var_funnnel_v2i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOP-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; XOP-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: var_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pxor %xmm2, %xmm2
; X86-SSE2-NEXT:    psubd %xmm1, %xmm2
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT:    pslld $23, %xmm2
; X86-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT:    cvttps2dq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    por %xmm3, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %amt)
  ret <2 x i32> %res
}

;
; Uniform Variable Shifts
;

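; A single splatted variable amount. AVX2 and AVX512 broadcast the amount
; first; AVX2 then zero-extends the scalar count (vpmovzxdq) and uses the
; uniform vpslld/vpsrld pair, the AVX512 targets still use vprorvd, XOP negates
; and uses vprotd, and the SSE paths splat with pshufd and reuse the pmuludq
; expansion from the fully variable case.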
define <2 x i32> @splatvar_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind {
; SSE2-LABEL: splatvar_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    psubd %xmm1, %xmm2
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
; SSE2-NEXT:    pslld $23, %xmm1
; SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: splatvar_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    psubd %xmm1, %xmm2
; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
; SSE41-NEXT:    pslld $23, %xmm1
; SSE41-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT:    cvttps2dq %xmm1, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: splatvar_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vpslld $23, %xmm1, %xmm1
; AVX1-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vcvttps2dq %xmm1, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatvar_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT:    vpslld %xmm2, %xmm0, %xmm2
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32]
; AVX2-NEXT:    vpsubd %xmm1, %xmm3, %xmm1
; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX2-NEXT:    vpsrld %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm0, %xmm2, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: splatvar_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512F-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatvar_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VL-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatvar_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512BW-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatvar_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VLBW-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatvar_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VBMI2-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatvar_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vpbroadcastd %xmm1, %xmm1
; AVX512VLVBMI2-NEXT:    vprorvd %xmm1, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOPAVX1-LABEL: splatvar_funnnel_v2i32:
; XOPAVX1:       # %bb.0:
; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,1,1]
; XOPAVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX1-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX1-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOPAVX1-NEXT:    retq
;
; XOPAVX2-LABEL: splatvar_funnnel_v2i32:
; XOPAVX2:       # %bb.0:
; XOPAVX2-NEXT:    vpbroadcastd %xmm1, %xmm1
; XOPAVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; XOPAVX2-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; XOPAVX2-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; XOPAVX2-NEXT:    retq
;
; X86-SSE2-LABEL: splatvar_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pxor %xmm2, %xmm2
; X86-SSE2-NEXT:    psubd %xmm1, %xmm2
; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,0,0,0]
; X86-SSE2-NEXT:    pslld $23, %xmm1
; X86-SSE2-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    por %xmm3, %xmm0
; X86-SSE2-NEXT:    retl
  %splat = shufflevector <2 x i32> %amt, <2 x i32> undef, <2 x i32> zeroinitializer
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> %splat)
  ret <2 x i32> %res
}

;
; Constant Shifts
;

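; Distinct constant amounts <4, 5>. AVX512 rotates with a constant vector
; operand to vprorvd, XOP folds the amounts into vprotd's memory operand, AVX2
; uses constant vpsrlvd/vpsllvd pairs, and the SSE paths multiply by
; [2^28, 2^27, 1, 1] (rotating right by 4 or 5 is rotating left by 28 or 27)
; and recombine the pmuludq halves as in the variable case.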
define <2 x i32> @constant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: constant_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm1, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT:    pmuludq %xmm2, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    por %xmm3, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: constant_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT:    pmuludq %xmm2, %xmm3
; SSE41-NEXT:    pmuludq %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE41-NEXT:    por %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: constant_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,2,2]
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: constant_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
; AVX512F-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: constant_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: constant_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
; AVX512BW-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: constant_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: constant_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vmovdqa {{.*#+}} xmm1 = <4,5,u,u>
; AVX512VBMI2-NEXT:    vprorvd %zmm1, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: constant_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: constant_funnnel_v2i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: constant_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [268435456,134217728,1,1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    por %xmm3, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 5>)
  ret <2 x i32> %res
}

;
; Uniform Constant Shifts
;

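; A uniform constant amount of 4. AVX512 uses the immediate form vprord $4,
; XOP uses vprotd $28 (rotating left by 28 is rotating right by 4), AVX2 keeps
; the constant-pool vpsrlvd/vpsllvd pair, and the SSE/AVX1 paths combine
; psrld $4 with pslld $28 and blend the untouched upper lanes back in, since
; only the low two elements of the widened vector are defined.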
define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
; SSE2-LABEL: splatconstant_funnnel_v2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    psrld $4, %xmm2
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    pslld $28, %xmm1
; SSE2-NEXT:    por %xmm2, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: splatconstant_funnnel_v2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    psrld $4, %xmm2
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    pslld $28, %xmm1
; SSE41-NEXT:    por %xmm2, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: splatconstant_funnnel_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $4, %xmm0, %xmm1
; AVX1-NEXT:    vpslld $28, %xmm0, %xmm2
; AVX1-NEXT:    vpor %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splatconstant_funnnel_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: splatconstant_funnnel_v2i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT:    vprord $4, %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: splatconstant_funnnel_v2i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vprord $4, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v2i32:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vprord $4, %zmm0, %zmm0
; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v2i32:
; AVX512VLBW:       # %bb.0:
; AVX512VLBW-NEXT:    vprord $4, %xmm0, %xmm0
; AVX512VLBW-NEXT:    retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v2i32:
; AVX512VBMI2:       # %bb.0:
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512VBMI2-NEXT:    vprord $4, %zmm0, %zmm0
; AVX512VBMI2-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512VBMI2-NEXT:    vzeroupper
; AVX512VBMI2-NEXT:    retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v2i32:
; AVX512VLVBMI2:       # %bb.0:
; AVX512VLVBMI2-NEXT:    vprord $4, %xmm0, %xmm0
; AVX512VLVBMI2-NEXT:    retq
;
; XOP-LABEL: splatconstant_funnnel_v2i32:
; XOP:       # %bb.0:
; XOP-NEXT:    vprotd $28, %xmm0, %xmm0
; XOP-NEXT:    retq
;
; X86-SSE2-LABEL: splatconstant_funnnel_v2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
; X86-SSE2-NEXT:    psrld $4, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    pslld $28, %xmm1
; X86-SSE2-NEXT:    por %xmm2, %xmm1
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X86-SSE2-NEXT:    movaps %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
  %res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
  ret <2 x i32> %res
}