1; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 -mcpu=corei7 | FileCheck %s
2
3; SSE2 Logical Shift Left
4
; Shifts every i16 lane of %InVec left by the constant 0 and returns the result.
; The FileCheck directives that follow this function require that no
; `psllw $0, %xmm0` appears before the return.
5define <8 x i16> @test_sllw_1(<8 x i16> %InVec) {
6entry:
7  %shl = shl <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
8  ret <8 x i16> %shl
9}
10
11; CHECK-LABEL: test_sllw_1:
12; CHECK-NOT: psllw   $0, %xmm0
13; CHECK: ret
14
; Shifts every i16 lane of %InVec left by the constant 1 and returns the result.
; The FileCheck directives that follow this function expect
; `paddw %xmm0, %xmm0` immediately followed by the return.
15define <8 x i16> @test_sllw_2(<8 x i16> %InVec) {
16entry:
17  %shl = shl <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
18  ret <8 x i16> %shl
19}
20
21; CHECK-LABEL: test_sllw_2:
22; CHECK: paddw   %xmm0, %xmm0
23; CHECK-NEXT: ret
24
; Shifts every i16 lane of %InVec left by the constant 15 and returns the result.
; The FileCheck directives that follow this function expect
; `psllw $15, %xmm0` immediately followed by the return.
25define <8 x i16> @test_sllw_3(<8 x i16> %InVec) {
26entry:
27  %shl = shl <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
28  ret <8 x i16> %shl
29}
30
31; CHECK-LABEL: test_sllw_3:
32; CHECK: psllw $15, %xmm0
33; CHECK-NEXT: ret
34
; Shifts every i32 lane of %InVec left by the constant 0 and returns the result.
; The FileCheck directives that follow this function require that no
; `pslld $0, %xmm0` appears before the return.
35define <4 x i32> @test_slld_1(<4 x i32> %InVec) {
36entry:
37  %shl = shl <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
38  ret <4 x i32> %shl
39}
40
41; CHECK-LABEL: test_slld_1:
42; CHECK-NOT: pslld   $0, %xmm0
43; CHECK: ret
44
; Shifts every i32 lane of %InVec left by the constant 1 and returns the result.
; The FileCheck directives that follow this function expect
; `paddd %xmm0, %xmm0` immediately followed by the return.
45define <4 x i32> @test_slld_2(<4 x i32> %InVec) {
46entry:
47  %shl = shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
48  ret <4 x i32> %shl
49}
50
51; CHECK-LABEL: test_slld_2:
52; CHECK: paddd   %xmm0, %xmm0
53; CHECK-NEXT: ret
54
; Shifts every i32 lane of %InVec left by the constant 31 and returns the result.
; The FileCheck directives that follow this function expect
; `pslld $31, %xmm0` immediately followed by the return.
55define <4 x i32> @test_slld_3(<4 x i32> %InVec) {
56entry:
57  %shl = shl <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
58  ret <4 x i32> %shl
59}
60
61; CHECK-LABEL: test_slld_3:
62; CHECK: pslld $31, %xmm0
63; CHECK-NEXT: ret
64
; Shifts both i64 lanes of %InVec left by the constant 0 and returns the result.
; The FileCheck directives that follow this function require that no
; `psllq $0, %xmm0` appears before the return.
65define <2 x i64> @test_sllq_1(<2 x i64> %InVec) {
66entry:
67  %shl = shl <2 x i64> %InVec, <i64 0, i64 0>
68  ret <2 x i64> %shl
69}
70
71; CHECK-LABEL: test_sllq_1:
72; CHECK-NOT: psllq   $0, %xmm0
73; CHECK: ret
74
; Shifts both i64 lanes of %InVec left by the constant 1 and returns the result.
; The FileCheck directives that follow this function expect
; `paddq %xmm0, %xmm0` immediately followed by the return.
75define <2 x i64> @test_sllq_2(<2 x i64> %InVec) {
76entry:
77  %shl = shl <2 x i64> %InVec, <i64 1, i64 1>
78  ret <2 x i64> %shl
79}
80
81; CHECK-LABEL: test_sllq_2:
82; CHECK: paddq   %xmm0, %xmm0
83; CHECK-NEXT: ret
84
; Shifts both i64 lanes of %InVec left by the constant 63 and returns the result.
; The FileCheck directives that follow this function expect
; `psllq $63, %xmm0` immediately followed by the return.
85define <2 x i64> @test_sllq_3(<2 x i64> %InVec) {
86entry:
87  %shl = shl <2 x i64> %InVec, <i64 63, i64 63>
88  ret <2 x i64> %shl
89}
90
91; CHECK-LABEL: test_sllq_3:
92; CHECK: psllq $63, %xmm0
93; CHECK-NEXT: ret
94
95; SSE2 Arithmetic Shift Right
96
; Arithmetic-shifts every i16 lane of %InVec right by the constant 0 and returns
; the result (the value is named %shl even though the operation is ashr).
; The FileCheck directives that follow this function require that no
; `psraw $0, %xmm0` appears before the return.
97define <8 x i16> @test_sraw_1(<8 x i16> %InVec) {
98entry:
99  %shl = ashr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
100  ret <8 x i16> %shl
101}
102
103; CHECK-LABEL: test_sraw_1:
104; CHECK-NOT: psraw   $0, %xmm0
105; CHECK: ret
106
; Arithmetic-shifts every i16 lane of %InVec right by the constant 1 and returns
; the result (the value is named %shl even though the operation is ashr).
; The FileCheck directives that follow this function expect
; `psraw $1, %xmm0` immediately followed by the return.
107define <8 x i16> @test_sraw_2(<8 x i16> %InVec) {
108entry:
109  %shl = ashr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
110  ret <8 x i16> %shl
111}
112
113; CHECK-LABEL: test_sraw_2:
114; CHECK: psraw   $1, %xmm0
115; CHECK-NEXT: ret
116
; Arithmetic-shifts every i16 lane of %InVec right by the constant 15 and returns
; the result (the value is named %shl even though the operation is ashr).
; The FileCheck directives that follow this function expect
; `psraw $15, %xmm0` immediately followed by the return.
117define <8 x i16> @test_sraw_3(<8 x i16> %InVec) {
118entry:
119  %shl = ashr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
120  ret <8 x i16> %shl
121}
122
123; CHECK-LABEL: test_sraw_3:
124; CHECK: psraw   $15, %xmm0
125; CHECK-NEXT: ret
126
; Arithmetic-shifts every i32 lane of %InVec right by the constant 0 and returns
; the result (the value is named %shl even though the operation is ashr).
; The FileCheck directives that follow this function require that no
; `psrad $0, %xmm0` appears before the return.
127define <4 x i32> @test_srad_1(<4 x i32> %InVec) {
128entry:
129  %shl = ashr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
130  ret <4 x i32> %shl
131}
132
133; CHECK-LABEL: test_srad_1:
134; CHECK-NOT: psrad   $0, %xmm0
135; CHECK: ret
136
; Arithmetic-shifts every i32 lane of %InVec right by the constant 1 and returns
; the result (the value is named %shl even though the operation is ashr).
; The FileCheck directives that follow this function expect
; `psrad $1, %xmm0` immediately followed by the return.
137define <4 x i32> @test_srad_2(<4 x i32> %InVec) {
138entry:
139  %shl = ashr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
140  ret <4 x i32> %shl
141}
142
143; CHECK-LABEL: test_srad_2:
144; CHECK: psrad   $1, %xmm0
145; CHECK-NEXT: ret
146
; Arithmetic-shifts every i32 lane of %InVec right by the constant 31 and returns
; the result (the value is named %shl even though the operation is ashr).
; The FileCheck directives that follow this function expect
; `psrad $31, %xmm0` immediately followed by the return.
147define <4 x i32> @test_srad_3(<4 x i32> %InVec) {
148entry:
149  %shl = ashr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
150  ret <4 x i32> %shl
151}
152
153; CHECK-LABEL: test_srad_3:
154; CHECK: psrad   $31, %xmm0
155; CHECK-NEXT: ret
156
157; SSE2 Logical Shift Right
158
; Logical-shifts every i16 lane of %InVec right by the constant 0 and returns
; the result (the value is named %shl even though the operation is lshr).
; The FileCheck directives that follow this function require that no
; `psrlw $0, %xmm0` appears before the return.
159define <8 x i16> @test_srlw_1(<8 x i16> %InVec) {
160entry:
161  %shl = lshr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
162  ret <8 x i16> %shl
163}
164
165; CHECK-LABEL: test_srlw_1:
166; CHECK-NOT: psrlw   $0, %xmm0
167; CHECK: ret
168
; Logical-shifts every i16 lane of %InVec right by the constant 1 and returns
; the result (the value is named %shl even though the operation is lshr).
; The FileCheck directives that follow this function expect
; `psrlw $1, %xmm0` immediately followed by the return.
169define <8 x i16> @test_srlw_2(<8 x i16> %InVec) {
170entry:
171  %shl = lshr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
172  ret <8 x i16> %shl
173}
174
175; CHECK-LABEL: test_srlw_2:
176; CHECK: psrlw   $1, %xmm0
177; CHECK-NEXT: ret
178
; Logical-shifts every i16 lane of %InVec right by the constant 15 and returns
; the result (the value is named %shl even though the operation is lshr).
; The FileCheck directives that follow this function expect
; `psrlw $15, %xmm0` immediately followed by the return.
179define <8 x i16> @test_srlw_3(<8 x i16> %InVec) {
180entry:
181  %shl = lshr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
182  ret <8 x i16> %shl
183}
184
185; CHECK-LABEL: test_srlw_3:
186; CHECK: psrlw $15, %xmm0
187; CHECK-NEXT: ret
188
; Logical-shifts every i32 lane of %InVec right by the constant 0 and returns
; the result (the value is named %shl even though the operation is lshr).
; The FileCheck directives that follow this function require that no
; `psrld $0, %xmm0` appears before the return.
189define <4 x i32> @test_srld_1(<4 x i32> %InVec) {
190entry:
191  %shl = lshr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
192  ret <4 x i32> %shl
193}
194
195; CHECK-LABEL: test_srld_1:
196; CHECK-NOT: psrld   $0, %xmm0
197; CHECK: ret
198
; Logical-shifts every i32 lane of %InVec right by the constant 1 and returns
; the result (the value is named %shl even though the operation is lshr).
; The FileCheck directives that follow this function expect
; `psrld $1, %xmm0` immediately followed by the return.
199define <4 x i32> @test_srld_2(<4 x i32> %InVec) {
200entry:
201  %shl = lshr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
202  ret <4 x i32> %shl
203}
204
205; CHECK-LABEL: test_srld_2:
206; CHECK: psrld   $1, %xmm0
207; CHECK-NEXT: ret
208
; Logical-shifts every i32 lane of %InVec right by the constant 31 and returns
; the result (the value is named %shl even though the operation is lshr).
; The FileCheck directives that follow this function expect
; `psrld $31, %xmm0` immediately followed by the return.
209define <4 x i32> @test_srld_3(<4 x i32> %InVec) {
210entry:
211  %shl = lshr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
212  ret <4 x i32> %shl
213}
214
215; CHECK-LABEL: test_srld_3:
216; CHECK: psrld $31, %xmm0
217; CHECK-NEXT: ret
218
; Logical-shifts both i64 lanes of %InVec right by the constant 0 and returns
; the result (the value is named %shl even though the operation is lshr).
; The FileCheck directives that follow this function require that no
; `psrlq $0, %xmm0` appears before the return.
219define <2 x i64> @test_srlq_1(<2 x i64> %InVec) {
220entry:
221  %shl = lshr <2 x i64> %InVec, <i64 0, i64 0>
222  ret <2 x i64> %shl
223}
224
225; CHECK-LABEL: test_srlq_1:
226; CHECK-NOT: psrlq   $0, %xmm0
227; CHECK: ret
228
; Logical-shifts both i64 lanes of %InVec right by the constant 1 and returns
; the result (the value is named %shl even though the operation is lshr).
; The FileCheck directives that follow this function expect
; `psrlq $1, %xmm0` immediately followed by the return.
229define <2 x i64> @test_srlq_2(<2 x i64> %InVec) {
230entry:
231  %shl = lshr <2 x i64> %InVec, <i64 1, i64 1>
232  ret <2 x i64> %shl
233}
234
235; CHECK-LABEL: test_srlq_2:
236; CHECK: psrlq   $1, %xmm0
237; CHECK-NEXT: ret
238
; Logical-shifts both i64 lanes of %InVec right by the constant 63 and returns
; the result (the value is named %shl even though the operation is lshr).
; The FileCheck directives that follow this function expect
; `psrlq $63, %xmm0` immediately followed by the return.
239define <2 x i64> @test_srlq_3(<2 x i64> %InVec) {
240entry:
241  %shl = lshr <2 x i64> %InVec, <i64 63, i64 63>
242  ret <2 x i64> %shl
243}
244
245; CHECK-LABEL: test_srlq_3:
246; CHECK: psrlq $63, %xmm0
247; CHECK-NEXT: ret
248
249
250; CHECK-LABEL: sra_sra_v4i32:
251; CHECK: psrad $6, %xmm0
252; CHECK-NEXT: retq
; Two chained arithmetic right shifts of %x: by 2, then by 4.
; The FileCheck directives preceding this function expect a single
; `psrad $6, %xmm0` immediately followed by `retq`.
253define <4 x i32> @sra_sra_v4i32(<4 x i32> %x) nounwind {
254  %sra0 = ashr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
255  %sra1 = ashr <4 x i32> %sra0, <i32 4, i32 4, i32 4, i32 4>
256  ret <4 x i32> %sra1
257}
258
259; CHECK-LABEL: @srl_srl_v4i32
260; CHECK: psrld $6, %xmm0
261; CHECK-NEXT: ret
; Two chained logical right shifts of %x: by 2, then by 4.
; The FileCheck directives preceding this function expect
; `psrld $6, %xmm0` immediately followed by the return.
262define <4 x i32> @srl_srl_v4i32(<4 x i32> %x) nounwind {
263  %srl0 = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
264  %srl1 = lshr <4 x i32> %srl0, <i32 4, i32 4, i32 4, i32 4>
265  ret <4 x i32> %srl1
266}
267
268; CHECK-LABEL: @srl_shl_v4i32
269; CHECK: andps
270; CHECK-NEXT: retq
; Shifts %x left by 4 and then logically right by 4 (the intermediate value is
; named %srl0 although it is produced by shl).
; The FileCheck directives preceding this function expect an `andps`
; immediately followed by `retq`.
271define <4 x i32> @srl_shl_v4i32(<4 x i32> %x) nounwind {
272  %srl0 = shl <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
273  %srl1 = lshr <4 x i32> %srl0, <i32 4, i32 4, i32 4, i32 4>
274  ret <4 x i32> %srl1
275}
276
277; CHECK-LABEL: @srl_sra_31_v4i32
278; CHECK: psrld $31, %xmm0
279; CHECK-NEXT: ret
; Arithmetic-shifts %x right by the variable per-lane amounts in %y, then
; logically shifts the result right by the constant 31.
; The FileCheck directives preceding this function expect
; `psrld $31, %xmm0` immediately followed by the return.
; NOTE(review): presumably the ashr can be dropped because it leaves each
; lane's sign bit unchanged and only that bit survives the shift by 31.
; Confirm against the combine being tested.
280define <4 x i32> @srl_sra_31_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
281  %sra = ashr <4 x i32> %x, %y
282  %srl1 = lshr <4 x i32> %sra, <i32 31, i32 31, i32 31, i32 31>
283  ret <4 x i32> %srl1
284}
285
286; CHECK-LABEL: @shl_shl_v4i32
287; CHECK: pslld $6, %xmm0
288; CHECK-NEXT: ret
; Two chained left shifts of %x: by 2, then by 4.
; The FileCheck directives preceding this function expect
; `pslld $6, %xmm0` immediately followed by the return.
289define <4 x i32> @shl_shl_v4i32(<4 x i32> %x) nounwind {
290  %shl0 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
291  %shl1 = shl <4 x i32> %shl0, <i32 4, i32 4, i32 4, i32 4>
292  ret <4 x i32> %shl1
293}
294
295; CHECK-LABEL: @shl_sra_v4i32
296; CHECK: andps
297; CHECK-NEXT: ret
; Arithmetic-shifts %x right by 4 and then shifts it left by 4 (the intermediate
; value is named %shl0 although it is produced by ashr).
; The FileCheck directives preceding this function expect an `andps`
; immediately followed by the return.
298define <4 x i32> @shl_sra_v4i32(<4 x i32> %x) nounwind {
299  %shl0 = ashr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
300  %shl1 = shl <4 x i32> %shl0, <i32 4, i32 4, i32 4, i32 4>
301  ret <4 x i32> %shl1
302}
303
304; CHECK-LABEL: @shl_srl_v4i32
305; CHECK: pslld $3, %xmm0
306; CHECK-NEXT: pand
307; CHECK-NEXT: ret
; Logical-shifts %x right by 2 and then shifts it left by 5 (the intermediate
; value is named %shl0 although it is produced by lshr).
; The FileCheck directives preceding this function expect
; `pslld $3, %xmm0`, then a `pand`, then the return, on consecutive lines.
308define <4 x i32> @shl_srl_v4i32(<4 x i32> %x) nounwind {
309  %shl0 = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
310  %shl1 = shl <4 x i32> %shl0, <i32 5, i32 5, i32 5, i32 5>
311  ret <4 x i32> %shl1
312}
313
314; CHECK-LABEL: @shl_zext_srl_v4i32
315; CHECK: andps
316; CHECK-NEXT: ret
; Logical-shifts each i16 lane of %x right by 2, zero-extends the lanes to i32,
; then shifts them left by 2.
; The FileCheck directives preceding this function expect an `andps`
; immediately followed by the return.
317define <4 x i32> @shl_zext_srl_v4i32(<4 x i16> %x) nounwind {
318  %srl = lshr <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
319  %zext = zext <4 x i16> %srl to <4 x i32>
320  %shl = shl <4 x i32> %zext, <i32 2, i32 2, i32 2, i32 2>
321  ret <4 x i32> %shl
322}
323
; Logical-shifts each i32 lane of %x right by 16, truncates the lanes to i16,
; then arithmetic-shifts them right by 3.
; Expected output: `psrad $19, %xmm0` immediately followed by `retq`.
; The function anchor below uses the label form of the directive, like the
; other tests in this file, so that matching for this function is isolated
; from the functions before and after it.
324; CHECK-LABEL: @sra_trunc_srl_v4i32
325; CHECK: psrad $19, %xmm0
326; CHECK-NEXT: retq
327define <4 x i16> @sra_trunc_srl_v4i32(<4 x i32> %x) nounwind {
328  %srl = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
329  %trunc = trunc <4 x i32> %srl to <4 x i16>
330  %sra = ashr <4 x i16> %trunc, <i16 3, i16 3, i16 3, i16 3>
331  ret <4 x i16> %sra
332}
333
334; CHECK-LABEL: @shl_zext_shl_v4i32
335; CHECK: pand
336; CHECK-NEXT: pslld $19, %xmm0
337; CHECK-NEXT: ret
; Shifts each i16 lane of %x left by 2, zero-extends the lanes to i32, then
; shifts them left by 17.
; The FileCheck directives preceding this function expect a `pand`, then
; `pslld $19, %xmm0`, then the return, on consecutive lines.
338define <4 x i32> @shl_zext_shl_v4i32(<4 x i16> %x) nounwind {
339  %shl0 = shl <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
340  %ext = zext <4 x i16> %shl0 to <4 x i32>
341  %shl1 = shl <4 x i32> %ext, <i32 17, i32 17, i32 17, i32 17>
342  ret <4 x i32> %shl1
343}
344
345; CHECK-LABEL: @sra_v4i32
346; CHECK: psrad $3, %xmm0
347; CHECK-NEXT: ret
; Arithmetic-shifts every i32 lane of %x right by the constant 3.
; The FileCheck directives preceding this function expect
; `psrad $3, %xmm0` immediately followed by the return.
348define <4 x i32> @sra_v4i32(<4 x i32> %x) nounwind {
349  %sra = ashr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
350  ret <4 x i32> %sra
351}
352
353; CHECK-LABEL: @srl_v4i32
354; CHECK: psrld $3, %xmm0
355; CHECK-NEXT: ret
; Logical-shifts every i32 lane of %x right by the constant 3 (the result is
; named %sra although the operation is lshr).
; The FileCheck directives preceding this function expect
; `psrld $3, %xmm0` immediately followed by the return.
356define <4 x i32> @srl_v4i32(<4 x i32> %x) nounwind {
357  %sra = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
358  ret <4 x i32> %sra
359}
360
361; CHECK-LABEL: @shl_v4i32
362; CHECK: pslld $3, %xmm0
363; CHECK-NEXT: ret
; Shifts every i32 lane of %x left by the constant 3 (the result is named %sra
; although the operation is shl).
; The FileCheck directives preceding this function expect
; `pslld $3, %xmm0` immediately followed by the return.
364define <4 x i32> @shl_v4i32(<4 x i32> %x) nounwind {
365  %sra = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
366  ret <4 x i32> %sra
367}
368