; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2


; Verify that the following shifts are lowered into a sequence of two shifts plus
; a blend. On pre-AVX2 targets, instead of scalarizing a logical or arithmetic
; packed shift right by a constant build_vector, the backend should always try to
; emit the simpler sequence of two shifts plus a blend when possible.
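;
; As an illustration (a sketch only; exact register assignment, instruction
; order, and the extra copy depend on the target and scheduler), the
; <4 x i32> logical shift by <3, 2, 2, 2> in test3 below should lower on SSE
; to roughly:
;   movdqa %xmm0, %xmm1
;   psrld  $3, %xmm1        ; whole vector shifted by the amount unique to lane 0
;   psrld  $2, %xmm0        ; whole vector shifted by the amount shared by lanes 1-3
;   movss  %xmm1, %xmm0     ; blend lane 0 from the 3-shift, lanes 1-3 from the 2-shift
; rather than extracting, shifting, and reinserting each element.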

define <8 x i16> @test1(<8 x i16> %a) {
  %lshr = lshr <8 x i16> %a, <i16 3, i16 3, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %lshr
}
; CHECK-LABEL: test1
; SSE: psrlw
; SSE-NEXT: psrlw
; SSE-NEXT: movss
; AVX: vpsrlw
; AVX-NEXT: vpsrlw
; AVX-NEXT: vmovss
; AVX2: vpsrlw
; AVX2-NEXT: vpsrlw
; AVX2-NEXT: vmovss
; CHECK: ret


define <8 x i16> @test2(<8 x i16> %a) {
  %lshr = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %lshr
}
; CHECK-LABEL: test2
; SSE: psrlw
; SSE-NEXT: psrlw
; SSE-NEXT: movsd
; AVX: vpsrlw
; AVX-NEXT: vpsrlw
; AVX-NEXT: vmovsd
; AVX2: vpsrlw
; AVX2-NEXT: vpsrlw
; AVX2-NEXT: vmovsd
; CHECK: ret


define <4 x i32> @test3(<4 x i32> %a) {
  %lshr = lshr <4 x i32> %a, <i32 3, i32 2, i32 2, i32 2>
  ret <4 x i32> %lshr
}
; CHECK-LABEL: test3
; SSE: psrld
; SSE-NEXT: psrld
; SSE-NEXT: movss
; AVX: vpsrld
; AVX-NEXT: vpsrld
; AVX-NEXT: vmovss
; AVX2: vpsrlvd
; CHECK: ret


define <4 x i32> @test4(<4 x i32> %a) {
  %lshr = lshr <4 x i32> %a, <i32 3, i32 3, i32 2, i32 2>
  ret <4 x i32> %lshr
}
; CHECK-LABEL: test4
; SSE: psrld
; SSE-NEXT: psrld
; SSE-NEXT: movsd
; AVX: vpsrld
; AVX-NEXT: vpsrld
; AVX-NEXT: vmovsd
; AVX2: vpsrlvd
; CHECK: ret


define <8 x i16> @test5(<8 x i16> %a) {
  %lshr = ashr <8 x i16> %a, <i16 3, i16 3, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %lshr
}
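; Expected pattern for test5 (a sketch mirroring test1, assuming the arithmetic
; shift with the same constants lowers the same way with psraw instead of psrlw):
; CHECK-LABEL: test5
; SSE: psraw
; SSE-NEXT: psraw
; SSE-NEXT: movss
; AVX: vpsraw
; AVX-NEXT: vpsraw
; AVX-NEXT: vmovss
; AVX2: vpsraw
; AVX2-NEXT: vpsraw
; AVX2-NEXT: vmovss
; CHECK: ret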

define <8 x i16> @test6(<8 x i16> %a) {
  %lshr = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %lshr
}
; CHECK-LABEL: test6
; SSE: psraw
; SSE-NEXT: psraw
; SSE-NEXT: movsd
; AVX: vpsraw
; AVX-NEXT: vpsraw
; AVX-NEXT: vmovsd
; AVX2: vpsraw
; AVX2-NEXT: vpsraw
; AVX2-NEXT: vmovsd
; CHECK: ret


define <4 x i32> @test7(<4 x i32> %a) {
  %lshr = ashr <4 x i32> %a, <i32 3, i32 2, i32 2, i32 2>
  ret <4 x i32> %lshr
}
; CHECK-LABEL: test7
; SSE: psrad
; SSE-NEXT: psrad
; SSE-NEXT: movss
; AVX: vpsrad
; AVX-NEXT: vpsrad
; AVX-NEXT: vmovss
; AVX2: vpsravd
; CHECK: ret


define <4 x i32> @test8(<4 x i32> %a) {
  %lshr = ashr <4 x i32> %a, <i32 3, i32 3, i32 2, i32 2>
  ret <4 x i32> %lshr
}
; CHECK-LABEL: test8
; SSE: psrad
; SSE-NEXT: psrad
; SSE-NEXT: movsd
; AVX: vpsrad
; AVX-NEXT: vpsrad
; AVX-NEXT: vmovsd
; AVX2: vpsravd
; CHECK: ret