1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -march=hexagon | FileCheck %s
3; RUN: llc < %s -march=hexagon -mattr=+hvx,hvx-length64b | FileCheck %s --check-prefix=CHECK-64B
4; RUN: llc < %s -march=hexagon -mattr=+hvx,hvx-length128b | FileCheck %s --check-prefix=CHECK-128B
; test1 shifts every i32 lane of a <2 x i32> value left by 24 and then
; arithmetically right by 24, which sign-extends the low byte of each lane.
; All three run configurations (no HVX, 64-byte HVX, 128-byte HVX) expect the
; same single packet built from sxtb and extract(..,#8,#0) on r0 and r1, with
; no vector instructions.
5define <2 x i32> @test1(<2 x i32> %m) {
6; CHECK-LABEL: test1:
7; CHECK:         .cfi_startproc
8; CHECK-NEXT:  // %bb.0: // %entry
9; CHECK-NEXT:    {
10; CHECK-NEXT:     r1 = extract(r1,#8,#0)
11; CHECK-NEXT:     r0 = sxtb(r0)
12; CHECK-NEXT:     jumpr r31
13; CHECK-NEXT:    }
14;
15; CHECK-64B-LABEL: test1:
16; CHECK-64B:         .cfi_startproc
17; CHECK-64B-NEXT:  // %bb.0: // %entry
18; CHECK-64B-NEXT:    {
19; CHECK-64B-NEXT:     r1 = extract(r1,#8,#0)
20; CHECK-64B-NEXT:     r0 = sxtb(r0)
21; CHECK-64B-NEXT:     jumpr r31
22; CHECK-64B-NEXT:    }
23;
24; CHECK-128B-LABEL: test1:
25; CHECK-128B:         .cfi_startproc
26; CHECK-128B-NEXT:  // %bb.0: // %entry
27; CHECK-128B-NEXT:    {
28; CHECK-128B-NEXT:     r1 = extract(r1,#8,#0)
29; CHECK-128B-NEXT:     r0 = sxtb(r0)
30; CHECK-128B-NEXT:     jumpr r31
31; CHECK-128B-NEXT:    }
32entry:
  ; The shl leaves the low 24 bits of each lane zero, so the exact flag on the
  ; following ashr holds for every input.
33  %shl = shl <2 x i32> %m, <i32 24, i32 24>
34  %shr = ashr exact <2 x i32> %shl, <i32 24, i32 24>
35  ret <2 x i32> %shr
36}
37
; test2 applies the same shl-by-24 / ashr-exact-by-24 pattern as a byte
; sign-extension to every lane of a <16 x i32> value.
; Without HVX the expected code is scalar. It uses sxtb and extract(..,#8,#0)
; on individual registers, loads further operand words from the stack through
; r29, stores the sixteen results through r0, and spills and reloads r17:16.
; With HVX enabled (64-byte or 128-byte vectors) the expected code is one
; vasl/vasr pair on v0.w with the shift amount 24 materialised in r0.
38define <16 x i32> @test2(<16 x i32> %m) {
39; CHECK-LABEL: test2:
40; CHECK:         .cfi_startproc
41; CHECK-NEXT:  // %bb.0: // %entry
42; CHECK-NEXT:    {
43; CHECK-NEXT:     r3 = extract(r3,#8,#0)
44; CHECK-NEXT:     r29 = add(r29,#-8)
45; CHECK-NEXT:     r2 = sxtb(r2)
46; CHECK-NEXT:     r4 = sxtb(r4)
47; CHECK-NEXT:    }
48; CHECK-NEXT:    {
49; CHECK-NEXT:     r5 = extract(r5,#8,#0)
50; CHECK-NEXT:     r13:12 = memd(r29+#48)
51; CHECK-NEXT:     memd(r29+#0) = r17:16
52; CHECK-NEXT:    }
53; CHECK-NEXT:    {
54; CHECK-NEXT:     r13 = extract(r13,#8,#0)
55; CHECK-NEXT:     r12 = sxtb(r12)
56; CHECK-NEXT:     r15:14 = memd(r29+#40)
57; CHECK-NEXT:     r9:8 = memd(r29+#32)
58; CHECK-NEXT:    }
59; CHECK-NEXT:    {
60; CHECK-NEXT:     r9 = extract(r9,#8,#0)
61; CHECK-NEXT:     r8 = sxtb(r8)
62; CHECK-NEXT:     r11:10 = memd(r29+#24)
63; CHECK-NEXT:     r7:6 = memd(r29+#16)
64; CHECK-NEXT:    }
65; CHECK-NEXT:    {
66; CHECK-NEXT:     r11 = extract(r11,#8,#0)
67; CHECK-NEXT:     r10 = sxtb(r10)
68; CHECK-NEXT:     r14 = sxtb(r14)
69; CHECK-NEXT:     r17:16 = memd(r29+#8)
70; CHECK-NEXT:    }
71; CHECK-NEXT:    {
72; CHECK-NEXT:     r15 = extract(r15,#8,#0)
73; CHECK-NEXT:     r17 = extract(r17,#8,#0)
74; CHECK-NEXT:     r16 = sxtb(r16)
75; CHECK-NEXT:     r6 = sxtb(r6)
76; CHECK-NEXT:    }
77; CHECK-NEXT:    {
78; CHECK-NEXT:     r7 = extract(r7,#8,#0)
79; CHECK-NEXT:     memd(r0+#56) = r13:12
80; CHECK-NEXT:     memd(r0+#48) = r15:14
81; CHECK-NEXT:    }
82; CHECK-NEXT:    {
83; CHECK-NEXT:     memd(r0+#40) = r9:8
84; CHECK-NEXT:     memd(r0+#32) = r11:10
85; CHECK-NEXT:    }
86; CHECK-NEXT:    {
87; CHECK-NEXT:     memd(r0+#24) = r7:6
88; CHECK-NEXT:     memd(r0+#16) = r17:16
89; CHECK-NEXT:    }
90; CHECK-NEXT:    {
91; CHECK-NEXT:     memd(r0+#8) = r5:4
92; CHECK-NEXT:     memd(r0+#0) = r3:2
93; CHECK-NEXT:    }
94; CHECK-NEXT:    {
95; CHECK-NEXT:     r29 = add(r29,#8)
96; CHECK-NEXT:     r17:16 = memd(r29+#0)
97; CHECK-NEXT:     jumpr r31
98; CHECK-NEXT:    } // 8-byte Folded Reload
99;
100; CHECK-64B-LABEL: test2:
101; CHECK-64B:         .cfi_startproc
102; CHECK-64B-NEXT:  // %bb.0: // %entry
103; CHECK-64B-NEXT:    {
104; CHECK-64B-NEXT:     r0 = #24
105; CHECK-64B-NEXT:    }
106; CHECK-64B-NEXT:    {
107; CHECK-64B-NEXT:     v0.w = vasl(v0.w,r0)
108; CHECK-64B-NEXT:    }
109; CHECK-64B-NEXT:    {
110; CHECK-64B-NEXT:     v0.w = vasr(v0.w,r0)
111; CHECK-64B-NEXT:     jumpr r31
112; CHECK-64B-NEXT:    }
113;
114; CHECK-128B-LABEL: test2:
115; CHECK-128B:         .cfi_startproc
116; CHECK-128B-NEXT:  // %bb.0: // %entry
117; CHECK-128B-NEXT:    {
118; CHECK-128B-NEXT:     r0 = #24
119; CHECK-128B-NEXT:    }
120; CHECK-128B-NEXT:    {
121; CHECK-128B-NEXT:     v0.w = vasl(v0.w,r0)
122; CHECK-128B-NEXT:    }
123; CHECK-128B-NEXT:    {
124; CHECK-128B-NEXT:     v0.w = vasr(v0.w,r0)
125; CHECK-128B-NEXT:     jumpr r31
126; CHECK-128B-NEXT:    }
127entry:
  ; Sixteen i32 lanes are 64 bytes in total. Both shifts use a splat of 24, so
  ; the pair sign-extends the low byte of each lane.
128  %shl = shl <16 x i32> %m, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
129  %shr = ashr exact <16 x i32> %shl, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
130  ret <16 x i32> %shr
131}
132
133define <64 x i16> @test3(<64 x i16> %m) {
134; CHECK-LABEL: test3:
135; CHECK:         .cfi_startproc
136; CHECK-NEXT:  // %bb.0: // %entry
137; CHECK-NEXT:    {
138; CHECK-NEXT:     r3:2 = vaslh(r3:2,#8)
139; CHECK-NEXT:     r5:4 = vaslh(r5:4,#8)
140; CHECK-NEXT:     r9:8 = memd(r29+#96)
141; CHECK-NEXT:     r11:10 = memd(r29+#88)
142; CHECK-NEXT:    }
143; CHECK-NEXT:    {
144; CHECK-NEXT:     r13:12 = vaslh(r9:8,#8)
145; CHECK-NEXT:     r11:10 = vaslh(r11:10,#8)
146; CHECK-NEXT:     r9:8 = memd(r29+#80)
147; CHECK-NEXT:     r7:6 = memd(r29+#104)
148; CHECK-NEXT:    }
149; CHECK-NEXT:    {
150; CHECK-NEXT:     r15:14 = vaslh(r7:6,#8)
151; CHECK-NEXT:     r9:8 = vaslh(r9:8,#8)
152; CHECK-NEXT:     r7:6 = memd(r29+#72)
153; CHECK-NEXT:    }
154; CHECK-NEXT:    {
155; CHECK-NEXT:     r15:14 = vasrh(r15:14,#8)
156; CHECK-NEXT:     r13:12 = vasrh(r13:12,#8)
157; CHECK-NEXT:    }
158; CHECK-NEXT:    {
159; CHECK-NEXT:     r11:10 = vasrh(r11:10,#8)
160; CHECK-NEXT:     r9:8 = vasrh(r9:8,#8)
161; CHECK-NEXT:     r15:14 = memd(r29+#64)
162; CHECK-NEXT:     memd(r0+#120) = r15:14
163; CHECK-NEXT:    }
164; CHECK-NEXT:    {
165; CHECK-NEXT:     r7:6 = vaslh(r7:6,#8)
166; CHECK-NEXT:     r15:14 = vaslh(r15:14,#8)
167; CHECK-NEXT:     r13:12 = memd(r29+#56)
168; CHECK-NEXT:     memd(r0+#112) = r13:12
169; CHECK-NEXT:    }
170; CHECK-NEXT:    {
171; CHECK-NEXT:     r13:12 = vaslh(r13:12,#8)
172; CHECK-NEXT:     r7:6 = vasrh(r7:6,#8)
173; CHECK-NEXT:     r11:10 = memd(r29+#48)
174; CHECK-NEXT:     memd(r0+#104) = r11:10
175; CHECK-NEXT:    }
176; CHECK-NEXT:    {
177; CHECK-NEXT:     r11:10 = vaslh(r11:10,#8)
178; CHECK-NEXT:     r15:14 = vasrh(r15:14,#8)
179; CHECK-NEXT:     r9:8 = memd(r29+#40)
180; CHECK-NEXT:     memd(r0+#96) = r9:8
181; CHECK-NEXT:    }
182; CHECK-NEXT:    {
183; CHECK-NEXT:     r9:8 = vaslh(r9:8,#8)
184; CHECK-NEXT:     r13:12 = vasrh(r13:12,#8)
185; CHECK-NEXT:     r7:6 = memd(r29+#32)
186; CHECK-NEXT:     memd(r0+#88) = r7:6
187; CHECK-NEXT:    }
188; CHECK-NEXT:    {
189; CHECK-NEXT:     r11:10 = vasrh(r11:10,#8)
190; CHECK-NEXT:     r9:8 = vasrh(r9:8,#8)
191; CHECK-NEXT:     r15:14 = memd(r29+#0)
192; CHECK-NEXT:     memd(r0+#80) = r15:14
193; CHECK-NEXT:    }
194; CHECK-NEXT:    {
195; CHECK-NEXT:     r7:6 = vaslh(r7:6,#8)
196; CHECK-NEXT:     r15:14 = vaslh(r15:14,#8)
197; CHECK-NEXT:     r13:12 = memd(r29+#16)
198; CHECK-NEXT:     memd(r0+#72) = r13:12
199; CHECK-NEXT:    }
200; CHECK-NEXT:    {
201; CHECK-NEXT:     r13:12 = vaslh(r13:12,#8)
202; CHECK-NEXT:     r7:6 = vasrh(r7:6,#8)
203; CHECK-NEXT:     r11:10 = memd(r29+#24)
204; CHECK-NEXT:     memd(r0+#64) = r11:10
205; CHECK-NEXT:    }
206; CHECK-NEXT:    {
207; CHECK-NEXT:     r11:10 = vaslh(r11:10,#8)
208; CHECK-NEXT:     r3:2 = vasrh(r3:2,#8)
209; CHECK-NEXT:     r9:8 = memd(r29+#8)
210; CHECK-NEXT:     memd(r0+#56) = r9:8
211; CHECK-NEXT:    }
212; CHECK-NEXT:    {
213; CHECK-NEXT:     r9:8 = vaslh(r9:8,#8)
214; CHECK-NEXT:     r13:12 = vasrh(r13:12,#8)
215; CHECK-NEXT:     memd(r0+#48) = r7:6
216; CHECK-NEXT:     memd(r0+#0) = r3:2
217; CHECK-NEXT:    }
218; CHECK-NEXT:    {
219; CHECK-NEXT:     r11:10 = vasrh(r11:10,#8)
220; CHECK-NEXT:     r7:6 = vasrh(r15:14,#8)
221; CHECK-NEXT:     memd(r0+#32) = r13:12
222; CHECK-NEXT:    }
223; CHECK-NEXT:    {
224; CHECK-NEXT:     r9:8 = vasrh(r9:8,#8)
225; CHECK-NEXT:     r5:4 = vasrh(r5:4,#8)
226; CHECK-NEXT:     memd(r0+#40) = r11:10
227; CHECK-NEXT:     memd(r0+#16) = r7:6
228; CHECK-NEXT:    }
229; CHECK-NEXT:    {
230; CHECK-NEXT:     jumpr r31
231; CHECK-NEXT:     memd(r0+#24) = r9:8
232; CHECK-NEXT:     memd(r0+#8) = r5:4
233; CHECK-NEXT:    }
234;
235; CHECK-64B-LABEL: test3:
236; CHECK-64B:         .cfi_startproc
237; CHECK-64B-NEXT:  // %bb.0: // %entry
238; CHECK-64B-NEXT:    {
239; CHECK-64B-NEXT:     r0 = #8
240; CHECK-64B-NEXT:    }
241; CHECK-64B-NEXT:    {
242; CHECK-64B-NEXT:     v0.h = vasl(v0.h,r0)
243; CHECK-64B-NEXT:    }
244; CHECK-64B-NEXT:    {
245; CHECK-64B-NEXT:     v1.h = vasl(v1.h,r0)
246; CHECK-64B-NEXT:    }
247; CHECK-64B-NEXT:    {
248; CHECK-64B-NEXT:     v0.h = vasr(v0.h,r0)
249; CHECK-64B-NEXT:    }
250; CHECK-64B-NEXT:    {
251; CHECK-64B-NEXT:     v1.h = vasr(v1.h,r0)
252; CHECK-64B-NEXT:     jumpr r31
253; CHECK-64B-NEXT:    }
254;
255; CHECK-128B-LABEL: test3:
256; CHECK-128B:         .cfi_startproc
257; CHECK-128B-NEXT:  // %bb.0: // %entry
258; CHECK-128B-NEXT:    {
259; CHECK-128B-NEXT:     r0 = #8
260; CHECK-128B-NEXT:    }
261; CHECK-128B-NEXT:    {
262; CHECK-128B-NEXT:     v0.h = vasl(v0.h,r0)
263; CHECK-128B-NEXT:    }
264; CHECK-128B-NEXT:    {
265; CHECK-128B-NEXT:     v0.h = vasr(v0.h,r0)
266; CHECK-128B-NEXT:     jumpr r31
267; CHECK-128B-NEXT:    }
268entry:
269  %shl = shl <64 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
270  %shr = ashr exact <64 x i16> %shl, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
271  ret <64 x i16> %shr
272}
273