1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
3
; test_vshlq_s8: calls llvm.arm.mve.vshl.vector on <16 x i8> %a and %b with
; i32 flags (0, 0, 0) -- non-saturating, non-rounding, signed, matching the
; expected single instruction `vshl.s8 q0, q0, q1`.
4define arm_aapcs_vfpcc <16 x i8> @test_vshlq_s8(<16 x i8> %a, <16 x i8> %b) {
5; CHECK-LABEL: test_vshlq_s8:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    vshl.s8 q0, q0, q1
8; CHECK-NEXT:    bx lr
9entry:
10  %0 = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 0)
11  ret <16 x i8> %0
12}
13
; test_vshlq_s16: calls llvm.arm.mve.vshl.vector on <8 x i16> %a and %b with
; i32 flags (0, 0, 0) -- non-saturating, non-rounding, signed, matching the
; expected single instruction `vshl.s16 q0, q0, q1`.
14define arm_aapcs_vfpcc <8 x i16> @test_vshlq_s16(<8 x i16> %a, <8 x i16> %b) {
15; CHECK-LABEL: test_vshlq_s16:
16; CHECK:       @ %bb.0: @ %entry
17; CHECK-NEXT:    vshl.s16 q0, q0, q1
18; CHECK-NEXT:    bx lr
19entry:
20  %0 = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 0)
21  ret <8 x i16> %0
22}
23
; test_vshlq_s32: calls llvm.arm.mve.vshl.vector on <4 x i32> %a and %b with
; i32 flags (0, 0, 0) -- non-saturating, non-rounding, signed, matching the
; expected single instruction `vshl.s32 q0, q0, q1`.
24define arm_aapcs_vfpcc <4 x i32> @test_vshlq_s32(<4 x i32> %a, <4 x i32> %b) {
25; CHECK-LABEL: test_vshlq_s32:
26; CHECK:       @ %bb.0: @ %entry
27; CHECK-NEXT:    vshl.s32 q0, q0, q1
28; CHECK-NEXT:    bx lr
29entry:
30  %0 = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 0)
31  ret <4 x i32> %0
32}
33
; test_vshlq_u8: calls llvm.arm.mve.vshl.vector on <16 x i8> %a and %b with
; i32 flags (0, 0, 1) -- only the last (unsigned) flag set, matching the
; expected single instruction `vshl.u8 q0, q0, q1`.
34define arm_aapcs_vfpcc <16 x i8> @test_vshlq_u8(<16 x i8> %a, <16 x i8> %b) {
35; CHECK-LABEL: test_vshlq_u8:
36; CHECK:       @ %bb.0: @ %entry
37; CHECK-NEXT:    vshl.u8 q0, q0, q1
38; CHECK-NEXT:    bx lr
39entry:
40  %0 = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 1)
41  ret <16 x i8> %0
42}
43
; test_vshlq_u16: calls llvm.arm.mve.vshl.vector on <8 x i16> %a and %b with
; i32 flags (0, 0, 1) -- only the last (unsigned) flag set, matching the
; expected single instruction `vshl.u16 q0, q0, q1`.
44define arm_aapcs_vfpcc <8 x i16> @test_vshlq_u16(<8 x i16> %a, <8 x i16> %b) {
45; CHECK-LABEL: test_vshlq_u16:
46; CHECK:       @ %bb.0: @ %entry
47; CHECK-NEXT:    vshl.u16 q0, q0, q1
48; CHECK-NEXT:    bx lr
49entry:
50  %0 = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 1)
51  ret <8 x i16> %0
52}
53
; test_vshlq_u32: calls llvm.arm.mve.vshl.vector on <4 x i32> %a and %b with
; i32 flags (0, 0, 1) -- only the last (unsigned) flag set, matching the
; expected single instruction `vshl.u32 q0, q0, q1`.
54define arm_aapcs_vfpcc <4 x i32> @test_vshlq_u32(<4 x i32> %a, <4 x i32> %b) {
55; CHECK-LABEL: test_vshlq_u32:
56; CHECK:       @ %bb.0: @ %entry
57; CHECK-NEXT:    vshl.u32 q0, q0, q1
58; CHECK-NEXT:    bx lr
59entry:
60  %0 = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 1)
61  ret <4 x i32> %0
62}
63
; test_vshlq_r_s8: calls llvm.arm.mve.vshl.scalar on <16 x i8> %a with the i32
; operand %b and flags (0, 0, 0) -- non-saturating, non-rounding, signed. The
; expected output is the in-place register form `vshl.s8 q0, r0`.
64define arm_aapcs_vfpcc <16 x i8> @test_vshlq_r_s8(<16 x i8> %a, i32 %b) {
65; CHECK-LABEL: test_vshlq_r_s8:
66; CHECK:       @ %bb.0: @ %entry
67; CHECK-NEXT:    vshl.s8 q0, r0
68; CHECK-NEXT:    bx lr
69entry:
70  %0 = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 0, i32 0, i32 0)
71  ret <16 x i8> %0
72}
73
; test_vshlq_r_s16: calls llvm.arm.mve.vshl.scalar on <8 x i16> %a with the i32
; operand %b and flags (0, 0, 0) -- non-saturating, non-rounding, signed. The
; expected output is the in-place register form `vshl.s16 q0, r0`.
74define arm_aapcs_vfpcc <8 x i16> @test_vshlq_r_s16(<8 x i16> %a, i32 %b) {
75; CHECK-LABEL: test_vshlq_r_s16:
76; CHECK:       @ %bb.0: @ %entry
77; CHECK-NEXT:    vshl.s16 q0, r0
78; CHECK-NEXT:    bx lr
79entry:
80  %0 = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 0, i32 0, i32 0)
81  ret <8 x i16> %0
82}
83
; test_vshlq_r_s32: calls llvm.arm.mve.vshl.scalar on <4 x i32> %a with the i32
; operand %b and flags (0, 0, 0) -- non-saturating, non-rounding, signed. The
; expected output is the in-place register form `vshl.s32 q0, r0`.
84define arm_aapcs_vfpcc <4 x i32> @test_vshlq_r_s32(<4 x i32> %a, i32 %b) {
85; CHECK-LABEL: test_vshlq_r_s32:
86; CHECK:       @ %bb.0: @ %entry
87; CHECK-NEXT:    vshl.s32 q0, r0
88; CHECK-NEXT:    bx lr
89entry:
90  %0 = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 0, i32 0, i32 0)
91  ret <4 x i32> %0
92}
93
; test_vshlq_r_u8: calls llvm.arm.mve.vshl.scalar on <16 x i8> %a with the i32
; operand %b and flags (0, 0, 1) -- only the last (unsigned) flag set. The
; expected output is the in-place register form `vshl.u8 q0, r0`.
94define arm_aapcs_vfpcc <16 x i8> @test_vshlq_r_u8(<16 x i8> %a, i32 %b) {
95; CHECK-LABEL: test_vshlq_r_u8:
96; CHECK:       @ %bb.0: @ %entry
97; CHECK-NEXT:    vshl.u8 q0, r0
98; CHECK-NEXT:    bx lr
99entry:
100  %0 = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 0, i32 0, i32 1)
101  ret <16 x i8> %0
102}
103
; test_vshlq_r_u16: calls llvm.arm.mve.vshl.scalar on <8 x i16> %a with the i32
; operand %b and flags (0, 0, 1) -- only the last (unsigned) flag set. The
; expected output is the in-place register form `vshl.u16 q0, r0`.
104define arm_aapcs_vfpcc <8 x i16> @test_vshlq_r_u16(<8 x i16> %a, i32 %b) {
105; CHECK-LABEL: test_vshlq_r_u16:
106; CHECK:       @ %bb.0: @ %entry
107; CHECK-NEXT:    vshl.u16 q0, r0
108; CHECK-NEXT:    bx lr
109entry:
110  %0 = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 0, i32 0, i32 1)
111  ret <8 x i16> %0
112}
113
; test_vshlq_r_u32: calls llvm.arm.mve.vshl.scalar on <4 x i32> %a with the i32
; operand %b and flags (0, 0, 1) -- only the last (unsigned) flag set. The
; expected output is the in-place register form `vshl.u32 q0, r0`.
114define arm_aapcs_vfpcc <4 x i32> @test_vshlq_r_u32(<4 x i32> %a, i32 %b) {
115; CHECK-LABEL: test_vshlq_r_u32:
116; CHECK:       @ %bb.0: @ %entry
117; CHECK-NEXT:    vshl.u32 q0, r0
118; CHECK-NEXT:    bx lr
119entry:
120  %0 = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 0, i32 0, i32 1)
121  ret <4 x i32> %0
122}
123
; test_vqshlq_s8: calls llvm.arm.mve.vshl.vector on <16 x i8> %a and %b with
; i32 flags (1, 0, 0) -- first (saturate) flag set, signed, matching the
; expected single instruction `vqshl.s8 q0, q0, q1`.
124define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_s8(<16 x i8> %a, <16 x i8> %b) {
125; CHECK-LABEL: test_vqshlq_s8:
126; CHECK:       @ %bb.0: @ %entry
127; CHECK-NEXT:    vqshl.s8 q0, q0, q1
128; CHECK-NEXT:    bx lr
129entry:
130  %0 = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 0)
131  ret <16 x i8> %0
132}
133
; test_vqshlq_s16: calls llvm.arm.mve.vshl.vector on <8 x i16> %a and %b with
; i32 flags (1, 0, 0) -- first (saturate) flag set, signed, matching the
; expected single instruction `vqshl.s16 q0, q0, q1`.
134define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_s16(<8 x i16> %a, <8 x i16> %b) {
135; CHECK-LABEL: test_vqshlq_s16:
136; CHECK:       @ %bb.0: @ %entry
137; CHECK-NEXT:    vqshl.s16 q0, q0, q1
138; CHECK-NEXT:    bx lr
139entry:
140  %0 = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 0)
141  ret <8 x i16> %0
142}
143
; test_vqshlq_s32: calls llvm.arm.mve.vshl.vector on <4 x i32> %a and %b with
; i32 flags (1, 0, 0) -- first (saturate) flag set, signed, matching the
; expected single instruction `vqshl.s32 q0, q0, q1`.
144define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_s32(<4 x i32> %a, <4 x i32> %b) {
145; CHECK-LABEL: test_vqshlq_s32:
146; CHECK:       @ %bb.0: @ %entry
147; CHECK-NEXT:    vqshl.s32 q0, q0, q1
148; CHECK-NEXT:    bx lr
149entry:
150  %0 = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 0)
151  ret <4 x i32> %0
152}
153
; test_vqshlq_u8: calls llvm.arm.mve.vshl.vector on <16 x i8> %a and %b with
; i32 flags (1, 0, 1) -- saturate and unsigned flags set, matching the
; expected single instruction `vqshl.u8 q0, q0, q1`.
154define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_u8(<16 x i8> %a, <16 x i8> %b) {
155; CHECK-LABEL: test_vqshlq_u8:
156; CHECK:       @ %bb.0: @ %entry
157; CHECK-NEXT:    vqshl.u8 q0, q0, q1
158; CHECK-NEXT:    bx lr
159entry:
160  %0 = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 1)
161  ret <16 x i8> %0
162}
163
; test_vqshlq_u16: calls llvm.arm.mve.vshl.vector on <8 x i16> %a and %b with
; i32 flags (1, 0, 1) -- saturate and unsigned flags set, matching the
; expected single instruction `vqshl.u16 q0, q0, q1`.
164define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_u16(<8 x i16> %a, <8 x i16> %b) {
165; CHECK-LABEL: test_vqshlq_u16:
166; CHECK:       @ %bb.0: @ %entry
167; CHECK-NEXT:    vqshl.u16 q0, q0, q1
168; CHECK-NEXT:    bx lr
169entry:
170  %0 = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 1)
171  ret <8 x i16> %0
172}
173
; test_vqshlq_u32: calls llvm.arm.mve.vshl.vector on <4 x i32> %a and %b with
; i32 flags (1, 0, 1) -- saturate and unsigned flags set, matching the
; expected single instruction `vqshl.u32 q0, q0, q1`.
174define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_u32(<4 x i32> %a, <4 x i32> %b) {
175; CHECK-LABEL: test_vqshlq_u32:
176; CHECK:       @ %bb.0: @ %entry
177; CHECK-NEXT:    vqshl.u32 q0, q0, q1
178; CHECK-NEXT:    bx lr
179entry:
180  %0 = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 1)
181  ret <4 x i32> %0
182}
183
; test_vqshlq_r_s8: calls llvm.arm.mve.vshl.scalar on <16 x i8> %a with the i32
; operand %b and flags (1, 0, 0) -- first (saturate) flag set, signed. The
; expected output is the in-place register form `vqshl.s8 q0, r0`.
184define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_r_s8(<16 x i8> %a, i32 %b) {
185; CHECK-LABEL: test_vqshlq_r_s8:
186; CHECK:       @ %bb.0: @ %entry
187; CHECK-NEXT:    vqshl.s8 q0, r0
188; CHECK-NEXT:    bx lr
189entry:
190  %0 = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 1, i32 0, i32 0)
191  ret <16 x i8> %0
192}
193
; test_vqshlq_r_s16: calls llvm.arm.mve.vshl.scalar on <8 x i16> %a with the i32
; operand %b and flags (1, 0, 0) -- first (saturate) flag set, signed. The
; expected output is the in-place register form `vqshl.s16 q0, r0`.
194define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_r_s16(<8 x i16> %a, i32 %b) {
195; CHECK-LABEL: test_vqshlq_r_s16:
196; CHECK:       @ %bb.0: @ %entry
197; CHECK-NEXT:    vqshl.s16 q0, r0
198; CHECK-NEXT:    bx lr
199entry:
200  %0 = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 1, i32 0, i32 0)
201  ret <8 x i16> %0
202}
203
; test_vqshlq_r_s32: calls llvm.arm.mve.vshl.scalar on <4 x i32> %a with the i32
; operand %b and flags (1, 0, 0) -- first (saturate) flag set, signed. The
; expected output is the in-place register form `vqshl.s32 q0, r0`.
204define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_r_s32(<4 x i32> %a, i32 %b) {
205; CHECK-LABEL: test_vqshlq_r_s32:
206; CHECK:       @ %bb.0: @ %entry
207; CHECK-NEXT:    vqshl.s32 q0, r0
208; CHECK-NEXT:    bx lr
209entry:
210  %0 = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 1, i32 0, i32 0)
211  ret <4 x i32> %0
212}
213
; test_vqshlq_r_u8: calls llvm.arm.mve.vshl.scalar on <16 x i8> %a with the i32
; operand %b and flags (1, 0, 1) -- saturate and unsigned flags set. The
; expected output is the in-place register form `vqshl.u8 q0, r0`.
214define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_r_u8(<16 x i8> %a, i32 %b) {
215; CHECK-LABEL: test_vqshlq_r_u8:
216; CHECK:       @ %bb.0: @ %entry
217; CHECK-NEXT:    vqshl.u8 q0, r0
218; CHECK-NEXT:    bx lr
219entry:
220  %0 = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 1, i32 0, i32 1)
221  ret <16 x i8> %0
222}
223
; test_vqshlq_r_u16: calls llvm.arm.mve.vshl.scalar on <8 x i16> %a with the i32
; operand %b and flags (1, 0, 1) -- saturate and unsigned flags set. The
; expected output is the in-place register form `vqshl.u16 q0, r0`.
224define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_r_u16(<8 x i16> %a, i32 %b) {
225; CHECK-LABEL: test_vqshlq_r_u16:
226; CHECK:       @ %bb.0: @ %entry
227; CHECK-NEXT:    vqshl.u16 q0, r0
228; CHECK-NEXT:    bx lr
229entry:
230  %0 = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 1, i32 0, i32 1)
231  ret <8 x i16> %0
232}
233
; test_vqshlq_r_u32: calls llvm.arm.mve.vshl.scalar on <4 x i32> %a with the i32
; operand %b and flags (1, 0, 1) -- saturate and unsigned flags set. The
; expected output is the in-place register form `vqshl.u32 q0, r0`.
234define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_r_u32(<4 x i32> %a, i32 %b) {
235; CHECK-LABEL: test_vqshlq_r_u32:
236; CHECK:       @ %bb.0: @ %entry
237; CHECK-NEXT:    vqshl.u32 q0, r0
238; CHECK-NEXT:    bx lr
239entry:
240  %0 = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 1, i32 0, i32 1)
241  ret <4 x i32> %0
242}
243
; test_vrshlq_s8: calls llvm.arm.mve.vshl.vector on <16 x i8> %a and %b with
; i32 flags (0, 1, 0) -- second (round) flag set, signed, matching the
; expected single instruction `vrshl.s8 q0, q0, q1`.
244define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_s8(<16 x i8> %a, <16 x i8> %b) {
245; CHECK-LABEL: test_vrshlq_s8:
246; CHECK:       @ %bb.0: @ %entry
247; CHECK-NEXT:    vrshl.s8 q0, q0, q1
248; CHECK-NEXT:    bx lr
249entry:
250  %0 = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 0)
251  ret <16 x i8> %0
252}
253
; test_vrshlq_s16: calls llvm.arm.mve.vshl.vector on <8 x i16> %a and %b with
; i32 flags (0, 1, 0) -- second (round) flag set, signed, matching the
; expected single instruction `vrshl.s16 q0, q0, q1`.
254define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_s16(<8 x i16> %a, <8 x i16> %b) {
255; CHECK-LABEL: test_vrshlq_s16:
256; CHECK:       @ %bb.0: @ %entry
257; CHECK-NEXT:    vrshl.s16 q0, q0, q1
258; CHECK-NEXT:    bx lr
259entry:
260  %0 = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 0)
261  ret <8 x i16> %0
262}
263
; test_vrshlq_s32: calls llvm.arm.mve.vshl.vector on <4 x i32> %a and %b with
; i32 flags (0, 1, 0) -- second (round) flag set, signed, matching the
; expected single instruction `vrshl.s32 q0, q0, q1`.
264define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_s32(<4 x i32> %a, <4 x i32> %b) {
265; CHECK-LABEL: test_vrshlq_s32:
266; CHECK:       @ %bb.0: @ %entry
267; CHECK-NEXT:    vrshl.s32 q0, q0, q1
268; CHECK-NEXT:    bx lr
269entry:
270  %0 = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 0)
271  ret <4 x i32> %0
272}
273
; test_vrshlq_u8: calls llvm.arm.mve.vshl.vector on <16 x i8> %a and %b with
; i32 flags (0, 1, 1) -- round and unsigned flags set, matching the
; expected single instruction `vrshl.u8 q0, q0, q1`.
274define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_u8(<16 x i8> %a, <16 x i8> %b) {
275; CHECK-LABEL: test_vrshlq_u8:
276; CHECK:       @ %bb.0: @ %entry
277; CHECK-NEXT:    vrshl.u8 q0, q0, q1
278; CHECK-NEXT:    bx lr
279entry:
280  %0 = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 1)
281  ret <16 x i8> %0
282}
283
; test_vrshlq_u16: calls llvm.arm.mve.vshl.vector on <8 x i16> %a and %b with
; i32 flags (0, 1, 1) -- round and unsigned flags set, matching the
; expected single instruction `vrshl.u16 q0, q0, q1`.
284define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_u16(<8 x i16> %a, <8 x i16> %b) {
285; CHECK-LABEL: test_vrshlq_u16:
286; CHECK:       @ %bb.0: @ %entry
287; CHECK-NEXT:    vrshl.u16 q0, q0, q1
288; CHECK-NEXT:    bx lr
289entry:
290  %0 = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 1)
291  ret <8 x i16> %0
292}
293
; test_vrshlq_u32: calls llvm.arm.mve.vshl.vector on <4 x i32> %a and %b with
; i32 flags (0, 1, 1) -- round and unsigned flags set, matching the
; expected single instruction `vrshl.u32 q0, q0, q1`.
294define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_u32(<4 x i32> %a, <4 x i32> %b) {
295; CHECK-LABEL: test_vrshlq_u32:
296; CHECK:       @ %bb.0: @ %entry
297; CHECK-NEXT:    vrshl.u32 q0, q0, q1
298; CHECK-NEXT:    bx lr
299entry:
300  %0 = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 1)
301  ret <4 x i32> %0
302}
303
; test_vrshlq_n_s8: calls llvm.arm.mve.vshl.scalar on <16 x i8> %a with the i32
; operand %b and flags (0, 1, 0) -- second (round) flag set, signed. The
; expected output is the in-place register form `vrshl.s8 q0, r0`.
304define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_n_s8(<16 x i8> %a, i32 %b) {
305; CHECK-LABEL: test_vrshlq_n_s8:
306; CHECK:       @ %bb.0: @ %entry
307; CHECK-NEXT:    vrshl.s8 q0, r0
308; CHECK-NEXT:    bx lr
309entry:
310  %0 = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 0, i32 1, i32 0)
311  ret <16 x i8> %0
312}
313
; test_vrshlq_n_s16: calls llvm.arm.mve.vshl.scalar on <8 x i16> %a with the i32
; operand %b and flags (0, 1, 0) -- second (round) flag set, signed. The
; expected output is the in-place register form `vrshl.s16 q0, r0`.
314define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_n_s16(<8 x i16> %a, i32 %b) {
315; CHECK-LABEL: test_vrshlq_n_s16:
316; CHECK:       @ %bb.0: @ %entry
317; CHECK-NEXT:    vrshl.s16 q0, r0
318; CHECK-NEXT:    bx lr
319entry:
320  %0 = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 0, i32 1, i32 0)
321  ret <8 x i16> %0
322}
323
; test_vrshlq_n_s32: calls llvm.arm.mve.vshl.scalar on <4 x i32> %a with the i32
; operand %b and flags (0, 1, 0) -- second (round) flag set, signed. The
; expected output is the in-place register form `vrshl.s32 q0, r0`.
324define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_n_s32(<4 x i32> %a, i32 %b) {
325; CHECK-LABEL: test_vrshlq_n_s32:
326; CHECK:       @ %bb.0: @ %entry
327; CHECK-NEXT:    vrshl.s32 q0, r0
328; CHECK-NEXT:    bx lr
329entry:
330  %0 = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 0, i32 1, i32 0)
331  ret <4 x i32> %0
332}
333
; test_vrshlq_n_u8: calls llvm.arm.mve.vshl.scalar on <16 x i8> %a with the i32
; operand %b and flags (0, 1, 1) -- round and unsigned flags set. The
; expected output is the in-place register form `vrshl.u8 q0, r0`.
334define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_n_u8(<16 x i8> %a, i32 %b) {
335; CHECK-LABEL: test_vrshlq_n_u8:
336; CHECK:       @ %bb.0: @ %entry
337; CHECK-NEXT:    vrshl.u8 q0, r0
338; CHECK-NEXT:    bx lr
339entry:
340  %0 = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 0, i32 1, i32 1)
341  ret <16 x i8> %0
342}
343
; test_vrshlq_n_u16: calls llvm.arm.mve.vshl.scalar on <8 x i16> %a with the i32
; operand %b and flags (0, 1, 1) -- round and unsigned flags set. The
; expected output is the in-place register form `vrshl.u16 q0, r0`.
344define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_n_u16(<8 x i16> %a, i32 %b) {
345; CHECK-LABEL: test_vrshlq_n_u16:
346; CHECK:       @ %bb.0: @ %entry
347; CHECK-NEXT:    vrshl.u16 q0, r0
348; CHECK-NEXT:    bx lr
349entry:
350  %0 = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 0, i32 1, i32 1)
351  ret <8 x i16> %0
352}
353
; test_vrshlq_n_u32: calls llvm.arm.mve.vshl.scalar on <4 x i32> %a with the i32
; operand %b and flags (0, 1, 1) -- round and unsigned flags set. The
; expected output is the in-place register form `vrshl.u32 q0, r0`.
354define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_n_u32(<4 x i32> %a, i32 %b) {
355; CHECK-LABEL: test_vrshlq_n_u32:
356; CHECK:       @ %bb.0: @ %entry
357; CHECK-NEXT:    vrshl.u32 q0, r0
358; CHECK-NEXT:    bx lr
359entry:
360  %0 = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 0, i32 1, i32 1)
361  ret <4 x i32> %0
362}
363
; test_vqrshlq_s8: calls llvm.arm.mve.vshl.vector on <16 x i8> %a and %b with
; i32 flags (1, 1, 0) -- saturate and round flags set, signed, matching the
; expected single instruction `vqrshl.s8 q0, q0, q1`.
364define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_s8(<16 x i8> %a, <16 x i8> %b) {
365; CHECK-LABEL: test_vqrshlq_s8:
366; CHECK:       @ %bb.0: @ %entry
367; CHECK-NEXT:    vqrshl.s8 q0, q0, q1
368; CHECK-NEXT:    bx lr
369entry:
370  %0 = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 0)
371  ret <16 x i8> %0
372}
373
; test_vqrshlq_s16: calls llvm.arm.mve.vshl.vector on <8 x i16> %a and %b with
; i32 flags (1, 1, 0) -- saturate and round flags set, signed, matching the
; expected single instruction `vqrshl.s16 q0, q0, q1`.
374define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_s16(<8 x i16> %a, <8 x i16> %b) {
375; CHECK-LABEL: test_vqrshlq_s16:
376; CHECK:       @ %bb.0: @ %entry
377; CHECK-NEXT:    vqrshl.s16 q0, q0, q1
378; CHECK-NEXT:    bx lr
379entry:
380  %0 = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 0)
381  ret <8 x i16> %0
382}
383
; test_vqrshlq_s32: calls llvm.arm.mve.vshl.vector on <4 x i32> %a and %b with
; i32 flags (1, 1, 0) -- saturate and round flags set, signed, matching the
; expected single instruction `vqrshl.s32 q0, q0, q1`.
384define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_s32(<4 x i32> %a, <4 x i32> %b) {
385; CHECK-LABEL: test_vqrshlq_s32:
386; CHECK:       @ %bb.0: @ %entry
387; CHECK-NEXT:    vqrshl.s32 q0, q0, q1
388; CHECK-NEXT:    bx lr
389entry:
390  %0 = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 0)
391  ret <4 x i32> %0
392}
393
; test_vqrshlq_u8: calls llvm.arm.mve.vshl.vector on <16 x i8> %a and %b with
; i32 flags (1, 1, 1) -- saturate, round and unsigned flags all set, matching
; the expected single instruction `vqrshl.u8 q0, q0, q1`.
394define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_u8(<16 x i8> %a, <16 x i8> %b) {
395; CHECK-LABEL: test_vqrshlq_u8:
396; CHECK:       @ %bb.0: @ %entry
397; CHECK-NEXT:    vqrshl.u8 q0, q0, q1
398; CHECK-NEXT:    bx lr
399entry:
400  %0 = call <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 1)
401  ret <16 x i8> %0
402}
403
; test_vqrshlq_u16: calls llvm.arm.mve.vshl.vector on <8 x i16> %a and %b with
; i32 flags (1, 1, 1) -- saturate, round and unsigned flags all set, matching
; the expected single instruction `vqrshl.u16 q0, q0, q1`.
404define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_u16(<8 x i16> %a, <8 x i16> %b) {
405; CHECK-LABEL: test_vqrshlq_u16:
406; CHECK:       @ %bb.0: @ %entry
407; CHECK-NEXT:    vqrshl.u16 q0, q0, q1
408; CHECK-NEXT:    bx lr
409entry:
410  %0 = call <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 1)
411  ret <8 x i16> %0
412}
413
; test_vqrshlq_u32: calls llvm.arm.mve.vshl.vector on <4 x i32> %a and %b with
; i32 flags (1, 1, 1) -- saturate, round and unsigned flags all set, matching
; the expected single instruction `vqrshl.u32 q0, q0, q1`.
414define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_u32(<4 x i32> %a, <4 x i32> %b) {
415; CHECK-LABEL: test_vqrshlq_u32:
416; CHECK:       @ %bb.0: @ %entry
417; CHECK-NEXT:    vqrshl.u32 q0, q0, q1
418; CHECK-NEXT:    bx lr
419entry:
420  %0 = call <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 1)
421  ret <4 x i32> %0
422}
423
; test_vqrshlq_n_s8: calls llvm.arm.mve.vshl.scalar on <16 x i8> %a with the i32
; operand %b and flags (1, 1, 0) -- saturate and round flags set, signed. The
; expected output is the in-place register form `vqrshl.s8 q0, r0`.
424define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_n_s8(<16 x i8> %a, i32 %b) {
425; CHECK-LABEL: test_vqrshlq_n_s8:
426; CHECK:       @ %bb.0: @ %entry
427; CHECK-NEXT:    vqrshl.s8 q0, r0
428; CHECK-NEXT:    bx lr
429entry:
430  %0 = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 1, i32 1, i32 0)
431  ret <16 x i8> %0
432}
433
; test_vqrshlq_n_s16: calls llvm.arm.mve.vshl.scalar on <8 x i16> %a with the
; i32 operand %b and flags (1, 1, 0) -- saturate and round flags set, signed.
; The expected output is the in-place register form `vqrshl.s16 q0, r0`.
434define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_n_s16(<8 x i16> %a, i32 %b) {
435; CHECK-LABEL: test_vqrshlq_n_s16:
436; CHECK:       @ %bb.0: @ %entry
437; CHECK-NEXT:    vqrshl.s16 q0, r0
438; CHECK-NEXT:    bx lr
439entry:
440  %0 = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 1, i32 1, i32 0)
441  ret <8 x i16> %0
442}
443
; test_vqrshlq_n_s32: calls llvm.arm.mve.vshl.scalar on <4 x i32> %a with the
; i32 operand %b and flags (1, 1, 0) -- saturate and round flags set, signed.
; The expected output is the in-place register form `vqrshl.s32 q0, r0`.
444define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_n_s32(<4 x i32> %a, i32 %b) {
445; CHECK-LABEL: test_vqrshlq_n_s32:
446; CHECK:       @ %bb.0: @ %entry
447; CHECK-NEXT:    vqrshl.s32 q0, r0
448; CHECK-NEXT:    bx lr
449entry:
450  %0 = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 1, i32 1, i32 0)
451  ret <4 x i32> %0
452}
453
; test_vqrshlq_n_u8: calls llvm.arm.mve.vshl.scalar on <16 x i8> %a with the i32
; operand %b and flags (1, 1, 1) -- saturate, round and unsigned flags all set.
; The expected output is the in-place register form `vqrshl.u8 q0, r0`.
454define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_n_u8(<16 x i8> %a, i32 %b) {
455; CHECK-LABEL: test_vqrshlq_n_u8:
456; CHECK:       @ %bb.0: @ %entry
457; CHECK-NEXT:    vqrshl.u8 q0, r0
458; CHECK-NEXT:    bx lr
459entry:
460  %0 = call <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8> %a, i32 %b, i32 1, i32 1, i32 1)
461  ret <16 x i8> %0
462}
463
; test_vqrshlq_n_u16: calls llvm.arm.mve.vshl.scalar on <8 x i16> %a with the
; i32 operand %b and flags (1, 1, 1) -- saturate, round and unsigned flags all
; set. The expected output is the in-place register form `vqrshl.u16 q0, r0`.
464define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_n_u16(<8 x i16> %a, i32 %b) {
465; CHECK-LABEL: test_vqrshlq_n_u16:
466; CHECK:       @ %bb.0: @ %entry
467; CHECK-NEXT:    vqrshl.u16 q0, r0
468; CHECK-NEXT:    bx lr
469entry:
470  %0 = call <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16> %a, i32 %b, i32 1, i32 1, i32 1)
471  ret <8 x i16> %0
472}
473
; test_vqrshlq_n_u32: calls llvm.arm.mve.vshl.scalar on <4 x i32> %a with the
; i32 operand %b and flags (1, 1, 1) -- saturate, round and unsigned flags all
; set. The expected output is the in-place register form `vqrshl.u32 q0, r0`.
474define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_n_u32(<4 x i32> %a, i32 %b) {
475; CHECK-LABEL: test_vqrshlq_n_u32:
476; CHECK:       @ %bb.0: @ %entry
477; CHECK-NEXT:    vqrshl.u32 q0, r0
478; CHECK-NEXT:    bx lr
479entry:
480  %0 = call <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32> %a, i32 %b, i32 1, i32 1, i32 1)
481  ret <4 x i32> %0
482}
483
; test_vshlq_m_s8: predicated vector shift. %p is zero-extended to i32 and
; converted to a <16 x i1> mask by llvm.arm.mve.pred.i2v; the mask and %inactive
; are the last two operands of llvm.arm.mve.vshl.vector.predicated, called with
; flags (0, 0, 0). Expected output: `vmsr p0, r0`, `vpst`, then
; `vshlt.s8 q0, q1, q2`.
484define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
485; CHECK-LABEL: test_vshlq_m_s8:
486; CHECK:       @ %bb.0: @ %entry
487; CHECK-NEXT:    vmsr p0, r0
488; CHECK-NEXT:    vpst
489; CHECK-NEXT:    vshlt.s8 q0, q1, q2
490; CHECK-NEXT:    bx lr
491entry:
492  %0 = zext i16 %p to i32
493  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
494  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 0, <16 x i1> %1, <16 x i8> %inactive)
495  ret <16 x i8> %2
496}
497
; test_vshlq_m_s16: predicated vector shift. %p is zero-extended to i32 and
; converted to an <8 x i1> mask by llvm.arm.mve.pred.i2v; the mask and %inactive
; are the last two operands of llvm.arm.mve.vshl.vector.predicated, called with
; flags (0, 0, 0). Expected output: `vmsr p0, r0`, `vpst`, then
; `vshlt.s16 q0, q1, q2`.
498define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
499; CHECK-LABEL: test_vshlq_m_s16:
500; CHECK:       @ %bb.0: @ %entry
501; CHECK-NEXT:    vmsr p0, r0
502; CHECK-NEXT:    vpst
503; CHECK-NEXT:    vshlt.s16 q0, q1, q2
504; CHECK-NEXT:    bx lr
505entry:
506  %0 = zext i16 %p to i32
507  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
508  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
509  ret <8 x i16> %2
510}
511
; test_vshlq_m_s32: predicated vector shift. %p is zero-extended to i32 and
; converted to a <4 x i1> mask by llvm.arm.mve.pred.i2v; the mask and %inactive
; are the last two operands of llvm.arm.mve.vshl.vector.predicated, called with
; flags (0, 0, 0). Expected output: `vmsr p0, r0`, `vpst`, then
; `vshlt.s32 q0, q1, q2`.
512define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
513; CHECK-LABEL: test_vshlq_m_s32:
514; CHECK:       @ %bb.0: @ %entry
515; CHECK-NEXT:    vmsr p0, r0
516; CHECK-NEXT:    vpst
517; CHECK-NEXT:    vshlt.s32 q0, q1, q2
518; CHECK-NEXT:    bx lr
519entry:
520  %0 = zext i16 %p to i32
521  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
522  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
523  ret <4 x i32> %2
524}
525
; test_vshlq_m_u8: predicated vector shift. %p is zero-extended to i32 and
; converted to a <16 x i1> mask by llvm.arm.mve.pred.i2v; the mask and %inactive
; are the last two operands of llvm.arm.mve.vshl.vector.predicated, called with
; flags (0, 0, 1) (unsigned). Expected output: `vmsr p0, r0`, `vpst`, then
; `vshlt.u8 q0, q1, q2`.
526define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
527; CHECK-LABEL: test_vshlq_m_u8:
528; CHECK:       @ %bb.0: @ %entry
529; CHECK-NEXT:    vmsr p0, r0
530; CHECK-NEXT:    vpst
531; CHECK-NEXT:    vshlt.u8 q0, q1, q2
532; CHECK-NEXT:    bx lr
533entry:
534  %0 = zext i16 %p to i32
535  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
536  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 1, <16 x i1> %1, <16 x i8> %inactive)
537  ret <16 x i8> %2
538}
539
; test_vshlq_m_u16: predicated vector shift. %p is zero-extended to i32 and
; converted to an <8 x i1> mask by llvm.arm.mve.pred.i2v; the mask and %inactive
; are the last two operands of llvm.arm.mve.vshl.vector.predicated, called with
; flags (0, 0, 1) (unsigned). Expected output: `vmsr p0, r0`, `vpst`, then
; `vshlt.u16 q0, q1, q2`.
540define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
541; CHECK-LABEL: test_vshlq_m_u16:
542; CHECK:       @ %bb.0: @ %entry
543; CHECK-NEXT:    vmsr p0, r0
544; CHECK-NEXT:    vpst
545; CHECK-NEXT:    vshlt.u16 q0, q1, q2
546; CHECK-NEXT:    bx lr
547entry:
548  %0 = zext i16 %p to i32
549  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
550  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
551  ret <8 x i16> %2
552}
553
; test_vshlq_m_u32: predicated vector shift. %p is zero-extended to i32 and
; converted to a <4 x i1> mask by llvm.arm.mve.pred.i2v; the mask and %inactive
; are the last two operands of llvm.arm.mve.vshl.vector.predicated, called with
; flags (0, 0, 1) (unsigned). Expected output: `vmsr p0, r0`, `vpst`, then
; `vshlt.u32 q0, q1, q2`.
554define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
555; CHECK-LABEL: test_vshlq_m_u32:
556; CHECK:       @ %bb.0: @ %entry
557; CHECK-NEXT:    vmsr p0, r0
558; CHECK-NEXT:    vpst
559; CHECK-NEXT:    vshlt.u32 q0, q1, q2
560; CHECK-NEXT:    bx lr
561entry:
562  %0 = zext i16 %p to i32
563  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
564  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
565  ret <4 x i32> %2
566}
567
; test_vshlq_x_s8: predicated vector shift with `undef` as the final operand of
; llvm.arm.mve.vshl.vector.predicated (no %inactive parameter), flags (0, 0, 0).
; The <16 x i1> mask is built from the zero-extended %p via pred.i2v. Expected
; output: `vmsr p0, r0`, `vpst`, then `vshlt.s8 q0, q0, q1`.
568define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
569; CHECK-LABEL: test_vshlq_x_s8:
570; CHECK:       @ %bb.0: @ %entry
571; CHECK-NEXT:    vmsr p0, r0
572; CHECK-NEXT:    vpst
573; CHECK-NEXT:    vshlt.s8 q0, q0, q1
574; CHECK-NEXT:    bx lr
575entry:
576  %0 = zext i16 %p to i32
577  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
578  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 0, <16 x i1> %1, <16 x i8> undef)
579  ret <16 x i8> %2
580}
581
; test_vshlq_x_s16: predicated vector shift with `undef` as the final operand of
; llvm.arm.mve.vshl.vector.predicated (no %inactive parameter), flags (0, 0, 0).
; The <8 x i1> mask is built from the zero-extended %p via pred.i2v. Expected
; output: `vmsr p0, r0`, `vpst`, then `vshlt.s16 q0, q0, q1`.
582define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
583; CHECK-LABEL: test_vshlq_x_s16:
584; CHECK:       @ %bb.0: @ %entry
585; CHECK-NEXT:    vmsr p0, r0
586; CHECK-NEXT:    vpst
587; CHECK-NEXT:    vshlt.s16 q0, q0, q1
588; CHECK-NEXT:    bx lr
589entry:
590  %0 = zext i16 %p to i32
591  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
592  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 0, <8 x i1> %1, <8 x i16> undef)
593  ret <8 x i16> %2
594}
595
; test_vshlq_x_s32: predicated vector shift with `undef` as the final operand of
; llvm.arm.mve.vshl.vector.predicated (no %inactive parameter), flags (0, 0, 0).
; The <4 x i1> mask is built from the zero-extended %p via pred.i2v. Expected
; output: `vmsr p0, r0`, `vpst`, then `vshlt.s32 q0, q0, q1`.
596define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
597; CHECK-LABEL: test_vshlq_x_s32:
598; CHECK:       @ %bb.0: @ %entry
599; CHECK-NEXT:    vmsr p0, r0
600; CHECK-NEXT:    vpst
601; CHECK-NEXT:    vshlt.s32 q0, q0, q1
602; CHECK-NEXT:    bx lr
603entry:
604  %0 = zext i16 %p to i32
605  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
606  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 0, <4 x i1> %1, <4 x i32> undef)
607  ret <4 x i32> %2
608}
609
; test_vshlq_x_u8: predicated vector shift with `undef` as the final operand of
; llvm.arm.mve.vshl.vector.predicated (no %inactive parameter), flags (0, 0, 1)
; (unsigned). The <16 x i1> mask is built from the zero-extended %p via
; pred.i2v. Expected output: `vmsr p0, r0`, `vpst`, then `vshlt.u8 q0, q0, q1`.
610define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
611; CHECK-LABEL: test_vshlq_x_u8:
612; CHECK:       @ %bb.0: @ %entry
613; CHECK-NEXT:    vmsr p0, r0
614; CHECK-NEXT:    vpst
615; CHECK-NEXT:    vshlt.u8 q0, q0, q1
616; CHECK-NEXT:    bx lr
617entry:
618  %0 = zext i16 %p to i32
619  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
620  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 0, i32 1, <16 x i1> %1, <16 x i8> undef)
621  ret <16 x i8> %2
622}
623
; test_vshlq_x_u16: predicated vector shift with `undef` as the final operand of
; llvm.arm.mve.vshl.vector.predicated (no %inactive parameter), flags (0, 0, 1)
; (unsigned). The <8 x i1> mask is built from the zero-extended %p via
; pred.i2v. Expected output: `vmsr p0, r0`, `vpst`, then `vshlt.u16 q0, q0, q1`.
624define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
625; CHECK-LABEL: test_vshlq_x_u16:
626; CHECK:       @ %bb.0: @ %entry
627; CHECK-NEXT:    vmsr p0, r0
628; CHECK-NEXT:    vpst
629; CHECK-NEXT:    vshlt.u16 q0, q0, q1
630; CHECK-NEXT:    bx lr
631entry:
632  %0 = zext i16 %p to i32
633  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
634  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 0, i32 1, <8 x i1> %1, <8 x i16> undef)
635  ret <8 x i16> %2
636}
637
; test_vshlq_x_u32: predicated vector shift with `undef` as the final operand of
; llvm.arm.mve.vshl.vector.predicated (no %inactive parameter), flags (0, 0, 1)
; (unsigned). The <4 x i1> mask is built from the zero-extended %p via
; pred.i2v. Expected output: `vmsr p0, r0`, `vpst`, then `vshlt.u32 q0, q0, q1`.
638define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
639; CHECK-LABEL: test_vshlq_x_u32:
640; CHECK:       @ %bb.0: @ %entry
641; CHECK-NEXT:    vmsr p0, r0
642; CHECK-NEXT:    vpst
643; CHECK-NEXT:    vshlt.u32 q0, q0, q1
644; CHECK-NEXT:    bx lr
645entry:
646  %0 = zext i16 %p to i32
647  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
648  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, i32 1, <4 x i1> %1, <4 x i32> undef)
649  ret <4 x i32> %2
650}
651
; test_vshlq_m_r_s8: predicated scalar-operand shift. %p is zero-extended and
; converted to a <16 x i1> mask, which is the last operand of
; llvm.arm.mve.vshl.scalar.predicated (flags 0, 0, 0; no inactive operand in
; this call). Expected output: `vmsr p0, r1`, `vpst`, then `vshlt.s8 q0, r0`.
652define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_r_s8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
653; CHECK-LABEL: test_vshlq_m_r_s8:
654; CHECK:       @ %bb.0: @ %entry
655; CHECK-NEXT:    vmsr p0, r1
656; CHECK-NEXT:    vpst
657; CHECK-NEXT:    vshlt.s8 q0, r0
658; CHECK-NEXT:    bx lr
659entry:
660  %0 = zext i16 %p to i32
661  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
662  %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 0, i32 0, i32 0, <16 x i1> %1)
663  ret <16 x i8> %2
664}
665
; test_vshlq_m_r_s16: predicated scalar-operand shift. %p is zero-extended and
; converted to an <8 x i1> mask, which is the last operand of
; llvm.arm.mve.vshl.scalar.predicated (flags 0, 0, 0; no inactive operand in
; this call). Expected output: `vmsr p0, r1`, `vpst`, then `vshlt.s16 q0, r0`.
666define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_r_s16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
667; CHECK-LABEL: test_vshlq_m_r_s16:
668; CHECK:       @ %bb.0: @ %entry
669; CHECK-NEXT:    vmsr p0, r1
670; CHECK-NEXT:    vpst
671; CHECK-NEXT:    vshlt.s16 q0, r0
672; CHECK-NEXT:    bx lr
673entry:
674  %0 = zext i16 %p to i32
675  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
676  %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 0, i32 0, i32 0, <8 x i1> %1)
677  ret <8 x i16> %2
678}
679
; test_vshlq_m_r_s32: predicated scalar-operand shift. %p is zero-extended and
; converted to a <4 x i1> mask, which is the last operand of
; llvm.arm.mve.vshl.scalar.predicated (flags 0, 0, 0; no inactive operand in
; this call). Expected output: `vmsr p0, r1`, `vpst`, then `vshlt.s32 q0, r0`.
680define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_r_s32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
681; CHECK-LABEL: test_vshlq_m_r_s32:
682; CHECK:       @ %bb.0: @ %entry
683; CHECK-NEXT:    vmsr p0, r1
684; CHECK-NEXT:    vpst
685; CHECK-NEXT:    vshlt.s32 q0, r0
686; CHECK-NEXT:    bx lr
687entry:
688  %0 = zext i16 %p to i32
689  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
690  %2 = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 0, i32 0, i32 0, <4 x i1> %1)
691  ret <4 x i32> %2
692}
693
; test_vshlq_m_r_u8: predicated scalar-operand shift. %p is zero-extended and
; converted to a <16 x i1> mask, which is the last operand of
; llvm.arm.mve.vshl.scalar.predicated (flags 0, 0, 1 -- unsigned; no inactive
; operand in this call). Expected output: `vmsr p0, r1`, `vpst`, then
; `vshlt.u8 q0, r0`.
694define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_r_u8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
695; CHECK-LABEL: test_vshlq_m_r_u8:
696; CHECK:       @ %bb.0: @ %entry
697; CHECK-NEXT:    vmsr p0, r1
698; CHECK-NEXT:    vpst
699; CHECK-NEXT:    vshlt.u8 q0, r0
700; CHECK-NEXT:    bx lr
701entry:
702  %0 = zext i16 %p to i32
703  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
704  %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 0, i32 0, i32 1, <16 x i1> %1)
705  ret <16 x i8> %2
706}
707
; test_vshlq_m_r_u16: predicated scalar-operand shift. %p is zero-extended and
; converted to an <8 x i1> mask, which is the last operand of
; llvm.arm.mve.vshl.scalar.predicated (flags 0, 0, 1 -- unsigned; no inactive
; operand in this call). Expected output: `vmsr p0, r1`, `vpst`, then
; `vshlt.u16 q0, r0`.
708define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_r_u16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
709; CHECK-LABEL: test_vshlq_m_r_u16:
710; CHECK:       @ %bb.0: @ %entry
711; CHECK-NEXT:    vmsr p0, r1
712; CHECK-NEXT:    vpst
713; CHECK-NEXT:    vshlt.u16 q0, r0
714; CHECK-NEXT:    bx lr
715entry:
716  %0 = zext i16 %p to i32
717  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
718  %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 0, i32 0, i32 1, <8 x i1> %1)
719  ret <8 x i16> %2
720}
721
; test_vshlq_m_r_u32: predicated scalar-operand shift. %p is zero-extended and
; converted to a <4 x i1> mask, which is the last operand of
; llvm.arm.mve.vshl.scalar.predicated (flags 0, 0, 1 -- unsigned; no inactive
; operand in this call). Expected output: `vmsr p0, r1`, `vpst`, then
; `vshlt.u32 q0, r0`.
722define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_r_u32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
723; CHECK-LABEL: test_vshlq_m_r_u32:
724; CHECK:       @ %bb.0: @ %entry
725; CHECK-NEXT:    vmsr p0, r1
726; CHECK-NEXT:    vpst
727; CHECK-NEXT:    vshlt.u32 q0, r0
728; CHECK-NEXT:    bx lr
729entry:
730  %0 = zext i16 %p to i32
731  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
732  %2 = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 0, i32 0, i32 1, <4 x i1> %1)
733  ret <4 x i32> %2
734}
735
; test_vqshlq_m_s8: predicated vector shift with the first (saturate) flag set:
; flags (1, 0, 0). %p is zero-extended and converted to a <16 x i1> mask; the
; mask and %inactive are the last two operands of
; llvm.arm.mve.vshl.vector.predicated. Expected output: `vmsr p0, r0`, `vpst`,
; then `vqshlt.s8 q0, q1, q2`.
736define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
737; CHECK-LABEL: test_vqshlq_m_s8:
738; CHECK:       @ %bb.0: @ %entry
739; CHECK-NEXT:    vmsr p0, r0
740; CHECK-NEXT:    vpst
741; CHECK-NEXT:    vqshlt.s8 q0, q1, q2
742; CHECK-NEXT:    bx lr
743entry:
744  %0 = zext i16 %p to i32
745  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
746  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 0, <16 x i1> %1, <16 x i8> %inactive)
747  ret <16 x i8> %2
748}
749
; test_vqshlq_m_s16: predicated vector shift with the first (saturate) flag set:
; flags (1, 0, 0). %p is zero-extended and converted to an <8 x i1> mask; the
; mask and %inactive are the last two operands of
; llvm.arm.mve.vshl.vector.predicated. Expected output: `vmsr p0, r0`, `vpst`,
; then `vqshlt.s16 q0, q1, q2`.
750define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
751; CHECK-LABEL: test_vqshlq_m_s16:
752; CHECK:       @ %bb.0: @ %entry
753; CHECK-NEXT:    vmsr p0, r0
754; CHECK-NEXT:    vpst
755; CHECK-NEXT:    vqshlt.s16 q0, q1, q2
756; CHECK-NEXT:    bx lr
757entry:
758  %0 = zext i16 %p to i32
759  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
760  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive)
761  ret <8 x i16> %2
762}
763
; test_vqshlq_m_s32: predicated vector shift with the first (saturate) flag set:
; flags (1, 0, 0). %p is zero-extended and converted to a <4 x i1> mask; the
; mask and %inactive are the last two operands of
; llvm.arm.mve.vshl.vector.predicated. Expected output: `vmsr p0, r0`, `vpst`,
; then `vqshlt.s32 q0, q1, q2`.
764define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
765; CHECK-LABEL: test_vqshlq_m_s32:
766; CHECK:       @ %bb.0: @ %entry
767; CHECK-NEXT:    vmsr p0, r0
768; CHECK-NEXT:    vpst
769; CHECK-NEXT:    vqshlt.s32 q0, q1, q2
770; CHECK-NEXT:    bx lr
771entry:
772  %0 = zext i16 %p to i32
773  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
774  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive)
775  ret <4 x i32> %2
776}
777
; test_vqshlq_m_u8: predicated vector shift with the saturate and unsigned flags
; set: flags (1, 0, 1). %p is zero-extended and converted to a <16 x i1> mask;
; the mask and %inactive are the last two operands of
; llvm.arm.mve.vshl.vector.predicated. Expected output: `vmsr p0, r0`, `vpst`,
; then `vqshlt.u8 q0, q1, q2`.
778define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
779; CHECK-LABEL: test_vqshlq_m_u8:
780; CHECK:       @ %bb.0: @ %entry
781; CHECK-NEXT:    vmsr p0, r0
782; CHECK-NEXT:    vpst
783; CHECK-NEXT:    vqshlt.u8 q0, q1, q2
784; CHECK-NEXT:    bx lr
785entry:
786  %0 = zext i16 %p to i32
787  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
788  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 0, i32 1, <16 x i1> %1, <16 x i8> %inactive)
789  ret <16 x i8> %2
790}
791
; test_vqshlq_m_u16: predicated vector shift with the saturate and unsigned
; flags set: flags (1, 0, 1). %p is zero-extended and converted to an <8 x i1>
; mask; the mask and %inactive are the last two operands of
; llvm.arm.mve.vshl.vector.predicated. Expected output: `vmsr p0, r0`, `vpst`,
; then `vqshlt.u16 q0, q1, q2`.
792define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
793; CHECK-LABEL: test_vqshlq_m_u16:
794; CHECK:       @ %bb.0: @ %entry
795; CHECK-NEXT:    vmsr p0, r0
796; CHECK-NEXT:    vpst
797; CHECK-NEXT:    vqshlt.u16 q0, q1, q2
798; CHECK-NEXT:    bx lr
799entry:
800  %0 = zext i16 %p to i32
801  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
802  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive)
803  ret <8 x i16> %2
804}
805
; test_vqshlq_m_u32: predicated vector shift with the saturate and unsigned
; flags set: flags (1, 0, 1). %p is zero-extended and converted to a <4 x i1>
; mask; the mask and %inactive are the last two operands of
; llvm.arm.mve.vshl.vector.predicated. Expected output: `vmsr p0, r0`, `vpst`,
; then `vqshlt.u32 q0, q1, q2`.
806define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
807; CHECK-LABEL: test_vqshlq_m_u32:
808; CHECK:       @ %bb.0: @ %entry
809; CHECK-NEXT:    vmsr p0, r0
810; CHECK-NEXT:    vpst
811; CHECK-NEXT:    vqshlt.u32 q0, q1, q2
812; CHECK-NEXT:    bx lr
813entry:
814  %0 = zext i16 %p to i32
815  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
816  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive)
817  ret <4 x i32> %2
818}
819
; test_vqshlq_m_r_s8: predicated scalar-operand shift with the first (saturate)
; flag set: flags (1, 0, 0). %p is zero-extended and converted to a <16 x i1>
; mask, the last operand of llvm.arm.mve.vshl.scalar.predicated. Expected
; output: `vmsr p0, r1`, `vpst`, then `vqshlt.s8 q0, r0`.
820define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_r_s8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
821; CHECK-LABEL: test_vqshlq_m_r_s8:
822; CHECK:       @ %bb.0: @ %entry
823; CHECK-NEXT:    vmsr p0, r1
824; CHECK-NEXT:    vpst
825; CHECK-NEXT:    vqshlt.s8 q0, r0
826; CHECK-NEXT:    bx lr
827entry:
828  %0 = zext i16 %p to i32
829  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
830  %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 1, i32 0, i32 0, <16 x i1> %1)
831  ret <16 x i8> %2
832}
833
; Predicated vector-by-scalar shift, signed 16-bit lanes, flags (saturate=1, round=0, unsigned=0).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <8 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vqshlt.s16 q0, r0.
834define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_r_s16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
835; CHECK-LABEL: test_vqshlq_m_r_s16:
836; CHECK:       @ %bb.0: @ %entry
837; CHECK-NEXT:    vmsr p0, r1
838; CHECK-NEXT:    vpst
839; CHECK-NEXT:    vqshlt.s16 q0, r0
840; CHECK-NEXT:    bx lr
841entry:
842  %0 = zext i16 %p to i32
843  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
844  %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 1, i32 0, i32 0, <8 x i1> %1)
845  ret <8 x i16> %2
846}
847
; Predicated vector-by-scalar shift, signed 32-bit lanes, flags (saturate=1, round=0, unsigned=0).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <4 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vqshlt.s32 q0, r0.
848define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_r_s32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
849; CHECK-LABEL: test_vqshlq_m_r_s32:
850; CHECK:       @ %bb.0: @ %entry
851; CHECK-NEXT:    vmsr p0, r1
852; CHECK-NEXT:    vpst
853; CHECK-NEXT:    vqshlt.s32 q0, r0
854; CHECK-NEXT:    bx lr
855entry:
856  %0 = zext i16 %p to i32
857  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
858  %2 = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 1, i32 0, i32 0, <4 x i1> %1)
859  ret <4 x i32> %2
860}
861
; Predicated vector-by-scalar shift, unsigned 8-bit lanes, flags (saturate=1, round=0, unsigned=1).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <16 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vqshlt.u8 q0, r0.
862define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_r_u8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
863; CHECK-LABEL: test_vqshlq_m_r_u8:
864; CHECK:       @ %bb.0: @ %entry
865; CHECK-NEXT:    vmsr p0, r1
866; CHECK-NEXT:    vpst
867; CHECK-NEXT:    vqshlt.u8 q0, r0
868; CHECK-NEXT:    bx lr
869entry:
870  %0 = zext i16 %p to i32
871  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
872  %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 1, i32 0, i32 1, <16 x i1> %1)
873  ret <16 x i8> %2
874}
875
; Predicated vector-by-scalar shift, unsigned 16-bit lanes, flags (saturate=1, round=0, unsigned=1).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <8 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vqshlt.u16 q0, r0.
876define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_r_u16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
877; CHECK-LABEL: test_vqshlq_m_r_u16:
878; CHECK:       @ %bb.0: @ %entry
879; CHECK-NEXT:    vmsr p0, r1
880; CHECK-NEXT:    vpst
881; CHECK-NEXT:    vqshlt.u16 q0, r0
882; CHECK-NEXT:    bx lr
883entry:
884  %0 = zext i16 %p to i32
885  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
886  %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 1, i32 0, i32 1, <8 x i1> %1)
887  ret <8 x i16> %2
888}
889
; Predicated vector-by-scalar shift, unsigned 32-bit lanes, flags (saturate=1, round=0, unsigned=1).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <4 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vqshlt.u32 q0, r0.
890define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_r_u32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
891; CHECK-LABEL: test_vqshlq_m_r_u32:
892; CHECK:       @ %bb.0: @ %entry
893; CHECK-NEXT:    vmsr p0, r1
894; CHECK-NEXT:    vpst
895; CHECK-NEXT:    vqshlt.u32 q0, r0
896; CHECK-NEXT:    bx lr
897entry:
898  %0 = zext i16 %p to i32
899  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
900  %2 = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 1, i32 0, i32 1, <4 x i1> %1)
901  ret <4 x i32> %2
902}
903
; Predicated vector-by-vector shift, signed 8-bit lanes, flags (saturate=0, round=1, unsigned=0).
; %p is zero-extended and converted to a <16 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vrshlt.s8 q0, q1, q2.
904define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
905; CHECK-LABEL: test_vrshlq_m_s8:
906; CHECK:       @ %bb.0: @ %entry
907; CHECK-NEXT:    vmsr p0, r0
908; CHECK-NEXT:    vpst
909; CHECK-NEXT:    vrshlt.s8 q0, q1, q2
910; CHECK-NEXT:    bx lr
911entry:
912  %0 = zext i16 %p to i32
913  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
914  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 0, <16 x i1> %1, <16 x i8> %inactive)
915  ret <16 x i8> %2
916}
917
; Predicated vector-by-vector shift, signed 16-bit lanes, flags (saturate=0, round=1, unsigned=0).
; %p is zero-extended and converted to an <8 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vrshlt.s16 q0, q1, q2.
918define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
919; CHECK-LABEL: test_vrshlq_m_s16:
920; CHECK:       @ %bb.0: @ %entry
921; CHECK-NEXT:    vmsr p0, r0
922; CHECK-NEXT:    vpst
923; CHECK-NEXT:    vrshlt.s16 q0, q1, q2
924; CHECK-NEXT:    bx lr
925entry:
926  %0 = zext i16 %p to i32
927  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
928  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
929  ret <8 x i16> %2
930}
931
; Predicated vector-by-vector shift, signed 32-bit lanes, flags (saturate=0, round=1, unsigned=0).
; %p is zero-extended and converted to a <4 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vrshlt.s32 q0, q1, q2.
932define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
933; CHECK-LABEL: test_vrshlq_m_s32:
934; CHECK:       @ %bb.0: @ %entry
935; CHECK-NEXT:    vmsr p0, r0
936; CHECK-NEXT:    vpst
937; CHECK-NEXT:    vrshlt.s32 q0, q1, q2
938; CHECK-NEXT:    bx lr
939entry:
940  %0 = zext i16 %p to i32
941  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
942  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
943  ret <4 x i32> %2
944}
945
; Predicated vector-by-vector shift, unsigned 8-bit lanes, flags (saturate=0, round=1, unsigned=1).
; %p is zero-extended and converted to a <16 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vrshlt.u8 q0, q1, q2.
946define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
947; CHECK-LABEL: test_vrshlq_m_u8:
948; CHECK:       @ %bb.0: @ %entry
949; CHECK-NEXT:    vmsr p0, r0
950; CHECK-NEXT:    vpst
951; CHECK-NEXT:    vrshlt.u8 q0, q1, q2
952; CHECK-NEXT:    bx lr
953entry:
954  %0 = zext i16 %p to i32
955  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
956  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 1, <16 x i1> %1, <16 x i8> %inactive)
957  ret <16 x i8> %2
958}
959
; Predicated vector-by-vector shift, unsigned 16-bit lanes, flags (saturate=0, round=1, unsigned=1).
; %p is zero-extended and converted to an <8 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vrshlt.u16 q0, q1, q2.
960define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
961; CHECK-LABEL: test_vrshlq_m_u16:
962; CHECK:       @ %bb.0: @ %entry
963; CHECK-NEXT:    vmsr p0, r0
964; CHECK-NEXT:    vpst
965; CHECK-NEXT:    vrshlt.u16 q0, q1, q2
966; CHECK-NEXT:    bx lr
967entry:
968  %0 = zext i16 %p to i32
969  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
970  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
971  ret <8 x i16> %2
972}
973
; Predicated vector-by-vector shift, unsigned 32-bit lanes, flags (saturate=0, round=1, unsigned=1).
; %p is zero-extended and converted to a <4 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vrshlt.u32 q0, q1, q2.
974define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
975; CHECK-LABEL: test_vrshlq_m_u32:
976; CHECK:       @ %bb.0: @ %entry
977; CHECK-NEXT:    vmsr p0, r0
978; CHECK-NEXT:    vpst
979; CHECK-NEXT:    vrshlt.u32 q0, q1, q2
980; CHECK-NEXT:    bx lr
981entry:
982  %0 = zext i16 %p to i32
983  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
984  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
985  ret <4 x i32> %2
986}
987
; Predicated vector-by-vector shift, signed 8-bit lanes, flags (saturate=0, round=1, unsigned=0).
; Unlike the _m tests there is no %inactive argument: the intrinsic's last operand is undef,
; and the expected instruction uses q0 as both destination and first source (vrshlt.s8 q0, q0, q1).
988define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
989; CHECK-LABEL: test_vrshlq_x_s8:
990; CHECK:       @ %bb.0: @ %entry
991; CHECK-NEXT:    vmsr p0, r0
992; CHECK-NEXT:    vpst
993; CHECK-NEXT:    vrshlt.s8 q0, q0, q1
994; CHECK-NEXT:    bx lr
995entry:
996  %0 = zext i16 %p to i32
997  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
998  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 0, <16 x i1> %1, <16 x i8> undef)
999  ret <16 x i8> %2
1000}
1001
; Predicated vector-by-vector shift, signed 16-bit lanes, flags (saturate=0, round=1, unsigned=0).
; Unlike the _m tests there is no %inactive argument: the intrinsic's last operand is undef,
; and the expected instruction uses q0 as both destination and first source (vrshlt.s16 q0, q0, q1).
1002define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
1003; CHECK-LABEL: test_vrshlq_x_s16:
1004; CHECK:       @ %bb.0: @ %entry
1005; CHECK-NEXT:    vmsr p0, r0
1006; CHECK-NEXT:    vpst
1007; CHECK-NEXT:    vrshlt.s16 q0, q0, q1
1008; CHECK-NEXT:    bx lr
1009entry:
1010  %0 = zext i16 %p to i32
1011  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1012  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 0, <8 x i1> %1, <8 x i16> undef)
1013  ret <8 x i16> %2
1014}
1015
; Predicated vector-by-vector shift, signed 32-bit lanes, flags (saturate=0, round=1, unsigned=0).
; Unlike the _m tests there is no %inactive argument: the intrinsic's last operand is undef,
; and the expected instruction uses q0 as both destination and first source (vrshlt.s32 q0, q0, q1).
1016define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
1017; CHECK-LABEL: test_vrshlq_x_s32:
1018; CHECK:       @ %bb.0: @ %entry
1019; CHECK-NEXT:    vmsr p0, r0
1020; CHECK-NEXT:    vpst
1021; CHECK-NEXT:    vrshlt.s32 q0, q0, q1
1022; CHECK-NEXT:    bx lr
1023entry:
1024  %0 = zext i16 %p to i32
1025  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1026  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 0, <4 x i1> %1, <4 x i32> undef)
1027  ret <4 x i32> %2
1028}
1029
; Predicated vector-by-vector shift, unsigned 8-bit lanes, flags (saturate=0, round=1, unsigned=1).
; Unlike the _m tests there is no %inactive argument: the intrinsic's last operand is undef,
; and the expected instruction uses q0 as both destination and first source (vrshlt.u8 q0, q0, q1).
1030define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
1031; CHECK-LABEL: test_vrshlq_x_u8:
1032; CHECK:       @ %bb.0: @ %entry
1033; CHECK-NEXT:    vmsr p0, r0
1034; CHECK-NEXT:    vpst
1035; CHECK-NEXT:    vrshlt.u8 q0, q0, q1
1036; CHECK-NEXT:    bx lr
1037entry:
1038  %0 = zext i16 %p to i32
1039  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
1040  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 0, i32 1, i32 1, <16 x i1> %1, <16 x i8> undef)
1041  ret <16 x i8> %2
1042}
1043
; Predicated vector-by-vector shift, unsigned 16-bit lanes, flags (saturate=0, round=1, unsigned=1).
; Unlike the _m tests there is no %inactive argument: the intrinsic's last operand is undef,
; and the expected instruction uses q0 as both destination and first source (vrshlt.u16 q0, q0, q1).
1044define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
1045; CHECK-LABEL: test_vrshlq_x_u16:
1046; CHECK:       @ %bb.0: @ %entry
1047; CHECK-NEXT:    vmsr p0, r0
1048; CHECK-NEXT:    vpst
1049; CHECK-NEXT:    vrshlt.u16 q0, q0, q1
1050; CHECK-NEXT:    bx lr
1051entry:
1052  %0 = zext i16 %p to i32
1053  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1054  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 0, i32 1, i32 1, <8 x i1> %1, <8 x i16> undef)
1055  ret <8 x i16> %2
1056}
1057
; Predicated vector-by-vector shift, unsigned 32-bit lanes, flags (saturate=0, round=1, unsigned=1).
; Unlike the _m tests there is no %inactive argument: the intrinsic's last operand is undef,
; and the expected instruction uses q0 as both destination and first source (vrshlt.u32 q0, q0, q1).
1058define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
1059; CHECK-LABEL: test_vrshlq_x_u32:
1060; CHECK:       @ %bb.0: @ %entry
1061; CHECK-NEXT:    vmsr p0, r0
1062; CHECK-NEXT:    vpst
1063; CHECK-NEXT:    vrshlt.u32 q0, q0, q1
1064; CHECK-NEXT:    bx lr
1065entry:
1066  %0 = zext i16 %p to i32
1067  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1068  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 1, i32 1, <4 x i1> %1, <4 x i32> undef)
1069  ret <4 x i32> %2
1070}
1071
; Predicated vector-by-scalar shift, signed 8-bit lanes, flags (saturate=0, round=1, unsigned=0).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <16 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vrshlt.s8 q0, r0.
1072define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_m_n_s8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
1073; CHECK-LABEL: test_vrshlq_m_n_s8:
1074; CHECK:       @ %bb.0: @ %entry
1075; CHECK-NEXT:    vmsr p0, r1
1076; CHECK-NEXT:    vpst
1077; CHECK-NEXT:    vrshlt.s8 q0, r0
1078; CHECK-NEXT:    bx lr
1079entry:
1080  %0 = zext i16 %p to i32
1081  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
1082  %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 0, i32 1, i32 0, <16 x i1> %1)
1083  ret <16 x i8> %2
1084}
1085
; Predicated vector-by-scalar shift, signed 16-bit lanes, flags (saturate=0, round=1, unsigned=0).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <8 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vrshlt.s16 q0, r0.
1086define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_m_n_s16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
1087; CHECK-LABEL: test_vrshlq_m_n_s16:
1088; CHECK:       @ %bb.0: @ %entry
1089; CHECK-NEXT:    vmsr p0, r1
1090; CHECK-NEXT:    vpst
1091; CHECK-NEXT:    vrshlt.s16 q0, r0
1092; CHECK-NEXT:    bx lr
1093entry:
1094  %0 = zext i16 %p to i32
1095  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1096  %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 0, i32 1, i32 0, <8 x i1> %1)
1097  ret <8 x i16> %2
1098}
1099
; Predicated vector-by-scalar shift, signed 32-bit lanes, flags (saturate=0, round=1, unsigned=0).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <4 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vrshlt.s32 q0, r0.
1100define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_m_n_s32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
1101; CHECK-LABEL: test_vrshlq_m_n_s32:
1102; CHECK:       @ %bb.0: @ %entry
1103; CHECK-NEXT:    vmsr p0, r1
1104; CHECK-NEXT:    vpst
1105; CHECK-NEXT:    vrshlt.s32 q0, r0
1106; CHECK-NEXT:    bx lr
1107entry:
1108  %0 = zext i16 %p to i32
1109  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1110  %2 = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 0, i32 1, i32 0, <4 x i1> %1)
1111  ret <4 x i32> %2
1112}
1113
; Predicated vector-by-scalar shift, unsigned 8-bit lanes, flags (saturate=0, round=1, unsigned=1).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <16 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vrshlt.u8 q0, r0.
1114define arm_aapcs_vfpcc <16 x i8> @test_vrshlq_m_n_u8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
1115; CHECK-LABEL: test_vrshlq_m_n_u8:
1116; CHECK:       @ %bb.0: @ %entry
1117; CHECK-NEXT:    vmsr p0, r1
1118; CHECK-NEXT:    vpst
1119; CHECK-NEXT:    vrshlt.u8 q0, r0
1120; CHECK-NEXT:    bx lr
1121entry:
1122  %0 = zext i16 %p to i32
1123  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
1124  %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 0, i32 1, i32 1, <16 x i1> %1)
1125  ret <16 x i8> %2
1126}
1127
; Predicated vector-by-scalar shift, unsigned 16-bit lanes, flags (saturate=0, round=1, unsigned=1).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <8 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vrshlt.u16 q0, r0.
1128define arm_aapcs_vfpcc <8 x i16> @test_vrshlq_m_n_u16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
1129; CHECK-LABEL: test_vrshlq_m_n_u16:
1130; CHECK:       @ %bb.0: @ %entry
1131; CHECK-NEXT:    vmsr p0, r1
1132; CHECK-NEXT:    vpst
1133; CHECK-NEXT:    vrshlt.u16 q0, r0
1134; CHECK-NEXT:    bx lr
1135entry:
1136  %0 = zext i16 %p to i32
1137  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1138  %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 0, i32 1, i32 1, <8 x i1> %1)
1139  ret <8 x i16> %2
1140}
1141
; Predicated vector-by-scalar shift, unsigned 32-bit lanes, flags (saturate=0, round=1, unsigned=1).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <4 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vrshlt.u32 q0, r0.
1142define arm_aapcs_vfpcc <4 x i32> @test_vrshlq_m_n_u32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
1143; CHECK-LABEL: test_vrshlq_m_n_u32:
1144; CHECK:       @ %bb.0: @ %entry
1145; CHECK-NEXT:    vmsr p0, r1
1146; CHECK-NEXT:    vpst
1147; CHECK-NEXT:    vrshlt.u32 q0, r0
1148; CHECK-NEXT:    bx lr
1149entry:
1150  %0 = zext i16 %p to i32
1151  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1152  %2 = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 0, i32 1, i32 1, <4 x i1> %1)
1153  ret <4 x i32> %2
1154}
1155
; Predicated vector-by-vector shift, signed 8-bit lanes, flags (saturate=1, round=1, unsigned=0).
; %p is zero-extended and converted to a <16 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vqrshlt.s8 q0, q1, q2.
1156define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
1157; CHECK-LABEL: test_vqrshlq_m_s8:
1158; CHECK:       @ %bb.0: @ %entry
1159; CHECK-NEXT:    vmsr p0, r0
1160; CHECK-NEXT:    vpst
1161; CHECK-NEXT:    vqrshlt.s8 q0, q1, q2
1162; CHECK-NEXT:    bx lr
1163entry:
1164  %0 = zext i16 %p to i32
1165  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
1166  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 0, <16 x i1> %1, <16 x i8> %inactive)
1167  ret <16 x i8> %2
1168}
1169
; Predicated vector-by-vector shift, signed 16-bit lanes, flags (saturate=1, round=1, unsigned=0).
; %p is zero-extended and converted to an <8 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vqrshlt.s16 q0, q1, q2.
1170define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
1171; CHECK-LABEL: test_vqrshlq_m_s16:
1172; CHECK:       @ %bb.0: @ %entry
1173; CHECK-NEXT:    vmsr p0, r0
1174; CHECK-NEXT:    vpst
1175; CHECK-NEXT:    vqrshlt.s16 q0, q1, q2
1176; CHECK-NEXT:    bx lr
1177entry:
1178  %0 = zext i16 %p to i32
1179  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1180  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive)
1181  ret <8 x i16> %2
1182}
1183
; Predicated vector-by-vector shift, signed 32-bit lanes, flags (saturate=1, round=1, unsigned=0).
; %p is zero-extended and converted to a <4 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vqrshlt.s32 q0, q1, q2.
1184define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
1185; CHECK-LABEL: test_vqrshlq_m_s32:
1186; CHECK:       @ %bb.0: @ %entry
1187; CHECK-NEXT:    vmsr p0, r0
1188; CHECK-NEXT:    vpst
1189; CHECK-NEXT:    vqrshlt.s32 q0, q1, q2
1190; CHECK-NEXT:    bx lr
1191entry:
1192  %0 = zext i16 %p to i32
1193  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1194  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive)
1195  ret <4 x i32> %2
1196}
1197
; Predicated vector-by-vector shift, unsigned 8-bit lanes, flags (saturate=1, round=1, unsigned=1).
; %p is zero-extended and converted to a <16 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vqrshlt.u8 q0, q1, q2.
1198define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
1199; CHECK-LABEL: test_vqrshlq_m_u8:
1200; CHECK:       @ %bb.0: @ %entry
1201; CHECK-NEXT:    vmsr p0, r0
1202; CHECK-NEXT:    vpst
1203; CHECK-NEXT:    vqrshlt.u8 q0, q1, q2
1204; CHECK-NEXT:    bx lr
1205entry:
1206  %0 = zext i16 %p to i32
1207  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
1208  %2 = call <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 1, i32 1, i32 1, <16 x i1> %1, <16 x i8> %inactive)
1209  ret <16 x i8> %2
1210}
1211
; Predicated vector-by-vector shift, unsigned 16-bit lanes, flags (saturate=1, round=1, unsigned=1).
; %p is zero-extended and converted to an <8 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vqrshlt.u16 q0, q1, q2.
1212define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
1213; CHECK-LABEL: test_vqrshlq_m_u16:
1214; CHECK:       @ %bb.0: @ %entry
1215; CHECK-NEXT:    vmsr p0, r0
1216; CHECK-NEXT:    vpst
1217; CHECK-NEXT:    vqrshlt.u16 q0, q1, q2
1218; CHECK-NEXT:    bx lr
1219entry:
1220  %0 = zext i16 %p to i32
1221  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1222  %2 = call <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive)
1223  ret <8 x i16> %2
1224}
1225
; Predicated vector-by-vector shift, unsigned 32-bit lanes, flags (saturate=1, round=1, unsigned=1).
; %p is zero-extended and converted to a <4 x i1> mask with pred.i2v; %inactive is the
; intrinsic's last operand. Expected output: vmsr/vpst followed by vqrshlt.u32 q0, q1, q2.
1226define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
1227; CHECK-LABEL: test_vqrshlq_m_u32:
1228; CHECK:       @ %bb.0: @ %entry
1229; CHECK-NEXT:    vmsr p0, r0
1230; CHECK-NEXT:    vpst
1231; CHECK-NEXT:    vqrshlt.u32 q0, q1, q2
1232; CHECK-NEXT:    bx lr
1233entry:
1234  %0 = zext i16 %p to i32
1235  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1236  %2 = call <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive)
1237  ret <4 x i32> %2
1238}
1239
; Predicated vector-by-scalar shift, signed 8-bit lanes, flags (saturate=1, round=1, unsigned=0).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <16 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vqrshlt.s8 q0, r0.
1240define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_m_n_s8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
1241; CHECK-LABEL: test_vqrshlq_m_n_s8:
1242; CHECK:       @ %bb.0: @ %entry
1243; CHECK-NEXT:    vmsr p0, r1
1244; CHECK-NEXT:    vpst
1245; CHECK-NEXT:    vqrshlt.s8 q0, r0
1246; CHECK-NEXT:    bx lr
1247entry:
1248  %0 = zext i16 %p to i32
1249  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
1250  %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 1, i32 1, i32 0, <16 x i1> %1)
1251  ret <16 x i8> %2
1252}
1253
; Predicated vector-by-scalar shift, signed 16-bit lanes, flags (saturate=1, round=1, unsigned=0).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <8 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vqrshlt.s16 q0, r0.
1254define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_m_n_s16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
1255; CHECK-LABEL: test_vqrshlq_m_n_s16:
1256; CHECK:       @ %bb.0: @ %entry
1257; CHECK-NEXT:    vmsr p0, r1
1258; CHECK-NEXT:    vpst
1259; CHECK-NEXT:    vqrshlt.s16 q0, r0
1260; CHECK-NEXT:    bx lr
1261entry:
1262  %0 = zext i16 %p to i32
1263  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1264  %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 1, i32 1, i32 0, <8 x i1> %1)
1265  ret <8 x i16> %2
1266}
1267
; Predicated vector-by-scalar shift, signed 32-bit lanes, flags (saturate=1, round=1, unsigned=0).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <4 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vqrshlt.s32 q0, r0.
1268define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_m_n_s32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
1269; CHECK-LABEL: test_vqrshlq_m_n_s32:
1270; CHECK:       @ %bb.0: @ %entry
1271; CHECK-NEXT:    vmsr p0, r1
1272; CHECK-NEXT:    vpst
1273; CHECK-NEXT:    vqrshlt.s32 q0, r0
1274; CHECK-NEXT:    bx lr
1275entry:
1276  %0 = zext i16 %p to i32
1277  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1278  %2 = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 1, i32 1, i32 0, <4 x i1> %1)
1279  ret <4 x i32> %2
1280}
1281
; Predicated vector-by-scalar shift, unsigned 8-bit lanes, flags (saturate=1, round=1, unsigned=1).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <16 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vqrshlt.u8 q0, r0.
1282define arm_aapcs_vfpcc <16 x i8> @test_vqrshlq_m_n_u8(<16 x i8> %a, i32 %b, i16 zeroext %p) {
1283; CHECK-LABEL: test_vqrshlq_m_n_u8:
1284; CHECK:       @ %bb.0: @ %entry
1285; CHECK-NEXT:    vmsr p0, r1
1286; CHECK-NEXT:    vpst
1287; CHECK-NEXT:    vqrshlt.u8 q0, r0
1288; CHECK-NEXT:    bx lr
1289entry:
1290  %0 = zext i16 %p to i32
1291  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
1292  %2 = call <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8> %a, i32 %b, i32 1, i32 1, i32 1, <16 x i1> %1)
1293  ret <16 x i8> %2
1294}
1295
; Predicated vector-by-scalar shift, unsigned 16-bit lanes, flags (saturate=1, round=1, unsigned=1).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <8 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vqrshlt.u16 q0, r0.
1296define arm_aapcs_vfpcc <8 x i16> @test_vqrshlq_m_n_u16(<8 x i16> %a, i32 %b, i16 zeroext %p) {
1297; CHECK-LABEL: test_vqrshlq_m_n_u16:
1298; CHECK:       @ %bb.0: @ %entry
1299; CHECK-NEXT:    vmsr p0, r1
1300; CHECK-NEXT:    vpst
1301; CHECK-NEXT:    vqrshlt.u16 q0, r0
1302; CHECK-NEXT:    bx lr
1303entry:
1304  %0 = zext i16 %p to i32
1305  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1306  %2 = call <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16> %a, i32 %b, i32 1, i32 1, i32 1, <8 x i1> %1)
1307  ret <8 x i16> %2
1308}
1309
; Predicated vector-by-scalar shift, unsigned 32-bit lanes, flags (saturate=1, round=1, unsigned=1).
; The shift count is the i32 %b (r0 in the expected output) and the mask comes from %p (r1),
; converted to <4 x i1> with pred.i2v. Expected output: vmsr/vpst followed by vqrshlt.u32 q0, r0.
1310define arm_aapcs_vfpcc <4 x i32> @test_vqrshlq_m_n_u32(<4 x i32> %a, i32 %b, i16 zeroext %p) {
1311; CHECK-LABEL: test_vqrshlq_m_n_u32:
1312; CHECK:       @ %bb.0: @ %entry
1313; CHECK-NEXT:    vmsr p0, r1
1314; CHECK-NEXT:    vpst
1315; CHECK-NEXT:    vqrshlt.u32 q0, r0
1316; CHECK-NEXT:    bx lr
1317entry:
1318  %0 = zext i16 %p to i32
1319  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1320  %2 = call <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32> %a, i32 %b, i32 1, i32 1, i32 1, <4 x i1> %1)
1321  ret <4 x i32> %2
1322}
1323
; Declarations of the MVE intrinsics used by this test, one per lane type (v16i8, v8i16, v4i32):
;   vshl.vector / vshl.scalar          - unpredicated shift by a vector / by an i32 scalar
;   pred.i2v                           - converts an i32 mask to an <N x i1> predicate
;   vshl.vector.predicated             - as vshl.vector plus a predicate and a trailing vector operand
;   vshl.scalar.predicated             - as vshl.scalar plus a predicate
; The three i32 operands after the shift amount are immediate flags
; (saturate, round, unsigned, in that order, matching the q / r / .u in the expected mnemonics).
1324declare <16 x i8> @llvm.arm.mve.vshl.vector.v16i8.v16i8(<16 x i8>, <16 x i8>, i32, i32, i32)
1325declare <8 x i16> @llvm.arm.mve.vshl.vector.v8i16.v8i16(<8 x i16>, <8 x i16>, i32, i32, i32)
1326declare <4 x i32> @llvm.arm.mve.vshl.vector.v4i32.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32)
1327declare <16 x i8> @llvm.arm.mve.vshl.scalar.v16i8(<16 x i8>, i32, i32, i32, i32)
1328declare <8 x i16> @llvm.arm.mve.vshl.scalar.v8i16(<8 x i16>, i32, i32, i32, i32)
1329declare <4 x i32> @llvm.arm.mve.vshl.scalar.v4i32(<4 x i32>, i32, i32, i32, i32)
1330declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
1331declare <16 x i8> @llvm.arm.mve.vshl.vector.predicated.v16i8.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, i32, i32, <16 x i1>, <16 x i8>)
1332declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
1333declare <8 x i16> @llvm.arm.mve.vshl.vector.predicated.v8i16.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, i32, i32, <8 x i1>, <8 x i16>)
1334declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
1335declare <4 x i32> @llvm.arm.mve.vshl.vector.predicated.v4i32.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, i32, i32, <4 x i1>, <4 x i32>)
1336declare <16 x i8> @llvm.arm.mve.vshl.scalar.predicated.v16i8.v16i1(<16 x i8>, i32, i32, i32, i32, <16 x i1>)
1337declare <8 x i16> @llvm.arm.mve.vshl.scalar.predicated.v8i16.v8i1(<8 x i16>, i32, i32, i32, i32, <8 x i1>)
1338declare <4 x i32> @llvm.arm.mve.vshl.scalar.predicated.v4i32.v4i1(<4 x i32>, i32, i32, i32, i32, <4 x i1>)
1339