1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
3
; Unpredicated vld1q tests: a full-width, naturally-aligned vector load from a
; scalar base pointer must select the plain MVE contiguous load for its lane
; size (vldrb/vldrh/vldrw). Signed and unsigned element types select the same
; instruction, since a full-width load performs no extension.
4define arm_aapcs_vfpcc <8 x half> @test_vld1q_f16(half* %base) {
5; CHECK-LABEL: test_vld1q_f16:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    vldrh.u16 q0, [r0]
8; CHECK-NEXT:    bx lr
9entry:
10  %0 = bitcast half* %base to <8 x half>*
11  %1 = load <8 x half>, <8 x half>* %0, align 2
12  ret <8 x half> %1
13}
14
15define arm_aapcs_vfpcc <4 x float> @test_vld1q_f32(float* %base) {
16; CHECK-LABEL: test_vld1q_f32:
17; CHECK:       @ %bb.0: @ %entry
18; CHECK-NEXT:    vldrw.u32 q0, [r0]
19; CHECK-NEXT:    bx lr
20entry:
21  %0 = bitcast float* %base to <4 x float>*
22  %1 = load <4 x float>, <4 x float>* %0, align 4
23  ret <4 x float> %1
24}
25
26define arm_aapcs_vfpcc <16 x i8> @test_vld1q_s8(i8* %base) {
27; CHECK-LABEL: test_vld1q_s8:
28; CHECK:       @ %bb.0: @ %entry
29; CHECK-NEXT:    vldrb.u8 q0, [r0]
30; CHECK-NEXT:    bx lr
31entry:
32  %0 = bitcast i8* %base to <16 x i8>*
33  %1 = load <16 x i8>, <16 x i8>* %0, align 1
34  ret <16 x i8> %1
35}
36
37define arm_aapcs_vfpcc <8 x i16> @test_vld1q_s16(i16* %base) {
38; CHECK-LABEL: test_vld1q_s16:
39; CHECK:       @ %bb.0: @ %entry
40; CHECK-NEXT:    vldrh.u16 q0, [r0]
41; CHECK-NEXT:    bx lr
42entry:
43  %0 = bitcast i16* %base to <8 x i16>*
44  %1 = load <8 x i16>, <8 x i16>* %0, align 2
45  ret <8 x i16> %1
46}
47
48define arm_aapcs_vfpcc <4 x i32> @test_vld1q_s32(i32* %base) {
49; CHECK-LABEL: test_vld1q_s32:
50; CHECK:       @ %bb.0: @ %entry
51; CHECK-NEXT:    vldrw.u32 q0, [r0]
52; CHECK-NEXT:    bx lr
53entry:
54  %0 = bitcast i32* %base to <4 x i32>*
55  %1 = load <4 x i32>, <4 x i32>* %0, align 4
56  ret <4 x i32> %1
57}
58
59define arm_aapcs_vfpcc <16 x i8> @test_vld1q_u8(i8* %base) {
60; CHECK-LABEL: test_vld1q_u8:
61; CHECK:       @ %bb.0: @ %entry
62; CHECK-NEXT:    vldrb.u8 q0, [r0]
63; CHECK-NEXT:    bx lr
64entry:
65  %0 = bitcast i8* %base to <16 x i8>*
66  %1 = load <16 x i8>, <16 x i8>* %0, align 1
67  ret <16 x i8> %1
68}
69
70define arm_aapcs_vfpcc <8 x i16> @test_vld1q_u16(i16* %base) {
71; CHECK-LABEL: test_vld1q_u16:
72; CHECK:       @ %bb.0: @ %entry
73; CHECK-NEXT:    vldrh.u16 q0, [r0]
74; CHECK-NEXT:    bx lr
75entry:
76  %0 = bitcast i16* %base to <8 x i16>*
77  %1 = load <8 x i16>, <8 x i16>* %0, align 2
78  ret <8 x i16> %1
79}
80
81define arm_aapcs_vfpcc <4 x i32> @test_vld1q_u32(i32* %base) {
82; CHECK-LABEL: test_vld1q_u32:
83; CHECK:       @ %bb.0: @ %entry
84; CHECK-NEXT:    vldrw.u32 q0, [r0]
85; CHECK-NEXT:    bx lr
86entry:
87  %0 = bitcast i32* %base to <4 x i32>*
88  %1 = load <4 x i32>, <4 x i32>* %0, align 4
89  ret <4 x i32> %1
90}
91
; Predicated (zeroing, "_z") vld1q tests: the i16 predicate %p is converted to
; a vector predicate via llvm.arm.mve.pred.i2v and fed into llvm.masked.load
; with a zeroinitializer passthru. Expected lowering: move %p into p0 with
; vmsr, open a one-instruction VPT block with vpst, then issue the
; predicated load (vldrbt/vldrht/vldrwt); masked-off lanes are zeroed.
92define arm_aapcs_vfpcc <8 x half> @test_vld1q_z_f16(half* %base, i16 zeroext %p) {
93; CHECK-LABEL: test_vld1q_z_f16:
94; CHECK:       @ %bb.0: @ %entry
95; CHECK-NEXT:    vmsr p0, r1
96; CHECK-NEXT:    vpst
97; CHECK-NEXT:    vldrht.u16 q0, [r0]
98; CHECK-NEXT:    bx lr
99entry:
100  %0 = bitcast half* %base to <8 x half>*
101  %1 = zext i16 %p to i32
102  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
103  %3 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %2, <8 x half> zeroinitializer)
104  ret <8 x half> %3
105}
106
107declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
108
109declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32 immarg, <8 x i1>, <8 x half>)
110
111define arm_aapcs_vfpcc <4 x float> @test_vld1q_z_f32(float* %base, i16 zeroext %p) {
112; CHECK-LABEL: test_vld1q_z_f32:
113; CHECK:       @ %bb.0: @ %entry
114; CHECK-NEXT:    vmsr p0, r1
115; CHECK-NEXT:    vpst
116; CHECK-NEXT:    vldrwt.u32 q0, [r0]
117; CHECK-NEXT:    bx lr
118entry:
119  %0 = bitcast float* %base to <4 x float>*
120  %1 = zext i16 %p to i32
121  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
122  %3 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %2, <4 x float> zeroinitializer)
123  ret <4 x float> %3
124}
125
126declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
127
128declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)
129
130define arm_aapcs_vfpcc <16 x i8> @test_vld1q_z_s8(i8* %base, i16 zeroext %p) {
131; CHECK-LABEL: test_vld1q_z_s8:
132; CHECK:       @ %bb.0: @ %entry
133; CHECK-NEXT:    vmsr p0, r1
134; CHECK-NEXT:    vpst
135; CHECK-NEXT:    vldrbt.u8 q0, [r0]
136; CHECK-NEXT:    bx lr
137entry:
138  %0 = bitcast i8* %base to <16 x i8>*
139  %1 = zext i16 %p to i32
140  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
141  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
142  ret <16 x i8> %3
143}
144
145declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
146
147declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)
148
149define arm_aapcs_vfpcc <8 x i16> @test_vld1q_z_s16(i16* %base, i16 zeroext %p) {
150; CHECK-LABEL: test_vld1q_z_s16:
151; CHECK:       @ %bb.0: @ %entry
152; CHECK-NEXT:    vmsr p0, r1
153; CHECK-NEXT:    vpst
154; CHECK-NEXT:    vldrht.u16 q0, [r0]
155; CHECK-NEXT:    bx lr
156entry:
157  %0 = bitcast i16* %base to <8 x i16>*
158  %1 = zext i16 %p to i32
159  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
160  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
161  ret <8 x i16> %3
162}
163
164declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)
165
166define arm_aapcs_vfpcc <4 x i32> @test_vld1q_z_s32(i32* %base, i16 zeroext %p) {
167; CHECK-LABEL: test_vld1q_z_s32:
168; CHECK:       @ %bb.0: @ %entry
169; CHECK-NEXT:    vmsr p0, r1
170; CHECK-NEXT:    vpst
171; CHECK-NEXT:    vldrwt.u32 q0, [r0]
172; CHECK-NEXT:    bx lr
173entry:
174  %0 = bitcast i32* %base to <4 x i32>*
175  %1 = zext i16 %p to i32
176  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
177  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
178  ret <4 x i32> %3
179}
180
181declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)
182
183define arm_aapcs_vfpcc <16 x i8> @test_vld1q_z_u8(i8* %base, i16 zeroext %p) {
184; CHECK-LABEL: test_vld1q_z_u8:
185; CHECK:       @ %bb.0: @ %entry
186; CHECK-NEXT:    vmsr p0, r1
187; CHECK-NEXT:    vpst
188; CHECK-NEXT:    vldrbt.u8 q0, [r0]
189; CHECK-NEXT:    bx lr
190entry:
191  %0 = bitcast i8* %base to <16 x i8>*
192  %1 = zext i16 %p to i32
193  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
194  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
195  ret <16 x i8> %3
196}
197
198define arm_aapcs_vfpcc <8 x i16> @test_vld1q_z_u16(i16* %base, i16 zeroext %p) {
199; CHECK-LABEL: test_vld1q_z_u16:
200; CHECK:       @ %bb.0: @ %entry
201; CHECK-NEXT:    vmsr p0, r1
202; CHECK-NEXT:    vpst
203; CHECK-NEXT:    vldrht.u16 q0, [r0]
204; CHECK-NEXT:    bx lr
205entry:
206  %0 = bitcast i16* %base to <8 x i16>*
207  %1 = zext i16 %p to i32
208  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
209  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
210  ret <8 x i16> %3
211}
212
213define arm_aapcs_vfpcc <4 x i32> @test_vld1q_z_u32(i32* %base, i16 zeroext %p) {
214; CHECK-LABEL: test_vld1q_z_u32:
215; CHECK:       @ %bb.0: @ %entry
216; CHECK-NEXT:    vmsr p0, r1
217; CHECK-NEXT:    vpst
218; CHECK-NEXT:    vldrwt.u32 q0, [r0]
219; CHECK-NEXT:    bx lr
220entry:
221  %0 = bitcast i32* %base to <4 x i32>*
222  %1 = zext i16 %p to i32
223  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
224  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
225  ret <4 x i32> %3
226}
227
; vldrbq tests: byte loads from an i8 base. The full-width <16 x i8> case is a
; plain vldrb.u8; the narrower <8 x i8>/<4 x i8> loads followed by sext/zext
; must fold into the single widening forms vldrb.s16/.s32 and vldrb.u16/.u32.
228define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_s8(i8* %base) {
229; CHECK-LABEL: test_vldrbq_s8:
230; CHECK:       @ %bb.0: @ %entry
231; CHECK-NEXT:    vldrb.u8 q0, [r0]
232; CHECK-NEXT:    bx lr
233entry:
234  %0 = bitcast i8* %base to <16 x i8>*
235  %1 = load <16 x i8>, <16 x i8>* %0, align 1
236  ret <16 x i8> %1
237}
238
239define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_s16(i8* %base) {
240; CHECK-LABEL: test_vldrbq_s16:
241; CHECK:       @ %bb.0: @ %entry
242; CHECK-NEXT:    vldrb.s16 q0, [r0]
243; CHECK-NEXT:    bx lr
244entry:
245  %0 = bitcast i8* %base to <8 x i8>*
246  %1 = load <8 x i8>, <8 x i8>* %0, align 1
247  %2 = sext <8 x i8> %1 to <8 x i16>
248  ret <8 x i16> %2
249}
250
251define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_s32(i8* %base) {
252; CHECK-LABEL: test_vldrbq_s32:
253; CHECK:       @ %bb.0: @ %entry
254; CHECK-NEXT:    vldrb.s32 q0, [r0]
255; CHECK-NEXT:    bx lr
256entry:
257  %0 = bitcast i8* %base to <4 x i8>*
258  %1 = load <4 x i8>, <4 x i8>* %0, align 1
259  %2 = sext <4 x i8> %1 to <4 x i32>
260  ret <4 x i32> %2
261}
262
263define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_u8(i8* %base) {
264; CHECK-LABEL: test_vldrbq_u8:
265; CHECK:       @ %bb.0: @ %entry
266; CHECK-NEXT:    vldrb.u8 q0, [r0]
267; CHECK-NEXT:    bx lr
268entry:
269  %0 = bitcast i8* %base to <16 x i8>*
270  %1 = load <16 x i8>, <16 x i8>* %0, align 1
271  ret <16 x i8> %1
272}
273
274define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_u16(i8* %base) {
275; CHECK-LABEL: test_vldrbq_u16:
276; CHECK:       @ %bb.0: @ %entry
277; CHECK-NEXT:    vldrb.u16 q0, [r0]
278; CHECK-NEXT:    bx lr
279entry:
280  %0 = bitcast i8* %base to <8 x i8>*
281  %1 = load <8 x i8>, <8 x i8>* %0, align 1
282  %2 = zext <8 x i8> %1 to <8 x i16>
283  ret <8 x i16> %2
284}
285
286define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_u32(i8* %base) {
287; CHECK-LABEL: test_vldrbq_u32:
288; CHECK:       @ %bb.0: @ %entry
289; CHECK-NEXT:    vldrb.u32 q0, [r0]
290; CHECK-NEXT:    bx lr
291entry:
292  %0 = bitcast i8* %base to <4 x i8>*
293  %1 = load <4 x i8>, <4 x i8>* %0, align 1
294  %2 = zext <4 x i8> %1 to <4 x i32>
295  ret <4 x i32> %2
296}
297
; Predicated vldrbq tests: a masked byte load (zero passthru) followed by
; sext/zext must fold into a single predicated widening load inside a VPT
; block (vldrbt.s16/.s32/.u16/.u32); the full-width case stays vldrbt.u8.
298define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_z_s8(i8* %base, i16 zeroext %p) {
299; CHECK-LABEL: test_vldrbq_z_s8:
300; CHECK:       @ %bb.0: @ %entry
301; CHECK-NEXT:    vmsr p0, r1
302; CHECK-NEXT:    vpst
303; CHECK-NEXT:    vldrbt.u8 q0, [r0]
304; CHECK-NEXT:    bx lr
305entry:
306  %0 = bitcast i8* %base to <16 x i8>*
307  %1 = zext i16 %p to i32
308  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
309  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
310  ret <16 x i8> %3
311}
312
313define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_z_s16(i8* %base, i16 zeroext %p) {
314; CHECK-LABEL: test_vldrbq_z_s16:
315; CHECK:       @ %bb.0: @ %entry
316; CHECK-NEXT:    vmsr p0, r1
317; CHECK-NEXT:    vpst
318; CHECK-NEXT:    vldrbt.s16 q0, [r0]
319; CHECK-NEXT:    bx lr
320entry:
321  %0 = bitcast i8* %base to <8 x i8>*
322  %1 = zext i16 %p to i32
323  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
324  %3 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %2, <8 x i8> zeroinitializer)
325  %4 = sext <8 x i8> %3 to <8 x i16>
326  ret <8 x i16> %4
327}
328
329declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)
330
331define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_z_s32(i8* %base, i16 zeroext %p) {
332; CHECK-LABEL: test_vldrbq_z_s32:
333; CHECK:       @ %bb.0: @ %entry
334; CHECK-NEXT:    vmsr p0, r1
335; CHECK-NEXT:    vpst
336; CHECK-NEXT:    vldrbt.s32 q0, [r0]
337; CHECK-NEXT:    bx lr
338entry:
339  %0 = bitcast i8* %base to <4 x i8>*
340  %1 = zext i16 %p to i32
341  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
342  %3 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %2, <4 x i8> zeroinitializer)
343  %4 = sext <4 x i8> %3 to <4 x i32>
344  ret <4 x i32> %4
345}
346
347declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)
348
349define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_z_u8(i8* %base, i16 zeroext %p) {
350; CHECK-LABEL: test_vldrbq_z_u8:
351; CHECK:       @ %bb.0: @ %entry
352; CHECK-NEXT:    vmsr p0, r1
353; CHECK-NEXT:    vpst
354; CHECK-NEXT:    vldrbt.u8 q0, [r0]
355; CHECK-NEXT:    bx lr
356entry:
357  %0 = bitcast i8* %base to <16 x i8>*
358  %1 = zext i16 %p to i32
359  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
360  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
361  ret <16 x i8> %3
362}
363
364define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_z_u16(i8* %base, i16 zeroext %p) {
365; CHECK-LABEL: test_vldrbq_z_u16:
366; CHECK:       @ %bb.0: @ %entry
367; CHECK-NEXT:    vmsr p0, r1
368; CHECK-NEXT:    vpst
369; CHECK-NEXT:    vldrbt.u16 q0, [r0]
370; CHECK-NEXT:    bx lr
371entry:
372  %0 = bitcast i8* %base to <8 x i8>*
373  %1 = zext i16 %p to i32
374  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
375  %3 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %2, <8 x i8> zeroinitializer)
376  %4 = zext <8 x i8> %3 to <8 x i16>
377  ret <8 x i16> %4
378}
379
380define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_z_u32(i8* %base, i16 zeroext %p) {
381; CHECK-LABEL: test_vldrbq_z_u32:
382; CHECK:       @ %bb.0: @ %entry
383; CHECK-NEXT:    vmsr p0, r1
384; CHECK-NEXT:    vpst
385; CHECK-NEXT:    vldrbt.u32 q0, [r0]
386; CHECK-NEXT:    bx lr
387entry:
388  %0 = bitcast i8* %base to <4 x i8>*
389  %1 = zext i16 %p to i32
390  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
391  %3 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %2, <4 x i8> zeroinitializer)
392  %4 = zext <4 x i8> %3 to <4 x i32>
393  ret <4 x i32> %4
394}
395
; vldrhq tests: halfword loads from a half/i16 base. Full-width loads select
; vldrh.u16; <4 x i16> loads extended to i32 fold into vldrh.s32/.u32.
396define arm_aapcs_vfpcc <8 x half> @test_vldrhq_f16(half* %base) {
397; CHECK-LABEL: test_vldrhq_f16:
398; CHECK:       @ %bb.0: @ %entry
399; CHECK-NEXT:    vldrh.u16 q0, [r0]
400; CHECK-NEXT:    bx lr
401entry:
402  %0 = bitcast half* %base to <8 x half>*
403  %1 = load <8 x half>, <8 x half>* %0, align 2
404  ret <8 x half> %1
405}
406
407define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_s16(i16* %base) {
408; CHECK-LABEL: test_vldrhq_s16:
409; CHECK:       @ %bb.0: @ %entry
410; CHECK-NEXT:    vldrh.u16 q0, [r0]
411; CHECK-NEXT:    bx lr
412entry:
413  %0 = bitcast i16* %base to <8 x i16>*
414  %1 = load <8 x i16>, <8 x i16>* %0, align 2
415  ret <8 x i16> %1
416}
417
418define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_s32(i16* %base) {
419; CHECK-LABEL: test_vldrhq_s32:
420; CHECK:       @ %bb.0: @ %entry
421; CHECK-NEXT:    vldrh.s32 q0, [r0]
422; CHECK-NEXT:    bx lr
423entry:
424  %0 = bitcast i16* %base to <4 x i16>*
425  %1 = load <4 x i16>, <4 x i16>* %0, align 2
426  %2 = sext <4 x i16> %1 to <4 x i32>
427  ret <4 x i32> %2
428}
429
430define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_u16(i16* %base) {
431; CHECK-LABEL: test_vldrhq_u16:
432; CHECK:       @ %bb.0: @ %entry
433; CHECK-NEXT:    vldrh.u16 q0, [r0]
434; CHECK-NEXT:    bx lr
435entry:
436  %0 = bitcast i16* %base to <8 x i16>*
437  %1 = load <8 x i16>, <8 x i16>* %0, align 2
438  ret <8 x i16> %1
439}
440
441define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_u32(i16* %base) {
442; CHECK-LABEL: test_vldrhq_u32:
443; CHECK:       @ %bb.0: @ %entry
444; CHECK-NEXT:    vldrh.u32 q0, [r0]
445; CHECK-NEXT:    bx lr
446entry:
447  %0 = bitcast i16* %base to <4 x i16>*
448  %1 = load <4 x i16>, <4 x i16>* %0, align 2
449  %2 = zext <4 x i16> %1 to <4 x i32>
450  ret <4 x i32> %2
451}
452
; Predicated vldrhq tests: masked halfword loads with zero passthru select
; vldrht inside a VPT block; masked <4 x i16> loads plus sext/zext fold into
; the predicated widening forms vldrht.s32/.u32.
453define arm_aapcs_vfpcc <8 x half> @test_vldrhq_z_f16(half* %base, i16 zeroext %p) {
454; CHECK-LABEL: test_vldrhq_z_f16:
455; CHECK:       @ %bb.0: @ %entry
456; CHECK-NEXT:    vmsr p0, r1
457; CHECK-NEXT:    vpst
458; CHECK-NEXT:    vldrht.u16 q0, [r0]
459; CHECK-NEXT:    bx lr
460entry:
461  %0 = bitcast half* %base to <8 x half>*
462  %1 = zext i16 %p to i32
463  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
464  %3 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %2, <8 x half> zeroinitializer)
465  ret <8 x half> %3
466}
467
468define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_z_s16(i16* %base, i16 zeroext %p) {
469; CHECK-LABEL: test_vldrhq_z_s16:
470; CHECK:       @ %bb.0: @ %entry
471; CHECK-NEXT:    vmsr p0, r1
472; CHECK-NEXT:    vpst
473; CHECK-NEXT:    vldrht.u16 q0, [r0]
474; CHECK-NEXT:    bx lr
475entry:
476  %0 = bitcast i16* %base to <8 x i16>*
477  %1 = zext i16 %p to i32
478  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
479  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
480  ret <8 x i16> %3
481}
482
483define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_z_s32(i16* %base, i16 zeroext %p) {
484; CHECK-LABEL: test_vldrhq_z_s32:
485; CHECK:       @ %bb.0: @ %entry
486; CHECK-NEXT:    vmsr p0, r1
487; CHECK-NEXT:    vpst
488; CHECK-NEXT:    vldrht.s32 q0, [r0]
489; CHECK-NEXT:    bx lr
490entry:
491  %0 = bitcast i16* %base to <4 x i16>*
492  %1 = zext i16 %p to i32
493  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
494  %3 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %2, <4 x i16> zeroinitializer)
495  %4 = sext <4 x i16> %3 to <4 x i32>
496  ret <4 x i32> %4
497}
498
499declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)
500
501define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_z_u16(i16* %base, i16 zeroext %p) {
502; CHECK-LABEL: test_vldrhq_z_u16:
503; CHECK:       @ %bb.0: @ %entry
504; CHECK-NEXT:    vmsr p0, r1
505; CHECK-NEXT:    vpst
506; CHECK-NEXT:    vldrht.u16 q0, [r0]
507; CHECK-NEXT:    bx lr
508entry:
509  %0 = bitcast i16* %base to <8 x i16>*
510  %1 = zext i16 %p to i32
511  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
512  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
513  ret <8 x i16> %3
514}
515
516define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_z_u32(i16* %base, i16 zeroext %p) {
517; CHECK-LABEL: test_vldrhq_z_u32:
518; CHECK:       @ %bb.0: @ %entry
519; CHECK-NEXT:    vmsr p0, r1
520; CHECK-NEXT:    vpst
521; CHECK-NEXT:    vldrht.u32 q0, [r0]
522; CHECK-NEXT:    bx lr
523entry:
524  %0 = bitcast i16* %base to <4 x i16>*
525  %1 = zext i16 %p to i32
526  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
527  %3 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %2, <4 x i16> zeroinitializer)
528  %4 = zext <4 x i16> %3 to <4 x i32>
529  ret <4 x i32> %4
530}
531
; vldrwq tests: full-width 32-bit element loads always select vldrw.u32
; (no widening variants exist at word size).
532define arm_aapcs_vfpcc <4 x float> @test_vldrwq_f32(float* %base) {
533; CHECK-LABEL: test_vldrwq_f32:
534; CHECK:       @ %bb.0: @ %entry
535; CHECK-NEXT:    vldrw.u32 q0, [r0]
536; CHECK-NEXT:    bx lr
537entry:
538  %0 = bitcast float* %base to <4 x float>*
539  %1 = load <4 x float>, <4 x float>* %0, align 4
540  ret <4 x float> %1
541}
542
543define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_s32(i32* %base) {
544; CHECK-LABEL: test_vldrwq_s32:
545; CHECK:       @ %bb.0: @ %entry
546; CHECK-NEXT:    vldrw.u32 q0, [r0]
547; CHECK-NEXT:    bx lr
548entry:
549  %0 = bitcast i32* %base to <4 x i32>*
550  %1 = load <4 x i32>, <4 x i32>* %0, align 4
551  ret <4 x i32> %1
552}
553
554define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_u32(i32* %base) {
555; CHECK-LABEL: test_vldrwq_u32:
556; CHECK:       @ %bb.0: @ %entry
557; CHECK-NEXT:    vldrw.u32 q0, [r0]
558; CHECK-NEXT:    bx lr
559entry:
560  %0 = bitcast i32* %base to <4 x i32>*
561  %1 = load <4 x i32>, <4 x i32>* %0, align 4
562  ret <4 x i32> %1
563}
564
; Predicated vldrwq tests: masked word loads with zero passthru select
; vldrwt.u32 inside a one-instruction VPT block.
565define arm_aapcs_vfpcc <4 x float> @test_vldrwq_z_f32(float* %base, i16 zeroext %p) {
566; CHECK-LABEL: test_vldrwq_z_f32:
567; CHECK:       @ %bb.0: @ %entry
568; CHECK-NEXT:    vmsr p0, r1
569; CHECK-NEXT:    vpst
570; CHECK-NEXT:    vldrwt.u32 q0, [r0]
571; CHECK-NEXT:    bx lr
572entry:
573  %0 = bitcast float* %base to <4 x float>*
574  %1 = zext i16 %p to i32
575  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
576  %3 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %2, <4 x float> zeroinitializer)
577  ret <4 x float> %3
578}
579
580define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_z_s32(i32* %base, i16 zeroext %p) {
581; CHECK-LABEL: test_vldrwq_z_s32:
582; CHECK:       @ %bb.0: @ %entry
583; CHECK-NEXT:    vmsr p0, r1
584; CHECK-NEXT:    vpst
585; CHECK-NEXT:    vldrwt.u32 q0, [r0]
586; CHECK-NEXT:    bx lr
587entry:
588  %0 = bitcast i32* %base to <4 x i32>*
589  %1 = zext i16 %p to i32
590  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
591  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
592  ret <4 x i32> %3
593}
594
595define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_z_u32(i32* %base, i16 zeroext %p) {
596; CHECK-LABEL: test_vldrwq_z_u32:
597; CHECK:       @ %bb.0: @ %entry
598; CHECK-NEXT:    vmsr p0, r1
599; CHECK-NEXT:    vpst
600; CHECK-NEXT:    vldrwt.u32 q0, [r0]
601; CHECK-NEXT:    bx lr
602entry:
603  %0 = bitcast i32* %base to <4 x i32>*
604  %1 = zext i16 %p to i32
605  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
606  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
607  ret <4 x i32> %3
608}
609
; Unpredicated vst1q tests: a full-width, naturally-aligned vector store
; selects the plain MVE contiguous store for its lane size
; (vstrb.8/vstrh.16/vstrw.32), identical for signed and unsigned elements.
610define arm_aapcs_vfpcc void @test_vst1q_f16(half* %base, <8 x half> %value) {
611; CHECK-LABEL: test_vst1q_f16:
612; CHECK:       @ %bb.0: @ %entry
613; CHECK-NEXT:    vstrh.16 q0, [r0]
614; CHECK-NEXT:    bx lr
615entry:
616  %0 = bitcast half* %base to <8 x half>*
617  store <8 x half> %value, <8 x half>* %0, align 2
618  ret void
619}
620
621define arm_aapcs_vfpcc void @test_vst1q_f32(float* %base, <4 x float> %value) {
622; CHECK-LABEL: test_vst1q_f32:
623; CHECK:       @ %bb.0: @ %entry
624; CHECK-NEXT:    vstrw.32 q0, [r0]
625; CHECK-NEXT:    bx lr
626entry:
627  %0 = bitcast float* %base to <4 x float>*
628  store <4 x float> %value, <4 x float>* %0, align 4
629  ret void
630}
631
632define arm_aapcs_vfpcc void @test_vst1q_s8(i8* %base, <16 x i8> %value) {
633; CHECK-LABEL: test_vst1q_s8:
634; CHECK:       @ %bb.0: @ %entry
635; CHECK-NEXT:    vstrb.8 q0, [r0]
636; CHECK-NEXT:    bx lr
637entry:
638  %0 = bitcast i8* %base to <16 x i8>*
639  store <16 x i8> %value, <16 x i8>* %0, align 1
640  ret void
641}
642
643define arm_aapcs_vfpcc void @test_vst1q_s16(i16* %base, <8 x i16> %value) {
644; CHECK-LABEL: test_vst1q_s16:
645; CHECK:       @ %bb.0: @ %entry
646; CHECK-NEXT:    vstrh.16 q0, [r0]
647; CHECK-NEXT:    bx lr
648entry:
649  %0 = bitcast i16* %base to <8 x i16>*
650  store <8 x i16> %value, <8 x i16>* %0, align 2
651  ret void
652}
653
654define arm_aapcs_vfpcc void @test_vst1q_s32(i32* %base, <4 x i32> %value) {
655; CHECK-LABEL: test_vst1q_s32:
656; CHECK:       @ %bb.0: @ %entry
657; CHECK-NEXT:    vstrw.32 q0, [r0]
658; CHECK-NEXT:    bx lr
659entry:
660  %0 = bitcast i32* %base to <4 x i32>*
661  store <4 x i32> %value, <4 x i32>* %0, align 4
662  ret void
663}
664
665define arm_aapcs_vfpcc void @test_vst1q_u8(i8* %base, <16 x i8> %value) {
666; CHECK-LABEL: test_vst1q_u8:
667; CHECK:       @ %bb.0: @ %entry
668; CHECK-NEXT:    vstrb.8 q0, [r0]
669; CHECK-NEXT:    bx lr
670entry:
671  %0 = bitcast i8* %base to <16 x i8>*
672  store <16 x i8> %value, <16 x i8>* %0, align 1
673  ret void
674}
675
676define arm_aapcs_vfpcc void @test_vst1q_u16(i16* %base, <8 x i16> %value) {
677; CHECK-LABEL: test_vst1q_u16:
678; CHECK:       @ %bb.0: @ %entry
679; CHECK-NEXT:    vstrh.16 q0, [r0]
680; CHECK-NEXT:    bx lr
681entry:
682  %0 = bitcast i16* %base to <8 x i16>*
683  store <8 x i16> %value, <8 x i16>* %0, align 2
684  ret void
685}
686
687define arm_aapcs_vfpcc void @test_vst1q_u32(i32* %base, <4 x i32> %value) {
688; CHECK-LABEL: test_vst1q_u32:
689; CHECK:       @ %bb.0: @ %entry
690; CHECK-NEXT:    vstrw.32 q0, [r0]
691; CHECK-NEXT:    bx lr
692entry:
693  %0 = bitcast i32* %base to <4 x i32>*
694  store <4 x i32> %value, <4 x i32>* %0, align 4
695  ret void
696}
697
; Predicated ("_p") vst1q tests: llvm.masked.store with an MVE vector
; predicate lowers to vmsr p0 / vpst followed by the predicated store
; (vstrbt/vstrht/vstrwt); only active lanes are written.
698define arm_aapcs_vfpcc void @test_vst1q_p_f16(half* %base, <8 x half> %value, i16 zeroext %p) {
699; CHECK-LABEL: test_vst1q_p_f16:
700; CHECK:       @ %bb.0: @ %entry
701; CHECK-NEXT:    vmsr p0, r1
702; CHECK-NEXT:    vpst
703; CHECK-NEXT:    vstrht.16 q0, [r0]
704; CHECK-NEXT:    bx lr
705entry:
706  %0 = bitcast half* %base to <8 x half>*
707  %1 = zext i16 %p to i32
708  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
709  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %value, <8 x half>* %0, i32 2, <8 x i1> %2)
710  ret void
711}
712
713declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32 immarg, <8 x i1>)
714
715define arm_aapcs_vfpcc void @test_vst1q_p_f32(float* %base, <4 x float> %value, i16 zeroext %p) {
716; CHECK-LABEL: test_vst1q_p_f32:
717; CHECK:       @ %bb.0: @ %entry
718; CHECK-NEXT:    vmsr p0, r1
719; CHECK-NEXT:    vpst
720; CHECK-NEXT:    vstrwt.32 q0, [r0]
721; CHECK-NEXT:    bx lr
722entry:
723  %0 = bitcast float* %base to <4 x float>*
724  %1 = zext i16 %p to i32
725  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
726  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %value, <4 x float>* %0, i32 4, <4 x i1> %2)
727  ret void
728}
729
730declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)
731
732define arm_aapcs_vfpcc void @test_vst1q_p_s8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
733; CHECK-LABEL: test_vst1q_p_s8:
734; CHECK:       @ %bb.0: @ %entry
735; CHECK-NEXT:    vmsr p0, r1
736; CHECK-NEXT:    vpst
737; CHECK-NEXT:    vstrbt.8 q0, [r0]
738; CHECK-NEXT:    bx lr
739entry:
740  %0 = bitcast i8* %base to <16 x i8>*
741  %1 = zext i16 %p to i32
742  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
743  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
744  ret void
745}
746
747declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)
748
749define arm_aapcs_vfpcc void @test_vst1q_p_s16(i16* %base, <8 x i16> %value, i16 zeroext %p) {
750; CHECK-LABEL: test_vst1q_p_s16:
751; CHECK:       @ %bb.0: @ %entry
752; CHECK-NEXT:    vmsr p0, r1
753; CHECK-NEXT:    vpst
754; CHECK-NEXT:    vstrht.16 q0, [r0]
755; CHECK-NEXT:    bx lr
756entry:
757  %0 = bitcast i16* %base to <8 x i16>*
758  %1 = zext i16 %p to i32
759  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
760  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2)
761  ret void
762}
763
764declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)
765
766define arm_aapcs_vfpcc void @test_vst1q_p_s32(i32* %base, <4 x i32> %value, i16 zeroext %p) {
767; CHECK-LABEL: test_vst1q_p_s32:
768; CHECK:       @ %bb.0: @ %entry
769; CHECK-NEXT:    vmsr p0, r1
770; CHECK-NEXT:    vpst
771; CHECK-NEXT:    vstrwt.32 q0, [r0]
772; CHECK-NEXT:    bx lr
773entry:
774  %0 = bitcast i32* %base to <4 x i32>*
775  %1 = zext i16 %p to i32
776  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
777  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2)
778  ret void
779}
780
781declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)
782
783define arm_aapcs_vfpcc void @test_vst1q_p_u8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
784; CHECK-LABEL: test_vst1q_p_u8:
785; CHECK:       @ %bb.0: @ %entry
786; CHECK-NEXT:    vmsr p0, r1
787; CHECK-NEXT:    vpst
788; CHECK-NEXT:    vstrbt.8 q0, [r0]
789; CHECK-NEXT:    bx lr
790entry:
791  %0 = bitcast i8* %base to <16 x i8>*
792  %1 = zext i16 %p to i32
793  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
794  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
795  ret void
796}
797
798define arm_aapcs_vfpcc void @test_vst1q_p_u16(i16* %base, <8 x i16> %value, i16 zeroext %p) {
799; CHECK-LABEL: test_vst1q_p_u16:
800; CHECK:       @ %bb.0: @ %entry
801; CHECK-NEXT:    vmsr p0, r1
802; CHECK-NEXT:    vpst
803; CHECK-NEXT:    vstrht.16 q0, [r0]
804; CHECK-NEXT:    bx lr
805entry:
806  %0 = bitcast i16* %base to <8 x i16>*
807  %1 = zext i16 %p to i32
808  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
809  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2)
810  ret void
811}
812
813define arm_aapcs_vfpcc void @test_vst1q_p_u32(i32* %base, <4 x i32> %value, i16 zeroext %p) {
814; CHECK-LABEL: test_vst1q_p_u32:
815; CHECK:       @ %bb.0: @ %entry
816; CHECK-NEXT:    vmsr p0, r1
817; CHECK-NEXT:    vpst
818; CHECK-NEXT:    vstrwt.32 q0, [r0]
819; CHECK-NEXT:    bx lr
820entry:
821  %0 = bitcast i32* %base to <4 x i32>*
822  %1 = zext i16 %p to i32
823  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
824  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2)
825  ret void
826}
827
; vstrbq tests: byte stores to an i8 base. The full-width <16 x i8> store is a
; plain vstrb.8; a trunc of <8 x i16>/<4 x i32> to i8 followed by a narrow
; store must fold into the single narrowing forms vstrb.16/vstrb.32.
828define arm_aapcs_vfpcc void @test_vstrbq_s8(i8* %base, <16 x i8> %value) {
829; CHECK-LABEL: test_vstrbq_s8:
830; CHECK:       @ %bb.0: @ %entry
831; CHECK-NEXT:    vstrb.8 q0, [r0]
832; CHECK-NEXT:    bx lr
833entry:
834  %0 = bitcast i8* %base to <16 x i8>*
835  store <16 x i8> %value, <16 x i8>* %0, align 1
836  ret void
837}
838
839define arm_aapcs_vfpcc void @test_vstrbq_s16(i8* %base, <8 x i16> %value) {
840; CHECK-LABEL: test_vstrbq_s16:
841; CHECK:       @ %bb.0: @ %entry
842; CHECK-NEXT:    vstrb.16 q0, [r0]
843; CHECK-NEXT:    bx lr
844entry:
845  %0 = trunc <8 x i16> %value to <8 x i8>
846  %1 = bitcast i8* %base to <8 x i8>*
847  store <8 x i8> %0, <8 x i8>* %1, align 1
848  ret void
849}
850
851define arm_aapcs_vfpcc void @test_vstrbq_s32(i8* %base, <4 x i32> %value) {
852; CHECK-LABEL: test_vstrbq_s32:
853; CHECK:       @ %bb.0: @ %entry
854; CHECK-NEXT:    vstrb.32 q0, [r0]
855; CHECK-NEXT:    bx lr
856entry:
857  %0 = trunc <4 x i32> %value to <4 x i8>
858  %1 = bitcast i8* %base to <4 x i8>*
859  store <4 x i8> %0, <4 x i8>* %1, align 1
860  ret void
861}
862
863define arm_aapcs_vfpcc void @test_vstrbq_u8(i8* %base, <16 x i8> %value) {
864; CHECK-LABEL: test_vstrbq_u8:
865; CHECK:       @ %bb.0: @ %entry
866; CHECK-NEXT:    vstrb.8 q0, [r0]
867; CHECK-NEXT:    bx lr
868entry:
869  %0 = bitcast i8* %base to <16 x i8>*
870  store <16 x i8> %value, <16 x i8>* %0, align 1
871  ret void
872}
873
874define arm_aapcs_vfpcc void @test_vstrbq_u16(i8* %base, <8 x i16> %value) {
875; CHECK-LABEL: test_vstrbq_u16:
876; CHECK:       @ %bb.0: @ %entry
877; CHECK-NEXT:    vstrb.16 q0, [r0]
878; CHECK-NEXT:    bx lr
879entry:
880  %0 = trunc <8 x i16> %value to <8 x i8>
881  %1 = bitcast i8* %base to <8 x i8>*
882  store <8 x i8> %0, <8 x i8>* %1, align 1
883  ret void
884}
885
886define arm_aapcs_vfpcc void @test_vstrbq_u32(i8* %base, <4 x i32> %value) {
887; CHECK-LABEL: test_vstrbq_u32:
888; CHECK:       @ %bb.0: @ %entry
889; CHECK-NEXT:    vstrb.32 q0, [r0]
890; CHECK-NEXT:    bx lr
891entry:
892  %0 = trunc <4 x i32> %value to <4 x i8>
893  %1 = bitcast i8* %base to <4 x i8>*
894  store <4 x i8> %0, <4 x i8>* %1, align 1
895  ret void
896}
897
; Predicated vstrbq tests: a trunc feeding a masked narrow byte store must
; fold into a single predicated narrowing store in a VPT block
; (vstrbt.16/vstrbt.32); the full-width case stays vstrbt.8.
898define arm_aapcs_vfpcc void @test_vstrbq_p_s8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
899; CHECK-LABEL: test_vstrbq_p_s8:
900; CHECK:       @ %bb.0: @ %entry
901; CHECK-NEXT:    vmsr p0, r1
902; CHECK-NEXT:    vpst
903; CHECK-NEXT:    vstrbt.8 q0, [r0]
904; CHECK-NEXT:    bx lr
905entry:
906  %0 = bitcast i8* %base to <16 x i8>*
907  %1 = zext i16 %p to i32
908  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
909  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
910  ret void
911}
912
913define arm_aapcs_vfpcc void @test_vstrbq_p_s16(i8* %base, <8 x i16> %value, i16 zeroext %p) {
914; CHECK-LABEL: test_vstrbq_p_s16:
915; CHECK:       @ %bb.0: @ %entry
916; CHECK-NEXT:    vmsr p0, r1
917; CHECK-NEXT:    vpst
918; CHECK-NEXT:    vstrbt.16 q0, [r0]
919; CHECK-NEXT:    bx lr
920entry:
921  %0 = trunc <8 x i16> %value to <8 x i8>
922  %1 = bitcast i8* %base to <8 x i8>*
923  %2 = zext i16 %p to i32
924  %3 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
925  call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %0, <8 x i8>* %1, i32 1, <8 x i1> %3)
926  ret void
927}
928
929declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32 immarg, <8 x i1>)
930
931define arm_aapcs_vfpcc void @test_vstrbq_p_s32(i8* %base, <4 x i32> %value, i16 zeroext %p) {
932; CHECK-LABEL: test_vstrbq_p_s32:
933; CHECK:       @ %bb.0: @ %entry
934; CHECK-NEXT:    vmsr p0, r1
935; CHECK-NEXT:    vpst
936; CHECK-NEXT:    vstrbt.32 q0, [r0]
937; CHECK-NEXT:    bx lr
938entry:
939  %0 = trunc <4 x i32> %value to <4 x i8>
940  %1 = bitcast i8* %base to <4 x i8>*
941  %2 = zext i16 %p to i32
942  %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
943  call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %0, <4 x i8>* %1, i32 1, <4 x i1> %3)
944  ret void
945}
946
947declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32 immarg, <4 x i1>)
948
949define arm_aapcs_vfpcc void @test_vstrbq_p_u8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
950; CHECK-LABEL: test_vstrbq_p_u8:
951; CHECK:       @ %bb.0: @ %entry
952; CHECK-NEXT:    vmsr p0, r1
953; CHECK-NEXT:    vpst
954; CHECK-NEXT:    vstrbt.8 q0, [r0]
955; CHECK-NEXT:    bx lr
956entry:
957  %0 = bitcast i8* %base to <16 x i8>*
958  %1 = zext i16 %p to i32
959  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
960  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
961  ret void
962}
963
964define arm_aapcs_vfpcc void @test_vstrbq_p_u16(i8* %base, <8 x i16> %value, i16 zeroext %p) {
965; CHECK-LABEL: test_vstrbq_p_u16:
966; CHECK:       @ %bb.0: @ %entry
967; CHECK-NEXT:    vmsr p0, r1
968; CHECK-NEXT:    vpst
969; CHECK-NEXT:    vstrbt.16 q0, [r0]
970; CHECK-NEXT:    bx lr
971entry:
972  %0 = trunc <8 x i16> %value to <8 x i8>
973  %1 = bitcast i8* %base to <8 x i8>*
974  %2 = zext i16 %p to i32
975  %3 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
976  call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %0, <8 x i8>* %1, i32 1, <8 x i1> %3)
977  ret void
978}
979
980define arm_aapcs_vfpcc void @test_vstrbq_p_u32(i8* %base, <4 x i32> %value, i16 zeroext %p) {
981; CHECK-LABEL: test_vstrbq_p_u32:
982; CHECK:       @ %bb.0: @ %entry
983; CHECK-NEXT:    vmsr p0, r1
984; CHECK-NEXT:    vpst
985; CHECK-NEXT:    vstrbt.32 q0, [r0]
986; CHECK-NEXT:    bx lr
987entry:
988  %0 = trunc <4 x i32> %value to <4 x i8>
989  %1 = bitcast i8* %base to <4 x i8>*
990  %2 = zext i16 %p to i32
991  %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
992  call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %0, <4 x i8>* %1, i32 1, <4 x i1> %3)
993  ret void
994}
995
996define arm_aapcs_vfpcc void @test_vstrhq_f16(half* %base, <8 x half> %value) {
997; CHECK-LABEL: test_vstrhq_f16:
998; CHECK:       @ %bb.0: @ %entry
999; CHECK-NEXT:    vstrh.16 q0, [r0]
1000; CHECK-NEXT:    bx lr
1001entry:
1002  %0 = bitcast half* %base to <8 x half>*
1003  store <8 x half> %value, <8 x half>* %0, align 2
1004  ret void
1005}
1006
1007define arm_aapcs_vfpcc void @test_vstrhq_s16(i16* %base, <8 x i16> %value) {
1008; CHECK-LABEL: test_vstrhq_s16:
1009; CHECK:       @ %bb.0: @ %entry
1010; CHECK-NEXT:    vstrh.16 q0, [r0]
1011; CHECK-NEXT:    bx lr
1012entry:
1013  %0 = bitcast i16* %base to <8 x i16>*
1014  store <8 x i16> %value, <8 x i16>* %0, align 2
1015  ret void
1016}
1017
1018define arm_aapcs_vfpcc void @test_vstrhq_s32(i16* %base, <4 x i32> %value) {
1019; CHECK-LABEL: test_vstrhq_s32:
1020; CHECK:       @ %bb.0: @ %entry
1021; CHECK-NEXT:    vstrh.32 q0, [r0]
1022; CHECK-NEXT:    bx lr
1023entry:
1024  %0 = trunc <4 x i32> %value to <4 x i16>
1025  %1 = bitcast i16* %base to <4 x i16>*
1026  store <4 x i16> %0, <4 x i16>* %1, align 2
1027  ret void
1028}
1029
1030define arm_aapcs_vfpcc void @test_vstrhq_u16(i16* %base, <8 x i16> %value) {
1031; CHECK-LABEL: test_vstrhq_u16:
1032; CHECK:       @ %bb.0: @ %entry
1033; CHECK-NEXT:    vstrh.16 q0, [r0]
1034; CHECK-NEXT:    bx lr
1035entry:
1036  %0 = bitcast i16* %base to <8 x i16>*
1037  store <8 x i16> %value, <8 x i16>* %0, align 2
1038  ret void
1039}
1040
1041define arm_aapcs_vfpcc void @test_vstrhq_u32(i16* %base, <4 x i32> %value) {
1042; CHECK-LABEL: test_vstrhq_u32:
1043; CHECK:       @ %bb.0: @ %entry
1044; CHECK-NEXT:    vstrh.32 q0, [r0]
1045; CHECK-NEXT:    bx lr
1046entry:
1047  %0 = trunc <4 x i32> %value to <4 x i16>
1048  %1 = bitcast i16* %base to <4 x i16>*
1049  store <4 x i16> %0, <4 x i16>* %1, align 2
1050  ret void
1051}
1052
1053define arm_aapcs_vfpcc void @test_vstrhq_p_f16(half* %base, <8 x half> %value, i16 zeroext %p) {
1054; CHECK-LABEL: test_vstrhq_p_f16:
1055; CHECK:       @ %bb.0: @ %entry
1056; CHECK-NEXT:    vmsr p0, r1
1057; CHECK-NEXT:    vpst
1058; CHECK-NEXT:    vstrht.16 q0, [r0]
1059; CHECK-NEXT:    bx lr
1060entry:
1061  %0 = bitcast half* %base to <8 x half>*
1062  %1 = zext i16 %p to i32
1063  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
1064  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %value, <8 x half>* %0, i32 2, <8 x i1> %2)
1065  ret void
1066}
1067
1068define arm_aapcs_vfpcc void @test_vstrhq_p_s16(i16* %base, <8 x i16> %value, i16 zeroext %p) {
1069; CHECK-LABEL: test_vstrhq_p_s16:
1070; CHECK:       @ %bb.0: @ %entry
1071; CHECK-NEXT:    vmsr p0, r1
1072; CHECK-NEXT:    vpst
1073; CHECK-NEXT:    vstrht.16 q0, [r0]
1074; CHECK-NEXT:    bx lr
1075entry:
1076  %0 = bitcast i16* %base to <8 x i16>*
1077  %1 = zext i16 %p to i32
1078  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
1079  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2)
1080  ret void
1081}
1082
1083define arm_aapcs_vfpcc void @test_vstrhq_p_s32(i16* %base, <4 x i32> %value, i16 zeroext %p) {
1084; CHECK-LABEL: test_vstrhq_p_s32:
1085; CHECK:       @ %bb.0: @ %entry
1086; CHECK-NEXT:    vmsr p0, r1
1087; CHECK-NEXT:    vpst
1088; CHECK-NEXT:    vstrht.32 q0, [r0]
1089; CHECK-NEXT:    bx lr
1090entry:
1091  %0 = trunc <4 x i32> %value to <4 x i16>
1092  %1 = bitcast i16* %base to <4 x i16>*
1093  %2 = zext i16 %p to i32
1094  %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
1095  call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %0, <4 x i16>* %1, i32 2, <4 x i1> %3)
1096  ret void
1097}
1098
1099declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32 immarg, <4 x i1>)
1100
1101define arm_aapcs_vfpcc void @test_vstrhq_p_u16(i16* %base, <8 x i16> %value, i16 zeroext %p) {
1102; CHECK-LABEL: test_vstrhq_p_u16:
1103; CHECK:       @ %bb.0: @ %entry
1104; CHECK-NEXT:    vmsr p0, r1
1105; CHECK-NEXT:    vpst
1106; CHECK-NEXT:    vstrht.16 q0, [r0]
1107; CHECK-NEXT:    bx lr
1108entry:
1109  %0 = bitcast i16* %base to <8 x i16>*
1110  %1 = zext i16 %p to i32
1111  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
1112  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2)
1113  ret void
1114}
1115
1116define arm_aapcs_vfpcc void @test_vstrhq_p_u32(i16* %base, <4 x i32> %value, i16 zeroext %p) {
1117; CHECK-LABEL: test_vstrhq_p_u32:
1118; CHECK:       @ %bb.0: @ %entry
1119; CHECK-NEXT:    vmsr p0, r1
1120; CHECK-NEXT:    vpst
1121; CHECK-NEXT:    vstrht.32 q0, [r0]
1122; CHECK-NEXT:    bx lr
1123entry:
1124  %0 = trunc <4 x i32> %value to <4 x i16>
1125  %1 = bitcast i16* %base to <4 x i16>*
1126  %2 = zext i16 %p to i32
1127  %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
1128  call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %0, <4 x i16>* %1, i32 2, <4 x i1> %3)
1129  ret void
1130}
1131
1132define arm_aapcs_vfpcc void @test_vstrwq_f32(float* %base, <4 x float> %value) {
1133; CHECK-LABEL: test_vstrwq_f32:
1134; CHECK:       @ %bb.0: @ %entry
1135; CHECK-NEXT:    vstrw.32 q0, [r0]
1136; CHECK-NEXT:    bx lr
1137entry:
1138  %0 = bitcast float* %base to <4 x float>*
1139  store <4 x float> %value, <4 x float>* %0, align 4
1140  ret void
1141}
1142
1143define arm_aapcs_vfpcc void @test_vstrwq_s32(i32* %base, <4 x i32> %value) {
1144; CHECK-LABEL: test_vstrwq_s32:
1145; CHECK:       @ %bb.0: @ %entry
1146; CHECK-NEXT:    vstrw.32 q0, [r0]
1147; CHECK-NEXT:    bx lr
1148entry:
1149  %0 = bitcast i32* %base to <4 x i32>*
1150  store <4 x i32> %value, <4 x i32>* %0, align 4
1151  ret void
1152}
1153
1154define arm_aapcs_vfpcc void @test_vstrwq_u32(i32* %base, <4 x i32> %value) {
1155; CHECK-LABEL: test_vstrwq_u32:
1156; CHECK:       @ %bb.0: @ %entry
1157; CHECK-NEXT:    vstrw.32 q0, [r0]
1158; CHECK-NEXT:    bx lr
1159entry:
1160  %0 = bitcast i32* %base to <4 x i32>*
1161  store <4 x i32> %value, <4 x i32>* %0, align 4
1162  ret void
1163}
1164
1165define arm_aapcs_vfpcc void @test_vstrwq_p_f32(float* %base, <4 x float> %value, i16 zeroext %p) {
1166; CHECK-LABEL: test_vstrwq_p_f32:
1167; CHECK:       @ %bb.0: @ %entry
1168; CHECK-NEXT:    vmsr p0, r1
1169; CHECK-NEXT:    vpst
1170; CHECK-NEXT:    vstrwt.32 q0, [r0]
1171; CHECK-NEXT:    bx lr
1172entry:
1173  %0 = bitcast float* %base to <4 x float>*
1174  %1 = zext i16 %p to i32
1175  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
1176  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %value, <4 x float>* %0, i32 4, <4 x i1> %2)
1177  ret void
1178}
1179
1180define arm_aapcs_vfpcc void @test_vstrwq_p_s32(i32* %base, <4 x i32> %value, i16 zeroext %p) {
1181; CHECK-LABEL: test_vstrwq_p_s32:
1182; CHECK:       @ %bb.0: @ %entry
1183; CHECK-NEXT:    vmsr p0, r1
1184; CHECK-NEXT:    vpst
1185; CHECK-NEXT:    vstrwt.32 q0, [r0]
1186; CHECK-NEXT:    bx lr
1187entry:
1188  %0 = bitcast i32* %base to <4 x i32>*
1189  %1 = zext i16 %p to i32
1190  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
1191  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2)
1192  ret void
1193}
1194
1195define arm_aapcs_vfpcc void @test_vstrwq_p_u32(i32* %base, <4 x i32> %value, i16 zeroext %p) {
1196; CHECK-LABEL: test_vstrwq_p_u32:
1197; CHECK:       @ %bb.0: @ %entry
1198; CHECK-NEXT:    vmsr p0, r1
1199; CHECK-NEXT:    vpst
1200; CHECK-NEXT:    vstrwt.32 q0, [r0]
1201; CHECK-NEXT:    bx lr
1202entry:
1203  %0 = bitcast i32* %base to <4 x i32>*
1204  %1 = zext i16 %p to i32
1205  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
1206  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2)
1207  ret void
1208}
1209