; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare i32 @llvm.arm.mve.vmldava.v16i8(i32, i32, i32, i32, <16 x i8>, <16 x i8>)
declare i32 @llvm.arm.mve.vmldava.v8i16(i32, i32, i32, i32, <8 x i16>, <8 x i16>)
declare i32 @llvm.arm.mve.vmldava.v4i32(i32, i32, i32, i32, <4 x i32>, <4 x i32>)

declare i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32, i32, i32, i32, <16 x i8>, <16 x i8>, <16 x i1>)
declare i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32, i32, i32, i32, <8 x i16>, <8 x i16>, <8 x i1>)
declare i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32, i32, i32, i32, <4 x i32>, <4 x i32>, <4 x i1>)

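; As exercised by the tests below, the leading i32 operands of the vmldava
; intrinsics act as flags: the first selects unsigned (.u) multiplies, the
; second selects the subtracting VMLSDAV forms, and the third selects the
; exchanging (x) forms. The fourth i32 operand is the accumulator input.
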
define arm_aapcs_vfpcc i32 @test_vmladavaq_s8(i32 %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmladavaq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlava.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 0, i32 0, i32 %a, <16 x i8> %b, <16 x i8> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavaq_s16(i32 %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmladavaq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlava.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %a, <8 x i16> %b, <8 x i16> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavaq_s32(i32 %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmladavaq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlava.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 %a, <4 x i32> %b, <4 x i32> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavaq_u8(i32 %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmladavaq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlava.u8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 1, i32 0, i32 0, i32 %a, <16 x i8> %b, <16 x i8> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavaq_u16(i32 %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmladavaq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlava.u16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 1, i32 0, i32 0, i32 %a, <8 x i16> %b, <8 x i16> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavaq_u32(i32 %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmladavaq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlava.u32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 1, i32 0, i32 0, i32 %a, <4 x i32> %b, <4 x i32> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavaxq_s8(i32 %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmladavaxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmladavax.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 0, i32 1, i32 %a, <16 x i8> %b, <16 x i8> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavaxq_s16(i32 %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmladavaxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmladavax.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 1, i32 %a, <8 x i16> %b, <8 x i16> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavaxq_s32(i32 %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmladavaxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmladavax.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 1, i32 %a, <4 x i32> %b, <4 x i32> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s8(i32 %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsdavaq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdava.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 1, i32 0, i32 %a, <16 x i8> %b, <16 x i8> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s16(i32 %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsdavaq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdava.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 1, i32 0, i32 %a, <8 x i16> %b, <8 x i16> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s32(i32 %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsdavaq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdava.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 1, i32 0, i32 %a, <4 x i32> %b, <4 x i32> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaxq_s8(i32 %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_vmlsdavaxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdavax.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 1, i32 1, i32 %a, <16 x i8> %b, <16 x i8> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaxq_s16(i32 %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_vmlsdavaxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdavax.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 1, i32 1, i32 %a, <8 x i16> %b, <8 x i16> %c)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaxq_s32(i32 %a, <4 x i32> %b, <4 x i32> %c) {
; CHECK-LABEL: test_vmlsdavaxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdavax.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 1, i32 1, i32 %a, <4 x i32> %b, <4 x i32> %c)
  ret i32 %0
}

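; Predicated versions: the mask %p is moved into P0 via VMSR and the
; reduction is issued inside a VPST block as the t-suffixed form.
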
define arm_aapcs_vfpcc i32 @test_vmladavaq_p_s8(i32 %a, <16 x i8> %b, <16 x i8> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavaq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavat.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavaq_p_s16(i32 %a, <8 x i16> %b, <8 x i16> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavaq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavat.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavaq_p_s32(i32 %a, <4 x i32> %b, <4 x i32> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavaq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavat.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavaq_p_u8(i32 %a, <16 x i8> %b, <16 x i8> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavaq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavat.u8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 1, i32 0, i32 0, i32 %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavaq_p_u16(i32 %a, <8 x i16> %b, <8 x i16> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavaq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavat.u16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 1, i32 0, i32 0, i32 %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavaq_p_u32(i32 %a, <4 x i32> %b, <4 x i32> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavaq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavat.u32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavaxq_p_s8(i32 %a, <16 x i8> %b, <16 x i8> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavaxq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmladavaxt.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 1, i32 %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavaxq_p_s16(i32 %a, <8 x i16> %b, <8 x i16> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavaxq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmladavaxt.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 1, i32 %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavaxq_p_s32(i32 %a, <4 x i32> %b, <4 x i32> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavaxq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmladavaxt.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaq_p_s8(i32 %a, <16 x i8> %b, <16 x i8> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavaq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavat.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 1, i32 0, i32 %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaq_p_s16(i32 %a, <8 x i16> %b, <8 x i16> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavaq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavat.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 0, i32 %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaq_p_s32(i32 %a, <4 x i32> %b, <4 x i32> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavaq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavat.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaxq_p_s8(i32 %a, <16 x i8> %b, <16 x i8> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavaxq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavaxt.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 1, i32 1, i32 %a, <16 x i8> %b, <16 x i8> %c, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaxq_p_s16(i32 %a, <8 x i16> %b, <8 x i16> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavaxq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavaxt.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 1, i32 %a, <8 x i16> %b, <8 x i16> %c, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavaxq_p_s32(i32 %a, <4 x i32> %b, <4 x i32> %c, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavaxq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavaxt.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 %a, <4 x i32> %b, <4 x i32> %c, <4 x i1> %1)
  ret i32 %2
}

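; Passing a zero accumulator selects the non-accumulating forms
; (VMLAV/VMLADAVX/VMLSDAV/VMLSDAVX) of the same intrinsics.
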
define arm_aapcs_vfpcc i32 @test_vmladavq_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmladavq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlav.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 0, i32 0, i32 0, <16 x i8> %a, <16 x i8> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmladavq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlav.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 0, <8 x i16> %a, <8 x i16> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmladavq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlav.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 0, <4 x i32> %a, <4 x i32> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavq_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmladavq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlav.u8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 1, i32 0, i32 0, i32 0, <16 x i8> %a, <16 x i8> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavq_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmladavq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlav.u16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 1, i32 0, i32 0, i32 0, <8 x i16> %a, <8 x i16> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavq_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmladavq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlav.u32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 1, i32 0, i32 0, i32 0, <4 x i32> %a, <4 x i32> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavxq_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmladavxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmladavx.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 0, i32 1, i32 0, <16 x i8> %a, <16 x i8> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavxq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmladavxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmladavx.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 1, i32 0, <8 x i16> %a, <8 x i16> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmladavxq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmladavxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmladavx.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 1, i32 0, <4 x i32> %a, <4 x i32> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavq_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmlsdavq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdav.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 1, i32 0, i32 0, <16 x i8> %a, <16 x i8> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmlsdavq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdav.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 1, i32 0, i32 0, <8 x i16> %a, <8 x i16> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmlsdavq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdav.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 1, i32 0, i32 0, <4 x i32> %a, <4 x i32> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavxq_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vmlsdavxq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdavx.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 1, i32 1, i32 0, <16 x i8> %a, <16 x i8> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavxq_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vmlsdavxq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdavx.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 1, i32 1, i32 0, <8 x i16> %a, <8 x i16> %b)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vmlsdavxq_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vmlsdavxq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmlsdavx.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 1, i32 1, i32 0, <4 x i32> %a, <4 x i32> %b)
  ret i32 %0
}

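; Predicated, non-accumulating: a zero accumulator combined with a predicate
; lowers to the t-suffixed non-accumulating forms inside a VPST block.
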
define arm_aapcs_vfpcc i32 @test_vmladavq_p_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavt.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 0, i32 0, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavq_p_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavt.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 0, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavq_p_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavt.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 0, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavq_p_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavt.u8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 1, i32 0, i32 0, i32 0, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavq_p_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavt.u16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 1, i32 0, i32 0, i32 0, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavq_p_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlavt.u32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 0, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavxq_p_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavxq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmladavxt.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 0, i32 1, i32 0, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavxq_p_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavxq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmladavxt.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 1, i32 0, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmladavxq_p_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmladavxq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmladavxt.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 0, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavq_p_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavt.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 1, i32 0, i32 0, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavq_p_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavt.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 0, i32 0, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavq_p_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavt.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 0, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavxq_p_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavxq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavxt.s8 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v16i8.v16i1(i32 0, i32 1, i32 1, i32 0, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavxq_p_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavxq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavxt.s16 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 1, i32 0, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vmlsdavxq_p_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vmlsdavxq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmlsdavxt.s32 r0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call i32 @llvm.arm.mve.vmldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 0, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
  ret i32 %2
}