1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
3
; Four predicated VADDV reductions using a pair of complementary constant
; masks: 13107 = 0x3333 and 52428 = 0xCCCC (bitwise inverses of each other).
; The assertions below expect the backend to materialise only ONE predicate
; value (movw #52428 + vmsr) and to fold the inverted uses into a single
; VPT block with alternating then/else lanes (vpstete), rather than loading
; both mask constants separately.
4define arm_aapcs_vfpcc void @reg(<8 x i16> %acc0, <8 x i16> %acc1, i32* nocapture %px, i16 signext %p0) {
5; CHECK-LABEL: reg:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    .save {r4, r6, r7, lr}
8; CHECK-NEXT:    push {r4, r6, r7, lr}
9; CHECK-NEXT:    movw r1, #52428
10; CHECK-NEXT:    vmsr p0, r1
11; CHECK-NEXT:    vpstete
12; CHECK-NEXT:    vaddvt.s16 r12, q1
13; CHECK-NEXT:    vaddve.s16 r2, q1
14; CHECK-NEXT:    vaddvt.s16 r4, q0
15; CHECK-NEXT:    vaddve.s16 r6, q0
16; CHECK-NEXT:    strd r6, r4, [r0]
17; CHECK-NEXT:    strd r2, r12, [r0, #8]
18; CHECK-NEXT:    pop {r4, r6, r7, pc}
19entry:
; %0 = predicate for mask 0x3333; %2 = predicate for the complement 0xCCCC.
20  %0 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 13107)
21  %1 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %acc0, i32 0, <8 x i1> %0)
22  %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 52428)
23  %3 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %acc0, i32 0, <8 x i1> %2)
24  %4 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %acc1, i32 0, <8 x i1> %0)
25  %5 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %acc1, i32 0, <8 x i1> %2)
; Store the four reduction results to px[0..3].
26  store i32 %1, i32* %px, align 4
27  %arrayidx1 = getelementptr inbounds i32, i32* %px, i32 1
28  store i32 %3, i32* %arrayidx1, align 4
29  %arrayidx2 = getelementptr inbounds i32, i32* %px, i32 2
30  store i32 %4, i32* %arrayidx2, align 4
31  %arrayidx3 = getelementptr inbounds i32, i32* %px, i32 3
32  store i32 %5, i32* %arrayidx3, align 4
33  ret void
34}
35
36
; Same shape as @reg but the predicate comes from the run-time argument %p0
; (in r1) and its bitwise NOT, rather than from constants.  The assertions
; below expect a single vmsr of %p0 with the NOTed uses folded into the
; else lanes of one VPT block (vpsttee) — no explicit inversion of the mask
; in a general-purpose register.
; NOTE(review): the names @reg/@const look swapped relative to their
; contents (@reg uses constant masks, @const uses the register argument).
37define arm_aapcs_vfpcc void @const(<8 x i16> %acc0, <8 x i16> %acc1, i32* nocapture %px, i16 signext %p0) {
38; CHECK-LABEL: const:
39; CHECK:       @ %bb.0: @ %entry
40; CHECK-NEXT:    .save {r4, r6, r7, lr}
41; CHECK-NEXT:    push {r4, r6, r7, lr}
42; CHECK-NEXT:    vmsr p0, r1
43; CHECK-NEXT:    vpsttee
44; CHECK-NEXT:    vaddvt.s16 r12, q1
45; CHECK-NEXT:    vaddvt.s16 r2, q0
46; CHECK-NEXT:    vaddve.s16 r4, q1
47; CHECK-NEXT:    vaddve.s16 r6, q0
48; CHECK-NEXT:    stm.w r0, {r2, r6, r12}
49; CHECK-NEXT:    str r4, [r0, #12]
50; CHECK-NEXT:    pop {r4, r6, r7, pc}
51entry:
; %1 = predicate built from %p0; %5 = predicate built from ~%p0.
52  %0 = zext i16 %p0 to i32
53  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
54  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %acc0, i32 0, <8 x i1> %1)
55  %3 = xor i16 %p0, -1
56  %4 = zext i16 %3 to i32
57  %5 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %4)
58  %6 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %acc0, i32 0, <8 x i1> %5)
59  %7 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %acc1, i32 0, <8 x i1> %1)
60  %8 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %acc1, i32 0, <8 x i1> %5)
; Store the four reduction results to px[0..3].
61  store i32 %2, i32* %px, align 4
62  %arrayidx1 = getelementptr inbounds i32, i32* %px, i32 1
63  store i32 %6, i32* %arrayidx1, align 4
64  %arrayidx2 = getelementptr inbounds i32, i32* %px, i32 2
65  store i32 %7, i32* %arrayidx2, align 4
66  %arrayidx3 = getelementptr inbounds i32, i32* %px, i32 3
67  store i32 %8, i32* %arrayidx3, align 4
68  ret void
69}
70
71
72
; An i16 mask is inverted (xor -1) before being converted to a <4 x i1>
; predicate.  The assertions below expect the inversion to be selected as a
; VPNOT on the VPR register rather than inverting the mask in a GPR, with
; the select lowered to vpsel against a zero vector.
73define arm_aapcs_vfpcc <4 x i32> @xorvpnot_i32(<4 x i32> %acc0, i16 signext %p0) {
74; CHECK-LABEL: xorvpnot_i32:
75; CHECK:       @ %bb.0: @ %entry
76; CHECK-NEXT:    vmsr p0, r0
77; CHECK-NEXT:    vmov.i32 q1, #0x0
78; CHECK-NEXT:    vpnot
79; CHECK-NEXT:    vpsel q0, q0, q1
80; CHECK-NEXT:    bx lr
81entry:
82  %l3 = xor i16 %p0, -1
83  %l4 = zext i16 %l3 to i32
84  %l5 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l4)
85  %l6 = select <4 x i1> %l5, <4 x i32> %acc0, <4 x i32> zeroinitializer
86  ret <4 x i32> %l6
87}
88
; Same as @xorvpnot_i32 but with an <8 x i1> predicate over <8 x i16>
; lanes: the xor-with-minus-one should still become a single VPNOT + vpsel.
89define arm_aapcs_vfpcc <8 x i16> @xorvpnot_i16(<8 x i16> %acc0, i16 signext %p0) {
90; CHECK-LABEL: xorvpnot_i16:
91; CHECK:       @ %bb.0: @ %entry
92; CHECK-NEXT:    vmsr p0, r0
93; CHECK-NEXT:    vmov.i32 q1, #0x0
94; CHECK-NEXT:    vpnot
95; CHECK-NEXT:    vpsel q0, q0, q1
96; CHECK-NEXT:    bx lr
97entry:
98  %l3 = xor i16 %p0, -1
99  %l4 = zext i16 %l3 to i32
100  %l5 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %l4)
101  %l6 = select <8 x i1> %l5, <8 x i16> %acc0, <8 x i16> zeroinitializer
102  ret <8 x i16> %l6
103}
104
; Same as @xorvpnot_i32 but with a <16 x i1> predicate over <16 x i8>
; lanes: the inverted mask should again lower to VPNOT + vpsel.
105define arm_aapcs_vfpcc <16 x i8> @xorvpnot_i8(<16 x i8> %acc0, i16 signext %p0) {
106; CHECK-LABEL: xorvpnot_i8:
107; CHECK:       @ %bb.0: @ %entry
108; CHECK-NEXT:    vmsr p0, r0
109; CHECK-NEXT:    vmov.i32 q1, #0x0
110; CHECK-NEXT:    vpnot
111; CHECK-NEXT:    vpsel q0, q0, q1
112; CHECK-NEXT:    bx lr
113entry:
114  %l3 = xor i16 %p0, -1
115  %l4 = zext i16 %l3 to i32
116  %l5 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %l4)
117  %l6 = select <16 x i1> %l5, <16 x i8> %acc0, <16 x i8> zeroinitializer
118  ret <16 x i8> %l6
119}
120
; Variant of @xorvpnot_i8 where the mask is an i32 inverted with
; "xor 65535" (equivalent to a 16-bit bitwise NOT, since VPR.P0 is 16 bits
; wide).  The assertions below expect this form to be recognised and folded
; to VPNOT + vpsel as well.
121define arm_aapcs_vfpcc <16 x i8> @xorvpnot_i8_2(<16 x i8> %acc0, i32 %p0) {
122; CHECK-LABEL: xorvpnot_i8_2:
123; CHECK:       @ %bb.0: @ %entry
124; CHECK-NEXT:    vmsr p0, r0
125; CHECK-NEXT:    vmov.i32 q1, #0x0
126; CHECK-NEXT:    vpnot
127; CHECK-NEXT:    vpsel q0, q0, q1
128; CHECK-NEXT:    bx lr
129entry:
130  %l3 = xor i32 %p0, 65535
131  %l5 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %l3)
132  %l6 = select <16 x i1> %l5, <16 x i8> %acc0, <16 x i8> zeroinitializer
133  ret <16 x i8> %l6
134}
135
136
137
; Constant masks 1 and 65534 are bitwise complements, so the assertions
; below expect one vmsr of #1 and a single combined VPT block (vpsttee)
; where the second pair of reductions run in the else lanes.
138define arm_aapcs_vfpcc i32 @const_mask_1(<4 x i32> %0, <4 x i32> %1, i32 %2) {
139; CHECK-LABEL: const_mask_1:
140; CHECK:       @ %bb.0:
141; CHECK-NEXT:    movs r1, #1
142; CHECK-NEXT:    vmsr p0, r1
143; CHECK-NEXT:    vpsttee
144; CHECK-NEXT:    vaddvat.s32 r0, q0
145; CHECK-NEXT:    vaddvat.s32 r0, q1
146; CHECK-NEXT:    vaddvae.s32 r0, q0
147; CHECK-NEXT:    vaddvae.s32 r0, q1
148; CHECK-NEXT:    bx lr
149  %4 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 1)
150  %5 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %4)
151  %6 = add i32 %5, %2
152  %7 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %4)
153  %8 = add i32 %6, %7
; 65534 = ~1 in the 16-bit predicate mask.
154  %9 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 65534)
155  %10 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %9)
156  %11 = add i32 %8, %10
157  %12 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %9)
158  %13 = add i32 %11, %12
159  ret i32 %13
160}
161
; Negative test for the complement-folding in @const_mask_1: masks 1 and
; 65533 are NOT bitwise complements, so the assertions below expect two
; independent vmsr loads and two separate vpstt blocks — no merge.
162define arm_aapcs_vfpcc i32 @const_mask_not1(<4 x i32> %0, <4 x i32> %1, i32 %2) {
163; CHECK-LABEL: const_mask_not1:
164; CHECK:       @ %bb.0:
165; CHECK-NEXT:    movs r1, #1
166; CHECK-NEXT:    vmsr p0, r1
167; CHECK-NEXT:    vpstt
168; CHECK-NEXT:    vaddvat.s32 r0, q0
169; CHECK-NEXT:    vaddvat.s32 r0, q1
170; CHECK-NEXT:    movw r1, #65533
171; CHECK-NEXT:    vmsr p0, r1
172; CHECK-NEXT:    vpstt
173; CHECK-NEXT:    vaddvat.s32 r0, q0
174; CHECK-NEXT:    vaddvat.s32 r0, q1
175; CHECK-NEXT:    bx lr
176  %4 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 1)
177  %5 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %4)
178  %6 = add i32 %5, %2
179  %7 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %4)
180  %8 = add i32 %6, %7
; 65533 = ~2, deliberately not the complement of 1.
181  %9 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 65533)
182  %10 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %9)
183  %11 = add i32 %8, %10
184  %12 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %9)
185  %13 = add i32 %11, %12
186  ret i32 %13
187}
188
; Like @const_mask_1 but with a multi-bit mask: 1234 and 64301 are 16-bit
; complements (1234 ^ 64301 = 0xFFFF), so the assertions below expect a
; single movw/vmsr and one combined vpsttee block.
189define arm_aapcs_vfpcc i32 @const_mask_1234(<4 x i32> %0, <4 x i32> %1, i32 %2) {
190; CHECK-LABEL: const_mask_1234:
191; CHECK:       @ %bb.0:
192; CHECK-NEXT:    movw r1, #1234
193; CHECK-NEXT:    vmsr p0, r1
194; CHECK-NEXT:    vpsttee
195; CHECK-NEXT:    vaddvat.s32 r0, q0
196; CHECK-NEXT:    vaddvat.s32 r0, q1
197; CHECK-NEXT:    vaddvae.s32 r0, q0
198; CHECK-NEXT:    vaddvae.s32 r0, q1
199; CHECK-NEXT:    bx lr
200  %4 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 1234)
201  %5 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %4)
202  %6 = add i32 %5, %2
203  %7 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %4)
204  %8 = add i32 %6, %7
; 64301 = ~1234 in the 16-bit predicate mask.
205  %9 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 64301)
206  %10 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %9)
207  %11 = add i32 %8, %10
208  %12 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %9)
209  %13 = add i32 %11, %12
210  ret i32 %13
211}
212
; Same complementary masks as @const_mask_1234 but the reductions are
; interleaved (mask A, B, A, B).  The assertions below expect the block
; predication pattern to follow the interleaving (vpstete) while still
; using a single vmsr.
213define arm_aapcs_vfpcc i32 @const_mask_abab(<4 x i32> %0, <4 x i32> %1, i32 %2) {
214; CHECK-LABEL: const_mask_abab:
215; CHECK:       @ %bb.0:
216; CHECK-NEXT:    movw r1, #1234
217; CHECK-NEXT:    vmsr p0, r1
218; CHECK-NEXT:    vpstete
219; CHECK-NEXT:    vaddvat.s32 r0, q0
220; CHECK-NEXT:    vaddvae.s32 r0, q1
221; CHECK-NEXT:    vaddvat.s32 r0, q1
222; CHECK-NEXT:    vaddvae.s32 r0, q0
223; CHECK-NEXT:    bx lr
224  %4 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 1234)
225  %5 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %4)
226  %6 = add i32 %5, %2
; 64301 = ~1234; uses of %4 and %7 alternate below.
227  %7 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 64301)
228  %8 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %7)
229  %9 = add i32 %6, %8
230  %10 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %4)
231  %11 = add i32 %9, %10
232  %12 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %7)
233  %13 = add i32 %11, %12
234  ret i32 %13
235}
236
; Like @const_mask_abab, but an unpredicated vector add (built from the
; scalar running sum via insertelement/shufflevector splat) sits between
; the two pairs of reductions.  That instruction cannot live inside a VPT
; block, so the assertions below expect the block to be split: vpste, the
; splat add, then a VPNOT and a second vpste reusing the same VPR value.
237define arm_aapcs_vfpcc i32 @const_mask_abbreakab(<4 x i32> %0, <4 x i32> %1, i32 %2) {
238; CHECK-LABEL: const_mask_abbreakab:
239; CHECK:       @ %bb.0:
240; CHECK-NEXT:    movw r1, #1234
241; CHECK-NEXT:    vmsr p0, r1
242; CHECK-NEXT:    vpste
243; CHECK-NEXT:    vaddvat.s32 r0, q0
244; CHECK-NEXT:    vaddvae.s32 r0, q1
245; CHECK-NEXT:    vadd.i32 q1, q0, r0
246; CHECK-NEXT:    vpnot
247; CHECK-NEXT:    vpste
248; CHECK-NEXT:    vaddvat.s32 r0, q1
249; CHECK-NEXT:    vaddvae.s32 r0, q0
250; CHECK-NEXT:    bx lr
251  %4 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 1234)
252  %5 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %4)
253  %6 = add i32 %5, %2
; 64301 = ~1234.
254  %7 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 64301)
255  %8 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %7)
256  %9 = add i32 %6, %8
; Splat %9 and add it to %0 — this unpredicated op breaks the VPT block.
257  %si = insertelement <4 x i32> undef, i32 %9, i32 0
258  %s = shufflevector <4 x i32> %si, <4 x i32> undef, <4 x i32> zeroinitializer
259  %nadd = add <4 x i32> %0, %s
260  %10 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %nadd, i32 0, <4 x i1> %4)
261  %11 = add i32 %9, %10
262  %12 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %7)
263  %13 = add i32 %11, %12
264  ret i32 %13
265}
266
; Variant of @const_mask_abbreakab where the first two reductions share one
; mask and the last two share its complement.  The assertions below expect
; vpstt / splat-add / vpnot / vpstt — the same predicate value reused across
; the break with a VPNOT instead of a second vmsr.
267define arm_aapcs_vfpcc i32 @const_mask_break(<4 x i32> %0, <4 x i32> %1, i32 %2) {
268; CHECK-LABEL: const_mask_break:
269; CHECK:       @ %bb.0:
270; CHECK-NEXT:    movw r1, #1234
271; CHECK-NEXT:    vmsr p0, r1
272; CHECK-NEXT:    vpstt
273; CHECK-NEXT:    vaddvat.s32 r0, q0
274; CHECK-NEXT:    vaddvat.s32 r0, q1
275; CHECK-NEXT:    vadd.i32 q1, q0, r0
276; CHECK-NEXT:    vpnot
277; CHECK-NEXT:    vpstt
278; CHECK-NEXT:    vaddvat.s32 r0, q1
279; CHECK-NEXT:    vaddvat.s32 r0, q0
280; CHECK-NEXT:    bx lr
281  %4 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 1234)
282  %5 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %4)
283  %6 = add i32 %5, %2
; 64301 = ~1234; only used after the unpredicated add below.
284  %7 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 64301)
285  %8 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %4)
286  %9 = add i32 %6, %8
; Splat %9 and add it to %0 — this unpredicated op breaks the VPT block.
287  %si = insertelement <4 x i32> undef, i32 %9, i32 0
288  %s = shufflevector <4 x i32> %si, <4 x i32> undef, <4 x i32> zeroinitializer
289  %nadd = add <4 x i32> %0, %s
290  %10 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %nadd, i32 0, <4 x i1> %7)
291  %11 = add i32 %9, %10
292  %12 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %7)
293  %13 = add i32 %11, %12
294  ret i32 %13
295}
296
; Three distinct constant masks: 1234, 64300 and 64301.  Only 1234/64301
; are complements, and the 64300 use sits between them, so the assertions
; below expect three separate vmsr loads each with its own VPT block —
; no complement folding across the intervening predicate.
297define arm_aapcs_vfpcc i32 @const_mask_threepred(<4 x i32> %0, <4 x i32> %1, i32 %2) {
298; CHECK-LABEL: const_mask_threepred:
299; CHECK:       @ %bb.0:
300; CHECK-NEXT:    movw r1, #1234
301; CHECK-NEXT:    vmsr p0, r1
302; CHECK-NEXT:    vpstt
303; CHECK-NEXT:    vaddvat.s32 r0, q0
304; CHECK-NEXT:    vaddvat.s32 r0, q1
305; CHECK-NEXT:    movw r1, #64300
306; CHECK-NEXT:    vmsr p0, r1
307; CHECK-NEXT:    vpst
308; CHECK-NEXT:    vaddvat.s32 r0, q1
309; CHECK-NEXT:    movw r1, #64301
310; CHECK-NEXT:    vmsr p0, r1
311; CHECK-NEXT:    vpstt
312; CHECK-NEXT:    vaddvat.s32 r0, q1
313; CHECK-NEXT:    vaddvat.s32 r0, q0
314; CHECK-NEXT:    bx lr
315  %4 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 1234)
316  %5 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %4)
317  %6 = add i32 %5, %2
; 64301 = ~1234, but its uses are separated by the 64300 predicate below.
318  %7 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 64301)
319  %8 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %4)
320  %9 = add i32 %6, %8
321  %n7 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 64300)
322  %n8 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %n7)
323  %n9 = add i32 %9, %n8
324  %10 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %7)
325  %11 = add i32 %n9, %10
326  %12 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %7)
327  %13 = add i32 %11, %12
328  ret i32 %13
329}
330
; Interleaves the complementary constant masks (1234 / 64301) with a
; predicate produced by a vector compare (icmp slt -> VPT/VCMP).  The
; compare clobbers P0, so the constant predicate must survive across it:
; the assertions below expect a VPR spill/reload (vstr/vldr of p0 to the
; stack) around the VCMP-headed block, plus VPNOT to recover the
; complemented lanes.
331define arm_aapcs_vfpcc i32 @const_mask_threepredabab(<4 x i32> %0, <4 x i32> %1, i32 %2) {
332; CHECK-LABEL: const_mask_threepredabab:
333; CHECK:       @ %bb.0:
334; CHECK-NEXT:    .pad #4
335; CHECK-NEXT:    sub sp, #4
336; CHECK-NEXT:    movw r1, #1234
337; CHECK-NEXT:    vmsr p0, r1
338; CHECK-NEXT:    vstr p0, [sp] @ 4-byte Spill
339; CHECK-NEXT:    vpst
340; CHECK-NEXT:    vaddvat.s32 r0, q0
341; CHECK-NEXT:    vldr p0, [sp] @ 4-byte Reload
342; CHECK-NEXT:    vpnot
343; CHECK-NEXT:    vpst
344; CHECK-NEXT:    vaddvat.s32 r0, q1
345; CHECK-NEXT:    vpt.s32 gt, q1, q0
346; CHECK-NEXT:    vaddvat.s32 r0, q1
347; CHECK-NEXT:    vldr p0, [sp] @ 4-byte Reload
348; CHECK-NEXT:    vpste
349; CHECK-NEXT:    vaddvat.s32 r0, q1
350; CHECK-NEXT:    vaddvae.s32 r0, q0
351; CHECK-NEXT:    add sp, #4
352; CHECK-NEXT:    bx lr
353  %4 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 1234)
354  %5 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %4)
355  %6 = add i32 %5, %2
; 64301 = ~1234.
356  %7 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 64301)
357  %8 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %7)
358  %9 = add i32 %6, %8
; Compare-generated predicate interposed between the constant-mask uses.
359  %n7 = icmp slt <4 x i32> %0, %1
360  %n8 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %n7)
361  %n9 = add i32 %9, %n8
362  %10 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %1, i32 0, <4 x i1> %4)
363  %11 = add i32 %n9, %10
364  %12 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %0, i32 0, <4 x i1> %7)
365  %13 = add i32 %11, %12
366  ret i32 %13
367}
368
369
370declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
371declare i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1>)
372declare i32 @llvm.arm.mve.pred.v2i.v16i1(<16 x i1>)
373
374declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
375declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
376declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
377
378declare i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
379declare i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>)
380declare i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>)
381