1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
3
; Predicated add, <4 x i32>. %y is replaced by 0 (the identity for add) in
; lanes where the mask from @llvm.arm.mve.vctp32(%n) is false, so those lanes
; of the result equal %x.
; Expected lowering: vctp.32 + vpst + vaddt.i32 only, with no separate select.
4define arm_aapcs_vfpcc <4 x i32> @add_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
5; CHECK-LABEL: add_v4i32_x:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    vctp.32 r0
8; CHECK-NEXT:    vpst
9; CHECK-NEXT:    vaddt.i32 q0, q0, q1
10; CHECK-NEXT:    bx lr
11entry:
12  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
13  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
14  %b = add <4 x i32> %a, %x
15  ret <4 x i32> %b
16}
17
; Predicated add, <8 x i16>. %y is replaced by 0 (the identity for add) in
; lanes where the mask from @llvm.arm.mve.vctp16(%n) is false, so those lanes
; of the result equal %x.
; Expected lowering: vctp.16 + vpst + vaddt.i16 only, with no separate select.
18define arm_aapcs_vfpcc <8 x i16> @add_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
19; CHECK-LABEL: add_v8i16_x:
20; CHECK:       @ %bb.0: @ %entry
21; CHECK-NEXT:    vctp.16 r0
22; CHECK-NEXT:    vpst
23; CHECK-NEXT:    vaddt.i16 q0, q0, q1
24; CHECK-NEXT:    bx lr
25entry:
26  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
27  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
28  %b = add <8 x i16> %a, %x
29  ret <8 x i16> %b
30}
31
; Predicated add, <16 x i8>. %y is replaced by 0 (the identity for add) in
; lanes where the mask from @llvm.arm.mve.vctp8(%n) is false, so those lanes
; of the result equal %x.
; Expected lowering: vctp.8 + vpst + vaddt.i8 only, with no separate select.
32define arm_aapcs_vfpcc <16 x i8> @add_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
33; CHECK-LABEL: add_v16i8_x:
34; CHECK:       @ %bb.0: @ %entry
35; CHECK-NEXT:    vctp.8 r0
36; CHECK-NEXT:    vpst
37; CHECK-NEXT:    vaddt.i8 q0, q0, q1
38; CHECK-NEXT:    bx lr
39entry:
40  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
41  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
42  %b = add <16 x i8> %a, %x
43  ret <16 x i8> %b
44}
45
; Predicated subtract, <4 x i32>. The subtrahend is %y where the mask from
; @llvm.arm.mve.vctp32(%n) is true and 0 elsewhere, so masked-off lanes of the
; result equal %x (x - 0).
; Expected lowering: vctp.32 + vpst + vsubt.i32 only, with no separate select.
46define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
47; CHECK-LABEL: sub_v4i32_x:
48; CHECK:       @ %bb.0: @ %entry
49; CHECK-NEXT:    vctp.32 r0
50; CHECK-NEXT:    vpst
51; CHECK-NEXT:    vsubt.i32 q0, q0, q1
52; CHECK-NEXT:    bx lr
53entry:
54  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
55  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
56  %b = sub <4 x i32> %x, %a
57  ret <4 x i32> %b
58}
59
; Predicated subtract, <8 x i16>. The subtrahend is %y where the mask from
; @llvm.arm.mve.vctp16(%n) is true and 0 elsewhere, so masked-off lanes of the
; result equal %x (x - 0).
; Expected lowering: vctp.16 + vpst + vsubt.i16 only, with no separate select.
60define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
61; CHECK-LABEL: sub_v8i16_x:
62; CHECK:       @ %bb.0: @ %entry
63; CHECK-NEXT:    vctp.16 r0
64; CHECK-NEXT:    vpst
65; CHECK-NEXT:    vsubt.i16 q0, q0, q1
66; CHECK-NEXT:    bx lr
67entry:
68  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
69  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
70  %b = sub <8 x i16> %x, %a
71  ret <8 x i16> %b
72}
73
; Predicated subtract, <16 x i8>. The subtrahend is %y where the mask from
; @llvm.arm.mve.vctp8(%n) is true and 0 elsewhere, so masked-off lanes of the
; result equal %x (x - 0).
; Expected lowering: vctp.8 + vpst + vsubt.i8 only, with no separate select.
74define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
75; CHECK-LABEL: sub_v16i8_x:
76; CHECK:       @ %bb.0: @ %entry
77; CHECK-NEXT:    vctp.8 r0
78; CHECK-NEXT:    vpst
79; CHECK-NEXT:    vsubt.i8 q0, q0, q1
80; CHECK-NEXT:    bx lr
81entry:
82  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
83  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
84  %b = sub <16 x i8> %x, %a
85  ret <16 x i8> %b
86}
87
; Predicated multiply, <4 x i32>. %y is replaced by a splat of 1 (the identity
; for mul) in lanes where the mask from @llvm.arm.mve.vctp32(%n) is false, so
; those lanes of the result equal %x.
; Expected lowering: vctp.32 + vpst + vmult.i32 only, with no separate select.
88define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
89; CHECK-LABEL: mul_v4i32_x:
90; CHECK:       @ %bb.0: @ %entry
91; CHECK-NEXT:    vctp.32 r0
92; CHECK-NEXT:    vpst
93; CHECK-NEXT:    vmult.i32 q0, q0, q1
94; CHECK-NEXT:    bx lr
95entry:
96  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
97  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
98  %b = mul <4 x i32> %a, %x
99  ret <4 x i32> %b
100}
101
; Predicated multiply, <8 x i16>. %y is replaced by a splat of 1 (the identity
; for mul) in lanes where the mask from @llvm.arm.mve.vctp16(%n) is false, so
; those lanes of the result equal %x.
; Expected lowering: vctp.16 + vpst + vmult.i16 only, with no separate select.
102define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
103; CHECK-LABEL: mul_v8i16_x:
104; CHECK:       @ %bb.0: @ %entry
105; CHECK-NEXT:    vctp.16 r0
106; CHECK-NEXT:    vpst
107; CHECK-NEXT:    vmult.i16 q0, q0, q1
108; CHECK-NEXT:    bx lr
109entry:
110  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
111  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
112  %b = mul <8 x i16> %a, %x
113  ret <8 x i16> %b
114}
115
; Predicated multiply, <16 x i8>. %y is replaced by a splat of 1 (the identity
; for mul) in lanes where the mask from @llvm.arm.mve.vctp8(%n) is false, so
; those lanes of the result equal %x.
; Expected lowering: vctp.8 + vpst + vmult.i8 only, with no separate select.
116define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
117; CHECK-LABEL: mul_v16i8_x:
118; CHECK:       @ %bb.0: @ %entry
119; CHECK-NEXT:    vctp.8 r0
120; CHECK-NEXT:    vpst
121; CHECK-NEXT:    vmult.i8 q0, q0, q1
122; CHECK-NEXT:    bx lr
123entry:
124  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
125  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
126  %b = mul <16 x i8> %a, %x
127  ret <16 x i8> %b
128}
129
; Predicated bitwise and, <4 x i32>. %y is replaced by all-ones (-1, the
; identity for and) in lanes where the mask from @llvm.arm.mve.vctp32(%n) is
; false, so those lanes of the result equal %x.
; Expected lowering: vctp.32 + vpst + vandt only, with no separate select.
130define arm_aapcs_vfpcc <4 x i32> @and_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
131; CHECK-LABEL: and_v4i32_x:
132; CHECK:       @ %bb.0: @ %entry
133; CHECK-NEXT:    vctp.32 r0
134; CHECK-NEXT:    vpst
135; CHECK-NEXT:    vandt q0, q0, q1
136; CHECK-NEXT:    bx lr
137entry:
138  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
139  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
140  %b = and <4 x i32> %a, %x
141  ret <4 x i32> %b
142}
143
; Predicated bitwise and, <8 x i16>. %y is replaced by all-ones (-1, the
; identity for and) in lanes where the mask from @llvm.arm.mve.vctp16(%n) is
; false, so those lanes of the result equal %x.
; Expected lowering: vctp.16 + vpst + vandt only, with no separate select.
144define arm_aapcs_vfpcc <8 x i16> @and_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
145; CHECK-LABEL: and_v8i16_x:
146; CHECK:       @ %bb.0: @ %entry
147; CHECK-NEXT:    vctp.16 r0
148; CHECK-NEXT:    vpst
149; CHECK-NEXT:    vandt q0, q0, q1
150; CHECK-NEXT:    bx lr
151entry:
152  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
153  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
154  %b = and <8 x i16> %a, %x
155  ret <8 x i16> %b
156}
157
; Predicated bitwise and, <16 x i8>. %y is replaced by all-ones (-1, the
; identity for and) in lanes where the mask from @llvm.arm.mve.vctp8(%n) is
; false, so those lanes of the result equal %x.
; Expected lowering: vctp.8 + vpst + vandt only, with no separate select.
158define arm_aapcs_vfpcc <16 x i8> @and_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
159; CHECK-LABEL: and_v16i8_x:
160; CHECK:       @ %bb.0: @ %entry
161; CHECK-NEXT:    vctp.8 r0
162; CHECK-NEXT:    vpst
163; CHECK-NEXT:    vandt q0, q0, q1
164; CHECK-NEXT:    bx lr
165entry:
166  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
167  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
168  %b = and <16 x i8> %a, %x
169  ret <16 x i8> %b
170}
171
; Predicated bitwise or, <4 x i32>. %y is replaced by 0 (the identity for or)
; in lanes where the mask from @llvm.arm.mve.vctp32(%n) is false, so those
; lanes of the result equal %x.
; Expected lowering: vctp.32 + vpst + vorrt only, with no separate select.
172define arm_aapcs_vfpcc <4 x i32> @or_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
173; CHECK-LABEL: or_v4i32_x:
174; CHECK:       @ %bb.0: @ %entry
175; CHECK-NEXT:    vctp.32 r0
176; CHECK-NEXT:    vpst
177; CHECK-NEXT:    vorrt q0, q0, q1
178; CHECK-NEXT:    bx lr
179entry:
180  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
181  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
182  %b = or <4 x i32> %a, %x
183  ret <4 x i32> %b
184}
185
; Predicated bitwise or, <8 x i16>. %y is replaced by 0 (the identity for or)
; in lanes where the mask from @llvm.arm.mve.vctp16(%n) is false, so those
; lanes of the result equal %x.
; Expected lowering: vctp.16 + vpst + vorrt only, with no separate select.
186define arm_aapcs_vfpcc <8 x i16> @or_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
187; CHECK-LABEL: or_v8i16_x:
188; CHECK:       @ %bb.0: @ %entry
189; CHECK-NEXT:    vctp.16 r0
190; CHECK-NEXT:    vpst
191; CHECK-NEXT:    vorrt q0, q0, q1
192; CHECK-NEXT:    bx lr
193entry:
194  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
195  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
196  %b = or <8 x i16> %a, %x
197  ret <8 x i16> %b
198}
199
; Predicated bitwise or, <16 x i8>. %y is replaced by 0 (the identity for or)
; in lanes where the mask from @llvm.arm.mve.vctp8(%n) is false, so those
; lanes of the result equal %x.
; Expected lowering: vctp.8 + vpst + vorrt only, with no separate select.
200define arm_aapcs_vfpcc <16 x i8> @or_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
201; CHECK-LABEL: or_v16i8_x:
202; CHECK:       @ %bb.0: @ %entry
203; CHECK-NEXT:    vctp.8 r0
204; CHECK-NEXT:    vpst
205; CHECK-NEXT:    vorrt q0, q0, q1
206; CHECK-NEXT:    bx lr
207entry:
208  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
209  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
210  %b = or <16 x i8> %a, %x
211  ret <16 x i8> %b
212}
213
; Predicated bitwise xor, <4 x i32>. %y is replaced by 0 (the identity for
; xor) in lanes where the mask from @llvm.arm.mve.vctp32(%n) is false, so
; those lanes of the result equal %x.
; Expected lowering: vctp.32 + vpst + veort only, with no separate select.
214define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
215; CHECK-LABEL: xor_v4i32_x:
216; CHECK:       @ %bb.0: @ %entry
217; CHECK-NEXT:    vctp.32 r0
218; CHECK-NEXT:    vpst
219; CHECK-NEXT:    veort q0, q0, q1
220; CHECK-NEXT:    bx lr
221entry:
222  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
223  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
224  %b = xor <4 x i32> %a, %x
225  ret <4 x i32> %b
226}
227
; Predicated bitwise xor, <8 x i16>. %y is replaced by 0 (the identity for
; xor) in lanes where the mask from @llvm.arm.mve.vctp16(%n) is false, so
; those lanes of the result equal %x.
; Expected lowering: vctp.16 + vpst + veort only, with no separate select.
228define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
229; CHECK-LABEL: xor_v8i16_x:
230; CHECK:       @ %bb.0: @ %entry
231; CHECK-NEXT:    vctp.16 r0
232; CHECK-NEXT:    vpst
233; CHECK-NEXT:    veort q0, q0, q1
234; CHECK-NEXT:    bx lr
235entry:
236  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
237  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
238  %b = xor <8 x i16> %a, %x
239  ret <8 x i16> %b
240}
241
; Predicated bitwise xor, <16 x i8>. %y is replaced by 0 (the identity for
; xor) in lanes where the mask from @llvm.arm.mve.vctp8(%n) is false, so
; those lanes of the result equal %x.
; Expected lowering: vctp.8 + vpst + veort only, with no separate select.
242define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
243; CHECK-LABEL: xor_v16i8_x:
244; CHECK:       @ %bb.0: @ %entry
245; CHECK-NEXT:    vctp.8 r0
246; CHECK-NEXT:    vpst
247; CHECK-NEXT:    veort q0, q0, q1
248; CHECK-NEXT:    bx lr
249entry:
250  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
251  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
252  %b = xor <16 x i8> %a, %x
253  ret <16 x i8> %b
254}
255
; Predicated and-with-complement, <4 x i32>. %y is inverted (xor with -1)
; before the select; lanes where the mask from @llvm.arm.mve.vctp32(%n) is
; false get all-ones instead, so the and leaves %x unchanged there.
; Expected lowering: the inversion stays a separate vmvn q1, followed by
; vctp.32 + vpst + vandt.
256define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
257; CHECK-LABEL: andnot_v4i32_x:
258; CHECK:       @ %bb.0: @ %entry
259; CHECK-NEXT:    vmvn q1, q1
260; CHECK-NEXT:    vctp.32 r0
261; CHECK-NEXT:    vpst
262; CHECK-NEXT:    vandt q0, q0, q1
263; CHECK-NEXT:    bx lr
264entry:
265  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
266  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
267  %a = select <4 x i1> %c, <4 x i32> %y1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
268  %b = and <4 x i32> %a, %x
269  ret <4 x i32> %b
270}
271
; Predicated and-with-complement, <8 x i16>. %y is inverted (xor with -1)
; before the select; lanes where the mask from @llvm.arm.mve.vctp16(%n) is
; false get all-ones instead, so the and leaves %x unchanged there.
; Expected lowering: the inversion stays a separate vmvn q1, followed by
; vctp.16 + vpst + vandt.
272define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
273; CHECK-LABEL: andnot_v8i16_x:
274; CHECK:       @ %bb.0: @ %entry
275; CHECK-NEXT:    vmvn q1, q1
276; CHECK-NEXT:    vctp.16 r0
277; CHECK-NEXT:    vpst
278; CHECK-NEXT:    vandt q0, q0, q1
279; CHECK-NEXT:    bx lr
280entry:
281  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
282  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
283  %a = select <8 x i1> %c, <8 x i16> %y1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
284  %b = and <8 x i16> %a, %x
285  ret <8 x i16> %b
286}
287
; Predicated and-with-complement, <16 x i8>. %y is inverted (xor with -1)
; before the select; lanes where the mask from @llvm.arm.mve.vctp8(%n) is
; false get all-ones instead, so the and leaves %x unchanged there.
; Expected lowering: the inversion stays a separate vmvn q1, followed by
; vctp.8 + vpst + vandt.
288define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
289; CHECK-LABEL: andnot_v16i8_x:
290; CHECK:       @ %bb.0: @ %entry
291; CHECK-NEXT:    vmvn q1, q1
292; CHECK-NEXT:    vctp.8 r0
293; CHECK-NEXT:    vpst
294; CHECK-NEXT:    vandt q0, q0, q1
295; CHECK-NEXT:    bx lr
296entry:
297  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
298  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
299  %a = select <16 x i1> %c, <16 x i8> %y1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
300  %b = and <16 x i8> %a, %x
301  ret <16 x i8> %b
302}
303
; Predicated or-with-complement, <4 x i32>. %y is inverted (xor with -1)
; before the select; lanes where the mask from @llvm.arm.mve.vctp32(%n) is
; false get 0 instead, so the or leaves %x unchanged there.
; Expected lowering: the inversion stays a separate vmvn q1, followed by
; vctp.32 + vpst + vorrt.
304define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
305; CHECK-LABEL: ornot_v4i32_x:
306; CHECK:       @ %bb.0: @ %entry
307; CHECK-NEXT:    vmvn q1, q1
308; CHECK-NEXT:    vctp.32 r0
309; CHECK-NEXT:    vpst
310; CHECK-NEXT:    vorrt q0, q0, q1
311; CHECK-NEXT:    bx lr
312entry:
313  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
314  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
315  %a = select <4 x i1> %c, <4 x i32> %y1, <4 x i32> zeroinitializer
316  %b = or <4 x i32> %a, %x
317  ret <4 x i32> %b
318}
319
; Predicated or-with-complement, <8 x i16>. %y is inverted (xor with -1)
; before the select; lanes where the mask from @llvm.arm.mve.vctp16(%n) is
; false get 0 instead, so the or leaves %x unchanged there.
; Expected lowering: the inversion stays a separate vmvn q1, followed by
; vctp.16 + vpst + vorrt.
320define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
321; CHECK-LABEL: ornot_v8i16_x:
322; CHECK:       @ %bb.0: @ %entry
323; CHECK-NEXT:    vmvn q1, q1
324; CHECK-NEXT:    vctp.16 r0
325; CHECK-NEXT:    vpst
326; CHECK-NEXT:    vorrt q0, q0, q1
327; CHECK-NEXT:    bx lr
328entry:
329  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
330  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
331  %a = select <8 x i1> %c, <8 x i16> %y1, <8 x i16> zeroinitializer
332  %b = or <8 x i16> %a, %x
333  ret <8 x i16> %b
334}
335
; Predicated or-with-complement, <16 x i8>. %y is inverted (xor with -1)
; before the select; lanes where the mask from @llvm.arm.mve.vctp8(%n) is
; false get 0 instead, so the or leaves %x unchanged there.
; Expected lowering: the inversion stays a separate vmvn q1, followed by
; vctp.8 + vpst + vorrt.
336define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
337; CHECK-LABEL: ornot_v16i8_x:
338; CHECK:       @ %bb.0: @ %entry
339; CHECK-NEXT:    vmvn q1, q1
340; CHECK-NEXT:    vctp.8 r0
341; CHECK-NEXT:    vpst
342; CHECK-NEXT:    vorrt q0, q0, q1
343; CHECK-NEXT:    bx lr
344entry:
345  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
346  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
347  %a = select <16 x i1> %c, <16 x i8> %y1, <16 x i8> zeroinitializer
348  %b = or <16 x i8> %a, %x
349  ret <16 x i8> %b
350}
351
; Predicated floating-point add, <4 x float>. Unlike the integer tests in this
; file, the fadd is computed on every lane and the select comes afterwards:
; it keeps %x in lanes where the mask from @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vaddt.f32 only, with no separate select.
352define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
353; CHECK-LABEL: fadd_v4f32_x:
354; CHECK:       @ %bb.0: @ %entry
355; CHECK-NEXT:    vctp.32 r0
356; CHECK-NEXT:    vpst
357; CHECK-NEXT:    vaddt.f32 q0, q0, q1
358; CHECK-NEXT:    bx lr
359entry:
360  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
361  %a = fadd <4 x float> %x, %y
362  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
363  ret <4 x float> %b
364}
365
; Predicated floating-point add, <8 x half>. The fadd is computed on every
; lane and the select comes afterwards: it keeps %x in lanes where the mask
; from @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vaddt.f16 only, with no separate select.
366define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
367; CHECK-LABEL: fadd_v8f16_x:
368; CHECK:       @ %bb.0: @ %entry
369; CHECK-NEXT:    vctp.16 r0
370; CHECK-NEXT:    vpst
371; CHECK-NEXT:    vaddt.f16 q0, q0, q1
372; CHECK-NEXT:    bx lr
373entry:
374  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
375  %a = fadd <8 x half> %x, %y
376  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
377  ret <8 x half> %b
378}
379
; Predicated floating-point subtract, <4 x float>. The fsub (%x - %y) is
; computed on every lane and the select comes afterwards: it keeps %x in lanes
; where the mask from @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vsubt.f32 only, with no separate select.
380define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
381; CHECK-LABEL: fsub_v4f32_x:
382; CHECK:       @ %bb.0: @ %entry
383; CHECK-NEXT:    vctp.32 r0
384; CHECK-NEXT:    vpst
385; CHECK-NEXT:    vsubt.f32 q0, q0, q1
386; CHECK-NEXT:    bx lr
387entry:
388  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
389  %a = fsub <4 x float> %x, %y
390  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
391  ret <4 x float> %b
392}
393
; Predicated floating-point subtract, <8 x half>. The fsub (%x - %y) is
; computed on every lane and the select comes afterwards: it keeps %x in lanes
; where the mask from @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vsubt.f16 only, with no separate select.
394define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
395; CHECK-LABEL: fsub_v8f16_x:
396; CHECK:       @ %bb.0: @ %entry
397; CHECK-NEXT:    vctp.16 r0
398; CHECK-NEXT:    vpst
399; CHECK-NEXT:    vsubt.f16 q0, q0, q1
400; CHECK-NEXT:    bx lr
401entry:
402  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
403  %a = fsub <8 x half> %x, %y
404  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
405  ret <8 x half> %b
406}
407
; Predicated floating-point multiply, <4 x float>. The fmul is computed on
; every lane and the select comes afterwards: it keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vmult.f32 only, with no separate select.
408define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
409; CHECK-LABEL: fmul_v4f32_x:
410; CHECK:       @ %bb.0: @ %entry
411; CHECK-NEXT:    vctp.32 r0
412; CHECK-NEXT:    vpst
413; CHECK-NEXT:    vmult.f32 q0, q0, q1
414; CHECK-NEXT:    bx lr
415entry:
416  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
417  %a = fmul <4 x float> %x, %y
418  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
419  ret <4 x float> %b
420}
421
; Predicated floating-point multiply, <8 x half>. The fmul is computed on
; every lane and the select comes afterwards: it keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vmult.f16 only, with no separate select.
422define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
423; CHECK-LABEL: fmul_v8f16_x:
424; CHECK:       @ %bb.0: @ %entry
425; CHECK-NEXT:    vctp.16 r0
426; CHECK-NEXT:    vpst
427; CHECK-NEXT:    vmult.f16 q0, q0, q1
428; CHECK-NEXT:    bx lr
429entry:
430  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
431  %a = fmul <8 x half> %x, %y
432  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
433  ret <8 x half> %b
434}
435
; Predicated signed minimum, <4 x i32>. The icmp slt + select pair picks the
; smaller of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vmint.s32 only.
436define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
437; CHECK-LABEL: icmp_slt_v4i32_x:
438; CHECK:       @ %bb.0: @ %entry
439; CHECK-NEXT:    vctp.32 r0
440; CHECK-NEXT:    vpst
441; CHECK-NEXT:    vmint.s32 q0, q0, q1
442; CHECK-NEXT:    bx lr
443entry:
444  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
445  %a1 = icmp slt <4 x i32> %x, %y
446  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
447  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
448  ret <4 x i32> %b
449}
450
; Predicated signed minimum, <8 x i16>. The icmp slt + select pair picks the
; smaller of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vmint.s16 only.
451define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
452; CHECK-LABEL: icmp_slt_v8i16_x:
453; CHECK:       @ %bb.0: @ %entry
454; CHECK-NEXT:    vctp.16 r0
455; CHECK-NEXT:    vpst
456; CHECK-NEXT:    vmint.s16 q0, q0, q1
457; CHECK-NEXT:    bx lr
458entry:
459  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
460  %a1 = icmp slt <8 x i16> %x, %y
461  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
462  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
463  ret <8 x i16> %b
464}
465
; Predicated signed minimum, <16 x i8>. The icmp slt + select pair picks the
; smaller of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp8(%n) is false.
; Expected lowering: vctp.8 + vpst + vmint.s8 only.
466define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
467; CHECK-LABEL: icmp_slt_v16i8_x:
468; CHECK:       @ %bb.0: @ %entry
469; CHECK-NEXT:    vctp.8 r0
470; CHECK-NEXT:    vpst
471; CHECK-NEXT:    vmint.s8 q0, q0, q1
472; CHECK-NEXT:    bx lr
473entry:
474  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
475  %a1 = icmp slt <16 x i8> %x, %y
476  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
477  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
478  ret <16 x i8> %b
479}
480
; Predicated signed maximum, <4 x i32>. The icmp sgt + select pair picks the
; larger of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vmaxt.s32 only.
481define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
482; CHECK-LABEL: icmp_sgt_v4i32_x:
483; CHECK:       @ %bb.0: @ %entry
484; CHECK-NEXT:    vctp.32 r0
485; CHECK-NEXT:    vpst
486; CHECK-NEXT:    vmaxt.s32 q0, q0, q1
487; CHECK-NEXT:    bx lr
488entry:
489  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
490  %a1 = icmp sgt <4 x i32> %x, %y
491  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
492  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
493  ret <4 x i32> %b
494}
495
; Predicated signed maximum, <8 x i16>. The icmp sgt + select pair picks the
; larger of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vmaxt.s16 only.
496define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
497; CHECK-LABEL: icmp_sgt_v8i16_x:
498; CHECK:       @ %bb.0: @ %entry
499; CHECK-NEXT:    vctp.16 r0
500; CHECK-NEXT:    vpst
501; CHECK-NEXT:    vmaxt.s16 q0, q0, q1
502; CHECK-NEXT:    bx lr
503entry:
504  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
505  %a1 = icmp sgt <8 x i16> %x, %y
506  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
507  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
508  ret <8 x i16> %b
509}
510
; Predicated signed maximum, <16 x i8>. The icmp sgt + select pair picks the
; larger of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp8(%n) is false.
; Expected lowering: vctp.8 + vpst + vmaxt.s8 only.
511define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
512; CHECK-LABEL: icmp_sgt_v16i8_x:
513; CHECK:       @ %bb.0: @ %entry
514; CHECK-NEXT:    vctp.8 r0
515; CHECK-NEXT:    vpst
516; CHECK-NEXT:    vmaxt.s8 q0, q0, q1
517; CHECK-NEXT:    bx lr
518entry:
519  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
520  %a1 = icmp sgt <16 x i8> %x, %y
521  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
522  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
523  ret <16 x i8> %b
524}
525
; Predicated unsigned minimum, <4 x i32>. The icmp ult + select pair picks the
; smaller of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vmint.u32 only.
526define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
527; CHECK-LABEL: icmp_ult_v4i32_x:
528; CHECK:       @ %bb.0: @ %entry
529; CHECK-NEXT:    vctp.32 r0
530; CHECK-NEXT:    vpst
531; CHECK-NEXT:    vmint.u32 q0, q0, q1
532; CHECK-NEXT:    bx lr
533entry:
534  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
535  %a1 = icmp ult <4 x i32> %x, %y
536  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
537  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
538  ret <4 x i32> %b
539}
540
; Predicated unsigned minimum, <8 x i16>. The icmp ult + select pair picks the
; smaller of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vmint.u16 only.
541define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
542; CHECK-LABEL: icmp_ult_v8i16_x:
543; CHECK:       @ %bb.0: @ %entry
544; CHECK-NEXT:    vctp.16 r0
545; CHECK-NEXT:    vpst
546; CHECK-NEXT:    vmint.u16 q0, q0, q1
547; CHECK-NEXT:    bx lr
548entry:
549  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
550  %a1 = icmp ult <8 x i16> %x, %y
551  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
552  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
553  ret <8 x i16> %b
554}
555
; Predicated unsigned minimum, <16 x i8>. The icmp ult + select pair picks the
; smaller of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp8(%n) is false.
; Expected lowering: vctp.8 + vpst + vmint.u8 only.
556define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
557; CHECK-LABEL: icmp_ult_v16i8_x:
558; CHECK:       @ %bb.0: @ %entry
559; CHECK-NEXT:    vctp.8 r0
560; CHECK-NEXT:    vpst
561; CHECK-NEXT:    vmint.u8 q0, q0, q1
562; CHECK-NEXT:    bx lr
563entry:
564  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
565  %a1 = icmp ult <16 x i8> %x, %y
566  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
567  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
568  ret <16 x i8> %b
569}
570
; Predicated unsigned maximum, <4 x i32>. The icmp ugt + select pair picks the
; larger of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vmaxt.u32 only.
571define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
572; CHECK-LABEL: icmp_ugt_v4i32_x:
573; CHECK:       @ %bb.0: @ %entry
574; CHECK-NEXT:    vctp.32 r0
575; CHECK-NEXT:    vpst
576; CHECK-NEXT:    vmaxt.u32 q0, q0, q1
577; CHECK-NEXT:    bx lr
578entry:
579  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
580  %a1 = icmp ugt <4 x i32> %x, %y
581  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
582  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
583  ret <4 x i32> %b
584}
585
; Predicated unsigned maximum, <8 x i16>. The icmp ugt + select pair picks the
; larger of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vmaxt.u16 only.
586define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
587; CHECK-LABEL: icmp_ugt_v8i16_x:
588; CHECK:       @ %bb.0: @ %entry
589; CHECK-NEXT:    vctp.16 r0
590; CHECK-NEXT:    vpst
591; CHECK-NEXT:    vmaxt.u16 q0, q0, q1
592; CHECK-NEXT:    bx lr
593entry:
594  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
595  %a1 = icmp ugt <8 x i16> %x, %y
596  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
597  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
598  ret <8 x i16> %b
599}
600
; Predicated unsigned maximum, <16 x i8>. The icmp ugt + select pair picks the
; larger of %x and %y per lane; the outer select keeps %x in lanes where the
; mask from @llvm.arm.mve.vctp8(%n) is false.
; Expected lowering: vctp.8 + vpst + vmaxt.u8 only.
601define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
602; CHECK-LABEL: icmp_ugt_v16i8_x:
603; CHECK:       @ %bb.0: @ %entry
604; CHECK-NEXT:    vctp.8 r0
605; CHECK-NEXT:    vpst
606; CHECK-NEXT:    vmaxt.u8 q0, q0, q1
607; CHECK-NEXT:    bx lr
608entry:
609  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
610  %a1 = icmp ugt <16 x i8> %x, %y
611  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
612  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
613  ret <16 x i8> %b
614}
615
; Predicated floating-point minimum, <4 x float>. A `fast` fcmp olt + select
; pair picks %x where %x < %y and %y otherwise; the outer select keeps %x in
; lanes where the mask from @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vminnmt.f32 only.
616define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
617; CHECK-LABEL: fcmp_fast_olt_v4f32_x:
618; CHECK:       @ %bb.0: @ %entry
619; CHECK-NEXT:    vctp.32 r0
620; CHECK-NEXT:    vpst
621; CHECK-NEXT:    vminnmt.f32 q0, q0, q1
622; CHECK-NEXT:    bx lr
623entry:
624  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
625  %a1 = fcmp fast olt <4 x float> %x, %y
626  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
627  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
628  ret <4 x float> %b
629}
630
; Predicated floating-point minimum, <8 x half>. A `fast` fcmp olt + select
; pair picks %x where %x < %y and %y otherwise; the outer select keeps %x in
; lanes where the mask from @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vminnmt.f16 only.
631define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
632; CHECK-LABEL: fcmp_fast_olt_v8f16_x:
633; CHECK:       @ %bb.0: @ %entry
634; CHECK-NEXT:    vctp.16 r0
635; CHECK-NEXT:    vpst
636; CHECK-NEXT:    vminnmt.f16 q0, q0, q1
637; CHECK-NEXT:    bx lr
638entry:
639  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
640  %a1 = fcmp fast olt <8 x half> %x, %y
641  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
642  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
643  ret <8 x half> %b
644}
645
; Predicated floating-point maximum, <4 x float>. A `fast` fcmp ogt + select
; pair picks %x where %x > %y and %y otherwise; the outer select keeps %x in
; lanes where the mask from @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vmaxnmt.f32 only.
646define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
647; CHECK-LABEL: fcmp_fast_ogt_v4f32_x:
648; CHECK:       @ %bb.0: @ %entry
649; CHECK-NEXT:    vctp.32 r0
650; CHECK-NEXT:    vpst
651; CHECK-NEXT:    vmaxnmt.f32 q0, q0, q1
652; CHECK-NEXT:    bx lr
653entry:
654  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
655  %a1 = fcmp fast ogt <4 x float> %x, %y
656  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
657  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
658  ret <4 x float> %b
659}
660
; Predicated floating-point maximum, <8 x half>. A `fast` fcmp ogt + select
; pair picks %x where %x > %y and %y otherwise; the outer select keeps %x in
; lanes where the mask from @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vmaxnmt.f16 only.
661define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
662; CHECK-LABEL: fcmp_fast_ogt_v8f16_x:
663; CHECK:       @ %bb.0: @ %entry
664; CHECK-NEXT:    vctp.16 r0
665; CHECK-NEXT:    vpst
666; CHECK-NEXT:    vmaxnmt.f16 q0, q0, q1
667; CHECK-NEXT:    bx lr
668entry:
669  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
670  %a1 = fcmp fast ogt <8 x half> %x, %y
671  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
672  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
673  ret <8 x half> %b
674}
675
; Predicated signed saturating add, <4 x i32>. @llvm.sadd.sat.v4i32(%x, %y) is
; computed on every lane; the select keeps %x in lanes where the mask from
; @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vqaddt.s32 only.
676define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
677; CHECK-LABEL: sadd_sat_v4i32_x:
678; CHECK:       @ %bb.0: @ %entry
679; CHECK-NEXT:    vctp.32 r0
680; CHECK-NEXT:    vpst
681; CHECK-NEXT:    vqaddt.s32 q0, q0, q1
682; CHECK-NEXT:    bx lr
683entry:
684  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
685  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
686  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
687  ret <4 x i32> %b
688}
689
; Predicated signed saturating add, <8 x i16>. @llvm.sadd.sat.v8i16(%x, %y) is
; computed on every lane; the select keeps %x in lanes where the mask from
; @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vqaddt.s16 only.
690define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
691; CHECK-LABEL: sadd_sat_v8i16_x:
692; CHECK:       @ %bb.0: @ %entry
693; CHECK-NEXT:    vctp.16 r0
694; CHECK-NEXT:    vpst
695; CHECK-NEXT:    vqaddt.s16 q0, q0, q1
696; CHECK-NEXT:    bx lr
697entry:
698  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
699  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
700  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
701  ret <8 x i16> %b
702}
703
; Predicated signed saturating add, <16 x i8>. @llvm.sadd.sat.v16i8(%x, %y) is
; computed on every lane; the select keeps %x in lanes where the mask from
; @llvm.arm.mve.vctp8(%n) is false.
; Expected lowering: vctp.8 + vpst + vqaddt.s8 only.
704define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
705; CHECK-LABEL: sadd_sat_v16i8_x:
706; CHECK:       @ %bb.0: @ %entry
707; CHECK-NEXT:    vctp.8 r0
708; CHECK-NEXT:    vpst
709; CHECK-NEXT:    vqaddt.s8 q0, q0, q1
710; CHECK-NEXT:    bx lr
711entry:
712  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
713  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
714  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
715  ret <16 x i8> %b
716}
717
; Predicated unsigned saturating add, <4 x i32>. @llvm.uadd.sat.v4i32(%x, %y)
; is computed on every lane; the select keeps %x in lanes where the mask from
; @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vqaddt.u32 only.
718define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
719; CHECK-LABEL: uadd_sat_v4i32_x:
720; CHECK:       @ %bb.0: @ %entry
721; CHECK-NEXT:    vctp.32 r0
722; CHECK-NEXT:    vpst
723; CHECK-NEXT:    vqaddt.u32 q0, q0, q1
724; CHECK-NEXT:    bx lr
725entry:
726  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
727  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
728  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
729  ret <4 x i32> %b
730}
731
; Predicated unsigned saturating add, <8 x i16>. @llvm.uadd.sat.v8i16(%x, %y)
; is computed on every lane; the select keeps %x in lanes where the mask from
; @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vqaddt.u16 only.
732define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
733; CHECK-LABEL: uadd_sat_v8i16_x:
734; CHECK:       @ %bb.0: @ %entry
735; CHECK-NEXT:    vctp.16 r0
736; CHECK-NEXT:    vpst
737; CHECK-NEXT:    vqaddt.u16 q0, q0, q1
738; CHECK-NEXT:    bx lr
739entry:
740  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
741  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
742  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
743  ret <8 x i16> %b
744}
745
; Predicated unsigned saturating add, <16 x i8>. @llvm.uadd.sat.v16i8(%x, %y)
; is computed on every lane; the select keeps %x in lanes where the mask from
; @llvm.arm.mve.vctp8(%n) is false.
; Expected lowering: vctp.8 + vpst + vqaddt.u8 only.
746define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
747; CHECK-LABEL: uadd_sat_v16i8_x:
748; CHECK:       @ %bb.0: @ %entry
749; CHECK-NEXT:    vctp.8 r0
750; CHECK-NEXT:    vpst
751; CHECK-NEXT:    vqaddt.u8 q0, q0, q1
752; CHECK-NEXT:    bx lr
753entry:
754  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
755  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
756  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
757  ret <16 x i8> %b
758}
759
; Predicated signed saturating subtract, <4 x i32>.
; @llvm.ssub.sat.v4i32(%x, %y) is computed on every lane; the select keeps %x
; in lanes where the mask from @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vqsubt.s32 only.
760define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
761; CHECK-LABEL: ssub_sat_v4i32_x:
762; CHECK:       @ %bb.0: @ %entry
763; CHECK-NEXT:    vctp.32 r0
764; CHECK-NEXT:    vpst
765; CHECK-NEXT:    vqsubt.s32 q0, q0, q1
766; CHECK-NEXT:    bx lr
767entry:
768  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
769  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
770  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
771  ret <4 x i32> %b
772}
773
; Predicated signed saturating subtract, <8 x i16>.
; @llvm.ssub.sat.v8i16(%x, %y) is computed on every lane; the select keeps %x
; in lanes where the mask from @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vqsubt.s16 only.
774define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
775; CHECK-LABEL: ssub_sat_v8i16_x:
776; CHECK:       @ %bb.0: @ %entry
777; CHECK-NEXT:    vctp.16 r0
778; CHECK-NEXT:    vpst
779; CHECK-NEXT:    vqsubt.s16 q0, q0, q1
780; CHECK-NEXT:    bx lr
781entry:
782  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
783  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
784  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
785  ret <8 x i16> %b
786}
787
; Predicated signed saturating subtract, <16 x i8>.
; @llvm.ssub.sat.v16i8(%x, %y) is computed on every lane; the select keeps %x
; in lanes where the mask from @llvm.arm.mve.vctp8(%n) is false.
; Expected lowering: vctp.8 + vpst + vqsubt.s8 only.
788define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
789; CHECK-LABEL: ssub_sat_v16i8_x:
790; CHECK:       @ %bb.0: @ %entry
791; CHECK-NEXT:    vctp.8 r0
792; CHECK-NEXT:    vpst
793; CHECK-NEXT:    vqsubt.s8 q0, q0, q1
794; CHECK-NEXT:    bx lr
795entry:
796  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
797  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
798  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
799  ret <16 x i8> %b
800}
801
; Predicated unsigned saturating subtract, <4 x i32>.
; @llvm.usub.sat.v4i32(%x, %y) is computed on every lane; the select keeps %x
; in lanes where the mask from @llvm.arm.mve.vctp32(%n) is false.
; Expected lowering: vctp.32 + vpst + vqsubt.u32 only.
802define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
803; CHECK-LABEL: usub_sat_v4i32_x:
804; CHECK:       @ %bb.0: @ %entry
805; CHECK-NEXT:    vctp.32 r0
806; CHECK-NEXT:    vpst
807; CHECK-NEXT:    vqsubt.u32 q0, q0, q1
808; CHECK-NEXT:    bx lr
809entry:
810  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
811  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
812  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
813  ret <4 x i32> %b
814}
815
; Predicated unsigned saturating subtract, <8 x i16>.
; @llvm.usub.sat.v8i16(%x, %y) is computed on every lane; the select keeps %x
; in lanes where the mask from @llvm.arm.mve.vctp16(%n) is false.
; Expected lowering: vctp.16 + vpst + vqsubt.u16 only.
816define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
817; CHECK-LABEL: usub_sat_v8i16_x:
818; CHECK:       @ %bb.0: @ %entry
819; CHECK-NEXT:    vctp.16 r0
820; CHECK-NEXT:    vpst
821; CHECK-NEXT:    vqsubt.u16 q0, q0, q1
822; CHECK-NEXT:    bx lr
823entry:
824  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
825  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
826  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
827  ret <8 x i16> %b
828}
829
; Predicated unsigned saturating subtract, <16 x i8>.
; @llvm.usub.sat.v16i8(%x, %y) is computed on every lane; the select keeps %x
; in lanes where the mask from @llvm.arm.mve.vctp8(%n) is false.
; Expected lowering: vctp.8 + vpst + vqsubt.u8 only.
830define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
831; CHECK-LABEL: usub_sat_v16i8_x:
832; CHECK:       @ %bb.0: @ %entry
833; CHECK-NEXT:    vctp.8 r0
834; CHECK-NEXT:    vpst
835; CHECK-NEXT:    vqsubt.u8 q0, q0, q1
836; CHECK-NEXT:    bx lr
837entry:
838  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
839  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
840  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
841  ret <16 x i8> %b
842}
843
; Predicated add of a splatted scalar, <4 x i32>. The scalar %y is broadcast
; to every lane (insertelement into lane 0 + zero-mask shufflevector), then
; replaced by 0 in lanes where the mask from @llvm.arm.mve.vctp32(%n) is
; false, so those lanes of the result equal %x.
; Expected lowering: the vector/scalar form vaddt.i32 q0, q0, r0 predicated by
; vctp.32 r1 + vpst, with no separate splat or select.
844define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
845; CHECK-LABEL: addqr_v4i32_x:
846; CHECK:       @ %bb.0: @ %entry
847; CHECK-NEXT:    vctp.32 r1
848; CHECK-NEXT:    vpst
849; CHECK-NEXT:    vaddt.i32 q0, q0, r0
850; CHECK-NEXT:    bx lr
851entry:
852  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
853  %i = insertelement <4 x i32> undef, i32 %y, i32 0
854  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
855  %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> zeroinitializer
856  %b = add <4 x i32> %a, %x
857  ret <4 x i32> %b
858}
859
; Predicated add of a splatted scalar, <8 x i16>. The scalar %y is broadcast
; to every lane (insertelement into lane 0 + zero-mask shufflevector), then
; replaced by 0 in lanes where the mask from @llvm.arm.mve.vctp16(%n) is
; false, so those lanes of the result equal %x.
; Expected lowering: the vector/scalar form vaddt.i16 q0, q0, r0 predicated by
; vctp.16 r1 + vpst, with no separate splat or select.
860define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
861; CHECK-LABEL: addqr_v8i16_x:
862; CHECK:       @ %bb.0: @ %entry
863; CHECK-NEXT:    vctp.16 r1
864; CHECK-NEXT:    vpst
865; CHECK-NEXT:    vaddt.i16 q0, q0, r0
866; CHECK-NEXT:    bx lr
867entry:
868  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
869  %i = insertelement <8 x i16> undef, i16 %y, i32 0
870  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
871  %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> zeroinitializer
872  %b = add <8 x i16> %a, %x
873  ret <8 x i16> %b
874}
875
; Predicated add of a splatted scalar, <16 x i8>. The scalar %y is broadcast
; to every lane (insertelement into lane 0 + zero-mask shufflevector), then
; replaced by 0 in lanes where the mask from @llvm.arm.mve.vctp8(%n) is
; false, so those lanes of the result equal %x.
; Expected lowering: the vector/scalar form vaddt.i8 q0, q0, r0 predicated by
; vctp.8 r1 + vpst, with no separate splat or select.
876define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
877; CHECK-LABEL: addqr_v16i8_x:
878; CHECK:       @ %bb.0: @ %entry
879; CHECK-NEXT:    vctp.8 r1
880; CHECK-NEXT:    vpst
881; CHECK-NEXT:    vaddt.i8 q0, q0, r0
882; CHECK-NEXT:    bx lr
883entry:
884  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
885  %i = insertelement <16 x i8> undef, i8 %y, i32 0
886  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
887  %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> zeroinitializer
888  %b = add <16 x i8> %a, %x
889  ret <16 x i8> %b
890}
891
; Splats scalar %y, keeps it only in lanes where the vctp32(%n) predicate is
; true (zero elsewhere) and subtracts the result from %x, so lanes with a
; false predicate return %x unchanged.
; The assertions expect one predicated vsubt.i32 taking the scalar from r0.
892define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
893; CHECK-LABEL: subqr_v4i32_x:
894; CHECK:       @ %bb.0: @ %entry
895; CHECK-NEXT:    vctp.32 r1
896; CHECK-NEXT:    vpst
897; CHECK-NEXT:    vsubt.i32 q0, q0, r0
898; CHECK-NEXT:    bx lr
899entry:
900  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
901  %i = insertelement <4 x i32> undef, i32 %y, i32 0
902  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
903  %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> zeroinitializer
904  %b = sub <4 x i32> %x, %a
905  ret <4 x i32> %b
906}
907
; Splats scalar %y, keeps it only in lanes where the vctp16(%n) predicate is
; true (zero elsewhere) and subtracts the result from %x, so lanes with a
; false predicate return %x unchanged.
; The assertions expect one predicated vsubt.i16 taking the scalar from r0.
908define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
909; CHECK-LABEL: subqr_v8i16_x:
910; CHECK:       @ %bb.0: @ %entry
911; CHECK-NEXT:    vctp.16 r1
912; CHECK-NEXT:    vpst
913; CHECK-NEXT:    vsubt.i16 q0, q0, r0
914; CHECK-NEXT:    bx lr
915entry:
916  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
917  %i = insertelement <8 x i16> undef, i16 %y, i32 0
918  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
919  %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> zeroinitializer
920  %b = sub <8 x i16> %x, %a
921  ret <8 x i16> %b
922}
923
; Splats scalar %y, keeps it only in lanes where the vctp8(%n) predicate is
; true (zero elsewhere) and subtracts the result from %x, so lanes with a
; false predicate return %x unchanged.
; The assertions expect one predicated vsubt.i8 taking the scalar from r0.
924define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
925; CHECK-LABEL: subqr_v16i8_x:
926; CHECK:       @ %bb.0: @ %entry
927; CHECK-NEXT:    vctp.8 r1
928; CHECK-NEXT:    vpst
929; CHECK-NEXT:    vsubt.i8 q0, q0, r0
930; CHECK-NEXT:    bx lr
931entry:
932  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
933  %i = insertelement <16 x i8> undef, i8 %y, i32 0
934  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
935  %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> zeroinitializer
936  %b = sub <16 x i8> %x, %a
937  ret <16 x i8> %b
938}
939
; Splats scalar %y, keeps it only in lanes where the vctp32(%n) predicate is
; true (one, the multiplicative identity, elsewhere) and multiplies by %x.
; The assertions expect one predicated vmult.i32 taking the scalar from r0.
940define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
941; CHECK-LABEL: mulqr_v4i32_x:
942; CHECK:       @ %bb.0: @ %entry
943; CHECK-NEXT:    vctp.32 r1
944; CHECK-NEXT:    vpst
945; CHECK-NEXT:    vmult.i32 q0, q0, r0
946; CHECK-NEXT:    bx lr
947entry:
948  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
949  %i = insertelement <4 x i32> undef, i32 %y, i32 0
950  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
951  %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
952  %b = mul <4 x i32> %a, %x
953  ret <4 x i32> %b
954}
955
; Splats scalar %y, keeps it only in lanes where the vctp16(%n) predicate is
; true (one, the multiplicative identity, elsewhere) and multiplies by %x.
; The assertions expect one predicated vmult.i16 taking the scalar from r0.
956define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
957; CHECK-LABEL: mulqr_v8i16_x:
958; CHECK:       @ %bb.0: @ %entry
959; CHECK-NEXT:    vctp.16 r1
960; CHECK-NEXT:    vpst
961; CHECK-NEXT:    vmult.i16 q0, q0, r0
962; CHECK-NEXT:    bx lr
963entry:
964  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
965  %i = insertelement <8 x i16> undef, i16 %y, i32 0
966  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
967  %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
968  %b = mul <8 x i16> %a, %x
969  ret <8 x i16> %b
970}
971
; Splats scalar %y, keeps it only in lanes where the vctp8(%n) predicate is
; true (one, the multiplicative identity, elsewhere) and multiplies by %x.
; The assertions expect one predicated vmult.i8 taking the scalar from r0.
972define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
973; CHECK-LABEL: mulqr_v16i8_x:
974; CHECK:       @ %bb.0: @ %entry
975; CHECK-NEXT:    vctp.8 r1
976; CHECK-NEXT:    vpst
977; CHECK-NEXT:    vmult.i8 q0, q0, r0
978; CHECK-NEXT:    bx lr
979entry:
980  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
981  %i = insertelement <16 x i8> undef, i8 %y, i32 0
982  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
983  %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
984  %b = mul <16 x i8> %a, %x
985  ret <16 x i8> %b
986}
987
; Splats float %y, computes splat + %x in every lane, then keeps the sum only
; where the vctp32(%n) predicate is true and the original %x elsewhere.
; The assertions expect the scalar to be moved to r1 (vmov r1, s4) followed by
; one predicated vaddt.f32 q0, q0, r1.
988define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
989; CHECK-LABEL: faddqr_v4f32_x:
990; CHECK:       @ %bb.0: @ %entry
991; CHECK-NEXT:    vmov r1, s4
992; CHECK-NEXT:    vctp.32 r0
993; CHECK-NEXT:    vpst
994; CHECK-NEXT:    vaddt.f32 q0, q0, r1
995; CHECK-NEXT:    bx lr
996entry:
997  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
998  %i = insertelement <4 x float> undef, float %y, i32 0
999  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
1000  %a = fadd <4 x float> %ys, %x
1001  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
1002  ret <4 x float> %b
1003}
1004
; Splats half %y, computes splat + %x in every lane, then keeps the sum only
; where the vctp16(%n) predicate is true and the original %x elsewhere.
; The assertions expect the scalar to be moved to r1 (vmov.f16 r1, s4) followed
; by one predicated vaddt.f16 q0, q0, r1.
1005define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
1006; CHECK-LABEL: faddqr_v8f16_x:
1007; CHECK:       @ %bb.0: @ %entry
1008; CHECK-NEXT:    vmov.f16 r1, s4
1009; CHECK-NEXT:    vctp.16 r0
1010; CHECK-NEXT:    vpst
1011; CHECK-NEXT:    vaddt.f16 q0, q0, r1
1012; CHECK-NEXT:    bx lr
1013entry:
1014  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1015  %i = insertelement <8 x half> undef, half %y, i32 0
1016  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
1017  %a = fadd <8 x half> %ys, %x
1018  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
1019  ret <8 x half> %b
1020}
1021
; Splats float %y, computes %x - splat in every lane, then keeps the difference
; only where the vctp32(%n) predicate is true and the original %x elsewhere.
; The assertions expect the scalar to be moved to r1 (vmov r1, s4) followed by
; one predicated vsubt.f32 q0, q0, r1.
1022define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
1023; CHECK-LABEL: fsubqr_v4f32_x:
1024; CHECK:       @ %bb.0: @ %entry
1025; CHECK-NEXT:    vmov r1, s4
1026; CHECK-NEXT:    vctp.32 r0
1027; CHECK-NEXT:    vpst
1028; CHECK-NEXT:    vsubt.f32 q0, q0, r1
1029; CHECK-NEXT:    bx lr
1030entry:
1031  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1032  %i = insertelement <4 x float> undef, float %y, i32 0
1033  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
1034  %a = fsub <4 x float> %x, %ys
1035  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
1036  ret <4 x float> %b
1037}
1038
; Splats half %y, computes %x - splat in every lane, then keeps the difference
; only where the vctp16(%n) predicate is true and the original %x elsewhere.
; The assertions expect the scalar to be moved to r1 (vmov.f16 r1, s4) followed
; by one predicated vsubt.f16 q0, q0, r1.
1039define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
1040; CHECK-LABEL: fsubqr_v8f16_x:
1041; CHECK:       @ %bb.0: @ %entry
1042; CHECK-NEXT:    vmov.f16 r1, s4
1043; CHECK-NEXT:    vctp.16 r0
1044; CHECK-NEXT:    vpst
1045; CHECK-NEXT:    vsubt.f16 q0, q0, r1
1046; CHECK-NEXT:    bx lr
1047entry:
1048  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1049  %i = insertelement <8 x half> undef, half %y, i32 0
1050  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
1051  %a = fsub <8 x half> %x, %ys
1052  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
1053  ret <8 x half> %b
1054}
1055
; Splats float %y, computes splat * %x in every lane, then keeps the product
; only where the vctp32(%n) predicate is true and the original %x elsewhere.
; The assertions expect the scalar to be moved to r1 (vmov r1, s4) followed by
; one predicated vmult.f32 q0, q0, r1.
1056define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_x(<4 x float> %x, float %y, i32 %n) {
1057; CHECK-LABEL: fmulqr_v4f32_x:
1058; CHECK:       @ %bb.0: @ %entry
1059; CHECK-NEXT:    vmov r1, s4
1060; CHECK-NEXT:    vctp.32 r0
1061; CHECK-NEXT:    vpst
1062; CHECK-NEXT:    vmult.f32 q0, q0, r1
1063; CHECK-NEXT:    bx lr
1064entry:
1065  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1066  %i = insertelement <4 x float> undef, float %y, i32 0
1067  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
1068  %a = fmul <4 x float> %ys, %x
1069  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
1070  ret <4 x float> %b
1071}
1072
; Splats half %y, computes splat * %x in every lane, then keeps the product
; only where the vctp16(%n) predicate is true and the original %x elsewhere.
; The assertions expect the scalar to be moved to r1 (vmov.f16 r1, s4) followed
; by one predicated vmult.f16 q0, q0, r1.
1073define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_x(<8 x half> %x, half %y, i32 %n) {
1074; CHECK-LABEL: fmulqr_v8f16_x:
1075; CHECK:       @ %bb.0: @ %entry
1076; CHECK-NEXT:    vmov.f16 r1, s4
1077; CHECK-NEXT:    vctp.16 r0
1078; CHECK-NEXT:    vpst
1079; CHECK-NEXT:    vmult.f16 q0, q0, r1
1080; CHECK-NEXT:    bx lr
1081entry:
1082  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1083  %i = insertelement <8 x half> undef, half %y, i32 0
1084  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
1085  %a = fmul <8 x half> %ys, %x
1086  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
1087  ret <8 x half> %b
1088}
1089
; Splats scalar %y, applies llvm.sadd.sat(%x, splat) and keeps the saturated
; sum only where the vctp32(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqaddt.s32 taking the scalar from r0.
1090define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1091; CHECK-LABEL: sadd_satqr_v4i32_x:
1092; CHECK:       @ %bb.0: @ %entry
1093; CHECK-NEXT:    vctp.32 r1
1094; CHECK-NEXT:    vpst
1095; CHECK-NEXT:    vqaddt.s32 q0, q0, r0
1096; CHECK-NEXT:    bx lr
1097entry:
1098  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1099  %i = insertelement <4 x i32> undef, i32 %y, i32 0
1100  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1101  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1102  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
1103  ret <4 x i32> %b
1104}
1105
; Splats scalar %y, applies llvm.sadd.sat(%x, splat) and keeps the saturated
; sum only where the vctp16(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqaddt.s16 taking the scalar from r0.
1106define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1107; CHECK-LABEL: sadd_satqr_v8i16_x:
1108; CHECK:       @ %bb.0: @ %entry
1109; CHECK-NEXT:    vctp.16 r1
1110; CHECK-NEXT:    vpst
1111; CHECK-NEXT:    vqaddt.s16 q0, q0, r0
1112; CHECK-NEXT:    bx lr
1113entry:
1114  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1115  %i = insertelement <8 x i16> undef, i16 %y, i32 0
1116  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1117  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1118  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
1119  ret <8 x i16> %b
1120}
1121
; Splats scalar %y, applies llvm.sadd.sat(%x, splat) and keeps the saturated
; sum only where the vctp8(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqaddt.s8 taking the scalar from r0.
1122define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1123; CHECK-LABEL: sadd_satqr_v16i8_x:
1124; CHECK:       @ %bb.0: @ %entry
1125; CHECK-NEXT:    vctp.8 r1
1126; CHECK-NEXT:    vpst
1127; CHECK-NEXT:    vqaddt.s8 q0, q0, r0
1128; CHECK-NEXT:    bx lr
1129entry:
1130  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1131  %i = insertelement <16 x i8> undef, i8 %y, i32 0
1132  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1133  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1134  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
1135  ret <16 x i8> %b
1136}
1137
; Splats scalar %y, applies llvm.uadd.sat(%x, splat) and keeps the saturated
; sum only where the vctp32(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqaddt.u32 taking the scalar from r0.
1138define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1139; CHECK-LABEL: uadd_satqr_v4i32_x:
1140; CHECK:       @ %bb.0: @ %entry
1141; CHECK-NEXT:    vctp.32 r1
1142; CHECK-NEXT:    vpst
1143; CHECK-NEXT:    vqaddt.u32 q0, q0, r0
1144; CHECK-NEXT:    bx lr
1145entry:
1146  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1147  %i = insertelement <4 x i32> undef, i32 %y, i32 0
1148  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1149  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1150  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
1151  ret <4 x i32> %b
1152}
1153
; Splats scalar %y, applies llvm.uadd.sat(%x, splat) and keeps the saturated
; sum only where the vctp16(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqaddt.u16 taking the scalar from r0.
1154define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1155; CHECK-LABEL: uadd_satqr_v8i16_x:
1156; CHECK:       @ %bb.0: @ %entry
1157; CHECK-NEXT:    vctp.16 r1
1158; CHECK-NEXT:    vpst
1159; CHECK-NEXT:    vqaddt.u16 q0, q0, r0
1160; CHECK-NEXT:    bx lr
1161entry:
1162  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1163  %i = insertelement <8 x i16> undef, i16 %y, i32 0
1164  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1165  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1166  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
1167  ret <8 x i16> %b
1168}
1169
; Splats scalar %y, applies llvm.uadd.sat(%x, splat) and keeps the saturated
; sum only where the vctp8(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqaddt.u8 taking the scalar from r0.
1170define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1171; CHECK-LABEL: uadd_satqr_v16i8_x:
1172; CHECK:       @ %bb.0: @ %entry
1173; CHECK-NEXT:    vctp.8 r1
1174; CHECK-NEXT:    vpst
1175; CHECK-NEXT:    vqaddt.u8 q0, q0, r0
1176; CHECK-NEXT:    bx lr
1177entry:
1178  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1179  %i = insertelement <16 x i8> undef, i8 %y, i32 0
1180  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1181  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1182  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
1183  ret <16 x i8> %b
1184}
1185
; Splats scalar %y, applies llvm.ssub.sat(%x, splat) and keeps the saturated
; difference only where the vctp32(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqsubt.s32 taking the scalar from r0.
1186define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1187; CHECK-LABEL: ssub_satqr_v4i32_x:
1188; CHECK:       @ %bb.0: @ %entry
1189; CHECK-NEXT:    vctp.32 r1
1190; CHECK-NEXT:    vpst
1191; CHECK-NEXT:    vqsubt.s32 q0, q0, r0
1192; CHECK-NEXT:    bx lr
1193entry:
1194  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1195  %i = insertelement <4 x i32> undef, i32 %y, i32 0
1196  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1197  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1198  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
1199  ret <4 x i32> %b
1200}
1201
; Splats scalar %y, applies llvm.ssub.sat(%x, splat) and keeps the saturated
; difference only where the vctp16(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqsubt.s16 taking the scalar from r0.
1202define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1203; CHECK-LABEL: ssub_satqr_v8i16_x:
1204; CHECK:       @ %bb.0: @ %entry
1205; CHECK-NEXT:    vctp.16 r1
1206; CHECK-NEXT:    vpst
1207; CHECK-NEXT:    vqsubt.s16 q0, q0, r0
1208; CHECK-NEXT:    bx lr
1209entry:
1210  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1211  %i = insertelement <8 x i16> undef, i16 %y, i32 0
1212  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1213  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1214  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
1215  ret <8 x i16> %b
1216}
1217
; Splats scalar %y, applies llvm.ssub.sat(%x, splat) and keeps the saturated
; difference only where the vctp8(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqsubt.s8 taking the scalar from r0.
1218define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1219; CHECK-LABEL: ssub_satqr_v16i8_x:
1220; CHECK:       @ %bb.0: @ %entry
1221; CHECK-NEXT:    vctp.8 r1
1222; CHECK-NEXT:    vpst
1223; CHECK-NEXT:    vqsubt.s8 q0, q0, r0
1224; CHECK-NEXT:    bx lr
1225entry:
1226  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1227  %i = insertelement <16 x i8> undef, i8 %y, i32 0
1228  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1229  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1230  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
1231  ret <16 x i8> %b
1232}
1233
; Splats scalar %y, applies llvm.usub.sat(%x, splat) and keeps the saturated
; difference only where the vctp32(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqsubt.u32 taking the scalar from r0.
1234define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) {
1235; CHECK-LABEL: usub_satqr_v4i32_x:
1236; CHECK:       @ %bb.0: @ %entry
1237; CHECK-NEXT:    vctp.32 r1
1238; CHECK-NEXT:    vpst
1239; CHECK-NEXT:    vqsubt.u32 q0, q0, r0
1240; CHECK-NEXT:    bx lr
1241entry:
1242  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1243  %i = insertelement <4 x i32> undef, i32 %y, i32 0
1244  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
1245  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
1246  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
1247  ret <4 x i32> %b
1248}
1249
; Splats scalar %y, applies llvm.usub.sat(%x, splat) and keeps the saturated
; difference only where the vctp16(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqsubt.u16 taking the scalar from r0.
1250define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) {
1251; CHECK-LABEL: usub_satqr_v8i16_x:
1252; CHECK:       @ %bb.0: @ %entry
1253; CHECK-NEXT:    vctp.16 r1
1254; CHECK-NEXT:    vpst
1255; CHECK-NEXT:    vqsubt.u16 q0, q0, r0
1256; CHECK-NEXT:    bx lr
1257entry:
1258  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1259  %i = insertelement <8 x i16> undef, i16 %y, i32 0
1260  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
1261  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
1262  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
1263  ret <8 x i16> %b
1264}
1265
; Splats scalar %y, applies llvm.usub.sat(%x, splat) and keeps the saturated
; difference only where the vctp8(%n) predicate is true, %x elsewhere.
; The assertions expect one predicated vqsubt.u8 taking the scalar from r0.
1266define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) {
1267; CHECK-LABEL: usub_satqr_v16i8_x:
1268; CHECK:       @ %bb.0: @ %entry
1269; CHECK-NEXT:    vctp.8 r1
1270; CHECK-NEXT:    vpst
1271; CHECK-NEXT:    vqsubt.u8 q0, q0, r0
1272; CHECK-NEXT:    bx lr
1273entry:
1274  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1275  %i = insertelement <16 x i8> undef, i8 %y, i32 0
1276  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
1277  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
1278  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
1279  ret <16 x i8> %b
1280}
1281
; Keeps %x only where the vctp32(%n) predicate is true (zero elsewhere) and
; adds it to %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vaddt.i32 q1, q1, q0 followed by
; vmov q0, q1.
1282define arm_aapcs_vfpcc <4 x i32> @add_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1283; CHECK-LABEL: add_v4i32_y:
1284; CHECK:       @ %bb.0: @ %entry
1285; CHECK-NEXT:    vctp.32 r0
1286; CHECK-NEXT:    vpst
1287; CHECK-NEXT:    vaddt.i32 q1, q1, q0
1288; CHECK-NEXT:    vmov q0, q1
1289; CHECK-NEXT:    bx lr
1290entry:
1291  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1292  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
1293  %b = add <4 x i32> %a, %y
1294  ret <4 x i32> %b
1295}
1296
; Keeps %x only where the vctp16(%n) predicate is true (zero elsewhere) and
; adds it to %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vaddt.i16 q1, q1, q0 followed by
; vmov q0, q1.
1297define arm_aapcs_vfpcc <8 x i16> @add_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1298; CHECK-LABEL: add_v8i16_y:
1299; CHECK:       @ %bb.0: @ %entry
1300; CHECK-NEXT:    vctp.16 r0
1301; CHECK-NEXT:    vpst
1302; CHECK-NEXT:    vaddt.i16 q1, q1, q0
1303; CHECK-NEXT:    vmov q0, q1
1304; CHECK-NEXT:    bx lr
1305entry:
1306  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1307  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
1308  %b = add <8 x i16> %a, %y
1309  ret <8 x i16> %b
1310}
1311
; Keeps %x only where the vctp8(%n) predicate is true (zero elsewhere) and
; adds it to %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vaddt.i8 q1, q1, q0 followed by
; vmov q0, q1.
1312define arm_aapcs_vfpcc <16 x i8> @add_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1313; CHECK-LABEL: add_v16i8_y:
1314; CHECK:       @ %bb.0: @ %entry
1315; CHECK-NEXT:    vctp.8 r0
1316; CHECK-NEXT:    vpst
1317; CHECK-NEXT:    vaddt.i8 q1, q1, q0
1318; CHECK-NEXT:    vmov q0, q1
1319; CHECK-NEXT:    bx lr
1320entry:
1321  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1322  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
1323  %b = add <16 x i8> %a, %y
1324  ret <16 x i8> %b
1325}
1326
; Computes %x - %y in every lane, then keeps the difference only where the
; vctp32(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vsubt.i32 q1, q0, q1 followed by
; vmov q0, q1.
1327define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1328; CHECK-LABEL: sub_v4i32_y:
1329; CHECK:       @ %bb.0: @ %entry
1330; CHECK-NEXT:    vctp.32 r0
1331; CHECK-NEXT:    vpst
1332; CHECK-NEXT:    vsubt.i32 q1, q0, q1
1333; CHECK-NEXT:    vmov q0, q1
1334; CHECK-NEXT:    bx lr
1335entry:
1336  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1337  %a = sub <4 x i32> %x, %y
1338  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1339  ret <4 x i32> %b
1340}
1341
; Computes %x - %y in every lane, then keeps the difference only where the
; vctp16(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vsubt.i16 q1, q0, q1 followed by
; vmov q0, q1.
1342define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1343; CHECK-LABEL: sub_v8i16_y:
1344; CHECK:       @ %bb.0: @ %entry
1345; CHECK-NEXT:    vctp.16 r0
1346; CHECK-NEXT:    vpst
1347; CHECK-NEXT:    vsubt.i16 q1, q0, q1
1348; CHECK-NEXT:    vmov q0, q1
1349; CHECK-NEXT:    bx lr
1350entry:
1351  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1352  %a = sub <8 x i16> %x, %y
1353  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1354  ret <8 x i16> %b
1355}
1356
; Computes %x - %y in every lane, then keeps the difference only where the
; vctp8(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vsubt.i8 q1, q0, q1 followed by
; vmov q0, q1.
1357define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1358; CHECK-LABEL: sub_v16i8_y:
1359; CHECK:       @ %bb.0: @ %entry
1360; CHECK-NEXT:    vctp.8 r0
1361; CHECK-NEXT:    vpst
1362; CHECK-NEXT:    vsubt.i8 q1, q0, q1
1363; CHECK-NEXT:    vmov q0, q1
1364; CHECK-NEXT:    bx lr
1365entry:
1366  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1367  %a = sub <16 x i8> %x, %y
1368  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1369  ret <16 x i8> %b
1370}
1371
; Keeps %x only where the vctp32(%n) predicate is true (one elsewhere) and
; multiplies by %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vmult.i32 q1, q1, q0 followed by
; vmov q0, q1.
1372define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1373; CHECK-LABEL: mul_v4i32_y:
1374; CHECK:       @ %bb.0: @ %entry
1375; CHECK-NEXT:    vctp.32 r0
1376; CHECK-NEXT:    vpst
1377; CHECK-NEXT:    vmult.i32 q1, q1, q0
1378; CHECK-NEXT:    vmov q0, q1
1379; CHECK-NEXT:    bx lr
1380entry:
1381  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1382  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1383  %b = mul <4 x i32> %a, %y
1384  ret <4 x i32> %b
1385}
1386
; Keeps %x only where the vctp16(%n) predicate is true (one elsewhere) and
; multiplies by %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vmult.i16 q1, q1, q0 followed by
; vmov q0, q1.
1387define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1388; CHECK-LABEL: mul_v8i16_y:
1389; CHECK:       @ %bb.0: @ %entry
1390; CHECK-NEXT:    vctp.16 r0
1391; CHECK-NEXT:    vpst
1392; CHECK-NEXT:    vmult.i16 q1, q1, q0
1393; CHECK-NEXT:    vmov q0, q1
1394; CHECK-NEXT:    bx lr
1395entry:
1396  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1397  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
1398  %b = mul <8 x i16> %a, %y
1399  ret <8 x i16> %b
1400}
1401
; Keeps %x only where the vctp8(%n) predicate is true (one elsewhere) and
; multiplies by %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vmult.i8 q1, q1, q0 followed by
; vmov q0, q1.
1402define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1403; CHECK-LABEL: mul_v16i8_y:
1404; CHECK:       @ %bb.0: @ %entry
1405; CHECK-NEXT:    vctp.8 r0
1406; CHECK-NEXT:    vpst
1407; CHECK-NEXT:    vmult.i8 q1, q1, q0
1408; CHECK-NEXT:    vmov q0, q1
1409; CHECK-NEXT:    bx lr
1410entry:
1411  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1412  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
1413  %b = mul <16 x i8> %a, %y
1414  ret <16 x i8> %b
1415}
1416
; Keeps %x only where the vctp32(%n) predicate is true (all-ones elsewhere)
; and ANDs it with %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vandt q1, q1, q0 followed by vmov q0, q1.
1417define arm_aapcs_vfpcc <4 x i32> @and_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1418; CHECK-LABEL: and_v4i32_y:
1419; CHECK:       @ %bb.0: @ %entry
1420; CHECK-NEXT:    vctp.32 r0
1421; CHECK-NEXT:    vpst
1422; CHECK-NEXT:    vandt q1, q1, q0
1423; CHECK-NEXT:    vmov q0, q1
1424; CHECK-NEXT:    bx lr
1425entry:
1426  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1427  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
1428  %b = and <4 x i32> %a, %y
1429  ret <4 x i32> %b
1430}
1431
; Keeps %x only where the vctp16(%n) predicate is true (all-ones elsewhere)
; and ANDs it with %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vandt q1, q1, q0 followed by vmov q0, q1.
1432define arm_aapcs_vfpcc <8 x i16> @and_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1433; CHECK-LABEL: and_v8i16_y:
1434; CHECK:       @ %bb.0: @ %entry
1435; CHECK-NEXT:    vctp.16 r0
1436; CHECK-NEXT:    vpst
1437; CHECK-NEXT:    vandt q1, q1, q0
1438; CHECK-NEXT:    vmov q0, q1
1439; CHECK-NEXT:    bx lr
1440entry:
1441  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1442  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1443  %b = and <8 x i16> %a, %y
1444  ret <8 x i16> %b
1445}
1446
; Keeps %x only where the vctp8(%n) predicate is true (all-ones elsewhere)
; and ANDs it with %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vandt q1, q1, q0 followed by vmov q0, q1.
1447define arm_aapcs_vfpcc <16 x i8> @and_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1448; CHECK-LABEL: and_v16i8_y:
1449; CHECK:       @ %bb.0: @ %entry
1450; CHECK-NEXT:    vctp.8 r0
1451; CHECK-NEXT:    vpst
1452; CHECK-NEXT:    vandt q1, q1, q0
1453; CHECK-NEXT:    vmov q0, q1
1454; CHECK-NEXT:    bx lr
1455entry:
1456  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1457  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1458  %b = and <16 x i8> %a, %y
1459  ret <16 x i8> %b
1460}
1461
; Keeps %x only where the vctp32(%n) predicate is true (zero elsewhere) and
; ORs it with %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vorrt q1, q1, q0 followed by vmov q0, q1.
1462define arm_aapcs_vfpcc <4 x i32> @or_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1463; CHECK-LABEL: or_v4i32_y:
1464; CHECK:       @ %bb.0: @ %entry
1465; CHECK-NEXT:    vctp.32 r0
1466; CHECK-NEXT:    vpst
1467; CHECK-NEXT:    vorrt q1, q1, q0
1468; CHECK-NEXT:    vmov q0, q1
1469; CHECK-NEXT:    bx lr
1470entry:
1471  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1472  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
1473  %b = or <4 x i32> %a, %y
1474  ret <4 x i32> %b
1475}
1476
; Keeps %x only where the vctp16(%n) predicate is true (zero elsewhere) and
; ORs it with %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vorrt q1, q1, q0 followed by vmov q0, q1.
1477define arm_aapcs_vfpcc <8 x i16> @or_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1478; CHECK-LABEL: or_v8i16_y:
1479; CHECK:       @ %bb.0: @ %entry
1480; CHECK-NEXT:    vctp.16 r0
1481; CHECK-NEXT:    vpst
1482; CHECK-NEXT:    vorrt q1, q1, q0
1483; CHECK-NEXT:    vmov q0, q1
1484; CHECK-NEXT:    bx lr
1485entry:
1486  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1487  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
1488  %b = or <8 x i16> %a, %y
1489  ret <8 x i16> %b
1490}
1491
; Keeps %x only where the vctp8(%n) predicate is true (zero elsewhere) and
; ORs it with %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated vorrt q1, q1, q0 followed by vmov q0, q1.
1492define arm_aapcs_vfpcc <16 x i8> @or_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1493; CHECK-LABEL: or_v16i8_y:
1494; CHECK:       @ %bb.0: @ %entry
1495; CHECK-NEXT:    vctp.8 r0
1496; CHECK-NEXT:    vpst
1497; CHECK-NEXT:    vorrt q1, q1, q0
1498; CHECK-NEXT:    vmov q0, q1
1499; CHECK-NEXT:    bx lr
1500entry:
1501  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1502  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
1503  %b = or <16 x i8> %a, %y
1504  ret <16 x i8> %b
1505}
1506
; Keeps %x only where the vctp32(%n) predicate is true (zero elsewhere) and
; XORs it with %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated veort q1, q1, q0 followed by vmov q0, q1.
1507define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1508; CHECK-LABEL: xor_v4i32_y:
1509; CHECK:       @ %bb.0: @ %entry
1510; CHECK-NEXT:    vctp.32 r0
1511; CHECK-NEXT:    vpst
1512; CHECK-NEXT:    veort q1, q1, q0
1513; CHECK-NEXT:    vmov q0, q1
1514; CHECK-NEXT:    bx lr
1515entry:
1516  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1517  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
1518  %b = xor <4 x i32> %a, %y
1519  ret <4 x i32> %b
1520}
1521
; Keeps %x only where the vctp16(%n) predicate is true (zero elsewhere) and
; XORs it with %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated veort q1, q1, q0 followed by vmov q0, q1.
1522define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1523; CHECK-LABEL: xor_v8i16_y:
1524; CHECK:       @ %bb.0: @ %entry
1525; CHECK-NEXT:    vctp.16 r0
1526; CHECK-NEXT:    vpst
1527; CHECK-NEXT:    veort q1, q1, q0
1528; CHECK-NEXT:    vmov q0, q1
1529; CHECK-NEXT:    bx lr
1530entry:
1531  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1532  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
1533  %b = xor <8 x i16> %a, %y
1534  ret <8 x i16> %b
1535}
1536
; Keeps %x only where the vctp8(%n) predicate is true (zero elsewhere) and
; XORs it with %y, so lanes with a false predicate return %y unchanged.
; The assertions expect a predicated veort q1, q1, q0 followed by vmov q0, q1.
1537define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1538; CHECK-LABEL: xor_v16i8_y:
1539; CHECK:       @ %bb.0: @ %entry
1540; CHECK-NEXT:    vctp.8 r0
1541; CHECK-NEXT:    vpst
1542; CHECK-NEXT:    veort q1, q1, q0
1543; CHECK-NEXT:    vmov q0, q1
1544; CHECK-NEXT:    bx lr
1545entry:
1546  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1547  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
1548  %b = xor <16 x i8> %a, %y
1549  ret <16 x i8> %b
1550}
1551
; Computes (~%y) & %x in every lane, then keeps that value only where the
; vctp32(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vbict q1, q0, q1 followed by vmov q0, q1.
1552define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1553; CHECK-LABEL: andnot_v4i32_y:
1554; CHECK:       @ %bb.0: @ %entry
1555; CHECK-NEXT:    vctp.32 r0
1556; CHECK-NEXT:    vpst
1557; CHECK-NEXT:    vbict q1, q0, q1
1558; CHECK-NEXT:    vmov q0, q1
1559; CHECK-NEXT:    bx lr
1560entry:
1561  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1562  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
1563  %a = and <4 x i32> %y1, %x
1564  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1565  ret <4 x i32> %b
1566}
1567
; Computes (~%y) & %x in every lane, then keeps that value only where the
; vctp16(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vbict q1, q0, q1 followed by vmov q0, q1.
1568define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1569; CHECK-LABEL: andnot_v8i16_y:
1570; CHECK:       @ %bb.0: @ %entry
1571; CHECK-NEXT:    vctp.16 r0
1572; CHECK-NEXT:    vpst
1573; CHECK-NEXT:    vbict q1, q0, q1
1574; CHECK-NEXT:    vmov q0, q1
1575; CHECK-NEXT:    bx lr
1576entry:
1577  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1578  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1579  %a = and <8 x i16> %y1, %x
1580  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1581  ret <8 x i16> %b
1582}
1583
; Computes (~%y) & %x in every lane, then keeps that value only where the
; vctp8(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vbict q1, q0, q1 followed by vmov q0, q1.
1584define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1585; CHECK-LABEL: andnot_v16i8_y:
1586; CHECK:       @ %bb.0: @ %entry
1587; CHECK-NEXT:    vctp.8 r0
1588; CHECK-NEXT:    vpst
1589; CHECK-NEXT:    vbict q1, q0, q1
1590; CHECK-NEXT:    vmov q0, q1
1591; CHECK-NEXT:    bx lr
1592entry:
1593  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1594  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1595  %a = and <16 x i8> %y1, %x
1596  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1597  ret <16 x i8> %b
1598}
1599
; Computes (~%y) | %x in every lane, then keeps that value only where the
; vctp32(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vornt q1, q0, q1 followed by vmov q0, q1.
1600define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1601; CHECK-LABEL: ornot_v4i32_y:
1602; CHECK:       @ %bb.0: @ %entry
1603; CHECK-NEXT:    vctp.32 r0
1604; CHECK-NEXT:    vpst
1605; CHECK-NEXT:    vornt q1, q0, q1
1606; CHECK-NEXT:    vmov q0, q1
1607; CHECK-NEXT:    bx lr
1608entry:
1609  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1610  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
1611  %a = or <4 x i32> %y1, %x
1612  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
1613  ret <4 x i32> %b
1614}
1615
; Computes (~%y) | %x in every lane, then keeps that value only where the
; vctp16(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vornt q1, q0, q1 followed by vmov q0, q1.
1616define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1617; CHECK-LABEL: ornot_v8i16_y:
1618; CHECK:       @ %bb.0: @ %entry
1619; CHECK-NEXT:    vctp.16 r0
1620; CHECK-NEXT:    vpst
1621; CHECK-NEXT:    vornt q1, q0, q1
1622; CHECK-NEXT:    vmov q0, q1
1623; CHECK-NEXT:    bx lr
1624entry:
1625  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1626  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
1627  %a = or <8 x i16> %y1, %x
1628  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
1629  ret <8 x i16> %b
1630}
1631
; Computes (~%y) | %x in every lane, then keeps that value only where the
; vctp8(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vornt q1, q0, q1 followed by vmov q0, q1.
1632define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1633; CHECK-LABEL: ornot_v16i8_y:
1634; CHECK:       @ %bb.0: @ %entry
1635; CHECK-NEXT:    vctp.8 r0
1636; CHECK-NEXT:    vpst
1637; CHECK-NEXT:    vornt q1, q0, q1
1638; CHECK-NEXT:    vmov q0, q1
1639; CHECK-NEXT:    bx lr
1640entry:
1641  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1642  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
1643  %a = or <16 x i8> %y1, %x
1644  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
1645  ret <16 x i8> %b
1646}
1647
; Computes %x + %y in every lane, then keeps the sum only where the
; vctp32(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vaddt.f32 q1, q0, q1 followed by
; vmov q0, q1.
1648define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
1649; CHECK-LABEL: fadd_v4f32_y:
1650; CHECK:       @ %bb.0: @ %entry
1651; CHECK-NEXT:    vctp.32 r0
1652; CHECK-NEXT:    vpst
1653; CHECK-NEXT:    vaddt.f32 q1, q0, q1
1654; CHECK-NEXT:    vmov q0, q1
1655; CHECK-NEXT:    bx lr
1656entry:
1657  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1658  %a = fadd <4 x float> %x, %y
1659  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
1660  ret <4 x float> %b
1661}
1662
; Computes %x + %y in every lane, then keeps the sum only where the
; vctp16(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vaddt.f16 q1, q0, q1 followed by
; vmov q0, q1.
1663define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
1664; CHECK-LABEL: fadd_v8f16_y:
1665; CHECK:       @ %bb.0: @ %entry
1666; CHECK-NEXT:    vctp.16 r0
1667; CHECK-NEXT:    vpst
1668; CHECK-NEXT:    vaddt.f16 q1, q0, q1
1669; CHECK-NEXT:    vmov q0, q1
1670; CHECK-NEXT:    bx lr
1671entry:
1672  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1673  %a = fadd <8 x half> %x, %y
1674  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
1675  ret <8 x half> %b
1676}
1677
; Computes %x - %y in every lane, then keeps the difference only where the
; vctp32(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vsubt.f32 q1, q0, q1 followed by
; vmov q0, q1.
1678define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
1679; CHECK-LABEL: fsub_v4f32_y:
1680; CHECK:       @ %bb.0: @ %entry
1681; CHECK-NEXT:    vctp.32 r0
1682; CHECK-NEXT:    vpst
1683; CHECK-NEXT:    vsubt.f32 q1, q0, q1
1684; CHECK-NEXT:    vmov q0, q1
1685; CHECK-NEXT:    bx lr
1686entry:
1687  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1688  %a = fsub <4 x float> %x, %y
1689  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
1690  ret <4 x float> %b
1691}
1692
; Computes %x - %y in every lane, then keeps the difference only where the
; vctp16(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vsubt.f16 q1, q0, q1 followed by
; vmov q0, q1.
1693define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
1694; CHECK-LABEL: fsub_v8f16_y:
1695; CHECK:       @ %bb.0: @ %entry
1696; CHECK-NEXT:    vctp.16 r0
1697; CHECK-NEXT:    vpst
1698; CHECK-NEXT:    vsubt.f16 q1, q0, q1
1699; CHECK-NEXT:    vmov q0, q1
1700; CHECK-NEXT:    bx lr
1701entry:
1702  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1703  %a = fsub <8 x half> %x, %y
1704  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
1705  ret <8 x half> %b
1706}
1707
; Computes %x * %y in every lane, then keeps the product only where the
; vctp32(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vmult.f32 q1, q0, q1 followed by
; vmov q0, q1.
1708define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
1709; CHECK-LABEL: fmul_v4f32_y:
1710; CHECK:       @ %bb.0: @ %entry
1711; CHECK-NEXT:    vctp.32 r0
1712; CHECK-NEXT:    vpst
1713; CHECK-NEXT:    vmult.f32 q1, q0, q1
1714; CHECK-NEXT:    vmov q0, q1
1715; CHECK-NEXT:    bx lr
1716entry:
1717  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1718  %a = fmul <4 x float> %x, %y
1719  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
1720  ret <4 x float> %b
1721}
1722
; Computes %x * %y in every lane, then keeps the product only where the
; vctp16(%n) predicate is true and %y elsewhere.
; The assertions expect a predicated vmult.f16 q1, q0, q1 followed by
; vmov q0, q1.
1723define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
1724; CHECK-LABEL: fmul_v8f16_y:
1725; CHECK:       @ %bb.0: @ %entry
1726; CHECK-NEXT:    vctp.16 r0
1727; CHECK-NEXT:    vpst
1728; CHECK-NEXT:    vmult.f16 q1, q0, q1
1729; CHECK-NEXT:    vmov q0, q1
1730; CHECK-NEXT:    bx lr
1731entry:
1732  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1733  %a = fmul <8 x half> %x, %y
1734  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
1735  ret <8 x half> %b
1736}
1737
; Select of %x over %y under the AND of the vctp32(%n) mask and icmp slt %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.s32 gt, q1, q0 and vmovt q1, q0,
; followed by vmov q0, q1.
1738define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1739; CHECK-LABEL: icmp_slt_v4i32_y:
1740; CHECK:       @ %bb.0: @ %entry
1741; CHECK-NEXT:    vctp.32 r0
1742; CHECK-NEXT:    vpstt
1743; CHECK-NEXT:    vcmpt.s32 gt, q1, q0
1744; CHECK-NEXT:    vmovt q1, q0
1745; CHECK-NEXT:    vmov q0, q1
1746; CHECK-NEXT:    bx lr
1747entry:
1748  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1749  %a1 = icmp slt <4 x i32> %x, %y
1750  %0 = and <4 x i1> %c, %a1
1751  %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
1752  ret <4 x i32> %b
1753}
1754
; Select of %x over %y under the AND of the vctp16(%n) mask and icmp slt %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.s16 gt, q1, q0 and vmovt q1, q0,
; followed by vmov q0, q1.
1755define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1756; CHECK-LABEL: icmp_slt_v8i16_y:
1757; CHECK:       @ %bb.0: @ %entry
1758; CHECK-NEXT:    vctp.16 r0
1759; CHECK-NEXT:    vpstt
1760; CHECK-NEXT:    vcmpt.s16 gt, q1, q0
1761; CHECK-NEXT:    vmovt q1, q0
1762; CHECK-NEXT:    vmov q0, q1
1763; CHECK-NEXT:    bx lr
1764entry:
1765  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1766  %a1 = icmp slt <8 x i16> %x, %y
1767  %0 = and <8 x i1> %c, %a1
1768  %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
1769  ret <8 x i16> %b
1770}
1771
; Select of %x over %y under the AND of the vctp8(%n) mask and icmp slt %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.s8 gt, q1, q0 and vmovt q1, q0,
; followed by vmov q0, q1.
1772define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1773; CHECK-LABEL: icmp_slt_v16i8_y:
1774; CHECK:       @ %bb.0: @ %entry
1775; CHECK-NEXT:    vctp.8 r0
1776; CHECK-NEXT:    vpstt
1777; CHECK-NEXT:    vcmpt.s8 gt, q1, q0
1778; CHECK-NEXT:    vmovt q1, q0
1779; CHECK-NEXT:    vmov q0, q1
1780; CHECK-NEXT:    bx lr
1781entry:
1782  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1783  %a1 = icmp slt <16 x i8> %x, %y
1784  %0 = and <16 x i1> %c, %a1
1785  %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
1786  ret <16 x i8> %b
1787}
1788
; Select of %x over %y under the AND of the vctp32(%n) mask and icmp sgt %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.s32 gt, q0, q1 and vmovt q1, q0,
; followed by vmov q0, q1.
1789define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1790; CHECK-LABEL: icmp_sgt_v4i32_y:
1791; CHECK:       @ %bb.0: @ %entry
1792; CHECK-NEXT:    vctp.32 r0
1793; CHECK-NEXT:    vpstt
1794; CHECK-NEXT:    vcmpt.s32 gt, q0, q1
1795; CHECK-NEXT:    vmovt q1, q0
1796; CHECK-NEXT:    vmov q0, q1
1797; CHECK-NEXT:    bx lr
1798entry:
1799  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1800  %a1 = icmp sgt <4 x i32> %x, %y
1801  %0 = and <4 x i1> %c, %a1
1802  %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
1803  ret <4 x i32> %b
1804}
1805
; Select of %x over %y under the AND of the vctp16(%n) mask and icmp sgt %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.s16 gt, q0, q1 and vmovt q1, q0,
; followed by vmov q0, q1.
1806define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1807; CHECK-LABEL: icmp_sgt_v8i16_y:
1808; CHECK:       @ %bb.0: @ %entry
1809; CHECK-NEXT:    vctp.16 r0
1810; CHECK-NEXT:    vpstt
1811; CHECK-NEXT:    vcmpt.s16 gt, q0, q1
1812; CHECK-NEXT:    vmovt q1, q0
1813; CHECK-NEXT:    vmov q0, q1
1814; CHECK-NEXT:    bx lr
1815entry:
1816  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1817  %a1 = icmp sgt <8 x i16> %x, %y
1818  %0 = and <8 x i1> %c, %a1
1819  %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
1820  ret <8 x i16> %b
1821}
1822
; Select of %x over %y under the AND of the vctp8(%n) mask and icmp sgt %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.s8 gt, q0, q1 and vmovt q1, q0,
; followed by vmov q0, q1.
1823define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1824; CHECK-LABEL: icmp_sgt_v16i8_y:
1825; CHECK:       @ %bb.0: @ %entry
1826; CHECK-NEXT:    vctp.8 r0
1827; CHECK-NEXT:    vpstt
1828; CHECK-NEXT:    vcmpt.s8 gt, q0, q1
1829; CHECK-NEXT:    vmovt q1, q0
1830; CHECK-NEXT:    vmov q0, q1
1831; CHECK-NEXT:    bx lr
1832entry:
1833  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1834  %a1 = icmp sgt <16 x i8> %x, %y
1835  %0 = and <16 x i1> %c, %a1
1836  %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
1837  ret <16 x i8> %b
1838}
1839
; Select of %x over %y under the AND of the vctp32(%n) mask and icmp ult %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.u32 hi, q1, q0 and vmovt q1, q0,
; followed by vmov q0, q1.
1840define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1841; CHECK-LABEL: icmp_ult_v4i32_y:
1842; CHECK:       @ %bb.0: @ %entry
1843; CHECK-NEXT:    vctp.32 r0
1844; CHECK-NEXT:    vpstt
1845; CHECK-NEXT:    vcmpt.u32 hi, q1, q0
1846; CHECK-NEXT:    vmovt q1, q0
1847; CHECK-NEXT:    vmov q0, q1
1848; CHECK-NEXT:    bx lr
1849entry:
1850  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1851  %a1 = icmp ult <4 x i32> %x, %y
1852  %0 = and <4 x i1> %c, %a1
1853  %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
1854  ret <4 x i32> %b
1855}
1856
; Select of %x over %y under the AND of the vctp16(%n) mask and icmp ult %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.u16 hi, q1, q0 and vmovt q1, q0,
; followed by vmov q0, q1.
1857define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1858; CHECK-LABEL: icmp_ult_v8i16_y:
1859; CHECK:       @ %bb.0: @ %entry
1860; CHECK-NEXT:    vctp.16 r0
1861; CHECK-NEXT:    vpstt
1862; CHECK-NEXT:    vcmpt.u16 hi, q1, q0
1863; CHECK-NEXT:    vmovt q1, q0
1864; CHECK-NEXT:    vmov q0, q1
1865; CHECK-NEXT:    bx lr
1866entry:
1867  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1868  %a1 = icmp ult <8 x i16> %x, %y
1869  %0 = and <8 x i1> %c, %a1
1870  %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
1871  ret <8 x i16> %b
1872}
1873
; Select of %x over %y under the AND of the vctp8(%n) mask and icmp ult %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.u8 hi, q1, q0 and vmovt q1, q0,
; followed by vmov q0, q1.
1874define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1875; CHECK-LABEL: icmp_ult_v16i8_y:
1876; CHECK:       @ %bb.0: @ %entry
1877; CHECK-NEXT:    vctp.8 r0
1878; CHECK-NEXT:    vpstt
1879; CHECK-NEXT:    vcmpt.u8 hi, q1, q0
1880; CHECK-NEXT:    vmovt q1, q0
1881; CHECK-NEXT:    vmov q0, q1
1882; CHECK-NEXT:    bx lr
1883entry:
1884  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1885  %a1 = icmp ult <16 x i8> %x, %y
1886  %0 = and <16 x i1> %c, %a1
1887  %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
1888  ret <16 x i8> %b
1889}
1890
; Select of %x over %y under the AND of the vctp32(%n) mask and icmp ugt %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.u32 hi, q0, q1 and vmovt q1, q0,
; followed by vmov q0, q1.
1891define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
1892; CHECK-LABEL: icmp_ugt_v4i32_y:
1893; CHECK:       @ %bb.0: @ %entry
1894; CHECK-NEXT:    vctp.32 r0
1895; CHECK-NEXT:    vpstt
1896; CHECK-NEXT:    vcmpt.u32 hi, q0, q1
1897; CHECK-NEXT:    vmovt q1, q0
1898; CHECK-NEXT:    vmov q0, q1
1899; CHECK-NEXT:    bx lr
1900entry:
1901  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1902  %a1 = icmp ugt <4 x i32> %x, %y
1903  %0 = and <4 x i1> %c, %a1
1904  %b = select <4 x i1> %0, <4 x i32> %x, <4 x i32> %y
1905  ret <4 x i32> %b
1906}
1907
; Select of %x over %y under the AND of the vctp16(%n) mask and icmp ugt %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.u16 hi, q0, q1 and vmovt q1, q0,
; followed by vmov q0, q1.
1908define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
1909; CHECK-LABEL: icmp_ugt_v8i16_y:
1910; CHECK:       @ %bb.0: @ %entry
1911; CHECK-NEXT:    vctp.16 r0
1912; CHECK-NEXT:    vpstt
1913; CHECK-NEXT:    vcmpt.u16 hi, q0, q1
1914; CHECK-NEXT:    vmovt q1, q0
1915; CHECK-NEXT:    vmov q0, q1
1916; CHECK-NEXT:    bx lr
1917entry:
1918  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1919  %a1 = icmp ugt <8 x i16> %x, %y
1920  %0 = and <8 x i1> %c, %a1
1921  %b = select <8 x i1> %0, <8 x i16> %x, <8 x i16> %y
1922  ret <8 x i16> %b
1923}
1924
; Select of %x over %y under the AND of the vctp8(%n) mask and icmp ugt %x, %y:
; %b is %x where both conditions hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.u8 hi, q0, q1 and vmovt q1, q0,
; followed by vmov q0, q1.
1925define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
1926; CHECK-LABEL: icmp_ugt_v16i8_y:
1927; CHECK:       @ %bb.0: @ %entry
1928; CHECK-NEXT:    vctp.8 r0
1929; CHECK-NEXT:    vpstt
1930; CHECK-NEXT:    vcmpt.u8 hi, q0, q1
1931; CHECK-NEXT:    vmovt q1, q0
1932; CHECK-NEXT:    vmov q0, q1
1933; CHECK-NEXT:    bx lr
1934entry:
1935  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
1936  %a1 = icmp ugt <16 x i8> %x, %y
1937  %0 = and <16 x i1> %c, %a1
1938  %b = select <16 x i1> %0, <16 x i8> %x, <16 x i8> %y
1939  ret <16 x i8> %b
1940}
1941
; Select of %x over %y under the AND of the vctp32(%n) mask and
; fcmp fast olt %x, %y: %b is %x where both hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.f32 gt, q1, q0 and vmovt q1, q0,
; followed by vmov q0, q1.
1942define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
1943; CHECK-LABEL: fcmp_fast_olt_v4f32_y:
1944; CHECK:       @ %bb.0: @ %entry
1945; CHECK-NEXT:    vctp.32 r0
1946; CHECK-NEXT:    vpstt
1947; CHECK-NEXT:    vcmpt.f32 gt, q1, q0
1948; CHECK-NEXT:    vmovt q1, q0
1949; CHECK-NEXT:    vmov q0, q1
1950; CHECK-NEXT:    bx lr
1951entry:
1952  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1953  %a1 = fcmp fast olt <4 x float> %x, %y
1954  %0 = and <4 x i1> %c, %a1
1955  %b = select <4 x i1> %0, <4 x float> %x, <4 x float> %y
1956  ret <4 x float> %b
1957}
1958
; Select of %x over %y under the AND of the vctp16(%n) mask and
; fcmp fast olt %x, %y: %b is %x where both hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.f16 gt, q1, q0 and vmovt q1, q0,
; followed by vmov q0, q1.
1959define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
1960; CHECK-LABEL: fcmp_fast_olt_v8f16_y:
1961; CHECK:       @ %bb.0: @ %entry
1962; CHECK-NEXT:    vctp.16 r0
1963; CHECK-NEXT:    vpstt
1964; CHECK-NEXT:    vcmpt.f16 gt, q1, q0
1965; CHECK-NEXT:    vmovt q1, q0
1966; CHECK-NEXT:    vmov q0, q1
1967; CHECK-NEXT:    bx lr
1968entry:
1969  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
1970  %a1 = fcmp fast olt <8 x half> %x, %y
1971  %0 = and <8 x i1> %c, %a1
1972  %b = select <8 x i1> %0, <8 x half> %x, <8 x half> %y
1973  ret <8 x half> %b
1974}
1975
; Select of %x over %y under the AND of the vctp32(%n) mask and
; fcmp fast ogt %x, %y: %b is %x where both hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.f32 gt, q0, q1 and vmovt q1, q0,
; followed by vmov q0, q1.
1976define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
1977; CHECK-LABEL: fcmp_fast_ogt_v4f32_y:
1978; CHECK:       @ %bb.0: @ %entry
1979; CHECK-NEXT:    vctp.32 r0
1980; CHECK-NEXT:    vpstt
1981; CHECK-NEXT:    vcmpt.f32 gt, q0, q1
1982; CHECK-NEXT:    vmovt q1, q0
1983; CHECK-NEXT:    vmov q0, q1
1984; CHECK-NEXT:    bx lr
1985entry:
1986  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
1987  %a1 = fcmp fast ogt <4 x float> %x, %y
1988  %0 = and <4 x i1> %c, %a1
1989  %b = select <4 x i1> %0, <4 x float> %x, <4 x float> %y
1990  ret <4 x float> %b
1991}
1992
; Select of %x over %y under the AND of the vctp16(%n) mask and
; fcmp fast ogt %x, %y: %b is %x where both hold and %y in every other lane.
; Expected assembly: a vpstt block holding vcmpt.f16 gt, q0, q1 and vmovt q1, q0,
; followed by vmov q0, q1.
1993define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
1994; CHECK-LABEL: fcmp_fast_ogt_v8f16_y:
1995; CHECK:       @ %bb.0: @ %entry
1996; CHECK-NEXT:    vctp.16 r0
1997; CHECK-NEXT:    vpstt
1998; CHECK-NEXT:    vcmpt.f16 gt, q0, q1
1999; CHECK-NEXT:    vmovt q1, q0
2000; CHECK-NEXT:    vmov q0, q1
2001; CHECK-NEXT:    bx lr
2002entry:
2003  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2004  %a1 = fcmp fast ogt <8 x half> %x, %y
2005  %0 = and <8 x i1> %c, %a1
2006  %b = select <8 x i1> %0, <8 x half> %x, <8 x half> %y
2007  ret <8 x half> %b
2008}
2009
; Predicated signed saturating add with %y as the fallback value:
; %a = @llvm.sadd.sat.v4i32(%x, %y), and %b takes %a where the vctp32(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqaddt.s32 writing q1, then vmov q0, q1.
2010define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2011; CHECK-LABEL: sadd_sat_v4i32_y:
2012; CHECK:       @ %bb.0: @ %entry
2013; CHECK-NEXT:    vctp.32 r0
2014; CHECK-NEXT:    vpst
2015; CHECK-NEXT:    vqaddt.s32 q1, q0, q1
2016; CHECK-NEXT:    vmov q0, q1
2017; CHECK-NEXT:    bx lr
2018entry:
2019  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2020  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2021  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2022  ret <4 x i32> %b
2023}
2024
; Predicated signed saturating add with %y as the fallback value:
; %a = @llvm.sadd.sat.v8i16(%x, %y), and %b takes %a where the vctp16(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqaddt.s16 writing q1, then vmov q0, q1.
2025define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2026; CHECK-LABEL: sadd_sat_v8i16_y:
2027; CHECK:       @ %bb.0: @ %entry
2028; CHECK-NEXT:    vctp.16 r0
2029; CHECK-NEXT:    vpst
2030; CHECK-NEXT:    vqaddt.s16 q1, q0, q1
2031; CHECK-NEXT:    vmov q0, q1
2032; CHECK-NEXT:    bx lr
2033entry:
2034  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2035  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2036  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2037  ret <8 x i16> %b
2038}
2039
; Predicated signed saturating add with %y as the fallback value:
; %a = @llvm.sadd.sat.v16i8(%x, %y), and %b takes %a where the vctp8(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqaddt.s8 writing q1, then vmov q0, q1.
2040define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2041; CHECK-LABEL: sadd_sat_v16i8_y:
2042; CHECK:       @ %bb.0: @ %entry
2043; CHECK-NEXT:    vctp.8 r0
2044; CHECK-NEXT:    vpst
2045; CHECK-NEXT:    vqaddt.s8 q1, q0, q1
2046; CHECK-NEXT:    vmov q0, q1
2047; CHECK-NEXT:    bx lr
2048entry:
2049  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2050  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2051  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2052  ret <16 x i8> %b
2053}
2054
; Predicated unsigned saturating add with %y as the fallback value:
; %a = @llvm.uadd.sat.v4i32(%x, %y), and %b takes %a where the vctp32(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqaddt.u32 writing q1, then vmov q0, q1.
2055define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2056; CHECK-LABEL: uadd_sat_v4i32_y:
2057; CHECK:       @ %bb.0: @ %entry
2058; CHECK-NEXT:    vctp.32 r0
2059; CHECK-NEXT:    vpst
2060; CHECK-NEXT:    vqaddt.u32 q1, q0, q1
2061; CHECK-NEXT:    vmov q0, q1
2062; CHECK-NEXT:    bx lr
2063entry:
2064  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2065  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2066  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2067  ret <4 x i32> %b
2068}
2069
; Predicated unsigned saturating add with %y as the fallback value:
; %a = @llvm.uadd.sat.v8i16(%x, %y), and %b takes %a where the vctp16(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqaddt.u16 writing q1, then vmov q0, q1.
2070define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2071; CHECK-LABEL: uadd_sat_v8i16_y:
2072; CHECK:       @ %bb.0: @ %entry
2073; CHECK-NEXT:    vctp.16 r0
2074; CHECK-NEXT:    vpst
2075; CHECK-NEXT:    vqaddt.u16 q1, q0, q1
2076; CHECK-NEXT:    vmov q0, q1
2077; CHECK-NEXT:    bx lr
2078entry:
2079  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2080  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2081  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2082  ret <8 x i16> %b
2083}
2084
; Predicated unsigned saturating add with %y as the fallback value:
; %a = @llvm.uadd.sat.v16i8(%x, %y), and %b takes %a where the vctp8(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqaddt.u8 writing q1, then vmov q0, q1.
2085define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2086; CHECK-LABEL: uadd_sat_v16i8_y:
2087; CHECK:       @ %bb.0: @ %entry
2088; CHECK-NEXT:    vctp.8 r0
2089; CHECK-NEXT:    vpst
2090; CHECK-NEXT:    vqaddt.u8 q1, q0, q1
2091; CHECK-NEXT:    vmov q0, q1
2092; CHECK-NEXT:    bx lr
2093entry:
2094  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2095  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2096  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2097  ret <16 x i8> %b
2098}
2099
; Predicated signed saturating subtract with %y as the fallback value:
; %a = @llvm.ssub.sat.v4i32(%x, %y), and %b takes %a where the vctp32(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqsubt.s32 writing q1, then vmov q0, q1.
2100define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2101; CHECK-LABEL: ssub_sat_v4i32_y:
2102; CHECK:       @ %bb.0: @ %entry
2103; CHECK-NEXT:    vctp.32 r0
2104; CHECK-NEXT:    vpst
2105; CHECK-NEXT:    vqsubt.s32 q1, q0, q1
2106; CHECK-NEXT:    vmov q0, q1
2107; CHECK-NEXT:    bx lr
2108entry:
2109  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2110  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2111  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2112  ret <4 x i32> %b
2113}
2114
; Predicated signed saturating subtract with %y as the fallback value:
; %a = @llvm.ssub.sat.v8i16(%x, %y), and %b takes %a where the vctp16(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqsubt.s16 writing q1, then vmov q0, q1.
2115define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2116; CHECK-LABEL: ssub_sat_v8i16_y:
2117; CHECK:       @ %bb.0: @ %entry
2118; CHECK-NEXT:    vctp.16 r0
2119; CHECK-NEXT:    vpst
2120; CHECK-NEXT:    vqsubt.s16 q1, q0, q1
2121; CHECK-NEXT:    vmov q0, q1
2122; CHECK-NEXT:    bx lr
2123entry:
2124  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2125  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2126  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2127  ret <8 x i16> %b
2128}
2129
; Predicated signed saturating subtract with %y as the fallback value:
; %a = @llvm.ssub.sat.v16i8(%x, %y), and %b takes %a where the vctp8(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqsubt.s8 writing q1, then vmov q0, q1.
2130define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2131; CHECK-LABEL: ssub_sat_v16i8_y:
2132; CHECK:       @ %bb.0: @ %entry
2133; CHECK-NEXT:    vctp.8 r0
2134; CHECK-NEXT:    vpst
2135; CHECK-NEXT:    vqsubt.s8 q1, q0, q1
2136; CHECK-NEXT:    vmov q0, q1
2137; CHECK-NEXT:    bx lr
2138entry:
2139  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2140  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2141  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2142  ret <16 x i8> %b
2143}
2144
; Predicated unsigned saturating subtract with %y as the fallback value:
; %a = @llvm.usub.sat.v4i32(%x, %y), and %b takes %a where the vctp32(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqsubt.u32 writing q1, then vmov q0, q1.
2145define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
2146; CHECK-LABEL: usub_sat_v4i32_y:
2147; CHECK:       @ %bb.0: @ %entry
2148; CHECK-NEXT:    vctp.32 r0
2149; CHECK-NEXT:    vpst
2150; CHECK-NEXT:    vqsubt.u32 q1, q0, q1
2151; CHECK-NEXT:    vmov q0, q1
2152; CHECK-NEXT:    bx lr
2153entry:
2154  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2155  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
2156  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
2157  ret <4 x i32> %b
2158}
2159
; Predicated unsigned saturating subtract with %y as the fallback value:
; %a = @llvm.usub.sat.v8i16(%x, %y), and %b takes %a where the vctp16(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqsubt.u16 writing q1, then vmov q0, q1.
2160define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
2161; CHECK-LABEL: usub_sat_v8i16_y:
2162; CHECK:       @ %bb.0: @ %entry
2163; CHECK-NEXT:    vctp.16 r0
2164; CHECK-NEXT:    vpst
2165; CHECK-NEXT:    vqsubt.u16 q1, q0, q1
2166; CHECK-NEXT:    vmov q0, q1
2167; CHECK-NEXT:    bx lr
2168entry:
2169  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2170  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
2171  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
2172  ret <8 x i16> %b
2173}
2174
; Predicated unsigned saturating subtract with %y as the fallback value:
; %a = @llvm.usub.sat.v16i8(%x, %y), and %b takes %a where the vctp8(%n) mask is
; set and %y in the remaining lanes.
; Expected assembly: a vpst-predicated vqsubt.u8 writing q1, then vmov q0, q1.
2175define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
2176; CHECK-LABEL: usub_sat_v16i8_y:
2177; CHECK:       @ %bb.0: @ %entry
2178; CHECK-NEXT:    vctp.8 r0
2179; CHECK-NEXT:    vpst
2180; CHECK-NEXT:    vqsubt.u8 q1, q0, q1
2181; CHECK-NEXT:    vmov q0, q1
2182; CHECK-NEXT:    bx lr
2183entry:
2184  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2185  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
2186  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
2187  ret <16 x i8> %b
2188}
2189
; Add of a splatted scalar and a masked vector: %ys broadcasts scalar %y
; (insertelement into lane 0 + shufflevector with an all-zero mask), %a is %x
; where the vctp32(%n) mask is set and zero elsewhere, and %b = add %ys, %a.
; Expected assembly: vdup.32 q1, r0 for the splat, then a vpst-predicated
; vaddt.i32 q1, q1, q0 and vmov q0, q1.
2190define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2191; CHECK-LABEL: addqr_v4i32_y:
2192; CHECK:       @ %bb.0: @ %entry
2193; CHECK-NEXT:    vdup.32 q1, r0
2194; CHECK-NEXT:    vctp.32 r1
2195; CHECK-NEXT:    vpst
2196; CHECK-NEXT:    vaddt.i32 q1, q1, q0
2197; CHECK-NEXT:    vmov q0, q1
2198; CHECK-NEXT:    bx lr
2199entry:
2200  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2201  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2202  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2203  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
2204  %b = add <4 x i32> %ys, %a
2205  ret <4 x i32> %b
2206}
2207
; Add of a splatted scalar and a masked vector: %ys broadcasts scalar %y
; (insertelement into lane 0 + shufflevector with an all-zero mask), %a is %x
; where the vctp16(%n) mask is set and zero elsewhere, and %b = add %ys, %a.
; Expected assembly: vdup.16 q1, r0 for the splat, then a vpst-predicated
; vaddt.i16 q1, q1, q0 and vmov q0, q1.
2208define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2209; CHECK-LABEL: addqr_v8i16_y:
2210; CHECK:       @ %bb.0: @ %entry
2211; CHECK-NEXT:    vdup.16 q1, r0
2212; CHECK-NEXT:    vctp.16 r1
2213; CHECK-NEXT:    vpst
2214; CHECK-NEXT:    vaddt.i16 q1, q1, q0
2215; CHECK-NEXT:    vmov q0, q1
2216; CHECK-NEXT:    bx lr
2217entry:
2218  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2219  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2220  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2221  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
2222  %b = add <8 x i16> %ys, %a
2223  ret <8 x i16> %b
2224}
2225
; Add of a splatted scalar and a masked vector: %ys broadcasts scalar %y
; (insertelement into lane 0 + shufflevector with an all-zero mask), %a is %x
; where the vctp8(%n) mask is set and zero elsewhere, and %b = add %ys, %a.
; Expected assembly: vdup.8 q1, r0 for the splat, then a vpst-predicated
; vaddt.i8 q1, q1, q0 and vmov q0, q1.
2226define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2227; CHECK-LABEL: addqr_v16i8_y:
2228; CHECK:       @ %bb.0: @ %entry
2229; CHECK-NEXT:    vdup.8 q1, r0
2230; CHECK-NEXT:    vctp.8 r1
2231; CHECK-NEXT:    vpst
2232; CHECK-NEXT:    vaddt.i8 q1, q1, q0
2233; CHECK-NEXT:    vmov q0, q1
2234; CHECK-NEXT:    bx lr
2235entry:
2236  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2237  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2238  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2239  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
2240  %b = add <16 x i8> %ys, %a
2241  ret <16 x i8> %b
2242}
2243
; Predicated subtract of a splatted scalar, with the splat as fallback: %ys
; broadcasts scalar %y, %a = sub %x, %ys, and %b takes %a where the vctp32(%n)
; mask is set and %ys in the remaining lanes.
; Expected assembly: vdup.32 q1, r0, then a vpst-predicated vsubt.i32 q1, q0, r0
; (scalar register as the last operand) and vmov q0, q1.
2244define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2245; CHECK-LABEL: subqr_v4i32_y:
2246; CHECK:       @ %bb.0: @ %entry
2247; CHECK-NEXT:    vdup.32 q1, r0
2248; CHECK-NEXT:    vctp.32 r1
2249; CHECK-NEXT:    vpst
2250; CHECK-NEXT:    vsubt.i32 q1, q0, r0
2251; CHECK-NEXT:    vmov q0, q1
2252; CHECK-NEXT:    bx lr
2253entry:
2254  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2255  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2256  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2257  %a = sub <4 x i32> %x, %ys
2258  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2259  ret <4 x i32> %b
2260}
2261
; Predicated subtract of a splatted scalar, with the splat as fallback: %ys
; broadcasts scalar %y, %a = sub %x, %ys, and %b takes %a where the vctp16(%n)
; mask is set and %ys in the remaining lanes.
; Expected assembly: vdup.16 q1, r0, then a vpst-predicated vsubt.i16 q1, q0, r0
; (scalar register as the last operand) and vmov q0, q1.
2262define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2263; CHECK-LABEL: subqr_v8i16_y:
2264; CHECK:       @ %bb.0: @ %entry
2265; CHECK-NEXT:    vdup.16 q1, r0
2266; CHECK-NEXT:    vctp.16 r1
2267; CHECK-NEXT:    vpst
2268; CHECK-NEXT:    vsubt.i16 q1, q0, r0
2269; CHECK-NEXT:    vmov q0, q1
2270; CHECK-NEXT:    bx lr
2271entry:
2272  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2273  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2274  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2275  %a = sub <8 x i16> %x, %ys
2276  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2277  ret <8 x i16> %b
2278}
2279
; Predicated subtract of a splatted scalar, with the splat as fallback: %ys
; broadcasts scalar %y, %a = sub %x, %ys, and %b takes %a where the vctp8(%n)
; mask is set and %ys in the remaining lanes.
; Expected assembly: vdup.8 q1, r0, then a vpst-predicated vsubt.i8 q1, q0, r0
; (scalar register as the last operand) and vmov q0, q1.
2280define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2281; CHECK-LABEL: subqr_v16i8_y:
2282; CHECK:       @ %bb.0: @ %entry
2283; CHECK-NEXT:    vdup.8 q1, r0
2284; CHECK-NEXT:    vctp.8 r1
2285; CHECK-NEXT:    vpst
2286; CHECK-NEXT:    vsubt.i8 q1, q0, r0
2287; CHECK-NEXT:    vmov q0, q1
2288; CHECK-NEXT:    bx lr
2289entry:
2290  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2291  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2292  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2293  %a = sub <16 x i8> %x, %ys
2294  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2295  ret <16 x i8> %b
2296}
2297
; Multiply of a splatted scalar and a masked vector: %ys broadcasts scalar %y,
; %a is %x where the vctp32(%n) mask is set and the constant 1 elsewhere, and
; %b = mul %ys, %a.
; Expected assembly: vdup.32 q1, r0 for the splat, then a vpst-predicated
; vmult.i32 q1, q1, q0 and vmov q0, q1.
2298define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2299; CHECK-LABEL: mulqr_v4i32_y:
2300; CHECK:       @ %bb.0: @ %entry
2301; CHECK-NEXT:    vdup.32 q1, r0
2302; CHECK-NEXT:    vctp.32 r1
2303; CHECK-NEXT:    vpst
2304; CHECK-NEXT:    vmult.i32 q1, q1, q0
2305; CHECK-NEXT:    vmov q0, q1
2306; CHECK-NEXT:    bx lr
2307entry:
2308  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2309  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2310  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2311  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2312  %b = mul <4 x i32> %ys, %a
2313  ret <4 x i32> %b
2314}
2315
; Multiply of a splatted scalar and a masked vector: %ys broadcasts scalar %y,
; %a is %x where the vctp16(%n) mask is set and the constant 1 elsewhere, and
; %b = mul %ys, %a.
; Expected assembly: vdup.16 q1, r0 for the splat, then a vpst-predicated
; vmult.i16 q1, q1, q0 and vmov q0, q1.
2316define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2317; CHECK-LABEL: mulqr_v8i16_y:
2318; CHECK:       @ %bb.0: @ %entry
2319; CHECK-NEXT:    vdup.16 q1, r0
2320; CHECK-NEXT:    vctp.16 r1
2321; CHECK-NEXT:    vpst
2322; CHECK-NEXT:    vmult.i16 q1, q1, q0
2323; CHECK-NEXT:    vmov q0, q1
2324; CHECK-NEXT:    bx lr
2325entry:
2326  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2327  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2328  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2329  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
2330  %b = mul <8 x i16> %ys, %a
2331  ret <8 x i16> %b
2332}
2333
; Multiply of a splatted scalar and a masked vector: %ys broadcasts scalar %y,
; %a is %x where the vctp8(%n) mask is set and the constant 1 elsewhere, and
; %b = mul %ys, %a.
; Expected assembly: vdup.8 q1, r0 for the splat, then a vpst-predicated
; vmult.i8 q1, q1, q0 and vmov q0, q1.
2334define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2335; CHECK-LABEL: mulqr_v16i8_y:
2336; CHECK:       @ %bb.0: @ %entry
2337; CHECK-NEXT:    vdup.8 q1, r0
2338; CHECK-NEXT:    vctp.8 r1
2339; CHECK-NEXT:    vpst
2340; CHECK-NEXT:    vmult.i8 q1, q1, q0
2341; CHECK-NEXT:    vmov q0, q1
2342; CHECK-NEXT:    bx lr
2343entry:
2344  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2345  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2346  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2347  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
2348  %b = mul <16 x i8> %ys, %a
2349  ret <16 x i8> %b
2350}
2351
; Predicated fadd with a splatted float scalar, with the splat as fallback: %ys
; broadcasts scalar %y, %a = fadd %ys, %x, and %b takes %a where the vctp32(%n)
; mask is set and %ys in the remaining lanes.
; Expected assembly: vmov r1, s4 and vdup.32 q1, r1 for the splat, then a
; vpst-predicated vaddt.f32 q1, q0, r1 and vmov q0, q1.
2352define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2353; CHECK-LABEL: faddqr_v4f32_y:
2354; CHECK:       @ %bb.0: @ %entry
2355; CHECK-NEXT:    vmov r1, s4
2356; CHECK-NEXT:    vctp.32 r0
2357; CHECK-NEXT:    vdup.32 q1, r1
2358; CHECK-NEXT:    vpst
2359; CHECK-NEXT:    vaddt.f32 q1, q0, r1
2360; CHECK-NEXT:    vmov q0, q1
2361; CHECK-NEXT:    bx lr
2362entry:
2363  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2364  %i = insertelement <4 x float> undef, float %y, i32 0
2365  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2366  %a = fadd <4 x float> %ys, %x
2367  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys
2368  ret <4 x float> %b
2369}
2370
; Predicated fadd with a splatted half scalar, with the splat as fallback: %ys
; broadcasts scalar %y, %a = fadd %ys, %x, and %b takes %a where the vctp16(%n)
; mask is set and %ys in the remaining lanes.
; Expected assembly: vmov.f16 r1, s4 and vdup.16 q1, r1 for the splat, then a
; vpst-predicated vaddt.f16 q1, q0, r1 and vmov q0, q1.
2371define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2372; CHECK-LABEL: faddqr_v8f16_y:
2373; CHECK:       @ %bb.0: @ %entry
2374; CHECK-NEXT:    vmov.f16 r1, s4
2375; CHECK-NEXT:    vctp.16 r0
2376; CHECK-NEXT:    vdup.16 q1, r1
2377; CHECK-NEXT:    vpst
2378; CHECK-NEXT:    vaddt.f16 q1, q0, r1
2379; CHECK-NEXT:    vmov q0, q1
2380; CHECK-NEXT:    bx lr
2381entry:
2382  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2383  %i = insertelement <8 x half> undef, half %y, i32 0
2384  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2385  %a = fadd <8 x half> %ys, %x
2386  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys
2387  ret <8 x half> %b
2388}
2389
; Predicated fsub of a splatted float scalar, with the splat as fallback: %ys
; broadcasts scalar %y, %a = fsub %x, %ys, and %b takes %a where the vctp32(%n)
; mask is set and %ys in the remaining lanes.
; Expected assembly: vmov r1, s4 and vdup.32 q1, r1 for the splat, then a
; vpst-predicated vsubt.f32 q1, q0, r1 and vmov q0, q1.
2390define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2391; CHECK-LABEL: fsubqr_v4f32_y:
2392; CHECK:       @ %bb.0: @ %entry
2393; CHECK-NEXT:    vmov r1, s4
2394; CHECK-NEXT:    vctp.32 r0
2395; CHECK-NEXT:    vdup.32 q1, r1
2396; CHECK-NEXT:    vpst
2397; CHECK-NEXT:    vsubt.f32 q1, q0, r1
2398; CHECK-NEXT:    vmov q0, q1
2399; CHECK-NEXT:    bx lr
2400entry:
2401  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2402  %i = insertelement <4 x float> undef, float %y, i32 0
2403  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2404  %a = fsub <4 x float> %x, %ys
2405  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys
2406  ret <4 x float> %b
2407}
2408
; Predicated fsub of a splatted half scalar, with the splat as fallback: %ys
; broadcasts scalar %y, %a = fsub %x, %ys, and %b takes %a where the vctp16(%n)
; mask is set and %ys in the remaining lanes.
; Expected assembly: vmov.f16 r1, s4 and vdup.16 q1, r1 for the splat, then a
; vpst-predicated vsubt.f16 q1, q0, r1 and vmov q0, q1.
2409define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2410; CHECK-LABEL: fsubqr_v8f16_y:
2411; CHECK:       @ %bb.0: @ %entry
2412; CHECK-NEXT:    vmov.f16 r1, s4
2413; CHECK-NEXT:    vctp.16 r0
2414; CHECK-NEXT:    vdup.16 q1, r1
2415; CHECK-NEXT:    vpst
2416; CHECK-NEXT:    vsubt.f16 q1, q0, r1
2417; CHECK-NEXT:    vmov q0, q1
2418; CHECK-NEXT:    bx lr
2419entry:
2420  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2421  %i = insertelement <8 x half> undef, half %y, i32 0
2422  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2423  %a = fsub <8 x half> %x, %ys
2424  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys
2425  ret <8 x half> %b
2426}
2427
; Predicated fmul with a splatted float scalar, with the splat as fallback: %ys
; broadcasts scalar %y, %a = fmul %ys, %x, and %b takes %a where the vctp32(%n)
; mask is set and %ys in the remaining lanes.
; Expected assembly: vmov r1, s4 and vdup.32 q1, r1 for the splat, then a
; vpst-predicated vmult.f32 q1, q0, r1 and vmov q0, q1.
2428define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
2429; CHECK-LABEL: fmulqr_v4f32_y:
2430; CHECK:       @ %bb.0: @ %entry
2431; CHECK-NEXT:    vmov r1, s4
2432; CHECK-NEXT:    vctp.32 r0
2433; CHECK-NEXT:    vdup.32 q1, r1
2434; CHECK-NEXT:    vpst
2435; CHECK-NEXT:    vmult.f32 q1, q0, r1
2436; CHECK-NEXT:    vmov q0, q1
2437; CHECK-NEXT:    bx lr
2438entry:
2439  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2440  %i = insertelement <4 x float> undef, float %y, i32 0
2441  %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
2442  %a = fmul <4 x float> %ys, %x
2443  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys
2444  ret <4 x float> %b
2445}
2446
; Predicated fmul with a splatted half scalar, with the splat as fallback: %ys
; broadcasts scalar %y, %a = fmul %ys, %x, and %b takes %a where the vctp16(%n)
; mask is set and %ys in the remaining lanes.
; Expected assembly: vmov.f16 r1, s4 and vdup.16 q1, r1 for the splat, then a
; vpst-predicated vmult.f16 q1, q0, r1 and vmov q0, q1.
2447define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
2448; CHECK-LABEL: fmulqr_v8f16_y:
2449; CHECK:       @ %bb.0: @ %entry
2450; CHECK-NEXT:    vmov.f16 r1, s4
2451; CHECK-NEXT:    vctp.16 r0
2452; CHECK-NEXT:    vdup.16 q1, r1
2453; CHECK-NEXT:    vpst
2454; CHECK-NEXT:    vmult.f16 q1, q0, r1
2455; CHECK-NEXT:    vmov q0, q1
2456; CHECK-NEXT:    bx lr
2457entry:
2458  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2459  %i = insertelement <8 x half> undef, half %y, i32 0
2460  %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
2461  %a = fmul <8 x half> %ys, %x
2462  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys
2463  ret <8 x half> %b
2464}
2465
; Predicated signed saturating add of a splatted scalar, with the splat as
; fallback: %ys broadcasts scalar %y, %a = @llvm.sadd.sat.v4i32(%x, %ys), and %b
; takes %a where the vctp32(%n) mask is set and %ys in the remaining lanes.
; Expected assembly: vdup.32 q1, r0, then a vpst-predicated
; vqaddt.s32 q1, q0, r0 and vmov q0, q1.
2466define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2467; CHECK-LABEL: sadd_satqr_v4i32_y:
2468; CHECK:       @ %bb.0: @ %entry
2469; CHECK-NEXT:    vdup.32 q1, r0
2470; CHECK-NEXT:    vctp.32 r1
2471; CHECK-NEXT:    vpst
2472; CHECK-NEXT:    vqaddt.s32 q1, q0, r0
2473; CHECK-NEXT:    vmov q0, q1
2474; CHECK-NEXT:    bx lr
2475entry:
2476  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2477  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2478  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2479  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
2480  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2481  ret <4 x i32> %b
2482}
2483
; Predicated signed saturating add of a splatted scalar, with the splat as
; fallback: %ys broadcasts scalar %y, %a = @llvm.sadd.sat.v8i16(%x, %ys), and %b
; takes %a where the vctp16(%n) mask is set and %ys in the remaining lanes.
; Expected assembly: vdup.16 q1, r0, then a vpst-predicated
; vqaddt.s16 q1, q0, r0 and vmov q0, q1.
2484define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2485; CHECK-LABEL: sadd_satqr_v8i16_y:
2486; CHECK:       @ %bb.0: @ %entry
2487; CHECK-NEXT:    vdup.16 q1, r0
2488; CHECK-NEXT:    vctp.16 r1
2489; CHECK-NEXT:    vpst
2490; CHECK-NEXT:    vqaddt.s16 q1, q0, r0
2491; CHECK-NEXT:    vmov q0, q1
2492; CHECK-NEXT:    bx lr
2493entry:
2494  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2495  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2496  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2497  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
2498  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2499  ret <8 x i16> %b
2500}
2501
; Predicated signed saturating add of a splatted scalar, with the splat as
; fallback: %ys broadcasts scalar %y, %a = @llvm.sadd.sat.v16i8(%x, %ys), and %b
; takes %a where the vctp8(%n) mask is set and %ys in the remaining lanes.
; Expected assembly: vdup.8 q1, r0, then a vpst-predicated
; vqaddt.s8 q1, q0, r0 and vmov q0, q1.
2502define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2503; CHECK-LABEL: sadd_satqr_v16i8_y:
2504; CHECK:       @ %bb.0: @ %entry
2505; CHECK-NEXT:    vdup.8 q1, r0
2506; CHECK-NEXT:    vctp.8 r1
2507; CHECK-NEXT:    vpst
2508; CHECK-NEXT:    vqaddt.s8 q1, q0, r0
2509; CHECK-NEXT:    vmov q0, q1
2510; CHECK-NEXT:    bx lr
2511entry:
2512  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2513  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2514  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2515  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
2516  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2517  ret <16 x i8> %b
2518}
2519
; Predicated unsigned saturating add of a splatted scalar, with the splat as
; fallback: %ys broadcasts scalar %y, %a = @llvm.uadd.sat.v4i32(%x, %ys), and %b
; takes %a where the vctp32(%n) mask is set and %ys in the remaining lanes.
; Expected assembly: vdup.32 q1, r0, then a vpst-predicated
; vqaddt.u32 q1, q0, r0 and vmov q0, q1.
2520define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2521; CHECK-LABEL: uadd_satqr_v4i32_y:
2522; CHECK:       @ %bb.0: @ %entry
2523; CHECK-NEXT:    vdup.32 q1, r0
2524; CHECK-NEXT:    vctp.32 r1
2525; CHECK-NEXT:    vpst
2526; CHECK-NEXT:    vqaddt.u32 q1, q0, r0
2527; CHECK-NEXT:    vmov q0, q1
2528; CHECK-NEXT:    bx lr
2529entry:
2530  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2531  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2532  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2533  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
2534  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2535  ret <4 x i32> %b
2536}
2537
; Predicated unsigned saturating add of a splatted scalar, with the splat as
; fallback: %ys broadcasts scalar %y, %a = @llvm.uadd.sat.v8i16(%x, %ys), and %b
; takes %a where the vctp16(%n) mask is set and %ys in the remaining lanes.
; Expected assembly: vdup.16 q1, r0, then a vpst-predicated
; vqaddt.u16 q1, q0, r0 and vmov q0, q1.
2538define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2539; CHECK-LABEL: uadd_satqr_v8i16_y:
2540; CHECK:       @ %bb.0: @ %entry
2541; CHECK-NEXT:    vdup.16 q1, r0
2542; CHECK-NEXT:    vctp.16 r1
2543; CHECK-NEXT:    vpst
2544; CHECK-NEXT:    vqaddt.u16 q1, q0, r0
2545; CHECK-NEXT:    vmov q0, q1
2546; CHECK-NEXT:    bx lr
2547entry:
2548  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
2549  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2550  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
2551  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
2552  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2553  ret <8 x i16> %b
2554}
2555
; Predicated unsigned saturating add of a splatted scalar, with the splat as
; fallback: %ys broadcasts scalar %y, %a = @llvm.uadd.sat.v16i8(%x, %ys), and %b
; takes %a where the vctp8(%n) mask is set and %ys in the remaining lanes.
; Expected assembly: vdup.8 q1, r0, then a vpst-predicated
; vqaddt.u8 q1, q0, r0 and vmov q0, q1.
2556define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2557; CHECK-LABEL: uadd_satqr_v16i8_y:
2558; CHECK:       @ %bb.0: @ %entry
2559; CHECK-NEXT:    vdup.8 q1, r0
2560; CHECK-NEXT:    vctp.8 r1
2561; CHECK-NEXT:    vpst
2562; CHECK-NEXT:    vqaddt.u8 q1, q0, r0
2563; CHECK-NEXT:    vmov q0, q1
2564; CHECK-NEXT:    bx lr
2565entry:
2566  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
2567  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2568  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
2569  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
2570  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2571  ret <16 x i8> %b
2572}
2573
; Predicated signed saturating subtract of a splatted scalar, with the splat as
; fallback: %ys broadcasts scalar %y, %a = @llvm.ssub.sat.v4i32(%x, %ys), and %b
; takes %a where the vctp32(%n) mask is set and %ys in the remaining lanes.
; Expected assembly: vdup.32 q1, r0, then a vpst-predicated
; vqsubt.s32 q1, q0, r0 and vmov q0, q1.
2574define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2575; CHECK-LABEL: ssub_satqr_v4i32_y:
2576; CHECK:       @ %bb.0: @ %entry
2577; CHECK-NEXT:    vdup.32 q1, r0
2578; CHECK-NEXT:    vctp.32 r1
2579; CHECK-NEXT:    vpst
2580; CHECK-NEXT:    vqsubt.s32 q1, q0, r0
2581; CHECK-NEXT:    vmov q0, q1
2582; CHECK-NEXT:    bx lr
2583entry:
2584  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
2585  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2586  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
2587  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
2588  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2589  ret <4 x i32> %b
2590}
2591
; Lanes enabled by vctp16(%n) receive the signed saturating subtraction of a
; splat of the scalar %y from %x; all other lanes receive the splat itself. The
; autogenerated assertions expect a vdup.16 of the scalar followed by a
; predicated vqsubt.s16 that takes the scalar operand directly from r0.
2592define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2593; CHECK-LABEL: ssub_satqr_v8i16_y:
2594; CHECK:       @ %bb.0: @ %entry
2595; CHECK-NEXT:    vdup.16 q1, r0
2596; CHECK-NEXT:    vctp.16 r1
2597; CHECK-NEXT:    vpst
2598; CHECK-NEXT:    vqsubt.s16 q1, q0, r0
2599; CHECK-NEXT:    vmov q0, q1
2600; CHECK-NEXT:    bx lr
2601entry:
2602  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y to all 8 lanes: insert into lane 0, then shuffle with an all-zero mask.
2603  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2604  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  ; Operand order matters for subtraction: %x is the minuend, the splat the subtrahend.
2605  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  ; Lanes where %c is false take the splat (%ys), not %x.
2606  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2607  ret <8 x i16> %b
2608}
2609
; Lanes enabled by vctp8(%n) receive the signed saturating subtraction of a
; splat of the scalar %y from %x; all other lanes receive the splat itself. The
; autogenerated assertions expect a vdup.8 of the scalar followed by a
; predicated vqsubt.s8 that takes the scalar operand directly from r0.
2610define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2611; CHECK-LABEL: ssub_satqr_v16i8_y:
2612; CHECK:       @ %bb.0: @ %entry
2613; CHECK-NEXT:    vdup.8 q1, r0
2614; CHECK-NEXT:    vctp.8 r1
2615; CHECK-NEXT:    vpst
2616; CHECK-NEXT:    vqsubt.s8 q1, q0, r0
2617; CHECK-NEXT:    vmov q0, q1
2618; CHECK-NEXT:    bx lr
2619entry:
2620  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  ; Broadcast %y to all 16 lanes: insert into lane 0, then shuffle with an all-zero mask.
2621  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2622  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  ; Operand order matters for subtraction: %x is the minuend, the splat the subtrahend.
2623  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  ; Lanes where %c is false take the splat (%ys), not %x.
2624  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2625  ret <16 x i8> %b
2626}
2627
; Lanes enabled by vctp32(%n) receive the unsigned saturating subtraction of a
; splat of the scalar %y from %x; all other lanes receive the splat itself. The
; autogenerated assertions expect a vdup.32 of the scalar followed by a
; predicated vqsubt.u32 that takes the scalar operand directly from r0.
2628define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
2629; CHECK-LABEL: usub_satqr_v4i32_y:
2630; CHECK:       @ %bb.0: @ %entry
2631; CHECK-NEXT:    vdup.32 q1, r0
2632; CHECK-NEXT:    vctp.32 r1
2633; CHECK-NEXT:    vpst
2634; CHECK-NEXT:    vqsubt.u32 q1, q0, r0
2635; CHECK-NEXT:    vmov q0, q1
2636; CHECK-NEXT:    bx lr
2637entry:
2638  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  ; Broadcast %y to all 4 lanes: insert into lane 0, then shuffle with an all-zero mask.
2639  %i = insertelement <4 x i32> undef, i32 %y, i32 0
2640  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  ; Operand order matters for subtraction: %x is the minuend, the splat the subtrahend.
2641  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys)
  ; Lanes where %c is false take the splat (%ys), not %x.
2642  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
2643  ret <4 x i32> %b
2644}
2645
; Lanes enabled by vctp16(%n) receive the unsigned saturating subtraction of a
; splat of the scalar %y from %x; all other lanes receive the splat itself. The
; autogenerated assertions expect a vdup.16 of the scalar followed by a
; predicated vqsubt.u16 that takes the scalar operand directly from r0.
2646define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
2647; CHECK-LABEL: usub_satqr_v8i16_y:
2648; CHECK:       @ %bb.0: @ %entry
2649; CHECK-NEXT:    vdup.16 q1, r0
2650; CHECK-NEXT:    vctp.16 r1
2651; CHECK-NEXT:    vpst
2652; CHECK-NEXT:    vqsubt.u16 q1, q0, r0
2653; CHECK-NEXT:    vmov q0, q1
2654; CHECK-NEXT:    bx lr
2655entry:
2656  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; Broadcast %y to all 8 lanes: insert into lane 0, then shuffle with an all-zero mask.
2657  %i = insertelement <8 x i16> undef, i16 %y, i32 0
2658  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  ; Operand order matters for subtraction: %x is the minuend, the splat the subtrahend.
2659  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys)
  ; Lanes where %c is false take the splat (%ys), not %x.
2660  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
2661  ret <8 x i16> %b
2662}
2663
; Lanes enabled by vctp8(%n) receive the unsigned saturating subtraction of a
; splat of the scalar %y from %x; all other lanes receive the splat itself. The
; autogenerated assertions expect a vdup.8 of the scalar followed by a
; predicated vqsubt.u8 that takes the scalar operand directly from r0.
2664define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
2665; CHECK-LABEL: usub_satqr_v16i8_y:
2666; CHECK:       @ %bb.0: @ %entry
2667; CHECK-NEXT:    vdup.8 q1, r0
2668; CHECK-NEXT:    vctp.8 r1
2669; CHECK-NEXT:    vpst
2670; CHECK-NEXT:    vqsubt.u8 q1, q0, r0
2671; CHECK-NEXT:    vmov q0, q1
2672; CHECK-NEXT:    bx lr
2673entry:
2674  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  ; Broadcast %y to all 16 lanes: insert into lane 0, then shuffle with an all-zero mask.
2675  %i = insertelement <16 x i8> undef, i8 %y, i32 0
2676  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  ; Operand order matters for subtraction: %x is the minuend, the splat the subtrahend.
2677  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys)
  ; Lanes where %c is false take the splat (%ys), not %x.
2678  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
2679  ret <16 x i8> %b
2680}
2681
; Target-independent saturating add/subtract intrinsics, signed and unsigned,
; declared for the three 128-bit integer vector types (16 x i8, 8 x i16, 4 x i32).
2682declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
2683declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
2684declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
2685declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
2686declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
2687declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
2688declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
2689declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
2690declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
2691declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
2692declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
2693declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
2694
; MVE vctp intrinsics: each takes an i32 and returns an <N x i1> predicate whose
; lane count matches the element width in its name (8-bit -> 16 lanes, 16-bit ->
; 8 lanes, 32-bit -> 4 lanes). The test functions use the result as a select mask.
2695declare <16 x i1> @llvm.arm.mve.vctp8(i32)
2696declare <8 x i1> @llvm.arm.mve.vctp16(i32)
2697declare <4 x i1> @llvm.arm.mve.vctp32(i32)
2698