1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
3
; AND reduction of <2 x i32>: lowered as two scalar extracts (s0, s2 of the
; 64-bit-lane vector) combined with a single 'ands'.
define arm_aapcs_vfpcc i32 @and_v2i32(<2 x i32> %x) {
; CHECK-LABEL: and_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x)
  ret i32 %z
}

; AND reduction of <4 x i32>: all four lanes extracted to GPRs and combined
; with a tree of three 'ands'.
define arm_aapcs_vfpcc i32 @and_v4i32(<4 x i32> %x) {
; CHECK-LABEL: and_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %x)
  ret i32 %z
}

; AND reduction of <8 x i32>: the two q-registers are first combined with a
; vector 'vand', then reduced like the v4i32 case.
define arm_aapcs_vfpcc i32 @and_v8i32(<8 x i32> %x) {
; CHECK-LABEL: and_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %x)
  ret i32 %z
}

; AND reduction of <4 x i16>: the i16 lanes live in 32-bit container lanes,
; so the lowering matches and_v4i32.
define arm_aapcs_vfpcc i16 @and_v4i16(<4 x i16> %x) {
; CHECK-LABEL: and_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %x)
  ret i16 %z
}

; AND reduction of <8 x i16>: one vrev32.16+vand folds odd lanes into even
; lanes, then four even-lane extracts are combined with scalar 'ands'.
define arm_aapcs_vfpcc i16 @and_v8i16(<8 x i16> %x) {
; CHECK-LABEL: and_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %x)
  ret i16 %z
}

; AND reduction of <16 x i16>: vand the two halves, then reduce as v8i16.
define arm_aapcs_vfpcc i16 @and_v16i16(<16 x i16> %x) {
; CHECK-LABEL: and_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %x)
  ret i16 %z
}

; AND reduction of <8 x i8>: i8 lanes sit in 16-bit containers, so the
; lowering matches and_v8i16.
define arm_aapcs_vfpcc i8 @and_v8i8(<8 x i8> %x) {
; CHECK-LABEL: and_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
  ret i8 %z
}

; AND reduction of <16 x i8>: two vrev+vand steps (vrev16.8 then vrev32.8)
; fold 16 lanes down to 4 live byte lanes, finished with scalar 'ands'.
define arm_aapcs_vfpcc i8 @and_v16i8(<16 x i8> %x) {
; CHECK-LABEL: and_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %x)
  ret i8 %z
}

; AND reduction of <32 x i8>: vand the two halves, then reduce as v16i8.
define arm_aapcs_vfpcc i8 @and_v32i8(<32 x i8> %x) {
; CHECK-LABEL: and_v32i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %x)
  ret i8 %z
}

; AND reduction of a single i64 lane is the identity, so only 'bx lr' is
; expected (the value is already in r0/r1 per AAPCS).
define arm_aapcs_vfpcc i64 @and_v1i64(<1 x i64> %x) {
; CHECK-LABEL: and_v1i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %x)
  ret i64 %z
}

; AND reduction of <2 x i64>: each 64-bit result half is built from two
; 32-bit extracts combined with 'ands' (low half in r0, high half in r1).
define arm_aapcs_vfpcc i64 @and_v2i64(<2 x i64> %x) {
; CHECK-LABEL: and_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
  ret i64 %z
}

; AND reduction of <4 x i64>: vand the two halves, then reduce as v2i64.
define arm_aapcs_vfpcc i64 @and_v4i64(<4 x i64> %x) {
; CHECK-LABEL: and_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
  ret i64 %z
}
200
; Accumulator variant: same <2 x i32> AND reduction, then a final 'ands'
; folding in the scalar %y (passed in r0).
define arm_aapcs_vfpcc i32 @and_v2i32_acc(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: and_v2i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x)
  %r = and i32 %y, %z
  ret i32 %r
}

; Accumulator variant of and_v4i32: reduction runs in r1-r3, final 'ands'
; merges into the %y accumulator in r0.
define arm_aapcs_vfpcc i32 @and_v4i32_acc(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: and_v4i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %x)
  %r = and i32 %y, %z
  ret i32 %r
}

; Accumulator variant of and_v8i32: vand the halves, reduce, then fold %y.
define arm_aapcs_vfpcc i32 @and_v8i32_acc(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: and_v8i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %x)
  %r = and i32 %y, %z
  ret i32 %r
}

; Accumulator variant of and_v4i16 (i16 lanes in 32-bit containers).
define arm_aapcs_vfpcc i16 @and_v4i16_acc(<4 x i16> %x, i16 %y) {
; CHECK-LABEL: and_v4i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %x)
  %r = and i16 %y, %z
  ret i16 %r
}

; Accumulator variant of and_v8i16: vrev32.16+vand fold, then scalar 'ands'
; chain ending in the %y accumulator.
define arm_aapcs_vfpcc i16 @and_v8i16_acc(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: and_v8i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %x)
  %r = and i16 %y, %z
  ret i16 %r
}

; Accumulator variant of and_v16i16.
define arm_aapcs_vfpcc i16 @and_v16i16_acc(<16 x i16> %x, i16 %y) {
; CHECK-LABEL: and_v16i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %x)
  %r = and i16 %y, %z
  ret i16 %r
}

; Accumulator variant of and_v8i8 (i8 lanes in 16-bit containers).
define arm_aapcs_vfpcc i8 @and_v8i8_acc(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: and_v8i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
  %r = and i8 %y, %z
  ret i8 %r
}

; Accumulator variant of and_v16i8.
define arm_aapcs_vfpcc i8 @and_v16i8_acc(<16 x i8> %x, i8 %y) {
; CHECK-LABEL: and_v16i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %x)
  %r = and i8 %y, %z
  ret i8 %r
}

; Accumulator variant of and_v32i8.
define arm_aapcs_vfpcc i8 @and_v32i8_acc(<32 x i8> %x, i8 %y) {
; CHECK-LABEL: and_v32i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %x)
  %r = and i8 %y, %z
  ret i8 %r
}

; Single-lane i64 reduction is the identity, so only the i64 AND with %y
; (r2:r3) remains: one 'ands' per 32-bit half.
define arm_aapcs_vfpcc i64 @and_v1i64_acc(<1 x i64> %x, i64 %y) {
; CHECK-LABEL: and_v1i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ands r0, r2
; CHECK-NEXT:    ands r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %x)
  %r = and i64 %y, %z
  ret i64 %r
}

; Accumulator variant of and_v2i64: per-half reduction then 'ands' into the
; %y halves in r0/r1.
define arm_aapcs_vfpcc i64 @and_v2i64_acc(<2 x i64> %x, i64 %y) {
; CHECK-LABEL: and_v2i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    ands r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
  %r = and i64 %y, %z
  ret i64 %r
}

; Accumulator variant of and_v4i64: vand halves, then as and_v2i64_acc.
define arm_aapcs_vfpcc i64 @and_v4i64_acc(<4 x i64> %x, i64 %y) {
; CHECK-LABEL: and_v4i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    ands r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
  %r = and i64 %y, %z
  ret i64 %r
}
424
; OR reduction of <2 x i32>: two lane extracts combined with one 'orrs'.
; Mirrors and_v2i32 with vorr/orrs in place of vand/ands.
define arm_aapcs_vfpcc i32 @or_v2i32(<2 x i32> %x) {
; CHECK-LABEL: or_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x)
  ret i32 %z
}

; OR reduction of <4 x i32>: four extracts, three scalar 'orrs'.
define arm_aapcs_vfpcc i32 @or_v4i32(<4 x i32> %x) {
; CHECK-LABEL: or_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %x)
  ret i32 %z
}

; OR reduction of <8 x i32>: vorr the two halves, then reduce as v4i32.
define arm_aapcs_vfpcc i32 @or_v8i32(<8 x i32> %x) {
; CHECK-LABEL: or_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %x)
  ret i32 %z
}

; OR reduction of <4 x i16> (i16 lanes in 32-bit containers): same as v4i32.
define arm_aapcs_vfpcc i16 @or_v4i16(<4 x i16> %x) {
; CHECK-LABEL: or_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %x)
  ret i16 %z
}

; OR reduction of <8 x i16>: vrev32.16+vorr folds odd lanes into even ones,
; then four even-lane extracts finish with scalar 'orrs'.
define arm_aapcs_vfpcc i16 @or_v8i16(<8 x i16> %x) {
; CHECK-LABEL: or_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %x)
  ret i16 %z
}

; OR reduction of <16 x i16>: vorr the two halves, then reduce as v8i16.
define arm_aapcs_vfpcc i16 @or_v16i16(<16 x i16> %x) {
; CHECK-LABEL: or_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %x)
  ret i16 %z
}

; OR reduction of <8 x i8> (i8 lanes in 16-bit containers): as or_v8i16.
define arm_aapcs_vfpcc i8 @or_v8i8(<8 x i8> %x) {
; CHECK-LABEL: or_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %x)
  ret i8 %z
}

; OR reduction of <16 x i8>: vrev16.8 and vrev32.8 fold steps, then four
; byte-lane extracts combined with scalar 'orrs'.
define arm_aapcs_vfpcc i8 @or_v16i8(<16 x i8> %x) {
; CHECK-LABEL: or_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %x)
  ret i8 %z
}

; OR reduction of <32 x i8>: vorr the two halves, then reduce as v16i8.
define arm_aapcs_vfpcc i8 @or_v32i8(<32 x i8> %x) {
; CHECK-LABEL: or_v32i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %x)
  ret i8 %z
}

; Single-lane i64 OR reduction is the identity: just 'bx lr'.
define arm_aapcs_vfpcc i64 @or_v1i64(<1 x i64> %x) {
; CHECK-LABEL: or_v1i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %x)
  ret i64 %z
}

; OR reduction of <2 x i64>: low result half from s0|s2 into r0, high half
; from s1|s3 into r1.
define arm_aapcs_vfpcc i64 @or_v2i64(<2 x i64> %x) {
; CHECK-LABEL: or_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
  ret i64 %z
}

; OR reduction of <4 x i64>: vorr the two halves, then reduce as v2i64.
define arm_aapcs_vfpcc i64 @or_v4i64(<4 x i64> %x) {
; CHECK-LABEL: or_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
  ret i64 %z
}
621
; Accumulator variant: <2 x i32> OR reduction then a final 'orrs' with the
; scalar %y in r0.
define arm_aapcs_vfpcc i32 @or_v2i32_acc(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: or_v2i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x)
  %r = or i32 %y, %z
  ret i32 %r
}

; Accumulator variant of or_v4i32: reduction in r1-r3, final fold into r0.
define arm_aapcs_vfpcc i32 @or_v4i32_acc(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: or_v4i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %x)
  %r = or i32 %y, %z
  ret i32 %r
}

; Accumulator variant of or_v8i32.
define arm_aapcs_vfpcc i32 @or_v8i32_acc(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: or_v8i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %x)
  %r = or i32 %y, %z
  ret i32 %r
}

; Accumulator variant of or_v4i16.
define arm_aapcs_vfpcc i16 @or_v4i16_acc(<4 x i16> %x, i16 %y) {
; CHECK-LABEL: or_v4i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %x)
  %r = or i16 %y, %z
  ret i16 %r
}

; Accumulator variant of or_v8i16.
define arm_aapcs_vfpcc i16 @or_v8i16_acc(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: or_v8i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %x)
  %r = or i16 %y, %z
  ret i16 %r
}

; Accumulator variant of or_v16i16.
define arm_aapcs_vfpcc i16 @or_v16i16_acc(<16 x i16> %x, i16 %y) {
; CHECK-LABEL: or_v16i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %x)
  %r = or i16 %y, %z
  ret i16 %r
}

; Accumulator variant of or_v8i8.
define arm_aapcs_vfpcc i8 @or_v8i8_acc(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: or_v8i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %x)
  %r = or i8 %y, %z
  ret i8 %r
}

; Accumulator variant of or_v16i8.
define arm_aapcs_vfpcc i8 @or_v16i8_acc(<16 x i8> %x, i8 %y) {
; CHECK-LABEL: or_v16i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %x)
  %r = or i8 %y, %z
  ret i8 %r
}

; Accumulator variant of or_v32i8.
define arm_aapcs_vfpcc i8 @or_v32i8_acc(<32 x i8> %x, i8 %y) {
; CHECK-LABEL: or_v32i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %x)
  %r = or i8 %y, %z
  ret i8 %r
}

; Single-lane i64 OR reduction is the identity; only the i64 OR with %y
; (r2:r3) remains, one 'orrs' per half.
define arm_aapcs_vfpcc i64 @or_v1i64_acc(<1 x i64> %x, i64 %y) {
; CHECK-LABEL: or_v1i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    orrs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %x)
  %r = or i64 %y, %z
  ret i64 %r
}

; Accumulator variant of or_v2i64: per-half reduction then fold into r0/r1.
define arm_aapcs_vfpcc i64 @or_v2i64_acc(<2 x i64> %x, i64 %y) {
; CHECK-LABEL: or_v2i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
  %r = or i64 %y, %z
  ret i64 %r
}

; Accumulator variant of or_v4i64: vorr halves, then as or_v2i64_acc.
define arm_aapcs_vfpcc i64 @or_v4i64_acc(<4 x i64> %x, i64 %y) {
; CHECK-LABEL: or_v4i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
  %r = or i64 %y, %z
  ret i64 %r
}
845
; XOR reduction of <2 x i32>: two lane extracts combined with one 'eors'.
; Mirrors the and/or cases with veor/eors.
define arm_aapcs_vfpcc i32 @xor_v2i32(<2 x i32> %x) {
; CHECK-LABEL: xor_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x)
  ret i32 %z
}

; XOR reduction of <4 x i32>: four extracts, three scalar 'eors'.
define arm_aapcs_vfpcc i32 @xor_v4i32(<4 x i32> %x) {
; CHECK-LABEL: xor_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %x)
  ret i32 %z
}

; XOR reduction of <8 x i32>: veor the two halves, then reduce as v4i32.
define arm_aapcs_vfpcc i32 @xor_v8i32(<8 x i32> %x) {
; CHECK-LABEL: xor_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %x)
  ret i32 %z
}

; XOR reduction of <4 x i16> (i16 lanes in 32-bit containers): as v4i32.
define arm_aapcs_vfpcc i16 @xor_v4i16(<4 x i16> %x) {
; CHECK-LABEL: xor_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %x)
  ret i16 %z
}

; XOR reduction of <8 x i16>: vrev32.16+veor folds odd lanes into even ones,
; then four even-lane extracts finish with scalar 'eors'.
define arm_aapcs_vfpcc i16 @xor_v8i16(<8 x i16> %x) {
; CHECK-LABEL: xor_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %x)
  ret i16 %z
}

; XOR reduction of <16 x i16>: veor the two halves, then reduce as v8i16.
define arm_aapcs_vfpcc i16 @xor_v16i16(<16 x i16> %x) {
; CHECK-LABEL: xor_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %x)
  ret i16 %z
}

; XOR reduction of <8 x i8> (i8 lanes in 16-bit containers): as xor_v8i16.
define arm_aapcs_vfpcc i8 @xor_v8i8(<8 x i8> %x) {
; CHECK-LABEL: xor_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %x)
  ret i8 %z
}

; XOR reduction of <16 x i8>: vrev16.8 and vrev32.8 fold steps, then four
; byte-lane extracts combined with scalar 'eors'.
define arm_aapcs_vfpcc i8 @xor_v16i8(<16 x i8> %x) {
; CHECK-LABEL: xor_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %x)
  ret i8 %z
}

; XOR reduction of <32 x i8>: veor the two halves, then reduce as v16i8.
define arm_aapcs_vfpcc i8 @xor_v32i8(<32 x i8> %x) {
; CHECK-LABEL: xor_v32i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %x)
  ret i8 %z
}

; Single-lane i64 XOR reduction is the identity: just 'bx lr'.
define arm_aapcs_vfpcc i64 @xor_v1i64(<1 x i64> %x) {
; CHECK-LABEL: xor_v1i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %x)
  ret i64 %z
}

; XOR reduction of <2 x i64>: low result half from s0^s2 into r0, high half
; from s1^s3 into r1.
define arm_aapcs_vfpcc i64 @xor_v2i64(<2 x i64> %x) {
; CHECK-LABEL: xor_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x)
  ret i64 %z
}

; XOR reduction of <4 x i64>: veor the two halves, then reduce as v2i64.
define arm_aapcs_vfpcc i64 @xor_v4i64(<4 x i64> %x) {
; CHECK-LABEL: xor_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x)
  ret i64 %z
}
1042
; XOR-reduce <2 x i32> with a scalar accumulator: r0 holds %y on entry, so the
; lane reduction uses r1/r2 and the final eors folds it into r0.
define arm_aapcs_vfpcc i32 @xor_v2i32_acc(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: xor_v2i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x)
  %r = xor i32 %y, %z
  ret i32 %r
}
1056
; XOR-reduce <4 x i32> with a scalar accumulator: pairwise-XOR the four lanes
; in GPRs (r1/r2/r3 as scratch, since r0 carries %y), then fold into r0.
define arm_aapcs_vfpcc i32 @xor_v4i32_acc(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: xor_v4i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %x)
  %r = xor i32 %y, %z
  ret i32 %r
}
1074
; XOR-reduce <8 x i32> with a scalar accumulator: one veor folds q0^q1, then
; the remaining <4 x i32> is reduced in GPRs as in xor_v4i32_acc.
define arm_aapcs_vfpcc i32 @xor_v8i32_acc(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: xor_v8i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %x)
  %r = xor i32 %y, %z
  ret i32 %r
}
1093
; XOR-reduce <4 x i16> (lanes widened to 32-bit s-registers) with a scalar
; accumulator; XOR truncates freely, so the i32-style reduction is reused.
define arm_aapcs_vfpcc i16 @xor_v4i16_acc(<4 x i16> %x, i16 %y) {
; CHECK-LABEL: xor_v4i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %x)
  %r = xor i16 %y, %z
  ret i16 %r
}
1111
; XOR-reduce <8 x i16> with a scalar accumulator: one vrev32.16+veor step folds
; odd lanes into even ones, then the four even u16 lanes are combined in GPRs
; and XORed into r0 (%y).
define arm_aapcs_vfpcc i16 @xor_v8i16_acc(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: xor_v8i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %x)
  %r = xor i16 %y, %z
  ret i16 %r
}
1131
; XOR-reduce <16 x i16> with a scalar accumulator: veor folds q0^q1 first,
; then the <8 x i16> reduction proceeds exactly as in xor_v8i16_acc.
define arm_aapcs_vfpcc i16 @xor_v16i16_acc(<16 x i16> %x, i16 %y) {
; CHECK-LABEL: xor_v16i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %x)
  %r = xor i16 %y, %z
  ret i16 %r
}
1152
; XOR-reduce <8 x i8> (lanes widened to 16-bit) with a scalar accumulator; the
; expected codegen matches the v8i16 path since XOR ignores the high bits.
define arm_aapcs_vfpcc i8 @xor_v8i8_acc(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: xor_v8i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %x)
  %r = xor i8 %y, %z
  ret i8 %r
}
1172
; XOR-reduce <16 x i8> with a scalar accumulator: two vrev+veor steps shrink
; the reduction to four byte lanes (0, 4, 8, 12), which are combined in GPRs
; and folded into r0 (%y).
define arm_aapcs_vfpcc i8 @xor_v16i8_acc(<16 x i8> %x, i8 %y) {
; CHECK-LABEL: xor_v16i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %x)
  %r = xor i8 %y, %z
  ret i8 %r
}
1194
; XOR-reduce <32 x i8> with a scalar accumulator: veor folds q0^q1, then the
; <16 x i8> reduction proceeds exactly as in xor_v16i8_acc.
define arm_aapcs_vfpcc i8 @xor_v32i8_acc(<32 x i8> %x, i8 %y) {
; CHECK-LABEL: xor_v32i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %x)
  %r = xor i8 %y, %z
  ret i8 %r
}
1217
; XOR-reduce <1 x i64> with a scalar accumulator: the vector element arrives
; in r0:r1 and %y in r2:r3, so codegen is just two eors.
define arm_aapcs_vfpcc i64 @xor_v1i64_acc(<1 x i64> %x, i64 %y) {
; CHECK-LABEL: xor_v1i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    eors r0, r2
; CHECK-NEXT:    eors r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %x)
  %r = xor i64 %y, %z
  ret i64 %r
}
1229
; XOR-reduce <2 x i64> with a scalar accumulator: low words (s0^s2) fold into
; r0 and high words (s1^s3) into r1, which hold %y on entry.
define arm_aapcs_vfpcc i64 @xor_v2i64_acc(<2 x i64> %x, i64 %y) {
; CHECK-LABEL: xor_v2i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    eors r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x)
  %r = xor i64 %y, %z
  ret i64 %r
}
1247
; XOR-reduce <4 x i64> with a scalar accumulator: veor folds q0^q1, then the
; <2 x i64> reduction proceeds exactly as in xor_v2i64_acc.
define arm_aapcs_vfpcc i64 @xor_v4i64_acc(<4 x i64> %x, i64 %y) {
; CHECK-LABEL: xor_v4i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    eors r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x)
  %r = xor i64 %y, %z
  ret i64 %r
}
1266
1267declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
1268declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
1269declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
1270declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
1271declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
1272declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
1273declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
1274declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
1275declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
1276declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
1277declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
1278declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
1279declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
1280declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
1281declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
1282declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
1283declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
1284declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
1285declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>)
1286declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
1287declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
1288declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>)
1289declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
1290declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
1291declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>)
1292declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
1293declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
1294declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
1295declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
1296declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
1297declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
1298declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
1299declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
1300declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
1301declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
1302declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)
1303