; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi soft %s -o - | FileCheck %s -check-prefix CHECK -check-prefix SOFT
; RUN: llc -mtriple armeb-eabi -mattr v7,neon -float-abi hard %s -o - | FileCheck %s -check-prefix CHECK -check-prefix HARD
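
; These tests exercise the caller side of big-endian ARM argument passing.
; 64-bit values (i64, f64 and 64-bit vectors) are expected in the r0/r1 pair
; under the soft-float ABI and in d0 under the hard-float ABI; 128-bit values
; use r0-r3 or q0 respectively. A vrev64 is expected wherever a vector's
; in-register lane order differs from its memory layout, and i64 arithmetic
; on a GPR pair shows up as adds/adc.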

; CHECK-LABEL: test_i64_f64:
declare i64 @test_i64_f64_helper(double %p)
define void @test_i64_f64(double* %p, i64* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double* %p
    %2 = fadd double %1, %1
    %3 = call i64 @test_i64_f64_helper(double %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_i64_v1i64:
declare i64 @test_i64_v1i64_helper(<1 x i64> %p)
define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_i64_v2f32:
declare i64 @test_i64_v2f32_helper(<2 x float> %p)
define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_i64_v2i32:
declare i64 @test_i64_v2i32_helper(<2 x i32> %p)
define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_i64_v4i16:
declare i64 @test_i64_v4i16_helper(<4 x i16> %p)
define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_i64_v8i8:
declare i64 @test_i64_v8i8_helper(<8 x i8> %p)
define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
    %4 = add i64 %3, %3
    store i64 %4, i64* %q
    ret void
; CHECK: adds r1
; CHECK: adc r0
}

; CHECK-LABEL: test_f64_i64:
declare double @test_f64_i64_helper(i64 %p)
define void @test_f64_i64(i64* %p, double* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64* %p
    %2 = add i64 %1, %1
    %3 = call double @test_f64_i64_helper(i64 %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_f64_v1i64:
declare double @test_f64_v1i64_helper(<1 x i64> %p)
define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_f64_v2f32:
declare double @test_f64_v2f32_helper(<2 x float> %p)
define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call double @test_f64_v2f32_helper(<2 x float> %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_f64_v2i32:
declare double @test_f64_v2i32_helper(<2 x i32> %p)
define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_f64_v4i16:
declare double @test_f64_v4i16_helper(<4 x i16> %p)
define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_f64_v8i8:
declare double @test_f64_v8i8_helper(<8 x i8> %p)
define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
    %4 = fadd double %3, %3
    store double %4, double* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.f64 [[REG]]
; HARD: vadd.f64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_i64:
declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64* %p
    %2 = add i64 %1, %1
    %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_f64:
declare <1 x i64> @test_v1i64_f64_helper(double %p)
define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double* %p
    %2 = fadd double %1, %1
    %3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_v2f32:
declare <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %p)
define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
; HARD: vrev64.32 d0
; SOFT: vadd.f32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_v2i32:
declare <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %p)
define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
; HARD: vrev64.32 d0
; SOFT: vadd.i32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_v4i16:
declare <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %p)
define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v1i64_v8i8:
declare <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %p)
define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
    %4 = add <1 x i64> %3, %3
    store <1 x i64> %4, <1 x i64>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vadd.i64 [[REG]]
; HARD: vadd.i64 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_i64:
declare <2 x float> @test_v2f32_i64_helper(i64 %p)
define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64* %p
    %2 = add i64 %1, %1
    %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_f64:
declare <2 x float> @test_v2f32_f64_helper(double %p)
define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double* %p
    %2 = fadd double %1, %1
    %3 = call <2 x float> @test_v2f32_f64_helper(double %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_v1i64:
declare <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %p)
define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_v2i32:
declare <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %p)
define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
; HARD: vrev64.32 d0
; SOFT: vadd.i32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_v4i16:
declare <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %p)
define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2f32_v8i8:
declare <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %p)
define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
    %4 = fadd <2 x float> %3, %3
    store <2 x float> %4, <2 x float>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_i64:
declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64* %p
    %2 = add i64 %1, %1
    %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_f64:
declare <2 x i32> @test_v2i32_f64_helper(double %p)
define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double* %p
    %2 = fadd double %1, %1
    %3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_v1i64:
declare <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %p)
define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_v2f32:
declare <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %p)
define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
; HARD: vadd.f32 [[REG:d[0-9]+]]
; HARD: vrev64.32 d0, [[REG]]
; SOFT: vadd.f32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_v4i16:
declare <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %p)
define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v2i32_v8i8:
declare <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %p)
define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
    %4 = add <2 x i32> %3, %3
    store <2 x i32> %4, <2 x i32>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.32 [[REG]]
; HARD: vrev64.32 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_i64:
declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64* %p
    %2 = add i64 %1, %1
    %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_f64:
declare <4 x i16> @test_v4i16_f64_helper(double %p)
define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double* %p
    %2 = fadd double %1, %1
    %3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_v1i64:
declare <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %p)
define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_v2f32:
declare <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %p)
define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
; HARD: vadd.f32 [[REG:d[0-9]+]]
; HARD: vrev64.32 d0, [[REG]]
; SOFT: vadd.f32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_v2i32:
declare <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %p)
define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
; HARD: vadd.i32 [[REG:d[0-9]+]]
; HARD: vrev64.32 d0, [[REG]]
; SOFT: vadd.i32 [[REG:d[0-9]+]]
; SOFT: vrev64.32 [[REG]]
; SOFT: vmov r1, r0, [[REG]]
    %1 = load <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v4i16_v8i8:
declare <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %p)
define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
; SOFT: vrev64.8 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.8 d0
    %1 = load <8 x i8>* %p
    %2 = add <8 x i8> %1, %1
    %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
    %4 = add <4 x i16> %3, %3
    store <4 x i16> %4, <4 x i16>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.16 [[REG]]
; HARD: vrev64.16 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_i64:
declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
; CHECK: adds r1
; CHECK: adc r0
    %1 = load i64* %p
    %2 = add i64 %1, %1
    %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_f64:
declare <8 x i8> @test_v8i8_f64_helper(double %p)
define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
; SOFT: vadd.f64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.f64 d0
    %1 = load double* %p
    %2 = fadd double %1, %1
    %3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_v1i64:
declare <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %p)
define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
; SOFT: vadd.i64 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vadd.i64 d0
    %1 = load <1 x i64>* %p
    %2 = add <1 x i64> %1, %1
    %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_v2f32:
declare <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %p)
define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x float>* %p
    %2 = fadd <2 x float> %1, %1
    %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_v2i32:
declare <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %p)
define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
; SOFT: vrev64.32 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.32 d0
    %1 = load <2 x i32>* %p
    %2 = add <2 x i32> %1, %1
    %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

; CHECK-LABEL: test_v8i8_v4i16:
declare <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %p)
define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
; SOFT: vrev64.16 [[REG:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG]]
; HARD: vrev64.16 d0
    %1 = load <4 x i16>* %p
    %2 = add <4 x i16> %1, %1
    %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
    %4 = add <8 x i8> %3, %3
    store <8 x i8> %4, <8 x i8>* %q
    ret void
; SOFT: vmov [[REG:d[0-9]+]], r1, r0
; SOFT: vrev64.8 [[REG]]
; HARD: vrev64.8 {{d[0-9]+}}, d0
}

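; The tests below cover 128-bit containers (fp128, <2 x double>, <2 x i64>,
; <4 x float>, <4 x i32>, <8 x i16>, <16 x i8>): the soft-float ABI passes
; and returns them in r0-r3, the hard-float ABI in q0 (d0/d1).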
; CHECK-LABEL: test_f128_v2f64:
declare fp128 @test_f128_v2f64_helper(<2 x double> %p)
define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
; SOFT: vadd.f64 [[REG2:d[0-9]+]]
; SOFT: vadd.f64 [[REG1:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG1]]
; SOFT: vmov r3, r2, [[REG2]]
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_f128_v2i64:
declare fp128 @test_f128_v2i64_helper(<2 x i64> %p)
define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_f128_v4f32:
declare fp128 @test_f128_v4f32_helper(<4 x float> %p)
define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_f128_v4i32:
declare fp128 @test_f128_v4i32_helper(<4 x i32> %p)
define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_f128_v8i16:
declare fp128 @test_f128_v8i16_helper(<8 x i16> %p)
define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_f128_v16i8:
declare fp128 @test_f128_v16i8_helper(<16 x i8> %p)
define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
    %4 = fadd fp128 %3, %3
    store fp128 %4, fp128* %q
    ret void
; CHECK: stm sp, {r0, r1, r2, r3}
}

; CHECK-LABEL: test_v2f64_f128:
declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
    %1 = load fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2f64_v2i64:
declare <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %p)
define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2f64_v4f32:
declare <2 x double> @test_v2f64_v4f32_helper(<4 x float> %p)
define void @test_v2f64_v4f32(<4 x float>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2f64_v4i32:
declare <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %p)
define void @test_v2f64_v4i32(<4 x i32>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2f64_v8i16:
declare <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %p)
define void @test_v2f64_v8i16(<8 x i16>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2f64_v16i8:
declare <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %p)
define void @test_v2f64_v16i8(<16 x i8>* %p, <2 x double>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
    %4 = fadd <2 x double> %3, %3
    store <2 x double> %4, <2 x double>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_f128:
declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
    %1 = load fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_v2f64:
declare <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %p)
define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
; SOFT: vadd.f64 [[REG2:d[0-9]+]]
; SOFT: vadd.f64 [[REG1:d[0-9]+]]
; SOFT: vmov r1, r0, [[REG1]]
; SOFT: vmov r3, r2, [[REG2]]
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_v4f32:
declare <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %p)
define void @test_v2i64_v4f32(<4 x float>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_v4i32:
declare <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %p)
define void @test_v2i64_v4i32(<4 x i32>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_v8i16:
declare <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %p)
define void @test_v2i64_v8i16(<8 x i16>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v2i64_v16i8:
declare <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %p)
define void @test_v2i64_v16i8(<16 x i8>* %p, <2 x i64>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
    %4 = add <2 x i64> %3, %3
    store <2 x i64> %4, <2 x i64>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_f128:
declare <4 x float> @test_v4f32_f128_helper(fp128 %p)
define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
    %1 = load fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_v2f64:
declare <4 x float> @test_v4f32_v2f64_helper(<2 x double> %p)
define void @test_v4f32_v2f64(<2 x double>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_v2i64:
declare <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %p)
define void @test_v4f32_v2i64(<2 x i64>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_v4i32:
declare <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %p)
define void @test_v4f32_v4i32(<4 x i32>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_v8i16:
declare <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %p)
define void @test_v4f32_v8i16(<8 x i16>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4f32_v16i8:
declare <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %p)
define void @test_v4f32_v16i8(<16 x i8>* %p, <4 x float>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
    %4 = fadd <4 x float> %3, %3
    store <4 x float> %4, <4 x float>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_f128:
declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
    %1 = load fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_v2f64:
declare <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %p)
define void @test_v4i32_v2f64(<2 x double>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_v2i64:
declare <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %p)
define void @test_v4i32_v2i64(<2 x i64>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_v4f32:
declare <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %p)
define void @test_v4i32_v4f32(<4 x float>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_v8i16:
declare <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %p)
define void @test_v4i32_v8i16(<8 x i16>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v4i32_v16i8:
declare <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %p)
define void @test_v4i32_v16i8(<16 x i8>* %p, <4 x i32>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
    %4 = add <4 x i32> %3, %3
    store <4 x i32> %4, <4 x i32>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_f128:
declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
    %1 = load fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_v2f64:
declare <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %p)
define void @test_v8i16_v2f64(<2 x double>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_v2i64:
declare <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %p)
define void @test_v8i16_v2i64(<2 x i64>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_v4f32:
declare <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %p)
define void @test_v8i16_v4f32(<4 x float>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_v4i32:
declare <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %p)
define void @test_v8i16_v4i32(<4 x i32>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v8i16_v16i8:
declare <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %p)
define void @test_v8i16_v16i8(<16 x i8>* %p, <8 x i16>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.8 q0
    %1 = load <16 x i8>* %p
    %2 = add <16 x i8> %1, %1
    %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
    %4 = add <8 x i16> %3, %3
    store <8 x i16> %4, <8 x i16>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_f128:
declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
    %1 = load fp128* %p
    %2 = fadd fp128 %1, %1
    %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_v2f64:
declare <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %p)
define void @test_v16i8_v2f64(<2 x double>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.f64 d1
; HARD: vadd.f64 d0
    %1 = load <2 x double>* %p
    %2 = fadd <2 x double> %1, %1
    %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_v2i64:
declare <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %p)
define void @test_v16i8_v2i64(<2 x i64>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vadd.i64 q0
    %1 = load <2 x i64>* %p
    %2 = add <2 x i64> %1, %1
    %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_v4f32:
declare <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %p)
define void @test_v16i8_v4f32(<4 x float>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x float>* %p
    %2 = fadd <4 x float> %1, %1
    %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_v4i32:
declare <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %p)
define void @test_v16i8_v4i32(<4 x i32>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.32 q0
    %1 = load <4 x i32>* %p
    %2 = add <4 x i32> %1, %1
    %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}

; CHECK-LABEL: test_v16i8_v8i16:
declare <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %p)
define void @test_v16i8_v8i16(<8 x i16>* %p, <16 x i8>* %q) {
; SOFT: vmov r1, r0
; SOFT: vmov r3, r2
; HARD: vrev64.16 q0
    %1 = load <8 x i16>* %p
    %2 = add <8 x i16> %1, %1
    %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
    %4 = add <16 x i8> %3, %3
    store <16 x i8> %4, <16 x i8>* %q
    ret void
; SOFT: vmov {{d[0-9]+}}, r3, r2
; SOFT: vmov {{d[0-9]+}}, r1, r0
}