1; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s
2; RUN: llc -mtriple=arm-eabi -float-abi=soft -mcpu=swift %s -o - | FileCheck %s --check-prefix=SWIFT
3
; t1: add two <2 x i64> vectors loaded with 16-byte alignment and store the
; sum through %r (viewed as <4 x i32>), also with 16-byte alignment.
; The default (+neon) run only pins the instruction sequence; the Swift run
; additionally requires the :128 qualifier on every address operand.
; The -LABEL directives anchor on the function's own assembly label so the
; checks below cannot match text emitted for another function.
; CHECK-LABEL: t1:
; CHECK: vld1.64
; CHECK: vld1.64
; CHECK: vadd.i64 q
; CHECK: vst1.64
; SWIFT-LABEL: t1:
; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
; SWIFT: vadd.i64 q
; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
  %lhs = load <2 x i64>, <2 x i64>* %a, align 16
  %rhs = load <2 x i64>, <2 x i64>* %b, align 16
  %sum = add <2 x i64> %lhs, %rhs
  ; Reinterpret the 128-bit sum so it matches the element type of %r.
  %sum.v4i32 = bitcast <2 x i64> %sum to <4 x i32>
  store <4 x i32> %sum.v4i32, <4 x i32>* %r, align 16
  ret void
}
23
; t2: subtract two <2 x i64> vectors loaded with 16-byte alignment and return
; the difference as a <4 x i32> value.
; Both runs expect the result to be moved out of the NEON registers into
; r0-r3 (two vmov instructions); the Swift run also requires the :128
; qualifier on both loads.
; The -LABEL directives anchor on the function's own assembly label so the
; checks below cannot match text emitted for another function.
; CHECK-LABEL: t2:
; CHECK: vld1.64
; CHECK: vld1.64
; CHECK: vsub.i64 q
; CHECK: vmov r0, r1, d
; CHECK: vmov r2, r3, d
; SWIFT-LABEL: t2:
; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
; SWIFT: vsub.i64 q
; SWIFT: vmov r0, r1, d
; SWIFT: vmov r2, r3, d
define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly {
entry:
  %lhs = load <2 x i64>, <2 x i64>* %a, align 16
  %rhs = load <2 x i64>, <2 x i64>* %b, align 16
  %diff = sub <2 x i64> %lhs, %rhs
  ; Reinterpret the 128-bit difference as the declared return type.
  %diff.v4i32 = bitcast <2 x i64> %diff to <4 x i32>
  ret <4 x i32> %diff.v4i32
}
44
; Limited alignment.
; t3: same computation as a 16-byte-aligned vector add/store, but every memory
; access is only 8-byte aligned, so the 128-bit alignment qualifier must not
; be emitted on Swift.
; Each address pattern closes the bracket explicitly: an open-ended
; "\[r[0-9]+" would also accept "[rN:128]" and the test could never fail.
; A ":64" qualifier is tolerated because it is consistent with align 8.
; Only the Swift run has expectations for this function.
; SWIFT-LABEL: t3:
; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+(:64)?\]}}
; SWIFT: vld1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+(:64)?\]}}
; SWIFT: vadd.i64 q
; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+(:64)?\]}}
define void @t3(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
  %lhs = load <2 x i64>, <2 x i64>* %a, align 8
  %rhs = load <2 x i64>, <2 x i64>* %b, align 8
  %sum = add <2 x i64> %lhs, %rhs
  ; Reinterpret the 128-bit sum so it matches the element type of %r.
  %sum.v4i32 = bitcast <2 x i64> %sum to <4 x i32>
  store <4 x i32> %sum.v4i32, <4 x i32>* %r, align 8
  ret void
}
60