; RUN: llc -mtriple=arm64_32-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s

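; Check NEON vector insert, load/store and post-indexed addressing selection
; on arm64_32, where pointers are 32 bits wide.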
define <2 x double> @test_insert_elt(<2 x double> %vec, double %val) {
; CHECK-LABEL: test_insert_elt:
; CHECK: mov.d v0[0], v1[0]
  %res = insertelement <2 x double> %vec, double %val, i32 0
  ret <2 x double> %res
}

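; A 16-byte store that is only 8-byte aligned should still come out as a
; single str q.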
define void @test_split_16B(<4 x float> %val, <4 x float>* %addr) {
; CHECK-LABEL: test_split_16B:
; CHECK: str q0, [x0]
  store <4 x float> %val, <4 x float>* %addr, align 8
  ret void
}

define void @test_split_16B_splat(<4 x i32>, <4 x i32>* %addr) {
; CHECK-LABEL: test_split_16B_splat:
; CHECK: str {{q[0-9]+}}

  %vec.tmp0 = insertelement <4 x i32> undef, i32 42, i32 0
  %vec.tmp1 = insertelement <4 x i32> %vec.tmp0, i32 42, i32 1
  %vec.tmp2 = insertelement <4 x i32> %vec.tmp1, i32 42, i32 2
  %vec = insertelement <4 x i32> %vec.tmp2, i32 42, i32 3

  store <4 x i32> %vec, <4 x i32>* %addr, align 8
  ret void
}

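; The ld2/st2 structure load/store intrinsics below take i8* pointers; check
; they select the expected NEON instructions.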
%vec = type <2 x double>

declare {%vec, %vec} @llvm.aarch64.neon.ld2r.v2f64.p0i8(i8*)
define {%vec, %vec} @test_neon_load(i8* %addr) {
; CHECK-LABEL: test_neon_load:
; CHECK: ld2r.2d { v0, v1 }, [x0]
  %res = call {%vec, %vec} @llvm.aarch64.neon.ld2r.v2f64.p0i8(i8* %addr)
  ret {%vec, %vec} %res
}

declare {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec, %vec, i64, i8*)
define {%vec, %vec} @test_neon_load_lane(i8* %addr, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_load_lane:
; CHECK: ld2.d { v0, v1 }[0], [x0]
  %res = call {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 0, i8* %addr)
  ret {%vec, %vec} %res
}

declare void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec, %vec, i8*)
define void @test_neon_store(i8* %addr, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_store:
; CHECK: st2.2d { v0, v1 }, [x0]
  call void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec %in1, %vec %in2, i8* %addr)
  ret void
}

declare void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec, %vec, i64, i8*)
define void @test_neon_store_lane(i8* %addr, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_store_lane:
; CHECK: st2.d { v0, v1 }[1], [x0]
  call void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 1, i8* %addr)
  ret void
}

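; Post-indexed forms: on arm64_32 the i32 offset argument arrives in w1 and
; must be sign-extended (sxtw) before it can be used as the 64-bit
; post-increment register.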
declare {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8*)
define {{%vec, %vec}, i8*} @test_neon_load_post(i8* %addr, i32 %offset) {
; CHECK-LABEL: test_neon_load_post:
; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
; CHECK: ld2.2d { v0, v1 }, [x0], [[OFFSET]]

  %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8* %addr)

  %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset

  %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0
  %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1
  ret {{%vec, %vec}, i8*} %res
}

define {{%vec, %vec}, i8*} @test_neon_load_post_lane(i8* %addr, i32 %offset, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_load_post_lane:
; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
; CHECK: ld2.d { v0, v1 }[1], [x0], [[OFFSET]]

  %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 1, i8* %addr)

  %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset

  %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0
  %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1
  ret {{%vec, %vec}, i8*} %res
}

define i8* @test_neon_store_post(i8* %addr, i32 %offset, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_store_post:
; CHECK-DAG: sxtw [[OFFSET:x[0-9]+]], w1
; CHECK: st2.2d { v0, v1 }, [x0], [[OFFSET]]

  call void @llvm.aarch64.neon.st2.v2f64.p0i8(%vec %in1, %vec %in2, i8* %addr)

  %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset

  ret i8* %addr.new
}

define i8* @test_neon_store_post_lane(i8* %addr, i32 %offset, %vec %in1, %vec %in2) {
; CHECK-LABEL: test_neon_store_post_lane:
; CHECK: sxtw [[OFFSET:x[0-9]+]], w1
; CHECK: st2.d { v0, v1 }[0], [x0], [[OFFSET]]

  call void @llvm.aarch64.neon.st2lane.v2f64.p0i8(%vec %in1, %vec %in2, i64 0, i8* %addr)

  %addr.new = getelementptr inbounds i8, i8* %addr, i32 %offset

  ret i8* %addr.new
}

; ld1 is slightly different because it goes via ISelLowering of normal IR ops
; rather than an intrinsic.
define {%vec, double*} @test_neon_ld1_post_lane(double* %addr, i32 %offset, %vec %in) {
; CHECK-LABEL: test_neon_ld1_post_lane:
; CHECK: sbfiz [[OFFSET:x[0-9]+]], x1, #3, #32
; CHECK: ld1.d { v0 }[0], [x0], [[OFFSET]]

  %loaded = load double, double* %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0

  %addr.new = getelementptr inbounds double, double* %addr, i32 %offset

  %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0
  %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1

  ret {%vec, double*} %res
}

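; When the constant increment matches the size of the transfer (32 bytes for
; ld2.2d of two q registers, 8 bytes for a single ld1.d lane), the immediate
; post-index form is used instead of a register.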
define {{%vec, %vec}, i8*} @test_neon_load_post_exact(i8* %addr) {
; CHECK-LABEL: test_neon_load_post_exact:
; CHECK: ld2.2d { v0, v1 }, [x0], #32

  %vecs = call {%vec, %vec} @llvm.aarch64.neon.ld2.v2f64.p0i8(i8* %addr)

  %addr.new = getelementptr inbounds i8, i8* %addr, i32 32

  %res.tmp = insertvalue {{%vec, %vec}, i8*} undef, {%vec, %vec} %vecs, 0
  %res = insertvalue {{%vec, %vec}, i8*} %res.tmp, i8* %addr.new, 1
  ret {{%vec, %vec}, i8*} %res
}

define {%vec, double*} @test_neon_ld1_post_lane_exact(double* %addr, %vec %in) {
; CHECK-LABEL: test_neon_ld1_post_lane_exact:
; CHECK: ld1.d { v0 }[0], [x0], #8

  %loaded = load double, double* %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0

  %addr.new = getelementptr inbounds double, double* %addr, i32 1

  %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0
  %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1

  ret {%vec, double*} %res
}

; As in the general load/store case, this GEP is not marked inbounds, so it
; has defined semantics even when the 32-bit address wraps. We therefore
; cannot fold it into a post-indexed addressing mode.
define {%vec, double*} @test_neon_ld1_notpost_lane_exact(double* %addr, %vec %in) {
; CHECK-LABEL: test_neon_ld1_notpost_lane_exact:
; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], #8
; CHECK: add w0, w0, #8
; CHECK: ret

  %loaded = load double, double* %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0

  %addr.new = getelementptr double, double* %addr, i32 1

  %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0
  %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1

  ret {%vec, double*} %res
}

define {%vec, double*} @test_neon_ld1_notpost_lane(double* %addr, i32 %offset, %vec %in) {
; CHECK-LABEL: test_neon_ld1_notpost_lane:
; CHECK-NOT: ld1.d { {{v[0-9]+}} }[0], [{{x[0-9]+|sp}}], {{x[0-9]+|sp}}
; CHECK: add w0, w0, w1, lsl #3
; CHECK: ret

  %loaded = load double, double* %addr, align 8
  %newvec = insertelement %vec %in, double %loaded, i32 0

  %addr.new = getelementptr double, double* %addr, i32 %offset

  %res.tmp = insertvalue {%vec, double*} undef, %vec %newvec, 0
  %res = insertvalue {%vec, double*} %res.tmp, double* %addr.new, 1

  ret {%vec, double*} %res
}