1; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -asm-verbose=false | FileCheck %s
2
3%struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
4%struct.__neon_int8x8x3_t = type { <8 x i8>,  <8 x i8>,  <8 x i8> }
5%struct.__neon_int8x8x4_t = type { <8 x i8>,  <8 x i8>, <8 x i8>,  <8 x i8> }
6
define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
; CHECK-LABEL: ld2_8b
; Results must land in the ABI-defined return registers (v0, v1) and the
; pointer argument must arrive in the ABI-defined register (x0).
; CHECK: ld2.8b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x2_t %ld
}

define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
; CHECK-LABEL: ld3_8b
; Operand registers must follow the ABI.
; CHECK: ld3.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x3_t %ld
}

define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind {
; CHECK-LABEL: ld4_8b
; Operand registers must follow the ABI.
; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x4_t %ld
}
34
35declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*) nounwind readonly
36declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*) nounwind readonly
37declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*) nounwind readonly
38
39%struct.__neon_int8x16x2_t = type { <16 x i8>,  <16 x i8> }
40%struct.__neon_int8x16x3_t = type { <16 x i8>,  <16 x i8>,  <16 x i8> }
41%struct.__neon_int8x16x4_t = type { <16 x i8>,  <16 x i8>, <16 x i8>,  <16 x i8> }
42
define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
; CHECK-LABEL: ld2_16b
; Operand registers must follow the ABI.
; CHECK: ld2.16b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x2_t %ld
}

define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
; CHECK-LABEL: ld3_16b
; Operand registers must follow the ABI.
; CHECK: ld3.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x3_t %ld
}

define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind {
; CHECK-LABEL: ld4_16b
; Operand registers must follow the ABI.
; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x4_t %ld
}
69
70declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*) nounwind readonly
71declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*) nounwind readonly
72declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*) nounwind readonly
73
74%struct.__neon_int16x4x2_t = type { <4 x i16>,  <4 x i16> }
75%struct.__neon_int16x4x3_t = type { <4 x i16>,  <4 x i16>,  <4 x i16> }
76%struct.__neon_int16x4x4_t = type { <4 x i16>,  <4 x i16>, <4 x i16>,  <4 x i16> }
77
define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
; CHECK-LABEL: ld2_4h
; Operand registers must follow the ABI.
; CHECK: ld2.4h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x2_t %ld
}

define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
; CHECK-LABEL: ld3_4h
; Operand registers must follow the ABI.
; CHECK: ld3.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x3_t %ld
}

define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind {
; CHECK-LABEL: ld4_4h
; Operand registers must follow the ABI.
; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x4_t %ld
}
104
105declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*) nounwind readonly
106declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*) nounwind readonly
107declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*) nounwind readonly
108
109%struct.__neon_int16x8x2_t = type { <8 x i16>,  <8 x i16> }
110%struct.__neon_int16x8x3_t = type { <8 x i16>,  <8 x i16>,  <8 x i16> }
111%struct.__neon_int16x8x4_t = type { <8 x i16>,  <8 x i16>, <8 x i16>,  <8 x i16> }
112
define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
; CHECK-LABEL: ld2_8h
; Operand registers must follow the ABI.
; CHECK: ld2.8h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x2_t %ld
}

define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
; CHECK-LABEL: ld3_8h
; Operand registers must follow the ABI.
; CHECK: ld3.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x3_t %ld
}

define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind {
; CHECK-LABEL: ld4_8h
; Operand registers must follow the ABI.
; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x4_t %ld
}
139
140declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*) nounwind readonly
141declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*) nounwind readonly
142declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*) nounwind readonly
143
144%struct.__neon_int32x2x2_t = type { <2 x i32>,  <2 x i32> }
145%struct.__neon_int32x2x3_t = type { <2 x i32>,  <2 x i32>,  <2 x i32> }
146%struct.__neon_int32x2x4_t = type { <2 x i32>,  <2 x i32>, <2 x i32>,  <2 x i32> }
147
define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
; CHECK-LABEL: ld2_2s
; Operand registers must follow the ABI.
; CHECK: ld2.2s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x2_t %ld
}

define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
; CHECK-LABEL: ld3_2s
; Operand registers must follow the ABI.
; CHECK: ld3.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x3_t %ld
}

define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind {
; CHECK-LABEL: ld4_2s
; Operand registers must follow the ABI.
; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x4_t %ld
}
174
175declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*) nounwind readonly
176declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*) nounwind readonly
177declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*) nounwind readonly
178
179%struct.__neon_int32x4x2_t = type { <4 x i32>,  <4 x i32> }
180%struct.__neon_int32x4x3_t = type { <4 x i32>,  <4 x i32>,  <4 x i32> }
181%struct.__neon_int32x4x4_t = type { <4 x i32>,  <4 x i32>, <4 x i32>,  <4 x i32> }
182
define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
; CHECK-LABEL: ld2_4s
; Operand registers must follow the ABI.
; CHECK: ld2.4s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x2_t %ld
}

define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
; CHECK-LABEL: ld3_4s
; Operand registers must follow the ABI.
; CHECK: ld3.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x3_t %ld
}

define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind {
; CHECK-LABEL: ld4_4s
; Operand registers must follow the ABI.
; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x4_t %ld
}
209
210declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*) nounwind readonly
211declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*) nounwind readonly
212declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*) nounwind readonly
213
214%struct.__neon_int64x2x2_t = type { <2 x i64>,  <2 x i64> }
215%struct.__neon_int64x2x3_t = type { <2 x i64>,  <2 x i64>,  <2 x i64> }
216%struct.__neon_int64x2x4_t = type { <2 x i64>,  <2 x i64>, <2 x i64>,  <2 x i64> }
217
define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
; CHECK-LABEL: ld2_2d
; Operand registers must follow the ABI.
; CHECK: ld2.2d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x2_t %ld
}

define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
; CHECK-LABEL: ld3_2d
; Operand registers must follow the ABI.
; CHECK: ld3.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x3_t %ld
}

define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind {
; CHECK-LABEL: ld4_2d
; Operand registers must follow the ABI.
; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x4_t %ld
}
244
245declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*) nounwind readonly
246declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*) nounwind readonly
247declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*) nounwind readonly
248
249%struct.__neon_int64x1x2_t = type { <1 x i64>,  <1 x i64> }
250%struct.__neon_int64x1x3_t = type { <1 x i64>,  <1 x i64>, <1 x i64> }
251%struct.__neon_int64x1x4_t = type { <1 x i64>,  <1 x i64>, <1 x i64>, <1 x i64> }
252
253
define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld2_1di64
; Operand registers must follow the ABI. Note: ldN of <1 x i64> lowers to a
; multi-register ld1.1d, not ldN.
; CHECK: ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x2_t %ld
}

define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld3_1di64
; Operand registers must follow the ABI.
; CHECK: ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x3_t %ld
}

define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind {
; CHECK-LABEL: ld4_1di64
; Operand registers must follow the ABI.
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x4_t %ld
}
280
281
282declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*) nounwind readonly
283declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*) nounwind readonly
284declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*) nounwind readonly
285
286%struct.__neon_float64x1x2_t = type { <1 x double>,  <1 x double> }
287%struct.__neon_float64x1x3_t = type { <1 x double>,  <1 x double>, <1 x double> }
288%struct.__neon_float64x1x4_t = type { <1 x double>,  <1 x double>, <1 x double>, <1 x double> }
289
290
define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
; CHECK-LABEL: ld2_1df64
; Operand registers must follow the ABI. Note: ldN of <1 x double> lowers to a
; multi-register ld1.1d, not ldN.
; CHECK: ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x2_t %ld
}

define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
; CHECK-LABEL: ld3_1df64
; Operand registers must follow the ABI.
; CHECK: ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x3_t %ld
}

define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind {
; CHECK-LABEL: ld4_1df64
; Operand registers must follow the ABI.
; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %ld = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
  ret %struct.__neon_float64x1x4_t %ld
}
317
318declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0f64(double*) nounwind readonly
319declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0f64(double*) nounwind readonly
320declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0f64(double*) nounwind readonly
321
322
define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, i8* %A) nounwind {
; Use CHECK-LABEL so a failure here cannot accidentally match text in another
; function's output; consistent with the ldN tests above.
; CHECK-LABEL: ld2lane_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.b { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, i64 1, i8* %A)
  ret %struct.__neon_int8x16x2_t  %tmp2
}

define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i8* %A) nounwind {
; CHECK-LABEL: ld3lane_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.b { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, i8* %A)
  ret %struct.__neon_int8x16x3_t  %tmp2
}

define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i8* %A) nounwind {
; CHECK-LABEL: ld4lane_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.b { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, i8* %A)
  ret %struct.__neon_int8x16x4_t  %tmp2
}
349
350declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
351declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
352declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
353
define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, i16* %A) nounwind {
; CHECK-LABEL: ld2lane_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.h { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, i64 1, i16* %A)
  ret %struct.__neon_int16x8x2_t  %tmp2
}

define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i16* %A) nounwind {
; CHECK-LABEL: ld3lane_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.h { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, i16* %A)
  ret %struct.__neon_int16x8x3_t  %tmp2
}

define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i16* %A) nounwind {
; CHECK-LABEL: ld4lane_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.h { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, i16* %A)
  ret %struct.__neon_int16x8x4_t  %tmp2
}
380
381declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
382declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
383declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
384
define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, i32* %A) nounwind {
; CHECK-LABEL: ld2lane_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.s { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, i64 1, i32* %A)
  ret %struct.__neon_int32x4x2_t  %tmp2
}

define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i32* %A) nounwind {
; CHECK-LABEL: ld3lane_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.s { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, i32* %A)
  ret %struct.__neon_int32x4x3_t  %tmp2
}

define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i32* %A) nounwind {
; CHECK-LABEL: ld4lane_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.s { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, i32* %A)
  ret %struct.__neon_int32x4x4_t  %tmp2
}
411
412declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
413declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
414declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
415
define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, i64* %A) nounwind {
; CHECK-LABEL: ld2lane_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld2.d { v0, v1 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, i64 1, i64* %A)
  ret %struct.__neon_int64x2x2_t  %tmp2
}

define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64* %A) nounwind {
; CHECK-LABEL: ld3lane_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld3.d { v0, v1, v2 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, i64* %A)
  ret %struct.__neon_int64x2x3_t  %tmp2
}

define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64* %A) nounwind {
; CHECK-LABEL: ld4lane_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld4.d { v0, v1, v2, v3 }[1], [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, i64* %A)
  ret %struct.__neon_int64x2x4_t  %tmp2
}
442
443declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
444declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
445declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
446
define <8 x i8> @ld1r_8b(i8* %bar) {
; Use CHECK-LABEL (not bare CHECK) for function names so failures cannot
; accidentally match output from a neighboring function.
; CHECK-LABEL: ld1r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.8b { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
  ret <8 x i8> %tmp9
}

define <16 x i8> @ld1r_16b(i8* %bar) {
; CHECK-LABEL: ld1r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.16b { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i8, i8* %bar
  %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
  %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
  %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
  %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
  %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
  %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
  %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
  %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
  %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
  %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
  %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
  %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
  %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
  %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
  %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
  ret <16 x i8> %tmp17
}

define <4 x i16> @ld1r_4h(i16* %bar) {
; CHECK-LABEL: ld1r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4h { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
  ret <4 x i16> %tmp5
}

define <8 x i16> @ld1r_8h(i16* %bar) {
; CHECK-LABEL: ld1r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.8h { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i16, i16* %bar
  %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
  %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
  %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
  %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
  %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
  %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
  %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
  ret <8 x i16> %tmp9
}

define <2 x i32> @ld1r_2s(i32* %bar) {
; CHECK-LABEL: ld1r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
  ret <2 x i32> %tmp3
}

define <4 x i32> @ld1r_4s(i32* %bar) {
; CHECK-LABEL: ld1r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i32, i32* %bar
  %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
  %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
  %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
  %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
  ret <4 x i32> %tmp5
}

define <2 x i64> @ld1r_2d(i64* %bar) {
; CHECK-LABEL: ld1r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
  %tmp1 = load i64, i64* %bar
  %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
  %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
  ret <2 x i64> %tmp3
}
553
define %struct.__neon_int8x8x2_t @ld2r_8b(i8* %A) nounwind {
; CHECK-LABEL: ld2r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.8b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x2_t  %tmp2
}

define %struct.__neon_int8x8x3_t @ld3r_8b(i8* %A) nounwind {
; CHECK-LABEL: ld3r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x3_t  %tmp2
}

define %struct.__neon_int8x8x4_t @ld4r_8b(i8* %A) nounwind {
; CHECK-LABEL: ld4r_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
  ret %struct.__neon_int8x8x4_t  %tmp2
}
580
581declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
582declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
583declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly
584
define %struct.__neon_int8x16x2_t @ld2r_16b(i8* %A) nounwind {
; CHECK-LABEL: ld2r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.16b { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x2_t  %tmp2
}

define %struct.__neon_int8x16x3_t @ld3r_16b(i8* %A) nounwind {
; CHECK-LABEL: ld3r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x3_t  %tmp2
}

define %struct.__neon_int8x16x4_t @ld4r_16b(i8* %A) nounwind {
; CHECK-LABEL: ld4r_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
  ret %struct.__neon_int8x16x4_t  %tmp2
}
611
612declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
613declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
614declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly
615
define %struct.__neon_int16x4x2_t @ld2r_4h(i16* %A) nounwind {
; CHECK-LABEL: ld2r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.4h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x2_t  %tmp2
}

define %struct.__neon_int16x4x3_t @ld3r_4h(i16* %A) nounwind {
; CHECK-LABEL: ld3r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x3_t  %tmp2
}

define %struct.__neon_int16x4x4_t @ld4r_4h(i16* %A) nounwind {
; CHECK-LABEL: ld4r_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
  ret %struct.__neon_int16x4x4_t  %tmp2
}
642
643declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
644declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
645declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly
646
define %struct.__neon_int16x8x2_t @ld2r_8h(i16* %A) nounwind {
; CHECK-LABEL: ld2r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.8h { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x2_t  %tmp2
}

define %struct.__neon_int16x8x3_t @ld3r_8h(i16* %A) nounwind {
; CHECK-LABEL: ld3r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x3_t  %tmp2
}

define %struct.__neon_int16x8x4_t @ld4r_8h(i16* %A) nounwind {
; CHECK-LABEL: ld4r_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
  ret %struct.__neon_int16x8x4_t  %tmp2
}
673
674declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
675declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
676declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly
677
678define %struct.__neon_int32x2x2_t @ld2r_2s(i32* %A) nounwind {
679; CHECK: ld2r_2s
680; Make sure we are using the operands defined by the ABI
681; CHECK: ld2r.2s { v0, v1 }, [x0]
682; CHECK-NEXT: ret
683	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
684	ret %struct.__neon_int32x2x2_t  %tmp2
685}
686
define %struct.__neon_int32x2x3_t @ld3r_2s(i32* %A) nounwind {
; CHECK-LABEL anchors the following checks to this function's output only.
; CHECK-LABEL: ld3r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x3_t  %tmp2
}
695
define %struct.__neon_int32x2x4_t @ld4r_2s(i32* %A) nounwind {
; CHECK-LABEL anchors the following checks to this function's output only.
; CHECK-LABEL: ld4r_2s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
  ret %struct.__neon_int32x2x4_t  %tmp2
}
704
705declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
706declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
707declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly
708
define %struct.__neon_int32x4x2_t @ld2r_4s(i32* %A) nounwind {
; CHECK-LABEL anchors the following checks to this function's output only.
; CHECK-LABEL: ld2r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.4s { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x2_t  %tmp2
}
717
define %struct.__neon_int32x4x3_t @ld3r_4s(i32* %A) nounwind {
; CHECK-LABEL anchors the following checks to this function's output only.
; CHECK-LABEL: ld3r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x3_t  %tmp2
}
726
define %struct.__neon_int32x4x4_t @ld4r_4s(i32* %A) nounwind {
; CHECK-LABEL anchors the following checks to this function's output only.
; CHECK-LABEL: ld4r_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
  ret %struct.__neon_int32x4x4_t  %tmp2
}
735
736declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
737declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
738declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
739
define %struct.__neon_int64x1x2_t @ld2r_1d(i64* %A) nounwind {
; CHECK-LABEL anchors the following checks to this function's output only.
; CHECK-LABEL: ld2r_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.1d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x2_t  %tmp2
}
748
define %struct.__neon_int64x1x3_t @ld3r_1d(i64* %A) nounwind {
; CHECK-LABEL anchors the following checks to this function's output only.
; CHECK-LABEL: ld3r_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x3_t  %tmp2
}
757
define %struct.__neon_int64x1x4_t @ld4r_1d(i64* %A) nounwind {
; CHECK-LABEL anchors the following checks to this function's output only.
; CHECK-LABEL: ld4r_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
  ret %struct.__neon_int64x1x4_t  %tmp2
}
766
767declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly
768declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly
769declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly
770
define %struct.__neon_int64x2x2_t @ld2r_2d(i64* %A) nounwind {
; CHECK-LABEL anchors the following checks to this function's output only.
; CHECK-LABEL: ld2r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld2r.2d { v0, v1 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x2_t  %tmp2
}
779
define %struct.__neon_int64x2x3_t @ld3r_2d(i64* %A) nounwind {
; CHECK-LABEL anchors the following checks to this function's output only.
; CHECK-LABEL: ld3r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld3r.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x3_t  %tmp2
}
788
define %struct.__neon_int64x2x4_t @ld4r_2d(i64* %A) nounwind {
; CHECK-LABEL anchors the following checks to this function's output only.
; CHECK-LABEL: ld4r_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
  ret %struct.__neon_int64x2x4_t  %tmp2
}
797
798declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
799declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
800declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
801
define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
; CHECK-LABEL: ld1_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.b { v0 }[0], [x0]
; CHECK-NEXT: ret
; Scalar load + insert into lane 0 should select a single ld1.b lane load.
  %byte = load i8, i8* %bar
  %vec = insertelement <16 x i8> %V, i8 %byte, i32 0
  ret <16 x i8> %vec
}
811
define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
; CHECK-LABEL: ld1_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.h { v0 }[0], [x0]
; CHECK-NEXT: ret
; Scalar load + insert into lane 0 should select a single ld1.h lane load.
  %half = load i16, i16* %bar
  %vec = insertelement <8 x i16> %V, i16 %half, i32 0
  ret <8 x i16> %vec
}
821
define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
; CHECK-LABEL: ld1_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
; Scalar load + insert into lane 0 should select a single ld1.s lane load.
  %word = load i32, i32* %bar
  %vec = insertelement <4 x i32> %V, i32 %word, i32 0
  ret <4 x i32> %vec
}
831
define <4 x float> @ld1_4s_float(<4 x float> %V, float* %bar) {
; CHECK-LABEL: ld1_4s_float:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
; Same as ld1_4s but with a float element type.
  %elt = load float, float* %bar
  %vec = insertelement <4 x float> %V, float %elt, i32 0
  ret <4 x float> %vec
}
841
define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
; CHECK-LABEL: ld1_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.d { v0 }[0], [x0]
; CHECK-NEXT: ret
; Scalar load + insert into lane 0 should select a single ld1.d lane load.
  %dword = load i64, i64* %bar
  %vec = insertelement <2 x i64> %V, i64 %dword, i32 0
  ret <2 x i64> %vec
}
851
define <2 x double> @ld1_2d_double(<2 x double> %V, double* %bar) {
; CHECK-LABEL: ld1_2d_double:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.d { v0 }[0], [x0]
; CHECK-NEXT: ret
; Same as ld1_2d but with a double element type.
  %elt = load double, double* %bar
  %vec = insertelement <2 x double> %V, double %elt, i32 0
  ret <2 x double> %vec
}
861
define <1 x i64> @ld1_1d(<1 x i64>* %p) {
; CHECK-LABEL: ld1_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ldr [[REG:d[0-9]+]], [x0]
; CHECK-NEXT: ret
; A whole <1 x i64> load is expected to lower to a plain 64-bit ldr.
  %v = load <1 x i64>, <1 x i64>* %p, align 8
  ret <1 x i64> %v
}
870
define <8 x i8> @ld1_8b(<8 x i8> %V, i8* %bar) {
; CHECK-LABEL: ld1_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.b { v0 }[0], [x0]
; CHECK-NEXT: ret
; 64-bit vector variant of the ld1.b lane-load pattern.
  %byte = load i8, i8* %bar
  %vec = insertelement <8 x i8> %V, i8 %byte, i32 0
  ret <8 x i8> %vec
}
880
define <4 x i16> @ld1_4h(<4 x i16> %V, i16* %bar) {
; CHECK-LABEL: ld1_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.h { v0 }[0], [x0]
; CHECK-NEXT: ret
; 64-bit vector variant of the ld1.h lane-load pattern.
  %half = load i16, i16* %bar
  %vec = insertelement <4 x i16> %V, i16 %half, i32 0
  ret <4 x i16> %vec
}
890
define <2 x i32> @ld1_2s(<2 x i32> %V, i32* %bar) {
; CHECK-LABEL: ld1_2s:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
; 64-bit vector variant of the ld1.s lane-load pattern.
  %word = load i32, i32* %bar
  %vec = insertelement <2 x i32> %V, i32 %word, i32 0
  ret <2 x i32> %vec
}
900
define <2 x float> @ld1_2s_float(<2 x float> %V, float* %bar) {
; CHECK-LABEL: ld1_2s_float:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT: ret
; Same as ld1_2s but with a float element type.
  %elt = load float, float* %bar
  %vec = insertelement <2 x float> %V, float %elt, i32 0
  ret <2 x float> %vec
}
910
911
912; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s
define void @ld1r_2s_from_dup(i8* nocapture %a, i8* nocapture %b, i16* nocapture %diff) nounwind ssp {
entry:
; CHECK-LABEL anchors this check sequence to this function's output only.
; CHECK-LABEL: ld1r_2s_from_dup
; CHECK: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0]
; CHECK-NEXT: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1]
; CHECK-NEXT: ushll.8h [[ARG1]], [[ARG1]], #0
; CHECK-NEXT: ushll.8h [[ARG2]], [[ARG2]], #0
; CHECK-NEXT: sub.4h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]]
; CHECK-NEXT: str d[[RESREGNUM]], [x2]
; CHECK-NEXT: ret
  %tmp = bitcast i8* %a to i32*
  %tmp1 = load i32, i32* %tmp, align 4
  %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
  %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp3 = bitcast <2 x i32> %lane to <8 x i8>
  %tmp4 = bitcast i8* %b to i32*
  %tmp5 = load i32, i32* %tmp4, align 4
  %tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0
  %lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp7 = bitcast <2 x i32> %lane1 to <8 x i8>
  %vmovl.i.i = zext <8 x i8> %tmp3 to <8 x i16>
  %vmovl.i4.i = zext <8 x i8> %tmp7 to <8 x i16>
  %sub.i = sub <8 x i16> %vmovl.i.i, %vmovl.i4.i
  %tmp8 = bitcast <8 x i16> %sub.i to <2 x i64>
  %shuffle.i = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
  %tmp9 = bitcast <1 x i64> %shuffle.i to <4 x i16>
  %tmp10 = bitcast i16* %diff to <4 x i16>*
  store <4 x i16> %tmp9, <4 x i16>* %tmp10, align 8
  ret void
}
943
944; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
define <4 x float> @ld1r_4s_float(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_4s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
; A load splatted to all four lanes via repeated inserts should become ld1r.
  %elt = load float, float* %x, align 4
  %v0 = insertelement <4 x float> undef, float %elt, i32 0
  %v1 = insertelement <4 x float> %v0, float %elt, i32 1
  %v2 = insertelement <4 x float> %v1, float %elt, i32 2
  %v3 = insertelement <4 x float> %v2, float %elt, i32 3
  ret <4 x float> %v3
}
958
define <2 x float> @ld1r_2s_float(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
; A load splatted to both lanes via repeated inserts should become ld1r.
  %elt = load float, float* %x, align 4
  %v0 = insertelement <2 x float> undef, float %elt, i32 0
  %v1 = insertelement <2 x float> %v0, float %elt, i32 1
  ret <2 x float> %v1
}
970
define <2 x double> @ld1r_2d_double(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
; A load splatted to both lanes via repeated inserts should become ld1r.
  %elt = load double, double* %x, align 4
  %v0 = insertelement <2 x double> undef, double %elt, i32 0
  %v1 = insertelement <2 x double> %v0, double %elt, i32 1
  ret <2 x double> %v1
}
982
define <1 x double> @ld1r_1d_double(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_1d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
; CHECK-NEXT: ret
; A one-element "splat" is just a scalar FP load.
  %elt = load double, double* %x, align 4
  %v0 = insertelement <1 x double> undef, double %elt, i32 0
  ret <1 x double> %v0
}
993
define <4 x float> @ld1r_4s_float_shuff(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_4s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT: ret
; Splat expressed as insert + zero shufflevector should also become ld1r.
  %elt = load float, float* %x, align 4
  %ins = insertelement <4 x float> undef, float %elt, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}
1005
define <2 x float> @ld1r_2s_float_shuff(float* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT: ret
; Splat expressed as insert + zero shufflevector should also become ld1r.
  %elt = load float, float* %x, align 4
  %ins = insertelement <2 x float> undef, float %elt, i32 0
  %splat = shufflevector <2 x float> %ins, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %splat
}
1017
define <2 x double> @ld1r_2d_double_shuff(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_2d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT: ret
; Splat expressed as insert + zero shufflevector should also become ld1r.
  %elt = load double, double* %x, align 4
  %ins = insertelement <2 x double> undef, double %elt, i32 0
  %splat = shufflevector <2 x double> %ins, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}
1029
define <1 x double> @ld1r_1d_double_shuff(double* nocapture %x) {
entry:
; CHECK-LABEL: ld1r_1d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
; CHECK-NEXT: ret
; One-element splat via shufflevector is still just a scalar FP load.
  %elt = load double, double* %x, align 4
  %ins = insertelement <1 x double> undef, double %elt, i32 0
  %splat = shufflevector <1 x double> %ins, <1 x double> undef, <1 x i32> zeroinitializer
  ret <1 x double> %splat
}
1041
1042%struct.__neon_float32x2x2_t = type { <2 x float>,  <2 x float> }
1043%struct.__neon_float32x2x3_t = type { <2 x float>,  <2 x float>,  <2 x float> }
1044%struct.__neon_float32x2x4_t = type { <2 x float>,  <2 x float>, <2 x float>,  <2 x float> }
1045
1046declare %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*) nounwind readonly
1047declare %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*) nounwind readonly
1048declare %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*) nounwind readonly
1049declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) nounwind readonly
1050declare %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*) nounwind readonly
1051declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*) nounwind readonly
1052
define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(i8* %addr) {
; CHECK-LABEL: ld1_x2_v8i8:
; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Two-register ld1 of 8b vectors from the pointer argument in x0.
  %res = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x8x2_t %res
}
1059
define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(i16* %addr) {
; CHECK-LABEL: ld1_x2_v4i16:
; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Two-register ld1 of 4h vectors from the pointer argument in x0.
  %res = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x4x2_t %res
}
1066
define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(i32* %addr) {
; CHECK-LABEL: ld1_x2_v2i32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Two-register ld1 of 2s vectors from the pointer argument in x0.
  %res = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x2x2_t %res
}
1073
define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(float* %addr) {
; CHECK-LABEL: ld1_x2_v2f32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Float variant: same 2s two-register ld1 encoding as the i32 case.
  %res = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %addr)
  ret %struct.__neon_float32x2x2_t %res
}
1080
define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(i64* %addr) {
; CHECK-LABEL: ld1_x2_v1i64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Two-register ld1 of 1d vectors from the pointer argument in x0.
  %res = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x1x2_t %res
}
1087
define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(double* %addr) {
; CHECK-LABEL: ld1_x2_v1f64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Double variant: same 1d two-register ld1 encoding as the i64 case.
  %res = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %addr)
  ret %struct.__neon_float64x1x2_t %res
}
1094
1095
1096%struct.__neon_float32x4x2_t = type { <4 x float>,  <4 x float> }
1097%struct.__neon_float32x4x3_t = type { <4 x float>,  <4 x float>,  <4 x float> }
1098%struct.__neon_float32x4x4_t = type { <4 x float>,  <4 x float>, <4 x float>,  <4 x float> }
1099
1100%struct.__neon_float64x2x2_t = type { <2 x double>,  <2 x double> }
1101%struct.__neon_float64x2x3_t = type { <2 x double>,  <2 x double>,  <2 x double> }
1102%struct.__neon_float64x2x4_t = type { <2 x double>,  <2 x double>, <2 x double>,  <2 x double> }
1103
1104declare %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*) nounwind readonly
1105declare %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*) nounwind readonly
1106declare %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*) nounwind readonly
1107declare %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*) nounwind readonly
1108declare %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*) nounwind readonly
1109declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*) nounwind readonly
1110
define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(i8* %addr) {
; CHECK-LABEL: ld1_x2_v16i8:
; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (16b) two-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x16x2_t %res
}
1117
define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(i16* %addr) {
; CHECK-LABEL: ld1_x2_v8i16:
; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (8h) two-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x8x2_t %res
}
1124
define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(i32* %addr) {
; CHECK-LABEL: ld1_x2_v4i32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (4s) two-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x4x2_t %res
}
1131
define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(float* %addr) {
; CHECK-LABEL: ld1_x2_v4f32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Float variant: same 4s two-register ld1 encoding as the i32 case.
  %res = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %addr)
  ret %struct.__neon_float32x4x2_t %res
}
1138
define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(i64* %addr) {
; CHECK-LABEL: ld1_x2_v2i64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (2d) two-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x2x2_t %res
}
1145
define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(double* %addr) {
; CHECK-LABEL: ld1_x2_v2f64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Double variant: same 2d two-register ld1 encoding as the i64 case.
  %res = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %addr)
  ret %struct.__neon_float64x2x2_t %res
}
1152
1153declare %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*) nounwind readonly
1154declare %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*) nounwind readonly
1155declare %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*) nounwind readonly
1156declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) nounwind readonly
1157declare %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*) nounwind readonly
1158declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*) nounwind readonly
1159
define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(i8* %addr) {
; CHECK-LABEL: ld1_x3_v8i8:
; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Three-register ld1 of 8b vectors from the pointer argument in x0.
  %res = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x8x3_t %res
}
1166
define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(i16* %addr) {
; CHECK-LABEL: ld1_x3_v4i16:
; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Three-register ld1 of 4h vectors from the pointer argument in x0.
  %res = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x4x3_t %res
}
1173
define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(i32* %addr) {
; CHECK-LABEL: ld1_x3_v2i32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Three-register ld1 of 2s vectors from the pointer argument in x0.
  %res = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x2x3_t %res
}
1180
define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(float* %addr) {
; CHECK-LABEL: ld1_x3_v2f32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Float variant: same 2s three-register ld1 encoding as the i32 case.
  %res = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %addr)
  ret %struct.__neon_float32x2x3_t %res
}
1187
define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(i64* %addr) {
; CHECK-LABEL: ld1_x3_v1i64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Three-register ld1 of 1d vectors from the pointer argument in x0.
  %res = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x1x3_t %res
}
1194
define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(double* %addr) {
; CHECK-LABEL: ld1_x3_v1f64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Double variant: same 1d three-register ld1 encoding as the i64 case.
  %res = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %addr)
  ret %struct.__neon_float64x1x3_t %res
}
1201
1202declare %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*) nounwind readonly
1203declare %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*) nounwind readonly
1204declare %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*) nounwind readonly
1205declare %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*) nounwind readonly
1206declare %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*) nounwind readonly
1207declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*) nounwind readonly
1208
define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(i8* %addr) {
; CHECK-LABEL: ld1_x3_v16i8:
; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (16b) three-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x16x3_t %res
}
1215
define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(i16* %addr) {
; CHECK-LABEL: ld1_x3_v8i16:
; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (8h) three-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x8x3_t %res
}
1222
define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(i32* %addr) {
; CHECK-LABEL: ld1_x3_v4i32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (4s) three-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x4x3_t %res
}
1229
define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(float* %addr) {
; CHECK-LABEL: ld1_x3_v4f32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Float variant: same 4s three-register ld1 encoding as the i32 case.
  %res = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %addr)
  ret %struct.__neon_float32x4x3_t %res
}
1236
define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(i64* %addr) {
; CHECK-LABEL: ld1_x3_v2i64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (2d) three-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x2x3_t %res
}
1243
define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(double* %addr) {
; CHECK-LABEL: ld1_x3_v2f64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Double variant: same 2d three-register ld1 encoding as the i64 case.
  %res = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %addr)
  ret %struct.__neon_float64x2x3_t %res
}
1250
1251declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*) nounwind readonly
1252declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*) nounwind readonly
1253declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*) nounwind readonly
1254declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) nounwind readonly
1255declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*) nounwind readonly
1256declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*) nounwind readonly
1257
define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(i8* %addr) {
; CHECK-LABEL: ld1_x4_v8i8:
; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Four-register ld1 of 8b vectors from the pointer argument in x0.
  %res = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x8x4_t %res
}
1264
define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(i16* %addr) {
; CHECK-LABEL: ld1_x4_v4i16:
; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Four-register ld1 of 4h vectors from the pointer argument in x0.
  %res = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x4x4_t %res
}
1271
define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(i32* %addr) {
; CHECK-LABEL: ld1_x4_v2i32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Four-register ld1 of 2s vectors from the pointer argument in x0.
  %res = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x2x4_t %res
}
1278
define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(float* %addr) {
; CHECK-LABEL: ld1_x4_v2f32:
; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Float variant: same 2s four-register ld1 encoding as the i32 case.
  %res = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %addr)
  ret %struct.__neon_float32x2x4_t %res
}
1285
define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(i64* %addr) {
; CHECK-LABEL: ld1_x4_v1i64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Four-register ld1 of 1d vectors from the pointer argument in x0.
  %res = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x1x4_t %res
}
1292
define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(double* %addr) {
; CHECK-LABEL: ld1_x4_v1f64:
; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Double variant: same 1d four-register ld1 encoding as the i64 case.
  %res = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %addr)
  ret %struct.__neon_float64x1x4_t %res
}
1299
1300declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*) nounwind readonly
1301declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*) nounwind readonly
1302declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*) nounwind readonly
1303declare %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*) nounwind readonly
1304declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*) nounwind readonly
1305declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*) nounwind readonly
1306
define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(i8* %addr) {
; CHECK-LABEL: ld1_x4_v16i8:
; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (16b) four-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %addr)
  ret %struct.__neon_int8x16x4_t %res
}
1313
define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(i16* %addr) {
; CHECK-LABEL: ld1_x4_v8i16:
; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (8h) four-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %addr)
  ret %struct.__neon_int16x8x4_t %res
}
1320
define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(i32* %addr) {
; CHECK-LABEL: ld1_x4_v4i32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (4s) four-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %addr)
  ret %struct.__neon_int32x4x4_t %res
}
1327
define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(float* %addr) {
; CHECK-LABEL: ld1_x4_v4f32:
; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Float variant: same 4s four-register ld1 encoding as the i32 case.
  %res = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %addr)
  ret %struct.__neon_float32x4x4_t %res
}
1334
define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(i64* %addr) {
; CHECK-LABEL: ld1_x4_v2i64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; 128-bit (2d) four-register ld1 from the pointer argument in x0.
  %res = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %addr)
  ret %struct.__neon_int64x2x4_t %res
}
1341
define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(double* %addr) {
; CHECK-LABEL: ld1_x4_v2f64:
; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
; Double variant: same 2d four-register ld1 encoding as the i64 case.
  %res = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %addr)
  ret %struct.__neon_float64x2x4_t %res
}
1348