1; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
2
; Writes lane 1 of the <16 x i8> argument straight to %D; a st1.b lane store is expected.
3define void @st1lane_16b(<16 x i8> %A, i8* %D) {
4; CHECK-LABEL: st1lane_16b
5; CHECK: st1.b
6  %tmp = extractelement <16 x i8> %A, i32 1
7  store i8 %tmp, i8* %D
8  ret void
9}
10
; Writes lane 1 of %A to the i8 slot %offset elements past %D.
; Expected output: an add that forms the address in a scratch x register,
; then a st1.b of v0 lane 1 through that register.
11define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
12; CHECK-LABEL: st1lane_ro_16b
13; CHECK: add x[[XREG:[0-9]+]], x0, x1
14; CHECK: st1.b { v0 }[1], [x[[XREG]]]
15  %ptr = getelementptr i8* %D, i64 %offset
16  %tmp = extractelement <16 x i8> %A, i32 1
17  store i8 %tmp, i8* %ptr
18  ret void
19}
20
; Same as the lane-1 variant but extracts lane 0: the byte case is still
; expected to use add + st1.b (lane 0) rather than a scalar str.
21define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
22; CHECK-LABEL: st1lane0_ro_16b
23; CHECK: add x[[XREG:[0-9]+]], x0, x1
24; CHECK: st1.b { v0 }[0], [x[[XREG]]]
25  %ptr = getelementptr i8* %D, i64 %offset
26  %tmp = extractelement <16 x i8> %A, i32 0
27  store i8 %tmp, i8* %ptr
28  ret void
29}
30
; Writes lane 1 of the <8 x i16> argument straight to %D; a st1.h lane store is expected.
31define void @st1lane_8h(<8 x i16> %A, i16* %D) {
32; CHECK-LABEL: st1lane_8h
33; CHECK: st1.h
34  %tmp = extractelement <8 x i16> %A, i32 1
35  store i16 %tmp, i16* %D
36  ret void
37}
38
; Writes lane 1 of %A to the i16 slot %offset elements past %D.
; Expected output: an add into a scratch x register followed by a st1.h of
; v0 lane 1 through that register.
39define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
40; CHECK-LABEL: st1lane_ro_8h
41; CHECK: add x[[XREG:[0-9]+]], x0, x1
42; CHECK: st1.h { v0 }[1], [x[[XREG]]]
43  %ptr = getelementptr i16* %D, i64 %offset
44  %tmp = extractelement <8 x i16> %A, i32 1
45  store i16 %tmp, i16* %ptr
46  ret void
47}
48
; Lane 0 of %A stored to %D[%offset]: expected to fold into one scalar
; str of h0 addressed as x0 + (x1 << 1), with no separate add.
49define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
50; CHECK-LABEL: st1lane0_ro_8h
51; CHECK: str h0, [x0, x1, lsl #1]
52  %ptr = getelementptr i16* %D, i64 %offset
53  %tmp = extractelement <8 x i16> %A, i32 0
54  store i16 %tmp, i16* %ptr
55  ret void
56}
57
; Writes lane 1 of the <4 x i32> argument straight to %D; a st1.s lane store is expected.
58define void @st1lane_4s(<4 x i32> %A, i32* %D) {
59; CHECK-LABEL: st1lane_4s
60; CHECK: st1.s
61  %tmp = extractelement <4 x i32> %A, i32 1
62  store i32 %tmp, i32* %D
63  ret void
64}
65
; Writes lane 1 of %A to the i32 slot %offset elements past %D.
; Expected output: an add into a scratch x register followed by a st1.s of
; v0 lane 1 through that register.
66define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
67; CHECK-LABEL: st1lane_ro_4s
68; CHECK: add x[[XREG:[0-9]+]], x0, x1
69; CHECK: st1.s { v0 }[1], [x[[XREG]]]
70  %ptr = getelementptr i32* %D, i64 %offset
71  %tmp = extractelement <4 x i32> %A, i32 1
72  store i32 %tmp, i32* %ptr
73  ret void
74}
75
; Lane 0 of %A stored to %D[%offset]: expected to fold into one scalar
; str of s0 addressed as x0 + (x1 << 2).
76define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
77; CHECK-LABEL: st1lane0_ro_4s
78; CHECK: str s0, [x0, x1, lsl #2]
79  %ptr = getelementptr i32* %D, i64 %offset
80  %tmp = extractelement <4 x i32> %A, i32 0
81  store i32 %tmp, i32* %ptr
82  ret void
83}
84
; Float flavour: lane 1 of a <4 x float> is stored to %D; a st1.s lane store is expected.
85define void @st1lane_4s_float(<4 x float> %A, float* %D) {
86; CHECK-LABEL: st1lane_4s_float
87; CHECK: st1.s
88  %tmp = extractelement <4 x float> %A, i32 1
89  store float %tmp, float* %D
90  ret void
91}
92
; Writes lane 1 of the float vector to the slot %offset elements past %D.
; Expected output: an add into a scratch x register followed by a st1.s of
; v0 lane 1 through that register.
93define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
94; CHECK-LABEL: st1lane_ro_4s_float
95; CHECK: add x[[XREG:[0-9]+]], x0, x1
96; CHECK: st1.s { v0 }[1], [x[[XREG]]]
97  %ptr = getelementptr float* %D, i64 %offset
98  %tmp = extractelement <4 x float> %A, i32 1
99  store float %tmp, float* %ptr
100  ret void
101}
102
; Lane 0 of the float vector stored to %D[%offset]: expected to fold into
; one scalar str of s0 addressed as x0 + (x1 << 2).
103define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
104; CHECK-LABEL: st1lane0_ro_4s_float
105; CHECK: str s0, [x0, x1, lsl #2]
106  %ptr = getelementptr float* %D, i64 %offset
107  %tmp = extractelement <4 x float> %A, i32 0
108  store float %tmp, float* %ptr
109  ret void
110}
111
; Writes lane 1 of the <2 x i64> argument straight to %D; a st1.d lane store is expected.
112define void @st1lane_2d(<2 x i64> %A, i64* %D) {
113; CHECK-LABEL: st1lane_2d
114; CHECK: st1.d
115  %tmp = extractelement <2 x i64> %A, i32 1
116  store i64 %tmp, i64* %D
117  ret void
118}
119
; Writes lane 1 of %A to the i64 slot %offset elements past %D.
; Expected output: an add into a scratch x register followed by a st1.d of
; v0 lane 1 through that register.
120define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
121; CHECK-LABEL: st1lane_ro_2d
122; CHECK: add x[[XREG:[0-9]+]], x0, x1
123; CHECK: st1.d { v0 }[1], [x[[XREG]]]
124  %ptr = getelementptr i64* %D, i64 %offset
125  %tmp = extractelement <2 x i64> %A, i32 1
126  store i64 %tmp, i64* %ptr
127  ret void
128}
129
; Lane 0 of %A stored to %D[%offset]: expected to fold into one scalar
; str of d0 addressed as x0 + (x1 << 3).
130define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
131; CHECK-LABEL: st1lane0_ro_2d
132; CHECK: str d0, [x0, x1, lsl #3]
133  %ptr = getelementptr i64* %D, i64 %offset
134  %tmp = extractelement <2 x i64> %A, i32 0
135  store i64 %tmp, i64* %ptr
136  ret void
137}
138
; Double flavour: lane 1 of a <2 x double> is stored to %D; a st1.d lane store is expected.
139define void @st1lane_2d_double(<2 x double> %A, double* %D) {
140; CHECK-LABEL: st1lane_2d_double
141; CHECK: st1.d
142  %tmp = extractelement <2 x double> %A, i32 1
143  store double %tmp, double* %D
144  ret void
145}
146
; Writes lane 1 of the double vector to the slot %offset elements past %D.
; Expected output: an add into a scratch x register followed by a st1.d of
; v0 lane 1 through that register.
147define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
148; CHECK-LABEL: st1lane_ro_2d_double
149; CHECK: add x[[XREG:[0-9]+]], x0, x1
150; CHECK: st1.d { v0 }[1], [x[[XREG]]]
151  %ptr = getelementptr double* %D, i64 %offset
152  %tmp = extractelement <2 x double> %A, i32 1
153  store double %tmp, double* %ptr
154  ret void
155}
156
; Lane 0 of the double vector stored to %D[%offset]: expected to fold into
; one scalar str of d0 addressed as x0 + (x1 << 3).
157define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
158; CHECK-LABEL: st1lane0_ro_2d_double
159; CHECK: str d0, [x0, x1, lsl #3]
160  %ptr = getelementptr double* %D, i64 %offset
161  %tmp = extractelement <2 x double> %A, i32 0
162  store double %tmp, double* %ptr
163  ret void
164}
165
; 64-bit vector case: lane 1 of a <8 x i8> is stored to %D; a st1.b lane store is expected.
166define void @st1lane_8b(<8 x i8> %A, i8* %D) {
167; CHECK-LABEL: st1lane_8b
168; CHECK: st1.b
169  %tmp = extractelement <8 x i8> %A, i32 1
170  store i8 %tmp, i8* %D
171  ret void
172}
173
; Writes lane 1 of the <8 x i8> vector to the slot %offset elements past %D.
; Expected output: an add into a scratch x register followed by a st1.b of
; v0 lane 1 through that register.
174define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
175; CHECK-LABEL: st1lane_ro_8b
176; CHECK: add x[[XREG:[0-9]+]], x0, x1
177; CHECK: st1.b { v0 }[1], [x[[XREG]]]
178  %ptr = getelementptr i8* %D, i64 %offset
179  %tmp = extractelement <8 x i8> %A, i32 1
180  store i8 %tmp, i8* %ptr
181  ret void
182}
183
; Lane 0 of the <8 x i8> vector stored to %D[%offset]: the byte case is
; still expected to use add + st1.b (lane 0) rather than a scalar str.
184define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
185; CHECK-LABEL: st1lane0_ro_8b
186; CHECK: add x[[XREG:[0-9]+]], x0, x1
187; CHECK: st1.b { v0 }[0], [x[[XREG]]]
188  %ptr = getelementptr i8* %D, i64 %offset
189  %tmp = extractelement <8 x i8> %A, i32 0
190  store i8 %tmp, i8* %ptr
191  ret void
192}
193
; 64-bit vector case: lane 1 of a <4 x i16> is stored to %D; a st1.h lane store is expected.
194define void @st1lane_4h(<4 x i16> %A, i16* %D) {
195; CHECK-LABEL: st1lane_4h
196; CHECK: st1.h
197  %tmp = extractelement <4 x i16> %A, i32 1
198  store i16 %tmp, i16* %D
199  ret void
200}
201
; Writes lane 1 of the <4 x i16> vector to the slot %offset elements past %D.
; Expected output: an add into a scratch x register followed by a st1.h of
; v0 lane 1 through that register.
202define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
203; CHECK-LABEL: st1lane_ro_4h
204; CHECK: add x[[XREG:[0-9]+]], x0, x1
205; CHECK: st1.h { v0 }[1], [x[[XREG]]]
206  %ptr = getelementptr i16* %D, i64 %offset
207  %tmp = extractelement <4 x i16> %A, i32 1
208  store i16 %tmp, i16* %ptr
209  ret void
210}
211
; Lane 0 of the <4 x i16> vector stored to %D[%offset]: expected to fold
; into one scalar str of h0 addressed as x0 + (x1 << 1).
212define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
213; CHECK-LABEL: st1lane0_ro_4h
214; CHECK: str h0, [x0, x1, lsl #1]
215  %ptr = getelementptr i16* %D, i64 %offset
216  %tmp = extractelement <4 x i16> %A, i32 0
217  store i16 %tmp, i16* %ptr
218  ret void
219}
220
; 64-bit vector case: lane 1 of a <2 x i32> is stored to %D; a st1.s lane store is expected.
221define void @st1lane_2s(<2 x i32> %A, i32* %D) {
222; CHECK-LABEL: st1lane_2s
223; CHECK: st1.s
224  %tmp = extractelement <2 x i32> %A, i32 1
225  store i32 %tmp, i32* %D
226  ret void
227}
228
; Writes lane 1 of the <2 x i32> vector to the slot %offset elements past %D.
; Expected output: an add into a scratch x register followed by a st1.s of
; v0 lane 1 through that register.
229define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
230; CHECK-LABEL: st1lane_ro_2s
231; CHECK: add x[[XREG:[0-9]+]], x0, x1
232; CHECK: st1.s { v0 }[1], [x[[XREG]]]
233  %ptr = getelementptr i32* %D, i64 %offset
234  %tmp = extractelement <2 x i32> %A, i32 1
235  store i32 %tmp, i32* %ptr
236  ret void
237}
238
; Lane 0 of the <2 x i32> vector stored to %D[%offset]: expected to fold
; into one scalar str of s0 addressed as x0 + (x1 << 2).
239define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
240; CHECK-LABEL: st1lane0_ro_2s
241; CHECK: str s0, [x0, x1, lsl #2]
242  %ptr = getelementptr i32* %D, i64 %offset
243  %tmp = extractelement <2 x i32> %A, i32 0
244  store i32 %tmp, i32* %ptr
245  ret void
246}
247
; 64-bit float vector case: lane 1 of a <2 x float> is stored to %D; a st1.s lane store is expected.
248define void @st1lane_2s_float(<2 x float> %A, float* %D) {
249; CHECK-LABEL: st1lane_2s_float
250; CHECK: st1.s
251  %tmp = extractelement <2 x float> %A, i32 1
252  store float %tmp, float* %D
253  ret void
254}
255
; Writes lane 1 of the <2 x float> vector to the slot %offset elements past %D.
; Expected output: an add into a scratch x register followed by a st1.s of
; v0 lane 1 through that register.
256define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
257; CHECK-LABEL: st1lane_ro_2s_float
258; CHECK: add x[[XREG:[0-9]+]], x0, x1
259; CHECK: st1.s { v0 }[1], [x[[XREG]]]
260  %ptr = getelementptr float* %D, i64 %offset
261  %tmp = extractelement <2 x float> %A, i32 1
262  store float %tmp, float* %ptr
263  ret void
264}
265
; Lane 0 of the <2 x float> vector stored to %D[%offset]: expected to fold
; into one scalar str of s0 addressed as x0 + (x1 << 2).
266define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
267; CHECK-LABEL: st1lane0_ro_2s_float
268; CHECK: str s0, [x0, x1, lsl #2]
269  %ptr = getelementptr float* %D, i64 %offset
270  %tmp = extractelement <2 x float> %A, i32 0
271  store float %tmp, float* %ptr
272  ret void
273}
274
; Calls the st2lane intrinsic with lane index 1 on two <16 x i8> vectors; a st2.b is expected.
275define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
276; CHECK-LABEL: st2lane_16b
277; CHECK: st2.b
278  call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
279  ret void
280}
281
; Calls the st2lane intrinsic with lane index 1 on two <8 x i16> vectors; a st2.h is expected.
282define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
283; CHECK-LABEL: st2lane_8h
284; CHECK: st2.h
285  call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
286  ret void
287}
288
; Calls the st2lane intrinsic with lane index 1 on two <4 x i32> vectors; a st2.s is expected.
289define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
290; CHECK-LABEL: st2lane_4s
291; CHECK: st2.s
292  call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
293  ret void
294}
295
; Calls the st2lane intrinsic with lane index 1 on two <2 x i64> vectors; a st2.d is expected.
296define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
297; CHECK-LABEL: st2lane_2d
298; CHECK: st2.d
299  call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
300  ret void
301}
302
; Two-vector lane-store intrinsics called by the st2lane_* tests.
; NOTE(review): readnone on an intrinsic that the tests expect to lower to a
; store instruction looks suspicious -- confirm the attribute is intended.
303declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
304declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
305declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
306declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
307
; Calls the st3lane intrinsic with lane index 1 on three <16 x i8> vectors; a st3.b is expected.
308define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
309; CHECK-LABEL: st3lane_16b
310; CHECK: st3.b
311  call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
312  ret void
313}
314
; Calls the st3lane intrinsic with lane index 1 on three <8 x i16> vectors; a st3.h is expected.
315define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
316; CHECK-LABEL: st3lane_8h
317; CHECK: st3.h
318  call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
319  ret void
320}
321
; Calls the st3lane intrinsic with lane index 1 on three <4 x i32> vectors; a st3.s is expected.
322define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
323; CHECK-LABEL: st3lane_4s
324; CHECK: st3.s
325  call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
326  ret void
327}
328
; Calls the st3lane intrinsic with lane index 1 on three <2 x i64> vectors; a st3.d is expected.
329define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
330; CHECK-LABEL: st3lane_2d
331; CHECK: st3.d
332  call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
333  ret void
334}
335
; Three-vector lane-store intrinsics called by the st3lane_* tests.
; NOTE(review): readnone on an intrinsic that the tests expect to lower to a
; store instruction looks suspicious -- confirm the attribute is intended.
336declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
337declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
338declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
339declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
340
; Calls the st4lane intrinsic with lane index 1 on four <16 x i8> vectors; a st4.b is expected.
341define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
342; CHECK-LABEL: st4lane_16b
343; CHECK: st4.b
344  call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
345  ret void
346}
347
; Calls the st4lane intrinsic with lane index 1 on four <8 x i16> vectors; a st4.h is expected.
348define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
349; CHECK-LABEL: st4lane_8h
350; CHECK: st4.h
351  call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
352  ret void
353}
354
; Calls the st4lane intrinsic with lane index 1 on four <4 x i32> vectors; a st4.s is expected.
355define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
356; CHECK-LABEL: st4lane_4s
357; CHECK: st4.s
358  call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
359  ret void
360}
361
; Calls the st4lane intrinsic with lane index 1 on four <2 x i64> vectors; a st4.d is expected.
362define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
363; CHECK-LABEL: st4lane_2d
364; CHECK: st4.d
365  call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
366  ret void
367}
368
; Four-vector lane-store intrinsics called by the st4lane_* tests.
; NOTE(review): readnone on an intrinsic that the tests expect to lower to a
; store instruction looks suspicious -- confirm the attribute is intended.
369declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readnone
370declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readnone
371declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readnone
372declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
373
374
; st2 intrinsic on two <8 x i8> vectors stored through %P; the output must
; contain a st2.8b. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
375define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
376; CHECK-LABEL: st2_8b
377; CHECK: st2.8b
378	call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
379	ret void
380}
381
; st3 intrinsic on three <8 x i8> vectors stored through %P; the output must
; contain a st3.8b. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
382define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
383; CHECK-LABEL: st3_8b
384; CHECK: st3.8b
385	call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
386	ret void
387}
388
; st4 intrinsic on four <8 x i8> vectors stored through %P; the output must
; contain a st4.8b. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
389define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
390; CHECK-LABEL: st4_8b
391; CHECK: st4.8b
392	call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
393	ret void
394}
395
; st2/st3/st4 intrinsics for <8 x i8>, called by the *_8b tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
396declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly
397declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
398declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
399
; st2 intrinsic on two <16 x i8> vectors stored through %P; the output must
; contain a st2.16b. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
400define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
401; CHECK-LABEL: st2_16b
402; CHECK: st2.16b
403	call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
404	ret void
405}
406
; st3 intrinsic on three <16 x i8> vectors stored through %P; the output must
; contain a st3.16b. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
407define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
408; CHECK-LABEL: st3_16b
409; CHECK: st3.16b
410	call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
411	ret void
412}
413
; st4 intrinsic on four <16 x i8> vectors stored through %P; the output must
; contain a st4.16b. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
414define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
415; CHECK-LABEL: st4_16b
416; CHECK: st4.16b
417	call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
418	ret void
419}
420
; st2/st3/st4 intrinsics for <16 x i8>, called by the *_16b tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
421declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly
422declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
423declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
424
; st2 intrinsic on two <4 x i16> vectors stored through %P; the output must
; contain a st2.4h. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
425define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
426; CHECK-LABEL: st2_4h
427; CHECK: st2.4h
428	call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
429	ret void
430}
431
; st3 intrinsic on three <4 x i16> vectors stored through %P; the output must
; contain a st3.4h. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
432define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
433; CHECK-LABEL: st3_4h
434; CHECK: st3.4h
435	call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
436	ret void
437}
438
; st4 intrinsic on four <4 x i16> vectors stored through %P; the output must
; contain a st4.4h. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
439define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
440; CHECK-LABEL: st4_4h
441; CHECK: st4.4h
442	call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
443	ret void
444}
445
; st2/st3/st4 intrinsics for <4 x i16>, called by the *_4h tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
446declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly
447declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
448declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
449
; st2 intrinsic on two <8 x i16> vectors stored through %P; the output must
; contain a st2.8h. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
450define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
451; CHECK-LABEL: st2_8h
452; CHECK: st2.8h
453	call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
454	ret void
455}
456
; st3 intrinsic on three <8 x i16> vectors stored through %P; the output must
; contain a st3.8h. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
457define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
458; CHECK-LABEL: st3_8h
459; CHECK: st3.8h
460	call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
461	ret void
462}
463
; st4 intrinsic on four <8 x i16> vectors stored through %P; the output must
; contain a st4.8h. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
464define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
465; CHECK-LABEL: st4_8h
466; CHECK: st4.8h
467	call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
468	ret void
469}
470
; st2/st3/st4 intrinsics for <8 x i16>, called by the *_8h tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
471declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly
472declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
473declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
474
; st2 intrinsic on two <2 x i32> vectors stored through %P; the output must
; contain a st2.2s. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
475define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
476; CHECK-LABEL: st2_2s
477; CHECK: st2.2s
478	call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
479	ret void
480}
481
; st3 intrinsic on three <2 x i32> vectors stored through %P; the output must
; contain a st3.2s. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
482define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
483; CHECK-LABEL: st3_2s
484; CHECK: st3.2s
485	call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
486	ret void
487}
488
; st4 intrinsic on four <2 x i32> vectors stored through %P; the output must
; contain a st4.2s. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
489define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
490; CHECK-LABEL: st4_2s
491; CHECK: st4.2s
492	call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
493	ret void
494}
495
; st2/st3/st4 intrinsics for <2 x i32>, called by the *_2s tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
496declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly
497declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
498declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
499
; st2 intrinsic on two <4 x i32> vectors stored through %P; the output must
; contain a st2.4s. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
500define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
501; CHECK-LABEL: st2_4s
502; CHECK: st2.4s
503	call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
504	ret void
505}
506
; st3 intrinsic on three <4 x i32> vectors stored through %P; the output must
; contain a st3.4s. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
507define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
508; CHECK-LABEL: st3_4s
509; CHECK: st3.4s
510	call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
511	ret void
512}
513
; st4 intrinsic on four <4 x i32> vectors stored through %P; the output must
; contain a st4.4s. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
514define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
515; CHECK-LABEL: st4_4s
516; CHECK: st4.4s
517	call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
518	ret void
519}
520
; st2/st3/st4 intrinsics for <4 x i32>, called by the *_4s tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
521declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly
522declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
523declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
524
; st2 intrinsic on two <1 x i64> vectors stored through %P. There is no
; structured st2 form for the .1d arrangement, so a multi-register st1.1d is
; the expected instruction. The check directive also needs its trailing
; colon, otherwise FileCheck treats the line as a plain comment.
525define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
526; CHECK-LABEL: st2_1d
527; CHECK: st1.1d
528	call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
529	ret void
530}
531
; st3 intrinsic on three <1 x i64> vectors stored through %P. There is no
; structured st3 form for the .1d arrangement, so a multi-register st1.1d is
; the expected instruction. The check directive also needs its trailing
; colon, otherwise FileCheck treats the line as a plain comment.
532define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
533; CHECK-LABEL: st3_1d
534; CHECK: st1.1d
535	call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
536	ret void
537}
538
; st4 intrinsic on four <1 x i64> vectors stored through %P. There is no
; structured st4 form for the .1d arrangement, so a multi-register st1.1d is
; the expected instruction. The check directive also needs its trailing
; colon, otherwise FileCheck treats the line as a plain comment.
539define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
540; CHECK-LABEL: st4_1d
541; CHECK: st1.1d
542	call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
543	ret void
544}
545
; st2/st3/st4 intrinsics for <1 x i64>, called by the *_1d tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
546declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly
547declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
548declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
549
; st2 intrinsic on two <2 x i64> vectors stored through %P; the output must
; contain a st2.2d. The check directive needs its trailing colon, otherwise
; FileCheck treats the line as a plain comment and verifies nothing.
550define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
551; CHECK-LABEL: st2_2d
552; CHECK: st2.2d
553	call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
554	ret void
555}
556
; st3 intrinsic on three <2 x i64> vectors stored through %P; the expected
; instruction is st3 with the .2d arrangement (the register count belongs in
; the mnemonic, not the arrangement). The check directive also needs its
; trailing colon, otherwise FileCheck treats the line as a plain comment.
557define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
558; CHECK-LABEL: st3_2d
559; CHECK: st3.2d
560	call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
561	ret void
562}
563
; st4 intrinsic on four <2 x i64> vectors stored through %P; the expected
; instruction is st4 with the .2d arrangement (the register count belongs in
; the mnemonic, not the arrangement). The check directive also needs its
; trailing colon, otherwise FileCheck treats the line as a plain comment.
564define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
565; CHECK-LABEL: st4_2d
566; CHECK: st4.2d
567	call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
568	ret void
569}
570
; st2/st3/st4 intrinsics for <2 x i64>, called by the *_2d tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
571declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly
572declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly
573declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly
574
; st1x2 intrinsics for the 64-bit vector types, called by the st1_x2_v* tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
575declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind readonly
576declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind readonly
577declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind readonly
578declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind readonly
579declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind readonly
580declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind readonly
581
; st1x2 on two <8 x i8> vectors: expects one st1.8b with a two-register list addressed through x0.
582define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) {
583; CHECK-LABEL: st1_x2_v8i8:
584; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
585  call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr)
586  ret void
587}
588
; st1x2 on two <4 x i16> vectors: expects one st1.4h with a two-register list addressed through x0.
589define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) {
590; CHECK-LABEL: st1_x2_v4i16:
591; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
592  call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr)
593  ret void
594}
595
; st1x2 on two <2 x i32> vectors: expects one st1.2s with a two-register list addressed through x0.
596define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) {
597; CHECK-LABEL: st1_x2_v2i32:
598; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
599  call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr)
600  ret void
601}
602
; st1x2 on two <2 x float> vectors: expects one st1.2s with a two-register list addressed through x0.
603define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) {
604; CHECK-LABEL: st1_x2_v2f32:
605; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
606  call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr)
607  ret void
608}
609
; st1x2 on two <1 x i64> vectors: expects one st1.1d with a two-register list addressed through x0.
610define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) {
611; CHECK-LABEL: st1_x2_v1i64:
612; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
613  call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr)
614  ret void
615}
616
; st1x2 on two <1 x double> vectors: expects one st1.1d with a two-register list addressed through x0.
617define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) {
618; CHECK-LABEL: st1_x2_v1f64:
619; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
620  call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr)
621  ret void
622}
623
; st1x2 intrinsics for the 128-bit vector types, called by the st1_x2_v* tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
624declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind readonly
625declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind readonly
626declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind readonly
627declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind readonly
628declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind readonly
629declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind readonly
630
; st1x2 on two <16 x i8> vectors: expects one st1.16b with a two-register list addressed through x0.
631define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) {
632; CHECK-LABEL: st1_x2_v16i8:
633; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
634  call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr)
635  ret void
636}
637
; st1x2 on two <8 x i16> vectors: expects one st1.8h with a two-register list addressed through x0.
638define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) {
639; CHECK-LABEL: st1_x2_v8i16:
640; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
641  call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr)
642  ret void
643}
644
; st1x2 on two <4 x i32> vectors: expects one st1.4s with a two-register list addressed through x0.
645define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) {
646; CHECK-LABEL: st1_x2_v4i32:
647; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
648  call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr)
649  ret void
650}
651
; st1x2 on two <4 x float> vectors: expects one st1.4s with a two-register list addressed through x0.
652define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) {
653; CHECK-LABEL: st1_x2_v4f32:
654; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
655  call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr)
656  ret void
657}
658
; st1x2 on two <2 x i64> vectors: expects one st1.2d with a two-register list addressed through x0.
659define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) {
660; CHECK-LABEL: st1_x2_v2i64:
661; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
662  call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr)
663  ret void
664}
665
; st1x2 on two <2 x double> vectors: expects one st1.2d with a two-register list addressed through x0.
666define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) {
667; CHECK-LABEL: st1_x2_v2f64:
668; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
669  call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr)
670  ret void
671}
672
; st1x3 intrinsics for the 64-bit vector types, called by the st1_x3_v* tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
673declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
674declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
675declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
676declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly
677declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
678declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly
679
; st1x3 on three <8 x i8> vectors: expects one st1.8b with a three-register list addressed through x0.
680define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) {
681; CHECK-LABEL: st1_x3_v8i8:
682; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
683  call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr)
684  ret void
685}
686
; st1x3 on three <4 x i16> vectors: expects one st1.4h with a three-register list addressed through x0.
687define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) {
688; CHECK-LABEL: st1_x3_v4i16:
689; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
690  call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr)
691  ret void
692}
693
; st1x3 on three <2 x i32> vectors: expects one st1.2s with a three-register list addressed through x0.
694define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) {
695; CHECK-LABEL: st1_x3_v2i32:
696; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
697  call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr)
698  ret void
699}
700
; st1x3 on three <2 x float> vectors: expects one st1.2s with a three-register list addressed through x0.
701define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) {
702; CHECK-LABEL: st1_x3_v2f32:
703; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
704  call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr)
705  ret void
706}
707
; st1x3 on three <1 x i64> vectors: expects one st1.1d with a three-register list addressed through x0.
708define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) {
709; CHECK-LABEL: st1_x3_v1i64:
710; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
711  call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr)
712  ret void
713}
714
; st1x3 on three <1 x double> vectors: expects one st1.1d with a three-register list addressed through x0.
715define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) {
716; CHECK-LABEL: st1_x3_v1f64:
717; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
718  call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr)
719  ret void
720}
721
; st1x3 intrinsics for the 128-bit vector types, called by the st1_x3_v* tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
722declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
723declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
724declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
725declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind readonly
726declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind readonly
727declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind readonly
728
; st1x3 on three <16 x i8> vectors: expects one st1.16b with a three-register list addressed through x0.
729define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) {
730; CHECK-LABEL: st1_x3_v16i8:
731; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
732  call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr)
733  ret void
734}
735
; st1x3 on three <8 x i16> vectors: expects one st1.8h with a three-register list addressed through x0.
736define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) {
737; CHECK-LABEL: st1_x3_v8i16:
738; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
739  call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr)
740  ret void
741}
742
; st1x3 on three <4 x i32> vectors: expects one st1.4s with a three-register list addressed through x0.
743define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) {
744; CHECK-LABEL: st1_x3_v4i32:
745; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
746  call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr)
747  ret void
748}
749
; st1x3 on three <4 x float> vectors: expects one st1.4s with a three-register list addressed through x0.
750define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) {
751; CHECK-LABEL: st1_x3_v4f32:
752; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
753  call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr)
754  ret void
755}
756
; st1x3 on three <2 x i64> vectors: expects one st1.2d with a three-register list addressed through x0.
757define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) {
758; CHECK-LABEL: st1_x3_v2i64:
759; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
760  call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr)
761  ret void
762}
763
; st1x3 on three <2 x double> vectors: expects one st1.2d with a three-register list addressed through x0.
764define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) {
765; CHECK-LABEL: st1_x3_v2f64:
766; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
767  call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr)
768  ret void
769}
770
771
; st1x4 intrinsics for the 64-bit vector types, called by the st1_x4_v* tests.
; NOTE(review): readonly on a store intrinsic looks suspicious -- confirm.
772declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
773declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
774declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
775declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind readonly
776declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
777declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind readonly
778
; Hands four <8 x i8> vectors and %addr to the st1x4 intrinsic. The expected
; output is a single st1.8b with a four-register list and [x0] as the address.
; CHECK-LABEL: st1_x4_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A,
                                               <8 x i8> %B,
                                               <8 x i8> %C,
                                               <8 x i8> %D,
                                               i8* %addr)
  ret void
}
785
; Hands four <4 x i16> vectors and %addr to the st1x4 intrinsic. The expected
; output is a single st1.4h with a four-register list and [x0] as the address.
; CHECK-LABEL: st1_x4_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A,
                                                 <4 x i16> %B,
                                                 <4 x i16> %C,
                                                 <4 x i16> %D,
                                                 i16* %addr)
  ret void
}
792
; Hands four <2 x i32> vectors and %addr to the st1x4 intrinsic. The expected
; output is a single st1.2s with a four-register list and [x0] as the address.
; CHECK-LABEL: st1_x4_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A,
                                                 <2 x i32> %B,
                                                 <2 x i32> %C,
                                                 <2 x i32> %D,
                                                 i32* %addr)
  ret void
}
799
; Hands four <2 x float> vectors and %addr to the st1x4 intrinsic. The
; expected output is a single st1.2s with a four-register list and [x0] as
; the address.
; CHECK-LABEL: st1_x4_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A,
                                                 <2 x float> %B,
                                                 <2 x float> %C,
                                                 <2 x float> %D,
                                                 float* %addr)
  ret void
}
806
; Hands four <1 x i64> vectors and %addr to the st1x4 intrinsic. The expected
; output is a single st1.1d with a four-register list and [x0] as the address.
; CHECK-LABEL: st1_x4_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A,
                                                 <1 x i64> %B,
                                                 <1 x i64> %C,
                                                 <1 x i64> %D,
                                                 i64* %addr)
  ret void
}
813
; Hands four <1 x double> vectors and %addr to the st1x4 intrinsic. The
; expected output is a single st1.1d with a four-register list and [x0] as
; the address.
; CHECK-LABEL: st1_x4_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A,
                                                 <1 x double> %B,
                                                 <1 x double> %C,
                                                 <1 x double> %D,
                                                 double* %addr)
  ret void
}
820
; st1x4 intrinsic declarations for the 128-bit vector element types, called by
; the st1_x4_v16i8 .. st1_x4_v2f64 tests that follow. Each takes four vectors
; and a destination pointer and returns void. They are declared nounwind only:
; a void function marked readonly would have no observable effect, which
; contradicts an operation whose purpose is to write through its pointer.
declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind
827
; Hands four <16 x i8> vectors and %addr to the st1x4 intrinsic. The expected
; output is a single st1.16b with a four-register list and [x0] as the address.
; CHECK-LABEL: st1_x4_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %A,
                                                <16 x i8> %B,
                                                <16 x i8> %C,
                                                <16 x i8> %D,
                                                i8* %addr)
  ret void
}
834
; Hands four <8 x i16> vectors and %addr to the st1x4 intrinsic. The expected
; output is a single st1.8h with a four-register list and [x0] as the address.
; CHECK-LABEL: st1_x4_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A,
                                                 <8 x i16> %B,
                                                 <8 x i16> %C,
                                                 <8 x i16> %D,
                                                 i16* %addr)
  ret void
}
841
; Hands four <4 x i32> vectors and %addr to the st1x4 intrinsic. The expected
; output is a single st1.4s with a four-register list and [x0] as the address.
; CHECK-LABEL: st1_x4_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A,
                                                 <4 x i32> %B,
                                                 <4 x i32> %C,
                                                 <4 x i32> %D,
                                                 i32* %addr)
  ret void
}
848
; Hands four <4 x float> vectors and %addr to the st1x4 intrinsic. The
; expected output is a single st1.4s with a four-register list and [x0] as
; the address.
; CHECK-LABEL: st1_x4_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A,
                                                 <4 x float> %B,
                                                 <4 x float> %C,
                                                 <4 x float> %D,
                                                 float* %addr)
  ret void
}
855
; Hands four <2 x i64> vectors and %addr to the st1x4 intrinsic. The expected
; output is a single st1.2d with a four-register list and [x0] as the address.
; CHECK-LABEL: st1_x4_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A,
                                                 <2 x i64> %B,
                                                 <2 x i64> %C,
                                                 <2 x i64> %D,
                                                 i64* %addr)
  ret void
}
862
; Hands four <2 x double> vectors and %addr to the st1x4 intrinsic. The
; expected output is a single st1.2d with a four-register list and [x0] as
; the address.
; CHECK-LABEL: st1_x4_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) {
entry:
  call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A,
                                                 <2 x double> %B,
                                                 <2 x double> %C,
                                                 <2 x double> %D,
                                                 double* %addr)
  ret void
}
869