1; RUN: llc < %s -mtriple=armv8-linux-gnueabi -verify-machineinstrs \
2; RUN:     -asm-verbose=false | FileCheck %s
3
4; %struct.uint16x4x2_t = type { <4 x i16>, <4 x i16> }
5; %struct.uint16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
6; %struct.uint16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
7
8; %struct.uint32x2x2_t = type { <2 x i32>, <2 x i32> }
9; %struct.uint32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
10; %struct.uint32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
11
12; %struct.uint64x1x2_t = type { <1 x i64>, <1 x i64> }
13; %struct.uint64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }
14; %struct.uint64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }
15
16; %struct.uint8x8x2_t = type { <8 x i8>, <8 x i8> }
17; %struct.uint8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
18; %struct.uint8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
19
20; %struct.uint16x8x2_t = type { <8 x i16>, <8 x i16> }
21; %struct.uint16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
22; %struct.uint16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
23
24; %struct.uint32x4x2_t = type { <4 x i32>, <4 x i32> }
25; %struct.uint32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
26; %struct.uint32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
27
28; %struct.uint64x2x2_t = type { <2 x i64>, <2 x i64> }
29; %struct.uint64x2x3_t = type { <2 x i64>, <2 x i64>, <2 x i64> }
30; %struct.uint64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }
31
32; %struct.uint8x16x2_t = type { <16 x i8>, <16 x i8> }
33; %struct.uint8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
34; %struct.uint8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
35
; Aggregate argument types used by the tests below. Every struct has exactly
; one field: an array of 2, 3 or 4 identical vectors.

; --- arrays of 64-bit vectors ---
; <4 x i16>
%struct.uint16x4x2_t = type { [2 x <4 x i16>] }
%struct.uint16x4x3_t = type { [3 x <4 x i16>] }
%struct.uint16x4x4_t = type { [4 x <4 x i16>] }

; <2 x i32>
%struct.uint32x2x2_t = type { [2 x <2 x i32>] }
%struct.uint32x2x3_t = type { [3 x <2 x i32>] }
%struct.uint32x2x4_t = type { [4 x <2 x i32>] }

; <1 x i64>
%struct.uint64x1x2_t = type { [2 x <1 x i64>] }
%struct.uint64x1x3_t = type { [3 x <1 x i64>] }
%struct.uint64x1x4_t = type { [4 x <1 x i64>] }

; <8 x i8>
%struct.uint8x8x2_t = type { [2 x <8 x i8>] }
%struct.uint8x8x3_t = type { [3 x <8 x i8>] }
%struct.uint8x8x4_t = type { [4 x <8 x i8>] }

; --- arrays of 128-bit vectors ---
; <8 x i16>
%struct.uint16x8x2_t = type { [2 x <8 x i16>] }
%struct.uint16x8x3_t = type { [3 x <8 x i16>] }
%struct.uint16x8x4_t = type { [4 x <8 x i16>] }

; <4 x i32>
%struct.uint32x4x2_t = type { [2 x <4 x i32>] }
%struct.uint32x4x3_t = type { [3 x <4 x i32>] }
%struct.uint32x4x4_t = type { [4 x <4 x i32>] }

; <2 x i64>
%struct.uint64x2x2_t = type { [2 x <2 x i64>] }
%struct.uint64x2x3_t = type { [3 x <2 x i64>] }
%struct.uint64x2x4_t = type { [4 x <2 x i64>] }

; <16 x i8>
%struct.uint8x16x2_t = type { [2 x <16 x i8>] }
%struct.uint8x16x3_t = type { [3 x <16 x i8>] }
%struct.uint8x16x4_t = type { [4 x <16 x i8>] }
60
; Declarations of the NEON vst1x2 / vst1x3 / vst1x4 intrinsics exercised in
; this file. Each takes a destination pointer followed by 2, 3 or 4 vectors of
; one type. The attributes common to all of them live in group #0, defined at
; the end of this list.

; 64-bit vector operands
declare void @llvm.arm.neon.vst1x2.p0i16.v4i16(i16* nocapture, <4 x i16>, <4 x i16>) #0
declare void @llvm.arm.neon.vst1x3.p0i16.v4i16(i16* nocapture, <4 x i16>, <4 x i16>, <4 x i16>) #0
declare void @llvm.arm.neon.vst1x4.p0i16.v4i16(i16* nocapture, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) #0

declare void @llvm.arm.neon.vst1x2.p0i32.v2i32(i32* nocapture, <2 x i32>, <2 x i32>) #0
declare void @llvm.arm.neon.vst1x3.p0i32.v2i32(i32* nocapture, <2 x i32>, <2 x i32>, <2 x i32>) #0
declare void @llvm.arm.neon.vst1x4.p0i32.v2i32(i32* nocapture, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) #0

declare void @llvm.arm.neon.vst1x2.p0i64.v1i64(i64* nocapture, <1 x i64>, <1 x i64>) #0
declare void @llvm.arm.neon.vst1x3.p0i64.v1i64(i64* nocapture, <1 x i64>, <1 x i64>, <1 x i64>) #0
declare void @llvm.arm.neon.vst1x4.p0i64.v1i64(i64* nocapture, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) #0

declare void @llvm.arm.neon.vst1x2.p0i8.v8i8(i8* nocapture, <8 x i8>, <8 x i8>) #0
declare void @llvm.arm.neon.vst1x3.p0i8.v8i8(i8* nocapture, <8 x i8>, <8 x i8>, <8 x i8>) #0
declare void @llvm.arm.neon.vst1x4.p0i8.v8i8(i8* nocapture, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) #0

; 128-bit vector operands
declare void @llvm.arm.neon.vst1x2.p0i16.v8i16(i16* nocapture, <8 x i16>, <8 x i16>) #0
declare void @llvm.arm.neon.vst1x3.p0i16.v8i16(i16* nocapture, <8 x i16>, <8 x i16>, <8 x i16>) #0
declare void @llvm.arm.neon.vst1x4.p0i16.v8i16(i16* nocapture, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) #0

declare void @llvm.arm.neon.vst1x2.p0i32.v4i32(i32* nocapture, <4 x i32>, <4 x i32>) #0
declare void @llvm.arm.neon.vst1x3.p0i32.v4i32(i32* nocapture, <4 x i32>, <4 x i32>, <4 x i32>) #0
declare void @llvm.arm.neon.vst1x4.p0i32.v4i32(i32* nocapture, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) #0

declare void @llvm.arm.neon.vst1x2.p0i64.v2i64(i64* nocapture, <2 x i64>, <2 x i64>) #0
declare void @llvm.arm.neon.vst1x3.p0i64.v2i64(i64* nocapture, <2 x i64>, <2 x i64>, <2 x i64>) #0
declare void @llvm.arm.neon.vst1x4.p0i64.v2i64(i64* nocapture, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>) #0

declare void @llvm.arm.neon.vst1x2.p0i8.v16i8(i8* nocapture, <16 x i8>, <16 x i8>) #0
declare void @llvm.arm.neon.vst1x3.p0i8.v16i8(i8* nocapture, <16 x i8>, <16 x i8>, <16 x i8>) #0
declare void @llvm.arm.neon.vst1x4.p0i8.v16i8(i8* nocapture, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) #0

; Attribute set shared by every declaration above.
attributes #0 = { argmemonly nounwind }
92
93; CHECK-LABEL: test_vst1_u16_x2
94; CHECK: vst1.16 {d16, d17}, [r0:64]
; Pulls both <4 x i16> vectors out of %b and stores them through %a with the
; vst1x2 intrinsic; a single two-register vst1.16 is expected.
define void @test_vst1_u16_x2(i16* %a, %struct.uint16x4x2_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint16x4x2_t %b, 0, 0
  %vec1 = extractvalue %struct.uint16x4x2_t %b, 0, 1
  tail call void @llvm.arm.neon.vst1x2.p0i16.v4i16(i16* %a, <4 x i16> %vec0, <4 x i16> %vec1)
  ret void
}
102
103; CHECK-LABEL: test_vst1_u16_x3
104; CHECK: vst1.16 {d16, d17, d18}, [r0:64]
; Pulls the three <4 x i16> vectors out of %b and stores them through %a with
; the vst1x3 intrinsic; a single three-register vst1.16 is expected.
define void @test_vst1_u16_x3(i16* %a, %struct.uint16x4x3_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint16x4x3_t %b, 0, 0
  %vec1 = extractvalue %struct.uint16x4x3_t %b, 0, 1
  %vec2 = extractvalue %struct.uint16x4x3_t %b, 0, 2
  tail call void @llvm.arm.neon.vst1x3.p0i16.v4i16(i16* %a, <4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2)
  ret void
}
113
114; CHECK-LABEL: test_vst1_u16_x4
115; CHECK: vst1.16 {d16, d17, d18, d19}, [r0:256]
; Pulls the four <4 x i16> vectors out of %b and stores them through %a with
; the vst1x4 intrinsic; a single four-register vst1.16 is expected.
define void @test_vst1_u16_x4(i16* %a, %struct.uint16x4x4_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint16x4x4_t %b, 0, 0
  %vec1 = extractvalue %struct.uint16x4x4_t %b, 0, 1
  %vec2 = extractvalue %struct.uint16x4x4_t %b, 0, 2
  %vec3 = extractvalue %struct.uint16x4x4_t %b, 0, 3
  tail call void @llvm.arm.neon.vst1x4.p0i16.v4i16(i16* %a, <4 x i16> %vec0, <4 x i16> %vec1, <4 x i16> %vec2, <4 x i16> %vec3)
  ret void
}
125
126; CHECK-LABEL: test_vst1_u32_x2
127; CHECK: vst1.32 {d16, d17}, [r0:64]
; Pulls both <2 x i32> vectors out of %b and stores them through %a with the
; vst1x2 intrinsic; a single two-register vst1.32 is expected.
define void @test_vst1_u32_x2(i32* %a, %struct.uint32x2x2_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint32x2x2_t %b, 0, 0
  %vec1 = extractvalue %struct.uint32x2x2_t %b, 0, 1
  tail call void @llvm.arm.neon.vst1x2.p0i32.v2i32(i32* %a, <2 x i32> %vec0, <2 x i32> %vec1)
  ret void
}
135
136; CHECK-LABEL: test_vst1_u32_x3
137; CHECK: vst1.32 {d16, d17, d18}, [r0:64]
; Pulls the three <2 x i32> vectors out of %b and stores them through %a with
; the vst1x3 intrinsic; a single three-register vst1.32 is expected.
define void @test_vst1_u32_x3(i32* %a, %struct.uint32x2x3_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint32x2x3_t %b, 0, 0
  %vec1 = extractvalue %struct.uint32x2x3_t %b, 0, 1
  %vec2 = extractvalue %struct.uint32x2x3_t %b, 0, 2
  tail call void @llvm.arm.neon.vst1x3.p0i32.v2i32(i32* %a, <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2)
  ret void
}
146
147; CHECK-LABEL: test_vst1_u32_x4
148; CHECK: vst1.32 {d16, d17, d18, d19}, [r0:256]
; Pulls the four <2 x i32> vectors out of %b and stores them through %a with
; the vst1x4 intrinsic; a single four-register vst1.32 is expected.
define void @test_vst1_u32_x4(i32* %a, %struct.uint32x2x4_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint32x2x4_t %b, 0, 0
  %vec1 = extractvalue %struct.uint32x2x4_t %b, 0, 1
  %vec2 = extractvalue %struct.uint32x2x4_t %b, 0, 2
  %vec3 = extractvalue %struct.uint32x2x4_t %b, 0, 3
  tail call void @llvm.arm.neon.vst1x4.p0i32.v2i32(i32* %a, <2 x i32> %vec0, <2 x i32> %vec1, <2 x i32> %vec2, <2 x i32> %vec3)
  ret void
}
158
159; CHECK-LABEL: test_vst1_u64_x2
160; CHECK: vst1.64 {d16, d17}, [r0:64]
; Pulls both <1 x i64> vectors out of %b and stores them through %a with the
; vst1x2 intrinsic; a single two-register vst1.64 is expected.
define void @test_vst1_u64_x2(i64* %a, %struct.uint64x1x2_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint64x1x2_t %b, 0, 0
  %vec1 = extractvalue %struct.uint64x1x2_t %b, 0, 1
  tail call void @llvm.arm.neon.vst1x2.p0i64.v1i64(i64* %a, <1 x i64> %vec0, <1 x i64> %vec1)
  ret void
}
168
169; CHECK-LABEL: test_vst1_u64_x3
170; CHECK: vst1.64 {d16, d17, d18}, [r0:64]
; Pulls the three <1 x i64> vectors out of %b and stores them through %a with
; the vst1x3 intrinsic; a single three-register vst1.64 is expected.
define void @test_vst1_u64_x3(i64* %a, %struct.uint64x1x3_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint64x1x3_t %b, 0, 0
  %vec1 = extractvalue %struct.uint64x1x3_t %b, 0, 1
  %vec2 = extractvalue %struct.uint64x1x3_t %b, 0, 2
  tail call void @llvm.arm.neon.vst1x3.p0i64.v1i64(i64* %a, <1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2)
  ret void
}
179
180; CHECK-LABEL: test_vst1_u64_x4
181; CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
; Pulls the four <1 x i64> vectors out of %b and stores them through %a with
; the vst1x4 intrinsic; a single four-register vst1.64 is expected.
define void @test_vst1_u64_x4(i64* %a, %struct.uint64x1x4_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint64x1x4_t %b, 0, 0
  %vec1 = extractvalue %struct.uint64x1x4_t %b, 0, 1
  %vec2 = extractvalue %struct.uint64x1x4_t %b, 0, 2
  %vec3 = extractvalue %struct.uint64x1x4_t %b, 0, 3
  tail call void @llvm.arm.neon.vst1x4.p0i64.v1i64(i64* %a, <1 x i64> %vec0, <1 x i64> %vec1, <1 x i64> %vec2, <1 x i64> %vec3)
  ret void
}
191
192; CHECK-LABEL: test_vst1_u8_x2
193; CHECK: vst1.8 {d16, d17}, [r0:64]
; Pulls both <8 x i8> vectors out of %b and stores them through %a with the
; vst1x2 intrinsic; a single two-register vst1.8 is expected.
define void @test_vst1_u8_x2(i8* %a, %struct.uint8x8x2_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint8x8x2_t %b, 0, 0
  %vec1 = extractvalue %struct.uint8x8x2_t %b, 0, 1
  tail call void @llvm.arm.neon.vst1x2.p0i8.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1)
  ret void
}
201
202; CHECK-LABEL: test_vst1_u8_x3
203; CHECK: vst1.8 {d16, d17, d18}, [r0:64]
; Pulls the three <8 x i8> vectors out of %b and stores them through %a with
; the vst1x3 intrinsic; a single three-register vst1.8 is expected.
define void @test_vst1_u8_x3(i8* %a, %struct.uint8x8x3_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint8x8x3_t %b, 0, 0
  %vec1 = extractvalue %struct.uint8x8x3_t %b, 0, 1
  %vec2 = extractvalue %struct.uint8x8x3_t %b, 0, 2
  tail call void @llvm.arm.neon.vst1x3.p0i8.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2)
  ret void
}
212
213; CHECK-LABEL: test_vst1_u8_x4
214; CHECK: vst1.8 {d16, d17, d18, d19}, [r0:256]
; Pulls the four <8 x i8> vectors out of %b and stores them through %a with
; the vst1x4 intrinsic; a single four-register vst1.8 is expected.
define void @test_vst1_u8_x4(i8* %a, %struct.uint8x8x4_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint8x8x4_t %b, 0, 0
  %vec1 = extractvalue %struct.uint8x8x4_t %b, 0, 1
  %vec2 = extractvalue %struct.uint8x8x4_t %b, 0, 2
  %vec3 = extractvalue %struct.uint8x8x4_t %b, 0, 3
  tail call void @llvm.arm.neon.vst1x4.p0i8.v8i8(i8* %a, <8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, <8 x i8> %vec3)
  ret void
}
224
225; CHECK-LABEL: test_vst1q_u16_x2
226; CHECK: vst1.16 {d16, d17, d18, d19}, [r0:256]
; Pulls both <8 x i16> vectors out of %b and stores them through %a with the
; vst1x2 intrinsic; a single four-register vst1.16 is expected.
define void @test_vst1q_u16_x2(i16* %a, %struct.uint16x8x2_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint16x8x2_t %b, 0, 0
  %vec1 = extractvalue %struct.uint16x8x2_t %b, 0, 1
  tail call void @llvm.arm.neon.vst1x2.p0i16.v8i16(i16* %a, <8 x i16> %vec0, <8 x i16> %vec1)
  ret void
}
234
235; CHECK-LABEL: test_vst1q_u16_x3
236; CHECK: vst1.16 {d16, d17, d18}, [r0:64]!
237; CHECK: vst1.16 {d19, d20, d21}, [r0:64]
; Pulls the three <8 x i16> vectors out of %b and stores them through %a with
; the vst1x3 intrinsic; the expected output splits this into two
; three-register vst1.16 stores.
define void @test_vst1q_u16_x3(i16* %a, %struct.uint16x8x3_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint16x8x3_t %b, 0, 0
  %vec1 = extractvalue %struct.uint16x8x3_t %b, 0, 1
  %vec2 = extractvalue %struct.uint16x8x3_t %b, 0, 2
  tail call void @llvm.arm.neon.vst1x3.p0i16.v8i16(i16* %a, <8 x i16> %vec0, <8 x i16> %vec1, <8 x i16> %vec2)
  ret void
}
246
247; CHECK-LABEL: test_vst1q_u16_x4
248; CHECK: vst1.16 {d16, d17, d18, d19}, [r0:256]!
249; CHECK: vst1.16 {d20, d21, d22, d23}, [r0:256]
; Pulls the four <8 x i16> vectors out of %b and stores them through %a with
; the vst1x4 intrinsic; the expected output splits this into two
; four-register vst1.16 stores.
define void @test_vst1q_u16_x4(i16* %a, %struct.uint16x8x4_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint16x8x4_t %b, 0, 0
  %vec1 = extractvalue %struct.uint16x8x4_t %b, 0, 1
  %vec2 = extractvalue %struct.uint16x8x4_t %b, 0, 2
  %vec3 = extractvalue %struct.uint16x8x4_t %b, 0, 3
  tail call void @llvm.arm.neon.vst1x4.p0i16.v8i16(i16* %a, <8 x i16> %vec0, <8 x i16> %vec1, <8 x i16> %vec2, <8 x i16> %vec3)
  ret void
}
259
260; CHECK-LABEL: test_vst1q_u32_x2
261; CHECK: vst1.32 {d16, d17, d18, d19}, [r0:256]
; Pulls both <4 x i32> vectors out of %b and stores them through %a with the
; vst1x2 intrinsic; a single four-register vst1.32 is expected.
define void @test_vst1q_u32_x2(i32* %a, %struct.uint32x4x2_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint32x4x2_t %b, 0, 0
  %vec1 = extractvalue %struct.uint32x4x2_t %b, 0, 1
  tail call void @llvm.arm.neon.vst1x2.p0i32.v4i32(i32* %a, <4 x i32> %vec0, <4 x i32> %vec1)
  ret void
}
269
270; CHECK-LABEL: test_vst1q_u32_x3
271; CHECK: vst1.32 {d16, d17, d18}, [r0:64]!
272; CHECK: vst1.32 {d19, d20, d21}, [r0:64]
; Pulls the three <4 x i32> vectors out of %b and stores them through %a with
; the vst1x3 intrinsic; the expected output splits this into two
; three-register vst1.32 stores.
define void @test_vst1q_u32_x3(i32* %a, %struct.uint32x4x3_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint32x4x3_t %b, 0, 0
  %vec1 = extractvalue %struct.uint32x4x3_t %b, 0, 1
  %vec2 = extractvalue %struct.uint32x4x3_t %b, 0, 2
  tail call void @llvm.arm.neon.vst1x3.p0i32.v4i32(i32* %a, <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> %vec2)
  ret void
}
281
282; CHECK-LABEL: test_vst1q_u32_x4
283; CHECK: vst1.32 {d16, d17, d18, d19}, [r0:256]!
284; CHECK: vst1.32 {d20, d21, d22, d23}, [r0:256]
; Pulls the four <4 x i32> vectors out of %b and stores them through %a with
; the vst1x4 intrinsic; the expected output splits this into two
; four-register vst1.32 stores.
define void @test_vst1q_u32_x4(i32* %a, %struct.uint32x4x4_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint32x4x4_t %b, 0, 0
  %vec1 = extractvalue %struct.uint32x4x4_t %b, 0, 1
  %vec2 = extractvalue %struct.uint32x4x4_t %b, 0, 2
  %vec3 = extractvalue %struct.uint32x4x4_t %b, 0, 3
  tail call void @llvm.arm.neon.vst1x4.p0i32.v4i32(i32* %a, <4 x i32> %vec0, <4 x i32> %vec1, <4 x i32> %vec2, <4 x i32> %vec3)
  ret void
}
294
295; CHECK-LABEL: test_vst1q_u64_x2
296; CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
; Pulls both <2 x i64> vectors out of %b and stores them through %a with the
; vst1x2 intrinsic; a single four-register vst1.64 is expected.
define void @test_vst1q_u64_x2(i64* %a, %struct.uint64x2x2_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint64x2x2_t %b, 0, 0
  %vec1 = extractvalue %struct.uint64x2x2_t %b, 0, 1
  tail call void @llvm.arm.neon.vst1x2.p0i64.v2i64(i64* %a, <2 x i64> %vec0, <2 x i64> %vec1)
  ret void
}
304
305; CHECK-LABEL: test_vst1q_u64_x3
306; CHECK: vst1.64 {d16, d17, d18}, [r0:64]!
307; CHECK: vst1.64 {d19, d20, d21}, [r0:64]
; Pulls the three <2 x i64> vectors out of %b and stores them through %a with
; the vst1x3 intrinsic; the expected output splits this into two
; three-register vst1.64 stores.
define void @test_vst1q_u64_x3(i64* %a, %struct.uint64x2x3_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint64x2x3_t %b, 0, 0
  %vec1 = extractvalue %struct.uint64x2x3_t %b, 0, 1
  %vec2 = extractvalue %struct.uint64x2x3_t %b, 0, 2
  tail call void @llvm.arm.neon.vst1x3.p0i64.v2i64(i64* %a, <2 x i64> %vec0, <2 x i64> %vec1, <2 x i64> %vec2)
  ret void
}
316
317; CHECK-LABEL: test_vst1q_u64_x4
318; CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]!
319; CHECK: vst1.64 {d20, d21, d22, d23}, [r0:256]
; Pulls the four <2 x i64> vectors out of %b and stores them through %a with
; the vst1x4 intrinsic; the expected output splits this into two
; four-register vst1.64 stores.
define void @test_vst1q_u64_x4(i64* %a, %struct.uint64x2x4_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint64x2x4_t %b, 0, 0
  %vec1 = extractvalue %struct.uint64x2x4_t %b, 0, 1
  %vec2 = extractvalue %struct.uint64x2x4_t %b, 0, 2
  %vec3 = extractvalue %struct.uint64x2x4_t %b, 0, 3
  tail call void @llvm.arm.neon.vst1x4.p0i64.v2i64(i64* %a, <2 x i64> %vec0, <2 x i64> %vec1, <2 x i64> %vec2, <2 x i64> %vec3)
  ret void
}
329
330; CHECK-LABEL: test_vst1q_u8_x2
331; CHECK: vst1.8 {d16, d17, d18, d19}, [r0:256]
; Pulls both <16 x i8> vectors out of %b and stores them through %a with the
; vst1x2 intrinsic; a single four-register vst1.8 is expected.
define void @test_vst1q_u8_x2(i8* %a, %struct.uint8x16x2_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint8x16x2_t %b, 0, 0
  %vec1 = extractvalue %struct.uint8x16x2_t %b, 0, 1
  tail call void @llvm.arm.neon.vst1x2.p0i8.v16i8(i8* %a, <16 x i8> %vec0, <16 x i8> %vec1)
  ret void
}
339
340; CHECK-LABEL: test_vst1q_u8_x3
341; CHECK: vst1.8 {d16, d17, d18}, [r0:64]!
342; CHECK: vst1.8 {d19, d20, d21}, [r0:64]
; Pulls the three <16 x i8> vectors out of %b and stores them through %a with
; the vst1x3 intrinsic; the expected output splits this into two
; three-register vst1.8 stores.
define void @test_vst1q_u8_x3(i8* %a, %struct.uint8x16x3_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint8x16x3_t %b, 0, 0
  %vec1 = extractvalue %struct.uint8x16x3_t %b, 0, 1
  %vec2 = extractvalue %struct.uint8x16x3_t %b, 0, 2
  tail call void @llvm.arm.neon.vst1x3.p0i8.v16i8(i8* %a, <16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2)
  ret void
}
351
352; CHECK-LABEL: test_vst1q_u8_x4
353; CHECK: vst1.8 {d16, d17, d18, d19}, [r0:256]!
354; CHECK: vst1.8 {d20, d21, d22, d23}, [r0:256]
; Pulls the four <16 x i8> vectors out of %b and stores them through %a with
; the vst1x4 intrinsic; the expected output splits this into two
; four-register vst1.8 stores.
define void @test_vst1q_u8_x4(i8* %a, %struct.uint8x16x4_t %b) nounwind {
begin:
  ; Index pair (0, N) selects element N of the array in the struct's only field.
  %vec0 = extractvalue %struct.uint8x16x4_t %b, 0, 0
  %vec1 = extractvalue %struct.uint8x16x4_t %b, 0, 1
  %vec2 = extractvalue %struct.uint8x16x4_t %b, 0, 2
  %vec3 = extractvalue %struct.uint8x16x4_t %b, 0, 3
  tail call void @llvm.arm.neon.vst1x4.p0i8.v16i8(i8* %a, <16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3)
  ret void
}
364