; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
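;
; Check that SVE tuple values built with the llvm.aarch64.sve.tuple.create{2,3,4}
; intrinsics are returned and passed in consecutive Z registers, i.e. that the
; expected register copies are emitted before the 'ret' and before each call.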

;
; svint8x2_t
;

define <vscale x 32 x i8> @ret_svint8x2_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
; CHECK-LABEL: ret_svint8x2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
  ret <vscale x 32 x i8> %tuple
}

define void @call_svint8x2_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %dummy_z2, <vscale x 16 x i8> %z3) #0 {
; CHECK-LABEL: call_svint8x2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint8x2_t
  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z3)
  call void @callee_svint8x2_t(<vscale x 32 x i8> %tuple)
  ret void
}

;
; svint16x2_t
;

define <vscale x 16 x i16> @ret_svint16x2_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
; CHECK-LABEL: ret_svint16x2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
  ret <vscale x 16 x i16> %tuple
}

define void @call_svint16x2_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %dummy_z2, <vscale x 8 x i16> %z3) #0 {
; CHECK-LABEL: call_svint16x2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint16x2_t
  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z3)
  call void @callee_svint16x2_t(<vscale x 16 x i16> %tuple)
  ret void
}

;
; svint32x2_t
;

define <vscale x 8 x i32> @ret_svint32x2_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
; CHECK-LABEL: ret_svint32x2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
  ret <vscale x 8 x i32> %tuple
}

define void @call_svint32x2_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %dummy_z2, <vscale x 4 x i32> %z3) #0 {
; CHECK-LABEL: call_svint32x2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint32x2_t
  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z3)
  call void @callee_svint32x2_t(<vscale x 8 x i32> %tuple)
  ret void
}

;
; svint64x2_t
;

define <vscale x 4 x i64> @ret_svint64x2_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
; CHECK-LABEL: ret_svint64x2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
  ret <vscale x 4 x i64> %tuple
}

define void @call_svint64x2_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %dummy_z2, <vscale x 2 x i64> %z3) #0 {
; CHECK-LABEL: call_svint64x2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svint64x2_t
  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z3)
  call void @callee_svint64x2_t(<vscale x 4 x i64> %tuple)
  ret void
}

;
; svfloatx2_t
;

define <vscale x 8 x float> @ret_svfloatx2_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
; CHECK-LABEL: ret_svfloatx2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2)
  ret <vscale x 8 x float> %tuple
}

define void @call_svfloatx2_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %dummy_z2, <vscale x 4 x float> %z3) #0 {
; CHECK-LABEL: call_svfloatx2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svfloatx2_t
  %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z3)
  call void @callee_svfloatx2_t(<vscale x 8 x float> %tuple)
  ret void
}

;
; svdoublex2_t
;

define <vscale x 4 x double> @ret_svdoublex2_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
; CHECK-LABEL: ret_svdoublex2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2)
  ret <vscale x 4 x double> %tuple
}

define void @call_svdoublex2_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %dummy_z2, <vscale x 2 x double> %z3) #0 {
; CHECK-LABEL: call_svdoublex2_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: bl callee_svdoublex2_t
  %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z3)
  call void @callee_svdoublex2_t(<vscale x 4 x double> %tuple)
  ret void
}

;
; svint8x3_t
;

define <vscale x 48 x i8> @ret_svint8x3_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
; CHECK-LABEL: ret_svint8x3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3)
  ret <vscale x 48 x i8> %tuple
}

define void @call_svint8x3_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %dummy_z3, <vscale x 16 x i8> %z4) #0 {
; CHECK-LABEL: call_svint8x3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint8x3_t
  %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z4)
  call void @callee_svint8x3_t(<vscale x 48 x i8> %tuple)
  ret void
}

;
; svint16x3_t
;

define <vscale x 24 x i16> @ret_svint16x3_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
; CHECK-LABEL: ret_svint16x3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
  ret <vscale x 24 x i16> %tuple
}

define void @call_svint16x3_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %dummy_z3, <vscale x 8 x i16> %z4) #0 {
; CHECK-LABEL: call_svint16x3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint16x3_t
  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z4)
  call void @callee_svint16x3_t(<vscale x 24 x i16> %tuple)
  ret void
}

;
; svint32x3_t
;

define <vscale x 12 x i32> @ret_svint32x3_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
; CHECK-LABEL: ret_svint32x3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
  ret <vscale x 12 x i32> %tuple
}

define void @call_svint32x3_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %dummy_z3, <vscale x 4 x i32> %z4) #0 {
; CHECK-LABEL: call_svint32x3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint32x3_t
  %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z4)
  call void @callee_svint32x3_t(<vscale x 12 x i32> %tuple)
  ret void
}

;
; svint64x3_t
;

define <vscale x 6 x i64> @ret_svint64x3_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
; CHECK-LABEL: ret_svint64x3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3)
  ret <vscale x 6 x i64> %tuple
}

define void @call_svint64x3_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %dummy_z3, <vscale x 2 x i64> %z4) #0 {
; CHECK-LABEL: call_svint64x3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint64x3_t
  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z4)
  call void @callee_svint64x3_t(<vscale x 6 x i64> %tuple)
  ret void
}

;
; svfloatx3_t
;

define <vscale x 12 x float> @ret_svfloatx3_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
; CHECK-LABEL: ret_svfloatx3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
  ret <vscale x 12 x float> %tuple
}

define void @call_svfloatx3_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %dummy_z3, <vscale x 4 x float> %z4) #0 {
; CHECK-LABEL: call_svfloatx3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svfloatx3_t
  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z4)
  call void @callee_svfloatx3_t(<vscale x 12 x float> %tuple)
  ret void
}

;
; svdoublex3_t
;

define <vscale x 6 x double> @ret_svdoublex3_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
; CHECK-LABEL: ret_svdoublex3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3)
  ret <vscale x 6 x double> %tuple
}

define void @call_svdoublex3_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %dummy_z3, <vscale x 2 x double> %z4) #0 {
; CHECK-LABEL: call_svdoublex3_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svdoublex3_t
  %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z4)
  call void @callee_svdoublex3_t(<vscale x 6 x double> %tuple)
  ret void
}

;
; svint8x4_t
;

define <vscale x 64 x i8> @ret_svint8x4_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4) #0 {
; CHECK-LABEL: ret_svint8x4_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4)
  ret <vscale x 64 x i8> %tuple
}

define void @call_svint8x4_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %dummy_z3, <vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5) #0 {
; CHECK-LABEL: call_svint8x4_t
; CHECK:      mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint8x4_t
  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5)
  call void @callee_svint8x4_t(<vscale x 64 x i8> %tuple)
  ret void
}

;
; svint16x4_t
;

define <vscale x 32 x i16> @ret_svint16x4_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i16> %z4) #0 {
; CHECK-LABEL: ret_svint16x4_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i16> %z4)
  ret <vscale x 32 x i16> %tuple
}

define void @call_svint16x4_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %dummy_z3, <vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5) #0 {
; CHECK-LABEL: call_svint16x4_t
; CHECK:      mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint16x4_t
  %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5)
  call void @callee_svint16x4_t(<vscale x 32 x i16> %tuple)
  ret void
}

;
; svint32x4_t
;

define <vscale x 16 x i32> @ret_svint32x4_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4) #0 {
; CHECK-LABEL: ret_svint32x4_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4)
  ret <vscale x 16 x i32> %tuple
}

define void @call_svint32x4_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %dummy_z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
; CHECK-LABEL: call_svint32x4_t
; CHECK:      mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint32x4_t
  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5)
  call void @callee_svint32x4_t(<vscale x 16 x i32> %tuple)
  ret void
}

;
; svint64x4_t
;

define <vscale x 8 x i64> @ret_svint64x4_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3, <vscale x 2 x i64> %z4) #0 {
; CHECK-LABEL: ret_svint64x4_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3, <vscale x 2 x i64> %z4)
  ret <vscale x 8 x i64> %tuple
}

define void @call_svint64x4_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %dummy_z3, <vscale x 2 x i64> %z4, <vscale x 2 x i64> %z5) #0 {
; CHECK-LABEL: call_svint64x4_t
; CHECK:      mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svint64x4_t
  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z4, <vscale x 2 x i64> %z5)
  call void @callee_svint64x4_t(<vscale x 8 x i64> %tuple)
  ret void
}

;
; svfloatx4_t
;

define <vscale x 16 x float> @ret_svfloatx4_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3, <vscale x 4 x float> %z4) #0 {
; CHECK-LABEL: ret_svfloatx4_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3, <vscale x 4 x float> %z4)
  ret <vscale x 16 x float> %tuple
}

define void @call_svfloatx4_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %dummy_z3, <vscale x 4 x float> %z4, <vscale x 4 x float> %z5) #0 {
; CHECK-LABEL: call_svfloatx4_t
; CHECK:      mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svfloatx4_t
  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z4, <vscale x 4 x float> %z5)
  call void @callee_svfloatx4_t(<vscale x 16 x float> %tuple)
  ret void
}

;
; svdoublex4_t
;

define <vscale x 8 x double> @ret_svdoublex4_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3, <vscale x 2 x double> %z4) #0 {
; CHECK-LABEL: ret_svdoublex4_t
; CHECK:      mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z3.d
; CHECK-NEXT: mov z3.d, z4.d
; CHECK-NEXT: ret
  %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3, <vscale x 2 x double> %z4)
  ret <vscale x 8 x double> %tuple
}

define void @call_svdoublex4_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %dummy_z3, <vscale x 2 x double> %z4, <vscale x 2 x double> %z5) #0 {
; CHECK-LABEL: call_svdoublex4_t
; CHECK:      mov z3.d, z5.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: mov z1.d, z2.d
; CHECK-NEXT: mov z2.d, z4.d
; CHECK-NEXT: bl callee_svdoublex4_t
  %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z4, <vscale x 2 x double> %z5)
  call void @callee_svdoublex4_t(<vscale x 8 x double> %tuple)
  ret void
}

attributes #0 = { nounwind "target-features"="+sve" }

declare void @callee_svint8x2_t(<vscale x 32 x i8>)
declare void @callee_svint16x2_t(<vscale x 16 x i16>)
declare void @callee_svint32x2_t(<vscale x 8 x i32>)
declare void @callee_svint64x2_t(<vscale x 4 x i64>)
declare void @callee_svfloatx2_t(<vscale x 8 x float>)
declare void @callee_svdoublex2_t(<vscale x 4 x double>)

declare void @callee_svint8x3_t(<vscale x 48 x i8>)
declare void @callee_svint16x3_t(<vscale x 24 x i16>)
declare void @callee_svint32x3_t(<vscale x 12 x i32>)
declare void @callee_svint64x3_t(<vscale x 6 x i64>)
declare void @callee_svfloatx3_t(<vscale x 12 x float>)
declare void @callee_svdoublex3_t(<vscale x 6 x double>)

declare void @callee_svint8x4_t(<vscale x 64 x i8>)
declare void @callee_svint16x4_t(<vscale x 32 x i16>)
declare void @callee_svint32x4_t(<vscale x 16 x i32>)
declare void @callee_svint64x4_t(<vscale x 8 x i64>)
declare void @callee_svfloatx4_t(<vscale x 16 x float>)
declare void @callee_svdoublex4_t(<vscale x 8 x double>)


; x2
declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

; x3
declare <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

; x4
declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)