; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails, please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; CLASTA (Vectors)
;
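; CLASTA extracts the element after the last active predicate element from
; the second source vector; if no elements are active, the first source
; operand acts as the fallback result.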

define <vscale x 16 x i8> @clasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_i8:
; CHECK: clasta z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_i16:
; CHECK: clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_i32:
; CHECK: clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_i64:
; CHECK: clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_f16:
; CHECK: clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_bf16:
; CHECK: clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_f32:
; CHECK: clasta z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_f64:
; CHECK: clasta z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; CLASTA (Scalar)
;
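; The scalar (clasta.n) forms take and return the fallback value in a
; general-purpose or FP register rather than a vector.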

define i8 @clasta_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clasta_n_i8:
; CHECK: clasta w0, p0, w0, z0.b
; CHECK-NEXT: ret
  %out = call i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1> %pg,
                                                    i8 %a,
                                                    <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clasta_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clasta_n_i16:
; CHECK: clasta w0, p0, w0, z0.h
; CHECK-NEXT: ret
  %out = call i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1> %pg,
                                                     i16 %a,
                                                     <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clasta_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clasta_n_i32:
; CHECK: clasta w0, p0, w0, z0.s
; CHECK-NEXT: ret
  %out = call i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1> %pg,
                                                     i32 %a,
                                                     <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clasta_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clasta_n_i64:
; CHECK: clasta x0, p0, x0, z0.d
; CHECK-NEXT: ret
  %out = call i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1> %pg,
                                                     i64 %a,
                                                     <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clasta_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clasta_n_f16:
; CHECK: clasta h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1> %pg,
                                                      half %a,
                                                      <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_n_bf16:
; CHECK: clasta h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg,
                                                         bfloat %a,
                                                         <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_n_f32:
; CHECK: clasta s0, p0, s0, z1.s
; CHECK-NEXT: ret
  %out = call float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1> %pg,
                                                       float %a,
                                                       <vscale x 4 x float> %b)
  ret float %out
}

define double @clasta_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clasta_n_f64:
; CHECK: clasta d0, p0, d0, z1.d
; CHECK-NEXT: ret
  %out = call double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1> %pg,
                                                        double %a,
                                                        <vscale x 2 x double> %b)
  ret double %out
}

;
; CLASTB (Vectors)
;
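; CLASTB extracts the last active element itself (rather than the element
; after it); the fallback behaviour matches CLASTA.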

define <vscale x 16 x i8> @clastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_i8:
; CHECK: clastb z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @clastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_i16:
; CHECK: clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @clastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_i32:
; CHECK: clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @clastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_i64:
; CHECK: clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @clastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_f16:
; CHECK: clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_bf16:
; CHECK: clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_f32:
; CHECK: clastb z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @clastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_f64:
; CHECK: clastb z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; CLASTB (Scalar)
;
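; As with CLASTA, the scalar (clastb.n) forms use a general-purpose or FP
; register for the fallback value and the result.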

define i8 @clastb_n_i8(<vscale x 16 x i1> %pg, i8 %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: clastb_n_i8:
; CHECK: clastb w0, p0, w0, z0.b
; CHECK-NEXT: ret
  %out = call i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1> %pg,
                                                    i8 %a,
                                                    <vscale x 16 x i8> %b)
  ret i8 %out
}

define i16 @clastb_n_i16(<vscale x 8 x i1> %pg, i16 %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: clastb_n_i16:
; CHECK: clastb w0, p0, w0, z0.h
; CHECK-NEXT: ret
  %out = call i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1> %pg,
                                                     i16 %a,
                                                     <vscale x 8 x i16> %b)
  ret i16 %out
}

define i32 @clastb_n_i32(<vscale x 4 x i1> %pg, i32 %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: clastb_n_i32:
; CHECK: clastb w0, p0, w0, z0.s
; CHECK-NEXT: ret
  %out = call i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1> %pg,
                                                     i32 %a,
                                                     <vscale x 4 x i32> %b)
  ret i32 %out
}

define i64 @clastb_n_i64(<vscale x 2 x i1> %pg, i64 %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: clastb_n_i64:
; CHECK: clastb x0, p0, x0, z0.d
; CHECK-NEXT: ret
  %out = call i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1> %pg,
                                                     i64 %a,
                                                     <vscale x 2 x i64> %b)
  ret i64 %out
}

define half @clastb_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: clastb_n_f16:
; CHECK: clastb h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> %pg,
                                                      half %a,
                                                      <vscale x 8 x half> %b)
  ret half %out
}

define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_n_bf16:
; CHECK: clastb h0, p0, h0, z1.h
; CHECK-NEXT: ret
  %out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg,
                                                         bfloat %a,
                                                         <vscale x 8 x bfloat> %b)
  ret bfloat %out
}

define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_n_f32:
; CHECK: clastb s0, p0, s0, z1.s
; CHECK-NEXT: ret
  %out = call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> %pg,
                                                       float %a,
                                                       <vscale x 4 x float> %b)
  ret float %out
}

define double @clastb_n_f64(<vscale x 2 x i1> %pg, double %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: clastb_n_f64:
; CHECK: clastb d0, p0, d0, z1.d
; CHECK-NEXT: ret
  %out = call double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1> %pg,
                                                        double %a,
                                                        <vscale x 2 x double> %b)
  ret double %out
}

;
; DUPQ
;
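; With an immediate index in the range 0-3, dupq.lane broadcasts the selected
; 128-bit quadword across the destination; index 0 prints as "mov z0.q, q0".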

define <vscale x 16 x i8> @dupq_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: dupq_i8:
; CHECK: mov z0.q, q0
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @dupq_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: dupq_i16:
; CHECK: mov z0.q, z0.q[1]
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 1)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @dupq_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: dupq_i32:
; CHECK: mov z0.q, z0.q[2]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @dupq_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: dupq_i64:
; CHECK: mov z0.q, z0.q[3]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 3)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: dupq_f16:
; CHECK: mov z0.q, q0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 0)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: dupq_bf16:
; CHECK: mov z0.q, q0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: dupq_f32:
; CHECK: mov z0.q, z0.q[1]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 1)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @dupq_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: dupq_f64:
; CHECK: mov z0.q, z0.q[2]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 2)
  ret <vscale x 2 x double> %out
}

;
; DUPQ_LANE
;
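; With a variable index there is no single instruction, so the operation is
; lowered to a TBL: INDEX and AND build the repeating {0, 1} pattern, the
; scalar 2*idx is splatted and added to give {2*idx, 2*idx+1, ...}, and TBL
; then gathers the two 64-bit halves of the selected quadword.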

define <vscale x 16 x i8> @dupq_lane_i8(<vscale x 16 x i8> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i8:
; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK-NEXT: tbl   z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %a, i64 %idx)
  ret <vscale x 16 x i8> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x i16> @dupq_lane_i16(<vscale x 8 x i16> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i16:
; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK-NEXT: tbl   z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %a, i64 %idx)
  ret <vscale x 8 x i16> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x i32> @dupq_lane_i32(<vscale x 4 x i32> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i32:
; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK-NEXT: tbl   z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %a, i64 %idx)
  ret <vscale x 4 x i32> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x i64> @dupq_lane_i64(<vscale x 2 x i64> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_i64:
; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK-NEXT: tbl   z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 %idx)
  ret <vscale x 2 x i64> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x half> @dupq_lane_f16(<vscale x 8 x half> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f16:
; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK-NEXT: tbl   z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %a, i64 %idx)
  ret <vscale x 8 x half> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
; CHECK-LABEL: dupq_lane_bf16:
; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK-NEXT: tbl   z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
  ret <vscale x 8 x bfloat> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f32:
; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK-NEXT: tbl   z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %a, i64 %idx)
  ret <vscale x 4 x float> %out
}

; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 2 x double> @dupq_lane_f64(<vscale x 2 x double> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f64:
; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK-NEXT: tbl   z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %a, i64 %idx)
  ret <vscale x 2 x double> %out
}

; NOTE: The quadword index (4) is outside the immediate range (0-3), so this
; case is also lowered to a TBL, here with the constant indices {8, 9}.
define <vscale x 2 x i64> @dupq_i64_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: dupq_i64_range:
; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG:  add   [[Z3:z[0-9]+]].d, [[Z2]].d, #8
; CHECK: tbl   z0.d, { z0.d }, [[Z3]].d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %a, i64 4)
  ret <vscale x 2 x i64> %out
}

;
; EXT
;
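; EXT extracts a vector from the concatenation of the two sources at a byte
; offset; the intrinsic's element-index immediate is scaled by the element
; size (e.g. an i64 element offset of 2 becomes #16 bytes).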

define <vscale x 16 x i8> @ext_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ext_i8:
; CHECK: ext z0.b, z0.b, z1.b, #255
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b,
                                                               i32 255)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @ext_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ext_i16:
; CHECK: ext z0.b, z0.b, z1.b, #0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b,
                                                               i32 0)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ext_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ext_i32:
; CHECK: ext z0.b, z0.b, z1.b, #4
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b,
                                                               i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ext_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ext_i64:
; CHECK: ext z0.b, z0.b, z1.b, #16
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b,
                                                               i32 2)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @ext_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: ext_bf16:
; CHECK: ext z0.b, z0.b, z1.b, #6
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                   <vscale x 8 x bfloat> %b,
                                                                   i32 3)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @ext_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: ext_f16:
; CHECK: ext z0.b, z0.b, z1.b, #6
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> %a,
                                                                <vscale x 8 x half> %b,
                                                                i32 3)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @ext_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: ext_f32:
; CHECK: ext z0.b, z0.b, z1.b, #16
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> %a,
                                                                 <vscale x 4 x float> %b,
                                                                 i32 4)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @ext_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: ext_f64:
; CHECK: ext z0.b, z0.b, z1.b, #40
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> %a,
                                                                  <vscale x 2 x double> %b,
                                                                  i32 5)
  ret <vscale x 2 x double> %out
}

;
; LASTA
;
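; LASTA extracts the element after the last active predicate element into a
; general-purpose or FP register.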

define i8 @lasta_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lasta_i8:
; CHECK: lasta w0, p0, z0.b
; CHECK-NEXT: ret
  %res = call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> %pg,
                                                 <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lasta_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lasta_i16:
; CHECK: lasta w0, p0, z0.h
; CHECK-NEXT: ret
  %res = call i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1> %pg,
                                                  <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lasta_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lasta_i32:
; CHECK: lasta w0, p0, z0.s
; CHECK-NEXT: ret
  %res = call i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1> %pg,
                                                  <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lasta_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lasta_i64:
; CHECK: lasta x0, p0, z0.d
; CHECK-NEXT: ret
  %res = call i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1> %pg,
                                                  <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lasta_f16:
; CHECK: lasta h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lasta_bf16:
; CHECK: lasta h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lasta_f32:
; CHECK: lasta s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1> %pg,
                                                    <vscale x 4 x float> %a)
  ret float %res
}

define float @lasta_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lasta_f32_v2:
; CHECK: lasta s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1> %pg,
                                                    <vscale x 2 x float> %a)
  ret float %res
}

define double @lasta_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lasta_f64:
; CHECK: lasta d0, p0, z0.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1> %pg,
                                                     <vscale x 2 x double> %a)
  ret double %res
}

;
; LASTB
;
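; LASTB extracts the last active element itself into a general-purpose or FP
; register.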

define i8 @lastb_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: lastb_i8:
; CHECK: lastb w0, p0, z0.b
; CHECK-NEXT: ret
  %res = call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg,
                                                 <vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @lastb_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: lastb_i16:
; CHECK: lastb w0, p0, z0.h
; CHECK-NEXT: ret
  %res = call i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1> %pg,
                                                  <vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @lastb_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: lastb_i32:
; CHECK: lastb w0, p0, z0.s
; CHECK-NEXT: ret
  %res = call i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1> %pg,
                                                  <vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @lastb_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: lastb_i64:
; CHECK: lastb x0, p0, z0.d
; CHECK-NEXT: ret
  %res = call i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1> %pg,
                                                  <vscale x 2 x i64> %a)
  ret i64 %res
}

define half @lastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
; CHECK-LABEL: lastb_f16:
; CHECK: lastb h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1> %pg,
                                                   <vscale x 8 x half> %a)
  ret half %res
}

define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lastb_bf16:
; CHECK: lastb h0, p0, z0.h
; CHECK-NEXT: ret
  %res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg,
                                                      <vscale x 8 x bfloat> %a)
  ret bfloat %res
}

define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lastb_f32:
; CHECK: lastb s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> %pg,
                                                    <vscale x 4 x float> %a)
  ret float %res
}

define float @lastb_f32_v2(<vscale x 2 x i1> %pg, <vscale x 2 x float> %a) {
; CHECK-LABEL: lastb_f32_v2:
; CHECK: lastb s0, p0, z0.s
; CHECK-NEXT: ret
  %res = call float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1> %pg,
                                                    <vscale x 2 x float> %a)
  ret float %res
}

define double @lastb_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: lastb_f64:
; CHECK: lastb d0, p0, z0.d
; CHECK-NEXT: ret
  %res = call double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1> %pg,
                                                     <vscale x 2 x double> %a)
  ret double %res
}

;
; COMPACT
;
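; COMPACT copies the active elements to the lowest-numbered lanes and zeroes
; the remainder; the instruction only exists for .s and .d element sizes,
; hence only the 32- and 64-bit element types are tested.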

define <vscale x 4 x i32> @compact_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: compact_i32:
; CHECK: compact z0.s, p0, z0.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @compact_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: compact_i64:
; CHECK: compact z0.d, p0, z0.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %out
}

define <vscale x 4 x float> @compact_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: compact_f32:
; CHECK: compact z0.s, p0, z0.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1> %pg,
                                                                     <vscale x 4 x float> %a)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @compact_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
; CHECK-LABEL: compact_f64:
; CHECK: compact z0.d, p0, z0.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1> %pg,
                                                                      <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %out
}

;
; REV
;
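; REV reverses the order of the elements; both the predicate and the vector
; forms are tested.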

define <vscale x 16 x i1> @rev_b8(<vscale x 16 x i1> %a) {
; CHECK-LABEL: rev_b8:
; CHECK: rev p0.b, p0.b
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> %a)
  ret <vscale x 16 x i1> %res
}

define <vscale x 8 x i1> @rev_b16(<vscale x 8 x i1> %a) {
; CHECK-LABEL: rev_b16:
; CHECK: rev p0.h, p0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1> %a)
  ret <vscale x 8 x i1> %res
}

define <vscale x 4 x i1> @rev_b32(<vscale x 4 x i1> %a) {
; CHECK-LABEL: rev_b32:
; CHECK: rev p0.s, p0.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1> %a)
  ret <vscale x 4 x i1> %res
}

define <vscale x 2 x i1> @rev_b64(<vscale x 2 x i1> %a) {
; CHECK-LABEL: rev_b64:
; CHECK: rev p0.d, p0.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1> %a)
  ret <vscale x 2 x i1> %res
}

define <vscale x 16 x i8> @rev_i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: rev_i8:
; CHECK: rev z0.b, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> %a)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @rev_i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: rev_i16:
; CHECK: rev z0.h, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @rev_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: rev_i32:
; CHECK: rev z0.s, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @rev_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: rev_i64:
; CHECK: rev z0.d, z0.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> %a)
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x bfloat> @rev_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: rev_bf16:
; CHECK: rev z0.h, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> %a)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 8 x half> @rev_f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: rev_f16:
; CHECK: rev z0.h, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> %a)
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @rev_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: rev_f32:
; CHECK: rev z0.s, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @rev_f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: rev_f64:
; CHECK: rev z0.d, z0.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> %a)
  ret <vscale x 2 x double> %res
}

;
; SPLICE
;
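; SPLICE concatenates the segment of the first vector running from its first
; to its last active element with the lowest-numbered elements of the second.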

define <vscale x 16 x i8> @splice_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: splice_i8:
; CHECK: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> %pg,
                                                                  <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @splice_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: splice_i16:
; CHECK: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> %pg,
                                                                  <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @splice_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: splice_i32:
; CHECK: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @splice_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: splice_i64:
; CHECK: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> %pg,
                                                                  <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x bfloat> @splice_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: splice_bf16:
; CHECK: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> %pg,
                                                                      <vscale x 8 x bfloat> %a,
                                                                      <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @splice_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: splice_f16:
; CHECK: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @splice_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: splice_f32:
; CHECK: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @splice_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: splice_f64:
; CHECK: splice z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; SUNPKHI
;
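; SUNPKHI sign-extends the elements in the high half of the source into
; elements of twice the width.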

define <vscale x 8 x i16> @sunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpkhi_i16:
; CHECK: sunpkhi z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpkhi_i32:
; CHECK: sunpkhi z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpkhi_i64:
; CHECK: sunpkhi z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; SUNPKLO
;
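; SUNPKLO does the same for the low half of the source.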

define <vscale x 8 x i16> @sunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sunpklo_i16:
; CHECK: sunpklo z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @sunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sunpklo_i32:
; CHECK: sunpklo z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @sunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sunpklo_i64:
; CHECK: sunpklo z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; TBL
;
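; TBL is a table lookup: each index element of the second operand selects an
; element of the first, and out-of-range indices yield zero.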

define <vscale x 16 x i8> @tbl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: tbl_i8:
; CHECK: tbl z0.b, { z0.b }, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @tbl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_i16:
; CHECK: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @tbl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_i32:
; CHECK: tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @tbl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_i64:
; CHECK: tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @tbl_f16(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: tbl_f16:
; CHECK: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 8 x bfloat> @tbl_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i16> %b) #0 {
; CHECK-LABEL: tbl_bf16:
; CHECK: tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                   <vscale x 8 x i16> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 4 x float> @tbl_f32(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: tbl_f32:
; CHECK: tbl z0.s, { z0.s }, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @tbl_f64(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: tbl_f64:
; CHECK: tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %out
}

;
; UUNPKHI
;
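; UUNPKHI zero-extends the elements in the high half of the source into
; elements of twice the width.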

define <vscale x 8 x i16> @uunpkhi_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpkhi_i16:
; CHECK: uunpkhi z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpkhi_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpkhi_i32:
; CHECK: uunpkhi z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpkhi_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpkhi_i64:
; CHECK: uunpkhi z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; UUNPKLO
;
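; UUNPKLO does the same for the low half of the source.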

define <vscale x 8 x i16> @uunpklo_i16(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uunpklo_i16:
; CHECK: uunpklo z0.h, z0.b
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8> %a)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @uunpklo_i32(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uunpklo_i32:
; CHECK: uunpklo z0.s, z0.h
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> %a)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @uunpklo_i64(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uunpklo_i64:
; CHECK: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32> %a)
  ret <vscale x 2 x i64> %res
}

;
; TRN1
;
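; TRN1 interleaves the even-numbered elements of the two sources. Note that
; the unpacked nxv4f16 form operates on the containing 32-bit elements (.s).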

define <vscale x 16 x i1> @trn1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn1_b8:
; CHECK: trn1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @trn1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: trn1_b16:
; CHECK: trn1 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @trn1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: trn1_b32:
; CHECK: trn1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @trn1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: trn1_b64:
; CHECK: trn1 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @trn1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn1_i8:
; CHECK: trn1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: trn1_i16:
; CHECK: trn1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: trn1_i32:
; CHECK: trn1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: trn1_i64:
; CHECK: trn1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 4 x half> @trn1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: trn1_f16_v4:
; CHECK: trn1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @trn1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: trn1_bf16:
; CHECK: trn1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @trn1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: trn1_f16:
; CHECK: trn1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @trn1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: trn1_f32:
; CHECK: trn1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: trn1_f64:
; CHECK: trn1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; TRN2
;
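; TRN2 interleaves the odd-numbered elements of the two sources.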

define <vscale x 16 x i1> @trn2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: trn2_b8:
; CHECK: trn2 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @trn2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: trn2_b16:
; CHECK: trn2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @trn2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: trn2_b32:
; CHECK: trn2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @trn2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: trn2_b64:
; CHECK: trn2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @trn2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: trn2_i8:
; CHECK: trn2 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @trn2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: trn2_i16:
; CHECK: trn2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @trn2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: trn2_i32:
; CHECK: trn2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @trn2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: trn2_i64:
; CHECK: trn2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

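; Note: unpacked nxv4f16 keeps one 16-bit element in each 32-bit container, so
; the permute below is expected at the .s element size.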
define <vscale x 4 x half> @trn2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: trn2_f16_v4:
; CHECK: trn2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @trn2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: trn2_bf16:
; CHECK: trn2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @trn2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: trn2_f16:
; CHECK: trn2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @trn2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: trn2_f32:
; CHECK: trn2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @trn2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: trn2_f64:
; CHECK: trn2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; UZP1
;
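; UZP1 concatenates the even-numbered elements of the two source vectors:
;   { a[0], a[2], ..., b[0], b[2], ... }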

define <vscale x 16 x i1> @uzp1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp1_b8:
; CHECK: uzp1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @uzp1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: uzp1_b16:
; CHECK: uzp1 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @uzp1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: uzp1_b32:
; CHECK: uzp1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @uzp1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: uzp1_b64:
; CHECK: uzp1 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @uzp1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uzp1_i8:
; CHECK: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uzp1_i16:
; CHECK: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uzp1_i32:
; CHECK: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uzp1_i64:
; CHECK: uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 4 x half> @uzp1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: uzp1_f16_v4:
; CHECK: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @uzp1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: uzp1_bf16:
; CHECK: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @uzp1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: uzp1_f16:
; CHECK: uzp1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @uzp1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: uzp1_f32:
; CHECK: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @uzp1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: uzp1_f64:
; CHECK: uzp1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; UZP2
;
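; UZP2 concatenates the odd-numbered elements of the two source vectors:
;   { a[1], a[3], ..., b[1], b[3], ... }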

define <vscale x 16 x i1> @uzp2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: uzp2_b8:
; CHECK: uzp2 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @uzp2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: uzp2_b16:
; CHECK: uzp2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @uzp2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: uzp2_b32:
; CHECK: uzp2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @uzp2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: uzp2_b64:
; CHECK: uzp2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @uzp2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uzp2_i8:
; CHECK: uzp2 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uzp2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uzp2_i16:
; CHECK: uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uzp2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uzp2_i32:
; CHECK: uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uzp2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uzp2_i64:
; CHECK: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 4 x half> @uzp2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: uzp2_f16_v4:
; CHECK: uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @uzp2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: uzp2_bf16:
; CHECK: uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @uzp2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: uzp2_f16:
; CHECK: uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @uzp2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: uzp2_f32:
; CHECK: uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @uzp2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: uzp2_f64:
; CHECK: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; ZIP1
;
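; ZIP1 interleaves the elements of the low halves of the two source vectors:
;   { a[0], b[0], a[1], b[1], ... }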

define <vscale x 16 x i1> @zip1_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip1_b8:
; CHECK: zip1 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @zip1_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: zip1_b16:
; CHECK: zip1 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @zip1_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: zip1_b32:
; CHECK: zip1 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @zip1_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: zip1_b64:
; CHECK: zip1 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @zip1_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: zip1_i8:
; CHECK: zip1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip1_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: zip1_i16:
; CHECK: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip1_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: zip1_i32:
; CHECK: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip1_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: zip1_i64:
; CHECK: zip1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 4 x half> @zip1_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: zip1_f16_v4:
; CHECK: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @zip1_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: zip1_bf16:
; CHECK: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @zip1_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: zip1_f16:
; CHECK: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @zip1_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: zip1_f32:
; CHECK: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip1_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: zip1_f64:
; CHECK: zip1 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; ZIP2
;
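; ZIP2 interleaves the elements of the high halves of the two source vectors.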

define <vscale x 16 x i1> @zip2_b8(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: zip2_b8:
; CHECK: zip2 p0.b, p0.b, p1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1> %a,
                                                                <vscale x 16 x i1> %b)
  ret <vscale x 16 x i1> %out
}

define <vscale x 8 x i1> @zip2_b16(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: zip2_b16:
; CHECK: zip2 p0.h, p0.h, p1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1> %a,
                                                              <vscale x 8 x i1> %b)
  ret <vscale x 8 x i1> %out
}

define <vscale x 4 x i1> @zip2_b32(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: zip2_b32:
; CHECK: zip2 p0.s, p0.s, p1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1> %a,
                                                              <vscale x 4 x i1> %b)
  ret <vscale x 4 x i1> %out
}

define <vscale x 2 x i1> @zip2_b64(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: zip2_b64:
; CHECK: zip2 p0.d, p0.d, p1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1> %a,
                                                              <vscale x 2 x i1> %b)
  ret <vscale x 2 x i1> %out
}

define <vscale x 16 x i8> @zip2_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: zip2_i8:
; CHECK: zip2 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> %a,
                                                                <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @zip2_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: zip2_i16:
; CHECK: zip2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @zip2_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: zip2_i32:
; CHECK: zip2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %a,
                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @zip2_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: zip2_i64:
; CHECK: zip2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> %a,
                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 4 x half> @zip2_f16_v4(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: zip2_f16_v4:
; CHECK: zip2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half> %a,
                                                                 <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %out
}

define <vscale x 8 x bfloat> @zip2_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: zip2_bf16:
; CHECK: zip2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat> %a,
                                                                    <vscale x 8 x bfloat> %b)
  ret <vscale x 8 x bfloat> %out
}

define <vscale x 8 x half> @zip2_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: zip2_f16:
; CHECK: zip2 z0.h, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half> %a,
                                                                 <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @zip2_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: zip2_f32:
; CHECK: zip2 z0.s, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @zip2_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: zip2_f64:
; CHECK: zip2 z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double> %a,
                                                                   <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

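; Declarations for every intrinsic exercised by the tests above.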
declare <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1>, i8, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.compact.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.compact.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.compact.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.compact.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)

declare <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
declare <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, i32)
declare <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, i32)
declare <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)

declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
declare i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.rev.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.rev.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.rev.nxv2i1(<vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpkhi.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpkhi.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i16>)
declare <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpkhi.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpkhi.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpkhi.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.trn1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.trn1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.trn1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.trn1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.trn2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.trn2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.trn2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.trn2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.uzp1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.uzp2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.uzp2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.uzp2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.uzp2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip1.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip1.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.zip1.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.zip1.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.zip2.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.zip2.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.zip2.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 4 x half> @llvm.aarch64.sve.zip2.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)

; +bf16 is required for the bfloat versions.
attributes #0 = { "target-features"="+sve,+bf16" }