; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; ASR
;
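; Predicated arithmetic shift right. The wide variants take their per-element
; shift amounts from a vector of 64-bit elements.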

define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: asr_i8:
; CHECK: asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: asr_i16:
; CHECK: asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: asr_i32:
; CHECK: asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i64:
; CHECK: asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @asr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_wide_i8:
; CHECK: asr z0.b, p0/m, z0.b, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_wide_i16:
; CHECK: asr z0.h, p0/m, z0.h, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_wide_i32:
; CHECK: asr z0.s, p0/m, z0.s, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

;
; ASRD
;
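; ASRD is an arithmetic shift right for divide by immediate: it rounds towards
; zero, so it implements signed division by a power of two. The immediate range
; is 1 to the element size in bits, which the #1/#2/#31/#64 cases below probe.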

define <vscale x 16 x i8> @asrd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: asrd_i8:
; CHECK: asrd z0.b, p0/m, z0.b, #1
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
                                                                i32 1)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asrd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: asrd_i16:
; CHECK: asrd z0.h, p0/m, z0.h, #2
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
                                                                i32 2)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asrd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: asrd_i32:
; CHECK: asrd z0.s, p0/m, z0.s, #31
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
                                                                i32 31)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @asrd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: asrd_i64:
; CHECK: asrd z0.d, p0/m, z0.d, #64
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
                                                                i32 64)
  ret <vscale x 2 x i64> %out
}

;
; INSR
;
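; INSR shifts the vector left by one element and inserts the scalar operand
; into element 0. Integer sources come from a general-purpose register (w0/x0);
; FP sources come from a SIMD&FP register (h1/s1/d1, since z0 is taken by %a).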

define <vscale x 16 x i8> @insr_i8(<vscale x 16 x i8> %a, i8 %b) {
; CHECK-LABEL: insr_i8:
; CHECK: insr z0.b, w0
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8> %a, i8 %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @insr_i16(<vscale x 8 x i16> %a, i16 %b) {
; CHECK-LABEL: insr_i16:
; CHECK: insr z0.h, w0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> %a, i16 %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @insr_i32(<vscale x 4 x i32> %a, i32 %b) {
; CHECK-LABEL: insr_i32:
; CHECK: insr z0.s, w0
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32> %a, i32 %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @insr_i64(<vscale x 2 x i64> %a, i64 %b) {
; CHECK-LABEL: insr_i64:
; CHECK: insr z0.d, x0
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64> %a, i64 %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
; CHECK-LABEL: insr_f16:
; CHECK: insr z0.h, h1
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half> %a, half %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
; CHECK-LABEL: insr_f32:
; CHECK: insr z0.s, s1
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float> %a, float %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @insr_f64(<vscale x 2 x double> %a, double %b) {
; CHECK-LABEL: insr_f64:
; CHECK: insr z0.d, d1
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double> %a, double %b)
  ret <vscale x 2 x double> %out
}

;
; LSL
;
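; Predicated logical shift left, including the wide (64-bit shift amount) forms.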

define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: lsl_i8:
; CHECK: lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: lsl_i16:
; CHECK: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: lsl_i32:
; CHECK: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i64:
; CHECK: lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @lsl_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_wide_i8:
; CHECK: lsl z0.b, p0/m, z0.b, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_wide_i16:
; CHECK: lsl z0.h, p0/m, z0.h, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_wide_i32:
; CHECK: lsl z0.s, p0/m, z0.s, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

;
; LSR
;
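; Predicated logical shift right, including the wide (64-bit shift amount) forms.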

define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: lsr_i8:
; CHECK: lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: lsr_i16:
; CHECK: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: lsr_i32:
; CHECK: lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i64:
; CHECK: lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @lsr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_wide_i8:
; CHECK: lsr z0.b, p0/m, z0.b, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_wide_i16:
; CHECK: lsr z0.h, p0/m, z0.h, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_wide_i32:
; CHECK: lsr z0.s, p0/m, z0.s, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8>, i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16>, i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half>, half)
declare <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float>, float)
declare <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double>, double)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)