; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

;
; EOR3 (vector, bitwise, unpredicated)
;
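; EOR3 computes the bitwise exclusive OR of three vectors: op1 EOR op2 EOR op3.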
define <vscale x 16 x i8> @eor3_i8(<vscale x 16 x i8> %a,
                                   <vscale x 16 x i8> %b,
                                   <vscale x 16 x i8> %c) {
; CHECK-LABEL: eor3_i8:
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.eor3.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @eor3_i16(<vscale x 8 x i16> %a,
                                    <vscale x 8 x i16> %b,
                                    <vscale x 8 x i16> %c) {
; CHECK-LABEL: eor3_i16:
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.eor3.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @eor3_i32(<vscale x 4 x i32> %a,
                                    <vscale x 4 x i32> %b,
                                    <vscale x 4 x i32> %c) {
; CHECK-LABEL: eor3_i32:
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.eor3.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @eor3_i64(<vscale x 2 x i64> %a,
                                    <vscale x 2 x i64> %b,
                                    <vscale x 2 x i64> %c) {
; CHECK-LABEL: eor3_i64:
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.eor3.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BCAX (vector, bitwise, unpredicated)
;
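; BCAX (bitwise clear and exclusive OR) computes op1 EOR (op2 AND NOT op3).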
define <vscale x 16 x i8> @bcax_i8(<vscale x 16 x i8> %a,
                                   <vscale x 16 x i8> %b,
                                   <vscale x 16 x i8> %c) {
; CHECK-LABEL: bcax_i8:
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bcax.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bcax_i16(<vscale x 8 x i16> %a,
                                    <vscale x 8 x i16> %b,
                                    <vscale x 8 x i16> %c) {
; CHECK-LABEL: bcax_i16:
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bcax.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bcax_i32(<vscale x 4 x i32> %a,
                                    <vscale x 4 x i32> %b,
                                    <vscale x 4 x i32> %c) {
; CHECK-LABEL: bcax_i32:
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bcax.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bcax_i64(<vscale x 2 x i64> %a,
                                    <vscale x 2 x i64> %b,
                                    <vscale x 2 x i64> %c) {
; CHECK-LABEL: bcax_i64:
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bcax.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BSL (vector, bitwise, unpredicated)
;
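; BSL selects bits from op1 where op3 is set and from op2 where it is clear:
; (op1 AND op3) ORR (op2 AND NOT op3).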
define <vscale x 16 x i8> @bsl_i8(<vscale x 16 x i8> %a,
                                  <vscale x 16 x i8> %b,
                                  <vscale x 16 x i8> %c) {
; CHECK-LABEL: bsl_i8:
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bsl_i16(<vscale x 8 x i16> %a,
                                   <vscale x 8 x i16> %b,
                                   <vscale x 8 x i16> %c) {
; CHECK-LABEL: bsl_i16:
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bsl_i32(<vscale x 4 x i32> %a,
                                   <vscale x 4 x i32> %b,
                                   <vscale x 4 x i32> %c) {
; CHECK-LABEL: bsl_i32:
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bsl_i64(<vscale x 2 x i64> %a,
                                   <vscale x 2 x i64> %b,
                                   <vscale x 2 x i64> %c) {
; CHECK-LABEL: bsl_i64:
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BSL1N (vector, bitwise, unpredicated)
;
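; BSL1N is BSL with the first source inverted:
; (NOT op1 AND op3) ORR (op2 AND NOT op3).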
define <vscale x 16 x i8> @bsl1n_i8(<vscale x 16 x i8> %a,
                                    <vscale x 16 x i8> %b,
                                    <vscale x 16 x i8> %c) {
; CHECK-LABEL: bsl1n_i8:
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl1n.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bsl1n_i16(<vscale x 8 x i16> %a,
                                     <vscale x 8 x i16> %b,
                                     <vscale x 8 x i16> %c) {
; CHECK-LABEL: bsl1n_i16:
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl1n.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bsl1n_i32(<vscale x 4 x i32> %a,
                                     <vscale x 4 x i32> %b,
                                     <vscale x 4 x i32> %c) {
; CHECK-LABEL: bsl1n_i32:
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl1n.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bsl1n_i64(<vscale x 2 x i64> %a,
                                     <vscale x 2 x i64> %b,
                                     <vscale x 2 x i64> %c) {
; CHECK-LABEL: bsl1n_i64:
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl1n.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BSL2N (vector, bitwise, unpredicated)
;
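; BSL2N is BSL with the second source inverted:
; (op1 AND op3) ORR (NOT op2 AND NOT op3).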
define <vscale x 16 x i8> @bsl2n_i8(<vscale x 16 x i8> %a,
                                    <vscale x 16 x i8> %b,
                                    <vscale x 16 x i8> %c) {
; CHECK-LABEL: bsl2n_i8:
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl2n.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bsl2n_i16(<vscale x 8 x i16> %a,
                                     <vscale x 8 x i16> %b,
                                     <vscale x 8 x i16> %c) {
; CHECK-LABEL: bsl2n_i16:
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl2n.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bsl2n_i32(<vscale x 4 x i32> %a,
                                     <vscale x 4 x i32> %b,
                                     <vscale x 4 x i32> %c) {
; CHECK-LABEL: bsl2n_i32:
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl2n.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bsl2n_i64(<vscale x 2 x i64> %a,
                                     <vscale x 2 x i64> %b,
                                     <vscale x 2 x i64> %c) {
; CHECK-LABEL: bsl2n_i64:
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl2n.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; NBSL (vector, bitwise, unpredicated)
;
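; NBSL computes the bitwise inversion of the BSL result:
; NOT ((op1 AND op3) ORR (op2 AND NOT op3)).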
define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a,
                                   <vscale x 16 x i8> %b,
                                   <vscale x 16 x i8> %c) {
; CHECK-LABEL: nbsl_i8:
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.nbsl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a,
                                    <vscale x 8 x i16> %b,
                                    <vscale x 8 x i16> %c) {
; CHECK-LABEL: nbsl_i16:
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.nbsl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a,
                                    <vscale x 4 x i32> %b,
                                    <vscale x 4 x i32> %c) {
; CHECK-LABEL: nbsl_i32:
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.nbsl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a,
                                    <vscale x 2 x i64> %b,
                                    <vscale x 2 x i64> %c) {
; CHECK-LABEL: nbsl_i64:
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.nbsl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; XAR (vector, bitwise, unpredicated)
;
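; XAR exclusive ORs the two sources, then rotates each element right by the
; immediate: ROR(op1 EOR op2, #imm).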

define <vscale x 16 x i8> @xar_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: xar_b:
; CHECK: xar z0.b, z0.b, z1.b, #1
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.xar.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b,
                                                               i32 1)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @xar_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: xar_h:
; CHECK: xar z0.h, z0.h, z1.h, #2
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.xar.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b,
                                                               i32 2)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @xar_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: xar_s:
; CHECK: xar z0.s, z0.s, z1.s, #3
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.xar.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b,
                                                               i32 3)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @xar_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: xar_d:
; CHECK: xar z0.d, z0.d, z1.d, #4
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.xar.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b,
                                                               i32 4)
  ret <vscale x 2 x i64> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.eor3.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.eor3.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.eor3.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.eor3.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bcax.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bcax.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bcax.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bcax.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl1n.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl1n.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl1n.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl1n.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl2n.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl2n.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl2n.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl2n.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.nbsl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.nbsl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.nbsl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.nbsl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.xar.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.xar.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.xar.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.xar.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)