; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

;
; CADD
;
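; Complex integer add with rotate: even/odd element pairs hold the real and
; imaginary parts of complex numbers, and the second source is rotated by
; 90 or 270 degrees in the complex plane before the addition.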

define <vscale x 16 x i8> @cadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cadd_b:
; CHECK: cadd z0.b, z0.b, z1.b, #90
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @cadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: cadd_h:
; CHECK: cadd z0.h, z0.h, z1.h, #90
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cadd_s:
; CHECK: cadd z0.s, z0.s, z1.s, #270
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @cadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cadd_d:
; CHECK: cadd z0.d, z0.d, z1.d, #270
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  i32 270)
  ret <vscale x 2 x i64> %out
}

;
; SQCADD
;
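; Saturating complex integer add with rotate: as CADD, but each addition
; saturates to the signed element range instead of wrapping.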

define <vscale x 16 x i8> @sqcadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqcadd_b:
; CHECK: sqcadd z0.b, z0.b, z1.b, #90
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b,
                                                                    i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqcadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqcadd_h:
; CHECK: sqcadd z0.h, z0.h, z1.h, #90
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b,
                                                                    i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqcadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqcadd_s:
; CHECK: sqcadd z0.s, z0.s, z1.s, #270
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b,
                                                                    i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqcadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqcadd_d:
; CHECK: sqcadd z0.d, z0.d, z1.d, #270
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %b,
                                                                    i32 270)
  ret <vscale x 2 x i64> %out
}

;
; CMLA
;
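; Complex integer multiply-add with rotate (#0, #90, #180 or #270): partial
; complex products of the second and third operands are accumulated into the
; first; a pair of CMLAs with rotations 0 and 90 performs a complete complex
; multiply-accumulate.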

define <vscale x 16 x i8> @cmla_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: cmla_b:
; CHECK: cmla z0.b, z1.b, z2.b, #90
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c,
                                                                  i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @cmla_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: cmla_h:
; CHECK: cmla z0.h, z1.h, z2.h, #180
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c,
                                                                  i32 180)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cmla_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: cmla_s:
; CHECK: cmla z0.s, z1.s, z2.s, #270
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c,
                                                                  i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @cmla_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: cmla_d:
; CHECK: cmla z0.d, z1.d, z2.d, #0
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %c,
                                                                  i32 0)
  ret <vscale x 2 x i64> %out
}

;
; CMLA_LANE
;
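; Indexed form of CMLA: the immediate index selects one complex number
; (element pair) from each 128-bit segment of the third source vector.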

define <vscale x 8 x i16> @cmla_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: cmla_lane_h:
; CHECK: cmla z0.h, z1.h, z2.h[1], #180
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       <vscale x 8 x i16> %c,
                                                                       i32 1,
                                                                       i32 180)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cmla_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: cmla_lane_s:
; CHECK: cmla z0.s, z1.s, z2.s[0], #270
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       <vscale x 4 x i32> %c,
                                                                       i32 0,
                                                                       i32 270)
  ret <vscale x 4 x i32> %out
}

;
; SQRDCMLAH
;
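; Saturating rounding doubling complex integer multiply-add high with rotate:
; each product is doubled and rounded, and its most significant half is
; accumulated into the destination with signed saturation.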

define <vscale x 16 x i8> @sqrdcmlah_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sqrdcmlah_b:
; CHECK: sqrdcmlah z0.b, z1.b, z2.b, #0
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                       <vscale x 16 x i8> %b,
                                                                       <vscale x 16 x i8> %c,
                                                                       i32 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdcmlah_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdcmlah_h:
; CHECK: sqrdcmlah z0.h, z1.h, z2.h, #90
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       <vscale x 8 x i16> %c,
                                                                       i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdcmlah_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdcmlah_s:
; CHECK: sqrdcmlah z0.s, z1.s, z2.s, #180
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       <vscale x 4 x i32> %c,
                                                                       i32 180)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdcmlah_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdcmlah_d:
; CHECK: sqrdcmlah z0.d, z1.d, z2.d, #270
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                       <vscale x 2 x i64> %b,
                                                                       <vscale x 2 x i64> %c,
                                                                       i32 270)
  ret <vscale x 2 x i64> %out
}

;
; SQRDCMLAH_LANE
;
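; Indexed form of SQRDCMLAH: the immediate index selects one complex number
; (element pair) from each 128-bit segment of the third source vector.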

define <vscale x 8 x i16> @sqrdcmlah_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdcmlah_lane_h:
; CHECK: sqrdcmlah z0.h, z1.h, z2.h[1], #90
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                            <vscale x 8 x i16> %b,
                                                                            <vscale x 8 x i16> %c,
                                                                            i32 1,
                                                                            i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdcmlah_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdcmlah_lane_s:
; CHECK: sqrdcmlah z0.s, z1.s, z2.s[0], #180
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                            <vscale x 4 x i32> %b,
                                                                            <vscale x 4 x i32> %c,
                                                                            i32 0,
                                                                            i32 180)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)