; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
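; Check that each SVE2 widening DSP intrinsic is lowered to the
; corresponding bottom (B) or top (T) instruction.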

;
; SABALB
;

define <vscale x 8 x i16> @sabalb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sabalb_b:
; CHECK: sabalb z0.h, z1.b, z2.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabalb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabalb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sabalb_h:
; CHECK: sabalb z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabalb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sabalb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sabalb_s:
; CHECK: sabalb z0.d, z1.s, z2.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabalb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c)
  ret <vscale x 2 x i64> %out
}

;
; SABALT
;

define <vscale x 8 x i16> @sabalt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sabalt_b:
; CHECK: sabalt z0.h, z1.b, z2.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabalt.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabalt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sabalt_h:
; CHECK: sabalt z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabalt.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sabalt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sabalt_s:
; CHECK: sabalt z0.d, z1.s, z2.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabalt.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c)
  ret <vscale x 2 x i64> %out
}

;
; SABDLB
;

define <vscale x 8 x i16> @sabdlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sabdlb_b:
; CHECK: sabdlb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabdlb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabdlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sabdlb_h:
; CHECK: sabdlb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabdlb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sabdlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sabdlb_s:
; CHECK: sabdlb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabdlb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SABDLT
;

define <vscale x 8 x i16> @sabdlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sabdlt_b:
; CHECK: sabdlt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabdlt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabdlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sabdlt_h:
; CHECK: sabdlt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabdlt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sabdlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sabdlt_s:
; CHECK: sabdlt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabdlt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDLB
;

define <vscale x 8 x i16> @saddlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddlb_b:
; CHECK: saddlb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddlb_h:
; CHECK: saddlb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddlb_s:
; CHECK: saddlb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDLT
;

define <vscale x 8 x i16> @saddlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddlt_b:
; CHECK: saddlt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddlt_h:
; CHECK: saddlt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddlt_s:
; CHECK: saddlt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDWB
;

define <vscale x 8 x i16> @saddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddwb_b:
; CHECK: saddwb z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddwb_h:
; CHECK: saddwb z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddwb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddwb_s:
; CHECK: saddwb z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddwb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDWT
;

define <vscale x 8 x i16> @saddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddwt_b:
; CHECK: saddwt z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwt.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddwt_h:
; CHECK: saddwt z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddwt.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddwt_s:
; CHECK: saddwt z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddwt.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SMULLB (Vectors)
;

define <vscale x 8 x i16> @smullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smullb_b:
; CHECK: smullb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smullb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullb_h:
; CHECK: smullb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullb_s:
; CHECK: smullb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SMULLB (Indexed)
;

define <vscale x 4 x i32> @smullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullb_lane_h:
; CHECK: smullb z0.s, z0.h, z1.h[4]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullb.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       i32 4)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullb_lane_s:
; CHECK: smullb z0.d, z0.s, z1.s[3]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullb.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       i32 3)
  ret <vscale x 2 x i64> %out
}

;
; SMULLT (Vectors)
;

define <vscale x 8 x i16> @smullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smullt_b:
; CHECK: smullt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smullt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullt_h:
; CHECK: smullt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullt_s:
; CHECK: smullt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SMULLT (Indexed)
;

define <vscale x 4 x i32> @smullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullt_lane_h:
; CHECK: smullt z0.s, z0.h, z1.h[5]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullt.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       i32 5)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullt_lane_s:
; CHECK: smullt z0.d, z0.s, z1.s[2]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullt.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       i32 2)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULLB (Vectors)
;

define <vscale x 8 x i16> @sqdmullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqdmullb_b:
; CHECK: sqdmullb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullb.nxv8i16(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullb_h:
; CHECK: sqdmullb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.nxv4i32(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullb_s:
; CHECK: sqdmullb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.nxv2i64(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULLB (Indexed)
;

define <vscale x 4 x i32> @sqdmullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullb_lane_h:
; CHECK: sqdmullb z0.s, z0.h, z1.h[2]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                         <vscale x 8 x i16> %b,
                                                                         i32 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullb_lane_s:
; CHECK: sqdmullb z0.d, z0.s, z1.s[1]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                         <vscale x 4 x i32> %b,
                                                                         i32 1)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULLT (Vectors)
;

define <vscale x 8 x i16> @sqdmullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqdmullt_b:
; CHECK: sqdmullt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullt.nxv8i16(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullt_h:
; CHECK: sqdmullt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.nxv4i32(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullt_s:
; CHECK: sqdmullt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.nxv2i64(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULLT (Indexed)
;

define <vscale x 4 x i32> @sqdmullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullt_lane_h:
; CHECK: sqdmullt z0.s, z0.h, z1.h[3]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                         <vscale x 8 x i16> %b,
                                                                         i32 3)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullt_lane_s:
; CHECK: sqdmullt z0.d, z0.s, z1.s[0]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                         <vscale x 4 x i32> %b,
                                                                         i32 0)
  ret <vscale x 2 x i64> %out
}

;
; SSUBLB
;

define <vscale x 8 x i16> @ssublb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ssublb_b:
; CHECK: ssublb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssublb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ssublb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ssublb_h:
; CHECK: ssublb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssublb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ssublb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ssublb_s:
; CHECK: ssublb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssublb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SSHLLB
;

define <vscale x 8 x i16> @sshllb_b(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sshllb_b:
; CHECK: sshllb z0.h, z0.b, #0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sshllb.nxv8i16(<vscale x 16 x i8> %a, i32 0)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sshllb_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sshllb_h:
; CHECK: sshllb z0.s, z0.h, #1
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sshllb.nxv4i32(<vscale x 8 x i16> %a, i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sshllb_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sshllb_s:
; CHECK: sshllb z0.d, z0.s, #2
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sshllb.nxv2i64(<vscale x 4 x i32> %a, i32 2)
  ret <vscale x 2 x i64> %out
}

;
; SSHLLT
;

define <vscale x 8 x i16> @sshllt_b(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sshllt_b:
; CHECK: sshllt z0.h, z0.b, #3
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sshllt.nxv8i16(<vscale x 16 x i8> %a, i32 3)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sshllt_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sshllt_h:
; CHECK: sshllt z0.s, z0.h, #4
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sshllt.nxv4i32(<vscale x 8 x i16> %a, i32 4)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sshllt_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sshllt_s:
; CHECK: sshllt z0.d, z0.s, #5
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sshllt.nxv2i64(<vscale x 4 x i32> %a, i32 5)
  ret <vscale x 2 x i64> %out
}

;
; SSUBLT
;

define <vscale x 8 x i16> @ssublt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ssublt_b:
; CHECK: ssublt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssublt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ssublt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ssublt_h:
; CHECK: ssublt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssublt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ssublt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ssublt_s:
; CHECK: ssublt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssublt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SSUBWB
;

define <vscale x 8 x i16> @ssubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ssubwb_b:
; CHECK: ssubwb z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubwb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ssubwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ssubwb_h:
; CHECK: ssubwb z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubwb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ssubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ssubwb_s:
; CHECK: ssubwb z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubwb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SSUBWT
;

define <vscale x 8 x i16> @ssubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ssubwt_b:
; CHECK: ssubwt z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubwt.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ssubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ssubwt_h:
; CHECK: ssubwt z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubwt.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ssubwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ssubwt_s:
; CHECK: ssubwt z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubwt.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UABALB
;

define <vscale x 8 x i16> @uabalb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: uabalb_b:
; CHECK: uabalb z0.h, z1.b, z2.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabalb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uabalb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: uabalb_h:
; CHECK: uabalb z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabalb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uabalb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: uabalb_s:
; CHECK: uabalb z0.d, z1.s, z2.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabalb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c)
  ret <vscale x 2 x i64> %out
}

;
; UABALT
;

define <vscale x 8 x i16> @uabalt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: uabalt_b:
; CHECK: uabalt z0.h, z1.b, z2.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabalt.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uabalt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: uabalt_h:
; CHECK: uabalt z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabalt.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uabalt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: uabalt_s:
; CHECK: uabalt z0.d, z1.s, z2.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabalt.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c)
  ret <vscale x 2 x i64> %out
}

;
; UABDLB
;

define <vscale x 8 x i16> @uabdlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uabdlb_b:
; CHECK: uabdlb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabdlb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uabdlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uabdlb_h:
; CHECK: uabdlb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabdlb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uabdlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uabdlb_s:
; CHECK: uabdlb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabdlb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UABDLT
;

define <vscale x 8 x i16> @uabdlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uabdlt_b:
; CHECK: uabdlt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabdlt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uabdlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uabdlt_h:
; CHECK: uabdlt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabdlt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uabdlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uabdlt_s:
; CHECK: uabdlt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabdlt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UADDLB
;

define <vscale x 8 x i16> @uaddlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uaddlb_b:
; CHECK: uaddlb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddlb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uaddlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uaddlb_h:
; CHECK: uaddlb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddlb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uaddlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uaddlb_s:
; CHECK: uaddlb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddlb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UADDLT
;

define <vscale x 8 x i16> @uaddlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uaddlt_b:
; CHECK: uaddlt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddlt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uaddlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uaddlt_h:
; CHECK: uaddlt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddlt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uaddlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uaddlt_s:
; CHECK: uaddlt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddlt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UADDWB
;

define <vscale x 8 x i16> @uaddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uaddwb_b:
; CHECK: uaddwb z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddwb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uaddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uaddwb_h:
; CHECK: uaddwb z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddwb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uaddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uaddwb_s:
; CHECK: uaddwb z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddwb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UADDWT
;

define <vscale x 8 x i16> @uaddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uaddwt_b:
; CHECK: uaddwt z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddwt.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uaddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uaddwt_h:
; CHECK: uaddwt z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddwt.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uaddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uaddwt_s:
; CHECK: uaddwt z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddwt.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UMULLB (Vectors)
;

define <vscale x 8 x i16> @umullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: umullb_b:
; CHECK: umullb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umullb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umullb_h:
; CHECK: umullb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umullb_s:
; CHECK: umullb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UMULLB (Indexed)
;

define <vscale x 4 x i32> @umullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umullb_lane_h:
; CHECK: umullb z0.s, z0.h, z1.h[0]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullb.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       i32 0)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umullb_lane_s:
; CHECK: umullb z0.d, z0.s, z1.s[3]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullb.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       i32 3)
  ret <vscale x 2 x i64> %out
}

;
; UMULLT (Vectors)
;

define <vscale x 8 x i16> @umullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: umullt_b:
; CHECK: umullt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umullt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umullt_h:
; CHECK: umullt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umullt_s:
; CHECK: umullt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UMULLT (Indexed)
;

define <vscale x 4 x i32> @umullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umullt_lane_h:
; CHECK: umullt z0.s, z0.h, z1.h[1]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullt.lane.nxv4i32(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umullt_lane_s:
; CHECK: umullt z0.d, z0.s, z1.s[2]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullt.lane.nxv2i64(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       i32 2)
  ret <vscale x 2 x i64> %out
}

;
; USHLLB
;

define <vscale x 8 x i16> @ushllb_b(<vscale x 16 x i8> %a) {
; CHECK-LABEL: ushllb_b:
; CHECK: ushllb z0.h, z0.b, #6
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ushllb.nxv8i16(<vscale x 16 x i8> %a, i32 6)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ushllb_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: ushllb_h:
; CHECK: ushllb z0.s, z0.h, #7
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ushllb.nxv4i32(<vscale x 8 x i16> %a, i32 7)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ushllb_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: ushllb_s:
; CHECK: ushllb z0.d, z0.s, #8
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ushllb.nxv2i64(<vscale x 4 x i32> %a, i32 8)
  ret <vscale x 2 x i64> %out
}

;
; USHLLT
;

define <vscale x 8 x i16> @ushllt_b(<vscale x 16 x i8> %a) {
; CHECK-LABEL: ushllt_b:
; CHECK: ushllt z0.h, z0.b, #7
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ushllt.nxv8i16(<vscale x 16 x i8> %a, i32 7)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ushllt_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: ushllt_h:
; CHECK: ushllt z0.s, z0.h, #15
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ushllt.nxv4i32(<vscale x 8 x i16> %a, i32 15)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ushllt_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: ushllt_s:
; CHECK: ushllt z0.d, z0.s, #31
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ushllt.nxv2i64(<vscale x 4 x i32> %a, i32 31)
  ret <vscale x 2 x i64> %out
}

;
; USUBLB
;

define <vscale x 8 x i16> @usublb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: usublb_b:
; CHECK: usublb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usublb.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @usublb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: usublb_h:
; CHECK: usublb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usublb.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @usublb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: usublb_s:
; CHECK: usublb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usublb.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; USUBLT
;

define <vscale x 8 x i16> @usublt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: usublt_b:
; CHECK: usublt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usublt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @usublt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: usublt_h:
; CHECK: usublt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usublt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @usublt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: usublt_s:
; CHECK: usublt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usublt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; USUBWB
;

define <vscale x 8 x i16> @usubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: usubwb_b:
; CHECK: usubwb z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usubwb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @usubwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: usubwb_h:
; CHECK: usubwb z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usubwb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @usubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: usubwb_s:
; CHECK: usubwb z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usubwb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; USUBWT
;

define <vscale x 8 x i16> @usubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: usubwt_b:
; CHECK: usubwt z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usubwt.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @usubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: usubwt_h:
; CHECK: usubwt z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usubwt.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @usubwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: usubwt_s:
; CHECK: usubwt z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usubwt.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

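;
; Intrinsic declarations
;
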
declare <vscale x 8 x i16> @llvm.aarch64.sve.sabalb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sabalb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sabalb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sabalt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sabalt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sabalt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sabdlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sabdlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sabdlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sabdlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sabdlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sabdlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.saddlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.saddlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.saddlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.saddlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.saddlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.saddlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.saddwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.saddwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.saddwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.saddwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.saddwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.smullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.smullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.smullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.smullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sshllb.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sshllb.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sshllb.nxv2i64(<vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sshllt.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sshllt.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sshllt.nxv2i64(<vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ssublb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssublb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssublb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ssublt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssublt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssublt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ssubwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssubwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssubwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ssubwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssubwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssubwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uabalb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabalb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabalb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uabalt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabalt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabalt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uabdlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabdlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabdlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uabdlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabdlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabdlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.umullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.umullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ushllb.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ushllb.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ushllb.nxv2i64(<vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ushllt.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ushllt.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ushllt.nxv2i64(<vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.usublb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usublb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usublb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.usublt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usublt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usublt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.usubwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usubwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usubwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.usubwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usubwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usubwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)