; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

;
; SABALB
;

define <vscale x 8 x i16> @sabalb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sabalb_b:
; CHECK: sabalb z0.h, z1.b, z2.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabalb.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabalb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sabalb_h:
; CHECK: sabalb z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabalb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sabalb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sabalb_s:
; CHECK: sabalb z0.d, z1.s, z2.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabalb.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 2 x i64> %out
}

;
; SABALT
;

define <vscale x 8 x i16> @sabalt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sabalt_b:
; CHECK: sabalt z0.h, z1.b, z2.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabalt.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabalt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sabalt_h:
; CHECK: sabalt z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabalt.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64>
@sabalt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sabalt_s:
; CHECK: sabalt z0.d, z1.s, z2.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabalt.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 2 x i64> %out
}

;
; SABDLB
;

define <vscale x 8 x i16> @sabdlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sabdlb_b:
; CHECK: sabdlb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabdlb.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabdlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sabdlb_h:
; CHECK: sabdlb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabdlb.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sabdlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sabdlb_s:
; CHECK: sabdlb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabdlb.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SABDLT
;

define <vscale x 8 x i16> @sabdlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sabdlt_b:
; CHECK: sabdlt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sabdlt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sabdlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sabdlt_h:
; CHECK: sabdlt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sabdlt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4
x i32> %out
}

define <vscale x 2 x i64> @sabdlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sabdlt_s:
; CHECK: sabdlt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sabdlt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDLB
;

define <vscale x 8 x i16> @saddlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddlb_b:
; CHECK: saddlb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlb.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddlb_h:
; CHECK: saddlb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlb.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddlb_s:
; CHECK: saddlb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlb.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDLT
;

define <vscale x 8 x i16> @saddlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddlt_b:
; CHECK: saddlt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddlt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddlt_h:
; CHECK: saddlt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddlt.nxv4i32(<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddlt_s:
; CHECK: saddlt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddlt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDWB
;

define <vscale x 8 x i16> @saddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddwb_b:
; CHECK: saddwb z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddwb_h:
; CHECK: saddwb z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saddwb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddwb_s:
; CHECK: saddwb z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddwb.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SADDWT
;

define <vscale x 8 x i16> @saddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: saddwt_b:
; CHECK: saddwt z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saddwt.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @saddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: saddwt_h:
; CHECK: saddwt z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32>
@llvm.aarch64.sve.saddwt.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @saddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: saddwt_s:
; CHECK: saddwt z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saddwt.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SMULLB (Vectors)
;

define <vscale x 8 x i16> @smullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smullb_b:
; CHECK: smullb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smullb.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullb_h:
; CHECK: smullb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullb.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullb_s:
; CHECK: smullb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullb.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SMULLB (Indexed)
;

define <vscale x 4 x i32> @smullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullb_lane_h:
; CHECK: smullb z0.s, z0.h, z1.h[4]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullb.lane.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 4)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullb_lane_s:
; CHECK: smullb z0.d, z0.s, z1.s[3]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullb.lane.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 3)
  ret <vscale x 2 x i64> %out
}

;
; SMULLT (Vectors)
;

define <vscale x 8 x i16> @smullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smullt_b:
; CHECK: smullt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smullt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullt_h:
; CHECK: smullt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullt_s:
; CHECK: smullt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SMULLT (Indexed)
;

define <vscale x 4 x i32> @smullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smullt_lane_h:
; CHECK: smullt z0.s, z0.h, z1.h[5]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smullt.lane.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 5)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smullt_lane_s:
; CHECK: smullt z0.d, z0.s, z1.s[2]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smullt.lane.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 2)
ret <vscale x 2 x i64> %out
}

;
; SQDMULLB (Vectors)
;

define <vscale x 8 x i16> @sqdmullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqdmullb_b:
; CHECK: sqdmullb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullb.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullb_h:
; CHECK: sqdmullb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullb_s:
; CHECK: sqdmullb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULLB (Indexed)
;

define <vscale x 4 x i32> @sqdmullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullb_lane_h:
; CHECK: sqdmullb z0.s, z0.h, z1.h[2]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.lane.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 2)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullb_lane_s:
; CHECK: sqdmullb z0.d, z0.s, z1.s[1]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.lane.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 1)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULLT (Vectors)
;

define <vscale x 8 x i16> @sqdmullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqdmullt_b:
; CHECK: sqdmullt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqdmullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullt_h:
; CHECK: sqdmullt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullt_s:
; CHECK: sqdmullt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQDMULLT (Indexed)
;

define <vscale x 4 x i32> @sqdmullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqdmullt_lane_h:
; CHECK: sqdmullt z0.s, z0.h, z1.h[3]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.lane.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 3)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqdmullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqdmullt_lane_s:
; CHECK: sqdmullt z0.d, z0.s, z1.s[0]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.lane.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 0)
  ret <vscale x 2 x i64> %out
}

;
; SSUBLB
;

define <vscale x 8 x i16> @ssublb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ssublb_b:
; CHECK: ssublb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssublb.nxv8i16(<vscale x 16 x i8> %a, <vscale x
16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ssublb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ssublb_h:
; CHECK: ssublb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssublb.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ssublb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ssublb_s:
; CHECK: ssublb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssublb.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SSHLLB
;

define <vscale x 8 x i16> @sshllb_b(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sshllb_b:
; CHECK: sshllb z0.h, z0.b, #0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sshllb.nxv8i16(<vscale x 16 x i8> %a, i32 0)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sshllb_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sshllb_h:
; CHECK: sshllb z0.s, z0.h, #1
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sshllb.nxv4i32(<vscale x 8 x i16> %a, i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sshllb_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sshllb_s:
; CHECK: sshllb z0.d, z0.s, #2
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sshllb.nxv2i64(<vscale x 4 x i32> %a, i32 2)
  ret <vscale x 2 x i64> %out
}

;
; SSHLLT
;

define <vscale x 8 x i16> @sshllt_b(<vscale x 16 x i8> %a) {
; CHECK-LABEL: sshllt_b:
; CHECK: sshllt z0.h, z0.b, #3
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sshllt.nxv8i16(<vscale x 16 x i8> %a, i32 3)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sshllt_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sshllt_h:
; CHECK: sshllt z0.s, z0.h, #4
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sshllt.nxv4i32(<vscale x 8 x i16> %a, i32 4)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sshllt_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sshllt_s:
; CHECK: sshllt z0.d, z0.s, #5
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sshllt.nxv2i64(<vscale x 4 x i32> %a, i32 5)
  ret <vscale x 2 x i64> %out
}

;
; SSUBLT
;

define <vscale x 8 x i16> @ssublt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ssublt_b:
; CHECK: ssublt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssublt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ssublt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ssublt_h:
; CHECK: ssublt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssublt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ssublt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ssublt_s:
; CHECK: ssublt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssublt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SSUBWB
;

define <vscale x 8 x i16> @ssubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ssubwb_b:
; CHECK: ssubwb z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubwb.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ssubwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ssubwb_h:
; CHECK: ssubwb z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubwb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ssubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ssubwb_s:
; CHECK: ssubwb z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubwb.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; SSUBWT
;

define <vscale x 8 x i16> @ssubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ssubwt_b:
; CHECK: ssubwt z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssubwt.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ssubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: ssubwt_h:
; CHECK: ssubwt z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssubwt.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @ssubwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ssubwt_s:
; CHECK: ssubwt z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssubwt.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UABALB
;

define <vscale x 8 x i16> @uabalb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: uabalb_b:
; CHECK: uabalb z0.h, z1.b, z2.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabalb.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32>
@uabalb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: uabalb_h:
; CHECK: uabalb z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabalb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uabalb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: uabalb_s:
; CHECK: uabalb z0.d, z1.s, z2.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabalb.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 2 x i64> %out
}

;
; UABALT
;

define <vscale x 8 x i16> @uabalt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: uabalt_b:
; CHECK: uabalt z0.h, z1.b, z2.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabalt.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uabalt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: uabalt_h:
; CHECK: uabalt z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabalt.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uabalt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: uabalt_s:
; CHECK: uabalt z0.d, z1.s, z2.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabalt.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 2 x i64> %out
}

;
; UABDLB
;

define <vscale x 8 x i16> @uabdlb_b(<vscale x 16 x i8> %a, <vscale x
16 x i8> %b) {
; CHECK-LABEL: uabdlb_b:
; CHECK: uabdlb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabdlb.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uabdlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uabdlb_h:
; CHECK: uabdlb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabdlb.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uabdlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uabdlb_s:
; CHECK: uabdlb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabdlb.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UABDLT
;

define <vscale x 8 x i16> @uabdlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uabdlt_b:
; CHECK: uabdlt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uabdlt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uabdlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uabdlt_h:
; CHECK: uabdlt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uabdlt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uabdlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uabdlt_s:
; CHECK: uabdlt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uabdlt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UADDLB
;

define <vscale x 8 x i16>
@uaddlb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uaddlb_b:
; CHECK: uaddlb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddlb.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uaddlb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uaddlb_h:
; CHECK: uaddlb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddlb.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uaddlb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uaddlb_s:
; CHECK: uaddlb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddlb.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UADDLT
;

define <vscale x 8 x i16> @uaddlt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uaddlt_b:
; CHECK: uaddlt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddlt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uaddlt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uaddlt_h:
; CHECK: uaddlt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddlt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uaddlt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uaddlt_s:
; CHECK: uaddlt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddlt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UADDWB
;

define <vscale x 8 x i16> @uaddwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uaddwb_b:
; CHECK: uaddwb z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddwb.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uaddwb_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uaddwb_h:
; CHECK: uaddwb z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddwb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uaddwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uaddwb_s:
; CHECK: uaddwb z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddwb.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UADDWT
;

define <vscale x 8 x i16> @uaddwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uaddwt_b:
; CHECK: uaddwt z0.h, z0.h, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaddwt.nxv8i16(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uaddwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uaddwt_h:
; CHECK: uaddwt z0.s, z0.s, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaddwt.nxv4i32(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uaddwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uaddwt_s:
; CHECK: uaddwt z0.d, z0.d, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaddwt.nxv2i64(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2
x i64> %out
}

;
; UMULLB (Vectors)
;

define <vscale x 8 x i16> @umullb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: umullb_b:
; CHECK: umullb z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umullb.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umullb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umullb_h:
; CHECK: umullb z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullb.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umullb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umullb_s:
; CHECK: umullb z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullb.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UMULLB (Indexed)
;

define <vscale x 4 x i32> @umullb_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umullb_lane_h:
; CHECK: umullb z0.s, z0.h, z1.h[0]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullb.lane.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 0)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umullb_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umullb_lane_s:
; CHECK: umullb z0.d, z0.s, z1.s[3]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullb.lane.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 3)
  ret <vscale x 2 x i64> %out
}

;
; UMULLT (Vectors)
;

define <vscale x 8 x i16> @umullt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: umullt_b:
; CHECK: umullt z0.h, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umullt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umullt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umullt_h:
; CHECK: umullt z0.s, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umullt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umullt_s:
; CHECK: umullt z0.d, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ret <vscale x 2 x i64> %out
}

;
; UMULLT (Indexed)
;

define <vscale x 4 x i32> @umullt_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umullt_lane_h:
; CHECK: umullt z0.s, z0.h, z1.h[1]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umullt.lane.nxv4i32(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i32 1)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umullt_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umullt_lane_s:
; CHECK: umullt z0.d, z0.s, z1.s[2]
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umullt.lane.nxv2i64(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 2)
  ret <vscale x 2 x i64> %out
}

;
; USHLLB
;

define <vscale x 8 x i16> @ushllb_b(<vscale x 16 x i8> %a) {
; CHECK-LABEL: ushllb_b:
; CHECK: ushllb z0.h, z0.b, #6
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ushllb.nxv8i16(<vscale x 16 x i8> %a, i32 6)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @ushllb_h(<vscale x 8 x
i16> %a) { 1036; CHECK-LABEL: ushllb_h: 1037; CHECK: ushllb z0.s, z0.h, #7 1038; CHECK-NEXT: ret 1039 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ushllb.nxv4i32(<vscale x 8 x i16> %a, i32 7) 1040 ret <vscale x 4 x i32> %out 1041} 1042 1043define <vscale x 2 x i64> @ushllb_s(<vscale x 4 x i32> %a) { 1044; CHECK-LABEL: ushllb_s: 1045; CHECK: ushllb z0.d, z0.s, #8 1046; CHECK-NEXT: ret 1047 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ushllb.nxv2i64(<vscale x 4 x i32> %a, i32 8) 1048 ret <vscale x 2 x i64> %out 1049} 1050 1051; 1052; USHLLT 1053; 1054 1055define <vscale x 8 x i16> @ushllt_b(<vscale x 16 x i8> %a) { 1056; CHECK-LABEL: ushllt_b: 1057; CHECK: ushllt z0.h, z0.b, #7 1058; CHECK-NEXT: ret 1059 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ushllt.nxv8i16(<vscale x 16 x i8> %a, i32 7) 1060 ret <vscale x 8 x i16> %out 1061} 1062 1063define <vscale x 4 x i32> @ushllt_h(<vscale x 8 x i16> %a) { 1064; CHECK-LABEL: ushllt_h: 1065; CHECK: ushllt z0.s, z0.h, #15 1066; CHECK-NEXT: ret 1067 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ushllt.nxv4i32(<vscale x 8 x i16> %a, i32 15) 1068 ret <vscale x 4 x i32> %out 1069} 1070 1071define <vscale x 2 x i64> @ushllt_s(<vscale x 4 x i32> %a) { 1072; CHECK-LABEL: ushllt_s: 1073; CHECK: ushllt z0.d, z0.s, #31 1074; CHECK-NEXT: ret 1075 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ushllt.nxv2i64(<vscale x 4 x i32> %a, i32 31) 1076 ret <vscale x 2 x i64> %out 1077} 1078 1079; 1080; USUBLB 1081; 1082 1083define <vscale x 8 x i16> @usublb_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { 1084; CHECK-LABEL: usublb_b: 1085; CHECK: usublb z0.h, z0.b, z1.b 1086; CHECK-NEXT: ret 1087 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usublb.nxv8i16(<vscale x 16 x i8> %a, 1088 <vscale x 16 x i8> %b) 1089 ret <vscale x 8 x i16> %out 1090} 1091 1092define <vscale x 4 x i32> @usublb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { 1093; CHECK-LABEL: usublb_h: 1094; CHECK: usublb z0.s, z0.h, z1.h 1095; CHECK-NEXT: ret 
1096 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usublb.nxv4i32(<vscale x 8 x i16> %a, 1097 <vscale x 8 x i16> %b) 1098 ret <vscale x 4 x i32> %out 1099} 1100 1101define <vscale x 2 x i64> @usublb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { 1102; CHECK-LABEL: usublb_s: 1103; CHECK: usublb z0.d, z0.s, z1.s 1104; CHECK-NEXT: ret 1105 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usublb.nxv2i64(<vscale x 4 x i32> %a, 1106 <vscale x 4 x i32> %b) 1107 ret <vscale x 2 x i64> %out 1108} 1109 1110; 1111; USUBLT 1112; 1113 1114define <vscale x 8 x i16> @usublt_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { 1115; CHECK-LABEL: usublt_b: 1116; CHECK: usublt z0.h, z0.b, z1.b 1117; CHECK-NEXT: ret 1118 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usublt.nxv8i16(<vscale x 16 x i8> %a, 1119 <vscale x 16 x i8> %b) 1120 ret <vscale x 8 x i16> %out 1121} 1122 1123define <vscale x 4 x i32> @usublt_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { 1124; CHECK-LABEL: usublt_h: 1125; CHECK: usublt z0.s, z0.h, z1.h 1126; CHECK-NEXT: ret 1127 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usublt.nxv4i32(<vscale x 8 x i16> %a, 1128 <vscale x 8 x i16> %b) 1129 ret <vscale x 4 x i32> %out 1130} 1131 1132define <vscale x 2 x i64> @usublt_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { 1133; CHECK-LABEL: usublt_s: 1134; CHECK: usublt z0.d, z0.s, z1.s 1135; CHECK-NEXT: ret 1136 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usublt.nxv2i64(<vscale x 4 x i32> %a, 1137 <vscale x 4 x i32> %b) 1138 ret <vscale x 2 x i64> %out 1139} 1140 1141; 1142; USUBWB 1143; 1144 1145define <vscale x 8 x i16> @usubwb_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) { 1146; CHECK-LABEL: usubwb_b: 1147; CHECK: usubwb z0.h, z0.h, z1.b 1148; CHECK-NEXT: ret 1149 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usubwb.nxv8i16(<vscale x 8 x i16> %a, 1150 <vscale x 16 x i8> %b) 1151 ret <vscale x 8 x i16> %out 1152} 1153 1154define <vscale x 4 x i32> @usubwb_h(<vscale x 4 x i32> %a, <vscale x 8 
x i16> %b) { 1155; CHECK-LABEL: usubwb_h: 1156; CHECK: usubwb z0.s, z0.s, z1.h 1157; CHECK-NEXT: ret 1158 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usubwb.nxv4i32(<vscale x 4 x i32> %a, 1159 <vscale x 8 x i16> %b) 1160 ret <vscale x 4 x i32> %out 1161} 1162 1163define <vscale x 2 x i64> @usubwb_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) { 1164; CHECK-LABEL: usubwb_s: 1165; CHECK: usubwb z0.d, z0.d, z1.s 1166; CHECK-NEXT: ret 1167 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usubwb.nxv2i64(<vscale x 2 x i64> %a, 1168 <vscale x 4 x i32> %b) 1169 ret <vscale x 2 x i64> %out 1170} 1171 1172; 1173; USUBWT 1174; 1175 1176define <vscale x 8 x i16> @usubwt_b(<vscale x 8 x i16> %a, <vscale x 16 x i8> %b) { 1177; CHECK-LABEL: usubwt_b: 1178; CHECK: usubwt z0.h, z0.h, z1.b 1179; CHECK-NEXT: ret 1180 %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usubwt.nxv8i16(<vscale x 8 x i16> %a, 1181 <vscale x 16 x i8> %b) 1182 ret <vscale x 8 x i16> %out 1183} 1184 1185define <vscale x 4 x i32> @usubwt_h(<vscale x 4 x i32> %a, <vscale x 8 x i16> %b) { 1186; CHECK-LABEL: usubwt_h: 1187; CHECK: usubwt z0.s, z0.s, z1.h 1188; CHECK-NEXT: ret 1189 %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usubwt.nxv4i32(<vscale x 4 x i32> %a, 1190 <vscale x 8 x i16> %b) 1191 ret <vscale x 4 x i32> %out 1192} 1193 1194define <vscale x 2 x i64> @usubwt_s(<vscale x 2 x i64> %a, <vscale x 4 x i32> %b) { 1195; CHECK-LABEL: usubwt_s: 1196; CHECK: usubwt z0.d, z0.d, z1.s 1197; CHECK-NEXT: ret 1198 %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usubwt.nxv2i64(<vscale x 2 x i64> %a, 1199 <vscale x 4 x i32> %b) 1200 ret <vscale x 2 x i64> %out 1201} 1202 1203declare <vscale x 8 x i16> @llvm.aarch64.sve.sabalb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>) 1204declare <vscale x 4 x i32> @llvm.aarch64.sve.sabalb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>) 1205declare <vscale x 2 x i64> @llvm.aarch64.sve.sabalb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 
x i32>, <vscale x 4 x i32>) 1206 1207declare <vscale x 8 x i16> @llvm.aarch64.sve.sabalt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>) 1208declare <vscale x 4 x i32> @llvm.aarch64.sve.sabalt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>) 1209declare <vscale x 2 x i64> @llvm.aarch64.sve.sabalt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>) 1210 1211declare <vscale x 8 x i16> @llvm.aarch64.sve.sabdlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>) 1212declare <vscale x 4 x i32> @llvm.aarch64.sve.sabdlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>) 1213declare <vscale x 2 x i64> @llvm.aarch64.sve.sabdlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>) 1214 1215declare <vscale x 8 x i16> @llvm.aarch64.sve.sabdlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>) 1216declare <vscale x 4 x i32> @llvm.aarch64.sve.sabdlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>) 1217declare <vscale x 2 x i64> @llvm.aarch64.sve.sabdlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>) 1218 1219declare <vscale x 8 x i16> @llvm.aarch64.sve.saddlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>) 1220declare <vscale x 4 x i32> @llvm.aarch64.sve.saddlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>) 1221declare <vscale x 2 x i64> @llvm.aarch64.sve.saddlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>) 1222 1223declare <vscale x 8 x i16> @llvm.aarch64.sve.saddlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>) 1224declare <vscale x 4 x i32> @llvm.aarch64.sve.saddlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>) 1225declare <vscale x 2 x i64> @llvm.aarch64.sve.saddlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>) 1226 1227declare <vscale x 8 x i16> @llvm.aarch64.sve.saddwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>) 1228declare <vscale x 4 x i32> @llvm.aarch64.sve.saddwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>) 1229declare <vscale x 2 x i64> @llvm.aarch64.sve.saddwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>) 1230 
declare <vscale x 8 x i16> @llvm.aarch64.sve.saddwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.saddwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.saddwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.smullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.smullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.smullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.smullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.smullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.smullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sshllb.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sshllb.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sshllb.nxv2i64(<vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sshllt.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sshllt.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sshllt.nxv2i64(<vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ssublb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssublb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssublb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ssublt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssublt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssublt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ssubwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssubwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssubwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ssubwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ssubwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ssubwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uabalb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabalb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabalb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uabalt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabalt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabalt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uabdlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabdlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabdlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uabdlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uabdlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uabdlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddlb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddlb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddlb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddlt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddlt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddlt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.uaddwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uaddwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uaddwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.umullb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umullb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umullb.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullb.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.umullt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.umullt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.umullt.lane.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.umullt.lane.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ushllb.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ushllb.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ushllb.nxv2i64(<vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.ushllt.nxv8i16(<vscale x 16 x i8>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ushllt.nxv4i32(<vscale x 8 x i16>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ushllt.nxv2i64(<vscale x 4 x i32>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.usublb.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usublb.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usublb.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.usublt.nxv8i16(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usublt.nxv4i32(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usublt.nxv2i64(<vscale x 4 x i32>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.usubwb.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usubwb.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usubwb.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)

declare <vscale x 8 x i16> @llvm.aarch64.sve.usubwt.nxv8i16(<vscale x 8 x i16>, <vscale x 16 x i8>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usubwt.nxv4i32(<vscale x 4 x i32>, <vscale x 8 x i16>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usubwt.nxv2i64(<vscale x 2 x i64>, <vscale x 4 x i32>)