; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; ASR
;

define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: asr_i8:
; CHECK: asr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: asr_i16:
; CHECK: asr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: asr_i32:
; CHECK: asr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i64:
; CHECK: asr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @asr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_wide_i8:
; CHECK: asr z0.b, p0/m, z0.b, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_wide_i16:
; CHECK: asr z0.h, p0/m, z0.h, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_wide_i32:
; CHECK: asr z0.s, p0/m, z0.s, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

;
; ASRD
;

define <vscale x 16 x i8> @asrd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
; CHECK-LABEL: asrd_i8:
; CHECK: asrd z0.b, p0/m, z0.b, #1
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                <vscale x 16 x i8> %a,
                                                                i32 1)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asrd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
; CHECK-LABEL: asrd_i16:
; CHECK: asrd z0.h, p0/m, z0.h, #2
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
                                                                i32 2)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asrd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
; CHECK-LABEL: asrd_i32:
; CHECK: asrd z0.s, p0/m, z0.s, #31
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                <vscale x 4 x i32> %a,
                                                                i32 31)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @asrd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
; CHECK-LABEL: asrd_i64:
; CHECK: asrd z0.d, p0/m, z0.d, #64
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                <vscale x 2 x i64> %a,
                                                                i32 64)
  ret <vscale x 2 x i64> %out
}

;
; INSR
;

define <vscale x 16 x i8> @insr_i8(<vscale x 16 x i8> %a, i8 %b) {
; CHECK-LABEL: insr_i8:
; CHECK: insr z0.b, w0
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8> %a, i8 %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @insr_i16(<vscale x 8 x i16> %a, i16 %b) {
; CHECK-LABEL: insr_i16:
; CHECK: insr z0.h, w0
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> %a, i16 %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @insr_i32(<vscale x 4 x i32> %a, i32 %b) {
; CHECK-LABEL: insr_i32:
; CHECK: insr z0.s, w0
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32> %a, i32 %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @insr_i64(<vscale x 2 x i64> %a, i64 %b) {
; CHECK-LABEL: insr_i64:
; CHECK: insr z0.d, x0
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64> %a, i64 %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
; CHECK-LABEL: insr_f16:
; CHECK: insr z0.h, h1
; CHECK-NEXT: ret
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half> %a, half %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
; CHECK-LABEL: insr_f32:
; CHECK: insr z0.s, s1
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float> %a, float %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @insr_f64(<vscale x 2 x double> %a, double %b) {
; CHECK-LABEL: insr_f64:
; CHECK: insr z0.d, d1
; CHECK-NEXT: ret
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double> %a, double %b)
  ret <vscale x 2 x double> %out
}

;
; LSL
;

define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: lsl_i8:
; CHECK: lsl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: lsl_i16:
; CHECK: lsl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: lsl_i32:
; CHECK: lsl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i64:
; CHECK: lsl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @lsl_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_wide_i8:
; CHECK: lsl z0.b, p0/m, z0.b, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_wide_i16:
; CHECK: lsl z0.h, p0/m, z0.h, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_wide_i32:
; CHECK: lsl z0.s, p0/m, z0.s, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

;
; LSR
;

define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: lsr_i8:
; CHECK: lsr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
                                                               <vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: lsr_i16:
; CHECK: lsr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
                                                               <vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: lsr_i32:
; CHECK: lsr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
                                                               <vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i64:
; CHECK: lsr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
                                                               <vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

define <vscale x 16 x i8> @lsr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_wide_i8:
; CHECK: lsr z0.b, p0/m, z0.b, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_wide_i16:
; CHECK: lsr z0.h, p0/m, z0.h, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_wide_i32:
; CHECK: lsr z0.s, p0/m, z0.s, z1.d
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8>, i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16>, i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half>, half)
declare <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float>, float)
declare <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double>, double)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)