; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

;
; EOR3 (vector, bitwise, unpredicated)
;
define <vscale x 16 x i8> @eor3_i8(<vscale x 16 x i8> %a,
                                   <vscale x 16 x i8> %b,
                                   <vscale x 16 x i8> %c) {
; CHECK-LABEL: eor3_i8
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.eor3.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @eor3_i16(<vscale x 8 x i16> %a,
                                    <vscale x 8 x i16> %b,
                                    <vscale x 8 x i16> %c) {
; CHECK-LABEL: eor3_i16
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.eor3.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @eor3_i32(<vscale x 4 x i32> %a,
                                    <vscale x 4 x i32> %b,
                                    <vscale x 4 x i32> %c) {
; CHECK-LABEL: eor3_i32
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.eor3.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @eor3_i64(<vscale x 2 x i64> %a,
                                    <vscale x 2 x i64> %b,
                                    <vscale x 2 x i64> %c) {
; CHECK-LABEL: eor3_i64
; CHECK: eor3 z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.eor3.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BCAX (vector, bitwise, unpredicated)
;
define <vscale x 16 x i8> @bcax_i8(<vscale x 16 x i8> %a,
                                   <vscale x 16 x i8> %b,
                                   <vscale x 16 x i8> %c) {
; CHECK-LABEL: bcax_i8
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bcax.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bcax_i16(<vscale x 8 x i16> %a,
                                    <vscale x 8 x i16> %b,
                                    <vscale x 8 x i16> %c) {
; CHECK-LABEL: bcax_i16
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bcax.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bcax_i32(<vscale x 4 x i32> %a,
                                    <vscale x 4 x i32> %b,
                                    <vscale x 4 x i32> %c) {
; CHECK-LABEL: bcax_i32
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bcax.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bcax_i64(<vscale x 2 x i64> %a,
                                    <vscale x 2 x i64> %b,
                                    <vscale x 2 x i64> %c) {
; CHECK-LABEL: bcax_i64
; CHECK: bcax z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bcax.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BSL (vector, bitwise, unpredicated)
;
define <vscale x 16 x i8> @bsl_i8(<vscale x 16 x i8> %a,
                                  <vscale x 16 x i8> %b,
                                  <vscale x 16 x i8> %c) {
; CHECK-LABEL: bsl_i8
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bsl_i16(<vscale x 8 x i16> %a,
                                   <vscale x 8 x i16> %b,
                                   <vscale x 8 x i16> %c) {
; CHECK-LABEL: bsl_i16
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bsl_i32(<vscale x 4 x i32> %a,
                                   <vscale x 4 x i32> %b,
                                   <vscale x 4 x i32> %c) {
; CHECK-LABEL: bsl_i32
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bsl_i64(<vscale x 2 x i64> %a,
                                   <vscale x 2 x i64> %b,
                                   <vscale x 2 x i64> %c) {
; CHECK-LABEL: bsl_i64
; CHECK: bsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BSL1N (vector, bitwise, unpredicated)
;
define <vscale x 16 x i8> @bsl1n_i8(<vscale x 16 x i8> %a,
                                    <vscale x 16 x i8> %b,
                                    <vscale x 16 x i8> %c) {
; CHECK-LABEL: bsl1n_i8
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl1n.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bsl1n_i16(<vscale x 8 x i16> %a,
                                     <vscale x 8 x i16> %b,
                                     <vscale x 8 x i16> %c) {
; CHECK-LABEL: bsl1n_i16
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl1n.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bsl1n_i32(<vscale x 4 x i32> %a,
                                     <vscale x 4 x i32> %b,
                                     <vscale x 4 x i32> %c) {
; CHECK-LABEL: bsl1n_i32
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl1n.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bsl1n_i64(<vscale x 2 x i64> %a,
                                     <vscale x 2 x i64> %b,
                                     <vscale x 2 x i64> %c) {
; CHECK-LABEL: bsl1n_i64
; CHECK: bsl1n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl1n.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; BSL2N (vector, bitwise, unpredicated)
;
define <vscale x 16 x i8> @bsl2n_i8(<vscale x 16 x i8> %a,
                                    <vscale x 16 x i8> %b,
                                    <vscale x 16 x i8> %c) {
; CHECK-LABEL: bsl2n_i8
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.bsl2n.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @bsl2n_i16(<vscale x 8 x i16> %a,
                                     <vscale x 8 x i16> %b,
                                     <vscale x 8 x i16> %c) {
; CHECK-LABEL: bsl2n_i16
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.bsl2n.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @bsl2n_i32(<vscale x 4 x i32> %a,
                                     <vscale x 4 x i32> %b,
                                     <vscale x 4 x i32> %c) {
; CHECK-LABEL: bsl2n_i32
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.bsl2n.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @bsl2n_i64(<vscale x 2 x i64> %a,
                                     <vscale x 2 x i64> %b,
                                     <vscale x 2 x i64> %c) {
; CHECK-LABEL: bsl2n_i64
; CHECK: bsl2n z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.bsl2n.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; NBSL (vector, bitwise, unpredicated)
;
define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a,
                                   <vscale x 16 x i8> %b,
                                   <vscale x 16 x i8> %c) {
; CHECK-LABEL: nbsl_i8
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.nbsl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a,
                                    <vscale x 8 x i16> %b,
                                    <vscale x 8 x i16> %c) {
; CHECK-LABEL: nbsl_i16
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.nbsl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a,
                                    <vscale x 4 x i32> %b,
                                    <vscale x 4 x i32> %c) {
; CHECK-LABEL: nbsl_i32
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.nbsl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a,
                                    <vscale x 2 x i64> %b,
                                    <vscale x 2 x i64> %c) {
; CHECK-LABEL: nbsl_i64
; CHECK: nbsl z0.d, z0.d, z1.d, z2.d
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.nbsl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
  ret <vscale x 2 x i64> %res
}

;
; XAR (vector, bitwise, unpredicated)
;

define <vscale x 16 x i8> @xar_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: xar_b:
; CHECK: xar z0.b, z0.b, z1.b, #1
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.xar.nxv16i8(<vscale x 16 x i8> %a,
                                                               <vscale x 16 x i8> %b,
                                                               i32 1)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @xar_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: xar_h:
; CHECK: xar z0.h, z0.h, z1.h, #2
; CHECK-NEXT: ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.xar.nxv8i16(<vscale x 8 x i16> %a,
                                                               <vscale x 8 x i16> %b,
                                                               i32 2)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @xar_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: xar_s:
; CHECK: xar z0.s, z0.s, z1.s, #3
; CHECK-NEXT: ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.xar.nxv4i32(<vscale x 4 x i32> %a,
                                                               <vscale x 4 x i32> %b,
                                                               i32 3)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @xar_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: xar_d:
; CHECK: xar z0.d, z0.d, z1.d, #4
; CHECK-NEXT: ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.xar.nxv2i64(<vscale x 2 x i64> %a,
                                                               <vscale x 2 x i64> %b,
                                                               i32 4)
  ret <vscale x 2 x i64> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.eor3.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.eor3.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.eor3.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.eor3.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bcax.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bcax.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bcax.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bcax.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl1n.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl1n.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl1n.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl1n.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.bsl2n.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.bsl2n.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.bsl2n.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.bsl2n.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.nbsl.nxv16i8(<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.nbsl.nxv8i16(<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.nbsl.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.nbsl.nxv2i64(<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.xar.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.xar.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.xar.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.xar.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)