; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s

; https://bugs.llvm.org/show_bug.cgi?id=37104

; All the advanced stuff (negative tests, commutativity) is handled in the
; scalar version of the test only.

; Overview
; --------
; Every function merges %x and %y under a variable per-bit %mask: a result bit
; is taken from %x where the mask bit is 1 and from %y where it is 0.
; Two equivalent IR spellings are exercised for each vector type:
;   out_*:  (%x & %mask) | (%y & ~%mask)     ("unfolded" form, explicit NOT)
;   in_*:   ((%x ^ %y) & %mask) ^ %y         ("folded" form, no NOT constant)
; Functions are grouped by total vector width (8/16/32/64/128 bits).
; For most functions the expected code is a single "bif v0, v1, v2" on the .8b
; arrangement (vectors of 64 bits or fewer) or the .16b arrangement (128-bit
; vectors). The exceptions are out_v2i8, out_v4i8, out_v4i8_undef and
; out_v2i16, whose expected code is still the five-instruction
; movi/and/eor/and/orr sequence.
; The *_undef variants replace one lane of the all-ones NOT constant by undef.

; ============================================================================ ;
; 8-bit vector width
; ============================================================================ ;

define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-LABEL: out_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %mx = and <1 x i8> %x, %mask
  %notmask = xor <1 x i8> %mask, <i8 -1>
  %my = and <1 x i8> %y, %notmask
  %r = or <1 x i8> %mx, %my
  ret <1 x i8> %r
}

; ============================================================================ ;
; 16-bit vector width
; ============================================================================ ;

; The expected output here is the unfused and/eor/and/orr sequence, not a
; single bif. The movi immediate sets only the low byte of each 32-bit half.
; NOTE(review): presumably the i8 lanes are widened during legalization, so the
; NOT constant is no longer all-ones in the widened lanes - confirm.
define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
; CHECK-LABEL: out_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d3, #0x0000ff000000ff
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %mx = and <2 x i8> %x, %mask
  %notmask = xor <2 x i8> %mask, <i8 -1, i8 -1>
  %my = and <2 x i8> %y, %notmask
  %r = or <2 x i8> %mx, %my
  ret <2 x i8> %r
}

define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
; CHECK-LABEL: out_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %mx = and <1 x i16> %x, %mask
  %notmask = xor <1 x i16> %mask, <i16 -1>
  %my = and <1 x i16> %y, %notmask
  %r = or <1 x i16> %mx, %my
  ret <1 x i16> %r
}

; ============================================================================ ;
; 32-bit vector width
; ============================================================================ ;

; Expected output is the unfused sequence; the movi immediate sets only the low
; byte of each 16-bit lane.
define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-LABEL: out_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d3, #0xff00ff00ff00ff
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %mx = and <4 x i8> %x, %mask
  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <4 x i8> %y, %notmask
  %r = or <4 x i8> %mx, %my
  ret <4 x i8> %r
}

; Same as out_v4i8 except that lane 2 of the NOT constant is undef; the
; expected output is identical to out_v4i8.
define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-LABEL: out_v4i8_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d3, #0xff00ff00ff00ff
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %mx = and <4 x i8> %x, %mask
  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1>
  %my = and <4 x i8> %y, %notmask
  %r = or <4 x i8> %mx, %my
  ret <4 x i8> %r
}

; Expected output is the unfused sequence; the movi immediate sets only the low
; 16 bits of each 32-bit half.
define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
; CHECK-LABEL: out_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d3, #0x00ffff0000ffff
; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
; CHECK-NEXT:    eor v2.8b, v2.8b, v3.8b
; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %mx = and <2 x i16> %x, %mask
  %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1>
  %my = and <2 x i16> %y, %notmask
  %r = or <2 x i16> %mx, %my
  ret <2 x i16> %r
}

define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
; CHECK-LABEL: out_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %mx = and <1 x i32> %x, %mask
  %notmask = xor <1 x i32> %mask, <i32 -1>
  %my = and <1 x i32> %y, %notmask
  %r = or <1 x i32> %mx, %my
  ret <1 x i32> %r
}

; ============================================================================ ;
; 64-bit vector width
; ============================================================================ ;

define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
; CHECK-LABEL: out_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %mx = and <8 x i8> %x, %mask
  %notmask = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <8 x i8> %y, %notmask
  %r = or <8 x i8> %mx, %my
  ret <8 x i8> %r
}

define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-LABEL: out_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %mx = and <4 x i16> %x, %mask
  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1>
  %my = and <4 x i16> %y, %notmask
  %r = or <4 x i16> %mx, %my
  ret <4 x i16> %r
}

; Lane 2 of the NOT constant is undef; a single bif is still expected.
define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-LABEL: out_v4i16_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %mx = and <4 x i16> %x, %mask
  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 undef, i16 -1>
  %my = and <4 x i16> %y, %notmask
  %r = or <4 x i16> %mx, %my
  ret <4 x i16> %r
}

define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
; CHECK-LABEL: out_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %mx = and <2 x i32> %x, %mask
  %notmask = xor <2 x i32> %mask, <i32 -1, i32 -1>
  %my = and <2 x i32> %y, %notmask
  %r = or <2 x i32> %mx, %my
  ret <2 x i32> %r
}

define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
; CHECK-LABEL: out_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %mx = and <1 x i64> %x, %mask
  %notmask = xor <1 x i64> %mask, <i64 -1>
  %my = and <1 x i64> %y, %notmask
  %r = or <1 x i64> %mx, %my
  ret <1 x i64> %r
}

; ============================================================================ ;
; 128-bit vector width
; ============================================================================ ;

define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
; CHECK-LABEL: out_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <16 x i8> %x, %mask
  %notmask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <16 x i8> %y, %notmask
  %r = or <16 x i8> %mx, %my
  ret <16 x i8> %r
}

define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
; CHECK-LABEL: out_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <8 x i16> %x, %mask
  %notmask = xor <8 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %my = and <8 x i16> %y, %notmask
  %r = or <8 x i16> %mx, %my
  ret <8 x i16> %r
}

define <4 x i32> @out_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
; CHECK-LABEL: out_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <4 x i32> %x, %mask
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %y, %notmask
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; Lane 2 of the NOT constant is undef; a single bif is still expected.
define <4 x i32> @out_v4i32_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
; CHECK-LABEL: out_v4i32_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <4 x i32> %x, %mask
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1>
  %my = and <4 x i32> %y, %notmask
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
; CHECK-LABEL: out_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %mx = and <2 x i64> %x, %mask
  %notmask = xor <2 x i64> %mask, <i64 -1, i64 -1>
  %my = and <2 x i64> %y, %notmask
  %r = or <2 x i64> %mx, %my
  ret <2 x i64> %r
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Should be the same as the previous one.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; The in_* functions below spell the same merge as ((%x ^ %y) & %mask) ^ %y.
; No NOT constant is involved, and every in_* function (including the v2i8,
; v4i8 and v2i16 cases) is expected to produce a single bif.

; ============================================================================ ;
; 8-bit vector width
; ============================================================================ ;

define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-LABEL: in_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %n0 = xor <1 x i8> %x, %y
  %n1 = and <1 x i8> %n0, %mask
  %r = xor <1 x i8> %n1, %y
  ret <1 x i8> %r
}

; ============================================================================ ;
; 16-bit vector width
; ============================================================================ ;

define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
; CHECK-LABEL: in_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %n0 = xor <2 x i8> %x, %y
  %n1 = and <2 x i8> %n0, %mask
  %r = xor <2 x i8> %n1, %y
  ret <2 x i8> %r
}

define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
; CHECK-LABEL: in_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %n0 = xor <1 x i16> %x, %y
  %n1 = and <1 x i16> %n0, %mask
  %r = xor <1 x i16> %n1, %y
  ret <1 x i16> %r
}

; ============================================================================ ;
; 32-bit vector width
; ============================================================================ ;

define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-LABEL: in_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %n0 = xor <4 x i8> %x, %y
  %n1 = and <4 x i8> %n0, %mask
  %r = xor <4 x i8> %n1, %y
  ret <4 x i8> %r
}

define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
; CHECK-LABEL: in_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %n0 = xor <2 x i16> %x, %y
  %n1 = and <2 x i16> %n0, %mask
  %r = xor <2 x i16> %n1, %y
  ret <2 x i16> %r
}

define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
; CHECK-LABEL: in_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %n0 = xor <1 x i32> %x, %y
  %n1 = and <1 x i32> %n0, %mask
  %r = xor <1 x i32> %n1, %y
  ret <1 x i32> %r
}

; ============================================================================ ;
; 64-bit vector width
; ============================================================================ ;

define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
; CHECK-LABEL: in_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %n0 = xor <8 x i8> %x, %y
  %n1 = and <8 x i8> %n0, %mask
  %r = xor <8 x i8> %n1, %y
  ret <8 x i8> %r
}

define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-LABEL: in_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %n0 = xor <4 x i16> %x, %y
  %n1 = and <4 x i16> %n0, %mask
  %r = xor <4 x i16> %n1, %y
  ret <4 x i16> %r
}

define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
; CHECK-LABEL: in_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %n0 = xor <2 x i32> %x, %y
  %n1 = and <2 x i32> %n0, %mask
  %r = xor <2 x i32> %n1, %y
  ret <2 x i32> %r
}

define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
; CHECK-LABEL: in_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %n0 = xor <1 x i64> %x, %y
  %n1 = and <1 x i64> %n0, %mask
  %r = xor <1 x i64> %n1, %y
  ret <1 x i64> %r
}

; ============================================================================ ;
; 128-bit vector width
; ============================================================================ ;

define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
; CHECK-LABEL: in_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <16 x i8> %x, %y
  %n1 = and <16 x i8> %n0, %mask
  %r = xor <16 x i8> %n1, %y
  ret <16 x i8> %r
}

define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
; CHECK-LABEL: in_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <8 x i16> %x, %y
  %n1 = and <8 x i16> %n0, %mask
  %r = xor <8 x i16> %n1, %y
  ret <8 x i16> %r
}

define <4 x i32> @in_v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) nounwind {
; CHECK-LABEL: in_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <4 x i32> %x, %y
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
; CHECK-LABEL: in_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %n0 = xor <2 x i64> %x, %y
  %n1 = and <2 x i64> %n0, %mask
  %r = xor <2 x i64> %n1, %y
  ret <2 x i64> %r
}