; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

define arm_aapcs_vfpcc i32 @and_v2i32(<2 x i32> %x) {
; CHECK-LABEL: and_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @and_v4i32(<4 x i32> %x) {
; CHECK-LABEL: and_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @and_v8i32(<8 x i32> %x) {
; CHECK-LABEL: and_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i16 @and_v4i16(<4 x i16> %x) {
; CHECK-LABEL: and_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @and_v8i16(<8 x i16> %x) {
; CHECK-LABEL: and_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @and_v16i16(<16 x i16> %x) {
; CHECK-LABEL: and_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i8 @and_v8i8(<8 x i8> %x) {
; CHECK-LABEL: and_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @and_v16i8(<16 x i8> %x) {
; CHECK-LABEL: and_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @and_v32i8(<32 x i8> %x) {
; CHECK-LABEL: and_v32i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i64 @and_v1i64(<1 x i64> %x) {
; CHECK-LABEL: and_v1i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @and_v2i64(<2 x i64> %x) {
; CHECK-LABEL: and_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @and_v4i64(<4 x i64> %x) {
; CHECK-LABEL: and_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i32 @and_v2i32_acc(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: and_v2i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %x)
  %r = and i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @and_v4i32_acc(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: and_v4i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %x)
  %r = and i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @and_v8i32_acc(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: and_v8i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %x)
  %r = and i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i16 @and_v4i16_acc(<4 x i16> %x, i16 %y) {
; CHECK-LABEL: and_v4i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %x)
  %r = and i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @and_v8i16_acc(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: and_v8i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %x)
  %r = and i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @and_v16i16_acc(<16 x i16> %x, i16 %y) {
; CHECK-LABEL: and_v16i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %x)
  %r = and i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i8 @and_v8i8_acc(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: and_v8i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %x)
  %r = and i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @and_v16i8_acc(<16 x i8> %x, i8 %y) {
; CHECK-LABEL: and_v16i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %x)
  %r = and i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @and_v32i8_acc(<32 x i8> %x, i8 %y) {
; CHECK-LABEL: and_v32i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    ands r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %x)
  %r = and i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i64 @and_v1i64_acc(<1 x i64> %x, i64 %y) {
; CHECK-LABEL: and_v1i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    ands r0, r2
; CHECK-NEXT:    ands r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v1i64(<1 x i64> %x)
  %r = and i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i64 @and_v2i64_acc(<2 x i64> %x, i64 %y) {
; CHECK-LABEL: and_v2i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    ands r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %x)
  %r = and i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i64 @and_v4i64_acc(<4 x i64> %x, i64 %y) {
; CHECK-LABEL: and_v4i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    ands r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    ands r2, r3
; CHECK-NEXT:    ands r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %x)
  %r = and i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i32 @or_v2i32(<2 x i32> %x) {
; CHECK-LABEL: or_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @or_v4i32(<4 x i32> %x) {
; CHECK-LABEL: or_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @or_v8i32(<8 x i32> %x) {
; CHECK-LABEL: or_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i16 @or_v4i16(<4 x i16> %x) {
; CHECK-LABEL: or_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @or_v8i16(<8 x i16> %x) {
; CHECK-LABEL: or_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @or_v16i16(<16 x i16> %x) {
; CHECK-LABEL: or_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i8 @or_v8i8(<8 x i8> %x) {
; CHECK-LABEL: or_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @or_v16i8(<16 x i8> %x) {
; CHECK-LABEL: or_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @or_v32i8(<32 x i8> %x) {
; CHECK-LABEL: or_v32i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i64 @or_v1i64(<1 x i64> %x) {
; CHECK-LABEL: or_v1i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @or_v2i64(<2 x i64> %x) {
; CHECK-LABEL: or_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @or_v4i64(<4 x i64> %x) {
; CHECK-LABEL: or_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i32 @or_v2i32_acc(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: or_v2i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> %x)
  %r = or i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @or_v4i32_acc(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: or_v4i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %x)
  %r = or i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @or_v8i32_acc(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: or_v8i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> %x)
  %r = or i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i16 @or_v4i16_acc(<4 x i16> %x, i16 %y) {
; CHECK-LABEL: or_v4i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> %x)
  %r = or i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @or_v8i16_acc(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: or_v8i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> %x)
  %r = or i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @or_v16i16_acc(<16 x i16> %x, i16 %y) {
; CHECK-LABEL: or_v16i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.or.v16i16(<16 x i16> %x)
  %r = or i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i8 @or_v8i8_acc(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: or_v8i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %x)
  %r = or i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @or_v16i8_acc(<16 x i8> %x, i8 %y) {
; CHECK-LABEL: or_v16i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %x)
  %r = or i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @or_v32i8_acc(<32 x i8> %x, i8 %y) {
; CHECK-LABEL: or_v32i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    orrs r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %x)
  %r = or i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i64 @or_v1i64_acc(<1 x i64> %x, i64 %y) {
; CHECK-LABEL: or_v1i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    orrs r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v1i64(<1 x i64> %x)
  %r = or i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i64 @or_v2i64_acc(<2 x i64> %x, i64 %y) {
; CHECK-LABEL: or_v2i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> %x)
  %r = or i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i64 @or_v4i64_acc(<4 x i64> %x, i64 %y) {
; CHECK-LABEL: or_v4i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q0, q0, q1
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    orrs r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    orrs r2, r3
; CHECK-NEXT:    orrs r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %x)
  %r = or i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i32 @xor_v2i32(<2 x i32> %x) {
; CHECK-LABEL: xor_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @xor_v4i32(<4 x i32> %x) {
; CHECK-LABEL: xor_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i32 @xor_v8i32(<8 x i32> %x) {
; CHECK-LABEL: xor_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %x)
  ret i32 %z
}

define arm_aapcs_vfpcc i16 @xor_v4i16(<4 x i16> %x) {
; CHECK-LABEL: xor_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s3
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, s1
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @xor_v8i16(<8 x i16> %x) {
; CHECK-LABEL: xor_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i16 @xor_v16i16(<16 x i16> %x) {
; CHECK-LABEL: xor_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %x)
  ret i16 %z
}

define arm_aapcs_vfpcc i8 @xor_v8i8(<8 x i8> %x) {
; CHECK-LABEL: xor_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u16 r1, q0[2]
; CHECK-NEXT:    vmov.u16 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @xor_v16i8(<16 x i8> %x) {
; CHECK-LABEL: xor_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i8 @xor_v32i8(<32 x i8> %x) {
; CHECK-LABEL: xor_v32i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r0, q0[12]
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r2, q0[0]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %x)
  ret i8 %z
}

define arm_aapcs_vfpcc i64 @xor_v1i64(<1 x i64> %x) {
; CHECK-LABEL: xor_v1i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @xor_v2i64(<2 x i64> %x) {
; CHECK-LABEL: xor_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i64 @xor_v4i64(<4 x i64> %x) {
; CHECK-LABEL: xor_v4i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x)
  ret i64 %z
}

define arm_aapcs_vfpcc i32 @xor_v2i32_acc(<2 x i32> %x, i32 %y) {
; CHECK-LABEL: xor_v2i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s2
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %x)
  %r = xor i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @xor_v4i32_acc(<4 x i32> %x, i32 %y) {
; CHECK-LABEL: xor_v4i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %x)
  %r = xor i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i32 @xor_v8i32_acc(<8 x i32> %x, i32 %y) {
; CHECK-LABEL: xor_v8i32_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %x)
  %r = xor i32 %y, %z
  ret i32 %r
}

define arm_aapcs_vfpcc i16 @xor_v4i16_acc(<4 x i16> %x, i16 %y) {
; CHECK-LABEL: xor_v4i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r1, s3
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov r2, s1
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> %x)
  %r = xor i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @xor_v8i16_acc(<8 x i16> %x, i16 %y) {
; CHECK-LABEL: xor_v8i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> %x)
  %r = xor i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i16 @xor_v16i16_acc(<16 x i16> %x, i16 %y) {
; CHECK-LABEL: xor_v16i16_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i16 @llvm.vector.reduce.xor.v16i16(<16 x i16> %x)
  %r = xor i16 %y, %z
  ret i16 %r
}

define arm_aapcs_vfpcc i8 @xor_v8i8_acc(<8 x i8> %x, i8 %y) {
; CHECK-LABEL: xor_v8i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u16 r1, q0[6]
; CHECK-NEXT:    vmov.u16 r2, q0[4]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    vmov.u16 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %x)
  %r = xor i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @xor_v16i8_acc(<16 x i8> %x, i8 %y) {
; CHECK-LABEL: xor_v16i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %x)
  %r = xor i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i8 @xor_v32i8_acc(<32 x i8> %x, i8 %y) {
; CHECK-LABEL: xor_v32i8_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev16.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vrev32.8 q1, q0
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.u8 r2, q0[8]
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    vmov.u8 r2, q0[4]
; CHECK-NEXT:    vmov.u8 r3, q0[0]
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    eors r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %x)
  %r = xor i8 %y, %z
  ret i8 %r
}

define arm_aapcs_vfpcc i64 @xor_v1i64_acc(<1 x i64> %x, i64 %y) {
; CHECK-LABEL: xor_v1i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    eors r0, r2
; CHECK-NEXT:    eors r1, r3
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v1i64(<1 x i64> %x)
  %r = xor i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i64 @xor_v2i64_acc(<2 x i64> %x, i64 %y) {
; CHECK-LABEL: xor_v2i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    eors r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %x)
  %r = xor i64 %y, %z
  ret i64 %r
}

define arm_aapcs_vfpcc i64 @xor_v4i64_acc(<4 x i64> %x, i64 %y) {
; CHECK-LABEL: xor_v4i64_acc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    veor q0, q0, q1
; CHECK-NEXT:    vmov r2, s2
; CHECK-NEXT:    vmov r3, s0
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    vmov r3, s1
; CHECK-NEXT:    eors r0, r2
; CHECK-NEXT:    vmov r2, s3
; CHECK-NEXT:    eors r2, r3
; CHECK-NEXT:    eors r1, r2
; CHECK-NEXT:    bx lr
entry:
  %z = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %x)
  %r = xor i64 %y, %z
  ret i64 %r
}

declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.or.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.or.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.or.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.xor.v16i16(<16 x i16>)
declare i16 @llvm.vector.reduce.xor.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.xor.v8i16(<8 x i16>)
declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.or.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.or.v8i32(<8 x i32>)
declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>)
declare i64 @llvm.vector.reduce.and.v1i64(<1 x i64>)
declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.or.v1i64(<1 x i64>)
declare i64 @llvm.vector.reduce.or.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.or.v4i64(<4 x i64>)
declare i64 @llvm.vector.reduce.xor.v1i64(<1 x i64>)
declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>)
declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.or.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.or.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.or.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.xor.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.xor.v32i8(<32 x i8>)
declare i8 @llvm.vector.reduce.xor.v8i8(<8 x i8>)