; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon | FileCheck %s

; Tests for @llvm.vector.reduce.and lowering on AArch64/NEON.
; The i1 reductions mask the final result with #0x1; wider element types
; reduce pairwise via ext/and on the vector halves, then scalarize.

define i1 @test_redand_v1i1(<1 x i1> %a) {
; CHECK-LABEL: test_redand_v1i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w0, w0, #0x1
; CHECK-NEXT:    ret
  %or_result = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %a)
  ret i1 %or_result
}

define i1 @test_redand_v2i1(<2 x i1> %a) {
; CHECK-LABEL: test_redand_v2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    and w8, w9, w8
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %or_result = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
  ret i1 %or_result
}

define i1 @test_redand_v4i1(<4 x i1> %a) {
; CHECK-LABEL: test_redand_v4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w10, v0.h[1]
; CHECK-NEXT:    umov w11, v0.h[0]
; CHECK-NEXT:    umov w9, v0.h[2]
; CHECK-NEXT:    and w10, w11, w10
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    and w9, w10, w9
; CHECK-NEXT:    and w8, w9, w8
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %or_result = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
  ret i1 %or_result
}

define i1 @test_redand_v8i1(<8 x i1> %a) {
; CHECK-LABEL: test_redand_v8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w14, v0.b[1]
; CHECK-NEXT:    umov w15, v0.b[0]
; CHECK-NEXT:    umov w13, v0.b[2]
; CHECK-NEXT:    and w14, w15, w14
; CHECK-NEXT:    umov w12, v0.b[3]
; CHECK-NEXT:    and w13, w14, w13
; CHECK-NEXT:    umov w11, v0.b[4]
; CHECK-NEXT:    and w12, w13, w12
; CHECK-NEXT:    umov w10, v0.b[5]
; CHECK-NEXT:    and w11, w12, w11
; CHECK-NEXT:    umov w9, v0.b[6]
; CHECK-NEXT:    and w10, w11, w10
; CHECK-NEXT:    umov w8, v0.b[7]
; CHECK-NEXT:    and w9, w10, w9
; CHECK-NEXT:    and w8, w9, w8
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %or_result = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
  ret i1 %or_result
}

define i1 @test_redand_v16i1(<16 x i1> %a) {
; CHECK-LABEL: test_redand_v16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    umov w8, v0.b[1]
; CHECK-NEXT:    umov w9, v0.b[0]
; CHECK-NEXT:    and w8, w9, w8
; CHECK-NEXT:    umov w9, v0.b[2]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[3]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[4]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[5]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[6]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[7]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %or_result = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
  ret i1 %or_result
}

define i8 @test_redand_v1i8(<1 x i8> %a) {
; CHECK-LABEL: test_redand_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w0, v0.b[0]
; CHECK-NEXT:    ret
  %and_result = call i8 @llvm.vector.reduce.and.v1i8(<1 x i8> %a)
  ret i8 %and_result
}

define i8 @test_redand_v3i8(<3 x i8> %a) {
; CHECK-LABEL: test_redand_v3i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, w1
; CHECK-NEXT:    and w8, w8, w2
; CHECK-NEXT:    and w0, w8, #0xff
; CHECK-NEXT:    ret
  %and_result = call i8 @llvm.vector.reduce.and.v3i8(<3 x i8> %a)
  ret i8 %and_result
}

define i8 @test_redand_v4i8(<4 x i8> %a) {
; CHECK-LABEL: test_redand_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w10, v0.h[1]
; CHECK-NEXT:    umov w11, v0.h[0]
; CHECK-NEXT:    umov w9, v0.h[2]
; CHECK-NEXT:    and w10, w11, w10
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    and w9, w10, w9
; CHECK-NEXT:    and w0, w9, w8
; CHECK-NEXT:    ret
  %and_result = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a)
  ret i8 %and_result
}

define i8 @test_redand_v8i8(<8 x i8> %a) {
; CHECK-LABEL: test_redand_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w14, v0.b[1]
; CHECK-NEXT:    umov w15, v0.b[0]
; CHECK-NEXT:    umov w13, v0.b[2]
; CHECK-NEXT:    and w14, w15, w14
; CHECK-NEXT:    umov w12, v0.b[3]
; CHECK-NEXT:    and w13, w14, w13
; CHECK-NEXT:    umov w11, v0.b[4]
; CHECK-NEXT:    and w12, w13, w12
; CHECK-NEXT:    umov w10, v0.b[5]
; CHECK-NEXT:    and w11, w12, w11
; CHECK-NEXT:    umov w9, v0.b[6]
; CHECK-NEXT:    and w10, w11, w10
; CHECK-NEXT:    umov w8, v0.b[7]
; CHECK-NEXT:    and w9, w10, w9
; CHECK-NEXT:    and w0, w9, w8
; CHECK-NEXT:    ret
  %and_result = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a)
  ret i8 %and_result
}

define i8 @test_redand_v16i8(<16 x i8> %a) {
; CHECK-LABEL: test_redand_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    umov w8, v0.b[1]
; CHECK-NEXT:    umov w9, v0.b[0]
; CHECK-NEXT:    and w8, w9, w8
; CHECK-NEXT:    umov w9, v0.b[2]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[3]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[4]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[5]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[6]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[7]
; CHECK-NEXT:    and w0, w8, w9
; CHECK-NEXT:    ret
  %and_result = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a)
  ret i8 %and_result
}

define i8 @test_redand_v32i8(<32 x i8> %a) {
; CHECK-LABEL: test_redand_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    umov w8, v0.b[1]
; CHECK-NEXT:    umov w9, v0.b[0]
; CHECK-NEXT:    and w8, w9, w8
; CHECK-NEXT:    umov w9, v0.b[2]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[3]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[4]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[5]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[6]
; CHECK-NEXT:    and w8, w8, w9
; CHECK-NEXT:    umov w9, v0.b[7]
; CHECK-NEXT:    and w0, w8, w9
; CHECK-NEXT:    ret
  %and_result = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a)
  ret i8 %and_result
}

define i16 @test_redand_v4i16(<4 x i16> %a) {
; CHECK-LABEL: test_redand_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w10, v0.h[1]
; CHECK-NEXT:    umov w11, v0.h[0]
; CHECK-NEXT:    umov w9, v0.h[2]
; CHECK-NEXT:    and w10, w11, w10
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    and w9, w10, w9
; CHECK-NEXT:    and w0, w9, w8
; CHECK-NEXT:    ret
  %and_result = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a)
  ret i16 %and_result
}

define i16 @test_redand_v8i16(<8 x i16> %a) {
; CHECK-LABEL: test_redand_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    umov w9, v0.h[0]
; CHECK-NEXT:    umov w10, v0.h[2]
; CHECK-NEXT:    and w8, w9, w8
; CHECK-NEXT:    and w8, w8, w10
; CHECK-NEXT:    umov w9, v0.h[3]
; CHECK-NEXT:    and w0, w8, w9
; CHECK-NEXT:    ret
  %and_result = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a)
  ret i16 %and_result
}

define i16 @test_redand_v16i16(<16 x i16> %a) {
; CHECK-LABEL: test_redand_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    umov w9, v0.h[0]
; CHECK-NEXT:    umov w10, v0.h[2]
; CHECK-NEXT:    and w8, w9, w8
; CHECK-NEXT:    and w8, w8, w10
; CHECK-NEXT:    umov w9, v0.h[3]
; CHECK-NEXT:    and w0, w8, w9
; CHECK-NEXT:    ret
  %and_result = call i16 @llvm.vector.reduce.and.v16i16(<16 x i16> %a)
  ret i16 %and_result
}

define i32 @test_redand_v2i32(<2 x i32> %a) {
; CHECK-LABEL: test_redand_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    and w0, w9, w8
; CHECK-NEXT:    ret
  %and_result = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> %a)
  ret i32 %and_result
}

define i32 @test_redand_v4i32(<4 x i32> %a) {
; CHECK-LABEL: test_redand_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    and w0, w9, w8
; CHECK-NEXT:    ret
  %and_result = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
  ret i32 %and_result
}

define i32 @test_redand_v8i32(<8 x i32> %a) {
; CHECK-LABEL: test_redand_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    and w0, w9, w8
; CHECK-NEXT:    ret
  %and_result = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> %a)
  ret i32 %and_result
}

define i64 @test_redand_v2i64(<2 x i64> %a) {
; CHECK-LABEL: test_redand_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %and_result = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> %a)
  ret i64 %and_result
}

define i64 @test_redand_v4i64(<4 x i64> %a) {
; CHECK-LABEL: test_redand_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %and_result = call i64 @llvm.vector.reduce.and.v4i64(<4 x i64> %a)
  ret i64 %and_result
}

declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>)
declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>)
declare i64 @llvm.vector.reduce.and.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.and.v4i64(<4 x i64>)
declare i32 @llvm.vector.reduce.and.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.and.v8i32(<8 x i32>)
declare i16 @llvm.vector.reduce.and.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.and.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.and.v16i16(<16 x i16>)
declare i8 @llvm.vector.reduce.and.v1i8(<1 x i8>)
declare i8 @llvm.vector.reduce.and.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.and.v4i8(<4 x i8>)
declare i8 @llvm.vector.reduce.and.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.and.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.and.v32i8(<32 x i8>)