; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
;
; Codegen coverage for boolean (i1) vector and/or reductions whose scalar
; result feeds a select. Every function below has the same shape:
;   1. compare each i8 lane of %a0 "signed less than zero" to get an i1 vector,
;   2. reduce that i1 vector with the and (all lanes) or the or (any lane)
;      reduction intrinsic,
;   3. return %a1 when the reduced bit is set, otherwise %a2.
; The functions differ only in lane count (1, 2, 4, 8, 16, 32) and in which
; reduction they call. The expected assembly is machine generated; regenerate
; it with the script named on the first line instead of editing it by hand.

declare i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %a)

declare i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %a)
declare i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %a)

; and-reduction, 1 lane. The expected output contains no vector reduction
; instruction: lane 0 is moved to a GPR with sign extension, compared with
; zero, and the select becomes a csel on "lt".
define i32 @reduce_and_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.b[0]
; CHECK-NEXT:    cmp w8, #0 // =0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %x = icmp slt <1 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v1i1(<1 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; and-reduction, 2 lanes. The expected output works on .2s lanes: a shl/sshr
; pair by 24 precedes the compare, and the reduction is a pairwise unsigned
; minimum (uminp) whose low bit is then tested.
define i32 @reduce_and_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <2 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v2i1(<2 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; and-reduction, 4 lanes. The expected output works on .4h lanes: a shl/sshr
; pair by 8 precedes the compare, and the reduction is an across-lanes
; unsigned minimum (uminv).
define i32 @reduce_and_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    uminv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <4 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v4i1(<4 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; and-reduction, 8 lanes. The compare is done directly on .8b lanes and is
; followed by uminv across the 8 bytes.
define i32 @reduce_and_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    uminv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <8 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v8i1(<8 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; and-reduction, 16 lanes. Same sequence as the 8-lane case, on .16b lanes.
define i32 @reduce_and_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <16 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v16i1(<16 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; and-reduction, 32 lanes. The expected output compares v0 and v1 separately,
; merges the two masks with a bitwise and, then performs a single uminv.
define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.16b, v1.16b, #0
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <32 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.and.v32i1(<32 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; or-reduction, 1 lane. The expected output is the same scalar sequence as
; for reduce_and_v1: smov of lane 0, compare with zero, csel on "lt".
define i32 @reduce_or_v1(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.b[0]
; CHECK-NEXT:    cmp w8, #0 // =0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %x = icmp slt <1 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v1i1(<1 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; or-reduction, 2 lanes. Mirrors reduce_and_v2, with a pairwise unsigned
; maximum (umaxp) as the reduction.
define i32 @reduce_or_v2(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <2 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v2i1(<2 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; or-reduction, 4 lanes. Mirrors reduce_and_v4, with an across-lanes unsigned
; maximum (umaxv) as the reduction.
define i32 @reduce_or_v4(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    umaxv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <4 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v4i1(<4 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; or-reduction, 8 lanes. Compare on .8b lanes followed by umaxv.
define i32 @reduce_or_v8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    umaxv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <8 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v8i1(<8 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; or-reduction, 16 lanes. Same sequence as the 8-lane case, on .16b lanes.
define i32 @reduce_or_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <16 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v16i1(<16 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}

; or-reduction, 32 lanes. The expected output compares v0 and v1 separately,
; merges the two masks with a bitwise orr, then performs a single umaxv.
define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.16b, v1.16b, #0
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %x = icmp slt <32 x i8> %a0, zeroinitializer
  %y = call i1 @llvm.experimental.vector.reduce.or.v32i1(<32 x i1> %x)
  %z = select i1 %y, i32 %a1, i32 %a2
  ret i32 %z
}