; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; ANDV

define i8 @andv_nxv8i8(<vscale x 8 x i8> %a) {
; CHECK-LABEL: andv_nxv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    andv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8> %a)
  ret i8 %res
}

define i32 @andv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: andv_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    andv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; ORV

define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: orv_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}

define i64 @orv_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: orv_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z1.d, z1.d, z3.d
; CHECK-NEXT:    orr z0.d, z0.d, z2.d
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}
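
; NOTE: In the checks above, the single-register cases appear to lower to a
; fixed three-step pattern: materialise an all-active predicate (ptrue),
; apply the horizontal reduction (andv/orv/...), then move the scalar out of
; the FP/SIMD register with fmov. Inputs spanning several Z registers seem to
; be combined pairwise with the unpredicated vector op before the final
; reduction (see orv_nxv8i64). The "// kill: def $w0 killed $w0 killed $x0"
; lines appear to be comments the compiler emits for the implicit truncation
; of the 64-bit reduction result in x0 down to w0; they are not instructions.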

; XORV

define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
; CHECK-LABEL: xorv_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    eorv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
  ret i16 %res
}

define i32 @xorv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: xorv_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    eorv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; UADDV

define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
; CHECK-LABEL: uaddv_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
  ret i16 %res
}

define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: uaddv_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uaddv d0, p0, z0.h
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}

define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
; CHECK-LABEL: uaddv_nxv16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z1.s, z1.s, z3.s
; CHECK-NEXT:    add z0.s, z0.s, z2.s
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
  ret i32 %res
}

; UMINV

define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: umin_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}

define i64 @umin_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: umin_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %a)
  ret i64 %res
}

; SMINV

define i8 @smin_nxv4i8(<vscale x 4 x i8> %a) {
; CHECK-LABEL: smin_nxv4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %a)
  ret i8 %res
}

define i32 @smin_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: smin_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; UMAXV

define i16 @umax_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: umax_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    umaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}

; SMAXV

define i64 @smax_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: smax_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    smax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    smaxv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}

declare i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8>)
declare i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32>)

declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)

declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
declare i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32>)

declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
declare i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16>)
declare i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32>)

declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)

declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
declare i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32>)

declare i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16>)

declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)
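
; NOTE: For element types narrower than the container lanes they are promoted
; into (e.g. nxv8i8 in .h lanes, nxv2i16 and nxv2i32 in .d lanes), the
; and/or/xor/add reductions appear to operate on the widened lanes directly,
; since the result is truncated to the narrow type afterwards anyway. The
; min/max reductions instead seem to normalise the lanes first, because the
; comparison is sensitive to the upper bits: umin_nxv2i32 masks them off
; (and z0.d, z0.d, #0xffffffff) and smin_nxv4i8 sign-extends (sxtb).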