; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; ANDV

define i8 @andv_nxv8i8(<vscale x 8 x i8> %a) {
; CHECK-LABEL: andv_nxv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    andv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8> %a)
  ret i8 %res
}

define i32 @andv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: andv_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    andv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; ORV

define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: orv_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}

define i64 @orv_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: orv_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z1.d, z1.d, z3.d
; CHECK-NEXT:    orr z0.d, z0.d, z2.d
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}

; XORV

define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
; CHECK-LABEL: xorv_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    eorv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
  ret i16 %res
}

define i32 @xorv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: xorv_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    eorv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; UADDV

define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
; CHECK-LABEL: uaddv_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
  ret i16 %res
}

define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: uaddv_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uaddv d0, p0, z0.h
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}

define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
; CHECK-LABEL: uaddv_nxv16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z1.s, z1.s, z3.s
; CHECK-NEXT:    add z0.s, z0.s, z2.s
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
  ret i32 %res
}

; UMINV

define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: umin_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}

define i64 @umin_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: umin_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %a)
  ret i64 %res
}

; SMINV

define i8 @smin_nxv4i8(<vscale x 4 x i8> %a) {
; CHECK-LABEL: smin_nxv4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %a)
  ret i8 %res
}

define i32 @smin_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: smin_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; UMAXV

; Renamed from smin_nxv16i16: this function tests the umax reduction, and the
; old name clashed with the SMINV naming scheme above.
define i16 @umax_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: umax_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    umaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}

; SMAXV

; Renamed from smin_nxv8i64: this function tests the smax reduction, and the
; old name clashed with the SMINV naming scheme above.
define i64 @smax_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: smax_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    smax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    smaxv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}

declare i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8>)
declare i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32>)

declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)

declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
declare i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32>)

declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
declare i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16>)
declare i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32>)

declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)

declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
declare i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32>)

declare i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16>)

declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)