; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -mtriple=aarch64-apple-ios -global-isel -disable-expand-reductions -stop-after=irtranslator %s -o - | FileCheck %s
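
; Check that the @llvm.vector.reduce.* intrinsics are translated to the
; corresponding G_VECREDUCE_* generic instructions. Ordered fadd/fmul
; reductions should use the strict G_VECREDUCE_SEQ_* opcodes, which take the
; start value as an explicit operand; reassoc reductions should instead emit a
; plain G_VECREDUCE_* followed by a scalar op that folds in the start value.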

declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>)

define float @fadd_seq(float %start, <4 x float> %vec) {
  ; CHECK-LABEL: name: fadd_seq
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q1, $s0
  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_SEQ_FADD:%[0-9]+]]:_(s32) = G_VECREDUCE_SEQ_FADD [[COPY]](s32), [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_SEQ_FADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
  ret float %res
}

define float @fadd_fast(float %start, <4 x float> %vec) {
  ; CHECK-LABEL: name: fadd_fast
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q1, $s0
  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY1]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FADD:%[0-9]+]]:_(s32) = reassoc G_VECREDUCE_FADD [[BITCAST]](<4 x s32>)
  ; CHECK:   [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY]], [[VECREDUCE_FADD]]
  ; CHECK:   $s0 = COPY [[FADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %vec)
  ret float %res
}

define double @fmul_seq(double %start, <4 x double> %vec) {
  ; CHECK-LABEL: name: fmul_seq
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $d0, $q1, $q2
  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_SEQ_FMUL:%[0-9]+]]:_(s64) = G_VECREDUCE_SEQ_FMUL [[COPY]](s64), [[CONCAT_VECTORS]](<4 x s64>)
  ; CHECK:   $d0 = COPY [[VECREDUCE_SEQ_FMUL]](s64)
  ; CHECK:   RET_ReallyLR implicit $d0
  %res = call double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
  ret double %res
}

define double @fmul_fast(double %start, <4 x double> %vec) {
  ; CHECK-LABEL: name: fmul_fast
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $d0, $q1, $q2
  ; CHECK:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
  ; CHECK:   [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1
  ; CHECK:   [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2
  ; CHECK:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s64>) = G_CONCAT_VECTORS [[COPY1]](<2 x s64>), [[COPY2]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMUL:%[0-9]+]]:_(s64) = reassoc G_VECREDUCE_FMUL [[CONCAT_VECTORS]](<4 x s64>)
  ; CHECK:   [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[COPY]], [[VECREDUCE_FMUL]]
  ; CHECK:   $d0 = COPY [[FMUL]](s64)
  ; CHECK:   RET_ReallyLR implicit $d0
  %res = call reassoc double @llvm.vector.reduce.fmul.v4f64(double %start, <4 x double> %vec)
  ret double %res
}
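
; fmax/fmin (and the integer reductions below) take no start value. Fast-math
; flags on the call, such as the nnan in fmin_nnan, should be carried over to
; the G_VECREDUCE instruction.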

declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)

define float @fmax(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_FMAX [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %vec)
  ret float %res
}

define float @fmin(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
  ret float %res
}

define float @fmin_nnan(<4 x float> %vec) {
  ; CHECK-LABEL: name: fmin_nnan
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
  ; CHECK:   [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
  ; CHECK:   [[VECREDUCE_FMIN:%[0-9]+]]:_(s32) = nnan G_VECREDUCE_FMIN [[BITCAST]](<4 x s32>)
  ; CHECK:   $s0 = COPY [[VECREDUCE_FMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $s0
  %res = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %vec)
  ret float %res
}

declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)

define i32 @add(<4 x i32> %vec) {
  ; CHECK-LABEL: name: add
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_ADD:%[0-9]+]]:_(s32) = G_VECREDUCE_ADD [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_ADD]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.mul.v4i32(<4 x i32>)

define i32 @mul(<4 x i32> %vec) {
  ; CHECK-LABEL: name: mul
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_MUL:%[0-9]+]]:_(s32) = G_VECREDUCE_MUL [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_MUL]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32>)

define i32 @and(<4 x i32> %vec) {
  ; CHECK-LABEL: name: and
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_AND:%[0-9]+]]:_(s32) = G_VECREDUCE_AND [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_AND]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32>)

define i32 @or(<4 x i32> %vec) {
  ; CHECK-LABEL: name: or
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_OR:%[0-9]+]]:_(s32) = G_VECREDUCE_OR [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_OR]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>)

define i32 @xor(<4 x i32> %vec) {
  ; CHECK-LABEL: name: xor
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_XOR:%[0-9]+]]:_(s32) = G_VECREDUCE_XOR [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_XOR]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %vec)
  ret i32 %res
}

declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)

define i32 @smax(<4 x i32> %vec) {
  ; CHECK-LABEL: name: smax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_SMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_SMAX [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_SMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %vec)
  ret i32 %res
}

define i32 @smin(<4 x i32> %vec) {
  ; CHECK-LABEL: name: smin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_SMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_SMIN [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_SMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %vec)
  ret i32 %res
}

define i32 @umax(<4 x i32> %vec) {
  ; CHECK-LABEL: name: umax
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_UMAX:%[0-9]+]]:_(s32) = G_VECREDUCE_UMAX [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_UMAX]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %vec)
  ret i32 %res
}

define i32 @umin(<4 x i32> %vec) {
  ; CHECK-LABEL: name: umin
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK:   liveins: $q0
  ; CHECK:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
  ; CHECK:   [[VECREDUCE_UMIN:%[0-9]+]]:_(s32) = G_VECREDUCE_UMIN [[COPY]](<4 x s32>)
  ; CHECK:   $w0 = COPY [[VECREDUCE_UMIN]](s32)
  ; CHECK:   RET_ReallyLR implicit $w0
  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %vec)
  ret i32 %res
}