; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-ODD
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD

; The following tests use the balance-fp-ops feature, and should be independent of
; the target cpu.

; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN --check-prefix CHECK-BALFP
; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD --check-prefix CHECK-BALFP

; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
; our test strategy is to:
;   * Force the pass to always perform register swapping even if the dest register is of the
;     correct color already (-aarch64-a57-fp-load-balancing-force-all)
;   * Force the pass to ignore all hints it obtained from regalloc
;     (-aarch64-a57-fp-load-balancing-override), and run it twice, once where it always
;     hints even (override=1), and once where it always hints odd (override=2).
;
; We then use regex magic to check that in the two cases the register allocation is
; different; this is what gives us the testing coverage and distinguishes cases where
; the pass has done some work versus accidental regalloc.
;
; Every RUN line also turns off both machine schedulers
; (-enable-misched=false -enable-post-misched=false).
; NOTE(review): presumably this keeps the instruction order in the output stable for
; the ordered checks below - confirm.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

; Non-overlapping groups - shouldn't need any changing at all.
; The first multiply-accumulate chain must stay in a single register [[x]] whose
; parity follows the forced hint, all the way to the store.

; CHECK-LABEL: f1:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[x]]
; CHECK: str [[x]]

define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  store double %add9, double* %q, align 8
  %arrayidx11 = getelementptr inbounds double, double* %p, i64 5
  %5 = load double, double* %arrayidx11, align 8
  %arrayidx12 = getelementptr inbounds double, double* %p, i64 6
  %6 = load double, double* %arrayidx12, align 8
  %arrayidx13 = getelementptr inbounds double, double* %p, i64 7
  %7 = load double, double* %arrayidx13, align 8
  %mul15 = fmul fast double %6, %7
  %mul16 = fmul fast double %0, %5
  %add17 = fadd fast double %mul16, %mul15
  %mul18 = fmul fast double %5, %6
  %add19 = fadd fast double %mul18, %add17
  %arrayidx20 = getelementptr inbounds double, double* %q, i64 1
  store double %add19, double* %arrayidx20, align 8
  ret void
}

; Overlapping groups - coloring needed.
; Two interleaved chains: [[x]] takes the hinted parity and [[y]] the opposite one.
; The BALFP runs expect both results to be written by one stp; the cortex-a53 runs
; expect two separate str instructions, in either order.

; CHECK-LABEL: f2:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK-BALFP: stp [[x]], [[y]]
; CHECK-A53-DAG: str [[x]]
; CHECK-A53-DAG: str [[y]]

define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %arrayidx5 = getelementptr inbounds double, double* %p, i64 5
  %5 = load double, double* %arrayidx5, align 8
  %arrayidx6 = getelementptr inbounds double, double* %p, i64 6
  %6 = load double, double* %arrayidx6, align 8
  %arrayidx7 = getelementptr inbounds double, double* %p, i64 7
  %7 = load double, double* %arrayidx7, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %7
  %mul8 = fmul fast double %5, %6
  %mul9 = fmul fast double %1, %2
  %add10 = fadd fast double %mul9, %add
  %mul11 = fmul fast double %3, %4
  %add12 = fadd fast double %mul11, %mul8
  %mul13 = fmul fast double %1, %3
  %sub = fsub fast double %add10, %mul13
  %mul14 = fmul fast double %4, %5
  %add15 = fadd fast double %mul14, %add12
  %mul16 = fmul fast double %2, %3
  %add17 = fadd fast double %mul16, %sub
  store double %add17, double* %q, align 8
  %arrayidx19 = getelementptr inbounds double, double* %q, i64 1
  store double %add15, double* %arrayidx19, align 8
  ret void
}

; Dest register is live on block exit - fixup needed.
; The final fmadd of the chain reads [[x]] but writes a separate register [[y]],
; and it is [[y]] that gets stored after the conditional call.

; CHECK-LABEL: f3:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %cmp = fcmp oeq double %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void bitcast (void (...)* @g to void ()*)() #2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  store double %add9, double* %q, align 8
  ret void
}

declare void @g(...) #1

; Single precision version of f2.

; CHECK-LABEL: f4:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
; CHECK: fmadd [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y]]
; CHECK: fmadd [[x]]
; CHECK-BALFP: stp [[x]], [[y]]
; CHECK-A53-DAG: str [[x]]
; CHECK-A53-DAG: str [[y]]

define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
entry:
  %0 = load float, float* %p, align 4
  %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
  %1 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
  %2 = load float, float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
  %3 = load float, float* %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
  %4 = load float, float* %arrayidx4, align 4
  %arrayidx5 = getelementptr inbounds float, float* %p, i64 5
  %5 = load float, float* %arrayidx5, align 4
  %arrayidx6 = getelementptr inbounds float, float* %p, i64 6
  %6 = load float, float* %arrayidx6, align 4
  %arrayidx7 = getelementptr inbounds float, float* %p, i64 7
  %7 = load float, float* %arrayidx7, align 4
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %7
  %mul8 = fmul fast float %5, %6
  %mul9 = fmul fast float %1, %2
  %add10 = fadd fast float %mul9, %add
  %mul11 = fmul fast float %3, %4
  %add12 = fadd fast float %mul11, %mul8
  %mul13 = fmul fast float %1, %3
  %sub = fsub fast float %add10, %mul13
  %mul14 = fmul fast float %4, %5
  %add15 = fadd fast float %mul14, %add12
  %mul16 = fmul fast float %2, %3
  %add17 = fadd fast float %mul16, %sub
  store float %add17, float* %q, align 4
  %arrayidx19 = getelementptr inbounds float, float* %q, i64 1
  store float %add15, float* %arrayidx19, align 4
  ret void
}

; Single precision version of f3.

; CHECK-LABEL: f5:
; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
; CHECK: str [[y]]

define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
entry:
  %0 = load float, float* %p, align 4
  %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
  %1 = load float, float* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
  %2 = load float, float* %arrayidx2, align 4
  %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
  %3 = load float, float* %arrayidx3, align 4
  %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
  %4 = load float, float* %arrayidx4, align 4
  %mul = fmul fast float %0, %1
  %add = fadd fast float %mul, %4
  %mul5 = fmul fast float %1, %2
  %add6 = fadd fast float %mul5, %add
  %mul7 = fmul fast float %1, %3
  %sub = fsub fast float %add6, %mul7
  %mul8 = fmul fast float %2, %3
  %add9 = fadd fast float %mul8, %sub
  %cmp = fcmp oeq float %3, 0.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  tail call void bitcast (void (...)* @g to void ()*)() #2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  store float %add9, float* %q, align 4
  ret void
}

; Test that regmask clobbering stops a chain sequence.
; The last fmadd writes d0 directly (the value passed to and returned from hh),
; so only the first three chain members are expected in the parity-hinted [[x]].

; CHECK-LABEL: f6:
; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
; CHECK: fmadd [[x]]
; CHECK: fmsub [[x]]
; CHECK: fmadd d0, {{.*}}, [[x]]
; CHECK: bl hh
; CHECK: str d0

define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %call = tail call double @hh(double %add9) #2
  store double %call, double* %q, align 8
  ret void
}

declare double @hh(double) #1

; Check that we correctly deal with repeated operands.
; The following testcase creates:
;   %d1 = FADDDrr killed %d0, %d0
; We'll get a crash if we naively look at the first operand, remove it
; from the substitution list then look at the second operand.
; No parity is checked here: any d register is accepted for [[x]], as long as
; the same register feeds both operands of the fadd.

; CHECK-LABEL: f7:
; CHECK: fmadd [[x:d[0-9]+]]
; CHECK: fadd d1, [[x]], [[x]]

define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
entry:
  %0 = load double, double* %p, align 8
  %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  %1 = load double, double* %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  %2 = load double, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  %3 = load double, double* %arrayidx3, align 8
  %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  %4 = load double, double* %arrayidx4, align 8
  %mul = fmul fast double %0, %1
  %add = fadd fast double %mul, %4
  %mul5 = fmul fast double %1, %2
  %add6 = fadd fast double %mul5, %add
  %mul7 = fmul fast double %1, %3
  %sub = fsub fast double %add6, %mul7
  %mul8 = fmul fast double %2, %3
  %add9 = fadd fast double %mul8, %sub
  %add10 = fadd fast double %add9, %add9
  call void @hhh(double 0.0, double %add10)
  ret void
}

declare void @hhh(double, double)

attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #2 = { nounwind }