; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr7 < %s | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-PWR
; RUN: llc -verify-machineinstrs -O3 -mcpu=pwr9 < %s | FileCheck %s -check-prefix=FIXPOINT
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

; Baseline: a left-leaning chain ((x0 + x1) + x2) + x3 with no commuted operands.
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds1:
; CHECK:       # %bb.0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %t0, %x2
  %t2 = fadd reassoc nsz float %t1, %x3
  ret float %t2
}

; Same chain with the operands of the second add commuted (%x2 + %t0).
define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds2:
; CHECK:       # %bb.0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %x2, %t0
  %t2 = fadd reassoc nsz float %t1, %x3
  ret float %t2
}

; Same chain with the operands of the third add commuted (%x3 + %t1).
define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds3:
; CHECK:       # %bb.0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %t0, %x2
  %t2 = fadd reassoc nsz float %x3, %t1
  ret float %t2
}

; Same chain with the operands of both the second and the third add commuted.
define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds4:
; CHECK:       # %bb.0:
; CHECK:       fadds [[REG0:[0-9]+]], 1, 2
; CHECK:       fadds [[REG1:[0-9]+]], 3, 4
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %x2, %t0
  %t2 = fadd reassoc nsz float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; CHECK-LABEL: reassociate_adds5:
; CHECK:       # %bb.0:
; CHECK-DAG:   fadds [[REG12:[0-9]+]], 5, 6
; CHECK-DAG:   fadds [[REG0:[0-9]+]], 1, 2
; CHECK-DAG:   fadds [[REG11:[0-9]+]], 3, 4
; CHECK-DAG:   fadds [[REG13:[0-9]+]], [[REG12]], 7
; CHECK-DAG:   fadds [[REG1:[0-9]+]], [[REG0]], [[REG11]]
; CHECK-DAG:   fadds [[REG2:[0-9]+]], [[REG1]], [[REG13]]
; CHECK:       fadds 1, [[REG2]], 8
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %t0, %x2
  %t2 = fadd reassoc nsz float %t1, %x3
  %t3 = fadd reassoc nsz float %t2, %x4
  %t4 = fadd reassoc nsz float %t3, %x5
  %t5 = fadd reassoc nsz float %t4, %x6
  %t6 = fadd reassoc nsz float %t5, %x7
  ret float %t6
}

; Verify that we reassociate vector instructions too.
define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds1:
; CHECK:       # %bb.0:
; CHECK-PWR:   xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
  %t1 = fadd reassoc nsz <4 x float> %t0, %x2
  %t2 = fadd reassoc nsz <4 x float> %t1, %x3
  ret <4 x float> %t2
}

; Vector chain with the operands of the second add commuted (%x2 + %t0).
define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds2:
; CHECK:       # %bb.0:
; CHECK-PWR:   xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
  %t1 = fadd reassoc nsz <4 x float> %x2, %t0
  %t2 = fadd reassoc nsz <4 x float> %t1, %x3
  ret <4 x float> %t2
}

; Vector chain with the operands of the third add commuted (%x3 + %t1).
define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds3:
; CHECK:       # %bb.0:
; CHECK-PWR:   xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
  %t1 = fadd reassoc nsz <4 x float> %t0, %x2
  %t2 = fadd reassoc nsz <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Vector chain with the operands of both the second and the third add commuted.
define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-LABEL: vector_reassociate_adds4:
; CHECK:       # %bb.0:
; CHECK-PWR:   xvaddsp [[REG0:[0-9]+]], 34, 35
; CHECK-PWR:   xvaddsp [[REG1:[0-9]+]], 36, 37
; CHECK-PWR:   xvaddsp 34, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr

  %t0 = fadd reassoc nsz <4 x float> %x0, %x1
  %t1 = fadd reassoc nsz <4 x float> %x2, %t0
  %t2 = fadd reassoc nsz <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; The next four functions use fdiv/fadd/fmul without any fast-math flags and
; have no FileCheck directives attached; they are only compiled by the RUN
; lines (with -verify-machineinstrs).
; NOTE(review): presumably kept as verifier/crash coverage for chains that
; carry no reassoc flag -- confirm the intent before adding output checks.

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Integer multiply chains. These are checked only by the pwr9 RUN line, which
; uses the FIXPOINT prefix: two independent multiplies feed a third one.

define i32 @reassociate_mullw(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; FIXPOINT-LABEL: reassociate_mullw:
; FIXPOINT:       # %bb.0:
; FIXPOINT:       mullw [[REG0:[0-9]+]], 3, 4
; FIXPOINT:       mullw [[REG1:[0-9]+]], 5, 6
; FIXPOINT:       mullw 3, [[REG0]], [[REG1]]
; FIXPOINT-NEXT:  blr

  %t0 = mul i32 %x0, %x1
  %t1 = mul i32 %t0, %x2
  %t2 = mul i32 %t1, %x3
  ret i32 %t2
}

define i64 @reassociate_mulld(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; FIXPOINT-LABEL: reassociate_mulld:
; FIXPOINT:       # %bb.0:
; FIXPOINT:       mulld [[REG0:[0-9]+]], 3, 4
; FIXPOINT:       mulld [[REG1:[0-9]+]], 5, 6
; FIXPOINT:       mulld 3, [[REG0]], [[REG1]]
; FIXPOINT-NEXT:  blr

  %t0 = mul i64 %x0, %x1
  %t1 = mul i64 %t0, %x2
  %t2 = mul i64 %t1, %x3
  ret i64 %t2
}

; An add chain fed by two products. The checks expect two fused multiply-adds
; (matched in either order) whose results are combined by one final add.

define double @reassociate_mamaa_double(double %0, double %1, double %2, double %3, double %4, double %5) {
; CHECK-LABEL: reassociate_mamaa_double:
; CHECK:         # %bb.0:
; CHECK-PWR-DAG: xsmaddadp 1, 6, 5
; CHECK-PWR-DAG: xsmaddadp 2, 4, 3
; CHECK-PWR:     xsadddp 1, 2, 1
; CHECK-NEXT:    blr
  %7 = fmul reassoc nsz double %3, %2
  %8 = fmul reassoc nsz double %5, %4
  %9 = fadd reassoc nsz double %1, %0
  %10 = fadd reassoc nsz double %9, %7
  %11 = fadd reassoc nsz double %10, %8
  ret double %11
}

define float @reassociate_mamaa_float(float %0, float %1, float %2, float %3, float %4, float %5) {
; CHECK-LABEL: reassociate_mamaa_float:
; CHECK:       # %bb.0:
; CHECK-DAG:   fmadds [[REG0:[0-9]+]], 4, 3, 2
; CHECK-DAG:   fmadds [[REG1:[0-9]+]], 6, 5, 1
; CHECK:       fadds 1, [[REG0]], [[REG1]]
; CHECK-NEXT:  blr
  %7 = fmul reassoc nsz float %3, %2
  %8 = fmul reassoc nsz float %5, %4
  %9 = fadd reassoc nsz float %1, %0
  %10 = fadd reassoc nsz float %9, %7
  %11 = fadd reassoc nsz float %10, %8
  ret float %11
}

define <4 x float> @reassociate_mamaa_vec(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4, <4 x float> %5) {
; CHECK-LABEL: reassociate_mamaa_vec:
; CHECK:         # %bb.0:
; CHECK-PWR-DAG: xvmaddasp [[REG0:[0-9]+]], 39, 38
; CHECK-PWR-DAG: xvmaddasp [[REG1:[0-9]+]], 37, 36
; CHECK-PWR:     xvaddsp 34, [[REG1]], [[REG0]]
; CHECK-NEXT:    blr
  %7 = fmul reassoc nsz <4 x float> %3, %2
  %8 = fmul reassoc nsz <4 x float> %5, %4
  %9 = fadd reassoc nsz <4 x float> %1, %0
  %10 = fadd reassoc nsz <4 x float> %9, %7
  %11 = fadd reassoc nsz <4 x float> %10, %8
  ret <4 x float> %11
}

; Four products plus one plain addend (%6). The checks expect one multiply and
; three fused multiply-adds, with a single add joining the two accumulators.

define double @reassociate_mamama_double(double %0, double %1, double %2, double %3, double %4, double %5, double %6, double %7, double %8) {
; CHECK-LABEL: reassociate_mamama_double:
; CHECK:         # %bb.0:
; CHECK-PWR:     xsmaddadp 7, 2, 1
; CHECK-PWR-DAG: xsmuldp [[REG0:[0-9]+]], 4, 3
; CHECK-PWR-DAG: xsmaddadp 7, 6, 5
; CHECK-PWR-DAG: xsmaddadp [[REG0]], 9, 8
; CHECK-PWR:     xsadddp 1, 7, [[REG0]]
; CHECK-NEXT:    blr
  %10 = fmul reassoc nsz double %1, %0
  %11 = fmul reassoc nsz double %3, %2
  %12 = fmul reassoc nsz double %5, %4
  %13 = fmul reassoc nsz double %8, %7
  %14 = fadd reassoc nsz double %11, %10
  %15 = fadd reassoc nsz double %14, %6
  %16 = fadd reassoc nsz double %15, %12
  %17 = fadd reassoc nsz double %16, %13
  ret double %17
}

; Eight products accumulated onto %0. The checks expect one fmuls and seven
; fmadds arranged as two interleaved accumulator chains
; (REG0 -> REG2 -> REG5 -> REG7 and REG1 -> REG3 -> REG4 -> REG6) that are
; joined by the final fadds.

define dso_local float @reassociate_mamama_8(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8,
                                             float %9, float %10, float %11, float %12, float %13, float %14, float %15, float %16) {
; CHECK-LABEL: reassociate_mamama_8:
; CHECK:       # %bb.0:
; CHECK-DAG:   fmadds [[REG0:[0-9]+]], 3, 2, 1
; CHECK-DAG:   fmuls [[REG1:[0-9]+]], 5, 4
; CHECK-DAG:   fmadds [[REG2:[0-9]+]], 7, 6, [[REG0]]
; CHECK-DAG:   fmadds [[REG3:[0-9]+]], 9, 8, [[REG1]]
;
; CHECK-DAG:   fmadds [[REG4:[0-9]+]], 13, 12, [[REG3]]
; CHECK-DAG:   fmadds [[REG5:[0-9]+]], 11, 10, [[REG2]]
;
; CHECK-DAG:   fmadds [[REG6:[0-9]+]], 3, 2, [[REG4]]
; CHECK-DAG:   fmadds [[REG7:[0-9]+]], 5, 4, [[REG5]]
; CHECK:       fadds 1, [[REG7]], [[REG6]]
; CHECK-NEXT:  blr
  %18 = fmul reassoc nsz float %2, %1
  %19 = fadd reassoc nsz float %18, %0
  %20 = fmul reassoc nsz float %4, %3
  %21 = fadd reassoc nsz float %19, %20
  %22 = fmul reassoc nsz float %6, %5
  %23 = fadd reassoc nsz float %21, %22
  %24 = fmul reassoc nsz float %8, %7
  %25 = fadd reassoc nsz float %23, %24
  %26 = fmul reassoc nsz float %10, %9
  %27 = fadd reassoc nsz float %25, %26
  %28 = fmul reassoc nsz float %12, %11
  %29 = fadd reassoc nsz float %27, %28
  %30 = fmul reassoc nsz float %14, %13
  %31 = fadd reassoc nsz float %29, %30
  %32 = fmul reassoc nsz float %16, %15
  %33 = fadd reassoc nsz float %31, %32
  ret float %33
}