; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s
; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s

; Also run with -schedule-ppc-vsx-fma-mutation-early as a stress test for the
; live-interval-updating logic.
; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -schedule-ppc-vsx-fma-mutation-early

; Overview: every function below issues several llvm.fma calls that share the
; multiplicand %b (and usually the addend %a) and stores each result through
; %d. The expectations cover both FMA spellings that appear in the output:
; x[sv]maddadp and x[sv]maddmdp (the latter is what the notes below call
; "M-type"). The first four functions use double, the last four use
; <2 x double>. Expectations are given twice: once for the default pipeline and
; once for the -fast-isel -O0 pipeline.

target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"

; test1: two scalar FMAs, b*c+a and b*e+a, stored to d[0] and d[1].
define void @test1(double %a, double %b, double %c, double %e, double* nocapture %d) #0 {
entry:
  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  store double %0, double* %d, align 8
  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  %arrayidx1 = getelementptr inbounds double* %d, i64 1
  store double %1, double* %arrayidx1, align 8
  ret void

; CHECK-LABEL: @test1
; CHECK-DAG: li [[C1:[0-9]+]], 8
; CHECK-DAG: xsmaddmdp 3, 2, 1
; CHECK-DAG: xsmaddadp 1, 2, 4
; CHECK-DAG: stxsdx 3, 0, 7
; CHECK-DAG: stxsdx 1, 7, [[C1]]
; CHECK: blr

; CHECK-FISL-LABEL: @test1
; CHECK-FISL-DAG: fmr 0, 1
; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
; CHECK-FISL-DAG: stxsdx 0, 0, 7
; CHECK-FISL-DAG: xsmaddadp 1, 2, 4
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
; CHECK-FISL-DAG: stxsdx 1, 7, [[C1]]
; CHECK-FISL: blr
}

; test2: three scalar FMAs, b*c+a, b*e+a and b*f+a, stored to d[0], d[1]
; and d[2].
define void @test2(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
entry:
  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  store double %0, double* %d, align 8
  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  %arrayidx1 = getelementptr inbounds double* %d, i64 1
  store double %1, double* %arrayidx1, align 8
  %2 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
  %arrayidx2 = getelementptr inbounds double* %d, i64 2
  store double %2, double* %arrayidx2, align 8
  ret void

; CHECK-LABEL: @test2
; CHECK-DAG: li [[C1:[0-9]+]], 8
; CHECK-DAG: li [[C2:[0-9]+]], 16
; CHECK-DAG: xsmaddmdp 3, 2, 1
; CHECK-DAG: xsmaddmdp 4, 2, 1
; CHECK-DAG: xsmaddadp 1, 2, 5
; CHECK-DAG: stxsdx 3, 0, 8
; CHECK-DAG: stxsdx 4, 8, [[C1]]
; CHECK-DAG: stxsdx 1, 8, [[C2]]
; CHECK: blr

; CHECK-FISL-LABEL: @test2
; CHECK-FISL-DAG: fmr 0, 1
; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
; CHECK-FISL-DAG: stxsdx 0, 0, 8
; CHECK-FISL-DAG: fmr 0, 1
; CHECK-FISL-DAG: xsmaddadp 0, 2, 4
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
; CHECK-FISL: blr
}

; test3: like test2, but the second FMA result (%1) also feeds a fourth FMA as
; its addend (%2 = b*c+%1, stored to d[3]). %1 itself is stored to d[1] only at
; the very end, after the d[3] and d[2] stores.
define void @test3(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
entry:
  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  store double %0, double* %d, align 8
  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
  %arrayidx1 = getelementptr inbounds double* %d, i64 3
  store double %2, double* %arrayidx1, align 8
  %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
  %arrayidx2 = getelementptr inbounds double* %d, i64 2
  store double %3, double* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds double* %d, i64 1
  store double %1, double* %arrayidx3, align 8
  ret void

; CHECK-LABEL: @test3
; CHECK-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-DAG: li [[C1:[0-9]+]], 24
; CHECK-DAG: li [[C2:[0-9]+]], 16
; CHECK-DAG: li [[C3:[0-9]+]], 8
; CHECK-DAG: xsmaddmdp 4, 2, 1
; CHECK-DAG: xsmaddadp 1, 2, 5

; Note: We could convert this next FMA to M-type as well, but it would require
; re-ordering the instructions.
; CHECK-DAG: xsmaddadp [[F1]], 2, 3

; CHECK-DAG: xsmaddmdp 2, 3, 4
; CHECK-DAG: stxsdx [[F1]], 0, 8
; CHECK-DAG: stxsdx 2, 8, [[C1]]
; CHECK-DAG: stxsdx 1, 8, [[C2]]
; CHECK-DAG: stxsdx 4, 8, [[C3]]
; CHECK: blr

; CHECK-FISL-LABEL: @test3
; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
; CHECK-FISL-DAG: fmr 4, [[F1]]
; CHECK-FISL-DAG: xsmaddadp 4, 2, 3
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
; CHECK-FISL-DAG: stxsdx 4, 8, [[C1]]
; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
; CHECK-FISL: blr
}

; test4: same four FMAs as test3, but %1 is stored to d[1] immediately, before
; it is consumed as the addend of %2 (stored to d[3]); d[2] is written last.
define void @test4(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
entry:
  %0 = tail call double @llvm.fma.f64(double %b, double %c, double %a)
  store double %0, double* %d, align 8
  %1 = tail call double @llvm.fma.f64(double %b, double %e, double %a)
  %arrayidx1 = getelementptr inbounds double* %d, i64 1
  store double %1, double* %arrayidx1, align 8
  %2 = tail call double @llvm.fma.f64(double %b, double %c, double %1)
  %arrayidx3 = getelementptr inbounds double* %d, i64 3
  store double %2, double* %arrayidx3, align 8
  %3 = tail call double @llvm.fma.f64(double %b, double %f, double %a)
  %arrayidx4 = getelementptr inbounds double* %d, i64 2
  store double %3, double* %arrayidx4, align 8
  ret void

; CHECK-LABEL: @test4
; CHECK-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-DAG: li [[C1:[0-9]+]], 8
; CHECK-DAG: li [[C2:[0-9]+]], 16
; CHECK-DAG: xsmaddmdp 4, 2, 1

; Note: We could convert this next FMA to M-type as well, but it would require
; re-ordering the instructions.
; CHECK-DAG: xsmaddadp 1, 2, 5

; CHECK-DAG: xsmaddadp [[F1]], 2, 3
; CHECK-DAG: stxsdx [[F1]], 0, 8
; CHECK-DAG: stxsdx 4, 8, [[C1]]
; CHECK-DAG: li [[C3:[0-9]+]], 24
; CHECK-DAG: xsmaddadp 4, 2, 3
; CHECK-DAG: stxsdx 4, 8, [[C3]]
; CHECK-DAG: stxsdx 1, 8, [[C2]]
; CHECK: blr

; CHECK-FISL-LABEL: @test4
; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 3
; CHECK-FISL-DAG: stxsdx 0, 0, 8
; CHECK-FISL-DAG: fmr [[F1]], 1
; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
; CHECK-FISL: blr
}

declare double @llvm.fma.f64(double, double, double) #0

; testv1: <2 x double> counterpart of test1 (two FMAs, stored to d[0], d[1]).
; The indexed store expectation refers back to the register captured by the
; "li ..., 16" pattern, so the store is tied to that offset register instead
; of accepting an arbitrary one.
define void @testv1(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double>* nocapture %d) #0 {
entry:
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
  store <2 x double> %0, <2 x double>* %d, align 8
  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
  ret void

; CHECK-LABEL: @testv1
; CHECK-DAG: xvmaddmdp 36, 35, 34
; CHECK-DAG: xvmaddadp 34, 35, 37
; CHECK-DAG: li [[C1:[0-9]+]], 16
; CHECK-DAG: stxvd2x 36, 0, 3
; CHECK-DAG: stxvd2x 34, 3, [[C1]]
; CHECK: blr

; CHECK-FISL-LABEL: @testv1
; CHECK-FISL-DAG: xxlor 0, 34, 34
; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
; CHECK-FISL-DAG: stxvd2x 0, 0, 3
; CHECK-FISL-DAG: xvmaddadp 34, 35, 37
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
; CHECK-FISL-DAG: stxvd2x 34, 3, [[C1]]
; CHECK-FISL: blr
}

; testv2: <2 x double> counterpart of test2 (three FMAs, stored to d[0], d[1],
; d[2]). As in testv1, each indexed store expectation refers back to the
; register captured by the matching "li" pattern (16 and 32 respectively).
define void @testv2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
entry:
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
  store <2 x double> %0, <2 x double>* %d, align 8
  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
  %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
  store <2 x double> %2, <2 x double>* %arrayidx2, align 8
  ret void

; CHECK-LABEL: @testv2
; CHECK-DAG: xvmaddmdp 36, 35, 34
; CHECK-DAG: xvmaddmdp 37, 35, 34
; CHECK-DAG: li [[C1:[0-9]+]], 16
; CHECK-DAG: li [[C2:[0-9]+]], 32
; CHECK-DAG: xvmaddadp 34, 35, 38
; CHECK-DAG: stxvd2x 36, 0, 3
; CHECK-DAG: stxvd2x 37, 3, [[C1]]
; CHECK-DAG: stxvd2x 34, 3, [[C2]]
; CHECK: blr

; CHECK-FISL-LABEL: @testv2
; CHECK-FISL-DAG: xxlor 0, 34, 34
; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
; CHECK-FISL-DAG: stxvd2x 0, 0, 3
; CHECK-FISL-DAG: xxlor 0, 34, 34
; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1]]
; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
; CHECK-FISL: blr
}

; testv3: <2 x double> counterpart of test3 (%1 feeds a further FMA as its
; addend and is stored to d[1] last).
define void @testv3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
entry:
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
  store <2 x double> %0, <2 x double>* %d, align 8
  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 3
  store <2 x double> %2, <2 x double>* %arrayidx1, align 8
  %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
  %arrayidx2 = getelementptr inbounds <2 x double>* %d, i64 2
  store <2 x double> %3, <2 x double>* %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 1
  store <2 x double> %1, <2 x double>* %arrayidx3, align 8
  ret void

; Note: There is some unavoidable changeability in this variant.  If the
; FMAs are reordered differently, the algorithm can pick a different
; multiplicand to destroy, changing the register assignment.  There isn't
; a good way to express this possibility, so hopefully this doesn't change
; too often.

; CHECK-LABEL: @testv3
; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
; CHECK-DAG: li [[C1:[0-9]+]], 48
; CHECK-DAG: li [[C2:[0-9]+]], 32
; CHECK-DAG: xvmaddmdp 37, 35, 34
; CHECK-DAG: li [[C3:[0-9]+]], 16

; Note: We could convert this next FMA to M-type as well, but it would require
; re-ordering the instructions.
; CHECK-DAG: xvmaddadp [[V1]], 35, 36

; CHECK-DAG: xvmaddmdp 36, 35, 37
; CHECK-DAG: xvmaddadp 34, 35, 38
; CHECK-DAG: stxvd2x 32, 0, 3
; CHECK-DAG: stxvd2x 36, 3, [[C1]]
; CHECK-DAG: stxvd2x 34, 3, [[C2]]
; CHECK-DAG: stxvd2x 37, 3, [[C3]]
; CHECK: blr

; CHECK-FISL-LABEL: @testv3
; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
; CHECK-FISL-DAG: stxvd2x [[V1]], 0, 3
; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
; CHECK-FISL-DAG: xxlor [[V3:[0-9]+]], 0, 0
; CHECK-FISL-DAG: xvmaddadp [[V3]], 35, 36
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 48
; CHECK-FISL-DAG: stxvd2x [[V3]], 3, [[C1]]
; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
; CHECK-FISL-DAG: li [[C3:[0-9]+]], 16
; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
; CHECK-FISL: blr
}

; testv4: <2 x double> counterpart of test4 (%1 is stored to d[1] before it is
; consumed as the addend of %2).
define void @testv4(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
entry:
  %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %a)
  store <2 x double> %0, <2 x double>* %d, align 8
  %1 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %e, <2 x double> %a)
  %arrayidx1 = getelementptr inbounds <2 x double>* %d, i64 1
  store <2 x double> %1, <2 x double>* %arrayidx1, align 8
  %2 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> %1)
  %arrayidx3 = getelementptr inbounds <2 x double>* %d, i64 3
  store <2 x double> %2, <2 x double>* %arrayidx3, align 8
  %3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %f, <2 x double> %a)
  %arrayidx4 = getelementptr inbounds <2 x double>* %d, i64 2
  store <2 x double> %3, <2 x double>* %arrayidx4, align 8
  ret void

; CHECK-LABEL: @testv4
; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
; CHECK-DAG: xvmaddmdp 37, 35, 34
; CHECK-DAG: li [[C1:[0-9]+]], 16
; CHECK-DAG: li [[C2:[0-9]+]], 32
; CHECK-DAG: xvmaddadp 34, 35, 38

; Note: We could convert this next FMA to M-type as well, but it would require
; re-ordering the instructions.
; CHECK-DAG: xvmaddadp [[V1]], 35, 36

; CHECK-DAG: stxvd2x 32, 0, 3
; CHECK-DAG: stxvd2x 37, 3, [[C1]]
; CHECK-DAG: li [[C3:[0-9]+]], 48
; CHECK-DAG: xvmaddadp 37, 35, 36
; CHECK-DAG: stxvd2x 37, 3, [[C3]]
; CHECK-DAG: stxvd2x 34, 3, [[C2]]
; CHECK: blr

; CHECK-FISL-LABEL: @testv4
; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
; CHECK-FISL-DAG: stxvd2x 0, 0, 3
; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1]]
; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
; CHECK-FISL-DAG: li [[C3:[0-9]+]], 48
; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
; CHECK-FISL: blr
}

declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0

attributes #0 = { nounwind readnone }
