//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up register pressure reduction list schedulers,
// using standard algorithms. The basic approach uses a priority queue of
// available nodes to schedule. One at a time, nodes are taken from the
// priority queue (thus in priority order), checked for legality to schedule,
// and emitted if legal.
//
//===----------------------------------------------------------------------===//

#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <memory>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "pre-RA-sched"

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);

static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
    "disable-sched-cycles", cl::Hidden, cl::init(false),
    cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
    "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
    cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
    "disable-sched-live-uses", cl::Hidden, cl::init(true),
    cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
    "disable-sched-vrcycle", cl::Hidden, cl::init(false),
    cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
    "disable-sched-physreg-join", cl::Hidden, cl::init(false),
    cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
    "disable-sched-stalls", cl::Hidden, cl::init(true),
    cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
    "disable-sched-critical-path", cl::Hidden, cl::init(false),
    cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
    "disable-sched-height", cl::Hidden, cl::init(false),
    cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
    "disable-2addr-hack", cl::Hidden, cl::init(true),
    cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
    "max-sched-reorder", cl::Hidden, cl::init(6),
    cl::desc("Number of instructions to allow ahead of the critical path "
             "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
    "sched-avg-ipc", cl::Hidden, cl::init(1),
    cl::desc("Average inst/cycle when no target itinerary exists."));

namespace {

//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. Only bottom-up scheduling is implemented here.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit *> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle = 0;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live". These nodes must be scheduled before any other node that
  /// modifies the registers can be scheduled.
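  /// For example, if an add defines flags and a later addc consumes them,
  /// LiveRegDefs[flags] points at the add's SUnit and LiveRegGens[flags] at
  /// the bottom-most consumer, until the def itself is scheduled (see the
  /// worked flags example above ReleasePredecessors()).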
  unsigned NumLiveRegs;
  std::unique_ptr<SUnit*[]> LiveRegDefs;
  std::unique_ptr<SUnit*[]> LiveRegGens;

  // Collect interferences between physical register use/defs.
  // Each interference is an SUnit and set of physical registers.
  SmallVector<SUnit*, 4> Interferences;

  using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;

  LRegsMapT LRegsMap;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
      : ScheduleDAGSDNodes(mf),
        NeedLatency(needlatency), AvailableQueue(availqueue),
        Topo(SUnits, nullptr) {
    const TargetSubtargetInfo &STI = mf.getSubtarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
  }

  ~ScheduleDAGRRList() override {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule() override;

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
           AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *TryUnfoldSU(SUnit *);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);

  void releaseInterferences(unsigned Reg = 0);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
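  /// A unit created after the initial DAG build (e.g. by the unfolding or
  /// cloning paths below) falls outside the existing topological order, so
  /// the order is recomputed whenever the new NodeNum is not covered by the
  /// previous SUnits count.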
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = newSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool forceUnitLatencies() const override {
    return !NeedLatency;
  }
};

} // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost,
                          const MachineFunction &MF) {
  MVT VT = RegDefPos.GetValue();

  // Special handling for untyped values. These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();

    // Special handling for CopyFromReg of untyped values.
    if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Opcode = Node->getMachineOpcode();
    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
                    << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle =
      DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
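  // Index TRI->getNumRegs() is that extra slot: the "CallResource" used below
  // is treated like a physical register that each call sequence defines and
  // uses.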
  LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]());
  LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]());
  CallSeqEndForStart.clear();
  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");

  // Build the scheduling graph.
  BuildSchedGraph(nullptr);

  LLVM_DEBUG(dump());
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();

  LLVM_DEBUG({
    dbgs() << "*** Final schedule ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    dumpNode(*PredSU);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!forceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue;
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  while (true) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (const SDValue &Op : N->op_values())
        if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
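    // Advance N to its unique chain operand (type MVT::Other), if any,
    // stopping once the walk reaches the entry token.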
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  while (true) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = nullptr;
      unsigned BestMaxNest = MaxNest;
      for (const SDValue &Op : N->op_values()) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(Op.getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (const SDValue &Op : N->op_values())
      if (Op.getValueType() == MVT::Other) {
        N = Op.getNode();
        goto found_chain_operand;
      }
    return nullptr;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return nullptr;
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors.
  for (SDep &Pred : SU->Preds) {
    ReleasePred(SU, &Pred);
    if (Pred.isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[Pred.getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == Pred.getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[Pred.getReg()] = Pred.getSUnit();
      if (!LiveRegGens[Pred.getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[Pred.getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
        assert(N && "Must find call sequence start");

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = std::numeric_limits<unsigned>::max();

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
        HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    LLVM_DEBUG(dbgs() << "   Height [" << SU->getHeight()
                      << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
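  // (With the default AvgIPC of 1, each instruction fills its own cycle, so
  // the cycle can be advanced eagerly here.)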
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SDep &Succ : SU->Succs) {
    // LiveRegDefs[Succ.getReg()] != SU when SU is a two-address node.
    if (Succ.isAssignedRegDep() && LiveRegDefs[Succ.getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[Succ.getReg()] = nullptr;
      LiveRegGens[Succ.getReg()] = nullptr;
      releaseInterferences(Succ.getReg());
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
         "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, updating it and
/// its predecessors' state to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  LLVM_DEBUG(dumpNode(*SU));

  for (SDep &Pred : SU->Preds) {
    CapturePred(&Pred);
    if (Pred.isAssignedRegDep() && SU == LiveRegGens[Pred.getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[Pred.getReg()] == Pred.getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[Pred.getReg()] = nullptr;
      LiveRegGens[Pred.getReg()] = nullptr;
      releaseInterferences(Pred.getReg());
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
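  // This mirrors the release in ScheduleNodeBottomUp: unscheduling the
  // CALLSEQ_BEGIN node re-creates the artificial live range spanning the
  // call sequence.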
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
      SUnit *SeqEnd = CallSeqEndForStart[SU];
      assert(SeqEnd && "Call sequence start/end must be known");
      assert(!LiveRegDefs[CallResource]);
      assert(!LiveRegGens[CallResource]);
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = SeqEnd;
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        assert(LiveRegDefs[CallResource]);
        assert(LiveRegGens[CallResource]);
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  for (auto &Succ : SU->Succs) {
    if (Succ.isAssignedRegDep()) {
      auto Reg = Succ.getReg();
      if (!LiveRegDefs[Reg])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[Reg] = SU;

      // Update LiveRegGens only if it was empty before this unscheduling.
      // This avoids incorrectly overwriting an entry set by a previous run.
      if (!LiveRegGens[Reg]) {
        // Find the successor with the lowest height.
        LiveRegGens[Reg] = Succ.getSUnit();
        for (auto &Succ2 : SU->Succs) {
          if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg &&
              Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight())
            LiveRegGens[Reg] = Succ2.getSUnit();
        }
      }
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (auto E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to the cycle in which BtSU was
/// scheduled, unscheduling nodes from the end of the sequence, in order to
/// make it possible to schedule SU.
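/// For example, if Sequence ends with [..., BtSU, X, Y], then Y, X, and
/// finally BtSU are unscheduled (in that order) before SU is reconsidered.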
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// TryUnfoldSU - Attempt to unfold the memory operand of SU's node into a
/// separate load. Returns the SUnit for the remaining operation on success
/// (or SU itself if the load had already been scheduled), and nullptr if
/// unfolding is not possible.
SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
  SDNode *N = SU->getNode();
  SmallVector<SDNode *, 2> NewNodes;
  if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
    return nullptr;

  // Unfolding an x86 DEC64m operation results in a store, a dec, and a load,
  // which can't be handled here, so quit.
  if (NewNodes.size() == 3)
    return nullptr;

  assert(NewNodes.size() == 2 && "Expected a load folding node!");

  N = NewNodes[1];
  SDNode *LoadNode = NewNodes[0];
  unsigned NumVals = N->getNumValues();
  unsigned OldNumVals = SU->getNode()->getNumValues();

  // LoadNode may already exist. This can happen when there is another
  // load from the same location that produces the same type of value
  // but has different alignment or volatility.
  bool isNewLoad = true;
  SUnit *LoadSU;
  if (LoadNode->getNodeId() != -1) {
    LoadSU = &SUnits[LoadNode->getNodeId()];
    // If LoadSU has already been scheduled, we should clone it but
    // this would negate the benefit to unfolding so just return SU.
    if (LoadSU->isScheduled)
      return SU;
    isNewLoad = false;
  } else {
    LoadSU = CreateNewSUnit(LoadNode);
    LoadNode->setNodeId(LoadSU->NodeNum);

    InitNumRegDefsLeft(LoadSU);
    computeLatency(LoadSU);
  }

  bool isNewN = true;
  SUnit *NewSU;
  // This can only happen when isNewLoad is false.
  if (N->getNodeId() != -1) {
    NewSU = &SUnits[N->getNodeId()];
    // If NewSU has already been scheduled, we need to clone it, but this
    // negates the benefit to unfolding so just return SU.
    if (NewSU->isScheduled)
      return SU;
    isNewN = false;
  } else {
    NewSU = CreateNewSUnit(N);
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    computeLatency(NewSU);
  }

  LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");

  // Now that we are committed to unfolding replace DAG Uses.
  for (unsigned i = 0; i != NumVals; ++i)
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
  DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
                                 SDValue(LoadNode, 1));

  // Record all the edges to and from the old SU, by category.
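  // Roughly: SU previously computed op(load(mem)). Afterwards, LoadSU carries
  // the load (taking the chain and load-address predecessors) and NewSU
  // carries the op (taking the remaining predecessors and the data
  // successors).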
  SmallVector<SDep, 4> ChainPreds;
  SmallVector<SDep, 4> ChainSuccs;
  SmallVector<SDep, 4> LoadPreds;
  SmallVector<SDep, 4> NodePreds;
  SmallVector<SDep, 4> NodeSuccs;
  for (SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      ChainPreds.push_back(Pred);
    else if (isOperandOf(Pred.getSUnit(), LoadNode))
      LoadPreds.push_back(Pred);
    else
      NodePreds.push_back(Pred);
  }
  for (SDep &Succ : SU->Succs) {
    if (Succ.isCtrl())
      ChainSuccs.push_back(Succ);
    else
      NodeSuccs.push_back(Succ);
  }

  // Now assign edges to the newly-created nodes.
  for (const SDep &Pred : ChainPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPred(LoadSU, Pred);
  }
  for (const SDep &Pred : LoadPreds) {
    RemovePred(SU, Pred);
    if (isNewLoad)
      AddPred(LoadSU, Pred);
  }
  for (const SDep &Pred : NodePreds) {
    RemovePred(SU, Pred);
    AddPred(NewSU, Pred);
  }
  for (SDep D : NodeSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    D.setSUnit(NewSU);
    AddPred(SuccDep, D);
    // Balance register pressure.
    if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
        !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
      --NewSU->NumRegDefsLeft;
  }
  for (SDep D : ChainSuccs) {
    SUnit *SuccDep = D.getSUnit();
    D.setSUnit(SU);
    RemovePred(SuccDep, D);
    if (isNewLoad) {
      D.setSUnit(LoadSU);
      AddPred(SuccDep, D);
    }
  }

  // Add a data dependency to reflect that NewSU reads the value defined
  // by LoadSU.
  SDep D(LoadSU, SDep::Data, 0);
  D.setLatency(LoadSU->Latency);
  AddPred(NewSU, D);

  if (isNewLoad)
    AvailableQueue->addNode(LoadSU);
  if (isNewN)
    AvailableQueue->addNode(NewSU);

  ++NumUnfolds;

  if (NewSU->NumSuccsLeft == 0)
    NewSU->isAvailable = true;

  return NewSU;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n");
  LLVM_DEBUG(dumpNode(*SU));

  if (N->getGluedNode() &&
      !TII->canCopyGluedNodeDuringSchedule(N)) {
    LLVM_DEBUG(
        dbgs()
        << "Giving up because it has incoming glue and the target does not "
           "want to copy it\n");
    return nullptr;
  }

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (VT == MVT::Glue) {
      LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n");
      return nullptr;
    } else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (const SDValue &Op : N->op_values()) {
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) {
      LLVM_DEBUG(
          dbgs() << "Giving up because one of the operands is glue and "
                    "the target does not want to copy it\n");
      return nullptr;
    }
  }

  // If possible, unfold the instruction.
  if (TryUnfold) {
    SUnit *UnfoldSU = TryUnfoldSU(SU);
    if (!UnfoldSU)
      return nullptr;
    SU = UnfoldSU;
    N = SU->getNode();
    // If this can be scheduled, don't bother duplicating; just return.
    if (SU->NumSuccsLeft == 0)
      return SU;
  }

  LLVM_DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SDep &Pred : SU->Preds)
    if (!Pred.isArtificial())
      AddPred(NewSU, Pred);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (auto &DelDep : DelDeps)
    RemovePred(DelDep.first, DelDep.second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVectorImpl<SUnit*> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(nullptr);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SDep &Succ : SU->Succs) {
    if (Succ.isArtificial())
      continue;
    SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = Succ;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, Succ));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    }
  }
  for (auto &DelDep : DelDeps)
    RemovePred(DelDep.first, DelDep.second);

  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPred(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPred(CopyToSU, ToDep);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  unsigned NumRes;
  if (N->getOpcode() == ISD::CopyFromReg) {
    // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
    NumRes = 1;
  } else {
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
    NumRes = MCID.getNumDefs();
    for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
      if (Reg == *ImpDef)
        break;
      ++NumRes;
    }
  }
  return N->getSimpleValueType(NumRes);
}

/// CheckForLiveRegDef - Update the interfering-register list (LRegs) if the
/// specified register def of the specified SUnit clobbers any "live"
/// registers.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               SUnit **LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVectorImpl<unsigned> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {

    // Check if Reg is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI).second) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     ArrayRef<SUnit*> LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVectorImpl<unsigned> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i).second)
      LRegs.push_back(i);
  }
}

/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (const SDValue &Op : N->op_values())
    if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode()))
      return RegOp->getRegMask();
  return nullptr;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SDep &Pred : SU->Preds) {
    if (Pred.isAssignedRegDep() && LiveRegDefs[Pred.getReg()] != SU)
      CheckForLiveRegDef(Pred.getSUnit(), Pred.getReg(), LiveRegDefs.get(),
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.
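      // Inline asm operands come in groups: a flag word encoding the operand
      // kind and register count, followed by that many register operands.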
      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
            cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) &&
            RegAdded.insert(CallResource).second)
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask,
                               makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()),
                               RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (MCID.hasOptionalDef()) {
      // Most ARM instructions have an OptionalDef for CPSR, to model the S-bit.
      // This operand can be either a def of CPSR, if the S bit is set; or a use
      // of %noreg. When the OptionalDef is set to a valid register, we need to
      // handle it in the same way as an ImplicitDef.
      for (unsigned i = 0; i < MCID.getNumDefs(); ++i)
        if (MCID.OpInfo[i].isOptionalDef()) {
          const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues());
          unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg();
          CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
        }
    }
    if (!MCID.ImplicitDefs)
      continue;
    for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = Interferences.size(); i > 0; --i) {
    SUnit *SU = Interferences[i-1];
    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    if (Reg) {
      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
      if (!is_contained(LRegs, Reg))
        continue;
    }
    SU->isPending = false;
    // The interfering node may no longer be available due to backtracking.
    // Furthermore, it may have been made available again, in which case it is
    // now already in the AvailableQueue.
    if (SU->isAvailable && !SU->NodeQueueId) {
      LLVM_DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
      AvailableQueue->push(SU);
    }
    if (i < Interferences.size())
      Interferences[i-1] = Interferences.back();
    Interferences.pop_back();
    LRegsMap.erase(LRegsPos);
  }
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
  auto FindAvailableNode = [&]() {
    while (CurSU) {
      SmallVector<unsigned, 4> LRegs;
      if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
        break;
      LLVM_DEBUG(dbgs() << "    Interfering reg ";
                 if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource";
                 else dbgs() << printReg(LRegs[0], TRI);
                 dbgs() << " SU #" << CurSU->NodeNum << '\n');
      std::pair<LRegsMapT::iterator, bool> LRegsPair =
          LRegsMap.insert(std::make_pair(CurSU, LRegs));
      if (LRegsPair.second) {
        CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
        Interferences.push_back(CurSU);
      }
      else {
        assert(CurSU->isPending && "Interferences are pending");
        // Update the interference with current live regs.
        LRegsPair.first->second = LRegs;
      }
      CurSU = AvailableQueue->pop();
    }
  };
  FindAvailableNode();
  if (CurSU)
    return CurSU;

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (SUnit *TrySU : Interferences) {
    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = nullptr;
    unsigned LiveCycle = std::numeric_limits<unsigned>::max();
    for (unsigned Reg : LRegs) {
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      // BacktrackBottomUp mutates Interferences!
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
                        << ") to SU(" << TrySU->NodeNum << ")\n");
      AddPred(TrySU, SDep(BtSU, SDep::Artificial));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available.
      if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
        LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
        CurSU = AvailableQueue->pop();
      } else {
        LLVM_DEBUG(dbgs() << "TrySU available\n");
        // Available and in AvailableQueue.
        AvailableQueue->remove(TrySU);
        CurSU = TrySU;
      }
      FindAvailableNode();
      // Interferences has been mutated. We must break.
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack.
    // If it's too expensive to copy the value, then try duplicating the
    // nodes that produce these "too expensive to copy" values to break the
    // dependency. In case even that doesn't work, insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
        TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If the cross copy register class is the same as RC, then it must be
    // possible to copy the value directly. Do not try to duplicate the def.
    // If the cross copy register class is not the same as RC, then it's
    // possible to copy the value but it requires cross register class copies
    // and it is expensive.
    // If the cross copy register class is null, then it's not possible to
    // copy the value at all.
    SUnit *NewDef = nullptr;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
                        << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
      NewDef = Copies.back();
    }

    LLVM_DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
                      << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Artificial));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }
  assert(CurSU && "Unable to resolve live physical register dependencies!");
  return CurSU;
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty() || !Interferences.empty()) {
    LLVM_DEBUG(dbgs() << "\nExamining Available:\n";
               AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
             "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}

namespace {

class RegReductionPQBase;

struct queue_sort {
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};

#ifndef NDEBUG
template<class SF>
struct reverse_sort : public queue_sort {
  SF &SortFunc;

  reverse_sort(SF &sf) : SortFunc(sf) {}

  bool operator()(SUnit* left, SUnit* right) const {
    // reverse left/right rather than simply !SortFunc(left, right)
    // to expose different paths in the comparison logic.
    return SortFunc(right, left);
  }
};
#endif // NDEBUG

/// bu_ls_rr_sort - Priority function for bottom up register pressure
/// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;

  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;

  src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;

  hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
struct ilp_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;

  ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
  std::vector<SUnit *> Queue;
  unsigned CurQueueId = 0;
  bool TracksRegPressure;
  bool SrcOrder;

  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits;

  MachineFunction &MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetLowering *TLI;
  ScheduleDAGRRList *scheduleDAG = nullptr;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     bool srcorder,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
      SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (const TargetRegisterClass *RC : TRI->regclasses())
        RegLimit[RC->getID()] = tri->getRegPressureLimit(RC, MF);
    }
  }

  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer* getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits) override;

  void addNode(const SUnit *SU) override;

  void updateNode(const SUnit *SU) override;

  void releaseState() override {
    SUnits = nullptr;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return SU->getNode()->getIROrder();
  }

  bool empty() const override { return Queue.empty(); }

  void push(SUnit *U) override {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  void remove(SUnit *SU) override {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
    if (I != std::prev(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }

  bool tracksRegPressure() const override { return TracksRegPressure; }

  void dumpRegPressure() const;

  bool HighRegPressure(const SUnit *SU) const;

  bool MayReduceRegPressure(SUnit *SU) const;

  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;

  void scheduledNode(SUnit *SU) override;

  void unscheduledNode(SUnit *SU) override;

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};

template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
  std::vector<SUnit *>::iterator Best = Q.begin();
  for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
    if (Picker(*Best, *I))
      Best = I;
  SUnit *V = *Best;
  if (Best != std::prev(Q.end()))
    std::swap(*Best, Q.back());
  Q.pop_back();
  return V;
}

template<class SF>
SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
#ifndef NDEBUG
  if (DAG->StressSched) {
    reverse_sort<SF> RPicker(Picker);
    return popFromQueueImpl(Q, RPicker);
  }
#endif
  (void)DAG;
  return popFromQueueImpl(Q, Picker);
}

//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {
  SF Picker;

public:
  RegReductionPriorityQueue(MachineFunction &mf,
                            bool tracksrp,
                            bool srcorder,
                            const TargetInstrInfo *tii,
                            const TargetRegisterInfo *tri,
                            const TargetLowering *tli)
    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
                         tii, tri, tli),
      Picker(this) {}

  bool isBottomUp() const override { return SF::IsBottomUp; }

  bool isReady(SUnit *U) const override {
    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
  }

  SUnit *pop() override {
    if (Queue.empty()) return nullptr;

    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
    V->NodeQueueId = 0;
    return V;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
    // Emulate pop() without clobbering NodeQueueIds.
    std::vector<SUnit *> DumpQueue = Queue;
    SF DumpPicker = Picker;
    while (!DumpQueue.empty()) {
      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
      dbgs() << "Height " << SU->getHeight() << ": ";
      DAG->dumpNode(*SU);
    }
  }
#endif
};

using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//           Static Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

// Check for special nodes that bypass scheduling heuristics.
// Currently this pushes TokenFactor nodes down, but may be used for other
// pseudo-ops as well.
//
// Return -1 to schedule right above left, 1 for left above right.
// Return 0 if no bias exists.
static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
  bool LSchedLow = left->isScheduleLow;
  bool RSchedLow = right->isScheduleLow;
  if (LSchedLow != RSchedLow)
    return LSchedLow < RSchedLow ? 1 : -1;
  return 0;
}

/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
static unsigned
CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
  if (SUNumbers[SU->NodeNum] != 0)
    return SUNumbers[SU->NodeNum];

  // Use WorkList to avoid stack overflow on excessively large IRs.
  struct WorkState {
    WorkState(const SUnit *SU) : SU(SU) {}
    const SUnit *SU;
    unsigned PredsProcessed = 0;
  };

  SmallVector<WorkState, 16> WorkList;
  WorkList.push_back(SU);
  while (!WorkList.empty()) {
    auto &Temp = WorkList.back();
    auto *TempSU = Temp.SU;
    bool AllPredsKnown = true;
    // Try to find a non-evaluated pred and push it into the processing stack.
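    // Resume at Temp.PredsProcessed so preds already examined on an earlier
    // visit to this node are not rescanned.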
    for (unsigned P = Temp.PredsProcessed; P < TempSU->Preds.size(); ++P) {
      auto &Pred = TempSU->Preds[P];
      if (Pred.isCtrl()) continue;  // ignore chain preds
      SUnit *PredSU = Pred.getSUnit();
      if (SUNumbers[PredSU->NodeNum] == 0) {
#ifndef NDEBUG
        // In debug mode, check that we don't have such an element in the stack.
        for (auto It : WorkList)
          assert(It.SU != PredSU && "Trying to push an element twice?");
#endif
        // Next time start processing this one starting from the next pred.
        Temp.PredsProcessed = P + 1;
        WorkList.push_back(PredSU);
        AllPredsKnown = false;
        break;
      }
    }

    if (!AllPredsKnown)
      continue;

    // Once all preds are known, we can calculate the answer for this one.
    unsigned SethiUllmanNumber = 0;
    unsigned Extra = 0;
    for (const SDep &Pred : TempSU->Preds) {
      if (Pred.isCtrl()) continue;  // ignore chain preds
      SUnit *PredSU = Pred.getSUnit();
      unsigned PredSethiUllman = SUNumbers[PredSU->NodeNum];
      assert(PredSethiUllman > 0 && "We should have evaluated this pred!");
      if (PredSethiUllman > SethiUllmanNumber) {
        SethiUllmanNumber = PredSethiUllman;
        Extra = 0;
      } else if (PredSethiUllman == SethiUllmanNumber)
        ++Extra;
    }

    SethiUllmanNumber += Extra;
    if (SethiUllmanNumber == 0)
      SethiUllmanNumber = 1;
    SUNumbers[TempSU->NodeNum] = SethiUllmanNumber;
    WorkList.pop_back();
  }

  assert(SUNumbers[SU->NodeNum] > 0 && "SethiUllman should never be zero!");
  return SUNumbers[SU->NodeNum];
}

/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
/// scheduling units.
void RegReductionPQBase::CalculateSethiUllmanNumbers() {
  SethiUllmanNumbers.assign(SUnits->size(), 0);

  for (const SUnit &SU : *SUnits)
    CalcNodeSethiUllmanNumber(&SU, SethiUllmanNumbers);
}

void RegReductionPQBase::addNode(const SUnit *SU) {
  unsigned SUSize = SethiUllmanNumbers.size();
  if (SUnits->size() > SUSize)
    SethiUllmanNumbers.resize(SUSize*2, 0);
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}

void RegReductionPQBase::updateNode(const SUnit *SU) {
  SethiUllmanNumbers[SU->NodeNum] = 0;
  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
}

// Lower priority means schedule further down. For bottom-up scheduling, lower
// priority SUs are scheduled before higher priority SUs.
unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
  assert(SU->NodeNum < SethiUllmanNumbers.size());
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return 0;
  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return 0;
  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation. Give it a large SethiUllman number so it will be
    // scheduled right before its predecessors, so that it doesn't lengthen
    // their live ranges.
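    // (0xffff is simply an arbitrarily large priority here, not a computed
    // Sethi-Ullman number.)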
    return 0xffff;
  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return 0;
#if 1
  return SethiUllmanNumbers[SU->NodeNum];
#else
  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
  if (SU->isCallOp) {
    // FIXME: This assumes all of the defs are used as call operands.
    int NP = (int)Priority - SU->getNode()->getNumValues();
    return (NP > 0) ? NP : 0;
  }
  return Priority;
#endif
}

//===----------------------------------------------------------------------===//
//                     Register Pressure Tracking
//===----------------------------------------------------------------------===//

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const {
  for (const TargetRegisterClass *RC : TRI->regclasses()) {
    unsigned Id = RC->getID();
    unsigned RP = RegPressure[Id];
    if (!RP) continue;
    LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
                      << RegLimit[Id] << '\n');
  }
}
#endif

bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
  if (!TLI)
    return false;

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;
    SUnit *PredSU = Pred.getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      continue;
    }
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);

      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
        return true;
    }
  }
  return false;
}

bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
  const SDNode *N = SU->getNode();

  if (!N->isMachineOpcode() || !SU->NumSuccs)
    return false;

  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  for (unsigned i = 0; i != NumDefs; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] >= RegLimit[RCId])
      return true;
  }
  return false;
}

// Compute the register pressure contribution by this instruction by counting
// up for uses that are not live and down for defs. Only count register classes
// that are already under high pressure. As a side effect, compute the number of
// uses of registers that are already live.
//
// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
// so could probably be factored.
int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
  LiveUses = 0;
  int PDiff = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;
    SUnit *PredSU = Pred.getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
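    // In that case every def of PredSU is already live, so scheduling SU adds
    // uses of live registers (counted in LiveUses) rather than new pressure.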
    if (PredSU->NumRegDefsLeft == 0) {
      if (PredSU->getNode()->isMachineOpcode())
        ++LiveUses;
      continue;
    }
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance()) {
      MVT VT = RegDefPos.GetValue();
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] >= RegLimit[RCId])
        ++PDiff;
    }
  }
  const SDNode *N = SU->getNode();

  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
    return PDiff;

  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  for (unsigned i = 0; i != NumDefs; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (!N->hasAnyUseOfValue(i))
      continue;
    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
    if (RegPressure[RCId] >= RegLimit[RCId])
      --PDiff;
  }
  return PDiff;
}

void RegReductionPQBase::scheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  if (!SU->getNode())
    return;

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;
    SUnit *PredSU = Pred.getSUnit();
    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
    // to cover the number of registers defined (they are all live).
    if (PredSU->NumRegDefsLeft == 0) {
      continue;
    }
    // FIXME: The ScheduleDAG currently loses information about which of a
    // node's values is consumed by each dependence. Consequently, if the node
    // defines multiple register classes, we don't know which to pressurize
    // here. Instead the following loop consumes the register defs in an
    // arbitrary order. At least it handles the common case of clustered loads
    // to the same class. For precise liveness, each SDep needs to indicate the
    // result number. But that tightly couples the ScheduleDAG with the
    // SelectionDAG making updates tricky. A simpler hack would be to attach a
    // value type or register class to SDep.
    //
    // The most important aspect of register tracking is balancing the increase
    // here with the reduction further below. Note that this SU may use multiple
    // defs in PredSU. They can't be determined here, but we've already
    // compensated by reducing NumRegDefsLeft in PredSU during
    // ScheduleDAGSDNodes::AddSchedEdges.
    --PredSU->NumRegDefsLeft;
    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
      if (SkipRegDefs)
        continue;

      unsigned RCId, Cost;
      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
      RegPressure[RCId] += Cost;
      break;
    }
  }

  // We should have this assert, but there may be dead SDNodes that never
  // materialize as SUnits, so they don't appear to generate liveness.
  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
  int SkipRegDefs = (int)SU->NumRegDefsLeft;
  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
    if (SkipRegDefs > 0)
      continue;
    unsigned RCId, Cost;
    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
    if (RegPressure[RCId] < Cost) {
      // Register pressure tracking is imprecise. This can happen. But we try
      // hard not to let it happen because it likely results in poor scheduling.
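      // Saturate at zero rather than letting the unsigned counter wrap.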
      LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum
                        << ") has too many regdefs\n");
      RegPressure[RCId] = 0;
    } else {
      RegPressure[RCId] -= Cost;
    }
  }
  LLVM_DEBUG(dumpRegPressure());
}

void RegReductionPQBase::unscheduledNode(SUnit *SU) {
  if (!TracksRegPressure)
    return;

  const SDNode *N = SU->getNode();
  if (!N) return;

  if (!N->isMachineOpcode()) {
    if (N->getOpcode() != ISD::CopyToReg)
      return;
  } else {
    unsigned Opc = N->getMachineOpcode();
    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
        Opc == TargetOpcode::INSERT_SUBREG ||
        Opc == TargetOpcode::SUBREG_TO_REG ||
        Opc == TargetOpcode::REG_SEQUENCE ||
        Opc == TargetOpcode::IMPLICIT_DEF)
      return;
  }

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl())
      continue;
    SUnit *PredSU = Pred.getSUnit();
    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
    // counts data deps.
    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
      continue;
    const SDNode *PN = PredSU->getNode();
    if (!PN->isMachineOpcode()) {
      if (PN->getOpcode() == ISD::CopyFromReg) {
        MVT VT = PN->getSimpleValueType(0);
        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      }
      continue;
    }
    unsigned POpc = PN->getMachineOpcode();
    if (POpc == TargetOpcode::IMPLICIT_DEF)
      continue;
    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
        POpc == TargetOpcode::INSERT_SUBREG ||
        POpc == TargetOpcode::SUBREG_TO_REG) {
      MVT VT = PN->getSimpleValueType(0);
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
      continue;
    }
    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
    for (unsigned i = 0; i != NumDefs; ++i) {
      MVT VT = PN->getSimpleValueType(i);
      if (!PN->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
        // Register pressure tracking is imprecise. This can happen.
        RegPressure[RCId] = 0;
      else
        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
    }
  }

  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
  // may transfer data dependencies to CopyToReg.
  if (SU->NumSuccs && N->isMachineOpcode()) {
    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      MVT VT = N->getSimpleValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
    }
  }

  LLVM_DEBUG(dumpRegPressure());
}

//===----------------------------------------------------------------------===//
//           Dynamic Node Priority for Register Pressure Reduction
//===----------------------------------------------------------------------===//

/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
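/// CopyToReg successors are looked through recursively, so a chain of
/// stacked CopyToRegs counts as a single position rather than several.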
static unsigned closestSucc(const SUnit *SU) {
  unsigned MaxHeight = 0;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl()) continue;  // ignore chain succs
    unsigned Height = Succ.getSUnit()->getHeight();
    // If there are a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
    if (Succ.getSUnit()->getNode() &&
        Succ.getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
      Height = closestSucc(Succ.getSUnit())+1;
    if (Height > MaxHeight)
      MaxHeight = Height;
  }
  return MaxHeight;
}

/// calcMaxScratches - Returns a cost estimate of the worst case requirement
/// for scratch registers, i.e. number of data dependencies.
static unsigned calcMaxScratches(const SUnit *SU) {
  unsigned Scratches = 0;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;  // ignore chain preds
    Scratches++;
  }
  return Scratches;
}

/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
/// CopyFromReg from a virtual register.
static bool hasOnlyLiveInOpers(const SUnit *SU) {
  bool RetVal = false;
  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;
    const SUnit *PredSU = Pred.getSUnit();
    if (PredSU->getNode() &&
        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg =
        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}

/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
/// CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
  bool RetVal = false;
  for (const SDep &Succ : SU->Succs) {
    if (Succ.isCtrl()) continue;
    const SUnit *SuccSU = Succ.getSUnit();
    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
      unsigned Reg =
        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        RetVal = true;
        continue;
      }
    }
    return false;
  }
  return RetVal;
}

// Set isVRegCycle for a node with only live in opers and live out uses. Also
// set isVRegCycle for its CopyFromReg operands.
//
// This is only relevant for single-block loops, in which case the VRegCycle
// node is likely an induction variable in which the operand and target virtual
// registers should be coalesced (e.g. pre/post increment values). Setting the
// isVRegCycle flag helps the scheduler prioritize other uses of the same
// CopyFromReg so that this node becomes the virtual register "kill". This
// avoids interference between the values live in and out of the block and
// eliminates a copy inside the loop.
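//
// A typical candidate is a single-block loop's induction variable update,
// e.g. an add whose only value operand is a CopyFromReg of the IV vreg and
// whose only use is a CopyToReg of that vreg; both the add and the
// CopyFromReg get the flag.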
static void initVRegCycle(SUnit *SU) {
  if (DisableSchedVRegCycle)
    return;

  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
    return;

  LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");

  SU->isVRegCycle = true;

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;
    Pred.getSUnit()->isVRegCycle = true;
  }
}

// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
// CopyFromReg operands. We should no longer penalize other uses of this VReg.
static void resetVRegCycle(SUnit *SU) {
  if (!SU->isVRegCycle)
    return;

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;  // ignore chain preds
    SUnit *PredSU = Pred.getSUnit();
    if (PredSU->isVRegCycle) {
      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
             "VRegCycle def must be CopyFromReg");
      Pred.getSUnit()->isVRegCycle = false;
    }
  }
}

// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
// means a node that defines the VRegCycle has not been scheduled yet.
static bool hasVRegCycleUse(const SUnit *SU) {
  // If this SU also defines the VReg, don't hoist it as a "use".
  if (SU->isVRegCycle)
    return false;

  for (const SDep &Pred : SU->Preds) {
    if (Pred.isCtrl()) continue;  // ignore chain preds
    if (Pred.getSUnit()->isVRegCycle &&
        Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
      LLVM_DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
      return true;
    }
  }
  return false;
}

// Check for either a dependence (latency) or resource (hazard) stall.
//
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
  if ((int)SPQ->getCurCycle() < Height) return true;
  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return true;
  return false;
}

// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
                            RegReductionPQBase *SPQ) {
  // Scheduling an instruction that uses a VReg whose postincrement has not yet
  // been scheduled will induce a copy. Model this as an extra cycle of latency.
  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
  int LHeight = (int)left->getHeight() + LPenalty;
  int RHeight = (int)right->getHeight() + RPenalty;

  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
                BUHasStall(left, LHeight, SPQ);
  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
                BUHasStall(right, RHeight, SPQ);

  // If scheduling one of the nodes will cause a pipeline stall, delay it.
  // If scheduling both of them will cause stalls, sort them according to
  // their height.
  if (LStall) {
    if (!RStall)
      return 1;
    if (LHeight != RHeight)
      return LHeight > RHeight ? 1 : -1;
  } else if (RStall)
    return -1;

  // If either node is scheduling for latency, sort them by height/depth
  // and latency.
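  // (BURRSort calls this comparison with checkPref == false, making it
  // unconditional there; the hybrid scheduler passes checkPref == true.)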
  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
                     right->SchedulingPref == Sched::ILP)) {
    // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
    // is enabled, grouping instructions by cycle, then its height is already
    // covered so only its depth matters. We also reach this point if both stall
    // but have the same height.
    if (!SPQ->getHazardRec()->isEnabled()) {
      if (LHeight != RHeight)
        return LHeight > RHeight ? 1 : -1;
    }
    int LDepth = left->getDepth() - LPenalty;
    int RDepth = right->getDepth() - RPenalty;
    if (LDepth != RDepth) {
      LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
                        << ") depth " << LDepth << " vs SU (" << right->NodeNum
                        << ") depth " << RDepth << "\n");
      return LDepth < RDepth ? 1 : -1;
    }
    if (left->Latency != right->Latency)
      return left->Latency > right->Latency ? 1 : -1;
  }
  return 0;
}

static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
  // Schedule physical register definitions close to their use. This is
  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
  // long as shortening physreg live ranges is generally good, we can defer
  // creating a subtarget hook.
  if (!DisableSchedPhysRegJoin) {
    bool LHasPhysReg = left->hasPhysRegDefs;
    bool RHasPhysReg = right->hasPhysRegDefs;
    if (LHasPhysReg != RHasPhysReg) {
#ifndef NDEBUG
      static const char *const PhysRegMsg[] = { " has no physreg",
                                                " defines a physreg" };
#endif
      LLVM_DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
                        << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum
                        << ") " << PhysRegMsg[RHasPhysReg] << "\n");
      return LHasPhysReg < RHasPhysReg;
    }
  }

  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
  unsigned LPriority = SPQ->getNodePriority(left);
  unsigned RPriority = SPQ->getNodePriority(right);

  // Be really careful about hoisting call operands above previous calls.
  // Only allow it if it would reduce register pressure.
  if (left->isCall && right->isCallOp) {
    unsigned RNumVals = right->getNode()->getNumValues();
    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
  }
  if (right->isCall && left->isCallOp) {
    unsigned LNumVals = left->getNode()->getNumValues();
    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
  }

  if (LPriority != RPriority)
    return LPriority > RPriority;

  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
  // the same, then keep source order.
  if (left->isCall || right->isCall) {
    unsigned LOrder = SPQ->getNodeOrdering(left);
    unsigned ROrder = SPQ->getNodeOrdering(right);

    // Prefer an ordering where the lower the non-zero order number, the higher
    // the preference.
    if ((LOrder || ROrder) && LOrder != ROrder)
      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
  }

  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
  // e.g.
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // and the following instructions are both ready.
  // t2 = op c3
  // t4 = op c4
  //
  // Then schedule t2 = op first.
  // i.e.
  // t4 = op c4
  // t2 = op c3
  // t1 = op t2, c1
  // t3 = op t4, c2
  //
  // This creates more short live intervals.
  unsigned LDist = closestSucc(left);
  unsigned RDist = closestSucc(right);
  if (LDist != RDist)
    return LDist < RDist;

  // How many registers become live when the node is scheduled.
  unsigned LScratch = calcMaxScratches(left);
  unsigned RScratch = calcMaxScratches(right);
  if (LScratch != RScratch)
    return LScratch > RScratch;

  // Comparing latency against a call makes little sense unless the node
  // is register pressure-neutral.
  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
    return (left->NodeQueueId > right->NodeQueueId);

  // Do not compare latencies when one or both of the nodes are calls.
  if (!DisableSchedCycles &&
      !(left->isCall || right->isCall)) {
    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  } else {
    if (left->getHeight() != right->getHeight())
      return left->getHeight() > right->getHeight();

    if (left->getDepth() != right->getDepth())
      return left->getDepth() < right->getDepth();
  }

  assert(left->NodeQueueId && right->NodeQueueId &&
         "NodeQueueId cannot be zero");
  return (left->NodeQueueId > right->NodeQueueId);
}

// Bottom up
bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  return BURRSort(left, right, SPQ);
}

// Source order, otherwise bottom up.
bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  unsigned LOrder = SPQ->getNodeOrdering(left);
  unsigned ROrder = SPQ->getNodeOrdering(right);

  // Prefer an ordering where the lower the non-zero order number, the higher
  // the preference.
  if ((LOrder || ROrder) && LOrder != ROrder)
    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);

  return BURRSort(left, right, SPQ);
}

// If the time between now and when the instruction will be ready can cover
// the spill code, then avoid adding it to the ready queue. This gives long
// stalls highest priority and allows hoisting across calls. It should also
// speed up processing the available queue.
bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  static const unsigned ReadyDelay = 3;

  if (SPQ->MayReduceRegPressure(SU)) return true;

  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}

// Return true if right should be scheduled with higher priority than left.
bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
    return BURRSort(left, right, SPQ);

  bool LHigh = SPQ->HighRegPressure(left);
  bool RHigh = SPQ->HighRegPressure(right);
  // Avoid causing spills. If register pressure is high, schedule for
  // register pressure reduction.
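  // When exactly one side would push a register class to its limit, pick the
  // other side first; returning true means right is preferred over left.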
  if (LHigh && !RHigh) {
    LLVM_DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
                      << right->NodeNum << ")\n");
    return true;
  } else if (!LHigh && RHigh) {
    LLVM_DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
                      << left->NodeNum << ")\n");
    return false;
  }
  if (!LHigh && !RHigh) {
    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
    if (result != 0)
      return result > 0;
  }
  return BURRSort(left, right, SPQ);
}

// Schedule as many instructions in each cycle as possible. So don't make an
// instruction available unless it is ready in the current cycle.
bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
  if (SU->getHeight() > CurCycle) return false;

  if (SPQ->getHazardRec()->getHazardType(SU, 0)
      != ScheduleHazardRecognizer::NoHazard)
    return false;

  return true;
}

static bool canEnableCoalescing(SUnit *SU) {
  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
    // CopyToReg should be close to its uses to facilitate coalescing and
    // avoid spilling.
    return true;

  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
      Opc == TargetOpcode::SUBREG_TO_REG ||
      Opc == TargetOpcode::INSERT_SUBREG)
    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
    // close to their uses to facilitate coalescing.
    return true;

  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
    // If SU does not have a register def, schedule it close to its uses
    // because it does not lengthen any live ranges.
    return true;

  return false;
}

// list-ilp is currently an experimental scheduler that allows various
// heuristics to be enabled prior to the normal register reduction logic.
bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
  if (int res = checkSpecialNodes(left, right))
    return res > 0;

  if (left->isCall || right->isCall)
    // No way to compute latency of calls.
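    // Fall back to the pure register-reduction ordering instead.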
    return BURRSort(left, right, SPQ);

  unsigned LLiveUses = 0, RLiveUses = 0;
  int LPDiff = 0, RPDiff = 0;
  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
  }
  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
    LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum
                      << "): " << LPDiff << " != SU(" << right->NodeNum
                      << "): " << RPDiff << "\n");
    return LPDiff > RPDiff;
  }

  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
    bool LReduce = canEnableCoalescing(left);
    bool RReduce = canEnableCoalescing(right);
    if (LReduce && !RReduce) return false;
    if (RReduce && !LReduce) return true;
  }

  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
    LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
                      << " != SU(" << right->NodeNum << "): " << RLiveUses
                      << "\n");
    return LLiveUses < RLiveUses;
  }

  if (!DisableSchedStalls) {
    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
    if (LStall != RStall)
      return left->getHeight() > right->getHeight();
  }

  if (!DisableSchedCriticalPath) {
    int spread = (int)left->getDepth() - (int)right->getDepth();
    if (std::abs(spread) > MaxReorderWindow) {
      LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
                        << left->getDepth() << " != SU(" << right->NodeNum
                        << "): " << right->getDepth() << "\n");
      return left->getDepth() < right->getDepth();
    }
  }

  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
    int spread = (int)left->getHeight() - (int)right->getHeight();
    if (std::abs(spread) > MaxReorderWindow)
      return left->getHeight() > right->getHeight();
  }

  return BURRSort(left, right, SPQ);
}

void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
  SUnits = &sunits;
  // Add pseudo dependency edges for two-address nodes.
  if (!Disable2AddrHack)
    AddPseudoTwoAddrDeps();
  // Reroute edges to nodes with multiple uses.
  if (!TracksRegPressure && !SrcOrder)
    PrescheduleNodesWithMultipleUses();
  // Calculate node priorities.
  CalculateSethiUllmanNumbers();

  // For single block loops, mark nodes that look like canonical IV increments.
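  // A block that lists itself as a successor is a single-block loop.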
  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB))
    for (SUnit &SU : sunits)
      initVRegCycle(&SU);
}

//===----------------------------------------------------------------------===//
//                    Preschedule for Register Pressure
//===----------------------------------------------------------------------===//

bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
  if (SU->isTwoAddress) {
    unsigned Opc = SU->getNode()->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
        SDNode *DU = SU->getNode()->getOperand(i).getNode();
        if (DU->getNodeId() != -1 &&
            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
          return true;
      }
    }
  }
  return false;
}

/// canClobberReachingPhysRegUse - True if SU would clobber one of its
/// successor's explicit physregs whose definition can reach DepSU.
/// i.e. DepSU should not be scheduled above SU.
static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
                                         ScheduleDAGRRList *scheduleDAG,
                                         const TargetInstrInfo *TII,
                                         const TargetRegisterInfo *TRI) {
  const MCPhysReg *ImpDefs
    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
  if (!ImpDefs && !RegMask)
    return false;

  for (const SDep &Succ : SU->Succs) {
    SUnit *SuccSU = Succ.getSUnit();
    for (const SDep &SuccPred : SuccSU->Preds) {
      if (!SuccPred.isAssignedRegDep())
        continue;

      if (RegMask &&
          MachineOperand::clobbersPhysReg(RegMask, SuccPred.getReg()) &&
          scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
        return true;

      if (ImpDefs)
        for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
          // Return true if SU clobbers this physical register use and the
          // definition of the register reaches from DepSU. IsReachable queries
          // a topological forward sort of the DAG (following the successors).
          if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) &&
              scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit()))
            return true;
    }
  }
  return false;
}

/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
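/// The check walks SU's glued-node chain, since glued nodes are scheduled as
/// a unit and any of them may carry the clobbering implicit defs or regmask.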
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
                                  const TargetInstrInfo *TII,
                                  const TargetRegisterInfo *TRI) {
  SDNode *N = SuccSU->getNode();
  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
  const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
  assert(ImpDefs && "Caller should check hasPhysRegDefs");
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (!SUNode->isMachineOpcode())
      continue;
    const MCPhysReg *SUImpDefs =
      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
    const uint32_t *SURegMask = getNodeRegMask(SUNode);
    if (!SUImpDefs && !SURegMask)
      continue;
    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
      MVT VT = N->getSimpleValueType(i);
      if (VT == MVT::Glue || VT == MVT::Other)
        continue;
      if (!N->hasAnyUseOfValue(i))
        continue;
      unsigned Reg = ImpDefs[i - NumDefs];
      if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
        return true;
      if (!SUImpDefs)
        continue;
      for (;*SUImpDefs; ++SUImpDefs) {
        unsigned SUReg = *SUImpDefs;
        if (TRI->regsOverlap(Reg, SUReg))
          return true;
      }
    }
  }
  return false;
}

/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
/// are not handled well by the general register pressure reduction
/// heuristics. When presented with code like this:
///
///      N
///    / |
///   /  |
///  U  store
///  |
/// ...
///
/// the heuristics tend to push the store up, but since the
/// operand of the store has another use (U), this would increase
/// the length of that other use (the U->N edge).
///
/// This function transforms code like the above to route U's
/// dependence through the store when possible, like this:
///
///      N
///      ||
///      ||
///     store
///       |
///       U
///       |
///      ...
///
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
  // Visit all the nodes in topological order, working top-down.
  for (SUnit &SU : *SUnits) {
    // For now, only look at nodes with no data successors, such as stores.
    // These are especially important, due to the heuristics in
    // getNodePriority for nodes with no data successors.
    if (SU.NumSuccs != 0)
      continue;
    // For now, only look at nodes with exactly one data predecessor.
    if (SU.NumPreds != 1)
      continue;
    // Avoid prescheduling copies to virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU.getNode())
      if (N->getOpcode() == ISD::CopyToReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Locate the single data predecessor.
    SUnit *PredSU = nullptr;
    for (const SDep &Pred : SU.Preds)
      if (!Pred.isCtrl()) {
        PredSU = Pred.getSUnit();
        break;
      }
    assert(PredSU);

    // Don't rewrite edges that carry physregs, because that requires
    // additional support infrastructure.
    if (PredSU->hasPhysRegDefs)
      continue;
    // Short-circuit the case where SU is PredSU's only data successor.
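    // (With a single data successor there is no other use whose live range
    // this transformation could shorten.)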
    if (PredSU->NumSuccs == 1)
      continue;
    // Avoid prescheduling to copies from virtual registers, which don't behave
    // like other nodes from the perspective of scheduling heuristics.
    if (SDNode *N = SU.getNode())
      if (N->getOpcode() == ISD::CopyFromReg &&
          TargetRegisterInfo::isVirtualRegister
            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
        continue;

    // Perform checks on the successors of PredSU.
    for (const SDep &PredSucc : PredSU->Succs) {
      SUnit *PredSuccSU = PredSucc.getSUnit();
      if (PredSuccSU == &SU) continue;
      // If PredSU has another successor with no data successors, for
      // now don't attempt to choose either over the other.
      if (PredSuccSU->NumSuccs == 0)
        goto outer_loop_continue;
      // Don't break physical register dependencies.
      if (SU.hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
        if (canClobberPhysRegDefs(PredSuccSU, &SU, TII, TRI))
          goto outer_loop_continue;
      // Don't introduce graph cycles.
      if (scheduleDAG->IsReachable(&SU, PredSuccSU))
        goto outer_loop_continue;
    }

    // Ok, the transformation is safe and the heuristics suggest it is
    // profitable. Update the graph.
    LLVM_DEBUG(
        dbgs() << "    Prescheduling SU #" << SU.NodeNum << " next to PredSU #"
               << PredSU->NodeNum
               << " to guide scheduling in the presence of multiple uses\n");
    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
      SDep Edge = PredSU->Succs[i];
      assert(!Edge.isAssignedRegDep());
      SUnit *SuccSU = Edge.getSUnit();
      if (SuccSU != &SU) {
        Edge.setSUnit(PredSU);
        scheduleDAG->RemovePred(SuccSU, Edge);
        scheduleDAG->AddPred(&SU, Edge);
        Edge.setSUnit(&SU);
        scheduleDAG->AddPred(SuccSU, Edge);
        --i;
      }
    }
  outer_loop_continue:;
  }
}

/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
/// first (lower in the schedule). If both nodes are two-address, favor the
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
void RegReductionPQBase::AddPseudoTwoAddrDeps() {
  for (SUnit &SU : *SUnits) {
    if (!SU.isTwoAddress)
      continue;

    SDNode *Node = SU.getNode();
    if (!Node || !Node->isMachineOpcode() || SU.getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(&SU);
    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
    for (unsigned j = 0; j != NumOps; ++j) {
      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU.getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU)
        continue;
      for (const SDep &Succ : DUSU->Succs) {
        if (Succ.isCtrl())
          continue;
        SUnit *SuccSU = Succ.getSUnit();
        if (SuccSU == &SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
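        // (Only the height difference is actually checked below; a successor
        // more than one level below SU is considered too far away.)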
        if (SuccSU->getHeight() < SU.getHeight() &&
            (SU.getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU.hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, &SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
        if (!canClobberReachingPhysRegUse(SuccSU, &SU, scheduleDAG, TII, TRI) &&
            (!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU.isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, &SU)) {
          LLVM_DEBUG(dbgs()
                     << "    Adding a pseudo-two-addr edge from SU #"
                     << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
        }
      }
    }
  }
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                 CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  const TargetLowering *TLI = IS->TLI;

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
                                CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  const TargetLowering *TLI = IS->TLI;

  ILPBURRPriorityQueue *PQ =
    new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}