//===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides an interface for customizing the standard MachineScheduler
// pass. Note that the entire pass may be replaced as follows:
//
// <Target>TargetMachine::createPassConfig(PassManagerBase &PM) {
//   PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID);
//   ...}
//
// The MachineScheduler pass is only responsible for choosing the regions to be
// scheduled. Targets can override the DAG builder and scheduler without
// replacing the pass as follows:
//
// ScheduleDAGInstrs *<Target>PassConfig::
// createMachineScheduler(MachineSchedContext *C) {
//   return new CustomMachineScheduler(C);
// }
//
// The default scheduler, ScheduleDAGMILive, builds the DAG and drives list
// scheduling while updating the instruction stream, register pressure, and live
// intervals. Most targets don't need to override the DAG builder and list
// scheduler, but subtargets that require custom scheduling heuristics may
// plugin an alternate MachineSchedStrategy. The strategy is responsible for
// selecting the highest priority node from the list:
//
// ScheduleDAGInstrs *<Target>PassConfig::
// createMachineScheduler(MachineSchedContext *C) {
//   return new ScheduleDAGMILive(C, CustomStrategy(C));
// }
//
// The DAG builder can also be customized in a sense by adding DAG mutations
// that will run after DAG building and before list scheduling. DAG mutations
// can adjust dependencies based on target-specific knowledge or add weak edges
// to aid heuristics:
//
// ScheduleDAGInstrs *<Target>PassConfig::
// createMachineScheduler(MachineSchedContext *C) {
//   ScheduleDAGMI *DAG = createGenericSchedLive(C);
//   DAG->addMutation(new CustomDAGMutation(...));
//   return DAG;
// }
//
// A target that supports alternative schedulers can use the
// MachineSchedRegistry to allow command line selection. This can be done by
// implementing the following boilerplate:
//
// static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) {
//  return new CustomMachineScheduler(C);
// }
// static MachineSchedRegistry
// SchedCustomRegistry("custom", "Run my target's custom scheduler",
//                     createCustomMachineSched);
//
//
// Finally, subtargets that don't need to implement custom heuristics but would
// like to configure the GenericScheduler's policy for a given scheduler region,
// including scheduling direction and register pressure tracking policy, can do
// this:
//
// void <SubTarget>Subtarget::
// overrideSchedPolicy(MachineSchedPolicy &Policy,
//                     unsigned NumRegionInstrs) const {
//   Policy.<Flag> = true;
// }
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_MACHINESCHEDULER_H
#define LLVM_CODEGEN_MACHINESCHEDULER_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <memory>
#include <string>
#include <vector>

namespace llvm {

extern cl::opt<bool> ForceTopDown;
extern cl::opt<bool> ForceBottomUp;
extern cl::opt<bool> VerifyScheduling;

class AAResults;
class LiveIntervals;
class MachineDominatorTree;
class MachineFunction;
class MachineInstr;
class MachineLoopInfo;
class RegisterClassInfo;
class SchedDFSResult;
class ScheduleHazardRecognizer;
class TargetInstrInfo;
class TargetPassConfig;
class TargetRegisterInfo;

/// MachineSchedContext provides enough context from the MachineScheduler pass
/// for the target to instantiate a scheduler.
struct MachineSchedContext {
  MachineFunction *MF = nullptr;
  const MachineLoopInfo *MLI = nullptr;
  const MachineDominatorTree *MDT = nullptr;
  const TargetPassConfig *PassConfig = nullptr;
  AAResults *AA = nullptr;
  LiveIntervals *LIS = nullptr;

  // Default to null like every other member so a context is never observed
  // with a dangling pointer; the out-of-line constructor installs the real
  // RegisterClassInfo.
  RegisterClassInfo *RegClassInfo = nullptr;

  MachineSchedContext();
  virtual ~MachineSchedContext();
};

/// MachineSchedRegistry provides a selection of available machine instruction
/// schedulers.
class MachineSchedRegistry
    : public MachinePassRegistryNode<
          ScheduleDAGInstrs *(*)(MachineSchedContext *)> {
public:
  using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);

  // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
  using FunctionPassCtor = ScheduleDAGCtor;

  static MachinePassRegistry<ScheduleDAGCtor> Registry;

  MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
      : MachinePassRegistryNode(N, D, C) {
    Registry.Add(this);
  }

  ~MachineSchedRegistry() { Registry.Remove(this); }

  // Accessors.
  //
  MachineSchedRegistry *getNext() const {
    return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
  }

  static MachineSchedRegistry *getList() {
    return (MachineSchedRegistry *)Registry.getList();
  }

  static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
    Registry.setListener(L);
  }
};

class ScheduleDAGMI;

/// Define a generic scheduling policy for targets that don't provide their own
/// MachineSchedStrategy. This can be overridden for each scheduling region
/// before building the DAG.
struct MachineSchedPolicy {
  // Allow the scheduler to disable register pressure tracking.
  bool ShouldTrackPressure = false;
  /// Track LaneMasks to allow reordering of independent subregister writes
  /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks()
  bool ShouldTrackLaneMasks = false;

  // Allow the scheduler to force top-down or bottom-up scheduling. If neither
  // is true, the scheduler runs in both directions and converges.
  bool OnlyTopDown = false;
  bool OnlyBottomUp = false;

  // Disable heuristic that tries to fetch nodes from long dependency chains
  // first.
  bool DisableLatencyHeuristic = false;

  // Compute DFSResult for use in scheduling heuristics.
  bool ComputeDFSResult = false;

  MachineSchedPolicy() = default;
};

/// MachineSchedStrategy - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
/// Initialization sequence:
///   initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots
class MachineSchedStrategy {
  virtual void anchor();

public:
  virtual ~MachineSchedStrategy() = default;

  /// Optionally override the per-region scheduling policy.
  virtual void initPolicy(MachineBasicBlock::iterator Begin,
                          MachineBasicBlock::iterator End,
                          unsigned NumRegionInstrs) {}

  virtual void dumpPolicy() const {}

  /// Check if pressure tracking is needed before building the DAG and
  /// initializing this strategy. Called after initPolicy.
  virtual bool shouldTrackPressure() const { return true; }

  /// Returns true if lanemasks should be tracked. LaneMask tracking is
  /// necessary to reorder independent subregister defs for the same vreg.
  /// This has to be enabled in combination with shouldTrackPressure().
  virtual bool shouldTrackLaneMasks() const { return false; }

  // If this method returns true, handling of the scheduling regions
  // themselves (in case of a scheduling boundary in MBB) will be done
  // beginning with the topmost region of MBB.
  virtual bool doMBBSchedRegionsTopDown() const { return false; }

  /// Initialize the strategy after building the DAG for a new region.
  virtual void initialize(ScheduleDAGMI *DAG) = 0;

  /// Tell the strategy that MBB is about to be processed.
  virtual void enterMBB(MachineBasicBlock *MBB) {}

  /// Tell the strategy that current MBB is done.
  virtual void leaveMBB() {}

  /// Notify this strategy that all roots have been released (including those
  /// that depend on EntrySU or ExitSU).
  virtual void registerRoots() {}

  /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
  /// schedule the node at the top of the unscheduled region. Otherwise it will
  /// be scheduled at the bottom.
  virtual SUnit *pickNode(bool &IsTopNode) = 0;

  /// Scheduler callback to notify that a new subtree is scheduled.
  virtual void scheduleTree(unsigned SubtreeID) {}

  /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an
  /// instruction and updated scheduled/remaining flags in the DAG nodes.
  virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;

  /// When all predecessor dependencies have been resolved, free this node for
  /// top-down scheduling.
  virtual void releaseTopNode(SUnit *SU) = 0;

  /// When all successor dependencies have been resolved, free this node for
  /// bottom-up scheduling.
  virtual void releaseBottomNode(SUnit *SU) = 0;
};

/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply
/// schedules machine instructions according to the given MachineSchedStrategy
/// without much extra book-keeping. This is the common functionality between
/// PreRA and PostRA MachineScheduler.
class ScheduleDAGMI : public ScheduleDAGInstrs {
protected:
  AAResults *AA;
  LiveIntervals *LIS;
  std::unique_ptr<MachineSchedStrategy> SchedImpl;

  /// Ordered list of DAG postprocessing steps.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;

  /// The top of the unscheduled zone.
  MachineBasicBlock::iterator CurrentTop;

  /// The bottom of the unscheduled zone.
  MachineBasicBlock::iterator CurrentBottom;

  /// Record the next node in a scheduled cluster.
  const SUnit *NextClusterPred = nullptr;
  const SUnit *NextClusterSucc = nullptr;

#ifndef NDEBUG
  /// The number of instructions scheduled so far. Used to cut off the
  /// scheduler at the point determined by misched-cutoff.
  unsigned NumInstrsScheduled = 0;
#endif

public:
  ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
                bool RemoveKillFlags)
      : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA),
        LIS(C->LIS), SchedImpl(std::move(S)) {}

  // Provide a vtable anchor
  ~ScheduleDAGMI() override;

  /// If this method returns true, handling of the scheduling regions
  /// themselves (in case of a scheduling boundary in MBB) will be done
  /// beginning with the topmost region of MBB.
  bool doMBBSchedRegionsTopDown() const override {
    return SchedImpl->doMBBSchedRegionsTopDown();
  }

  // Returns LiveIntervals instance for use in DAG mutators and such.
  LiveIntervals *getLIS() const { return LIS; }

  /// Return true if this DAG supports VReg liveness and RegPressure.
  virtual bool hasVRegLiveness() const { return false; }

  /// Add a postprocessing step to the DAG builder.
  /// Mutations are applied in the order that they are added after normal DAG
  /// building and before MachineSchedStrategy initialization.
  ///
  /// ScheduleDAGMI takes ownership of the Mutation object.
  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
    if (Mutation)
      Mutations.push_back(std::move(Mutation));
  }

  MachineBasicBlock::iterator top() const { return CurrentTop; }
  MachineBasicBlock::iterator bottom() const { return CurrentBottom; }

  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
  /// region. This covers all instructions in a block, while schedule() may only
  /// cover a subset.
  void enterRegion(MachineBasicBlock *bb,
                   MachineBasicBlock::iterator begin,
                   MachineBasicBlock::iterator end,
                   unsigned regioninstrs) override;

  /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
  /// reorderable instructions.
  void schedule() override;

  void startBlock(MachineBasicBlock *bb) override;
  void finishBlock() override;

  /// Change the position of an instruction within the basic block and update
  /// live ranges and region boundary iterators.
  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);

  const SUnit *getNextClusterPred() const { return NextClusterPred; }

  const SUnit *getNextClusterSucc() const { return NextClusterSucc; }

  void viewGraph(const Twine &Name, const Twine &Title) override;
  void viewGraph() override;

protected:
  // Top-Level entry points for the schedule() driver...

  /// Apply each ScheduleDAGMutation step in order. This allows different
  /// instances of ScheduleDAGMI to perform custom DAG postprocessing.
  void postprocessDAG();

  /// Release ExitSU predecessors and setup scheduler queues.
  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);

  /// Update scheduler DAG and queues after scheduling an instruction.
  void updateQueues(SUnit *SU, bool IsTopNode);

  /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
  void placeDebugValues();

  /// dump the scheduled Sequence.
  void dumpSchedule() const;

  // Lesser helpers...
  bool checkSchedLimit();

  void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                             SmallVectorImpl<SUnit*> &BotRoots);

  void releaseSucc(SUnit *SU, SDep *SuccEdge);
  void releaseSuccessors(SUnit *SU);
  void releasePred(SUnit *SU, SDep *PredEdge);
  void releasePredecessors(SUnit *SU);
};

/// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules
/// machine instructions while updating LiveIntervals and tracking regpressure.
class ScheduleDAGMILive : public ScheduleDAGMI {
protected:
  RegisterClassInfo *RegClassInfo;

  /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
  /// will be empty.
  SchedDFSResult *DFSResult = nullptr;
  BitVector ScheduledTrees;

  MachineBasicBlock::iterator LiveRegionEnd;

  /// Maps vregs to the SUnits of their uses in the current scheduling region.
  VReg2SUnitMultiMap VRegUses;

  // Map each SU to its summary of pressure changes. This array is updated for
  // liveness during bottom-up scheduling. Top-down scheduling may proceed but
  // has no effect on the pressure diffs.
  PressureDiffs SUPressureDiffs;

  /// Register pressure in this region computed by initRegPressure.
  bool ShouldTrackPressure = false;
  bool ShouldTrackLaneMasks = false;
  IntervalPressure RegPressure;
  RegPressureTracker RPTracker;

  /// List of pressure sets that exceed the target's pressure limit before
  /// scheduling, listed in increasing set ID order. Each pressure set is paired
  /// with its max pressure in the currently scheduled regions.
  std::vector<PressureChange> RegionCriticalPSets;

  /// The top of the unscheduled zone.
  IntervalPressure TopPressure;
  RegPressureTracker TopRPTracker;

  /// The bottom of the unscheduled zone.
  IntervalPressure BotPressure;
  RegPressureTracker BotRPTracker;

  /// True if disconnected subregister components are already renamed.
  /// The renaming is only done on demand if lane masks are tracked.
  bool DisconnectedComponentsRenamed = false;

public:
  ScheduleDAGMILive(MachineSchedContext *C,
                    std::unique_ptr<MachineSchedStrategy> S)
      : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false),
        RegClassInfo(C->RegClassInfo), RPTracker(RegPressure),
        TopRPTracker(TopPressure), BotRPTracker(BotPressure) {}

  ~ScheduleDAGMILive() override;

  /// Return true if this DAG supports VReg liveness and RegPressure.
  bool hasVRegLiveness() const override { return true; }

  /// Return true if register pressure tracking is enabled.
  bool isTrackingPressure() const { return ShouldTrackPressure; }

  /// Get current register pressure for the top scheduled instructions.
  const IntervalPressure &getTopPressure() const { return TopPressure; }
  const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }

  /// Get current register pressure for the bottom scheduled instructions.
  const IntervalPressure &getBotPressure() const { return BotPressure; }
  const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }

  /// Get register pressure for the entire scheduling region before scheduling.
  const IntervalPressure &getRegPressure() const { return RegPressure; }

  const std::vector<PressureChange> &getRegionCriticalPSets() const {
    return RegionCriticalPSets;
  }

  PressureDiff &getPressureDiff(const SUnit *SU) {
    return SUPressureDiffs[SU->NodeNum];
  }
  const PressureDiff &getPressureDiff(const SUnit *SU) const {
    return SUPressureDiffs[SU->NodeNum];
  }

  /// Compute a DFSResult after DAG building is complete, and before any
  /// queue comparisons.
  void computeDFSResult();

  /// Return a non-null DFS result if the scheduling strategy initialized it.
  const SchedDFSResult *getDFSResult() const { return DFSResult; }

  BitVector &getScheduledTrees() { return ScheduledTrees; }

  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
  /// region. This covers all instructions in a block, while schedule() may only
  /// cover a subset.
  void enterRegion(MachineBasicBlock *bb,
                   MachineBasicBlock::iterator begin,
                   MachineBasicBlock::iterator end,
                   unsigned regioninstrs) override;

  /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
  /// reorderable instructions.
  void schedule() override;

  /// Compute the cyclic critical path through the DAG.
  unsigned computeCyclicCriticalPath();

  void dump() const override;

protected:
  // Top-Level entry points for the schedule() driver...

  /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
  /// enabled. This sets up three trackers. RPTracker will cover the entire DAG
  /// region, TopTracker and BottomTracker will be initialized to the top and
  /// bottom of the DAG region without covering any unscheduled instruction.
  void buildDAGWithRegPressure();

  /// Release ExitSU predecessors and setup scheduler queues. Re-position
  /// the Top RP tracker in case the region beginning has changed.
  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);

  /// Move an instruction and update register pressure.
  void scheduleMI(SUnit *SU, bool IsTopNode);

  // Lesser helpers...

  void initRegPressure();

  void updatePressureDiffs(ArrayRef<RegisterMaskPair> LiveUses);

  void updateScheduledPressure(const SUnit *SU,
                               const std::vector<unsigned> &NewMaxPressure);

  void collectVRegUses(SUnit &SU);
};

//===----------------------------------------------------------------------===//
///
/// Helpers for implementing custom MachineSchedStrategy classes. These take
/// care of the book-keeping associated with list scheduling heuristics.
///
//===----------------------------------------------------------------------===//

/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
///
/// This is a convenience class that may be used by implementations of
/// MachineSchedStrategy.
class ReadyQueue {
  unsigned ID;
  std::string Name;
  std::vector<SUnit*> Queue;

public:
  ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}

  unsigned getID() const { return ID; }

  StringRef getName() const { return Name; }

  // SU is in this queue if its NodeQueueId is a superset of this ID.
  bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }

  bool empty() const { return Queue.empty(); }

  void clear() { Queue.clear(); }

  unsigned size() const { return Queue.size(); }

  using iterator = std::vector<SUnit*>::iterator;

  iterator begin() { return Queue.begin(); }

  iterator end() { return Queue.end(); }

  ArrayRef<SUnit*> elements() { return Queue; }

  iterator find(SUnit *SU) { return llvm::find(Queue, SU); }

  /// Append SU and mark it as a member of this queue via its NodeQueueId bit.
  void push(SUnit *SU) {
    Queue.push_back(SU);
    SU->NodeQueueId |= ID;
  }

  /// Remove the element at I in O(1) by overwriting it with the last element
  /// and popping the back; queue order is NOT preserved. Returns an iterator
  /// to the slot I, which now holds the element that was moved into it.
  iterator remove(iterator I) {
    (*I)->NodeQueueId &= ~ID;
    *I = Queue.back();
    unsigned idx = I - Queue.begin();
    Queue.pop_back();
    return Queue.begin() + idx;
  }

  void dump() const;
};

/// Summarize the unscheduled region.
struct SchedRemainder {
  // Critical path through the DAG in expected latency.
  unsigned CriticalPath;
  unsigned CyclicCritPath;

  // Scaled count of micro-ops left to schedule.
  unsigned RemIssueCount;

  bool IsAcyclicLatencyLimited;

  // Unscheduled resources
  SmallVector<unsigned, 16> RemainingCounts;

  SchedRemainder() { reset(); }

  void reset() {
    CriticalPath = 0;
    CyclicCritPath = 0;
    RemIssueCount = 0;
    IsAcyclicLatencyLimited = false;
    RemainingCounts.clear();
  }

  void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
};

/// Each Scheduling boundary is associated with ready queues.
/// It tracks the
/// current cycle in the direction of movement, and maintains the state
/// of "hazards" and other interlocks at the current cycle.
class SchedBoundary {
public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  ScheduleDAGMI *DAG = nullptr;
  const TargetSchedModel *SchedModel = nullptr;
  SchedRemainder *Rem = nullptr;

  ReadyQueue Available;
  ReadyQueue Pending;

  ScheduleHazardRecognizer *HazardRec = nullptr;

private:
  /// True if the pending Q should be checked/updated before scheduling another
  /// instruction.
  bool CheckPending;

  /// Number of cycles it takes to issue the instructions scheduled in this
  /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
  /// See getStalls().
  unsigned CurrCycle;

  /// Micro-ops issued in the current cycle
  unsigned CurrMOps;

  /// MinReadyCycle - Cycle of the soonest available instruction.
  unsigned MinReadyCycle;

  // The expected latency of the critical path in this scheduled zone.
  unsigned ExpectedLatency;

  // The latency of dependence chains leading into this zone.
  // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
  // For each cycle scheduled: DLat -= 1.
  unsigned DependentLatency;

  /// Count the scheduled (issued) micro-ops that can be retired by
  /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
  unsigned RetiredMOps;

  // Count scheduled resources that have been executed. Resources are
  // considered executed if they become ready in the time that it takes to
  // saturate any resource including the one in question. Counts are scaled
  // for direct comparison with other resources. Counts can be compared with
  // MOps * getMicroOpFactor and Latency * getLatencyFactor.
  SmallVector<unsigned, 16> ExecutedResCounts;

  /// Cache the max count for a single resource.
  unsigned MaxExecutedResCount;

  // Cache the critical resources ID in this scheduled zone.
  unsigned ZoneCritResIdx;

  // Is the scheduled region resource limited vs. latency limited.
  bool IsResourceLimited;

  // Record the highest cycle at which each resource has been reserved by a
  // scheduled instruction.
  SmallVector<unsigned, 16> ReservedCycles;

  // For each PIdx, stores first index into ReservedCycles that corresponds to
  // it.
  SmallVector<unsigned, 16> ReservedCyclesIndex;

#ifndef NDEBUG
  // Remember the greatest possible stall as an upper bound on the number of
  // times we should retry the pending queue because of a hazard.
  unsigned MaxObservedStall;
#endif

public:
  /// Pending queues extend the ready queues with the same ID and the
  /// PendingFlag set.
  SchedBoundary(unsigned ID, const Twine &Name):
    Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") {
    reset();
  }

  ~SchedBoundary();

  void reset();

  void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
            SchedRemainder *rem);

  /// True if this is the top (downward-moving) boundary; determined by which
  /// queue ID this boundary was constructed with.
  bool isTop() const {
    return Available.getID() == TopQID;
  }

  /// Number of cycles to issue the instructions scheduled in this zone.
  unsigned getCurrCycle() const { return CurrCycle; }

  /// Micro-ops issued in the current cycle
  unsigned getCurrMOps() const { return CurrMOps; }

  // The latency of dependence chains leading into this zone.
  unsigned getDependentLatency() const { return DependentLatency; }

  /// Get the number of latency cycles "covered" by the scheduled
  /// instructions. This is the larger of the critical path within the zone
  /// and the number of cycles required to issue the instructions.
  unsigned getScheduledLatency() const {
    return std::max(ExpectedLatency, CurrCycle);
  }

  /// Remaining latency for SU in this zone's direction: height when scheduling
  /// top-down, depth when scheduling bottom-up.
  unsigned getUnscheduledLatency(SUnit *SU) const {
    return isTop() ? SU->getHeight() : SU->getDepth();
  }

  unsigned getResourceCount(unsigned ResIdx) const {
    return ExecutedResCounts[ResIdx];
  }

  /// Get the scaled count of scheduled micro-ops and resources, including
  /// executed resources.
  unsigned getCriticalCount() const {
    if (!ZoneCritResIdx)
      return RetiredMOps * SchedModel->getMicroOpFactor();
    return getResourceCount(ZoneCritResIdx);
  }

  /// Get a scaled count for the minimum execution time of the scheduled
  /// micro-ops that are ready to execute by getExecutedCount. Notice the
  /// feedback loop.
  unsigned getExecutedCount() const {
    return std::max(CurrCycle * SchedModel->getLatencyFactor(),
                    MaxExecutedResCount);
  }

  unsigned getZoneCritResIdx() const { return ZoneCritResIdx; }

  // Is the scheduled region resource limited vs. latency limited.
  bool isResourceLimited() const { return IsResourceLimited; }

  /// Get the difference between the given SUnit's ready time and the current
  /// cycle.
  unsigned getLatencyStallCycles(SUnit *SU);

  unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
                                          unsigned Cycles);

  std::pair<unsigned, unsigned> getNextResourceCycle(unsigned PIdx,
                                                     unsigned Cycles);

  /// Check whether issuing SU at the current state would cause a hazard
  /// (defined out of line; see MachineScheduler.cpp).
  bool checkHazard(SUnit *SU);

  unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);

  unsigned getOtherResourceCount(unsigned &OtherCritIdx);

  /// Release SU to make it ready. If it's not in hazard, remove it from
  /// pending queue (if already in) and push into available queue.
  /// Otherwise, push the SU into pending queue.
  ///
  /// @param SU The unit to be released.
  /// @param ReadyCycle Until which cycle the unit is ready.
  /// @param InPQueue Whether SU is already in pending queue.
  /// @param Idx Position offset in pending queue (if in it).
  void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
                   unsigned Idx = 0);

  void bumpCycle(unsigned NextCycle);

  void incExecutedResources(unsigned PIdx, unsigned Count);

  unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);

  void bumpNode(SUnit *SU);

  void releasePending();

  void removeReady(SUnit *SU);

  /// Call this before applying any other heuristics to the Available queue.
  /// Updates the Available/Pending Q's if necessary and returns the single
  /// available instruction, or NULL if there are multiple candidates.
  SUnit *pickOnlyChoice();

  void dumpScheduledState() const;
};

/// Base class for GenericScheduler. This class maintains information about
/// scheduling candidates based on TargetSchedModel making it easy to implement
/// heuristics for either preRA or postRA scheduling.
class GenericSchedulerBase : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason : uint8_t {
    NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak,
    RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
    TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};

#ifndef NDEBUG
  static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency = false;
    unsigned ReduceResIdx = 0;
    unsigned DemandResIdx = 0;

    CandPolicy() = default;

    bool operator==(const CandPolicy &RHS) const {
      return ReduceLatency == RHS.ReduceLatency &&
             ReduceResIdx == RHS.ReduceResIdx &&
             DemandResIdx == RHS.DemandResIdx;
    }
    bool operator!=(const CandPolicy &RHS) const {
      return !(*this == RHS);
    }
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources = 0;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources = 0;

    SchedResourceDelta() = default;

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
          && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by GenericScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Whether this candidate should be scheduled at top/bottom.
    bool AtTop;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate() { reset(CandPolicy()); }
    SchedCandidate(const CandPolicy &Policy) { reset(Policy); }

    /// Clear all candidate state, installing NewPolicy as the zone policy.
    void reset(const CandPolicy &NewPolicy) {
      Policy = NewPolicy;
      SU = nullptr;
      Reason = NoCand;
      AtTop = false;
      RPDelta = RegPressureDelta();
      ResDelta = SchedResourceDelta();
    }

    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      AtTop = Best.AtTop;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

protected:
  const MachineSchedContext *Context;
  const TargetSchedModel *SchedModel = nullptr;
  const TargetRegisterInfo *TRI = nullptr;

  SchedRemainder Rem;

  GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {}

  void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
                 SchedBoundary *OtherZone);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand);
#endif

private:
  bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
                           bool ComputeRemLatency, unsigned &RemLatency) const;
};

// Utility functions used by heuristics in tryCandidate().
// NOTE(review): the helpers below are defined out of line in
// MachineScheduler.cpp; the comments summarize intent from names and
// signatures -- confirm exact tie-breaking semantics at the definitions.

/// Compare TryVal against CandVal, preferring the smaller value; records
/// Reason on the candidate that wins the comparison.
bool tryLess(int TryVal, int CandVal,
             GenericSchedulerBase::SchedCandidate &TryCand,
             GenericSchedulerBase::SchedCandidate &Cand,
             GenericSchedulerBase::CandReason Reason);
/// Mirror of tryLess, preferring the larger value.
bool tryGreater(int TryVal, int CandVal,
                GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                GenericSchedulerBase::CandReason Reason);
/// Compare two candidates on latency relative to the given boundary zone.
bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
                GenericSchedulerBase::SchedCandidate &Cand,
                SchedBoundary &Zone);
/// Compare two register-pressure changes; TRI/MF are used for reporting the
/// pressure-set involved.
bool tryPressure(const PressureChange &TryP,
                 const PressureChange &CandP,
                 GenericSchedulerBase::SchedCandidate &TryCand,
                 GenericSchedulerBase::SchedCandidate &Cand,
                 GenericSchedulerBase::CandReason Reason,
                 const TargetRegisterInfo *TRI,
                 const MachineFunction &MF);
/// Count of SU's remaining weak edges on the given side.
unsigned getWeakLeft(const SUnit *SU, bool isTop);
/// Bias for copies related to physical registers; sign conventions are
/// defined at the implementation.
int biasPhysReg(const SUnit *SU, bool isTop);

/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class GenericScheduler : public GenericSchedulerBase {
public:
  GenericScheduler(const MachineSchedContext *C):
    GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
    Bot(SchedBoundary::BotQID, "BotQ") {}

  /// Configure RegionPolicy for the region [Begin, End) containing
  /// NumRegionInstrs instructions, before scheduling begins.
  void initPolicy(MachineBasicBlock::iterator Begin,
                  MachineBasicBlock::iterator End,
                  unsigned NumRegionInstrs) override;

  void dumpPolicy() const override;

  /// Pressure tracking is a per-region decision (see RegionPolicy).
  bool shouldTrackPressure() const override {
    return RegionPolicy.ShouldTrackPressure;
  }

  bool shouldTrackLaneMasks() const override {
    return RegionPolicy.ShouldTrackLaneMasks;
  }

  void initialize(ScheduleDAGMI *dag) override;

  /// Pick the next node to schedule; sets IsTopNode to the chosen boundary.
  SUnit *pickNode(bool &IsTopNode) override;

  void schedNode(SUnit *SU, bool IsTopNode) override;

  /// Release SU into the top boundary's ready queue. Already-scheduled nodes
  /// are ignored. Clearing TopCand.SU invalidates the cached candidate
  /// (its isValid() becomes false) because the ready set just changed.
  void releaseTopNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;

    Top.releaseNode(SU, SU->TopReadyCycle, false);
    TopCand.SU = nullptr;
  }

  /// Mirror of releaseTopNode for the bottom boundary.
  void releaseBottomNode(SUnit *SU) override {
    if (SU->isScheduled)
      return;

    Bot.releaseNode(SU, SU->BotReadyCycle, false);
    BotCand.SU = nullptr;
  }

  void registerRoots() override;

protected:
  ScheduleDAGMILive *DAG = nullptr;

  // Policy established per region by initPolicy().
  MachineSchedPolicy RegionPolicy;

  // State of the top and bottom scheduled instruction boundaries.
  SchedBoundary Top;
  SchedBoundary Bot;

  /// Candidate last picked from Top boundary.
  SchedCandidate TopCand;
  /// Candidate last picked from Bot boundary.
  SchedCandidate BotCand;

  void checkAcyclicLatency();

  /// Initialize Cand for SU at the given boundary, using RPTracker /
  /// TempTracker for pressure information.
  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     RegPressureTracker &TempTracker);

  /// Core comparison hook; virtual so targets can override the heuristics.
  virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                            SchedBoundary *Zone) const;

  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone,
                         const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Candidate);

  void reschedulePhysReg(SUnit *SU, bool isTop);
};

/// PostGenericScheduler - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
/// Callbacks from ScheduleDAGMI:
///   initPolicy -> initialize(DAG) -> registerRoots -> pickNode ...
class PostGenericScheduler : public GenericSchedulerBase {
protected:
  ScheduleDAGMI *DAG = nullptr;

  // Only a Top boundary is declared; bottom-side roots are merely collected
  // in BotRoots (see releaseBottomNode below).
  SchedBoundary Top;
  SmallVector<SUnit*, 8> BotRoots;

public:
  PostGenericScheduler(const MachineSchedContext *C):
    GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}

  ~PostGenericScheduler() override = default;

  void initPolicy(MachineBasicBlock::iterator Begin,
                  MachineBasicBlock::iterator End,
                  unsigned NumRegionInstrs) override {
    /* no configurable policy */
  }

  /// PostRA scheduling does not track pressure.
bool shouldTrackPressure() const override { return false; }

void initialize(ScheduleDAGMI *Dag) override;

void registerRoots() override;

SUnit *pickNode(bool &IsTopNode) override;

/// Subtree analysis is unsupported post-RA; reaching this is a bug.
void scheduleTree(unsigned SubtreeID) override {
  llvm_unreachable("PostRA scheduler does not support subtree analysis.");
}

void schedNode(SUnit *SU, bool IsTopNode) override;

/// Release SU into the top boundary's ready queue; already-scheduled nodes
/// are ignored.
void releaseTopNode(SUnit *SU) override {
  if (SU->isScheduled)
    return;
  Top.releaseNode(SU, SU->TopReadyCycle, false);
}

// Only called for roots. This scheduler has no Bot boundary, so bottom-side
// release just records the root.
void releaseBottomNode(SUnit *SU) override {
  BotRoots.push_back(SU);
}

protected:
/// Comparison hook used when picking from the queue; virtual so targets can
/// refine the post-RA heuristics.
virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);

void pickNodeFromQueue(SchedCandidate &Cand);
};

/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
/// Adds default DAG mutations.
ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C);

/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);

/// DAG mutation presumably clustering nearby loads (see the DAG-mutation
/// discussion in this header's preamble); defined in MachineScheduler.cpp.
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
                             const TargetRegisterInfo *TRI);

/// Counterpart of createLoadClusterDAGMutation for stores.
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
                              const TargetRegisterInfo *TRI);

/// DAG mutation constraining scheduling around copies -- presumably to aid
/// coalescing; confirm at the definition.
std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
                               const TargetRegisterInfo *TRI);

} // end namespace llvm

#endif // LLVM_CODEGEN_MACHINESCHEDULER_H