//===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides an interface for customizing the standard MachineScheduler
// pass. Note that the entire pass may be replaced as follows:
//
//   <Target>TargetMachine::createPassConfig(PassManagerBase &PM) {
//     PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID);
//   ...}
//
// The MachineScheduler pass is only responsible for choosing the regions to be
// scheduled. Targets can override the DAG builder and scheduler without
// replacing the pass as follows:
//
//   ScheduleDAGInstrs *<Target>PassConfig::
//   createMachineScheduler(MachineSchedContext *C) {
//     return new CustomMachineScheduler(C);
//   }
//
// The default scheduler, ScheduleDAGMILive, builds the DAG and drives list
// scheduling while updating the instruction stream, register pressure, and live
// intervals. Most targets don't need to override the DAG builder and list
// scheduler, but subtargets that require custom scheduling heuristics may
// plugin an alternate MachineSchedStrategy. The strategy is responsible for
// selecting the highest priority node from the list:
//
//   ScheduleDAGInstrs *<Target>PassConfig::
//   createMachineScheduler(MachineSchedContext *C) {
//     return new ScheduleDAGMILive(C, CustomStrategy(C));
//   }
//
// The DAG builder can also be customized in a sense by adding DAG mutations
// that will run after DAG building and before list scheduling. DAG mutations
// can adjust dependencies based on target-specific knowledge or add weak edges
// to aid heuristics:
//
//   ScheduleDAGInstrs *<Target>PassConfig::
//   createMachineScheduler(MachineSchedContext *C) {
//     ScheduleDAGMI *DAG = createGenericSchedLive(C);
//     DAG->addMutation(new CustomDAGMutation(...));
//     return DAG;
//   }
//
// A target that supports alternative schedulers can use the
// MachineSchedRegistry to allow command line selection. This can be done by
// implementing the following boilerplate:
//
// static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) {
//   return new CustomMachineScheduler(C);
// }
// static MachineSchedRegistry
// SchedCustomRegistry("custom", "Run my target's custom scheduler",
//                     createCustomMachineSched);
//
//
// Finally, subtargets that don't need to implement custom heuristics but would
// like to configure the GenericScheduler's policy for a given scheduler region,
// including scheduling direction and register pressure tracking policy, can do
// this:
//
// void <SubTarget>Subtarget::
// overrideSchedPolicy(MachineSchedPolicy &Policy,
//                     unsigned NumRegionInstrs) const {
//   Policy.<Flag> = true;
// }
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_MACHINESCHEDULER_H
#define LLVM_CODEGEN_MACHINESCHEDULER_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
#include <memory>
#include <string>
#include <vector>

namespace llvm {

// Command-line knobs shared with the MachineScheduler implementation. In
// release (NDEBUG) builds the debug-only flags collapse to plain constants.
extern cl::opt<bool> ForceTopDown;
extern cl::opt<bool> ForceBottomUp;
extern cl::opt<bool> VerifyScheduling;
#ifndef NDEBUG
extern cl::opt<bool> ViewMISchedDAGs;
extern cl::opt<bool> PrintDAGs;
#else
extern const bool ViewMISchedDAGs;
extern const bool PrintDAGs;
#endif

class AAResults;
class LiveIntervals;
class MachineDominatorTree;
class MachineFunction;
class MachineInstr;
class MachineLoopInfo;
class RegisterClassInfo;
class SchedDFSResult;
class ScheduleHazardRecognizer;
class TargetInstrInfo;
class TargetPassConfig;
class TargetRegisterInfo;

/// MachineSchedContext provides enough context from the MachineScheduler pass
/// for the target to instantiate a scheduler.
struct MachineSchedContext {
  MachineFunction *MF = nullptr;
  const MachineLoopInfo *MLI = nullptr;
  const MachineDominatorTree *MDT = nullptr;
  const TargetPassConfig *PassConfig = nullptr;
  AAResults *AA = nullptr;
  LiveIntervals *LIS = nullptr;

  // NOTE(review): unlike the members above, this one has no in-class
  // initializer; presumably set up by the constructor — confirm in the .cpp.
  RegisterClassInfo *RegClassInfo;

  MachineSchedContext();
  virtual ~MachineSchedContext();
};

/// MachineSchedRegistry provides a selection of available machine instruction
/// schedulers.
class MachineSchedRegistry
    : public MachinePassRegistryNode<
          ScheduleDAGInstrs *(*)(MachineSchedContext *)> {
public:
  using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);

  // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
  using FunctionPassCtor = ScheduleDAGCtor;

  static MachinePassRegistry<ScheduleDAGCtor> Registry;

  /// Construct a registry node and add it to the global registry, making the
  /// scheduler selectable by name \p N on the command line.
  MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
      : MachinePassRegistryNode(N, D, C) {
    Registry.Add(this);
  }

  ~MachineSchedRegistry() { Registry.Remove(this); }

  // Accessors.
  //
  MachineSchedRegistry *getNext() const {
    return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
  }

  static MachineSchedRegistry *getList() {
    return (MachineSchedRegistry *)Registry.getList();
  }

  static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) {
    Registry.setListener(L);
  }
};

class ScheduleDAGMI;

/// Define a generic scheduling policy for targets that don't provide their own
/// MachineSchedStrategy. This can be overridden for each scheduling region
/// before building the DAG.
struct MachineSchedPolicy {
  // Allow the scheduler to disable register pressure tracking.
  bool ShouldTrackPressure = false;
  /// Track LaneMasks to allow reordering of independent subregister writes
  /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks()
  bool ShouldTrackLaneMasks = false;

  // Allow the scheduler to force top-down or bottom-up scheduling. If neither
  // is true, the scheduler runs in both directions and converges.
  bool OnlyTopDown = false;
  bool OnlyBottomUp = false;

  // Disable heuristic that tries to fetch nodes from long dependency chains
  // first.
  bool DisableLatencyHeuristic = false;

  // Compute DFSResult for use in scheduling heuristics.
  bool ComputeDFSResult = false;

  MachineSchedPolicy() = default;
};

/// MachineSchedStrategy - Interface to the scheduling algorithm used by
/// ScheduleDAGMI.
///
/// Initialization sequence:
///   initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots
class MachineSchedStrategy {
  virtual void anchor();

public:
  virtual ~MachineSchedStrategy() = default;

  /// Optionally override the per-region scheduling policy.
  virtual void initPolicy(MachineBasicBlock::iterator Begin,
                          MachineBasicBlock::iterator End,
                          unsigned NumRegionInstrs) {}

  virtual void dumpPolicy() const {}

  /// Check if pressure tracking is needed before building the DAG and
  /// initializing this strategy. Called after initPolicy.
  virtual bool shouldTrackPressure() const { return true; }

  /// Returns true if lanemasks should be tracked. LaneMask tracking is
  /// necessary to reorder independent subregister defs for the same vreg.
  /// This has to be enabled in combination with shouldTrackPressure().
  virtual bool shouldTrackLaneMasks() const { return false; }

  // If this method returns true, handling of the scheduling regions
  // themselves (in case of a scheduling boundary in MBB) will be done
  // beginning with the topmost region of MBB.
  virtual bool doMBBSchedRegionsTopDown() const { return false; }

  /// Initialize the strategy after building the DAG for a new region.
  virtual void initialize(ScheduleDAGMI *DAG) = 0;

  /// Tell the strategy that MBB is about to be processed.
  virtual void enterMBB(MachineBasicBlock *MBB) {};

  /// Tell the strategy that current MBB is done.
  virtual void leaveMBB() {};

  /// Notify this strategy that all roots have been released (including those
  /// that depend on EntrySU or ExitSU).
  virtual void registerRoots() {}

  /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
  /// schedule the node at the top of the unscheduled region. Otherwise it will
  /// be scheduled at the bottom.
  virtual SUnit *pickNode(bool &IsTopNode) = 0;

  /// Scheduler callback to notify that a new subtree is scheduled.
  virtual void scheduleTree(unsigned SubtreeID) {}

  /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an
  /// instruction and updated scheduled/remaining flags in the DAG nodes.
  virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;

  /// When all predecessor dependencies have been resolved, free this node for
  /// top-down scheduling.
  virtual void releaseTopNode(SUnit *SU) = 0;

  /// When all successor dependencies have been resolved, free this node for
  /// bottom-up scheduling.
  virtual void releaseBottomNode(SUnit *SU) = 0;
};

/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply
/// schedules machine instructions according to the given MachineSchedStrategy
/// without much extra book-keeping. This is the common functionality between
/// PreRA and PostRA MachineScheduler.
class ScheduleDAGMI : public ScheduleDAGInstrs {
protected:
  AAResults *AA;
  LiveIntervals *LIS;
  std::unique_ptr<MachineSchedStrategy> SchedImpl;

  /// Ordered list of DAG postprocessing steps.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;

  /// The top of the unscheduled zone.
  MachineBasicBlock::iterator CurrentTop;

  /// The bottom of the unscheduled zone.
  MachineBasicBlock::iterator CurrentBottom;

  /// Record the next node in a scheduled cluster.
  const SUnit *NextClusterPred = nullptr;
  const SUnit *NextClusterSucc = nullptr;

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  /// The number of instructions scheduled so far. Used to cut off the
  /// scheduler at the point determined by misched-cutoff.
  unsigned NumInstrsScheduled = 0;
#endif

public:
  ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S,
                bool RemoveKillFlags)
      : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA),
        LIS(C->LIS), SchedImpl(std::move(S)) {}

  // Provide a vtable anchor
  ~ScheduleDAGMI() override;

  /// If this method returns true, handling of the scheduling regions
  /// themselves (in case of a scheduling boundary in MBB) will be done
  /// beginning with the topmost region of MBB.
  bool doMBBSchedRegionsTopDown() const override {
    return SchedImpl->doMBBSchedRegionsTopDown();
  }

  // Returns LiveIntervals instance for use in DAG mutators and such.
  LiveIntervals *getLIS() const { return LIS; }

  /// Return true if this DAG supports VReg liveness and RegPressure.
  virtual bool hasVRegLiveness() const { return false; }

  /// Add a postprocessing step to the DAG builder.
  /// Mutations are applied in the order that they are added after normal DAG
  /// building and before MachineSchedStrategy initialization.
  ///
  /// ScheduleDAGMI takes ownership of the Mutation object.
  void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
    if (Mutation)
      Mutations.push_back(std::move(Mutation));
  }

  MachineBasicBlock::iterator top() const { return CurrentTop; }
  MachineBasicBlock::iterator bottom() const { return CurrentBottom; }

  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
  /// region. This covers all instructions in a block, while schedule() may only
  /// cover a subset.
  void enterRegion(MachineBasicBlock *bb,
                   MachineBasicBlock::iterator begin,
                   MachineBasicBlock::iterator end,
                   unsigned regioninstrs) override;

  /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
  /// reorderable instructions.
  void schedule() override;

  void startBlock(MachineBasicBlock *bb) override;
  void finishBlock() override;

  /// Change the position of an instruction within the basic block and update
  /// live ranges and region boundary iterators.
  void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);

  const SUnit *getNextClusterPred() const { return NextClusterPred; }

  const SUnit *getNextClusterSucc() const { return NextClusterSucc; }

  void viewGraph(const Twine &Name, const Twine &Title) override;
  void viewGraph() override;

protected:
  // Top-Level entry points for the schedule() driver...

  /// Apply each ScheduleDAGMutation step in order. This allows different
  /// instances of ScheduleDAGMI to perform custom DAG postprocessing.
  void postprocessDAG();

  /// Release ExitSU predecessors and setup scheduler queues.
  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);

  /// Update scheduler DAG and queues after scheduling an instruction.
  void updateQueues(SUnit *SU, bool IsTopNode);

  /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues.
  void placeDebugValues();

  /// dump the scheduled Sequence.
  void dumpSchedule() const;

  // Lesser helpers...
  bool checkSchedLimit();

  void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
                             SmallVectorImpl<SUnit*> &BotRoots);

  void releaseSucc(SUnit *SU, SDep *SuccEdge);
  void releaseSuccessors(SUnit *SU);
  void releasePred(SUnit *SU, SDep *PredEdge);
  void releasePredecessors(SUnit *SU);
};

/// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules
/// machine instructions while updating LiveIntervals and tracking regpressure.
class ScheduleDAGMILive : public ScheduleDAGMI {
protected:
  RegisterClassInfo *RegClassInfo;

  /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
  /// will be empty.
  SchedDFSResult *DFSResult = nullptr;
  BitVector ScheduledTrees;

  MachineBasicBlock::iterator LiveRegionEnd;

  /// Maps vregs to the SUnits of their uses in the current scheduling region.
  VReg2SUnitMultiMap VRegUses;

  // Map each SU to its summary of pressure changes. This array is updated for
  // liveness during bottom-up scheduling. Top-down scheduling may proceed but
  // has no effect on the pressure diffs.
  PressureDiffs SUPressureDiffs;

  /// Register pressure in this region computed by initRegPressure.
  bool ShouldTrackPressure = false;
  bool ShouldTrackLaneMasks = false;
  IntervalPressure RegPressure;
  RegPressureTracker RPTracker;

  /// List of pressure sets that exceed the target's pressure limit before
  /// scheduling, listed in increasing set ID order. Each pressure set is paired
  /// with its max pressure in the currently scheduled regions.
  std::vector<PressureChange> RegionCriticalPSets;

  /// The top of the unscheduled zone.
  IntervalPressure TopPressure;
  RegPressureTracker TopRPTracker;

  /// The bottom of the unscheduled zone.
  IntervalPressure BotPressure;
  RegPressureTracker BotRPTracker;

public:
  ScheduleDAGMILive(MachineSchedContext *C,
                    std::unique_ptr<MachineSchedStrategy> S)
      : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false),
        RegClassInfo(C->RegClassInfo), RPTracker(RegPressure),
        TopRPTracker(TopPressure), BotRPTracker(BotPressure) {}

  ~ScheduleDAGMILive() override;

  /// Return true if this DAG supports VReg liveness and RegPressure.
  bool hasVRegLiveness() const override { return true; }

  /// Return true if register pressure tracking is enabled.
  bool isTrackingPressure() const { return ShouldTrackPressure; }

  /// Get current register pressure for the top scheduled instructions.
  const IntervalPressure &getTopPressure() const { return TopPressure; }
  const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }

  /// Get current register pressure for the bottom scheduled instructions.
  const IntervalPressure &getBotPressure() const { return BotPressure; }
  const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }

  /// Get register pressure for the entire scheduling region before scheduling.
  const IntervalPressure &getRegPressure() const { return RegPressure; }

  const std::vector<PressureChange> &getRegionCriticalPSets() const {
    return RegionCriticalPSets;
  }

  PressureDiff &getPressureDiff(const SUnit *SU) {
    return SUPressureDiffs[SU->NodeNum];
  }
  const PressureDiff &getPressureDiff(const SUnit *SU) const {
    return SUPressureDiffs[SU->NodeNum];
  }

  /// Compute a DFSResult after DAG building is complete, and before any
  /// queue comparisons.
  void computeDFSResult();

  /// Return a non-null DFS result if the scheduling strategy initialized it.
  const SchedDFSResult *getDFSResult() const { return DFSResult; }

  BitVector &getScheduledTrees() { return ScheduledTrees; }

  /// Implement the ScheduleDAGInstrs interface for handling the next scheduling
  /// region. This covers all instructions in a block, while schedule() may only
  /// cover a subset.
  void enterRegion(MachineBasicBlock *bb,
                   MachineBasicBlock::iterator begin,
                   MachineBasicBlock::iterator end,
                   unsigned regioninstrs) override;

  /// Implement ScheduleDAGInstrs interface for scheduling a sequence of
  /// reorderable instructions.
  void schedule() override;

  /// Compute the cyclic critical path through the DAG.
  unsigned computeCyclicCriticalPath();

  void dump() const override;

protected:
  // Top-Level entry points for the schedule() driver...

  /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking
  /// enabled. This sets up three trackers. RPTracker will cover the entire DAG
  /// region, TopTracker and BottomTracker will be initialized to the top and
  /// bottom of the DAG region without covering any unscheduled instruction.
  void buildDAGWithRegPressure();

  /// Release ExitSU predecessors and setup scheduler queues. Re-position
  /// the Top RP tracker in case the region beginning has changed.
  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);

  /// Move an instruction and update register pressure.
  void scheduleMI(SUnit *SU, bool IsTopNode);

  // Lesser helpers...

  void initRegPressure();

  void updatePressureDiffs(ArrayRef<RegisterMaskPair> LiveUses);

  void updateScheduledPressure(const SUnit *SU,
                               const std::vector<unsigned> &NewMaxPressure);

  void collectVRegUses(SUnit &SU);
};

//===----------------------------------------------------------------------===//
///
/// Helpers for implementing custom MachineSchedStrategy classes. These take
/// care of the book-keeping associated with list scheduling heuristics.
///
//===----------------------------------------------------------------------===//

/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
///
/// This is a convenience class that may be used by implementations of
/// MachineSchedStrategy.
class ReadyQueue {
  unsigned ID;
  std::string Name;
  std::vector<SUnit*> Queue;

public:
  ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}

  unsigned getID() const { return ID; }

  StringRef getName() const { return Name; }

  // SU is in this queue if its NodeQueueID is a superset of this ID.
  bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }

  bool empty() const { return Queue.empty(); }

  void clear() { Queue.clear(); }

  unsigned size() const { return Queue.size(); }

  using iterator = std::vector<SUnit*>::iterator;

  iterator begin() { return Queue.begin(); }

  iterator end() { return Queue.end(); }

  ArrayRef<SUnit*> elements() { return Queue; }

  iterator find(SUnit *SU) { return llvm::find(Queue, SU); }

  void push(SUnit *SU) {
    Queue.push_back(SU);
    SU->NodeQueueId |= ID;
  }

  /// Remove the element at \p I in O(1) by swapping in the last element;
  /// returns an iterator to the element that took its place.
  iterator remove(iterator I) {
    (*I)->NodeQueueId &= ~ID;
    *I = Queue.back();
    unsigned idx = I - Queue.begin();
    Queue.pop_back();
    return Queue.begin() + idx;
  }

  void dump() const;
};

/// Summarize the unscheduled region.
struct SchedRemainder {
  // Critical path through the DAG in expected latency.
  unsigned CriticalPath;
  unsigned CyclicCritPath;

  // Scaled count of micro-ops left to schedule.
  unsigned RemIssueCount;

  bool IsAcyclicLatencyLimited;

  // Unscheduled resources
  SmallVector<unsigned, 16> RemainingCounts;

  SchedRemainder() { reset(); }

  void reset() {
    CriticalPath = 0;
    CyclicCritPath = 0;
    RemIssueCount = 0;
    IsAcyclicLatencyLimited = false;
    RemainingCounts.clear();
  }

  void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
};

/// Each Scheduling boundary is associated with ready queues. It tracks the
/// current cycle in the direction of movement, and maintains the state
/// of "hazards" and other interlocks at the current cycle.
class SchedBoundary {
public:
  /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
  enum {
    TopQID = 1,
    BotQID = 2,
    LogMaxQID = 2
  };

  ScheduleDAGMI *DAG = nullptr;
  const TargetSchedModel *SchedModel = nullptr;
  SchedRemainder *Rem = nullptr;

  ReadyQueue Available;
  ReadyQueue Pending;

  ScheduleHazardRecognizer *HazardRec = nullptr;

private:
  /// True if the pending Q should be checked/updated before scheduling another
  /// instruction.
  bool CheckPending;

  /// Number of cycles it takes to issue the instructions scheduled in this
  /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
  /// See getStalls().
  unsigned CurrCycle;

  /// Micro-ops issued in the current cycle
  unsigned CurrMOps;

  /// MinReadyCycle - Cycle of the soonest available instruction.
  unsigned MinReadyCycle;

  // The expected latency of the critical path in this scheduled zone.
  unsigned ExpectedLatency;

  // The latency of dependence chains leading into this zone.
  // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
  // For each cycle scheduled: DLat -= 1.
  unsigned DependentLatency;

  /// Count the scheduled (issued) micro-ops that can be retired by
  /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
  unsigned RetiredMOps;

  // Count scheduled resources that have been executed. Resources are
  // considered executed if they become ready in the time that it takes to
  // saturate any resource including the one in question. Counts are scaled
  // for direct comparison with other resources. Counts can be compared with
  // MOps * getMicroOpFactor and Latency * getLatencyFactor.
  SmallVector<unsigned, 16> ExecutedResCounts;

  /// Cache the max count for a single resource.
  unsigned MaxExecutedResCount;

  // Cache the critical resources ID in this scheduled zone.
  unsigned ZoneCritResIdx;

  // Is the scheduled region resource limited vs. latency limited.
  bool IsResourceLimited;

  // Record the highest cycle at which each resource has been reserved by a
  // scheduled instruction.
  SmallVector<unsigned, 16> ReservedCycles;

  /// For each PIdx, stores first index into ReservedCycles that corresponds to
  /// it.
  ///
  /// For example, consider the following 3 resources (ResourceCount =
  /// 3):
  ///
  /// +------------+--------+
  /// |ResourceName|NumUnits|
  /// +------------+--------+
  /// |      X     |    2   |
  /// +------------+--------+
  /// |      Y     |    3   |
  /// +------------+--------+
  /// |      Z     |    1   |
  /// +------------+--------+
  ///
  /// In this case, the total number of resource instances is 6. The
  /// vector \ref ReservedCycles will have a slot for each instance. The
  /// vector \ref ReservedCyclesIndex will track at what index the first
  /// instance of the resource is found in the vector of \ref
  /// ReservedCycles:
  ///
  ///         Indexes of instances in ReservedCycles
  ///                 0   1   2   3   4  5
  /// ReservedCyclesIndex[0] = 0; [X0, X1,
  /// ReservedCyclesIndex[1] = 2;          Y0, Y1, Y2
  /// ReservedCyclesIndex[2] = 5;                      Z
  SmallVector<unsigned, 16> ReservedCyclesIndex;

  // For each PIdx, stores the resource group IDs of its subunits
  SmallVector<APInt, 16> ResourceGroupSubUnitMasks;

#if LLVM_ENABLE_ABI_BREAKING_CHECKS
  // Remember the greatest possible stall as an upper bound on the number of
  // times we should retry the pending queue because of a hazard.
  unsigned MaxObservedStall;
#endif

public:
  /// Pending queues extend the ready queues with the same ID and the
  /// PendingFlag set.
  SchedBoundary(unsigned ID, const Twine &Name):
    Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") {
    reset();
  }

  ~SchedBoundary();

  void reset();

  void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
            SchedRemainder *rem);

  bool isTop() const {
    return Available.getID() == TopQID;
  }

  /// Number of cycles to issue the instructions scheduled in this zone.
  unsigned getCurrCycle() const { return CurrCycle; }

  /// Micro-ops issued in the current cycle
  unsigned getCurrMOps() const { return CurrMOps; }

  // The latency of dependence chains leading into this zone.
  unsigned getDependentLatency() const { return DependentLatency; }

  /// Get the number of latency cycles "covered" by the scheduled
  /// instructions. This is the larger of the critical path within the zone
  /// and the number of cycles required to issue the instructions.
  unsigned getScheduledLatency() const {
    return std::max(ExpectedLatency, CurrCycle);
  }

  unsigned getUnscheduledLatency(SUnit *SU) const {
    return isTop() ? SU->getHeight() : SU->getDepth();
  }

  unsigned getResourceCount(unsigned ResIdx) const {
    return ExecutedResCounts[ResIdx];
  }

  /// Get the scaled count of scheduled micro-ops and resources, including
  /// executed resources.
  unsigned getCriticalCount() const {
    if (!ZoneCritResIdx)
      return RetiredMOps * SchedModel->getMicroOpFactor();
    return getResourceCount(ZoneCritResIdx);
  }

  /// Get a scaled count for the minimum execution time of the scheduled
  /// micro-ops that are ready to execute by getExecutedCount. Notice the
  /// feedback loop.
  unsigned getExecutedCount() const {
    return std::max(CurrCycle * SchedModel->getLatencyFactor(),
                    MaxExecutedResCount);
  }

  unsigned getZoneCritResIdx() const { return ZoneCritResIdx; }

  // Is the scheduled region resource limited vs. latency limited.
  bool isResourceLimited() const { return IsResourceLimited; }

  /// Get the difference between the given SUnit's ready time and the current
  /// cycle.
  unsigned getLatencyStallCycles(SUnit *SU);

  unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
                                          unsigned Cycles);

  std::pair<unsigned, unsigned> getNextResourceCycle(const MCSchedClassDesc *SC,
                                                     unsigned PIdx,
                                                     unsigned Cycles);

  bool isUnbufferedGroup(unsigned PIdx) const {
    return SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin &&
           !SchedModel->getProcResource(PIdx)->BufferSize;
  }

  bool checkHazard(SUnit *SU);

  unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);

  unsigned getOtherResourceCount(unsigned &OtherCritIdx);

  /// Release SU to make it ready. If it's not in hazard, remove it from
  /// pending queue (if already in) and push into available queue.
  /// Otherwise, push the SU into pending queue.
  ///
  /// @param SU The unit to be released.
  /// @param ReadyCycle Until which cycle the unit is ready.
  /// @param InPQueue Whether SU is already in pending queue.
  /// @param Idx Position offset in pending queue (if in it).
  void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue,
                   unsigned Idx = 0);

  void bumpCycle(unsigned NextCycle);

  void incExecutedResources(unsigned PIdx, unsigned Count);

  unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx,
                         unsigned Cycles, unsigned ReadyCycle);

  void bumpNode(SUnit *SU);

  void releasePending();

  void removeReady(SUnit *SU);

  /// Call this before applying any other heuristics to the Available queue.
  /// Updates the Available/Pending Q's if necessary and returns the single
  /// available instruction, or NULL if there are multiple candidates.
  SUnit *pickOnlyChoice();

  /// Dump the state of the information that tracks resource usage.
  void dumpReservedCycles() const;
  void dumpScheduledState() const;
};

/// Base class for GenericScheduler. This class maintains information about
/// scheduling candidates based on TargetSchedModel making it easy to implement
/// heuristics for either preRA or postRA scheduling.
class GenericSchedulerBase : public MachineSchedStrategy {
public:
  /// Represent the type of SchedCandidate found within a single queue.
  /// pickNodeBidirectional depends on these listed by decreasing priority.
  enum CandReason : uint8_t {
    NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak,
    RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
    TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};

#ifndef NDEBUG
  static const char *getReasonStr(GenericSchedulerBase::CandReason Reason);
#endif

  /// Policy for scheduling the next instruction in the candidate's zone.
  struct CandPolicy {
    bool ReduceLatency = false;
    unsigned ReduceResIdx = 0;
    unsigned DemandResIdx = 0;

    CandPolicy() = default;

    bool operator==(const CandPolicy &RHS) const {
      return ReduceLatency == RHS.ReduceLatency &&
             ReduceResIdx == RHS.ReduceResIdx &&
             DemandResIdx == RHS.DemandResIdx;
    }
    bool operator!=(const CandPolicy &RHS) const {
      return !(*this == RHS);
    }
  };

  /// Status of an instruction's critical resource consumption.
  struct SchedResourceDelta {
    // Count critical resources in the scheduled region required by SU.
    unsigned CritResources = 0;

    // Count critical resources from another region consumed by SU.
    unsigned DemandedResources = 0;

    SchedResourceDelta() = default;

    bool operator==(const SchedResourceDelta &RHS) const {
      return CritResources == RHS.CritResources
          && DemandedResources == RHS.DemandedResources;
    }
    bool operator!=(const SchedResourceDelta &RHS) const {
      return !operator==(RHS);
    }
  };

  /// Store the state used by GenericScheduler heuristics, required for the
  /// lifetime of one invocation of pickNode().
  struct SchedCandidate {
    CandPolicy Policy;

    // The best SUnit candidate.
    SUnit *SU;

    // The reason for this candidate.
    CandReason Reason;

    // Whether this candidate should be scheduled at top/bottom.
    bool AtTop;

    // Register pressure values for the best candidate.
    RegPressureDelta RPDelta;

    // Critical resource consumption of the best candidate.
    SchedResourceDelta ResDelta;

    SchedCandidate() { reset(CandPolicy()); }
    SchedCandidate(const CandPolicy &Policy) { reset(Policy); }

    void reset(const CandPolicy &NewPolicy) {
      Policy = NewPolicy;
      SU = nullptr;
      Reason = NoCand;
      AtTop = false;
      RPDelta = RegPressureDelta();
      ResDelta = SchedResourceDelta();
    }

    bool isValid() const { return SU; }

    // Copy the status of another candidate without changing policy.
    void setBest(SchedCandidate &Best) {
      assert(Best.Reason != NoCand && "uninitialized Sched candidate");
      SU = Best.SU;
      Reason = Best.Reason;
      AtTop = Best.AtTop;
      RPDelta = Best.RPDelta;
      ResDelta = Best.ResDelta;
    }

    void initResourceDelta(const ScheduleDAGMI *DAG,
                           const TargetSchedModel *SchedModel);
  };

protected:
  const MachineSchedContext *Context;
  const TargetSchedModel *SchedModel = nullptr;
  const TargetRegisterInfo *TRI = nullptr;

  SchedRemainder Rem;

  GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {}

  void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
                 SchedBoundary *OtherZone);

#ifndef NDEBUG
  void traceCandidate(const SchedCandidate &Cand);
#endif

private:
  bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone,
                           bool ComputeRemLatency, unsigned &RemLatency) const;
};

// Utility functions used by heuristics in tryCandidate().
958 bool tryLess(int TryVal, int CandVal, 959 GenericSchedulerBase::SchedCandidate &TryCand, 960 GenericSchedulerBase::SchedCandidate &Cand, 961 GenericSchedulerBase::CandReason Reason); 962 bool tryGreater(int TryVal, int CandVal, 963 GenericSchedulerBase::SchedCandidate &TryCand, 964 GenericSchedulerBase::SchedCandidate &Cand, 965 GenericSchedulerBase::CandReason Reason); 966 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, 967 GenericSchedulerBase::SchedCandidate &Cand, 968 SchedBoundary &Zone); 969 bool tryPressure(const PressureChange &TryP, 970 const PressureChange &CandP, 971 GenericSchedulerBase::SchedCandidate &TryCand, 972 GenericSchedulerBase::SchedCandidate &Cand, 973 GenericSchedulerBase::CandReason Reason, 974 const TargetRegisterInfo *TRI, 975 const MachineFunction &MF); 976 unsigned getWeakLeft(const SUnit *SU, bool isTop); 977 int biasPhysReg(const SUnit *SU, bool isTop); 978 979 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance 980 /// the schedule. 
981 class GenericScheduler : public GenericSchedulerBase { 982 public: GenericScheduler(const MachineSchedContext * C)983 GenericScheduler(const MachineSchedContext *C): 984 GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"), 985 Bot(SchedBoundary::BotQID, "BotQ") {} 986 987 void initPolicy(MachineBasicBlock::iterator Begin, 988 MachineBasicBlock::iterator End, 989 unsigned NumRegionInstrs) override; 990 991 void dumpPolicy() const override; 992 shouldTrackPressure()993 bool shouldTrackPressure() const override { 994 return RegionPolicy.ShouldTrackPressure; 995 } 996 shouldTrackLaneMasks()997 bool shouldTrackLaneMasks() const override { 998 return RegionPolicy.ShouldTrackLaneMasks; 999 } 1000 1001 void initialize(ScheduleDAGMI *dag) override; 1002 1003 SUnit *pickNode(bool &IsTopNode) override; 1004 1005 void schedNode(SUnit *SU, bool IsTopNode) override; 1006 releaseTopNode(SUnit * SU)1007 void releaseTopNode(SUnit *SU) override { 1008 if (SU->isScheduled) 1009 return; 1010 1011 Top.releaseNode(SU, SU->TopReadyCycle, false); 1012 TopCand.SU = nullptr; 1013 } 1014 releaseBottomNode(SUnit * SU)1015 void releaseBottomNode(SUnit *SU) override { 1016 if (SU->isScheduled) 1017 return; 1018 1019 Bot.releaseNode(SU, SU->BotReadyCycle, false); 1020 BotCand.SU = nullptr; 1021 } 1022 1023 void registerRoots() override; 1024 1025 protected: 1026 ScheduleDAGMILive *DAG = nullptr; 1027 1028 MachineSchedPolicy RegionPolicy; 1029 1030 // State of the top and bottom scheduled instruction boundaries. 1031 SchedBoundary Top; 1032 SchedBoundary Bot; 1033 1034 /// Candidate last picked from Top boundary. 1035 SchedCandidate TopCand; 1036 /// Candidate last picked from Bot boundary. 
1037 SchedCandidate BotCand; 1038 1039 void checkAcyclicLatency(); 1040 1041 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, 1042 const RegPressureTracker &RPTracker, 1043 RegPressureTracker &TempTracker); 1044 1045 virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, 1046 SchedBoundary *Zone) const; 1047 1048 SUnit *pickNodeBidirectional(bool &IsTopNode); 1049 1050 void pickNodeFromQueue(SchedBoundary &Zone, 1051 const CandPolicy &ZonePolicy, 1052 const RegPressureTracker &RPTracker, 1053 SchedCandidate &Candidate); 1054 1055 void reschedulePhysReg(SUnit *SU, bool isTop); 1056 }; 1057 1058 /// PostGenericScheduler - Interface to the scheduling algorithm used by 1059 /// ScheduleDAGMI. 1060 /// 1061 /// Callbacks from ScheduleDAGMI: 1062 /// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ... 1063 class PostGenericScheduler : public GenericSchedulerBase { 1064 protected: 1065 ScheduleDAGMI *DAG = nullptr; 1066 SchedBoundary Top; 1067 SmallVector<SUnit*, 8> BotRoots; 1068 1069 public: PostGenericScheduler(const MachineSchedContext * C)1070 PostGenericScheduler(const MachineSchedContext *C): 1071 GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {} 1072 1073 ~PostGenericScheduler() override = default; 1074 initPolicy(MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,unsigned NumRegionInstrs)1075 void initPolicy(MachineBasicBlock::iterator Begin, 1076 MachineBasicBlock::iterator End, 1077 unsigned NumRegionInstrs) override { 1078 /* no configurable policy */ 1079 } 1080 1081 /// PostRA scheduling does not track pressure. 
shouldTrackPressure()1082 bool shouldTrackPressure() const override { return false; } 1083 1084 void initialize(ScheduleDAGMI *Dag) override; 1085 1086 void registerRoots() override; 1087 1088 SUnit *pickNode(bool &IsTopNode) override; 1089 scheduleTree(unsigned SubtreeID)1090 void scheduleTree(unsigned SubtreeID) override { 1091 llvm_unreachable("PostRA scheduler does not support subtree analysis."); 1092 } 1093 1094 void schedNode(SUnit *SU, bool IsTopNode) override; 1095 releaseTopNode(SUnit * SU)1096 void releaseTopNode(SUnit *SU) override { 1097 if (SU->isScheduled) 1098 return; 1099 Top.releaseNode(SU, SU->TopReadyCycle, false); 1100 } 1101 1102 // Only called for roots. releaseBottomNode(SUnit * SU)1103 void releaseBottomNode(SUnit *SU) override { 1104 BotRoots.push_back(SU); 1105 } 1106 1107 protected: 1108 virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); 1109 1110 void pickNodeFromQueue(SchedCandidate &Cand); 1111 }; 1112 1113 /// Create the standard converging machine scheduler. This will be used as the 1114 /// default scheduler if the target does not set a default. 1115 /// Adds default DAG mutations. 1116 ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C); 1117 1118 /// Create a generic scheduler with no vreg liveness or DAG mutation passes. 1119 ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C); 1120 1121 std::unique_ptr<ScheduleDAGMutation> 1122 createLoadClusterDAGMutation(const TargetInstrInfo *TII, 1123 const TargetRegisterInfo *TRI); 1124 1125 std::unique_ptr<ScheduleDAGMutation> 1126 createStoreClusterDAGMutation(const TargetInstrInfo *TII, 1127 const TargetRegisterInfo *TRI); 1128 1129 std::unique_ptr<ScheduleDAGMutation> 1130 createCopyConstrainDAGMutation(const TargetInstrInfo *TII, 1131 const TargetRegisterInfo *TRI); 1132 1133 } // end namespace llvm 1134 1135 #endif // LLVM_CODEGEN_MACHINESCHEDULER_H 1136