1 //===- MachineScheduler.h - MachineInstr Scheduling Pass --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file provides an interface for customizing the standard MachineScheduler 10 // pass. Note that the entire pass may be replaced as follows: 11 // 12 // <Target>TargetMachine::createPassConfig(PassManagerBase &PM) { 13 // PM.substitutePass(&MachineSchedulerID, &CustomSchedulerPassID); 14 // ...} 15 // 16 // The MachineScheduler pass is only responsible for choosing the regions to be 17 // scheduled. Targets can override the DAG builder and scheduler without 18 // replacing the pass as follows: 19 // 20 // ScheduleDAGInstrs *<Target>PassConfig:: 21 // createMachineScheduler(MachineSchedContext *C) { 22 // return new CustomMachineScheduler(C); 23 // } 24 // 25 // The default scheduler, ScheduleDAGMILive, builds the DAG and drives list 26 // scheduling while updating the instruction stream, register pressure, and live 27 // intervals. Most targets don't need to override the DAG builder and list 28 // scheduler, but subtargets that require custom scheduling heuristics may 29 // plugin an alternate MachineSchedStrategy. The strategy is responsible for 30 // selecting the highest priority node from the list: 31 // 32 // ScheduleDAGInstrs *<Target>PassConfig:: 33 // createMachineScheduler(MachineSchedContext *C) { 34 // return new ScheduleDAGMILive(C, CustomStrategy(C)); 35 // } 36 // 37 // The DAG builder can also be customized in a sense by adding DAG mutations 38 // that will run after DAG building and before list scheduling. DAG mutations 39 // can adjust dependencies based on target-specific knowledge or add weak edges 40 // to aid heuristics: 41 // 42 // ScheduleDAGInstrs *<Target>PassConfig:: 43 // createMachineScheduler(MachineSchedContext *C) { 44 // ScheduleDAGMI *DAG = createGenericSchedLive(C); 45 // DAG->addMutation(new CustomDAGMutation(...)); 46 // return DAG; 47 // } 48 // 49 // A target that supports alternative schedulers can use the 50 // MachineSchedRegistry to allow command line selection. This can be done by 51 // implementing the following boilerplate: 52 // 53 // static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) { 54 // return new CustomMachineScheduler(C); 55 // } 56 // static MachineSchedRegistry 57 // SchedCustomRegistry("custom", "Run my target's custom scheduler", 58 // createCustomMachineSched); 59 // 60 // 61 // Finally, subtargets that don't need to implement custom heuristics but would 62 // like to configure the GenericScheduler's policy for a given scheduler region, 63 // including scheduling direction and register pressure tracking policy, can do 64 // this: 65 // 66 // void <SubTarget>Subtarget:: 67 // overrideSchedPolicy(MachineSchedPolicy &Policy, 68 // unsigned NumRegionInstrs) const { 69 // Policy.<Flag> = true; 70 // } 71 // 72 //===----------------------------------------------------------------------===// 73 74 #ifndef LLVM_CODEGEN_MACHINESCHEDULER_H 75 #define LLVM_CODEGEN_MACHINESCHEDULER_H 76 77 #include "llvm/ADT/APInt.h" 78 #include "llvm/ADT/ArrayRef.h" 79 #include "llvm/ADT/BitVector.h" 80 #include "llvm/ADT/STLExtras.h" 81 #include "llvm/ADT/SmallVector.h" 82 #include "llvm/ADT/StringRef.h" 83 #include "llvm/ADT/Twine.h" 84 #include "llvm/CodeGen/MachineBasicBlock.h" 85 #include "llvm/CodeGen/MachinePassRegistry.h" 86 #include "llvm/CodeGen/RegisterPressure.h" 87 #include "llvm/CodeGen/ScheduleDAG.h" 88 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 89 #include "llvm/CodeGen/ScheduleDAGMutation.h" 90 #include "llvm/CodeGen/TargetSchedule.h" 91 #include "llvm/Support/CommandLine.h" 92 #include "llvm/Support/ErrorHandling.h" 93 #include <algorithm> 94 #include <cassert> 95 #include <memory> 96 #include <string> 97 #include <vector> 98 99 namespace llvm { 100 101 extern cl::opt<bool> ForceTopDown; 102 extern cl::opt<bool> ForceBottomUp; 103 extern cl::opt<bool> VerifyScheduling; 104 105 class AAResults; 106 class LiveIntervals; 107 class MachineDominatorTree; 108 class MachineFunction; 109 class MachineInstr; 110 class MachineLoopInfo; 111 class RegisterClassInfo; 112 class SchedDFSResult; 113 class ScheduleHazardRecognizer; 114 class TargetInstrInfo; 115 class TargetPassConfig; 116 class TargetRegisterInfo; 117 118 /// MachineSchedContext provides enough context from the MachineScheduler pass 119 /// for the target to instantiate a scheduler. 120 struct MachineSchedContext { 121 MachineFunction *MF = nullptr; 122 const MachineLoopInfo *MLI = nullptr; 123 const MachineDominatorTree *MDT = nullptr; 124 const TargetPassConfig *PassConfig = nullptr; 125 AAResults *AA = nullptr; 126 LiveIntervals *LIS = nullptr; 127 128 RegisterClassInfo *RegClassInfo; 129 130 MachineSchedContext(); 131 virtual ~MachineSchedContext(); 132 }; 133 134 /// MachineSchedRegistry provides a selection of available machine instruction 135 /// schedulers. 136 class MachineSchedRegistry 137 : public MachinePassRegistryNode< 138 ScheduleDAGInstrs *(*)(MachineSchedContext *)> { 139 public: 140 using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *); 141 142 // RegisterPassParser requires a (misnamed) FunctionPassCtor type. 143 using FunctionPassCtor = ScheduleDAGCtor; 144 145 static MachinePassRegistry<ScheduleDAGCtor> Registry; 146 MachineSchedRegistry(const char * N,const char * D,ScheduleDAGCtor C)147 MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C) 148 : MachinePassRegistryNode(N, D, C) { 149 Registry.Add(this); 150 } 151 ~MachineSchedRegistry()152 ~MachineSchedRegistry() { Registry.Remove(this); } 153 154 // Accessors. 155 // getNext()156 MachineSchedRegistry *getNext() const { 157 return (MachineSchedRegistry *)MachinePassRegistryNode::getNext(); 158 } 159 getList()160 static MachineSchedRegistry *getList() { 161 return (MachineSchedRegistry *)Registry.getList(); 162 } 163 setListener(MachinePassRegistryListener<FunctionPassCtor> * L)164 static void setListener(MachinePassRegistryListener<FunctionPassCtor> *L) { 165 Registry.setListener(L); 166 } 167 }; 168 169 class ScheduleDAGMI; 170 171 /// Define a generic scheduling policy for targets that don't provide their own 172 /// MachineSchedStrategy. This can be overriden for each scheduling region 173 /// before building the DAG. 174 struct MachineSchedPolicy { 175 // Allow the scheduler to disable register pressure tracking. 176 bool ShouldTrackPressure = false; 177 /// Track LaneMasks to allow reordering of independent subregister writes 178 /// of the same vreg. \sa MachineSchedStrategy::shouldTrackLaneMasks() 179 bool ShouldTrackLaneMasks = false; 180 181 // Allow the scheduler to force top-down or bottom-up scheduling. If neither 182 // is true, the scheduler runs in both directions and converges. 183 bool OnlyTopDown = false; 184 bool OnlyBottomUp = false; 185 186 // Disable heuristic that tries to fetch nodes from long dependency chains 187 // first. 188 bool DisableLatencyHeuristic = false; 189 190 // Compute DFSResult for use in scheduling heuristics. 191 bool ComputeDFSResult = false; 192 193 MachineSchedPolicy() = default; 194 }; 195 196 /// MachineSchedStrategy - Interface to the scheduling algorithm used by 197 /// ScheduleDAGMI. 198 /// 199 /// Initialization sequence: 200 /// initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots 201 class MachineSchedStrategy { 202 virtual void anchor(); 203 204 public: 205 virtual ~MachineSchedStrategy() = default; 206 207 /// Optionally override the per-region scheduling policy. initPolicy(MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,unsigned NumRegionInstrs)208 virtual void initPolicy(MachineBasicBlock::iterator Begin, 209 MachineBasicBlock::iterator End, 210 unsigned NumRegionInstrs) {} 211 dumpPolicy()212 virtual void dumpPolicy() const {} 213 214 /// Check if pressure tracking is needed before building the DAG and 215 /// initializing this strategy. Called after initPolicy. shouldTrackPressure()216 virtual bool shouldTrackPressure() const { return true; } 217 218 /// Returns true if lanemasks should be tracked. LaneMask tracking is 219 /// necessary to reorder independent subregister defs for the same vreg. 220 /// This has to be enabled in combination with shouldTrackPressure(). shouldTrackLaneMasks()221 virtual bool shouldTrackLaneMasks() const { return false; } 222 223 // If this method returns true, handling of the scheduling regions 224 // themselves (in case of a scheduling boundary in MBB) will be done 225 // beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()226 virtual bool doMBBSchedRegionsTopDown() const { return false; } 227 228 /// Initialize the strategy after building the DAG for a new region. 229 virtual void initialize(ScheduleDAGMI *DAG) = 0; 230 231 /// Tell the strategy that MBB is about to be processed. enterMBB(MachineBasicBlock * MBB)232 virtual void enterMBB(MachineBasicBlock *MBB) {}; 233 234 /// Tell the strategy that current MBB is done. leaveMBB()235 virtual void leaveMBB() {}; 236 237 /// Notify this strategy that all roots have been released (including those 238 /// that depend on EntrySU or ExitSU). registerRoots()239 virtual void registerRoots() {} 240 241 /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to 242 /// schedule the node at the top of the unscheduled region. Otherwise it will 243 /// be scheduled at the bottom. 244 virtual SUnit *pickNode(bool &IsTopNode) = 0; 245 246 /// Scheduler callback to notify that a new subtree is scheduled. scheduleTree(unsigned SubtreeID)247 virtual void scheduleTree(unsigned SubtreeID) {} 248 249 /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an 250 /// instruction and updated scheduled/remaining flags in the DAG nodes. 251 virtual void schedNode(SUnit *SU, bool IsTopNode) = 0; 252 253 /// When all predecessor dependencies have been resolved, free this node for 254 /// top-down scheduling. 255 virtual void releaseTopNode(SUnit *SU) = 0; 256 257 /// When all successor dependencies have been resolved, free this node for 258 /// bottom-up scheduling. 259 virtual void releaseBottomNode(SUnit *SU) = 0; 260 }; 261 262 /// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply 263 /// schedules machine instructions according to the given MachineSchedStrategy 264 /// without much extra book-keeping. This is the common functionality between 265 /// PreRA and PostRA MachineScheduler. 266 class ScheduleDAGMI : public ScheduleDAGInstrs { 267 protected: 268 AAResults *AA; 269 LiveIntervals *LIS; 270 std::unique_ptr<MachineSchedStrategy> SchedImpl; 271 272 /// Ordered list of DAG postprocessing steps. 273 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; 274 275 /// The top of the unscheduled zone. 276 MachineBasicBlock::iterator CurrentTop; 277 278 /// The bottom of the unscheduled zone. 279 MachineBasicBlock::iterator CurrentBottom; 280 281 /// Record the next node in a scheduled cluster. 282 const SUnit *NextClusterPred = nullptr; 283 const SUnit *NextClusterSucc = nullptr; 284 285 #ifndef NDEBUG 286 /// The number of instructions scheduled so far. Used to cut off the 287 /// scheduler at the point determined by misched-cutoff. 288 unsigned NumInstrsScheduled = 0; 289 #endif 290 291 public: ScheduleDAGMI(MachineSchedContext * C,std::unique_ptr<MachineSchedStrategy> S,bool RemoveKillFlags)292 ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr<MachineSchedStrategy> S, 293 bool RemoveKillFlags) 294 : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA), 295 LIS(C->LIS), SchedImpl(std::move(S)) {} 296 297 // Provide a vtable anchor 298 ~ScheduleDAGMI() override; 299 300 /// If this method returns true, handling of the scheduling regions 301 /// themselves (in case of a scheduling boundary in MBB) will be done 302 /// beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()303 bool doMBBSchedRegionsTopDown() const override { 304 return SchedImpl->doMBBSchedRegionsTopDown(); 305 } 306 307 // Returns LiveIntervals instance for use in DAG mutators and such. getLIS()308 LiveIntervals *getLIS() const { return LIS; } 309 310 /// Return true if this DAG supports VReg liveness and RegPressure. hasVRegLiveness()311 virtual bool hasVRegLiveness() const { return false; } 312 313 /// Add a postprocessing step to the DAG builder. 314 /// Mutations are applied in the order that they are added after normal DAG 315 /// building and before MachineSchedStrategy initialization. 316 /// 317 /// ScheduleDAGMI takes ownership of the Mutation object. addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation)318 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { 319 if (Mutation) 320 Mutations.push_back(std::move(Mutation)); 321 } 322 top()323 MachineBasicBlock::iterator top() const { return CurrentTop; } bottom()324 MachineBasicBlock::iterator bottom() const { return CurrentBottom; } 325 326 /// Implement the ScheduleDAGInstrs interface for handling the next scheduling 327 /// region. This covers all instructions in a block, while schedule() may only 328 /// cover a subset. 329 void enterRegion(MachineBasicBlock *bb, 330 MachineBasicBlock::iterator begin, 331 MachineBasicBlock::iterator end, 332 unsigned regioninstrs) override; 333 334 /// Implement ScheduleDAGInstrs interface for scheduling a sequence of 335 /// reorderable instructions. 336 void schedule() override; 337 338 void startBlock(MachineBasicBlock *bb) override; 339 void finishBlock() override; 340 341 /// Change the position of an instruction within the basic block and update 342 /// live ranges and region boundary iterators. 343 void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos); 344 getNextClusterPred()345 const SUnit *getNextClusterPred() const { return NextClusterPred; } 346 getNextClusterSucc()347 const SUnit *getNextClusterSucc() const { return NextClusterSucc; } 348 349 void viewGraph(const Twine &Name, const Twine &Title) override; 350 void viewGraph() override; 351 352 protected: 353 // Top-Level entry points for the schedule() driver... 354 355 /// Apply each ScheduleDAGMutation step in order. This allows different 356 /// instances of ScheduleDAGMI to perform custom DAG postprocessing. 357 void postprocessDAG(); 358 359 /// Release ExitSU predecessors and setup scheduler queues. 360 void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots); 361 362 /// Update scheduler DAG and queues after scheduling an instruction. 363 void updateQueues(SUnit *SU, bool IsTopNode); 364 365 /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues. 366 void placeDebugValues(); 367 368 /// dump the scheduled Sequence. 369 void dumpSchedule() const; 370 371 // Lesser helpers... 372 bool checkSchedLimit(); 373 374 void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, 375 SmallVectorImpl<SUnit*> &BotRoots); 376 377 void releaseSucc(SUnit *SU, SDep *SuccEdge); 378 void releaseSuccessors(SUnit *SU); 379 void releasePred(SUnit *SU, SDep *PredEdge); 380 void releasePredecessors(SUnit *SU); 381 }; 382 383 /// ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules 384 /// machine instructions while updating LiveIntervals and tracking regpressure. 385 class ScheduleDAGMILive : public ScheduleDAGMI { 386 protected: 387 RegisterClassInfo *RegClassInfo; 388 389 /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees 390 /// will be empty. 391 SchedDFSResult *DFSResult = nullptr; 392 BitVector ScheduledTrees; 393 394 MachineBasicBlock::iterator LiveRegionEnd; 395 396 /// Maps vregs to the SUnits of their uses in the current scheduling region. 397 VReg2SUnitMultiMap VRegUses; 398 399 // Map each SU to its summary of pressure changes. This array is updated for 400 // liveness during bottom-up scheduling. Top-down scheduling may proceed but 401 // has no affect on the pressure diffs. 402 PressureDiffs SUPressureDiffs; 403 404 /// Register pressure in this region computed by initRegPressure. 405 bool ShouldTrackPressure = false; 406 bool ShouldTrackLaneMasks = false; 407 IntervalPressure RegPressure; 408 RegPressureTracker RPTracker; 409 410 /// List of pressure sets that exceed the target's pressure limit before 411 /// scheduling, listed in increasing set ID order. Each pressure set is paired 412 /// with its max pressure in the currently scheduled regions. 413 std::vector<PressureChange> RegionCriticalPSets; 414 415 /// The top of the unscheduled zone. 416 IntervalPressure TopPressure; 417 RegPressureTracker TopRPTracker; 418 419 /// The bottom of the unscheduled zone. 420 IntervalPressure BotPressure; 421 RegPressureTracker BotRPTracker; 422 423 /// True if disconnected subregister components are already renamed. 424 /// The renaming is only done on demand if lane masks are tracked. 425 bool DisconnectedComponentsRenamed = false; 426 427 public: ScheduleDAGMILive(MachineSchedContext * C,std::unique_ptr<MachineSchedStrategy> S)428 ScheduleDAGMILive(MachineSchedContext *C, 429 std::unique_ptr<MachineSchedStrategy> S) 430 : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false), 431 RegClassInfo(C->RegClassInfo), RPTracker(RegPressure), 432 TopRPTracker(TopPressure), BotRPTracker(BotPressure) {} 433 434 ~ScheduleDAGMILive() override; 435 436 /// Return true if this DAG supports VReg liveness and RegPressure. hasVRegLiveness()437 bool hasVRegLiveness() const override { return true; } 438 439 /// Return true if register pressure tracking is enabled. isTrackingPressure()440 bool isTrackingPressure() const { return ShouldTrackPressure; } 441 442 /// Get current register pressure for the top scheduled instructions. getTopPressure()443 const IntervalPressure &getTopPressure() const { return TopPressure; } getTopRPTracker()444 const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; } 445 446 /// Get current register pressure for the bottom scheduled instructions. getBotPressure()447 const IntervalPressure &getBotPressure() const { return BotPressure; } getBotRPTracker()448 const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; } 449 450 /// Get register pressure for the entire scheduling region before scheduling. getRegPressure()451 const IntervalPressure &getRegPressure() const { return RegPressure; } 452 getRegionCriticalPSets()453 const std::vector<PressureChange> &getRegionCriticalPSets() const { 454 return RegionCriticalPSets; 455 } 456 getPressureDiff(const SUnit * SU)457 PressureDiff &getPressureDiff(const SUnit *SU) { 458 return SUPressureDiffs[SU->NodeNum]; 459 } getPressureDiff(const SUnit * SU)460 const PressureDiff &getPressureDiff(const SUnit *SU) const { 461 return SUPressureDiffs[SU->NodeNum]; 462 } 463 464 /// Compute a DFSResult after DAG building is complete, and before any 465 /// queue comparisons. 466 void computeDFSResult(); 467 468 /// Return a non-null DFS result if the scheduling strategy initialized it. getDFSResult()469 const SchedDFSResult *getDFSResult() const { return DFSResult; } 470 getScheduledTrees()471 BitVector &getScheduledTrees() { return ScheduledTrees; } 472 473 /// Implement the ScheduleDAGInstrs interface for handling the next scheduling 474 /// region. This covers all instructions in a block, while schedule() may only 475 /// cover a subset. 476 void enterRegion(MachineBasicBlock *bb, 477 MachineBasicBlock::iterator begin, 478 MachineBasicBlock::iterator end, 479 unsigned regioninstrs) override; 480 481 /// Implement ScheduleDAGInstrs interface for scheduling a sequence of 482 /// reorderable instructions. 483 void schedule() override; 484 485 /// Compute the cyclic critical path through the DAG. 486 unsigned computeCyclicCriticalPath(); 487 488 void dump() const override; 489 490 protected: 491 // Top-Level entry points for the schedule() driver... 492 493 /// Call ScheduleDAGInstrs::buildSchedGraph with register pressure tracking 494 /// enabled. This sets up three trackers. RPTracker will cover the entire DAG 495 /// region, TopTracker and BottomTracker will be initialized to the top and 496 /// bottom of the DAG region without covereing any unscheduled instruction. 497 void buildDAGWithRegPressure(); 498 499 /// Release ExitSU predecessors and setup scheduler queues. Re-position 500 /// the Top RP tracker in case the region beginning has changed. 501 void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots); 502 503 /// Move an instruction and update register pressure. 504 void scheduleMI(SUnit *SU, bool IsTopNode); 505 506 // Lesser helpers... 507 508 void initRegPressure(); 509 510 void updatePressureDiffs(ArrayRef<RegisterMaskPair> LiveUses); 511 512 void updateScheduledPressure(const SUnit *SU, 513 const std::vector<unsigned> &NewMaxPressure); 514 515 void collectVRegUses(SUnit &SU); 516 }; 517 518 //===----------------------------------------------------------------------===// 519 /// 520 /// Helpers for implementing custom MachineSchedStrategy classes. These take 521 /// care of the book-keeping associated with list scheduling heuristics. 522 /// 523 //===----------------------------------------------------------------------===// 524 525 /// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience 526 /// methods for pushing and removing nodes. ReadyQueue's are uniquely identified 527 /// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in. 528 /// 529 /// This is a convenience class that may be used by implementations of 530 /// MachineSchedStrategy. 531 class ReadyQueue { 532 unsigned ID; 533 std::string Name; 534 std::vector<SUnit*> Queue; 535 536 public: ReadyQueue(unsigned id,const Twine & name)537 ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {} 538 getID()539 unsigned getID() const { return ID; } 540 getName()541 StringRef getName() const { return Name; } 542 543 // SU is in this queue if it's NodeQueueID is a superset of this ID. isInQueue(SUnit * SU)544 bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); } 545 empty()546 bool empty() const { return Queue.empty(); } 547 clear()548 void clear() { Queue.clear(); } 549 size()550 unsigned size() const { return Queue.size(); } 551 552 using iterator = std::vector<SUnit*>::iterator; 553 begin()554 iterator begin() { return Queue.begin(); } 555 end()556 iterator end() { return Queue.end(); } 557 elements()558 ArrayRef<SUnit*> elements() { return Queue; } 559 find(SUnit * SU)560 iterator find(SUnit *SU) { return llvm::find(Queue, SU); } 561 push(SUnit * SU)562 void push(SUnit *SU) { 563 Queue.push_back(SU); 564 SU->NodeQueueId |= ID; 565 } 566 remove(iterator I)567 iterator remove(iterator I) { 568 (*I)->NodeQueueId &= ~ID; 569 *I = Queue.back(); 570 unsigned idx = I - Queue.begin(); 571 Queue.pop_back(); 572 return Queue.begin() + idx; 573 } 574 575 void dump() const; 576 }; 577 578 /// Summarize the unscheduled region. 579 struct SchedRemainder { 580 // Critical path through the DAG in expected latency. 581 unsigned CriticalPath; 582 unsigned CyclicCritPath; 583 584 // Scaled count of micro-ops left to schedule. 585 unsigned RemIssueCount; 586 587 bool IsAcyclicLatencyLimited; 588 589 // Unscheduled resources 590 SmallVector<unsigned, 16> RemainingCounts; 591 SchedRemainderSchedRemainder592 SchedRemainder() { reset(); } 593 resetSchedRemainder594 void reset() { 595 CriticalPath = 0; 596 CyclicCritPath = 0; 597 RemIssueCount = 0; 598 IsAcyclicLatencyLimited = false; 599 RemainingCounts.clear(); 600 } 601 602 void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); 603 }; 604 605 /// Each Scheduling boundary is associated with ready queues. It tracks the 606 /// current cycle in the direction of movement, and maintains the state 607 /// of "hazards" and other interlocks at the current cycle. 608 class SchedBoundary { 609 public: 610 /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) 611 enum { 612 TopQID = 1, 613 BotQID = 2, 614 LogMaxQID = 2 615 }; 616 617 ScheduleDAGMI *DAG = nullptr; 618 const TargetSchedModel *SchedModel = nullptr; 619 SchedRemainder *Rem = nullptr; 620 621 ReadyQueue Available; 622 ReadyQueue Pending; 623 624 ScheduleHazardRecognizer *HazardRec = nullptr; 625 626 private: 627 /// True if the pending Q should be checked/updated before scheduling another 628 /// instruction. 629 bool CheckPending; 630 631 /// Number of cycles it takes to issue the instructions scheduled in this 632 /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. 633 /// See getStalls(). 634 unsigned CurrCycle; 635 636 /// Micro-ops issued in the current cycle 637 unsigned CurrMOps; 638 639 /// MinReadyCycle - Cycle of the soonest available instruction. 640 unsigned MinReadyCycle; 641 642 // The expected latency of the critical path in this scheduled zone. 643 unsigned ExpectedLatency; 644 645 // The latency of dependence chains leading into this zone. 646 // For each node scheduled bottom-up: DLat = max DLat, N.Depth. 647 // For each cycle scheduled: DLat -= 1. 648 unsigned DependentLatency; 649 650 /// Count the scheduled (issued) micro-ops that can be retired by 651 /// time=CurrCycle assuming the first scheduled instr is retired at time=0. 652 unsigned RetiredMOps; 653 654 // Count scheduled resources that have been executed. Resources are 655 // considered executed if they become ready in the time that it takes to 656 // saturate any resource including the one in question. Counts are scaled 657 // for direct comparison with other resources. Counts can be compared with 658 // MOps * getMicroOpFactor and Latency * getLatencyFactor. 659 SmallVector<unsigned, 16> ExecutedResCounts; 660 661 /// Cache the max count for a single resource. 662 unsigned MaxExecutedResCount; 663 664 // Cache the critical resources ID in this scheduled zone. 665 unsigned ZoneCritResIdx; 666 667 // Is the scheduled region resource limited vs. latency limited. 668 bool IsResourceLimited; 669 670 // Record the highest cycle at which each resource has been reserved by a 671 // scheduled instruction. 672 SmallVector<unsigned, 16> ReservedCycles; 673 674 // For each PIdx, stores first index into ReservedCycles that corresponds to 675 // it. 676 SmallVector<unsigned, 16> ReservedCyclesIndex; 677 678 // For each PIdx, stores the resource group IDs of its subunits 679 SmallVector<APInt, 16> ResourceGroupSubUnitMasks; 680 681 #ifndef NDEBUG 682 // Remember the greatest possible stall as an upper bound on the number of 683 // times we should retry the pending queue because of a hazard. 684 unsigned MaxObservedStall; 685 #endif 686 687 public: 688 /// Pending queues extend the ready queues with the same ID and the 689 /// PendingFlag set. SchedBoundary(unsigned ID,const Twine & Name)690 SchedBoundary(unsigned ID, const Twine &Name): 691 Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") { 692 reset(); 693 } 694 695 ~SchedBoundary(); 696 697 void reset(); 698 699 void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, 700 SchedRemainder *rem); 701 isTop()702 bool isTop() const { 703 return Available.getID() == TopQID; 704 } 705 706 /// Number of cycles to issue the instructions scheduled in this zone. getCurrCycle()707 unsigned getCurrCycle() const { return CurrCycle; } 708 709 /// Micro-ops issued in the current cycle getCurrMOps()710 unsigned getCurrMOps() const { return CurrMOps; } 711 712 // The latency of dependence chains leading into this zone. getDependentLatency()713 unsigned getDependentLatency() const { return DependentLatency; } 714 715 /// Get the number of latency cycles "covered" by the scheduled 716 /// instructions. This is the larger of the critical path within the zone 717 /// and the number of cycles required to issue the instructions. getScheduledLatency()718 unsigned getScheduledLatency() const { 719 return std::max(ExpectedLatency, CurrCycle); 720 } 721 getUnscheduledLatency(SUnit * SU)722 unsigned getUnscheduledLatency(SUnit *SU) const { 723 return isTop() ? SU->getHeight() : SU->getDepth(); 724 } 725 getResourceCount(unsigned ResIdx)726 unsigned getResourceCount(unsigned ResIdx) const { 727 return ExecutedResCounts[ResIdx]; 728 } 729 730 /// Get the scaled count of scheduled micro-ops and resources, including 731 /// executed resources. getCriticalCount()732 unsigned getCriticalCount() const { 733 if (!ZoneCritResIdx) 734 return RetiredMOps * SchedModel->getMicroOpFactor(); 735 return getResourceCount(ZoneCritResIdx); 736 } 737 738 /// Get a scaled count for the minimum execution time of the scheduled 739 /// micro-ops that are ready to execute by getExecutedCount. Notice the 740 /// feedback loop. getExecutedCount()741 unsigned getExecutedCount() const { 742 return std::max(CurrCycle * SchedModel->getLatencyFactor(), 743 MaxExecutedResCount); 744 } 745 getZoneCritResIdx()746 unsigned getZoneCritResIdx() const { return ZoneCritResIdx; } 747 748 // Is the scheduled region resource limited vs. latency limited. isResourceLimited()749 bool isResourceLimited() const { return IsResourceLimited; } 750 751 /// Get the difference between the given SUnit's ready time and the current 752 /// cycle. 753 unsigned getLatencyStallCycles(SUnit *SU); 754 755 unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, 756 unsigned Cycles); 757 758 std::pair<unsigned, unsigned> getNextResourceCycle(const MCSchedClassDesc *SC, 759 unsigned PIdx, 760 unsigned Cycles); 761 isUnbufferedGroup(unsigned PIdx)762 bool isUnbufferedGroup(unsigned PIdx) const { 763 return SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin && 764 !SchedModel->getProcResource(PIdx)->BufferSize; 765 } 766 767 bool checkHazard(SUnit *SU); 768 769 unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs); 770 771 unsigned getOtherResourceCount(unsigned &OtherCritIdx); 772 773 /// Release SU to make it ready. If it's not in hazard, remove it from 774 /// pending queue (if already in) and push into available queue. 775 /// Otherwise, push the SU into pending queue. 776 /// 777 /// @param SU The unit to be released. 778 /// @param ReadyCycle Until which cycle the unit is ready. 779 /// @param InPQueue Whether SU is already in pending queue. 780 /// @param Idx Position offset in pending queue (if in it). 781 void releaseNode(SUnit *SU, unsigned ReadyCycle, bool InPQueue, 782 unsigned Idx = 0); 783 784 void bumpCycle(unsigned NextCycle); 785 786 void incExecutedResources(unsigned PIdx, unsigned Count); 787 788 unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, 789 unsigned Cycles, unsigned ReadyCycle); 790 791 void bumpNode(SUnit *SU); 792 793 void releasePending(); 794 795 void removeReady(SUnit *SU); 796 797 /// Call this before applying any other heuristics to the Available queue. 798 /// Updates the Available/Pending Q's if necessary and returns the single 799 /// available instruction, or NULL if there are multiple candidates. 800 SUnit *pickOnlyChoice(); 801 802 void dumpScheduledState() const; 803 }; 804 805 /// Base class for GenericScheduler. This class maintains information about 806 /// scheduling candidates based on TargetSchedModel making it easy to implement 807 /// heuristics for either preRA or postRA scheduling. 808 class GenericSchedulerBase : public MachineSchedStrategy { 809 public: 810 /// Represent the type of SchedCandidate found within a single queue. 811 /// pickNodeBidirectional depends on these listed by decreasing priority. 812 enum CandReason : uint8_t { 813 NoCand, Only1, PhysReg, RegExcess, RegCritical, Stall, Cluster, Weak, 814 RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, 815 TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; 816 817 #ifndef NDEBUG 818 static const char *getReasonStr(GenericSchedulerBase::CandReason Reason); 819 #endif 820 821 /// Policy for scheduling the next instruction in the candidate's zone. 822 struct CandPolicy { 823 bool ReduceLatency = false; 824 unsigned ReduceResIdx = 0; 825 unsigned DemandResIdx = 0; 826 827 CandPolicy() = default; 828 829 bool operator==(const CandPolicy &RHS) const { 830 return ReduceLatency == RHS.ReduceLatency && 831 ReduceResIdx == RHS.ReduceResIdx && 832 DemandResIdx == RHS.DemandResIdx; 833 } 834 bool operator!=(const CandPolicy &RHS) const { 835 return !(*this == RHS); 836 } 837 }; 838 839 /// Status of an instruction's critical resource consumption. 840 struct SchedResourceDelta { 841 // Count critical resources in the scheduled region required by SU. 842 unsigned CritResources = 0; 843 844 // Count critical resources from another region consumed by SU. 845 unsigned DemandedResources = 0; 846 847 SchedResourceDelta() = default; 848 849 bool operator==(const SchedResourceDelta &RHS) const { 850 return CritResources == RHS.CritResources 851 && DemandedResources == RHS.DemandedResources; 852 } 853 bool operator!=(const SchedResourceDelta &RHS) const { 854 return !operator==(RHS); 855 } 856 }; 857 858 /// Store the state used by GenericScheduler heuristics, required for the 859 /// lifetime of one invocation of pickNode(). 860 struct SchedCandidate { 861 CandPolicy Policy; 862 863 // The best SUnit candidate. 864 SUnit *SU; 865 866 // The reason for this candidate. 867 CandReason Reason; 868 869 // Whether this candidate should be scheduled at top/bottom. 870 bool AtTop; 871 872 // Register pressure values for the best candidate. 873 RegPressureDelta RPDelta; 874 875 // Critical resource consumption of the best candidate. 876 SchedResourceDelta ResDelta; 877 SchedCandidateSchedCandidate878 SchedCandidate() { reset(CandPolicy()); } SchedCandidateSchedCandidate879 SchedCandidate(const CandPolicy &Policy) { reset(Policy); } 880 resetSchedCandidate881 void reset(const CandPolicy &NewPolicy) { 882 Policy = NewPolicy; 883 SU = nullptr; 884 Reason = NoCand; 885 AtTop = false; 886 RPDelta = RegPressureDelta(); 887 ResDelta = SchedResourceDelta(); 888 } 889 isValidSchedCandidate890 bool isValid() const { return SU; } 891 892 // Copy the status of another candidate without changing policy. setBestSchedCandidate893 void setBest(SchedCandidate &Best) { 894 assert(Best.Reason != NoCand && "uninitialized Sched candidate"); 895 SU = Best.SU; 896 Reason = Best.Reason; 897 AtTop = Best.AtTop; 898 RPDelta = Best.RPDelta; 899 ResDelta = Best.ResDelta; 900 } 901 902 void initResourceDelta(const ScheduleDAGMI *DAG, 903 const TargetSchedModel *SchedModel); 904 }; 905 906 protected: 907 const MachineSchedContext *Context; 908 const TargetSchedModel *SchedModel = nullptr; 909 const TargetRegisterInfo *TRI = nullptr; 910 911 SchedRemainder Rem; 912 GenericSchedulerBase(const MachineSchedContext * C)913 GenericSchedulerBase(const MachineSchedContext *C) : Context(C) {} 914 915 void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, 916 SchedBoundary *OtherZone); 917 918 #ifndef NDEBUG 919 void traceCandidate(const SchedCandidate &Cand); 920 #endif 921 922 private: 923 bool shouldReduceLatency(const CandPolicy &Policy, SchedBoundary &CurrZone, 924 bool ComputeRemLatency, unsigned &RemLatency) const; 925 }; 926 927 // Utility functions used by heuristics in tryCandidate(). 928 bool tryLess(int TryVal, int CandVal, 929 GenericSchedulerBase::SchedCandidate &TryCand, 930 GenericSchedulerBase::SchedCandidate &Cand, 931 GenericSchedulerBase::CandReason Reason); 932 bool tryGreater(int TryVal, int CandVal, 933 GenericSchedulerBase::SchedCandidate &TryCand, 934 GenericSchedulerBase::SchedCandidate &Cand, 935 GenericSchedulerBase::CandReason Reason); 936 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, 937 GenericSchedulerBase::SchedCandidate &Cand, 938 SchedBoundary &Zone); 939 bool tryPressure(const PressureChange &TryP, 940 const PressureChange &CandP, 941 GenericSchedulerBase::SchedCandidate &TryCand, 942 GenericSchedulerBase::SchedCandidate &Cand, 943 GenericSchedulerBase::CandReason Reason, 944 const TargetRegisterInfo *TRI, 945 const MachineFunction &MF); 946 unsigned getWeakLeft(const SUnit *SU, bool isTop); 947 int biasPhysReg(const SUnit *SU, bool isTop); 948 949 /// GenericScheduler shrinks the unscheduled zone using heuristics to balance 950 /// the schedule. 951 class GenericScheduler : public GenericSchedulerBase { 952 public: GenericScheduler(const MachineSchedContext * C)953 GenericScheduler(const MachineSchedContext *C): 954 GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"), 955 Bot(SchedBoundary::BotQID, "BotQ") {} 956 957 void initPolicy(MachineBasicBlock::iterator Begin, 958 MachineBasicBlock::iterator End, 959 unsigned NumRegionInstrs) override; 960 961 void dumpPolicy() const override; 962 shouldTrackPressure()963 bool shouldTrackPressure() const override { 964 return RegionPolicy.ShouldTrackPressure; 965 } 966 shouldTrackLaneMasks()967 bool shouldTrackLaneMasks() const override { 968 return RegionPolicy.ShouldTrackLaneMasks; 969 } 970 971 void initialize(ScheduleDAGMI *dag) override; 972 973 SUnit *pickNode(bool &IsTopNode) override; 974 975 void schedNode(SUnit *SU, bool IsTopNode) override; 976 releaseTopNode(SUnit * SU)977 void releaseTopNode(SUnit *SU) override { 978 if (SU->isScheduled) 979 return; 980 981 Top.releaseNode(SU, SU->TopReadyCycle, false); 982 TopCand.SU = nullptr; 983 } 984 releaseBottomNode(SUnit * SU)985 void releaseBottomNode(SUnit *SU) override { 986 if (SU->isScheduled) 987 return; 988 989 Bot.releaseNode(SU, SU->BotReadyCycle, false); 990 BotCand.SU = nullptr; 991 } 992 993 void registerRoots() override; 994 995 protected: 996 ScheduleDAGMILive *DAG = nullptr; 997 998 MachineSchedPolicy RegionPolicy; 999 1000 // State of the top and bottom scheduled instruction boundaries. 1001 SchedBoundary Top; 1002 SchedBoundary Bot; 1003 1004 /// Candidate last picked from Top boundary. 1005 SchedCandidate TopCand; 1006 /// Candidate last picked from Bot boundary. 1007 SchedCandidate BotCand; 1008 1009 void checkAcyclicLatency(); 1010 1011 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, 1012 const RegPressureTracker &RPTracker, 1013 RegPressureTracker &TempTracker); 1014 1015 virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, 1016 SchedBoundary *Zone) const; 1017 1018 SUnit *pickNodeBidirectional(bool &IsTopNode); 1019 1020 void pickNodeFromQueue(SchedBoundary &Zone, 1021 const CandPolicy &ZonePolicy, 1022 const RegPressureTracker &RPTracker, 1023 SchedCandidate &Candidate); 1024 1025 void reschedulePhysReg(SUnit *SU, bool isTop); 1026 }; 1027 1028 /// PostGenericScheduler - Interface to the scheduling algorithm used by 1029 /// ScheduleDAGMI. 1030 /// 1031 /// Callbacks from ScheduleDAGMI: 1032 /// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ... 1033 class PostGenericScheduler : public GenericSchedulerBase { 1034 protected: 1035 ScheduleDAGMI *DAG = nullptr; 1036 SchedBoundary Top; 1037 SmallVector<SUnit*, 8> BotRoots; 1038 1039 public: PostGenericScheduler(const MachineSchedContext * C)1040 PostGenericScheduler(const MachineSchedContext *C): 1041 GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {} 1042 1043 ~PostGenericScheduler() override = default; 1044 initPolicy(MachineBasicBlock::iterator Begin,MachineBasicBlock::iterator End,unsigned NumRegionInstrs)1045 void initPolicy(MachineBasicBlock::iterator Begin, 1046 MachineBasicBlock::iterator End, 1047 unsigned NumRegionInstrs) override { 1048 /* no configurable policy */ 1049 } 1050 1051 /// PostRA scheduling does not track pressure. shouldTrackPressure()1052 bool shouldTrackPressure() const override { return false; } 1053 1054 void initialize(ScheduleDAGMI *Dag) override; 1055 1056 void registerRoots() override; 1057 1058 SUnit *pickNode(bool &IsTopNode) override; 1059 scheduleTree(unsigned SubtreeID)1060 void scheduleTree(unsigned SubtreeID) override { 1061 llvm_unreachable("PostRA scheduler does not support subtree analysis."); 1062 } 1063 1064 void schedNode(SUnit *SU, bool IsTopNode) override; 1065 releaseTopNode(SUnit * SU)1066 void releaseTopNode(SUnit *SU) override { 1067 if (SU->isScheduled) 1068 return; 1069 Top.releaseNode(SU, SU->TopReadyCycle, false); 1070 } 1071 1072 // Only called for roots. releaseBottomNode(SUnit * SU)1073 void releaseBottomNode(SUnit *SU) override { 1074 BotRoots.push_back(SU); 1075 } 1076 1077 protected: 1078 virtual void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); 1079 1080 void pickNodeFromQueue(SchedCandidate &Cand); 1081 }; 1082 1083 /// Create the standard converging machine scheduler. This will be used as the 1084 /// default scheduler if the target does not set a default. 1085 /// Adds default DAG mutations. 1086 ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C); 1087 1088 /// Create a generic scheduler with no vreg liveness or DAG mutation passes. 1089 ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C); 1090 1091 std::unique_ptr<ScheduleDAGMutation> 1092 createLoadClusterDAGMutation(const TargetInstrInfo *TII, 1093 const TargetRegisterInfo *TRI); 1094 1095 std::unique_ptr<ScheduleDAGMutation> 1096 createStoreClusterDAGMutation(const TargetInstrInfo *TII, 1097 const TargetRegisterInfo *TRI); 1098 1099 std::unique_ptr<ScheduleDAGMutation> 1100 createCopyConstrainDAGMutation(const TargetInstrInfo *TII, 1101 const TargetRegisterInfo *TRI); 1102 1103 } // end namespace llvm 1104 1105 #endif // LLVM_CODEGEN_MACHINESCHEDULER_H 1106