1 //===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner. 10 // 11 // Software pipelining (SWP) is an instruction scheduling technique for loops 12 // that overlap loop iterations and exploits ILP via a compiler transformation. 13 // 14 // Swing Modulo Scheduling is an implementation of software pipelining 15 // that generates schedules that are near optimal in terms of initiation 16 // interval, register requirements, and stage count. See the papers: 17 // 18 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa, 19 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996 20 // Conference on Parallel Architectures and Compilation Techiniques. 21 // 22 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J. 23 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE 24 // Transactions on Computers, Vol. 50, No. 3, 2001. 25 // 26 // "An Implementation of Swing Modulo Scheduling With Extensions for 27 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at 28 // Urbana-Champaign, 2005. 29 // 30 // 31 // The SMS algorithm consists of three main steps after computing the minimal 32 // initiation interval (MII). 33 // 1) Analyze the dependence graph and compute information about each 34 // instruction in the graph. 35 // 2) Order the nodes (instructions) by priority based upon the heuristics 36 // described in the algorithm. 37 // 3) Attempt to schedule the nodes in the specified order using the MII. 38 // 39 //===----------------------------------------------------------------------===// 40 #ifndef LLVM_CODEGEN_MACHINEPIPELINER_H 41 #define LLVM_CODEGEN_MACHINEPIPELINER_H 42 43 #include "llvm/ADT/SetVector.h" 44 #include "llvm/CodeGen/DFAPacketizer.h" 45 #include "llvm/CodeGen/MachineDominators.h" 46 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" 47 #include "llvm/CodeGen/RegisterClassInfo.h" 48 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 49 #include "llvm/CodeGen/ScheduleDAGMutation.h" 50 #include "llvm/CodeGen/TargetInstrInfo.h" 51 #include "llvm/InitializePasses.h" 52 53 #include <deque> 54 55 namespace llvm { 56 57 class AAResults; 58 class NodeSet; 59 class SMSchedule; 60 61 extern cl::opt<bool> SwpEnableCopyToPhi; 62 extern cl::opt<int> SwpForceIssueWidth; 63 64 /// The main class in the implementation of the target independent 65 /// software pipeliner pass. 66 class MachinePipeliner : public MachineFunctionPass { 67 public: 68 MachineFunction *MF = nullptr; 69 MachineOptimizationRemarkEmitter *ORE = nullptr; 70 const MachineLoopInfo *MLI = nullptr; 71 const MachineDominatorTree *MDT = nullptr; 72 const InstrItineraryData *InstrItins = nullptr; 73 const TargetInstrInfo *TII = nullptr; 74 RegisterClassInfo RegClassInfo; 75 bool disabledByPragma = false; 76 unsigned II_setByPragma = 0; 77 78 #ifndef NDEBUG 79 static int NumTries; 80 #endif 81 82 /// Cache the target analysis information about the loop. 83 struct LoopInfo { 84 MachineBasicBlock *TBB = nullptr; 85 MachineBasicBlock *FBB = nullptr; 86 SmallVector<MachineOperand, 4> BrCond; 87 MachineInstr *LoopInductionVar = nullptr; 88 MachineInstr *LoopCompare = nullptr; 89 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo = 90 nullptr; 91 }; 92 LoopInfo LI; 93 94 static char ID; 95 MachinePipeliner()96 MachinePipeliner() : MachineFunctionPass(ID) { 97 initializeMachinePipelinerPass(*PassRegistry::getPassRegistry()); 98 } 99 100 bool runOnMachineFunction(MachineFunction &MF) override; 101 102 void getAnalysisUsage(AnalysisUsage &AU) const override; 103 104 private: 105 void preprocessPhiNodes(MachineBasicBlock &B); 106 bool canPipelineLoop(MachineLoop &L); 107 bool scheduleLoop(MachineLoop &L); 108 bool swingModuloScheduler(MachineLoop &L); 109 void setPragmaPipelineOptions(MachineLoop &L); 110 }; 111 112 /// This class builds the dependence graph for the instructions in a loop, 113 /// and attempts to schedule the instructions using the SMS algorithm. 114 class SwingSchedulerDAG : public ScheduleDAGInstrs { 115 MachinePipeliner &Pass; 116 /// The minimum initiation interval between iterations for this schedule. 117 unsigned MII = 0; 118 /// The maximum initiation interval between iterations for this schedule. 119 unsigned MAX_II = 0; 120 /// Set to true if a valid pipelined schedule is found for the loop. 121 bool Scheduled = false; 122 MachineLoop &Loop; 123 LiveIntervals &LIS; 124 const RegisterClassInfo &RegClassInfo; 125 unsigned II_setByPragma = 0; 126 TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr; 127 128 /// A toplogical ordering of the SUnits, which is needed for changing 129 /// dependences and iterating over the SUnits. 130 ScheduleDAGTopologicalSort Topo; 131 132 struct NodeInfo { 133 int ASAP = 0; 134 int ALAP = 0; 135 int ZeroLatencyDepth = 0; 136 int ZeroLatencyHeight = 0; 137 138 NodeInfo() = default; 139 }; 140 /// Computed properties for each node in the graph. 141 std::vector<NodeInfo> ScheduleInfo; 142 143 enum OrderKind { BottomUp = 0, TopDown = 1 }; 144 /// Computed node ordering for scheduling. 145 SetVector<SUnit *> NodeOrder; 146 147 using NodeSetType = SmallVector<NodeSet, 8>; 148 using ValueMapTy = DenseMap<unsigned, unsigned>; 149 using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>; 150 using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>; 151 152 /// Instructions to change when emitting the final schedule. 153 DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges; 154 155 /// We may create a new instruction, so remember it because it 156 /// must be deleted when the pass is finished. 157 DenseMap<MachineInstr*, MachineInstr *> NewMIs; 158 159 /// Ordered list of DAG postprocessing steps. 160 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; 161 162 /// Helper class to implement Johnson's circuit finding algorithm. 163 class Circuits { 164 std::vector<SUnit> &SUnits; 165 SetVector<SUnit *> Stack; 166 BitVector Blocked; 167 SmallVector<SmallPtrSet<SUnit *, 4>, 10> B; 168 SmallVector<SmallVector<int, 4>, 16> AdjK; 169 // Node to Index from ScheduleDAGTopologicalSort 170 std::vector<int> *Node2Idx; 171 unsigned NumPaths = 0u; 172 static unsigned MaxPaths; 173 174 public: Circuits(std::vector<SUnit> & SUs,ScheduleDAGTopologicalSort & Topo)175 Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo) 176 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) { 177 Node2Idx = new std::vector<int>(SUs.size()); 178 unsigned Idx = 0; 179 for (const auto &NodeNum : Topo) 180 Node2Idx->at(NodeNum) = Idx++; 181 } 182 Circuits &operator=(const Circuits &other) = delete; 183 Circuits(const Circuits &other) = delete; ~Circuits()184 ~Circuits() { delete Node2Idx; } 185 186 /// Reset the data structures used in the circuit algorithm. reset()187 void reset() { 188 Stack.clear(); 189 Blocked.reset(); 190 B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>()); 191 NumPaths = 0; 192 } 193 194 void createAdjacencyStructure(SwingSchedulerDAG *DAG); 195 bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false); 196 void unblock(int U); 197 }; 198 199 struct CopyToPhiMutation : public ScheduleDAGMutation { 200 void apply(ScheduleDAGInstrs *DAG) override; 201 }; 202 203 public: SwingSchedulerDAG(MachinePipeliner & P,MachineLoop & L,LiveIntervals & lis,const RegisterClassInfo & rci,unsigned II,TargetInstrInfo::PipelinerLoopInfo * PLI)204 SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, 205 const RegisterClassInfo &rci, unsigned II, 206 TargetInstrInfo::PipelinerLoopInfo *PLI) 207 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis), 208 RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI), 209 Topo(SUnits, &ExitSU) { 210 P.MF->getSubtarget().getSMSMutations(Mutations); 211 if (SwpEnableCopyToPhi) 212 Mutations.push_back(std::make_unique<CopyToPhiMutation>()); 213 } 214 215 void schedule() override; 216 void finishBlock() override; 217 218 /// Return true if the loop kernel has been scheduled. hasNewSchedule()219 bool hasNewSchedule() { return Scheduled; } 220 221 /// Return the earliest time an instruction may be scheduled. getASAP(SUnit * Node)222 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; } 223 224 /// Return the latest time an instruction my be scheduled. getALAP(SUnit * Node)225 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } 226 227 /// The mobility function, which the number of slots in which 228 /// an instruction may be scheduled. getMOV(SUnit * Node)229 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); } 230 231 /// The depth, in the dependence graph, for a node. getDepth(SUnit * Node)232 unsigned getDepth(SUnit *Node) { return Node->getDepth(); } 233 234 /// The maximum unweighted length of a path from an arbitrary node to the 235 /// given node in which each edge has latency 0 getZeroLatencyDepth(SUnit * Node)236 int getZeroLatencyDepth(SUnit *Node) { 237 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth; 238 } 239 240 /// The height, in the dependence graph, for a node. getHeight(SUnit * Node)241 unsigned getHeight(SUnit *Node) { return Node->getHeight(); } 242 243 /// The maximum unweighted length of a path from the given node to an 244 /// arbitrary node in which each edge has latency 0 getZeroLatencyHeight(SUnit * Node)245 int getZeroLatencyHeight(SUnit *Node) { 246 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight; 247 } 248 249 /// Return true if the dependence is a back-edge in the data dependence graph. 250 /// Since the DAG doesn't contain cycles, we represent a cycle in the graph 251 /// using an anti dependence from a Phi to an instruction. isBackedge(SUnit * Source,const SDep & Dep)252 bool isBackedge(SUnit *Source, const SDep &Dep) { 253 if (Dep.getKind() != SDep::Anti) 254 return false; 255 return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI(); 256 } 257 258 bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true); 259 260 /// The distance function, which indicates that operation V of iteration I 261 /// depends on operations U of iteration I-distance. getDistance(SUnit * U,SUnit * V,const SDep & Dep)262 unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) { 263 // Instructions that feed a Phi have a distance of 1. Computing larger 264 // values for arrays requires data dependence information. 265 if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti) 266 return 1; 267 return 0; 268 } 269 270 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule); 271 272 void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs); 273 274 /// Return the new base register that was stored away for the changed 275 /// instruction. getInstrBaseReg(SUnit * SU)276 unsigned getInstrBaseReg(SUnit *SU) const { 277 DenseMap<SUnit *, std::pair<unsigned, int64_t>>::const_iterator It = 278 InstrChanges.find(SU); 279 if (It != InstrChanges.end()) 280 return It->second.first; 281 return 0; 282 } 283 addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation)284 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { 285 Mutations.push_back(std::move(Mutation)); 286 } 287 classof(const ScheduleDAGInstrs * DAG)288 static bool classof(const ScheduleDAGInstrs *DAG) { return true; } 289 290 private: 291 void addLoopCarriedDependences(AAResults *AA); 292 void updatePhiDependences(); 293 void changeDependences(); 294 unsigned calculateResMII(); 295 unsigned calculateRecMII(NodeSetType &RecNodeSets); 296 void findCircuits(NodeSetType &NodeSets); 297 void fuseRecs(NodeSetType &NodeSets); 298 void removeDuplicateNodes(NodeSetType &NodeSets); 299 void computeNodeFunctions(NodeSetType &NodeSets); 300 void registerPressureFilter(NodeSetType &NodeSets); 301 void colocateNodeSets(NodeSetType &NodeSets); 302 void checkNodeSets(NodeSetType &NodeSets); 303 void groupRemainingNodes(NodeSetType &NodeSets); 304 void addConnectedNodes(SUnit *SU, NodeSet &NewSet, 305 SetVector<SUnit *> &NodesAdded); 306 void computeNodeOrder(NodeSetType &NodeSets); 307 void checkValidNodeOrder(const NodeSetType &Circuits) const; 308 bool schedulePipeline(SMSchedule &Schedule); 309 bool computeDelta(MachineInstr &MI, unsigned &Delta); 310 MachineInstr *findDefInLoop(Register Reg); 311 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, 312 unsigned &OffsetPos, unsigned &NewBase, 313 int64_t &NewOffset); 314 void postProcessDAG(); 315 /// Set the Minimum Initiation Interval for this schedule attempt. 316 void setMII(unsigned ResMII, unsigned RecMII); 317 /// Set the Maximum Initiation Interval for this schedule attempt. 318 void setMAX_II(); 319 }; 320 321 /// A NodeSet contains a set of SUnit DAG nodes with additional information 322 /// that assigns a priority to the set. 323 class NodeSet { 324 SetVector<SUnit *> Nodes; 325 bool HasRecurrence = false; 326 unsigned RecMII = 0; 327 int MaxMOV = 0; 328 unsigned MaxDepth = 0; 329 unsigned Colocate = 0; 330 SUnit *ExceedPressure = nullptr; 331 unsigned Latency = 0; 332 333 public: 334 using iterator = SetVector<SUnit *>::const_iterator; 335 336 NodeSet() = default; NodeSet(iterator S,iterator E)337 NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) { 338 Latency = 0; 339 for (const SUnit *Node : Nodes) { 340 DenseMap<SUnit *, unsigned> SuccSUnitLatency; 341 for (const SDep &Succ : Node->Succs) { 342 auto SuccSUnit = Succ.getSUnit(); 343 if (!Nodes.count(SuccSUnit)) 344 continue; 345 unsigned CurLatency = Succ.getLatency(); 346 unsigned MaxLatency = 0; 347 if (SuccSUnitLatency.count(SuccSUnit)) 348 MaxLatency = SuccSUnitLatency[SuccSUnit]; 349 if (CurLatency > MaxLatency) 350 SuccSUnitLatency[SuccSUnit] = CurLatency; 351 } 352 for (auto SUnitLatency : SuccSUnitLatency) 353 Latency += SUnitLatency.second; 354 } 355 } 356 insert(SUnit * SU)357 bool insert(SUnit *SU) { return Nodes.insert(SU); } 358 insert(iterator S,iterator E)359 void insert(iterator S, iterator E) { Nodes.insert(S, E); } 360 remove_if(UnaryPredicate P)361 template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) { 362 return Nodes.remove_if(P); 363 } 364 count(SUnit * SU)365 unsigned count(SUnit *SU) const { return Nodes.count(SU); } 366 hasRecurrence()367 bool hasRecurrence() { return HasRecurrence; }; 368 size()369 unsigned size() const { return Nodes.size(); } 370 empty()371 bool empty() const { return Nodes.empty(); } 372 getNode(unsigned i)373 SUnit *getNode(unsigned i) const { return Nodes[i]; }; 374 setRecMII(unsigned mii)375 void setRecMII(unsigned mii) { RecMII = mii; }; 376 setColocate(unsigned c)377 void setColocate(unsigned c) { Colocate = c; }; 378 setExceedPressure(SUnit * SU)379 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; } 380 isExceedSU(SUnit * SU)381 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; } 382 compareRecMII(NodeSet & RHS)383 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; } 384 getRecMII()385 int getRecMII() { return RecMII; } 386 387 /// Summarize node functions for the entire node set. computeNodeSetInfo(SwingSchedulerDAG * SSD)388 void computeNodeSetInfo(SwingSchedulerDAG *SSD) { 389 for (SUnit *SU : *this) { 390 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU)); 391 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU)); 392 } 393 } 394 getLatency()395 unsigned getLatency() { return Latency; } 396 getMaxDepth()397 unsigned getMaxDepth() { return MaxDepth; } 398 clear()399 void clear() { 400 Nodes.clear(); 401 RecMII = 0; 402 HasRecurrence = false; 403 MaxMOV = 0; 404 MaxDepth = 0; 405 Colocate = 0; 406 ExceedPressure = nullptr; 407 } 408 409 operator SetVector<SUnit *> &() { return Nodes; } 410 411 /// Sort the node sets by importance. First, rank them by recurrence MII, 412 /// then by mobility (least mobile done first), and finally by depth. 413 /// Each node set may contain a colocate value which is used as the first 414 /// tie breaker, if it's set. 415 bool operator>(const NodeSet &RHS) const { 416 if (RecMII == RHS.RecMII) { 417 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate) 418 return Colocate < RHS.Colocate; 419 if (MaxMOV == RHS.MaxMOV) 420 return MaxDepth > RHS.MaxDepth; 421 return MaxMOV < RHS.MaxMOV; 422 } 423 return RecMII > RHS.RecMII; 424 } 425 426 bool operator==(const NodeSet &RHS) const { 427 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV && 428 MaxDepth == RHS.MaxDepth; 429 } 430 431 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); } 432 begin()433 iterator begin() { return Nodes.begin(); } end()434 iterator end() { return Nodes.end(); } 435 void print(raw_ostream &os) const; 436 437 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 438 LLVM_DUMP_METHOD void dump() const; 439 #endif 440 }; 441 442 // 16 was selected based on the number of ProcResource kinds for all 443 // existing Subtargets, so that SmallVector don't need to resize too often. 444 static const int DefaultProcResSize = 16; 445 446 class ResourceManager { 447 private: 448 const MCSubtargetInfo *STI; 449 const MCSchedModel &SM; 450 const TargetSubtargetInfo *ST; 451 const TargetInstrInfo *TII; 452 SwingSchedulerDAG *DAG; 453 const bool UseDFA; 454 /// DFA resources for each slot 455 llvm::SmallVector<std::unique_ptr<DFAPacketizer>> DFAResources; 456 /// Modulo Reservation Table. When a resource with ID R is consumed in cycle 457 /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F) 458 llvm::SmallVector<llvm::SmallVector<uint64_t, DefaultProcResSize>> MRT; 459 /// The number of scheduled micro operations for each slot. Micro operations 460 /// are assumed to be scheduled one per cycle, starting with the cycle in 461 /// which the instruction is scheduled. 462 llvm::SmallVector<int> NumScheduledMops; 463 /// Each processor resource is associated with a so-called processor resource 464 /// mask. This vector allows to correlate processor resource IDs with 465 /// processor resource masks. There is exactly one element per each processor 466 /// resource declared by the scheduling model. 467 llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks; 468 int InitiationInterval = 0; 469 /// The number of micro operations that can be scheduled at a cycle. 470 int IssueWidth; 471 472 int calculateResMIIDFA() const; 473 /// Check if MRT is overbooked 474 bool isOverbooked() const; 475 /// Reserve resources on MRT 476 void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle); 477 /// Unreserve resources on MRT 478 void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle); 479 480 /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor. 481 /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C, 482 /// II). positiveModulo(int Dividend,int Divisor)483 int positiveModulo(int Dividend, int Divisor) const { 484 assert(Divisor > 0); 485 int R = Dividend % Divisor; 486 if (R < 0) 487 R += Divisor; 488 return R; 489 } 490 491 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 492 LLVM_DUMP_METHOD void dumpMRT() const; 493 #endif 494 495 public: ResourceManager(const TargetSubtargetInfo * ST,SwingSchedulerDAG * DAG)496 ResourceManager(const TargetSubtargetInfo *ST, SwingSchedulerDAG *DAG) 497 : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()), 498 DAG(DAG), UseDFA(ST->useDFAforSMS()), 499 ProcResourceMasks(SM.getNumProcResourceKinds(), 0), 500 IssueWidth(SM.IssueWidth) { 501 initProcResourceVectors(SM, ProcResourceMasks); 502 if (IssueWidth <= 0) 503 // If IssueWidth is not specified, set a sufficiently large value 504 IssueWidth = 100; 505 if (SwpForceIssueWidth > 0) 506 IssueWidth = SwpForceIssueWidth; 507 } 508 509 void initProcResourceVectors(const MCSchedModel &SM, 510 SmallVectorImpl<uint64_t> &Masks); 511 512 /// Check if the resources occupied by a machine instruction are available 513 /// in the current state. 514 bool canReserveResources(SUnit &SU, int Cycle); 515 516 /// Reserve the resources occupied by a machine instruction and change the 517 /// current state to reflect that change. 518 void reserveResources(SUnit &SU, int Cycle); 519 520 int calculateResMII() const; 521 522 /// Initialize resources with the initiation interval II. 523 void init(int II); 524 }; 525 526 /// This class represents the scheduled code. The main data structure is a 527 /// map from scheduled cycle to instructions. During scheduling, the 528 /// data structure explicitly represents all stages/iterations. When 529 /// the algorithm finshes, the schedule is collapsed into a single stage, 530 /// which represents instructions from different loop iterations. 531 /// 532 /// The SMS algorithm allows negative values for cycles, so the first cycle 533 /// in the schedule is the smallest cycle value. 534 class SMSchedule { 535 private: 536 /// Map from execution cycle to instructions. 537 DenseMap<int, std::deque<SUnit *>> ScheduledInstrs; 538 539 /// Map from instruction to execution cycle. 540 std::map<SUnit *, int> InstrToCycle; 541 542 /// Keep track of the first cycle value in the schedule. It starts 543 /// as zero, but the algorithm allows negative values. 544 int FirstCycle = 0; 545 546 /// Keep track of the last cycle value in the schedule. 547 int LastCycle = 0; 548 549 /// The initiation interval (II) for the schedule. 550 int InitiationInterval = 0; 551 552 /// Target machine information. 553 const TargetSubtargetInfo &ST; 554 555 /// Virtual register information. 556 MachineRegisterInfo &MRI; 557 558 ResourceManager ProcItinResources; 559 560 public: SMSchedule(MachineFunction * mf,SwingSchedulerDAG * DAG)561 SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG) 562 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), 563 ProcItinResources(&ST, DAG) {} 564 reset()565 void reset() { 566 ScheduledInstrs.clear(); 567 InstrToCycle.clear(); 568 FirstCycle = 0; 569 LastCycle = 0; 570 InitiationInterval = 0; 571 } 572 573 /// Set the initiation interval for this schedule. setInitiationInterval(int ii)574 void setInitiationInterval(int ii) { 575 InitiationInterval = ii; 576 ProcItinResources.init(ii); 577 } 578 579 /// Return the initiation interval for this schedule. getInitiationInterval()580 int getInitiationInterval() const { return InitiationInterval; } 581 582 /// Return the first cycle in the completed schedule. This 583 /// can be a negative value. getFirstCycle()584 int getFirstCycle() const { return FirstCycle; } 585 586 /// Return the last cycle in the finalized schedule. getFinalCycle()587 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; } 588 589 /// Return the cycle of the earliest scheduled instruction in the dependence 590 /// chain. 591 int earliestCycleInChain(const SDep &Dep); 592 593 /// Return the cycle of the latest scheduled instruction in the dependence 594 /// chain. 595 int latestCycleInChain(const SDep &Dep); 596 597 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, 598 int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG); 599 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II); 600 601 /// Iterators for the cycle to instruction map. 602 using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator; 603 using const_sched_iterator = 604 DenseMap<int, std::deque<SUnit *>>::const_iterator; 605 606 /// Return true if the instruction is scheduled at the specified stage. isScheduledAtStage(SUnit * SU,unsigned StageNum)607 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) { 608 return (stageScheduled(SU) == (int)StageNum); 609 } 610 611 /// Return the stage for a scheduled instruction. Return -1 if 612 /// the instruction has not been scheduled. stageScheduled(SUnit * SU)613 int stageScheduled(SUnit *SU) const { 614 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); 615 if (it == InstrToCycle.end()) 616 return -1; 617 return (it->second - FirstCycle) / InitiationInterval; 618 } 619 620 /// Return the cycle for a scheduled instruction. This function normalizes 621 /// the first cycle to be 0. cycleScheduled(SUnit * SU)622 unsigned cycleScheduled(SUnit *SU) const { 623 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); 624 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled."); 625 return (it->second - FirstCycle) % InitiationInterval; 626 } 627 628 /// Return the maximum stage count needed for this schedule. getMaxStageCount()629 unsigned getMaxStageCount() { 630 return (LastCycle - FirstCycle) / InitiationInterval; 631 } 632 633 /// Return the instructions that are scheduled at the specified cycle. getInstructions(int cycle)634 std::deque<SUnit *> &getInstructions(int cycle) { 635 return ScheduledInstrs[cycle]; 636 } 637 638 SmallSet<SUnit *, 8> 639 computeUnpipelineableNodes(SwingSchedulerDAG *SSD, 640 TargetInstrInfo::PipelinerLoopInfo *PLI); 641 642 std::deque<SUnit *> 643 reorderInstructions(const SwingSchedulerDAG *SSD, 644 const std::deque<SUnit *> &Instrs) const; 645 646 bool 647 normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, 648 TargetInstrInfo::PipelinerLoopInfo *PLI); 649 bool isValidSchedule(SwingSchedulerDAG *SSD); 650 void finalizeSchedule(SwingSchedulerDAG *SSD); 651 void orderDependence(const SwingSchedulerDAG *SSD, SUnit *SU, 652 std::deque<SUnit *> &Insts) const; 653 bool isLoopCarried(const SwingSchedulerDAG *SSD, MachineInstr &Phi) const; 654 bool isLoopCarriedDefOfUse(const SwingSchedulerDAG *SSD, MachineInstr *Def, 655 MachineOperand &MO) const; 656 void print(raw_ostream &os) const; 657 void dump() const; 658 }; 659 660 } // end namespace llvm 661 662 #endif // LLVM_CODEGEN_MACHINEPIPELINER_H 663