1 //===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner. 10 // 11 // Software pipelining (SWP) is an instruction scheduling technique for loops 12 // that overlap loop iterations and exploits ILP via a compiler transformation. 13 // 14 // Swing Modulo Scheduling is an implementation of software pipelining 15 // that generates schedules that are near optimal in terms of initiation 16 // interval, register requirements, and stage count. See the papers: 17 // 18 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa, 19 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996 20 // Conference on Parallel Architectures and Compilation Techiniques. 21 // 22 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J. 23 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE 24 // Transactions on Computers, Vol. 50, No. 3, 2001. 25 // 26 // "An Implementation of Swing Modulo Scheduling With Extensions for 27 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at 28 // Urbana-Champaign, 2005. 29 // 30 // 31 // The SMS algorithm consists of three main steps after computing the minimal 32 // initiation interval (MII). 33 // 1) Analyze the dependence graph and compute information about each 34 // instruction in the graph. 35 // 2) Order the nodes (instructions) by priority based upon the heuristics 36 // described in the algorithm. 37 // 3) Attempt to schedule the nodes in the specified order using the MII. 38 // 39 //===----------------------------------------------------------------------===// 40 #ifndef LLVM_CODEGEN_MACHINEPIPELINER_H 41 #define LLVM_CODEGEN_MACHINEPIPELINER_H 42 43 #include "llvm/ADT/SetVector.h" 44 #include "llvm/CodeGen/DFAPacketizer.h" 45 #include "llvm/CodeGen/MachineDominators.h" 46 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" 47 #include "llvm/CodeGen/RegisterClassInfo.h" 48 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 49 #include "llvm/CodeGen/ScheduleDAGMutation.h" 50 #include "llvm/CodeGen/TargetInstrInfo.h" 51 #include "llvm/InitializePasses.h" 52 53 #include <deque> 54 55 namespace llvm { 56 57 class AAResults; 58 class NodeSet; 59 class SMSchedule; 60 61 extern cl::opt<bool> SwpEnableCopyToPhi; 62 extern cl::opt<int> SwpForceIssueWidth; 63 64 /// The main class in the implementation of the target independent 65 /// software pipeliner pass. 66 class MachinePipeliner : public MachineFunctionPass { 67 public: 68 MachineFunction *MF = nullptr; 69 MachineOptimizationRemarkEmitter *ORE = nullptr; 70 const MachineLoopInfo *MLI = nullptr; 71 const MachineDominatorTree *MDT = nullptr; 72 const InstrItineraryData *InstrItins; 73 const TargetInstrInfo *TII = nullptr; 74 RegisterClassInfo RegClassInfo; 75 bool disabledByPragma = false; 76 unsigned II_setByPragma = 0; 77 78 #ifndef NDEBUG 79 static int NumTries; 80 #endif 81 82 /// Cache the target analysis information about the loop. 83 struct LoopInfo { 84 MachineBasicBlock *TBB = nullptr; 85 MachineBasicBlock *FBB = nullptr; 86 SmallVector<MachineOperand, 4> BrCond; 87 MachineInstr *LoopInductionVar = nullptr; 88 MachineInstr *LoopCompare = nullptr; 89 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo = 90 nullptr; 91 }; 92 LoopInfo LI; 93 94 static char ID; 95 MachinePipeliner()96 MachinePipeliner() : MachineFunctionPass(ID) { 97 initializeMachinePipelinerPass(*PassRegistry::getPassRegistry()); 98 } 99 100 bool runOnMachineFunction(MachineFunction &MF) override; 101 102 void getAnalysisUsage(AnalysisUsage &AU) const override; 103 104 private: 105 void preprocessPhiNodes(MachineBasicBlock &B); 106 bool canPipelineLoop(MachineLoop &L); 107 bool scheduleLoop(MachineLoop &L); 108 bool swingModuloScheduler(MachineLoop &L); 109 void setPragmaPipelineOptions(MachineLoop &L); 110 }; 111 112 /// This class builds the dependence graph for the instructions in a loop, 113 /// and attempts to schedule the instructions using the SMS algorithm. 114 class SwingSchedulerDAG : public ScheduleDAGInstrs { 115 MachinePipeliner &Pass; 116 /// The minimum initiation interval between iterations for this schedule. 117 unsigned MII = 0; 118 /// The maximum initiation interval between iterations for this schedule. 119 unsigned MAX_II = 0; 120 /// Set to true if a valid pipelined schedule is found for the loop. 121 bool Scheduled = false; 122 MachineLoop &Loop; 123 LiveIntervals &LIS; 124 const RegisterClassInfo &RegClassInfo; 125 unsigned II_setByPragma = 0; 126 TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr; 127 128 /// A toplogical ordering of the SUnits, which is needed for changing 129 /// dependences and iterating over the SUnits. 130 ScheduleDAGTopologicalSort Topo; 131 132 struct NodeInfo { 133 int ASAP = 0; 134 int ALAP = 0; 135 int ZeroLatencyDepth = 0; 136 int ZeroLatencyHeight = 0; 137 138 NodeInfo() = default; 139 }; 140 /// Computed properties for each node in the graph. 141 std::vector<NodeInfo> ScheduleInfo; 142 143 enum OrderKind { BottomUp = 0, TopDown = 1 }; 144 /// Computed node ordering for scheduling. 145 SetVector<SUnit *> NodeOrder; 146 147 using NodeSetType = SmallVector<NodeSet, 8>; 148 using ValueMapTy = DenseMap<unsigned, unsigned>; 149 using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>; 150 using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>; 151 152 /// Instructions to change when emitting the final schedule. 153 DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges; 154 155 /// We may create a new instruction, so remember it because it 156 /// must be deleted when the pass is finished. 157 DenseMap<MachineInstr*, MachineInstr *> NewMIs; 158 159 /// Ordered list of DAG postprocessing steps. 160 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; 161 162 /// Helper class to implement Johnson's circuit finding algorithm. 163 class Circuits { 164 std::vector<SUnit> &SUnits; 165 SetVector<SUnit *> Stack; 166 BitVector Blocked; 167 SmallVector<SmallPtrSet<SUnit *, 4>, 10> B; 168 SmallVector<SmallVector<int, 4>, 16> AdjK; 169 // Node to Index from ScheduleDAGTopologicalSort 170 std::vector<int> *Node2Idx; 171 unsigned NumPaths; 172 static unsigned MaxPaths; 173 174 public: Circuits(std::vector<SUnit> & SUs,ScheduleDAGTopologicalSort & Topo)175 Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo) 176 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) { 177 Node2Idx = new std::vector<int>(SUs.size()); 178 unsigned Idx = 0; 179 for (const auto &NodeNum : Topo) 180 Node2Idx->at(NodeNum) = Idx++; 181 } 182 ~Circuits()183 ~Circuits() { delete Node2Idx; } 184 185 /// Reset the data structures used in the circuit algorithm. reset()186 void reset() { 187 Stack.clear(); 188 Blocked.reset(); 189 B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>()); 190 NumPaths = 0; 191 } 192 193 void createAdjacencyStructure(SwingSchedulerDAG *DAG); 194 bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false); 195 void unblock(int U); 196 }; 197 198 struct CopyToPhiMutation : public ScheduleDAGMutation { 199 void apply(ScheduleDAGInstrs *DAG) override; 200 }; 201 202 public: SwingSchedulerDAG(MachinePipeliner & P,MachineLoop & L,LiveIntervals & lis,const RegisterClassInfo & rci,unsigned II,TargetInstrInfo::PipelinerLoopInfo * PLI)203 SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, 204 const RegisterClassInfo &rci, unsigned II, 205 TargetInstrInfo::PipelinerLoopInfo *PLI) 206 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis), 207 RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI), 208 Topo(SUnits, &ExitSU) { 209 P.MF->getSubtarget().getSMSMutations(Mutations); 210 if (SwpEnableCopyToPhi) 211 Mutations.push_back(std::make_unique<CopyToPhiMutation>()); 212 } 213 214 void schedule() override; 215 void finishBlock() override; 216 217 /// Return true if the loop kernel has been scheduled. hasNewSchedule()218 bool hasNewSchedule() { return Scheduled; } 219 220 /// Return the earliest time an instruction may be scheduled. getASAP(SUnit * Node)221 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; } 222 223 /// Return the latest time an instruction my be scheduled. getALAP(SUnit * Node)224 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } 225 226 /// The mobility function, which the number of slots in which 227 /// an instruction may be scheduled. getMOV(SUnit * Node)228 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); } 229 230 /// The depth, in the dependence graph, for a node. getDepth(SUnit * Node)231 unsigned getDepth(SUnit *Node) { return Node->getDepth(); } 232 233 /// The maximum unweighted length of a path from an arbitrary node to the 234 /// given node in which each edge has latency 0 getZeroLatencyDepth(SUnit * Node)235 int getZeroLatencyDepth(SUnit *Node) { 236 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth; 237 } 238 239 /// The height, in the dependence graph, for a node. getHeight(SUnit * Node)240 unsigned getHeight(SUnit *Node) { return Node->getHeight(); } 241 242 /// The maximum unweighted length of a path from the given node to an 243 /// arbitrary node in which each edge has latency 0 getZeroLatencyHeight(SUnit * Node)244 int getZeroLatencyHeight(SUnit *Node) { 245 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight; 246 } 247 248 /// Return true if the dependence is a back-edge in the data dependence graph. 249 /// Since the DAG doesn't contain cycles, we represent a cycle in the graph 250 /// using an anti dependence from a Phi to an instruction. isBackedge(SUnit * Source,const SDep & Dep)251 bool isBackedge(SUnit *Source, const SDep &Dep) { 252 if (Dep.getKind() != SDep::Anti) 253 return false; 254 return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI(); 255 } 256 257 bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true); 258 259 /// The distance function, which indicates that operation V of iteration I 260 /// depends on operations U of iteration I-distance. getDistance(SUnit * U,SUnit * V,const SDep & Dep)261 unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) { 262 // Instructions that feed a Phi have a distance of 1. Computing larger 263 // values for arrays requires data dependence information. 264 if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti) 265 return 1; 266 return 0; 267 } 268 269 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule); 270 271 void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs); 272 273 /// Return the new base register that was stored away for the changed 274 /// instruction. getInstrBaseReg(SUnit * SU)275 unsigned getInstrBaseReg(SUnit *SU) { 276 DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = 277 InstrChanges.find(SU); 278 if (It != InstrChanges.end()) 279 return It->second.first; 280 return 0; 281 } 282 addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation)283 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { 284 Mutations.push_back(std::move(Mutation)); 285 } 286 classof(const ScheduleDAGInstrs * DAG)287 static bool classof(const ScheduleDAGInstrs *DAG) { return true; } 288 289 private: 290 void addLoopCarriedDependences(AAResults *AA); 291 void updatePhiDependences(); 292 void changeDependences(); 293 unsigned calculateResMII(); 294 unsigned calculateRecMII(NodeSetType &RecNodeSets); 295 void findCircuits(NodeSetType &NodeSets); 296 void fuseRecs(NodeSetType &NodeSets); 297 void removeDuplicateNodes(NodeSetType &NodeSets); 298 void computeNodeFunctions(NodeSetType &NodeSets); 299 void registerPressureFilter(NodeSetType &NodeSets); 300 void colocateNodeSets(NodeSetType &NodeSets); 301 void checkNodeSets(NodeSetType &NodeSets); 302 void groupRemainingNodes(NodeSetType &NodeSets); 303 void addConnectedNodes(SUnit *SU, NodeSet &NewSet, 304 SetVector<SUnit *> &NodesAdded); 305 void computeNodeOrder(NodeSetType &NodeSets); 306 void checkValidNodeOrder(const NodeSetType &Circuits) const; 307 bool schedulePipeline(SMSchedule &Schedule); 308 bool computeDelta(MachineInstr &MI, unsigned &Delta); 309 MachineInstr *findDefInLoop(Register Reg); 310 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, 311 unsigned &OffsetPos, unsigned &NewBase, 312 int64_t &NewOffset); 313 void postprocessDAG(); 314 /// Set the Minimum Initiation Interval for this schedule attempt. 315 void setMII(unsigned ResMII, unsigned RecMII); 316 /// Set the Maximum Initiation Interval for this schedule attempt. 317 void setMAX_II(); 318 }; 319 320 /// A NodeSet contains a set of SUnit DAG nodes with additional information 321 /// that assigns a priority to the set. 322 class NodeSet { 323 SetVector<SUnit *> Nodes; 324 bool HasRecurrence = false; 325 unsigned RecMII = 0; 326 int MaxMOV = 0; 327 unsigned MaxDepth = 0; 328 unsigned Colocate = 0; 329 SUnit *ExceedPressure = nullptr; 330 unsigned Latency = 0; 331 332 public: 333 using iterator = SetVector<SUnit *>::const_iterator; 334 335 NodeSet() = default; NodeSet(iterator S,iterator E)336 NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) { 337 Latency = 0; 338 for (const SUnit *Node : Nodes) { 339 DenseMap<SUnit *, unsigned> SuccSUnitLatency; 340 for (const SDep &Succ : Node->Succs) { 341 auto SuccSUnit = Succ.getSUnit(); 342 if (!Nodes.count(SuccSUnit)) 343 continue; 344 unsigned CurLatency = Succ.getLatency(); 345 unsigned MaxLatency = 0; 346 if (SuccSUnitLatency.count(SuccSUnit)) 347 MaxLatency = SuccSUnitLatency[SuccSUnit]; 348 if (CurLatency > MaxLatency) 349 SuccSUnitLatency[SuccSUnit] = CurLatency; 350 } 351 for (auto SUnitLatency : SuccSUnitLatency) 352 Latency += SUnitLatency.second; 353 } 354 } 355 insert(SUnit * SU)356 bool insert(SUnit *SU) { return Nodes.insert(SU); } 357 insert(iterator S,iterator E)358 void insert(iterator S, iterator E) { Nodes.insert(S, E); } 359 remove_if(UnaryPredicate P)360 template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) { 361 return Nodes.remove_if(P); 362 } 363 count(SUnit * SU)364 unsigned count(SUnit *SU) const { return Nodes.count(SU); } 365 hasRecurrence()366 bool hasRecurrence() { return HasRecurrence; }; 367 size()368 unsigned size() const { return Nodes.size(); } 369 empty()370 bool empty() const { return Nodes.empty(); } 371 getNode(unsigned i)372 SUnit *getNode(unsigned i) const { return Nodes[i]; }; 373 setRecMII(unsigned mii)374 void setRecMII(unsigned mii) { RecMII = mii; }; 375 setColocate(unsigned c)376 void setColocate(unsigned c) { Colocate = c; }; 377 setExceedPressure(SUnit * SU)378 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; } 379 isExceedSU(SUnit * SU)380 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; } 381 compareRecMII(NodeSet & RHS)382 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; } 383 getRecMII()384 int getRecMII() { return RecMII; } 385 386 /// Summarize node functions for the entire node set. computeNodeSetInfo(SwingSchedulerDAG * SSD)387 void computeNodeSetInfo(SwingSchedulerDAG *SSD) { 388 for (SUnit *SU : *this) { 389 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU)); 390 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU)); 391 } 392 } 393 getLatency()394 unsigned getLatency() { return Latency; } 395 getMaxDepth()396 unsigned getMaxDepth() { return MaxDepth; } 397 clear()398 void clear() { 399 Nodes.clear(); 400 RecMII = 0; 401 HasRecurrence = false; 402 MaxMOV = 0; 403 MaxDepth = 0; 404 Colocate = 0; 405 ExceedPressure = nullptr; 406 } 407 408 operator SetVector<SUnit *> &() { return Nodes; } 409 410 /// Sort the node sets by importance. First, rank them by recurrence MII, 411 /// then by mobility (least mobile done first), and finally by depth. 412 /// Each node set may contain a colocate value which is used as the first 413 /// tie breaker, if it's set. 414 bool operator>(const NodeSet &RHS) const { 415 if (RecMII == RHS.RecMII) { 416 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate) 417 return Colocate < RHS.Colocate; 418 if (MaxMOV == RHS.MaxMOV) 419 return MaxDepth > RHS.MaxDepth; 420 return MaxMOV < RHS.MaxMOV; 421 } 422 return RecMII > RHS.RecMII; 423 } 424 425 bool operator==(const NodeSet &RHS) const { 426 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV && 427 MaxDepth == RHS.MaxDepth; 428 } 429 430 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); } 431 begin()432 iterator begin() { return Nodes.begin(); } end()433 iterator end() { return Nodes.end(); } 434 void print(raw_ostream &os) const; 435 436 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 437 LLVM_DUMP_METHOD void dump() const; 438 #endif 439 }; 440 441 // 16 was selected based on the number of ProcResource kinds for all 442 // existing Subtargets, so that SmallVector don't need to resize too often. 443 static const int DefaultProcResSize = 16; 444 445 class ResourceManager { 446 private: 447 const MCSubtargetInfo *STI; 448 const MCSchedModel &SM; 449 const TargetSubtargetInfo *ST; 450 const TargetInstrInfo *TII; 451 SwingSchedulerDAG *DAG; 452 const bool UseDFA; 453 /// DFA resources for each slot 454 llvm::SmallVector<std::unique_ptr<DFAPacketizer>> DFAResources; 455 /// Modulo Reservation Table. When a resource with ID R is consumed in cycle 456 /// C, it is counted in MRT[C mod II][R]. (Used when UseDFA == F) 457 llvm::SmallVector<llvm::SmallVector<uint64_t, DefaultProcResSize>> MRT; 458 /// The number of scheduled micro operations for each slot. Micro operations 459 /// are assumed to be scheduled one per cycle, starting with the cycle in 460 /// which the instruction is scheduled. 461 llvm::SmallVector<int> NumScheduledMops; 462 /// Each processor resource is associated with a so-called processor resource 463 /// mask. This vector allows to correlate processor resource IDs with 464 /// processor resource masks. There is exactly one element per each processor 465 /// resource declared by the scheduling model. 466 llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks; 467 int InitiationInterval; 468 /// The number of micro operations that can be scheduled at a cycle. 469 int IssueWidth; 470 471 int calculateResMIIDFA() const; 472 /// Check if MRT is overbooked 473 bool isOverbooked() const; 474 /// Reserve resources on MRT 475 void reserveResources(const MCSchedClassDesc *SCDesc, int Cycle); 476 /// Unreserve resources on MRT 477 void unreserveResources(const MCSchedClassDesc *SCDesc, int Cycle); 478 479 /// Return M satisfying Dividend = Divisor * X + M, 0 < M < Divisor. 480 /// The slot on MRT to reserve a resource for the cycle C is positiveModulo(C, 481 /// II). positiveModulo(int Dividend,int Divisor)482 int positiveModulo(int Dividend, int Divisor) const { 483 assert(Divisor > 0); 484 int R = Dividend % Divisor; 485 if (R < 0) 486 R += Divisor; 487 return R; 488 } 489 490 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 491 LLVM_DUMP_METHOD void dumpMRT() const; 492 #endif 493 494 public: ResourceManager(const TargetSubtargetInfo * ST,SwingSchedulerDAG * DAG)495 ResourceManager(const TargetSubtargetInfo *ST, SwingSchedulerDAG *DAG) 496 : STI(ST), SM(ST->getSchedModel()), ST(ST), TII(ST->getInstrInfo()), 497 DAG(DAG), UseDFA(ST->useDFAforSMS()), 498 ProcResourceMasks(SM.getNumProcResourceKinds(), 0), 499 IssueWidth(SM.IssueWidth) { 500 initProcResourceVectors(SM, ProcResourceMasks); 501 if (IssueWidth <= 0) 502 // If IssueWidth is not specified, set a sufficiently large value 503 IssueWidth = 100; 504 if (SwpForceIssueWidth > 0) 505 IssueWidth = SwpForceIssueWidth; 506 } 507 508 void initProcResourceVectors(const MCSchedModel &SM, 509 SmallVectorImpl<uint64_t> &Masks); 510 511 /// Check if the resources occupied by a machine instruction are available 512 /// in the current state. 513 bool canReserveResources(SUnit &SU, int Cycle); 514 515 /// Reserve the resources occupied by a machine instruction and change the 516 /// current state to reflect that change. 517 void reserveResources(SUnit &SU, int Cycle); 518 519 int calculateResMII() const; 520 521 /// Initialize resources with the initiation interval II. 522 void init(int II); 523 }; 524 525 /// This class represents the scheduled code. The main data structure is a 526 /// map from scheduled cycle to instructions. During scheduling, the 527 /// data structure explicitly represents all stages/iterations. When 528 /// the algorithm finshes, the schedule is collapsed into a single stage, 529 /// which represents instructions from different loop iterations. 530 /// 531 /// The SMS algorithm allows negative values for cycles, so the first cycle 532 /// in the schedule is the smallest cycle value. 533 class SMSchedule { 534 private: 535 /// Map from execution cycle to instructions. 536 DenseMap<int, std::deque<SUnit *>> ScheduledInstrs; 537 538 /// Map from instruction to execution cycle. 539 std::map<SUnit *, int> InstrToCycle; 540 541 /// Keep track of the first cycle value in the schedule. It starts 542 /// as zero, but the algorithm allows negative values. 543 int FirstCycle = 0; 544 545 /// Keep track of the last cycle value in the schedule. 546 int LastCycle = 0; 547 548 /// The initiation interval (II) for the schedule. 549 int InitiationInterval = 0; 550 551 /// Target machine information. 552 const TargetSubtargetInfo &ST; 553 554 /// Virtual register information. 555 MachineRegisterInfo &MRI; 556 557 ResourceManager ProcItinResources; 558 559 public: SMSchedule(MachineFunction * mf,SwingSchedulerDAG * DAG)560 SMSchedule(MachineFunction *mf, SwingSchedulerDAG *DAG) 561 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), 562 ProcItinResources(&ST, DAG) {} 563 reset()564 void reset() { 565 ScheduledInstrs.clear(); 566 InstrToCycle.clear(); 567 FirstCycle = 0; 568 LastCycle = 0; 569 InitiationInterval = 0; 570 } 571 572 /// Set the initiation interval for this schedule. setInitiationInterval(int ii)573 void setInitiationInterval(int ii) { 574 InitiationInterval = ii; 575 ProcItinResources.init(ii); 576 } 577 578 /// Return the initiation interval for this schedule. getInitiationInterval()579 int getInitiationInterval() const { return InitiationInterval; } 580 581 /// Return the first cycle in the completed schedule. This 582 /// can be a negative value. getFirstCycle()583 int getFirstCycle() const { return FirstCycle; } 584 585 /// Return the last cycle in the finalized schedule. getFinalCycle()586 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; } 587 588 /// Return the cycle of the earliest scheduled instruction in the dependence 589 /// chain. 590 int earliestCycleInChain(const SDep &Dep); 591 592 /// Return the cycle of the latest scheduled instruction in the dependence 593 /// chain. 594 int latestCycleInChain(const SDep &Dep); 595 596 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, 597 int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG); 598 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II); 599 600 /// Iterators for the cycle to instruction map. 601 using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator; 602 using const_sched_iterator = 603 DenseMap<int, std::deque<SUnit *>>::const_iterator; 604 605 /// Return true if the instruction is scheduled at the specified stage. isScheduledAtStage(SUnit * SU,unsigned StageNum)606 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) { 607 return (stageScheduled(SU) == (int)StageNum); 608 } 609 610 /// Return the stage for a scheduled instruction. Return -1 if 611 /// the instruction has not been scheduled. stageScheduled(SUnit * SU)612 int stageScheduled(SUnit *SU) const { 613 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); 614 if (it == InstrToCycle.end()) 615 return -1; 616 return (it->second - FirstCycle) / InitiationInterval; 617 } 618 619 /// Return the cycle for a scheduled instruction. This function normalizes 620 /// the first cycle to be 0. cycleScheduled(SUnit * SU)621 unsigned cycleScheduled(SUnit *SU) const { 622 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); 623 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled."); 624 return (it->second - FirstCycle) % InitiationInterval; 625 } 626 627 /// Return the maximum stage count needed for this schedule. getMaxStageCount()628 unsigned getMaxStageCount() { 629 return (LastCycle - FirstCycle) / InitiationInterval; 630 } 631 632 /// Return the instructions that are scheduled at the specified cycle. getInstructions(int cycle)633 std::deque<SUnit *> &getInstructions(int cycle) { 634 return ScheduledInstrs[cycle]; 635 } 636 637 SmallSet<SUnit *, 8> 638 computeUnpipelineableNodes(SwingSchedulerDAG *SSD, 639 TargetInstrInfo::PipelinerLoopInfo *PLI); 640 641 bool 642 normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD, 643 TargetInstrInfo::PipelinerLoopInfo *PLI); 644 bool isValidSchedule(SwingSchedulerDAG *SSD); 645 void finalizeSchedule(SwingSchedulerDAG *SSD); 646 void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, 647 std::deque<SUnit *> &Insts); 648 bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi); 649 bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, 650 MachineOperand &MO); 651 void print(raw_ostream &os) const; 652 void dump() const; 653 }; 654 655 } // end namespace llvm 656 657 #endif // LLVM_CODEGEN_MACHINEPIPELINER_H 658