1 //===- MachinePipeliner.h - Machine Software Pipeliner Pass -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // An implementation of the Swing Modulo Scheduling (SMS) software pipeliner. 10 // 11 // Software pipelining (SWP) is an instruction scheduling technique for loops 12 // that overlap loop iterations and exploits ILP via a compiler transformation. 13 // 14 // Swing Modulo Scheduling is an implementation of software pipelining 15 // that generates schedules that are near optimal in terms of initiation 16 // interval, register requirements, and stage count. See the papers: 17 // 18 // "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa, 19 // A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996 20 // Conference on Parallel Architectures and Compilation Techiniques. 21 // 22 // "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J. 23 // Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE 24 // Transactions on Computers, Vol. 50, No. 3, 2001. 25 // 26 // "An Implementation of Swing Modulo Scheduling With Extensions for 27 // Superblocks", by T. Lattner, Master's Thesis, University of Illinois at 28 // Urbana-Champaign, 2005. 29 // 30 // 31 // The SMS algorithm consists of three main steps after computing the minimal 32 // initiation interval (MII). 33 // 1) Analyze the dependence graph and compute information about each 34 // instruction in the graph. 35 // 2) Order the nodes (instructions) by priority based upon the heuristics 36 // described in the algorithm. 37 // 3) Attempt to schedule the nodes in the specified order using the MII. 38 // 39 //===----------------------------------------------------------------------===// 40 #ifndef LLVM_LIB_CODEGEN_MACHINEPIPELINER_H 41 #define LLVM_LIB_CODEGEN_MACHINEPIPELINER_H 42 43 #include "llvm/CodeGen/MachineDominators.h" 44 #include "llvm/CodeGen/RegisterClassInfo.h" 45 #include "llvm/CodeGen/ScheduleDAGInstrs.h" 46 #include "llvm/CodeGen/TargetInstrInfo.h" 47 48 namespace llvm { 49 50 class NodeSet; 51 class SMSchedule; 52 53 extern cl::opt<bool> SwpEnableCopyToPhi; 54 55 /// The main class in the implementation of the target independent 56 /// software pipeliner pass. 57 class MachinePipeliner : public MachineFunctionPass { 58 public: 59 MachineFunction *MF = nullptr; 60 const MachineLoopInfo *MLI = nullptr; 61 const MachineDominatorTree *MDT = nullptr; 62 const InstrItineraryData *InstrItins; 63 const TargetInstrInfo *TII = nullptr; 64 RegisterClassInfo RegClassInfo; 65 bool disabledByPragma = false; 66 unsigned II_setByPragma = 0; 67 68 #ifndef NDEBUG 69 static int NumTries; 70 #endif 71 72 /// Cache the target analysis information about the loop. 73 struct LoopInfo { 74 MachineBasicBlock *TBB = nullptr; 75 MachineBasicBlock *FBB = nullptr; 76 SmallVector<MachineOperand, 4> BrCond; 77 MachineInstr *LoopInductionVar = nullptr; 78 MachineInstr *LoopCompare = nullptr; 79 }; 80 LoopInfo LI; 81 82 static char ID; 83 MachinePipeliner()84 MachinePipeliner() : MachineFunctionPass(ID) { 85 initializeMachinePipelinerPass(*PassRegistry::getPassRegistry()); 86 } 87 88 bool runOnMachineFunction(MachineFunction &MF) override; 89 getAnalysisUsage(AnalysisUsage & AU)90 void getAnalysisUsage(AnalysisUsage &AU) const override { 91 AU.addRequired<AAResultsWrapperPass>(); 92 AU.addPreserved<AAResultsWrapperPass>(); 93 AU.addRequired<MachineLoopInfo>(); 94 AU.addRequired<MachineDominatorTree>(); 95 AU.addRequired<LiveIntervals>(); 96 MachineFunctionPass::getAnalysisUsage(AU); 97 } 98 99 private: 100 void preprocessPhiNodes(MachineBasicBlock &B); 101 bool canPipelineLoop(MachineLoop &L); 102 bool scheduleLoop(MachineLoop &L); 103 bool swingModuloScheduler(MachineLoop &L); 104 void setPragmaPipelineOptions(MachineLoop &L); 105 }; 106 107 /// This class builds the dependence graph for the instructions in a loop, 108 /// and attempts to schedule the instructions using the SMS algorithm. 109 class SwingSchedulerDAG : public ScheduleDAGInstrs { 110 MachinePipeliner &Pass; 111 /// The minimum initiation interval between iterations for this schedule. 112 unsigned MII = 0; 113 /// The maximum initiation interval between iterations for this schedule. 114 unsigned MAX_II = 0; 115 /// Set to true if a valid pipelined schedule is found for the loop. 116 bool Scheduled = false; 117 MachineLoop &Loop; 118 LiveIntervals &LIS; 119 const RegisterClassInfo &RegClassInfo; 120 unsigned II_setByPragma = 0; 121 122 /// A toplogical ordering of the SUnits, which is needed for changing 123 /// dependences and iterating over the SUnits. 124 ScheduleDAGTopologicalSort Topo; 125 126 struct NodeInfo { 127 int ASAP = 0; 128 int ALAP = 0; 129 int ZeroLatencyDepth = 0; 130 int ZeroLatencyHeight = 0; 131 132 NodeInfo() = default; 133 }; 134 /// Computed properties for each node in the graph. 135 std::vector<NodeInfo> ScheduleInfo; 136 137 enum OrderKind { BottomUp = 0, TopDown = 1 }; 138 /// Computed node ordering for scheduling. 139 SetVector<SUnit *> NodeOrder; 140 141 using NodeSetType = SmallVector<NodeSet, 8>; 142 using ValueMapTy = DenseMap<unsigned, unsigned>; 143 using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>; 144 using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>; 145 146 /// Instructions to change when emitting the final schedule. 147 DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges; 148 149 /// We may create a new instruction, so remember it because it 150 /// must be deleted when the pass is finished. 151 SmallPtrSet<MachineInstr *, 4> NewMIs; 152 153 /// Ordered list of DAG postprocessing steps. 154 std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations; 155 156 /// Helper class to implement Johnson's circuit finding algorithm. 157 class Circuits { 158 std::vector<SUnit> &SUnits; 159 SetVector<SUnit *> Stack; 160 BitVector Blocked; 161 SmallVector<SmallPtrSet<SUnit *, 4>, 10> B; 162 SmallVector<SmallVector<int, 4>, 16> AdjK; 163 // Node to Index from ScheduleDAGTopologicalSort 164 std::vector<int> *Node2Idx; 165 unsigned NumPaths; 166 static unsigned MaxPaths; 167 168 public: Circuits(std::vector<SUnit> & SUs,ScheduleDAGTopologicalSort & Topo)169 Circuits(std::vector<SUnit> &SUs, ScheduleDAGTopologicalSort &Topo) 170 : SUnits(SUs), Blocked(SUs.size()), B(SUs.size()), AdjK(SUs.size()) { 171 Node2Idx = new std::vector<int>(SUs.size()); 172 unsigned Idx = 0; 173 for (const auto &NodeNum : Topo) 174 Node2Idx->at(NodeNum) = Idx++; 175 } 176 ~Circuits()177 ~Circuits() { delete Node2Idx; } 178 179 /// Reset the data structures used in the circuit algorithm. reset()180 void reset() { 181 Stack.clear(); 182 Blocked.reset(); 183 B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>()); 184 NumPaths = 0; 185 } 186 187 void createAdjacencyStructure(SwingSchedulerDAG *DAG); 188 bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false); 189 void unblock(int U); 190 }; 191 192 struct CopyToPhiMutation : public ScheduleDAGMutation { 193 void apply(ScheduleDAGInstrs *DAG) override; 194 }; 195 196 public: SwingSchedulerDAG(MachinePipeliner & P,MachineLoop & L,LiveIntervals & lis,const RegisterClassInfo & rci,unsigned II)197 SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, 198 const RegisterClassInfo &rci, unsigned II) 199 : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis), 200 RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) { 201 P.MF->getSubtarget().getSMSMutations(Mutations); 202 if (SwpEnableCopyToPhi) 203 Mutations.push_back(llvm::make_unique<CopyToPhiMutation>()); 204 } 205 206 void schedule() override; 207 void finishBlock() override; 208 209 /// Return true if the loop kernel has been scheduled. hasNewSchedule()210 bool hasNewSchedule() { return Scheduled; } 211 212 /// Return the earliest time an instruction may be scheduled. getASAP(SUnit * Node)213 int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; } 214 215 /// Return the latest time an instruction my be scheduled. getALAP(SUnit * Node)216 int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; } 217 218 /// The mobility function, which the number of slots in which 219 /// an instruction may be scheduled. getMOV(SUnit * Node)220 int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); } 221 222 /// The depth, in the dependence graph, for a node. getDepth(SUnit * Node)223 unsigned getDepth(SUnit *Node) { return Node->getDepth(); } 224 225 /// The maximum unweighted length of a path from an arbitrary node to the 226 /// given node in which each edge has latency 0 getZeroLatencyDepth(SUnit * Node)227 int getZeroLatencyDepth(SUnit *Node) { 228 return ScheduleInfo[Node->NodeNum].ZeroLatencyDepth; 229 } 230 231 /// The height, in the dependence graph, for a node. getHeight(SUnit * Node)232 unsigned getHeight(SUnit *Node) { return Node->getHeight(); } 233 234 /// The maximum unweighted length of a path from the given node to an 235 /// arbitrary node in which each edge has latency 0 getZeroLatencyHeight(SUnit * Node)236 int getZeroLatencyHeight(SUnit *Node) { 237 return ScheduleInfo[Node->NodeNum].ZeroLatencyHeight; 238 } 239 240 /// Return true if the dependence is a back-edge in the data dependence graph. 241 /// Since the DAG doesn't contain cycles, we represent a cycle in the graph 242 /// using an anti dependence from a Phi to an instruction. isBackedge(SUnit * Source,const SDep & Dep)243 bool isBackedge(SUnit *Source, const SDep &Dep) { 244 if (Dep.getKind() != SDep::Anti) 245 return false; 246 return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI(); 247 } 248 249 bool isLoopCarriedDep(SUnit *Source, const SDep &Dep, bool isSucc = true); 250 251 /// The distance function, which indicates that operation V of iteration I 252 /// depends on operations U of iteration I-distance. getDistance(SUnit * U,SUnit * V,const SDep & Dep)253 unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) { 254 // Instructions that feed a Phi have a distance of 1. Computing larger 255 // values for arrays requires data dependence information. 256 if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti) 257 return 1; 258 return 0; 259 } 260 261 void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule); 262 263 void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs); 264 265 /// Return the new base register that was stored away for the changed 266 /// instruction. getInstrBaseReg(SUnit * SU)267 unsigned getInstrBaseReg(SUnit *SU) { 268 DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It = 269 InstrChanges.find(SU); 270 if (It != InstrChanges.end()) 271 return It->second.first; 272 return 0; 273 } 274 addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation)275 void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) { 276 Mutations.push_back(std::move(Mutation)); 277 } 278 classof(const ScheduleDAGInstrs * DAG)279 static bool classof(const ScheduleDAGInstrs *DAG) { return true; } 280 281 private: 282 void addLoopCarriedDependences(AliasAnalysis *AA); 283 void updatePhiDependences(); 284 void changeDependences(); 285 unsigned calculateResMII(); 286 unsigned calculateRecMII(NodeSetType &RecNodeSets); 287 void findCircuits(NodeSetType &NodeSets); 288 void fuseRecs(NodeSetType &NodeSets); 289 void removeDuplicateNodes(NodeSetType &NodeSets); 290 void computeNodeFunctions(NodeSetType &NodeSets); 291 void registerPressureFilter(NodeSetType &NodeSets); 292 void colocateNodeSets(NodeSetType &NodeSets); 293 void checkNodeSets(NodeSetType &NodeSets); 294 void groupRemainingNodes(NodeSetType &NodeSets); 295 void addConnectedNodes(SUnit *SU, NodeSet &NewSet, 296 SetVector<SUnit *> &NodesAdded); 297 void computeNodeOrder(NodeSetType &NodeSets); 298 void checkValidNodeOrder(const NodeSetType &Circuits) const; 299 bool schedulePipeline(SMSchedule &Schedule); 300 void generatePipelinedLoop(SMSchedule &Schedule); 301 void generateProlog(SMSchedule &Schedule, unsigned LastStage, 302 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, 303 MBBVectorTy &PrologBBs); 304 void generateEpilog(SMSchedule &Schedule, unsigned LastStage, 305 MachineBasicBlock *KernelBB, ValueMapTy *VRMap, 306 MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs); 307 void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, 308 MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, 309 SMSchedule &Schedule, ValueMapTy *VRMap, 310 InstrMapTy &InstrMap, unsigned LastStageNum, 311 unsigned CurStageNum, bool IsLast); 312 void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, 313 MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, 314 SMSchedule &Schedule, ValueMapTy *VRMap, 315 InstrMapTy &InstrMap, unsigned LastStageNum, 316 unsigned CurStageNum, bool IsLast); 317 void removeDeadInstructions(MachineBasicBlock *KernelBB, 318 MBBVectorTy &EpilogBBs); 319 void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, 320 SMSchedule &Schedule); 321 void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs, 322 MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs, 323 SMSchedule &Schedule, ValueMapTy *VRMap); 324 bool computeDelta(MachineInstr &MI, unsigned &Delta); 325 void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI, 326 unsigned Num); 327 MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum, 328 unsigned InstStageNum); 329 MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum, 330 unsigned InstStageNum, 331 SMSchedule &Schedule); 332 void updateInstruction(MachineInstr *NewMI, bool LastDef, 333 unsigned CurStageNum, unsigned InstrStageNum, 334 SMSchedule &Schedule, ValueMapTy *VRMap); 335 MachineInstr *findDefInLoop(unsigned Reg); 336 unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal, 337 unsigned LoopStage, ValueMapTy *VRMap, 338 MachineBasicBlock *BB); 339 void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum, 340 SMSchedule &Schedule, ValueMapTy *VRMap, 341 InstrMapTy &InstrMap); 342 void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule, 343 InstrMapTy &InstrMap, unsigned CurStageNum, 344 unsigned PhiNum, MachineInstr *Phi, 345 unsigned OldReg, unsigned NewReg, 346 unsigned PrevReg = 0); 347 bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos, 348 unsigned &OffsetPos, unsigned &NewBase, 349 int64_t &NewOffset); 350 void postprocessDAG(); 351 /// Set the Minimum Initiation Interval for this schedule attempt. 352 void setMII(unsigned ResMII, unsigned RecMII); 353 /// Set the Maximum Initiation Interval for this schedule attempt. 354 void setMAX_II(); 355 }; 356 357 /// A NodeSet contains a set of SUnit DAG nodes with additional information 358 /// that assigns a priority to the set. 359 class NodeSet { 360 SetVector<SUnit *> Nodes; 361 bool HasRecurrence = false; 362 unsigned RecMII = 0; 363 int MaxMOV = 0; 364 unsigned MaxDepth = 0; 365 unsigned Colocate = 0; 366 SUnit *ExceedPressure = nullptr; 367 unsigned Latency = 0; 368 369 public: 370 using iterator = SetVector<SUnit *>::const_iterator; 371 372 NodeSet() = default; NodeSet(iterator S,iterator E)373 NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) { 374 Latency = 0; 375 for (unsigned i = 0, e = Nodes.size(); i < e; ++i) 376 for (const SDep &Succ : Nodes[i]->Succs) 377 if (Nodes.count(Succ.getSUnit())) 378 Latency += Succ.getLatency(); 379 } 380 insert(SUnit * SU)381 bool insert(SUnit *SU) { return Nodes.insert(SU); } 382 insert(iterator S,iterator E)383 void insert(iterator S, iterator E) { Nodes.insert(S, E); } 384 remove_if(UnaryPredicate P)385 template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) { 386 return Nodes.remove_if(P); 387 } 388 count(SUnit * SU)389 unsigned count(SUnit *SU) const { return Nodes.count(SU); } 390 hasRecurrence()391 bool hasRecurrence() { return HasRecurrence; }; 392 size()393 unsigned size() const { return Nodes.size(); } 394 empty()395 bool empty() const { return Nodes.empty(); } 396 getNode(unsigned i)397 SUnit *getNode(unsigned i) const { return Nodes[i]; }; 398 setRecMII(unsigned mii)399 void setRecMII(unsigned mii) { RecMII = mii; }; 400 setColocate(unsigned c)401 void setColocate(unsigned c) { Colocate = c; }; 402 setExceedPressure(SUnit * SU)403 void setExceedPressure(SUnit *SU) { ExceedPressure = SU; } 404 isExceedSU(SUnit * SU)405 bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; } 406 compareRecMII(NodeSet & RHS)407 int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; } 408 getRecMII()409 int getRecMII() { return RecMII; } 410 411 /// Summarize node functions for the entire node set. computeNodeSetInfo(SwingSchedulerDAG * SSD)412 void computeNodeSetInfo(SwingSchedulerDAG *SSD) { 413 for (SUnit *SU : *this) { 414 MaxMOV = std::max(MaxMOV, SSD->getMOV(SU)); 415 MaxDepth = std::max(MaxDepth, SSD->getDepth(SU)); 416 } 417 } 418 getLatency()419 unsigned getLatency() { return Latency; } 420 getMaxDepth()421 unsigned getMaxDepth() { return MaxDepth; } 422 clear()423 void clear() { 424 Nodes.clear(); 425 RecMII = 0; 426 HasRecurrence = false; 427 MaxMOV = 0; 428 MaxDepth = 0; 429 Colocate = 0; 430 ExceedPressure = nullptr; 431 } 432 433 operator SetVector<SUnit *> &() { return Nodes; } 434 435 /// Sort the node sets by importance. First, rank them by recurrence MII, 436 /// then by mobility (least mobile done first), and finally by depth. 437 /// Each node set may contain a colocate value which is used as the first 438 /// tie breaker, if it's set. 439 bool operator>(const NodeSet &RHS) const { 440 if (RecMII == RHS.RecMII) { 441 if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate) 442 return Colocate < RHS.Colocate; 443 if (MaxMOV == RHS.MaxMOV) 444 return MaxDepth > RHS.MaxDepth; 445 return MaxMOV < RHS.MaxMOV; 446 } 447 return RecMII > RHS.RecMII; 448 } 449 450 bool operator==(const NodeSet &RHS) const { 451 return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV && 452 MaxDepth == RHS.MaxDepth; 453 } 454 455 bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); } 456 begin()457 iterator begin() { return Nodes.begin(); } end()458 iterator end() { return Nodes.end(); } 459 void print(raw_ostream &os) const; 460 461 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 462 LLVM_DUMP_METHOD void dump() const; 463 #endif 464 }; 465 466 // 16 was selected based on the number of ProcResource kinds for all 467 // existing Subtargets, so that SmallVector don't need to resize too often. 468 static const int DefaultProcResSize = 16; 469 470 class ResourceManager { 471 private: 472 const MCSubtargetInfo *STI; 473 const MCSchedModel &SM; 474 const bool UseDFA; 475 std::unique_ptr<DFAPacketizer> DFAResources; 476 /// Each processor resource is associated with a so-called processor resource 477 /// mask. This vector allows to correlate processor resource IDs with 478 /// processor resource masks. There is exactly one element per each processor 479 /// resource declared by the scheduling model. 480 llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks; 481 482 llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceCount; 483 484 public: ResourceManager(const TargetSubtargetInfo * ST)485 ResourceManager(const TargetSubtargetInfo *ST) 486 : STI(ST), SM(ST->getSchedModel()), UseDFA(ST->useDFAforSMS()), 487 ProcResourceMasks(SM.getNumProcResourceKinds(), 0), 488 ProcResourceCount(SM.getNumProcResourceKinds(), 0) { 489 if (UseDFA) 490 DFAResources.reset(ST->getInstrInfo()->CreateTargetScheduleState(*ST)); 491 initProcResourceVectors(SM, ProcResourceMasks); 492 } 493 494 void initProcResourceVectors(const MCSchedModel &SM, 495 SmallVectorImpl<uint64_t> &Masks); 496 /// Check if the resources occupied by a MCInstrDesc are available in 497 /// the current state. 498 bool canReserveResources(const MCInstrDesc *MID) const; 499 500 /// Reserve the resources occupied by a MCInstrDesc and change the current 501 /// state to reflect that change. 502 void reserveResources(const MCInstrDesc *MID); 503 504 /// Check if the resources occupied by a machine instruction are available 505 /// in the current state. 506 bool canReserveResources(const MachineInstr &MI) const; 507 508 /// Reserve the resources occupied by a machine instruction and change the 509 /// current state to reflect that change. 510 void reserveResources(const MachineInstr &MI); 511 512 /// Reset the state 513 void clearResources(); 514 }; 515 516 /// This class represents the scheduled code. The main data structure is a 517 /// map from scheduled cycle to instructions. During scheduling, the 518 /// data structure explicitly represents all stages/iterations. When 519 /// the algorithm finshes, the schedule is collapsed into a single stage, 520 /// which represents instructions from different loop iterations. 521 /// 522 /// The SMS algorithm allows negative values for cycles, so the first cycle 523 /// in the schedule is the smallest cycle value. 524 class SMSchedule { 525 private: 526 /// Map from execution cycle to instructions. 527 DenseMap<int, std::deque<SUnit *>> ScheduledInstrs; 528 529 /// Map from instruction to execution cycle. 530 std::map<SUnit *, int> InstrToCycle; 531 532 /// Map for each register and the max difference between its uses and def. 533 /// The first element in the pair is the max difference in stages. The 534 /// second is true if the register defines a Phi value and loop value is 535 /// scheduled before the Phi. 536 std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff; 537 538 /// Keep track of the first cycle value in the schedule. It starts 539 /// as zero, but the algorithm allows negative values. 540 int FirstCycle = 0; 541 542 /// Keep track of the last cycle value in the schedule. 543 int LastCycle = 0; 544 545 /// The initiation interval (II) for the schedule. 546 int InitiationInterval = 0; 547 548 /// Target machine information. 549 const TargetSubtargetInfo &ST; 550 551 /// Virtual register information. 552 MachineRegisterInfo &MRI; 553 554 ResourceManager ProcItinResources; 555 556 public: SMSchedule(MachineFunction * mf)557 SMSchedule(MachineFunction *mf) 558 : ST(mf->getSubtarget()), MRI(mf->getRegInfo()), ProcItinResources(&ST) {} 559 reset()560 void reset() { 561 ScheduledInstrs.clear(); 562 InstrToCycle.clear(); 563 RegToStageDiff.clear(); 564 FirstCycle = 0; 565 LastCycle = 0; 566 InitiationInterval = 0; 567 } 568 569 /// Set the initiation interval for this schedule. setInitiationInterval(int ii)570 void setInitiationInterval(int ii) { InitiationInterval = ii; } 571 572 /// Return the first cycle in the completed schedule. This 573 /// can be a negative value. getFirstCycle()574 int getFirstCycle() const { return FirstCycle; } 575 576 /// Return the last cycle in the finalized schedule. getFinalCycle()577 int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; } 578 579 /// Return the cycle of the earliest scheduled instruction in the dependence 580 /// chain. 581 int earliestCycleInChain(const SDep &Dep); 582 583 /// Return the cycle of the latest scheduled instruction in the dependence 584 /// chain. 585 int latestCycleInChain(const SDep &Dep); 586 587 void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart, 588 int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG); 589 bool insert(SUnit *SU, int StartCycle, int EndCycle, int II); 590 591 /// Iterators for the cycle to instruction map. 592 using sched_iterator = DenseMap<int, std::deque<SUnit *>>::iterator; 593 using const_sched_iterator = 594 DenseMap<int, std::deque<SUnit *>>::const_iterator; 595 596 /// Return true if the instruction is scheduled at the specified stage. isScheduledAtStage(SUnit * SU,unsigned StageNum)597 bool isScheduledAtStage(SUnit *SU, unsigned StageNum) { 598 return (stageScheduled(SU) == (int)StageNum); 599 } 600 601 /// Return the stage for a scheduled instruction. Return -1 if 602 /// the instruction has not been scheduled. stageScheduled(SUnit * SU)603 int stageScheduled(SUnit *SU) const { 604 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); 605 if (it == InstrToCycle.end()) 606 return -1; 607 return (it->second - FirstCycle) / InitiationInterval; 608 } 609 610 /// Return the cycle for a scheduled instruction. This function normalizes 611 /// the first cycle to be 0. cycleScheduled(SUnit * SU)612 unsigned cycleScheduled(SUnit *SU) const { 613 std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU); 614 assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled."); 615 return (it->second - FirstCycle) % InitiationInterval; 616 } 617 618 /// Return the maximum stage count needed for this schedule. getMaxStageCount()619 unsigned getMaxStageCount() { 620 return (LastCycle - FirstCycle) / InitiationInterval; 621 } 622 623 /// Return the max. number of stages/iterations that can occur between a 624 /// register definition and its uses. getStagesForReg(int Reg,unsigned CurStage)625 unsigned getStagesForReg(int Reg, unsigned CurStage) { 626 std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; 627 if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second) 628 return 1; 629 return Stages.first; 630 } 631 632 /// The number of stages for a Phi is a little different than other 633 /// instructions. The minimum value computed in RegToStageDiff is 1 634 /// because we assume the Phi is needed for at least 1 iteration. 635 /// This is not the case if the loop value is scheduled prior to the 636 /// Phi in the same stage. This function returns the number of stages 637 /// or iterations needed between the Phi definition and any uses. getStagesForPhi(int Reg)638 unsigned getStagesForPhi(int Reg) { 639 std::pair<unsigned, bool> Stages = RegToStageDiff[Reg]; 640 if (Stages.second) 641 return Stages.first; 642 return Stages.first - 1; 643 } 644 645 /// Return the instructions that are scheduled at the specified cycle. getInstructions(int cycle)646 std::deque<SUnit *> &getInstructions(int cycle) { 647 return ScheduledInstrs[cycle]; 648 } 649 650 bool isValidSchedule(SwingSchedulerDAG *SSD); 651 void finalizeSchedule(SwingSchedulerDAG *SSD); 652 void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU, 653 std::deque<SUnit *> &Insts); 654 bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi); 655 bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Def, 656 MachineOperand &MO); 657 void print(raw_ostream &os) const; 658 void dump() const; 659 }; 660 661 } // end namespace llvm 662 663 #endif // LLVM_LIB_CODEGEN_MACHINEPIPELINER_H 664