1 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file Implements the ScheduleDAGInstrs class, which implements scheduling 10 /// for a MachineInstr-based dependency graph. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 15 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 16 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/PointerIntPair.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/SparseMultiSet.h" 21 #include "llvm/ADT/SparseSet.h" 22 #include "llvm/ADT/identity.h" 23 #include "llvm/CodeGen/LivePhysRegs.h" 24 #include "llvm/CodeGen/MachineBasicBlock.h" 25 #include "llvm/CodeGen/ScheduleDAG.h" 26 #include "llvm/CodeGen/TargetRegisterInfo.h" 27 #include "llvm/CodeGen/TargetSchedule.h" 28 #include "llvm/MC/LaneBitmask.h" 29 #include <cassert> 30 #include <cstdint> 31 #include <list> 32 #include <string> 33 #include <utility> 34 #include <vector> 35 36 namespace llvm { 37 38 class AAResults; 39 class LiveIntervals; 40 class MachineFrameInfo; 41 class MachineFunction; 42 class MachineInstr; 43 class MachineLoopInfo; 44 class MachineOperand; 45 struct MCSchedClassDesc; 46 class PressureDiffs; 47 class PseudoSourceValue; 48 class RegPressureTracker; 49 class UndefValue; 50 class Value; 51 52 /// An individual mapping from virtual register number to SUnit. 53 struct VReg2SUnit { 54 unsigned VirtReg; 55 LaneBitmask LaneMask; 56 SUnit *SU; 57 VReg2SUnitVReg2SUnit58 VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) 59 : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} 60 getSparseSetIndexVReg2SUnit61 unsigned getSparseSetIndex() const { 62 return Register::virtReg2Index(VirtReg); 63 } 64 }; 65 66 /// Mapping from virtual register to SUnit including an operand index. 67 struct VReg2SUnitOperIdx : public VReg2SUnit { 68 unsigned OperandIndex; 69 VReg2SUnitOperIdxVReg2SUnitOperIdx70 VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, 71 unsigned OperandIndex, SUnit *SU) 72 : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} 73 }; 74 75 /// Record a physical register access. 76 /// For non-data-dependent uses, OpIdx == -1. 77 struct PhysRegSUOper { 78 SUnit *SU; 79 int OpIdx; 80 unsigned Reg; 81 PhysRegSUOperPhysRegSUOper82 PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {} 83 getSparseSetIndexPhysRegSUOper84 unsigned getSparseSetIndex() const { return Reg; } 85 }; 86 87 /// Use a SparseMultiSet to track physical registers. Storage is only 88 /// allocated once for the pass. It can be cleared in constant time and reused 89 /// without any frees. 90 using Reg2SUnitsMap = 91 SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>; 92 93 /// Use SparseSet as a SparseMap by relying on the fact that it never 94 /// compares ValueT's, only unsigned keys. This allows the set to be cleared 95 /// between scheduling regions in constant time as long as ValueT does not 96 /// require a destructor. 97 using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>; 98 99 /// Track local uses of virtual registers. These uses are gathered by the DAG 100 /// builder and may be consulted by the scheduler to avoid iterating an entire 101 /// vreg use list. 102 using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>; 103 104 using VReg2SUnitOperIdxMultiMap = 105 SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>; 106 107 using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>; 108 109 struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> { UnderlyingObjectUnderlyingObject110 UnderlyingObject(ValueType V, bool MayAlias) 111 : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {} 112 getValueUnderlyingObject113 ValueType getValue() const { return getPointer(); } mayAliasUnderlyingObject114 bool mayAlias() const { return getInt(); } 115 }; 116 117 using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>; 118 119 /// A ScheduleDAG for scheduling lists of MachineInstr. 120 class ScheduleDAGInstrs : public ScheduleDAG { 121 protected: 122 const MachineLoopInfo *MLI; 123 const MachineFrameInfo &MFI; 124 125 /// TargetSchedModel provides an interface to the machine model. 126 TargetSchedModel SchedModel; 127 128 /// True if the DAG builder should remove kill flags (in preparation for 129 /// rescheduling). 130 bool RemoveKillFlags; 131 132 /// The standard DAG builder does not normally include terminators as DAG 133 /// nodes because it does not create the necessary dependencies to prevent 134 /// reordering. A specialized scheduler can override 135 /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate 136 /// it has taken responsibility for scheduling the terminator correctly. 137 bool CanHandleTerminators = false; 138 139 /// Whether lane masks should get tracked. 140 bool TrackLaneMasks = false; 141 142 // State specific to the current scheduling region. 143 // ------------------------------------------------ 144 145 /// The block in which to insert instructions 146 MachineBasicBlock *BB; 147 148 /// The beginning of the range to be scheduled. 149 MachineBasicBlock::iterator RegionBegin; 150 151 /// The end of the range to be scheduled. 152 MachineBasicBlock::iterator RegionEnd; 153 154 /// Instructions in this region (distance(RegionBegin, RegionEnd)). 155 unsigned NumRegionInstrs; 156 157 /// After calling BuildSchedGraph, each machine instruction in the current 158 /// scheduling region is mapped to an SUnit. 159 DenseMap<MachineInstr*, SUnit*> MISUnitMap; 160 161 // State internal to DAG building. 162 // ------------------------------- 163 164 /// Defs, Uses - Remember where defs and uses of each register are as we 165 /// iterate upward through the instructions. This is allocated here instead 166 /// of inside BuildSchedGraph to avoid the need for it to be initialized and 167 /// destructed for each block. 168 Reg2SUnitsMap Defs; 169 Reg2SUnitsMap Uses; 170 171 /// Tracks the last instruction(s) in this region defining each virtual 172 /// register. There may be multiple current definitions for a register with 173 /// disjunct lanemasks. 174 VReg2SUnitMultiMap CurrentVRegDefs; 175 /// Tracks the last instructions in this region using each virtual register. 176 VReg2SUnitOperIdxMultiMap CurrentVRegUses; 177 178 AAResults *AAForDep = nullptr; 179 180 /// Remember a generic side-effecting instruction as we proceed. 181 /// No other SU ever gets scheduled around it (except in the special 182 /// case of a huge region that gets reduced). 183 SUnit *BarrierChain = nullptr; 184 185 public: 186 /// A list of SUnits, used in Value2SUsMap, during DAG construction. 187 /// Note: to gain speed it might be worth investigating an optimized 188 /// implementation of this data structure, such as a singly linked list 189 /// with a memory pool (SmallVector was tried but slow and SparseSet is not 190 /// applicable). 191 using SUList = std::list<SUnit *>; 192 193 protected: 194 /// A map from ValueType to SUList, used during DAG construction, as 195 /// a means of remembering which SUs depend on which memory locations. 196 class Value2SUsMap; 197 198 /// Reduces maps in FIFO order, by N SUs. This is better than turning 199 /// every Nth memory SU into BarrierChain in buildSchedGraph(), since 200 /// it avoids unnecessary edges between seen SUs above the new BarrierChain, 201 /// and those below it. 202 void reduceHugeMemNodeMaps(Value2SUsMap &stores, 203 Value2SUsMap &loads, unsigned N); 204 205 /// Adds a chain edge between SUa and SUb, but only if both 206 /// AAResults and Target fail to deny the dependency. 207 void addChainDependency(SUnit *SUa, SUnit *SUb, 208 unsigned Latency = 0); 209 210 /// Adds dependencies as needed from all SUs in list to SU. addChainDependencies(SUnit * SU,SUList & SUs,unsigned Latency)211 void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) { 212 for (SUnit *Entry : SUs) 213 addChainDependency(SU, Entry, Latency); 214 } 215 216 /// Adds dependencies as needed from all SUs in map, to SU. 217 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap); 218 219 /// Adds dependencies as needed to SU, from all SUs mapped to V. 220 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap, 221 ValueType V); 222 223 /// Adds barrier chain edges from all SUs in map, and then clear the map. 224 /// This is equivalent to insertBarrierChain(), but optimized for the common 225 /// case where the new BarrierChain (a global memory object) has a higher 226 /// NodeNum than all SUs in map. It is assumed BarrierChain has been set 227 /// before calling this. 228 void addBarrierChain(Value2SUsMap &map); 229 230 /// Inserts a barrier chain in a huge region, far below current SU. 231 /// Adds barrier chain edges from all SUs in map with higher NodeNums than 232 /// this new BarrierChain, and remove them from map. It is assumed 233 /// BarrierChain has been set before calling this. 234 void insertBarrierChain(Value2SUsMap &map); 235 236 /// For an unanalyzable memory access, this Value is used in maps. 237 UndefValue *UnknownValue; 238 239 240 /// Topo - A topological ordering for SUnits which permits fast IsReachable 241 /// and similar queries. 242 ScheduleDAGTopologicalSort Topo; 243 244 using DbgValueVector = 245 std::vector<std::pair<MachineInstr *, MachineInstr *>>; 246 /// Remember instruction that precedes DBG_VALUE. 247 /// These are generated by buildSchedGraph but persist so they can be 248 /// referenced when emitting the final schedule. 249 DbgValueVector DbgValues; 250 MachineInstr *FirstDbgValue = nullptr; 251 252 /// Set of live physical registers for updating kill flags. 253 LivePhysRegs LiveRegs; 254 255 public: 256 explicit ScheduleDAGInstrs(MachineFunction &mf, 257 const MachineLoopInfo *mli, 258 bool RemoveKillFlags = false); 259 260 ~ScheduleDAGInstrs() override = default; 261 262 /// Gets the machine model for instruction scheduling. getSchedModel()263 const TargetSchedModel *getSchedModel() const { return &SchedModel; } 264 265 /// Resolves and cache a resolved scheduling class for an SUnit. getSchedClass(SUnit * SU)266 const MCSchedClassDesc *getSchedClass(SUnit *SU) const { 267 if (!SU->SchedClass && SchedModel.hasInstrSchedModel()) 268 SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr()); 269 return SU->SchedClass; 270 } 271 272 /// IsReachable - Checks if SU is reachable from TargetSU. IsReachable(SUnit * SU,SUnit * TargetSU)273 bool IsReachable(SUnit *SU, SUnit *TargetSU) { 274 return Topo.IsReachable(SU, TargetSU); 275 } 276 277 /// Returns an iterator to the top of the current scheduling region. begin()278 MachineBasicBlock::iterator begin() const { return RegionBegin; } 279 280 /// Returns an iterator to the bottom of the current scheduling region. end()281 MachineBasicBlock::iterator end() const { return RegionEnd; } 282 283 /// Creates a new SUnit and return a ptr to it. 284 SUnit *newSUnit(MachineInstr *MI); 285 286 /// Returns an existing SUnit for this MI, or nullptr. 287 SUnit *getSUnit(MachineInstr *MI) const; 288 289 /// If this method returns true, handling of the scheduling regions 290 /// themselves (in case of a scheduling boundary in MBB) will be done 291 /// beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()292 virtual bool doMBBSchedRegionsTopDown() const { return false; } 293 294 /// Prepares to perform scheduling in the given block. 295 virtual void startBlock(MachineBasicBlock *BB); 296 297 /// Cleans up after scheduling in the given block. 298 virtual void finishBlock(); 299 300 /// Initialize the DAG and common scheduler state for a new 301 /// scheduling region. This does not actually create the DAG, only clears 302 /// it. The scheduling driver may call BuildSchedGraph multiple times per 303 /// scheduling region. 304 virtual void enterRegion(MachineBasicBlock *bb, 305 MachineBasicBlock::iterator begin, 306 MachineBasicBlock::iterator end, 307 unsigned regioninstrs); 308 309 /// Called when the scheduler has finished scheduling the current region. 310 virtual void exitRegion(); 311 312 /// Builds SUnits for the current region. 313 /// If \p RPTracker is non-null, compute register pressure as a side effect. 314 /// The DAG builder is an efficient place to do it because it already visits 315 /// operands. 316 void buildSchedGraph(AAResults *AA, 317 RegPressureTracker *RPTracker = nullptr, 318 PressureDiffs *PDiffs = nullptr, 319 LiveIntervals *LIS = nullptr, 320 bool TrackLaneMasks = false); 321 322 /// Adds dependencies from instructions in the current list of 323 /// instructions being scheduled to scheduling barrier. We want to make sure 324 /// instructions which define registers that are either used by the 325 /// terminator or are live-out are properly scheduled. This is especially 326 /// important when the definition latency of the return value(s) are too 327 /// high to be hidden by the branch or when the liveout registers used by 328 /// instructions in the fallthrough block. 329 void addSchedBarrierDeps(); 330 331 /// Orders nodes according to selected style. 332 /// 333 /// Typically, a scheduling algorithm will implement schedule() without 334 /// overriding enterRegion() or exitRegion(). 335 virtual void schedule() = 0; 336 337 /// Allow targets to perform final scheduling actions at the level of the 338 /// whole MachineFunction. By default does nothing. finalizeSchedule()339 virtual void finalizeSchedule() {} 340 341 void dumpNode(const SUnit &SU) const override; 342 void dump() const override; 343 344 /// Returns a label for a DAG node that points to an instruction. 345 std::string getGraphNodeLabel(const SUnit *SU) const override; 346 347 /// Returns a label for the region of code covered by the DAG. 348 std::string getDAGName() const override; 349 350 /// Fixes register kill flags that scheduling has made invalid. 351 void fixupKills(MachineBasicBlock &MBB); 352 353 /// True if an edge can be added from PredSU to SuccSU without creating 354 /// a cycle. 355 bool canAddEdge(SUnit *SuccSU, SUnit *PredSU); 356 357 /// Add a DAG edge to the given SU with the given predecessor 358 /// dependence data. 359 /// 360 /// \returns true if the edge may be added without creating a cycle OR if an 361 /// equivalent edge already existed (false indicates failure). 362 bool addEdge(SUnit *SuccSU, const SDep &PredDep); 363 364 protected: 365 void initSUnits(); 366 void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx); 367 void addPhysRegDeps(SUnit *SU, unsigned OperIdx); 368 void addVRegDefDeps(SUnit *SU, unsigned OperIdx); 369 void addVRegUseDeps(SUnit *SU, unsigned OperIdx); 370 371 /// Returns a mask for which lanes get read/written by the given (register) 372 /// machine operand. 373 LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; 374 375 /// Returns true if the def register in \p MO has no uses. 376 bool deadDefHasNoUse(const MachineOperand &MO); 377 }; 378 379 /// Creates a new SUnit and return a ptr to it. newSUnit(MachineInstr * MI)380 inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) { 381 #ifndef NDEBUG 382 const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0]; 383 #endif 384 SUnits.emplace_back(MI, (unsigned)SUnits.size()); 385 assert((Addr == nullptr || Addr == &SUnits[0]) && 386 "SUnits std::vector reallocated on the fly!"); 387 return &SUnits.back(); 388 } 389 390 /// Returns an existing SUnit for this MI, or nullptr. getSUnit(MachineInstr * MI)391 inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const { 392 return MISUnitMap.lookup(MI); 393 } 394 395 } // end namespace llvm 396 397 #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 398