1 //===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file Implements the ScheduleDAGInstrs class, which implements scheduling 10 /// for a MachineInstr-based dependency graph. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 15 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 16 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/PointerIntPair.h" 19 #include "llvm/ADT/STLExtras.h" 20 #include "llvm/ADT/SmallVector.h" 21 #include "llvm/ADT/SparseMultiSet.h" 22 #include "llvm/ADT/SparseSet.h" 23 #include "llvm/CodeGen/LivePhysRegs.h" 24 #include "llvm/CodeGen/MachineBasicBlock.h" 25 #include "llvm/CodeGen/ScheduleDAG.h" 26 #include "llvm/CodeGen/TargetRegisterInfo.h" 27 #include "llvm/CodeGen/TargetSchedule.h" 28 #include "llvm/MC/LaneBitmask.h" 29 #include <cassert> 30 #include <cstdint> 31 #include <list> 32 #include <utility> 33 #include <vector> 34 35 namespace llvm { 36 37 class AAResults; 38 class LiveIntervals; 39 class MachineFrameInfo; 40 class MachineFunction; 41 class MachineInstr; 42 class MachineLoopInfo; 43 class MachineOperand; 44 struct MCSchedClassDesc; 45 class PressureDiffs; 46 class PseudoSourceValue; 47 class RegPressureTracker; 48 class UndefValue; 49 class Value; 50 51 /// An individual mapping from virtual register number to SUnit. 52 struct VReg2SUnit { 53 unsigned VirtReg; 54 LaneBitmask LaneMask; 55 SUnit *SU; 56 VReg2SUnitVReg2SUnit57 VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) 58 : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} 59 getSparseSetIndexVReg2SUnit60 unsigned getSparseSetIndex() const { 61 return Register::virtReg2Index(VirtReg); 62 } 63 }; 64 65 /// Mapping from virtual register to SUnit including an operand index. 66 struct VReg2SUnitOperIdx : public VReg2SUnit { 67 unsigned OperandIndex; 68 VReg2SUnitOperIdxVReg2SUnitOperIdx69 VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, 70 unsigned OperandIndex, SUnit *SU) 71 : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} 72 }; 73 74 /// Record a physical register access. 75 /// For non-data-dependent uses, OpIdx == -1. 76 struct PhysRegSUOper { 77 SUnit *SU; 78 int OpIdx; 79 unsigned Reg; 80 PhysRegSUOperPhysRegSUOper81 PhysRegSUOper(SUnit *su, int op, unsigned R): SU(su), OpIdx(op), Reg(R) {} 82 getSparseSetIndexPhysRegSUOper83 unsigned getSparseSetIndex() const { return Reg; } 84 }; 85 86 /// Use a SparseMultiSet to track physical registers. Storage is only 87 /// allocated once for the pass. It can be cleared in constant time and reused 88 /// without any frees. 89 using Reg2SUnitsMap = 90 SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>; 91 92 /// Use SparseSet as a SparseMap by relying on the fact that it never 93 /// compares ValueT's, only unsigned keys. This allows the set to be cleared 94 /// between scheduling regions in constant time as long as ValueT does not 95 /// require a destructor. 96 using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>; 97 98 /// Track local uses of virtual registers. These uses are gathered by the DAG 99 /// builder and may be consulted by the scheduler to avoid iterating an entire 100 /// vreg use list. 101 using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>; 102 103 using VReg2SUnitOperIdxMultiMap = 104 SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>; 105 106 using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>; 107 108 struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> { UnderlyingObjectUnderlyingObject109 UnderlyingObject(ValueType V, bool MayAlias) 110 : PointerIntPair<ValueType, 1, bool>(V, MayAlias) {} 111 getValueUnderlyingObject112 ValueType getValue() const { return getPointer(); } mayAliasUnderlyingObject113 bool mayAlias() const { return getInt(); } 114 }; 115 116 using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>; 117 118 /// A ScheduleDAG for scheduling lists of MachineInstr. 119 class ScheduleDAGInstrs : public ScheduleDAG { 120 protected: 121 const MachineLoopInfo *MLI; 122 const MachineFrameInfo &MFI; 123 124 /// TargetSchedModel provides an interface to the machine model. 125 TargetSchedModel SchedModel; 126 127 /// True if the DAG builder should remove kill flags (in preparation for 128 /// rescheduling). 129 bool RemoveKillFlags; 130 131 /// The standard DAG builder does not normally include terminators as DAG 132 /// nodes because it does not create the necessary dependencies to prevent 133 /// reordering. A specialized scheduler can override 134 /// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate 135 /// it has taken responsibility for scheduling the terminator correctly. 136 bool CanHandleTerminators = false; 137 138 /// Whether lane masks should get tracked. 139 bool TrackLaneMasks = false; 140 141 // State specific to the current scheduling region. 142 // ------------------------------------------------ 143 144 /// The block in which to insert instructions 145 MachineBasicBlock *BB; 146 147 /// The beginning of the range to be scheduled. 148 MachineBasicBlock::iterator RegionBegin; 149 150 /// The end of the range to be scheduled. 151 MachineBasicBlock::iterator RegionEnd; 152 153 /// Instructions in this region (distance(RegionBegin, RegionEnd)). 154 unsigned NumRegionInstrs; 155 156 /// After calling BuildSchedGraph, each machine instruction in the current 157 /// scheduling region is mapped to an SUnit. 158 DenseMap<MachineInstr*, SUnit*> MISUnitMap; 159 160 // State internal to DAG building. 161 // ------------------------------- 162 163 /// Defs, Uses - Remember where defs and uses of each register are as we 164 /// iterate upward through the instructions. This is allocated here instead 165 /// of inside BuildSchedGraph to avoid the need for it to be initialized and 166 /// destructed for each block. 167 Reg2SUnitsMap Defs; 168 Reg2SUnitsMap Uses; 169 170 /// Tracks the last instruction(s) in this region defining each virtual 171 /// register. There may be multiple current definitions for a register with 172 /// disjunct lanemasks. 173 VReg2SUnitMultiMap CurrentVRegDefs; 174 /// Tracks the last instructions in this region using each virtual register. 175 VReg2SUnitOperIdxMultiMap CurrentVRegUses; 176 177 AAResults *AAForDep = nullptr; 178 179 /// Remember a generic side-effecting instruction as we proceed. 180 /// No other SU ever gets scheduled around it (except in the special 181 /// case of a huge region that gets reduced). 182 SUnit *BarrierChain = nullptr; 183 184 public: 185 /// A list of SUnits, used in Value2SUsMap, during DAG construction. 186 /// Note: to gain speed it might be worth investigating an optimized 187 /// implementation of this data structure, such as a singly linked list 188 /// with a memory pool (SmallVector was tried but slow and SparseSet is not 189 /// applicable). 190 using SUList = std::list<SUnit *>; 191 192 protected: 193 /// A map from ValueType to SUList, used during DAG construction, as 194 /// a means of remembering which SUs depend on which memory locations. 195 class Value2SUsMap; 196 197 /// Reduces maps in FIFO order, by N SUs. This is better than turning 198 /// every Nth memory SU into BarrierChain in buildSchedGraph(), since 199 /// it avoids unnecessary edges between seen SUs above the new BarrierChain, 200 /// and those below it. 201 void reduceHugeMemNodeMaps(Value2SUsMap &stores, 202 Value2SUsMap &loads, unsigned N); 203 204 /// Adds a chain edge between SUa and SUb, but only if both 205 /// AAResults and Target fail to deny the dependency. 206 void addChainDependency(SUnit *SUa, SUnit *SUb, 207 unsigned Latency = 0); 208 209 /// Adds dependencies as needed from all SUs in list to SU. addChainDependencies(SUnit * SU,SUList & SUs,unsigned Latency)210 void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency) { 211 for (SUnit *Entry : SUs) 212 addChainDependency(SU, Entry, Latency); 213 } 214 215 /// Adds dependencies as needed from all SUs in map, to SU. 216 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap); 217 218 /// Adds dependencies as needed to SU, from all SUs mapped to V. 219 void addChainDependencies(SUnit *SU, Value2SUsMap &Val2SUsMap, 220 ValueType V); 221 222 /// Adds barrier chain edges from all SUs in map, and then clear the map. 223 /// This is equivalent to insertBarrierChain(), but optimized for the common 224 /// case where the new BarrierChain (a global memory object) has a higher 225 /// NodeNum than all SUs in map. It is assumed BarrierChain has been set 226 /// before calling this. 227 void addBarrierChain(Value2SUsMap &map); 228 229 /// Inserts a barrier chain in a huge region, far below current SU. 230 /// Adds barrier chain edges from all SUs in map with higher NodeNums than 231 /// this new BarrierChain, and remove them from map. It is assumed 232 /// BarrierChain has been set before calling this. 233 void insertBarrierChain(Value2SUsMap &map); 234 235 /// For an unanalyzable memory access, this Value is used in maps. 236 UndefValue *UnknownValue; 237 238 239 /// Topo - A topological ordering for SUnits which permits fast IsReachable 240 /// and similar queries. 241 ScheduleDAGTopologicalSort Topo; 242 243 using DbgValueVector = 244 std::vector<std::pair<MachineInstr *, MachineInstr *>>; 245 /// Remember instruction that precedes DBG_VALUE. 246 /// These are generated by buildSchedGraph but persist so they can be 247 /// referenced when emitting the final schedule. 248 DbgValueVector DbgValues; 249 MachineInstr *FirstDbgValue = nullptr; 250 251 /// Set of live physical registers for updating kill flags. 252 LivePhysRegs LiveRegs; 253 254 public: 255 explicit ScheduleDAGInstrs(MachineFunction &mf, 256 const MachineLoopInfo *mli, 257 bool RemoveKillFlags = false); 258 259 ~ScheduleDAGInstrs() override = default; 260 261 /// Gets the machine model for instruction scheduling. getSchedModel()262 const TargetSchedModel *getSchedModel() const { return &SchedModel; } 263 264 /// Resolves and cache a resolved scheduling class for an SUnit. getSchedClass(SUnit * SU)265 const MCSchedClassDesc *getSchedClass(SUnit *SU) const { 266 if (!SU->SchedClass && SchedModel.hasInstrSchedModel()) 267 SU->SchedClass = SchedModel.resolveSchedClass(SU->getInstr()); 268 return SU->SchedClass; 269 } 270 271 /// IsReachable - Checks if SU is reachable from TargetSU. IsReachable(SUnit * SU,SUnit * TargetSU)272 bool IsReachable(SUnit *SU, SUnit *TargetSU) { 273 return Topo.IsReachable(SU, TargetSU); 274 } 275 276 /// Returns an iterator to the top of the current scheduling region. begin()277 MachineBasicBlock::iterator begin() const { return RegionBegin; } 278 279 /// Returns an iterator to the bottom of the current scheduling region. end()280 MachineBasicBlock::iterator end() const { return RegionEnd; } 281 282 /// Creates a new SUnit and return a ptr to it. 283 SUnit *newSUnit(MachineInstr *MI); 284 285 /// Returns an existing SUnit for this MI, or nullptr. 286 SUnit *getSUnit(MachineInstr *MI) const; 287 288 /// If this method returns true, handling of the scheduling regions 289 /// themselves (in case of a scheduling boundary in MBB) will be done 290 /// beginning with the topmost region of MBB. doMBBSchedRegionsTopDown()291 virtual bool doMBBSchedRegionsTopDown() const { return false; } 292 293 /// Prepares to perform scheduling in the given block. 294 virtual void startBlock(MachineBasicBlock *BB); 295 296 /// Cleans up after scheduling in the given block. 297 virtual void finishBlock(); 298 299 /// Initialize the DAG and common scheduler state for a new 300 /// scheduling region. This does not actually create the DAG, only clears 301 /// it. The scheduling driver may call BuildSchedGraph multiple times per 302 /// scheduling region. 303 virtual void enterRegion(MachineBasicBlock *bb, 304 MachineBasicBlock::iterator begin, 305 MachineBasicBlock::iterator end, 306 unsigned regioninstrs); 307 308 /// Called when the scheduler has finished scheduling the current region. 309 virtual void exitRegion(); 310 311 /// Builds SUnits for the current region. 312 /// If \p RPTracker is non-null, compute register pressure as a side effect. 313 /// The DAG builder is an efficient place to do it because it already visits 314 /// operands. 315 void buildSchedGraph(AAResults *AA, 316 RegPressureTracker *RPTracker = nullptr, 317 PressureDiffs *PDiffs = nullptr, 318 LiveIntervals *LIS = nullptr, 319 bool TrackLaneMasks = false); 320 321 /// Adds dependencies from instructions in the current list of 322 /// instructions being scheduled to scheduling barrier. We want to make sure 323 /// instructions which define registers that are either used by the 324 /// terminator or are live-out are properly scheduled. This is especially 325 /// important when the definition latency of the return value(s) are too 326 /// high to be hidden by the branch or when the liveout registers used by 327 /// instructions in the fallthrough block. 328 void addSchedBarrierDeps(); 329 330 /// Orders nodes according to selected style. 331 /// 332 /// Typically, a scheduling algorithm will implement schedule() without 333 /// overriding enterRegion() or exitRegion(). 334 virtual void schedule() = 0; 335 336 /// Allow targets to perform final scheduling actions at the level of the 337 /// whole MachineFunction. By default does nothing. finalizeSchedule()338 virtual void finalizeSchedule() {} 339 340 void dumpNode(const SUnit &SU) const override; 341 void dump() const override; 342 343 /// Returns a label for a DAG node that points to an instruction. 344 std::string getGraphNodeLabel(const SUnit *SU) const override; 345 346 /// Returns a label for the region of code covered by the DAG. 347 std::string getDAGName() const override; 348 349 /// Fixes register kill flags that scheduling has made invalid. 350 void fixupKills(MachineBasicBlock &MBB); 351 352 /// True if an edge can be added from PredSU to SuccSU without creating 353 /// a cycle. 354 bool canAddEdge(SUnit *SuccSU, SUnit *PredSU); 355 356 /// Add a DAG edge to the given SU with the given predecessor 357 /// dependence data. 358 /// 359 /// \returns true if the edge may be added without creating a cycle OR if an 360 /// equivalent edge already existed (false indicates failure). 361 bool addEdge(SUnit *SuccSU, const SDep &PredDep); 362 363 protected: 364 void initSUnits(); 365 void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx); 366 void addPhysRegDeps(SUnit *SU, unsigned OperIdx); 367 void addVRegDefDeps(SUnit *SU, unsigned OperIdx); 368 void addVRegUseDeps(SUnit *SU, unsigned OperIdx); 369 370 /// Returns a mask for which lanes get read/written by the given (register) 371 /// machine operand. 372 LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; 373 374 /// Returns true if the def register in \p MO has no uses. 375 bool deadDefHasNoUse(const MachineOperand &MO); 376 }; 377 378 /// Creates a new SUnit and return a ptr to it. newSUnit(MachineInstr * MI)379 inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) { 380 #ifndef NDEBUG 381 const SUnit *Addr = SUnits.empty() ? nullptr : &SUnits[0]; 382 #endif 383 SUnits.emplace_back(MI, (unsigned)SUnits.size()); 384 assert((Addr == nullptr || Addr == &SUnits[0]) && 385 "SUnits std::vector reallocated on the fly!"); 386 return &SUnits.back(); 387 } 388 389 /// Returns an existing SUnit for this MI, or nullptr. getSUnit(MachineInstr * MI)390 inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const { 391 return MISUnitMap.lookup(MI); 392 } 393 394 } // end namespace llvm 395 396 #endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H 397