1 //===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14 #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15 
16 #include "GCNRegPressure.h"
17 #include "llvm/ADT/MapVector.h"
18 #include "llvm/CodeGen/MachineScheduler.h"
19 
20 namespace llvm {
21 
22 class SIMachineFunctionInfo;
23 class SIRegisterInfo;
24 class GCNSubtarget;
25 
26 /// This is a minimal scheduler strategy.  The main difference between this
27 /// and the GenericScheduler is that GCNSchedStrategy uses different
28 /// heuristics to determine excess/critical pressure sets.  Its goal is to
29 /// maximize kernel occupancy (i.e. maximum number of waves per simd).
30 class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
31   SUnit *pickNodeBidirectional(bool &IsTopNode);
32 
33   void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
34                          const RegPressureTracker &RPTracker,
35                          SchedCandidate &Cand);
36 
37   void initCandidate(SchedCandidate &Cand, SUnit *SU,
38                      bool AtTop, const RegPressureTracker &RPTracker,
39                      const SIRegisterInfo *SRI,
40                      unsigned SGPRPressure, unsigned VGPRPressure);
41 
42   std::vector<unsigned> Pressure;
43 
44   std::vector<unsigned> MaxPressure;
45 
46   unsigned SGPRExcessLimit;
47 
48   unsigned VGPRExcessLimit;
49 
50   unsigned TargetOccupancy;
51 
52   MachineFunction *MF;
53 
54 public:
55   // schedule() have seen a clustered memory operation. Set it to false
56   // before a region scheduling to know if the region had such clusters.
57   bool HasClusteredNodes;
58 
59   // schedule() have seen an excess register pressure and had to track
60   // register pressure for actual scheduling heuristics.
61   bool HasExcessPressure;
62 
63   unsigned SGPRCriticalLimit;
64 
65   unsigned VGPRCriticalLimit;
66 
67   GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
68 
69   SUnit *pickNode(bool &IsTopNode) override;
70 
71   void initialize(ScheduleDAGMI *DAG) override;
72 
73   unsigned getTargetOccupancy() { return TargetOccupancy; }
74 
75   void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
76 };
77 
78 enum class GCNSchedStageID : unsigned {
79   InitialSchedule = 0,
80   UnclusteredReschedule = 1,
81   ClusteredLowOccupancyReschedule = 2,
82   PreRARematerialize = 3,
83   LastStage = PreRARematerialize
84 };
85 
86 #ifndef NDEBUG
87 raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
88 #endif
89 
90 inline GCNSchedStageID &operator++(GCNSchedStageID &Stage, int) {
91   assert(Stage != GCNSchedStageID::PreRARematerialize);
92   Stage = static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
93   return Stage;
94 }
95 
96 inline GCNSchedStageID nextStage(const GCNSchedStageID Stage) {
97   return static_cast<GCNSchedStageID>(static_cast<unsigned>(Stage) + 1);
98 }
99 
100 inline bool operator>(GCNSchedStageID &LHS, GCNSchedStageID &RHS) {
101   return static_cast<unsigned>(LHS) > static_cast<unsigned>(RHS);
102 }
103 
104 class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
105   friend class GCNSchedStage;
106   friend class InitialScheduleStage;
107   friend class UnclusteredRescheduleStage;
108   friend class ClusteredLowOccStage;
109   friend class PreRARematStage;
110 
111   const GCNSubtarget &ST;
112 
113   SIMachineFunctionInfo &MFI;
114 
115   // Occupancy target at the beginning of function scheduling cycle.
116   unsigned StartingOccupancy;
117 
118   // Minimal real occupancy recorder for the function.
119   unsigned MinOccupancy;
120 
121   // Vector of regions recorder for later rescheduling
122   SmallVector<std::pair<MachineBasicBlock::iterator,
123                         MachineBasicBlock::iterator>, 32> Regions;
124 
125   // Records if a region is not yet scheduled, or schedule has been reverted,
126   // or we generally desire to reschedule it.
127   BitVector RescheduleRegions;
128 
129   // Record regions which use clustered loads/stores.
130   BitVector RegionsWithClusters;
131 
132   // Record regions with high register pressure.
133   BitVector RegionsWithHighRP;
134 
135   // Regions that has the same occupancy as the latest MinOccupancy
136   BitVector RegionsWithMinOcc;
137 
138   // Region live-in cache.
139   SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;
140 
141   // Region pressure cache.
142   SmallVector<GCNRegPressure, 32> Pressure;
143 
144   // Temporary basic block live-in cache.
145   DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;
146 
147   DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;
148 
149   DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;
150 
151   // Return current region pressure.
152   GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
153 
154   // Compute and cache live-ins and pressure for all regions in block.
155   void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
156 
157   // Update region boundaries when removing MI or inserting NewMI before MI.
158   void updateRegionBoundaries(
159       SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
160                                 MachineBasicBlock::iterator>> &RegionBoundaries,
161       MachineBasicBlock::iterator MI, MachineInstr *NewMI,
162       bool Removing = false);
163 
164   void runSchedStages();
165 
166 public:
167   GCNScheduleDAGMILive(MachineSchedContext *C,
168                        std::unique_ptr<MachineSchedStrategy> S);
169 
170   void schedule() override;
171 
172   void finalizeSchedule() override;
173 };
174 
175 // GCNSchedStrategy applies multiple scheduling stages to a function.
176 class GCNSchedStage {
177 protected:
178   GCNScheduleDAGMILive &DAG;
179 
180   GCNMaxOccupancySchedStrategy &S;
181 
182   MachineFunction &MF;
183 
184   SIMachineFunctionInfo &MFI;
185 
186   const GCNSubtarget &ST;
187 
188   const GCNSchedStageID StageID;
189 
190   // The current block being scheduled.
191   MachineBasicBlock *CurrentMBB = nullptr;
192 
193   // Current region index.
194   unsigned RegionIdx = 0;
195 
196   // Record the original order of instructions before scheduling.
197   std::vector<MachineInstr *> Unsched;
198 
199   // RP before scheduling the current region.
200   GCNRegPressure PressureBefore;
201 
202   // RP after scheduling the current region.
203   GCNRegPressure PressureAfter;
204 
205   GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);
206 
207 public:
208   // Initialize state for a scheduling stage. Returns false if the current stage
209   // should be skipped.
210   virtual bool initGCNSchedStage();
211 
212   // Finalize state after finishing a scheduling pass on the function.
213   virtual void finalizeGCNSchedStage();
214 
215   // Setup for scheduling a region. Returns false if the current region should
216   // be skipped.
217   virtual bool initGCNRegion();
218 
219   // Track whether a new region is also a new MBB.
220   void setupNewBlock();
221 
222   // Finalize state after scheudling a region.
223   virtual void finalizeGCNRegion();
224 
225   // Check result of scheduling.
226   void checkScheduling();
227 
228   // Returns true if scheduling should be reverted.
229   virtual bool shouldRevertScheduling(unsigned WavesAfter);
230 
231   // Returns true if the new schedule may result in more spilling.
232   bool mayCauseSpilling(unsigned WavesAfter);
233 
234   // Attempt to revert scheduling for this region.
235   void revertScheduling();
236 
237   void advanceRegion() { RegionIdx++; }
238 
239   virtual ~GCNSchedStage() = default;
240 };
241 
242 class InitialScheduleStage : public GCNSchedStage {
243 public:
244   void finalizeGCNRegion() override;
245 
246   bool shouldRevertScheduling(unsigned WavesAfter) override;
247 
248   InitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
249       : GCNSchedStage(StageID, DAG) {}
250 };
251 
252 class UnclusteredRescheduleStage : public GCNSchedStage {
253 private:
254   std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
255 
256 public:
257   bool initGCNSchedStage() override;
258 
259   void finalizeGCNSchedStage() override;
260 
261   bool initGCNRegion() override;
262 
263   bool shouldRevertScheduling(unsigned WavesAfter) override;
264 
265   UnclusteredRescheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
266       : GCNSchedStage(StageID, DAG) {}
267 };
268 
269 // Retry function scheduling if we found resulting occupancy and it is
270 // lower than used for other scheduling passes. This will give more freedom
271 // to schedule low register pressure blocks.
272 class ClusteredLowOccStage : public GCNSchedStage {
273 public:
274   bool initGCNSchedStage() override;
275 
276   bool initGCNRegion() override;
277 
278   bool shouldRevertScheduling(unsigned WavesAfter) override;
279 
280   ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
281       : GCNSchedStage(StageID, DAG) {}
282 };
283 
284 class PreRARematStage : public GCNSchedStage {
285 private:
286   // Each region at MinOccupancy will have their own list of trivially
287   // rematerializable instructions we can remat to reduce RP. The list maps an
288   // instruction to the position we should remat before, usually the MI using
289   // the rematerializable instruction.
290   MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
291       RematerializableInsts;
292 
293   // Map a trivially remateriazable def to a list of regions at MinOccupancy
294   // that has the defined reg as a live-in.
295   DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;
296 
297   // Collect all trivially rematerializable VGPR instructions with a single def
298   // and single use outside the defining block into RematerializableInsts.
299   void collectRematerializableInstructions();
300 
301   bool isTriviallyReMaterializable(const MachineInstr &MI);
302 
303   // TODO: Should also attempt to reduce RP of SGPRs and AGPRs
304   // Attempt to reduce RP of VGPR by sinking trivially rematerializable
305   // instructions. Returns true if we were able to sink instruction(s).
306   bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
307                                const TargetInstrInfo *TII);
308 
309 public:
310   bool initGCNSchedStage() override;
311 
312   bool initGCNRegion() override;
313 
314   bool shouldRevertScheduling(unsigned WavesAfter) override;
315 
316   PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
317       : GCNSchedStage(StageID, DAG) {}
318 };
319 
320 } // End namespace llvm
321 
322 #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
323