//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H

#include "GCNRegPressure.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/MachineScheduler.h"

namespace llvm {

class SIMachineFunctionInfo;
class SIRegisterInfo;
class GCNSubtarget;
class GCNSchedStage;

enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  UnclusteredHighRPReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  ILPInitialSchedule = 4
};
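
// Illustrative only: the set and order of stages is chosen by the concrete
// strategy's constructor in GCNSchedStrategy.cpp, not by this header. As a
// rough sketch, the max-occupancy strategy is expected to run
//   OccInitialSchedule -> UnclusteredHighRPReschedule ->
//   ClusteredLowOccupancyReschedule -> PreRARematerialize
// while the max-ILP strategy runs ILPInitialSchedule.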

#ifndef NDEBUG
raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
#endif

/// This is a minimal scheduler strategy.  The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  SUnit *pickNodeBidirectional(bool &IsTopNode);

  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand);

  void initCandidate(SchedCandidate &Cand, SUnit *SU,
                     bool AtTop, const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI,
                     unsigned SGPRPressure, unsigned VGPRPressure);

  std::vector<unsigned> Pressure;

  std::vector<unsigned> MaxPressure;

  unsigned SGPRExcessLimit;

  unsigned VGPRExcessLimit;

  unsigned TargetOccupancy;

  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Pointer to the current SchedStageID.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;

public:
  // schedule() has seen register pressure over the critical limits and had
  // to track register pressure for actual scheduling heuristics.
  bool HasHighPressure;

  // Schedule known to have excess register pressure. Be more conservative in
  // increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because of the poor performance of the
  // generic RP tracker. It can be adjusted upwards when tuning heuristics to
  // try to reduce register pressure more aggressively.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under a high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under a high register pressure.
  const unsigned HighRPVGPRBias = 7;

  unsigned SGPRCriticalLimit;

  unsigned VGPRCriticalLimit;

  unsigned SGPRLimitBias = 0;

  unsigned VGPRLimitBias = 0;

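  // Illustrative sketch only (the actual adjustment is made by initialize()
  // in GCNSchedStrategy.cpp): the critical limits are assumed to be tightened
  // by the active stage bias plus the error margin, roughly
  //   VGPRCriticalLimit -= std::min(VGPRLimitBias + ErrorMargin,
  //                                 VGPRCriticalLimit);
  // so a larger ErrorMargin or HighRP*Bias makes the strategy treat register
  // pressure as critical sooner.
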
  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  GCNSchedStageID getCurrentStage();

  // Advances stage. Returns true if there are remaining stages.
  bool advanceStage();

  bool hasNextStage() const;

  GCNSchedStageID getNextStage() const;
};
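
// A minimal sketch of how a scheduling driver is expected to walk the stage
// list populated by a strategy's constructor (illustrative, not part of the
// interface):
//
//   GCNSchedStrategy &S = ...;
//   while (S.advanceStage()) {
//     GCNSchedStageID StageID = S.getCurrentStage();
//     // ... run this stage over all scheduling regions ...
//   }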

/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// the maximum number of waves per SIMD).
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
};

/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};

class ScheduleMetrics {
  unsigned ScheduleLength;
  unsigned BubbleCycles;

public:
  ScheduleMetrics() {}
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  unsigned getMetric() const {
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // The metric is zero if the amount of bubbles is less than 1%, which is
    // too small, so return 1 instead.
    return Metric ? Metric : 1;
  }
  static const unsigned ScaleFactor;
};
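
// Worked example (assuming ScaleFactor is 100, as defined in the .cpp): a
// 300-cycle schedule containing 12 bubble cycles scores
// (12 * 100) / 300 = 4, i.e. roughly 4% of the schedule is stalls. A schedule
// with fewer than 1% bubbles still reports 1 rather than 0.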

inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) {
  OS << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
     << ") is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
     << Sm.getLength() << " ]\n";
  return OS;
}

class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;

  const GCNSubtarget &ST;

  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of the function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<std::pair<MachineBasicBlock::iterator,
                        MachineBasicBlock::iterator>, 32> Regions;

  // Records if a region is not yet scheduled, or its schedule has been
  // reverted, or we generally desire to reschedule it.
  BitVector RescheduleRegions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions usually will result in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have the same occupancy as the latest MinOccupancy.
  BitVector RegionsWithMinOcc;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  // Update region boundaries when removing MI or inserting NewMI before MI.
  void updateRegionBoundaries(
      SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                                MachineBasicBlock::iterator>> &RegionBoundaries,
      MachineBasicBlock::iterator MI, MachineInstr *NewMI,
      bool Removing = false);

  void runSchedStages();

  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  void finalizeSchedule() override;
};

// GCNSchedStrategy applies multiple scheduling stages to a function.
class GCNSchedStage {
protected:
  GCNScheduleDAGMILive &DAG;

  GCNSchedStrategy &S;

  MachineFunction &MF;

  SIMachineFunctionInfo &MFI;

  const GCNSubtarget &ST;

  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current
  // stage should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  void finalizeGCNRegion();

  // Check result of scheduling.
  void checkScheduling();

  // Computes the given schedule's virtual execution time in clock cycles.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
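
// A minimal sketch of the per-stage driving loop, assuming it is driven from
// GCNScheduleDAGMILive::runSchedStages (the surrounding loop and region
// iteration below are illustrative only):
//
//   std::unique_ptr<GCNSchedStage> Stage = createSchedStage(StageID);
//   if (Stage->initGCNSchedStage()) {
//     for (/* each scheduling region */) {
//       if (!Stage->initGCNRegion()) {
//         Stage->advanceRegion();
//         continue;
//       }
//       // ... schedule the region ...
//       Stage->finalizeGCNRegion();
//     }
//     Stage->finalizeGCNSchedStage();
//   }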

class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

// Retry function scheduling if the resulting occupancy is lower than the
// occupancy targeted by earlier scheduling passes. This gives more freedom
// to schedule low register pressure blocks.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class PreRARematStage : public GCNSchedStage {
private:
  // Each region at MinOccupancy will have its own list of trivially
  // rematerializable instructions we can remat to reduce RP. The list maps an
  // instruction to the position we should remat before, usually the MI using
  // the rematerializable instruction.
  MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
      RematerializableInsts;

  // Map a trivially rematerializable def to a list of regions at MinOccupancy
  // that have the defined reg as a live-in.
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;

  // Collect all trivially rematerializable VGPR instructions with a single def
  // and single use outside the defining block into RematerializableInsts.
  void collectRematerializableInstructions();

  bool isTriviallyReMaterializable(const MachineInstr &MI);

  // TODO: Should also attempt to reduce RP of SGPRs and AGPRs.
  // Attempt to reduce RP of VGPRs by sinking trivially rematerializable
  // instructions. Returns true if we were able to sink instruction(s).
  bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
                               const TargetInstrInfo *TII);

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};

class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};

} // End namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H