1 //===---------------------------- GCNILPSched.cpp - -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CodeGen/ScheduleDAG.h"
14 #include "llvm/Support/Debug.h"
15 
16 using namespace llvm;
17 
18 #define DEBUG_TYPE "machine-scheduler"
19 
20 namespace {
21 
22 class GCNILPScheduler {
23   struct Candidate : ilist_node<Candidate> {
24     SUnit *SU;
25 
Candidate__anon7369b5dd0111::GCNILPScheduler::Candidate26     Candidate(SUnit *SU_)
27       : SU(SU_) {}
28   };
29 
30   SpecificBumpPtrAllocator<Candidate> Alloc;
31   typedef simple_ilist<Candidate> Queue;
32   Queue PendingQueue;
33   Queue AvailQueue;
34   unsigned CurQueueId = 0;
35 
36   std::vector<unsigned> SUNumbers;
37 
38   /// CurCycle - The current scheduler state corresponds to this cycle.
39   unsigned CurCycle = 0;
40 
41   unsigned getNodePriority(const SUnit *SU) const;
42 
43   const SUnit *pickBest(const SUnit *left, const SUnit *right);
44   Candidate* pickCandidate();
45 
46   void releasePending();
47   void advanceToCycle(unsigned NextCycle);
48   void releasePredecessors(const SUnit* SU);
49 
50 public:
51   std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
52                                      const ScheduleDAG &DAG);
53 };
54 } // namespace
55 
56 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
57 /// Smaller number is the higher priority.
58 static unsigned
CalcNodeSethiUllmanNumber(const SUnit * SU,std::vector<unsigned> & SUNumbers)59 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
60   unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
61   if (SethiUllmanNumber != 0)
62     return SethiUllmanNumber;
63 
64   unsigned Extra = 0;
65   for (const SDep &Pred : SU->Preds) {
66     if (Pred.isCtrl()) continue;  // ignore chain preds
67     SUnit *PredSU = Pred.getSUnit();
68     unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
69     if (PredSethiUllman > SethiUllmanNumber) {
70       SethiUllmanNumber = PredSethiUllman;
71       Extra = 0;
72     }
73     else if (PredSethiUllman == SethiUllmanNumber)
74       ++Extra;
75   }
76 
77   SethiUllmanNumber += Extra;
78 
79   if (SethiUllmanNumber == 0)
80     SethiUllmanNumber = 1;
81 
82   return SethiUllmanNumber;
83 }
84 
85 // Lower priority means schedule further down. For bottom-up scheduling, lower
86 // priority SUs are scheduled before higher priority SUs.
getNodePriority(const SUnit * SU) const87 unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
88   assert(SU->NodeNum < SUNumbers.size());
89   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
90     // If SU does not have a register use, i.e. it doesn't produce a value
91     // that would be consumed (e.g. store), then it terminates a chain of
92     // computation.  Give it a large SethiUllman number so it will be
93     // scheduled right before its predecessors that it doesn't lengthen
94     // their live ranges.
95     return 0xffff;
96 
97   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
98     // If SU does not have a register def, schedule it close to its uses
99     // because it does not lengthen any live ranges.
100     return 0;
101 
102   return SUNumbers[SU->NodeNum];
103 }
104 
105 /// closestSucc - Returns the scheduled cycle of the successor which is
106 /// closest to the current cycle.
closestSucc(const SUnit * SU)107 static unsigned closestSucc(const SUnit *SU) {
108   unsigned MaxHeight = 0;
109   for (const SDep &Succ : SU->Succs) {
110     if (Succ.isCtrl()) continue;  // ignore chain succs
111     unsigned Height = Succ.getSUnit()->getHeight();
112     // If there are bunch of CopyToRegs stacked up, they should be considered
113     // to be at the same position.
114     if (Height > MaxHeight)
115       MaxHeight = Height;
116   }
117   return MaxHeight;
118 }
119 
120 /// calcMaxScratches - Returns an cost estimate of the worse case requirement
121 /// for scratch registers, i.e. number of data dependencies.
calcMaxScratches(const SUnit * SU)122 static unsigned calcMaxScratches(const SUnit *SU) {
123   unsigned Scratches = 0;
124   for (const SDep &Pred : SU->Preds) {
125     if (Pred.isCtrl()) continue;  // ignore chain preds
126     Scratches++;
127   }
128   return Scratches;
129 }
130 
131 // Return -1 if left has higher priority, 1 if right has higher priority.
132 // Return 0 if latency-based priority is equivalent.
BUCompareLatency(const SUnit * left,const SUnit * right)133 static int BUCompareLatency(const SUnit *left, const SUnit *right) {
134   // Scheduling an instruction that uses a VReg whose postincrement has not yet
135   // been scheduled will induce a copy. Model this as an extra cycle of latency.
136   int LHeight = (int)left->getHeight();
137   int RHeight = (int)right->getHeight();
138 
139   // If either node is scheduling for latency, sort them by height/depth
140   // and latency.
141 
142   // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
143   // is enabled, grouping instructions by cycle, then its height is already
144   // covered so only its depth matters. We also reach this point if both stall
145   // but have the same height.
146   if (LHeight != RHeight)
147     return LHeight > RHeight ? 1 : -1;
148 
149   int LDepth = left->getDepth();
150   int RDepth = right->getDepth();
151   if (LDepth != RDepth) {
152     LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
153                       << ") depth " << LDepth << " vs SU (" << right->NodeNum
154                       << ") depth " << RDepth << "\n");
155     return LDepth < RDepth ? 1 : -1;
156   }
157   if (left->Latency != right->Latency)
158     return left->Latency > right->Latency ? 1 : -1;
159 
160   return 0;
161 }
162 
pickBest(const SUnit * left,const SUnit * right)163 const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
164 {
165   // TODO: add register pressure lowering checks
166 
167   bool const DisableSchedCriticalPath = false;
168   int MaxReorderWindow = 6;
169   if (!DisableSchedCriticalPath) {
170     int spread = (int)left->getDepth() - (int)right->getDepth();
171     if (std::abs(spread) > MaxReorderWindow) {
172       LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
173                         << left->getDepth() << " != SU(" << right->NodeNum
174                         << "): " << right->getDepth() << "\n");
175       return left->getDepth() < right->getDepth() ? right : left;
176     }
177   }
178 
179   bool const DisableSchedHeight = false;
180   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
181     int spread = (int)left->getHeight() - (int)right->getHeight();
182     if (std::abs(spread) > MaxReorderWindow)
183       return left->getHeight() > right->getHeight() ? right : left;
184   }
185 
186   // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
187   unsigned LPriority = getNodePriority(left);
188   unsigned RPriority = getNodePriority(right);
189 
190   if (LPriority != RPriority)
191     return LPriority > RPriority ? right : left;
192 
193   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
194   // e.g.
195   // t1 = op t2, c1
196   // t3 = op t4, c2
197   //
198   // and the following instructions are both ready.
199   // t2 = op c3
200   // t4 = op c4
201   //
202   // Then schedule t2 = op first.
203   // i.e.
204   // t4 = op c4
205   // t2 = op c3
206   // t1 = op t2, c1
207   // t3 = op t4, c2
208   //
209   // This creates more short live intervals.
210   unsigned LDist = closestSucc(left);
211   unsigned RDist = closestSucc(right);
212   if (LDist != RDist)
213     return LDist < RDist ? right : left;
214 
215   // How many registers becomes live when the node is scheduled.
216   unsigned LScratch = calcMaxScratches(left);
217   unsigned RScratch = calcMaxScratches(right);
218   if (LScratch != RScratch)
219     return LScratch > RScratch ? right : left;
220 
221   bool const DisableSchedCycles = false;
222   if (!DisableSchedCycles) {
223     int result = BUCompareLatency(left, right);
224     if (result != 0)
225       return result > 0 ? right : left;
226     return left;
227   }
228   else {
229     if (left->getHeight() != right->getHeight())
230       return (left->getHeight() > right->getHeight()) ? right : left;
231 
232     if (left->getDepth() != right->getDepth())
233       return (left->getDepth() < right->getDepth()) ? right : left;
234   }
235 
236   assert(left->NodeQueueId && right->NodeQueueId &&
237         "NodeQueueId cannot be zero");
238   return (left->NodeQueueId > right->NodeQueueId) ? right : left;
239 }
240 
pickCandidate()241 GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
242   if (AvailQueue.empty())
243     return nullptr;
244   auto Best = AvailQueue.begin();
245   for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
246     auto NewBestSU = pickBest(Best->SU, I->SU);
247     if (NewBestSU != Best->SU) {
248       assert(NewBestSU == I->SU);
249       Best = I;
250     }
251   }
252   return &*Best;
253 }
254 
releasePending()255 void GCNILPScheduler::releasePending() {
256   // Check to see if any of the pending instructions are ready to issue.  If
257   // so, add them to the available queue.
258   for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
259     auto &C = *I++;
260     if (C.SU->getHeight() <= CurCycle) {
261       PendingQueue.remove(C);
262       AvailQueue.push_back(C);
263       C.SU->NodeQueueId = CurQueueId++;
264     }
265   }
266 }
267 
268 /// Move the scheduler state forward by the specified number of Cycles.
advanceToCycle(unsigned NextCycle)269 void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
270   if (NextCycle <= CurCycle)
271     return;
272   CurCycle = NextCycle;
273   releasePending();
274 }
275 
releasePredecessors(const SUnit * SU)276 void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
277   for (const auto &PredEdge : SU->Preds) {
278     auto PredSU = PredEdge.getSUnit();
279     if (PredEdge.isWeak())
280       continue;
281     assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
282 
283     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
284 
285     if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
286       PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
287   }
288 }
289 
290 std::vector<const SUnit*>
schedule(ArrayRef<const SUnit * > BotRoots,const ScheduleDAG & DAG)291 GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
292                           const ScheduleDAG &DAG) {
293   auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
294 
295   std::vector<SUnit> SUSavedCopy;
296   SUSavedCopy.resize(SUnits.size());
297 
298   // we cannot save only those fields we touch: some of them are private
299   // so save units verbatim: this assumes SUnit should have value semantics
300   for (const SUnit &SU : SUnits)
301     SUSavedCopy[SU.NodeNum] = SU;
302 
303   SUNumbers.assign(SUnits.size(), 0);
304   for (const SUnit &SU : SUnits)
305     CalcNodeSethiUllmanNumber(&SU, SUNumbers);
306 
307   for (auto SU : BotRoots) {
308     AvailQueue.push_back(
309       *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
310   }
311   releasePredecessors(&DAG.ExitSU);
312 
313   std::vector<const SUnit*> Schedule;
314   Schedule.reserve(SUnits.size());
315   while (true) {
316     if (AvailQueue.empty() && !PendingQueue.empty()) {
317       auto EarliestSU = std::min_element(
318         PendingQueue.begin(), PendingQueue.end(),
319         [=](const Candidate& C1, const Candidate& C2) {
320         return C1.SU->getHeight() < C2.SU->getHeight();
321       })->SU;
322       advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
323     }
324     if (AvailQueue.empty())
325       break;
326 
327     LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
328                          "Ready queue:";
329                for (auto &C
330                     : AvailQueue) dbgs()
331                << ' ' << C.SU->NodeNum;
332                dbgs() << '\n';);
333 
334     auto C = pickCandidate();
335     assert(C);
336     AvailQueue.remove(*C);
337     auto SU = C->SU;
338     LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
339 
340     advanceToCycle(SU->getHeight());
341 
342     releasePredecessors(SU);
343     Schedule.push_back(SU);
344     SU->isScheduled = true;
345   }
346   assert(SUnits.size() == Schedule.size());
347 
348   std::reverse(Schedule.begin(), Schedule.end());
349 
350   // restore units
351   for (auto &SU : SUnits)
352     SU = SUSavedCopy[SU.NodeNum];
353 
354   return Schedule;
355 }
356 
357 namespace llvm {
makeGCNILPScheduler(ArrayRef<const SUnit * > BotRoots,const ScheduleDAG & DAG)358 std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
359                                               const ScheduleDAG &DAG) {
360   GCNILPScheduler S;
361   return S.schedule(BotRoots, DAG);
362 }
363 }
364