109467b48Spatrick //===---------------------------- GCNILPSched.cpp - -----------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick /// \file
1009467b48Spatrick //
1109467b48Spatrick //===----------------------------------------------------------------------===//
1209467b48Spatrick 
1309467b48Spatrick #include "llvm/CodeGen/ScheduleDAG.h"
1409467b48Spatrick 
1509467b48Spatrick using namespace llvm;
1609467b48Spatrick 
1709467b48Spatrick #define DEBUG_TYPE "machine-scheduler"
1809467b48Spatrick 
1909467b48Spatrick namespace {
2009467b48Spatrick 
2109467b48Spatrick class GCNILPScheduler {
2209467b48Spatrick   struct Candidate : ilist_node<Candidate> {
2309467b48Spatrick     SUnit *SU;
2409467b48Spatrick 
Candidate__anonfecd50e40111::GCNILPScheduler::Candidate2509467b48Spatrick     Candidate(SUnit *SU_)
2609467b48Spatrick       : SU(SU_) {}
2709467b48Spatrick   };
2809467b48Spatrick 
2909467b48Spatrick   SpecificBumpPtrAllocator<Candidate> Alloc;
3009467b48Spatrick   typedef simple_ilist<Candidate> Queue;
3109467b48Spatrick   Queue PendingQueue;
3209467b48Spatrick   Queue AvailQueue;
3309467b48Spatrick   unsigned CurQueueId = 0;
3409467b48Spatrick 
3509467b48Spatrick   std::vector<unsigned> SUNumbers;
3609467b48Spatrick 
3709467b48Spatrick   /// CurCycle - The current scheduler state corresponds to this cycle.
3809467b48Spatrick   unsigned CurCycle = 0;
3909467b48Spatrick 
4009467b48Spatrick   unsigned getNodePriority(const SUnit *SU) const;
4109467b48Spatrick 
4209467b48Spatrick   const SUnit *pickBest(const SUnit *left, const SUnit *right);
4309467b48Spatrick   Candidate* pickCandidate();
4409467b48Spatrick 
4509467b48Spatrick   void releasePending();
4609467b48Spatrick   void advanceToCycle(unsigned NextCycle);
4709467b48Spatrick   void releasePredecessors(const SUnit* SU);
4809467b48Spatrick 
4909467b48Spatrick public:
5009467b48Spatrick   std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
5109467b48Spatrick                                      const ScheduleDAG &DAG);
5209467b48Spatrick };
5309467b48Spatrick } // namespace
5409467b48Spatrick 
5509467b48Spatrick /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
5609467b48Spatrick /// Smaller number is the higher priority.
5709467b48Spatrick static unsigned
CalcNodeSethiUllmanNumber(const SUnit * SU,std::vector<unsigned> & SUNumbers)5809467b48Spatrick CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
5909467b48Spatrick   unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
6009467b48Spatrick   if (SethiUllmanNumber != 0)
6109467b48Spatrick     return SethiUllmanNumber;
6209467b48Spatrick 
6309467b48Spatrick   unsigned Extra = 0;
6409467b48Spatrick   for (const SDep &Pred : SU->Preds) {
6509467b48Spatrick     if (Pred.isCtrl()) continue;  // ignore chain preds
6609467b48Spatrick     SUnit *PredSU = Pred.getSUnit();
6709467b48Spatrick     unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
6809467b48Spatrick     if (PredSethiUllman > SethiUllmanNumber) {
6909467b48Spatrick       SethiUllmanNumber = PredSethiUllman;
7009467b48Spatrick       Extra = 0;
7109467b48Spatrick     }
7209467b48Spatrick     else if (PredSethiUllman == SethiUllmanNumber)
7309467b48Spatrick       ++Extra;
7409467b48Spatrick   }
7509467b48Spatrick 
7609467b48Spatrick   SethiUllmanNumber += Extra;
7709467b48Spatrick 
7809467b48Spatrick   if (SethiUllmanNumber == 0)
7909467b48Spatrick     SethiUllmanNumber = 1;
8009467b48Spatrick 
8109467b48Spatrick   return SethiUllmanNumber;
8209467b48Spatrick }
8309467b48Spatrick 
8409467b48Spatrick // Lower priority means schedule further down. For bottom-up scheduling, lower
8509467b48Spatrick // priority SUs are scheduled before higher priority SUs.
getNodePriority(const SUnit * SU) const8609467b48Spatrick unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
8709467b48Spatrick   assert(SU->NodeNum < SUNumbers.size());
8809467b48Spatrick   if (SU->NumSuccs == 0 && SU->NumPreds != 0)
8909467b48Spatrick     // If SU does not have a register use, i.e. it doesn't produce a value
9009467b48Spatrick     // that would be consumed (e.g. store), then it terminates a chain of
9109467b48Spatrick     // computation.  Give it a large SethiUllman number so it will be
9209467b48Spatrick     // scheduled right before its predecessors that it doesn't lengthen
9309467b48Spatrick     // their live ranges.
9409467b48Spatrick     return 0xffff;
9509467b48Spatrick 
9609467b48Spatrick   if (SU->NumPreds == 0 && SU->NumSuccs != 0)
9709467b48Spatrick     // If SU does not have a register def, schedule it close to its uses
9809467b48Spatrick     // because it does not lengthen any live ranges.
9909467b48Spatrick     return 0;
10009467b48Spatrick 
10109467b48Spatrick   return SUNumbers[SU->NodeNum];
10209467b48Spatrick }
10309467b48Spatrick 
10409467b48Spatrick /// closestSucc - Returns the scheduled cycle of the successor which is
10509467b48Spatrick /// closest to the current cycle.
closestSucc(const SUnit * SU)10609467b48Spatrick static unsigned closestSucc(const SUnit *SU) {
10709467b48Spatrick   unsigned MaxHeight = 0;
10809467b48Spatrick   for (const SDep &Succ : SU->Succs) {
10909467b48Spatrick     if (Succ.isCtrl()) continue;  // ignore chain succs
11009467b48Spatrick     unsigned Height = Succ.getSUnit()->getHeight();
11109467b48Spatrick     // If there are bunch of CopyToRegs stacked up, they should be considered
11209467b48Spatrick     // to be at the same position.
11309467b48Spatrick     if (Height > MaxHeight)
11409467b48Spatrick       MaxHeight = Height;
11509467b48Spatrick   }
11609467b48Spatrick   return MaxHeight;
11709467b48Spatrick }
11809467b48Spatrick 
11909467b48Spatrick /// calcMaxScratches - Returns an cost estimate of the worse case requirement
12009467b48Spatrick /// for scratch registers, i.e. number of data dependencies.
calcMaxScratches(const SUnit * SU)12109467b48Spatrick static unsigned calcMaxScratches(const SUnit *SU) {
12209467b48Spatrick   unsigned Scratches = 0;
12309467b48Spatrick   for (const SDep &Pred : SU->Preds) {
12409467b48Spatrick     if (Pred.isCtrl()) continue;  // ignore chain preds
12509467b48Spatrick     Scratches++;
12609467b48Spatrick   }
12709467b48Spatrick   return Scratches;
12809467b48Spatrick }
12909467b48Spatrick 
13009467b48Spatrick // Return -1 if left has higher priority, 1 if right has higher priority.
13109467b48Spatrick // Return 0 if latency-based priority is equivalent.
BUCompareLatency(const SUnit * left,const SUnit * right)13209467b48Spatrick static int BUCompareLatency(const SUnit *left, const SUnit *right) {
13309467b48Spatrick   // Scheduling an instruction that uses a VReg whose postincrement has not yet
13409467b48Spatrick   // been scheduled will induce a copy. Model this as an extra cycle of latency.
13509467b48Spatrick   int LHeight = (int)left->getHeight();
13609467b48Spatrick   int RHeight = (int)right->getHeight();
13709467b48Spatrick 
13809467b48Spatrick   // If either node is scheduling for latency, sort them by height/depth
13909467b48Spatrick   // and latency.
14009467b48Spatrick 
14109467b48Spatrick   // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
14209467b48Spatrick   // is enabled, grouping instructions by cycle, then its height is already
14309467b48Spatrick   // covered so only its depth matters. We also reach this point if both stall
14409467b48Spatrick   // but have the same height.
14509467b48Spatrick   if (LHeight != RHeight)
14609467b48Spatrick     return LHeight > RHeight ? 1 : -1;
14709467b48Spatrick 
14809467b48Spatrick   int LDepth = left->getDepth();
14909467b48Spatrick   int RDepth = right->getDepth();
15009467b48Spatrick   if (LDepth != RDepth) {
15109467b48Spatrick     LLVM_DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
15209467b48Spatrick                       << ") depth " << LDepth << " vs SU (" << right->NodeNum
15309467b48Spatrick                       << ") depth " << RDepth << "\n");
15409467b48Spatrick     return LDepth < RDepth ? 1 : -1;
15509467b48Spatrick   }
15609467b48Spatrick   if (left->Latency != right->Latency)
15709467b48Spatrick     return left->Latency > right->Latency ? 1 : -1;
15809467b48Spatrick 
15909467b48Spatrick   return 0;
16009467b48Spatrick }
16109467b48Spatrick 
pickBest(const SUnit * left,const SUnit * right)16209467b48Spatrick const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
16309467b48Spatrick {
16409467b48Spatrick   // TODO: add register pressure lowering checks
16509467b48Spatrick 
16609467b48Spatrick   bool const DisableSchedCriticalPath = false;
16709467b48Spatrick   int MaxReorderWindow = 6;
16809467b48Spatrick   if (!DisableSchedCriticalPath) {
16909467b48Spatrick     int spread = (int)left->getDepth() - (int)right->getDepth();
17009467b48Spatrick     if (std::abs(spread) > MaxReorderWindow) {
17109467b48Spatrick       LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
17209467b48Spatrick                         << left->getDepth() << " != SU(" << right->NodeNum
17309467b48Spatrick                         << "): " << right->getDepth() << "\n");
17409467b48Spatrick       return left->getDepth() < right->getDepth() ? right : left;
17509467b48Spatrick     }
17609467b48Spatrick   }
17709467b48Spatrick 
17809467b48Spatrick   bool const DisableSchedHeight = false;
17909467b48Spatrick   if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
18009467b48Spatrick     int spread = (int)left->getHeight() - (int)right->getHeight();
18109467b48Spatrick     if (std::abs(spread) > MaxReorderWindow)
18209467b48Spatrick       return left->getHeight() > right->getHeight() ? right : left;
18309467b48Spatrick   }
18409467b48Spatrick 
18509467b48Spatrick   // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
18609467b48Spatrick   unsigned LPriority = getNodePriority(left);
18709467b48Spatrick   unsigned RPriority = getNodePriority(right);
18809467b48Spatrick 
18909467b48Spatrick   if (LPriority != RPriority)
19009467b48Spatrick     return LPriority > RPriority ? right : left;
19109467b48Spatrick 
19209467b48Spatrick   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
19309467b48Spatrick   // e.g.
19409467b48Spatrick   // t1 = op t2, c1
19509467b48Spatrick   // t3 = op t4, c2
19609467b48Spatrick   //
19709467b48Spatrick   // and the following instructions are both ready.
19809467b48Spatrick   // t2 = op c3
19909467b48Spatrick   // t4 = op c4
20009467b48Spatrick   //
20109467b48Spatrick   // Then schedule t2 = op first.
20209467b48Spatrick   // i.e.
20309467b48Spatrick   // t4 = op c4
20409467b48Spatrick   // t2 = op c3
20509467b48Spatrick   // t1 = op t2, c1
20609467b48Spatrick   // t3 = op t4, c2
20709467b48Spatrick   //
20809467b48Spatrick   // This creates more short live intervals.
20909467b48Spatrick   unsigned LDist = closestSucc(left);
21009467b48Spatrick   unsigned RDist = closestSucc(right);
21109467b48Spatrick   if (LDist != RDist)
21209467b48Spatrick     return LDist < RDist ? right : left;
21309467b48Spatrick 
21409467b48Spatrick   // How many registers becomes live when the node is scheduled.
21509467b48Spatrick   unsigned LScratch = calcMaxScratches(left);
21609467b48Spatrick   unsigned RScratch = calcMaxScratches(right);
21709467b48Spatrick   if (LScratch != RScratch)
21809467b48Spatrick     return LScratch > RScratch ? right : left;
21909467b48Spatrick 
22009467b48Spatrick   bool const DisableSchedCycles = false;
22109467b48Spatrick   if (!DisableSchedCycles) {
22209467b48Spatrick     int result = BUCompareLatency(left, right);
22309467b48Spatrick     if (result != 0)
22409467b48Spatrick       return result > 0 ? right : left;
22509467b48Spatrick     return left;
22609467b48Spatrick   }
22709467b48Spatrick   else {
22809467b48Spatrick     if (left->getHeight() != right->getHeight())
22909467b48Spatrick       return (left->getHeight() > right->getHeight()) ? right : left;
23009467b48Spatrick 
23109467b48Spatrick     if (left->getDepth() != right->getDepth())
23209467b48Spatrick       return (left->getDepth() < right->getDepth()) ? right : left;
23309467b48Spatrick   }
23409467b48Spatrick 
23509467b48Spatrick   assert(left->NodeQueueId && right->NodeQueueId &&
23609467b48Spatrick         "NodeQueueId cannot be zero");
23709467b48Spatrick   return (left->NodeQueueId > right->NodeQueueId) ? right : left;
23809467b48Spatrick }
23909467b48Spatrick 
pickCandidate()24009467b48Spatrick GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
24109467b48Spatrick   if (AvailQueue.empty())
24209467b48Spatrick     return nullptr;
24309467b48Spatrick   auto Best = AvailQueue.begin();
24409467b48Spatrick   for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
24509467b48Spatrick     auto NewBestSU = pickBest(Best->SU, I->SU);
24609467b48Spatrick     if (NewBestSU != Best->SU) {
24709467b48Spatrick       assert(NewBestSU == I->SU);
24809467b48Spatrick       Best = I;
24909467b48Spatrick     }
25009467b48Spatrick   }
25109467b48Spatrick   return &*Best;
25209467b48Spatrick }
25309467b48Spatrick 
releasePending()25409467b48Spatrick void GCNILPScheduler::releasePending() {
25509467b48Spatrick   // Check to see if any of the pending instructions are ready to issue.  If
25609467b48Spatrick   // so, add them to the available queue.
25709467b48Spatrick   for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
25809467b48Spatrick     auto &C = *I++;
25909467b48Spatrick     if (C.SU->getHeight() <= CurCycle) {
26009467b48Spatrick       PendingQueue.remove(C);
26109467b48Spatrick       AvailQueue.push_back(C);
26209467b48Spatrick       C.SU->NodeQueueId = CurQueueId++;
26309467b48Spatrick     }
26409467b48Spatrick   }
26509467b48Spatrick }
26609467b48Spatrick 
26709467b48Spatrick /// Move the scheduler state forward by the specified number of Cycles.
advanceToCycle(unsigned NextCycle)26809467b48Spatrick void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
26909467b48Spatrick   if (NextCycle <= CurCycle)
27009467b48Spatrick     return;
27109467b48Spatrick   CurCycle = NextCycle;
27209467b48Spatrick   releasePending();
27309467b48Spatrick }
27409467b48Spatrick 
releasePredecessors(const SUnit * SU)27509467b48Spatrick void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
27609467b48Spatrick   for (const auto &PredEdge : SU->Preds) {
27709467b48Spatrick     auto PredSU = PredEdge.getSUnit();
27809467b48Spatrick     if (PredEdge.isWeak())
27909467b48Spatrick       continue;
28009467b48Spatrick     assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
28109467b48Spatrick 
28209467b48Spatrick     PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
28309467b48Spatrick 
28409467b48Spatrick     if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
28509467b48Spatrick       PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
28609467b48Spatrick   }
28709467b48Spatrick }
28809467b48Spatrick 
28909467b48Spatrick std::vector<const SUnit*>
schedule(ArrayRef<const SUnit * > BotRoots,const ScheduleDAG & DAG)29009467b48Spatrick GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
29109467b48Spatrick                           const ScheduleDAG &DAG) {
29209467b48Spatrick   auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
29309467b48Spatrick 
29409467b48Spatrick   std::vector<SUnit> SUSavedCopy;
29509467b48Spatrick   SUSavedCopy.resize(SUnits.size());
29609467b48Spatrick 
29709467b48Spatrick   // we cannot save only those fields we touch: some of them are private
29809467b48Spatrick   // so save units verbatim: this assumes SUnit should have value semantics
29909467b48Spatrick   for (const SUnit &SU : SUnits)
30009467b48Spatrick     SUSavedCopy[SU.NodeNum] = SU;
30109467b48Spatrick 
30209467b48Spatrick   SUNumbers.assign(SUnits.size(), 0);
30309467b48Spatrick   for (const SUnit &SU : SUnits)
30409467b48Spatrick     CalcNodeSethiUllmanNumber(&SU, SUNumbers);
30509467b48Spatrick 
306*d415bd75Srobert   for (const auto *SU : BotRoots) {
30709467b48Spatrick     AvailQueue.push_back(
30809467b48Spatrick       *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
30909467b48Spatrick   }
31009467b48Spatrick   releasePredecessors(&DAG.ExitSU);
31109467b48Spatrick 
31209467b48Spatrick   std::vector<const SUnit*> Schedule;
31309467b48Spatrick   Schedule.reserve(SUnits.size());
31409467b48Spatrick   while (true) {
31509467b48Spatrick     if (AvailQueue.empty() && !PendingQueue.empty()) {
31609467b48Spatrick       auto EarliestSU = std::min_element(
31709467b48Spatrick         PendingQueue.begin(), PendingQueue.end(),
31809467b48Spatrick         [=](const Candidate& C1, const Candidate& C2) {
31909467b48Spatrick         return C1.SU->getHeight() < C2.SU->getHeight();
32009467b48Spatrick       })->SU;
32109467b48Spatrick       advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
32209467b48Spatrick     }
32309467b48Spatrick     if (AvailQueue.empty())
32409467b48Spatrick       break;
32509467b48Spatrick 
32609467b48Spatrick     LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
32709467b48Spatrick                          "Ready queue:";
32809467b48Spatrick                for (auto &C
32909467b48Spatrick                     : AvailQueue) dbgs()
33009467b48Spatrick                << ' ' << C.SU->NodeNum;
33109467b48Spatrick                dbgs() << '\n';);
33209467b48Spatrick 
33309467b48Spatrick     auto C = pickCandidate();
33409467b48Spatrick     assert(C);
33509467b48Spatrick     AvailQueue.remove(*C);
33609467b48Spatrick     auto SU = C->SU;
33709467b48Spatrick     LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
33809467b48Spatrick 
33909467b48Spatrick     advanceToCycle(SU->getHeight());
34009467b48Spatrick 
34109467b48Spatrick     releasePredecessors(SU);
34209467b48Spatrick     Schedule.push_back(SU);
34309467b48Spatrick     SU->isScheduled = true;
34409467b48Spatrick   }
34509467b48Spatrick   assert(SUnits.size() == Schedule.size());
34609467b48Spatrick 
34709467b48Spatrick   std::reverse(Schedule.begin(), Schedule.end());
34809467b48Spatrick 
34909467b48Spatrick   // restore units
35009467b48Spatrick   for (auto &SU : SUnits)
35109467b48Spatrick     SU = SUSavedCopy[SU.NodeNum];
35209467b48Spatrick 
35309467b48Spatrick   return Schedule;
35409467b48Spatrick }
35509467b48Spatrick 
35609467b48Spatrick namespace llvm {
makeGCNILPScheduler(ArrayRef<const SUnit * > BotRoots,const ScheduleDAG & DAG)35709467b48Spatrick std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
35809467b48Spatrick                                               const ScheduleDAG &DAG) {
35909467b48Spatrick   GCNILPScheduler S;
36009467b48Spatrick   return S.schedule(BotRoots, DAG);
36109467b48Spatrick }
36209467b48Spatrick }
363