109467b48Spatrick //===---------------------------- GCNILPSched.cpp - -----------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick /// \file
1009467b48Spatrick //
1109467b48Spatrick //===----------------------------------------------------------------------===//
1209467b48Spatrick
1309467b48Spatrick #include "llvm/CodeGen/ScheduleDAG.h"
1409467b48Spatrick
1509467b48Spatrick using namespace llvm;
1609467b48Spatrick
1709467b48Spatrick #define DEBUG_TYPE "machine-scheduler"
1809467b48Spatrick
1909467b48Spatrick namespace {
2009467b48Spatrick
2109467b48Spatrick class GCNILPScheduler {
2209467b48Spatrick struct Candidate : ilist_node<Candidate> {
2309467b48Spatrick SUnit *SU;
2409467b48Spatrick
Candidate__anonfecd50e40111::GCNILPScheduler::Candidate2509467b48Spatrick Candidate(SUnit *SU_)
2609467b48Spatrick : SU(SU_) {}
2709467b48Spatrick };
2809467b48Spatrick
2909467b48Spatrick SpecificBumpPtrAllocator<Candidate> Alloc;
3009467b48Spatrick typedef simple_ilist<Candidate> Queue;
3109467b48Spatrick Queue PendingQueue;
3209467b48Spatrick Queue AvailQueue;
3309467b48Spatrick unsigned CurQueueId = 0;
3409467b48Spatrick
3509467b48Spatrick std::vector<unsigned> SUNumbers;
3609467b48Spatrick
3709467b48Spatrick /// CurCycle - The current scheduler state corresponds to this cycle.
3809467b48Spatrick unsigned CurCycle = 0;
3909467b48Spatrick
4009467b48Spatrick unsigned getNodePriority(const SUnit *SU) const;
4109467b48Spatrick
4209467b48Spatrick const SUnit *pickBest(const SUnit *left, const SUnit *right);
4309467b48Spatrick Candidate* pickCandidate();
4409467b48Spatrick
4509467b48Spatrick void releasePending();
4609467b48Spatrick void advanceToCycle(unsigned NextCycle);
4709467b48Spatrick void releasePredecessors(const SUnit* SU);
4809467b48Spatrick
4909467b48Spatrick public:
5009467b48Spatrick std::vector<const SUnit*> schedule(ArrayRef<const SUnit*> TopRoots,
5109467b48Spatrick const ScheduleDAG &DAG);
5209467b48Spatrick };
5309467b48Spatrick } // namespace
5409467b48Spatrick
5509467b48Spatrick /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
5609467b48Spatrick /// Smaller number is the higher priority.
5709467b48Spatrick static unsigned
CalcNodeSethiUllmanNumber(const SUnit * SU,std::vector<unsigned> & SUNumbers)5809467b48Spatrick CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
5909467b48Spatrick unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
6009467b48Spatrick if (SethiUllmanNumber != 0)
6109467b48Spatrick return SethiUllmanNumber;
6209467b48Spatrick
6309467b48Spatrick unsigned Extra = 0;
6409467b48Spatrick for (const SDep &Pred : SU->Preds) {
6509467b48Spatrick if (Pred.isCtrl()) continue; // ignore chain preds
6609467b48Spatrick SUnit *PredSU = Pred.getSUnit();
6709467b48Spatrick unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
6809467b48Spatrick if (PredSethiUllman > SethiUllmanNumber) {
6909467b48Spatrick SethiUllmanNumber = PredSethiUllman;
7009467b48Spatrick Extra = 0;
7109467b48Spatrick }
7209467b48Spatrick else if (PredSethiUllman == SethiUllmanNumber)
7309467b48Spatrick ++Extra;
7409467b48Spatrick }
7509467b48Spatrick
7609467b48Spatrick SethiUllmanNumber += Extra;
7709467b48Spatrick
7809467b48Spatrick if (SethiUllmanNumber == 0)
7909467b48Spatrick SethiUllmanNumber = 1;
8009467b48Spatrick
8109467b48Spatrick return SethiUllmanNumber;
8209467b48Spatrick }
8309467b48Spatrick
8409467b48Spatrick // Lower priority means schedule further down. For bottom-up scheduling, lower
8509467b48Spatrick // priority SUs are scheduled before higher priority SUs.
getNodePriority(const SUnit * SU) const8609467b48Spatrick unsigned GCNILPScheduler::getNodePriority(const SUnit *SU) const {
8709467b48Spatrick assert(SU->NodeNum < SUNumbers.size());
8809467b48Spatrick if (SU->NumSuccs == 0 && SU->NumPreds != 0)
8909467b48Spatrick // If SU does not have a register use, i.e. it doesn't produce a value
9009467b48Spatrick // that would be consumed (e.g. store), then it terminates a chain of
9109467b48Spatrick // computation. Give it a large SethiUllman number so it will be
9209467b48Spatrick // scheduled right before its predecessors that it doesn't lengthen
9309467b48Spatrick // their live ranges.
9409467b48Spatrick return 0xffff;
9509467b48Spatrick
9609467b48Spatrick if (SU->NumPreds == 0 && SU->NumSuccs != 0)
9709467b48Spatrick // If SU does not have a register def, schedule it close to its uses
9809467b48Spatrick // because it does not lengthen any live ranges.
9909467b48Spatrick return 0;
10009467b48Spatrick
10109467b48Spatrick return SUNumbers[SU->NodeNum];
10209467b48Spatrick }
10309467b48Spatrick
10409467b48Spatrick /// closestSucc - Returns the scheduled cycle of the successor which is
10509467b48Spatrick /// closest to the current cycle.
closestSucc(const SUnit * SU)10609467b48Spatrick static unsigned closestSucc(const SUnit *SU) {
10709467b48Spatrick unsigned MaxHeight = 0;
10809467b48Spatrick for (const SDep &Succ : SU->Succs) {
10909467b48Spatrick if (Succ.isCtrl()) continue; // ignore chain succs
11009467b48Spatrick unsigned Height = Succ.getSUnit()->getHeight();
11109467b48Spatrick // If there are bunch of CopyToRegs stacked up, they should be considered
11209467b48Spatrick // to be at the same position.
11309467b48Spatrick if (Height > MaxHeight)
11409467b48Spatrick MaxHeight = Height;
11509467b48Spatrick }
11609467b48Spatrick return MaxHeight;
11709467b48Spatrick }
11809467b48Spatrick
11909467b48Spatrick /// calcMaxScratches - Returns an cost estimate of the worse case requirement
12009467b48Spatrick /// for scratch registers, i.e. number of data dependencies.
calcMaxScratches(const SUnit * SU)12109467b48Spatrick static unsigned calcMaxScratches(const SUnit *SU) {
12209467b48Spatrick unsigned Scratches = 0;
12309467b48Spatrick for (const SDep &Pred : SU->Preds) {
12409467b48Spatrick if (Pred.isCtrl()) continue; // ignore chain preds
12509467b48Spatrick Scratches++;
12609467b48Spatrick }
12709467b48Spatrick return Scratches;
12809467b48Spatrick }
12909467b48Spatrick
13009467b48Spatrick // Return -1 if left has higher priority, 1 if right has higher priority.
13109467b48Spatrick // Return 0 if latency-based priority is equivalent.
BUCompareLatency(const SUnit * left,const SUnit * right)13209467b48Spatrick static int BUCompareLatency(const SUnit *left, const SUnit *right) {
13309467b48Spatrick // Scheduling an instruction that uses a VReg whose postincrement has not yet
13409467b48Spatrick // been scheduled will induce a copy. Model this as an extra cycle of latency.
13509467b48Spatrick int LHeight = (int)left->getHeight();
13609467b48Spatrick int RHeight = (int)right->getHeight();
13709467b48Spatrick
13809467b48Spatrick // If either node is scheduling for latency, sort them by height/depth
13909467b48Spatrick // and latency.
14009467b48Spatrick
14109467b48Spatrick // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer
14209467b48Spatrick // is enabled, grouping instructions by cycle, then its height is already
14309467b48Spatrick // covered so only its depth matters. We also reach this point if both stall
14409467b48Spatrick // but have the same height.
14509467b48Spatrick if (LHeight != RHeight)
14609467b48Spatrick return LHeight > RHeight ? 1 : -1;
14709467b48Spatrick
14809467b48Spatrick int LDepth = left->getDepth();
14909467b48Spatrick int RDepth = right->getDepth();
15009467b48Spatrick if (LDepth != RDepth) {
15109467b48Spatrick LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
15209467b48Spatrick << ") depth " << LDepth << " vs SU (" << right->NodeNum
15309467b48Spatrick << ") depth " << RDepth << "\n");
15409467b48Spatrick return LDepth < RDepth ? 1 : -1;
15509467b48Spatrick }
15609467b48Spatrick if (left->Latency != right->Latency)
15709467b48Spatrick return left->Latency > right->Latency ? 1 : -1;
15809467b48Spatrick
15909467b48Spatrick return 0;
16009467b48Spatrick }
16109467b48Spatrick
pickBest(const SUnit * left,const SUnit * right)16209467b48Spatrick const SUnit *GCNILPScheduler::pickBest(const SUnit *left, const SUnit *right)
16309467b48Spatrick {
16409467b48Spatrick // TODO: add register pressure lowering checks
16509467b48Spatrick
16609467b48Spatrick bool const DisableSchedCriticalPath = false;
16709467b48Spatrick int MaxReorderWindow = 6;
16809467b48Spatrick if (!DisableSchedCriticalPath) {
16909467b48Spatrick int spread = (int)left->getDepth() - (int)right->getDepth();
17009467b48Spatrick if (std::abs(spread) > MaxReorderWindow) {
17109467b48Spatrick LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
17209467b48Spatrick << left->getDepth() << " != SU(" << right->NodeNum
17309467b48Spatrick << "): " << right->getDepth() << "\n");
17409467b48Spatrick return left->getDepth() < right->getDepth() ? right : left;
17509467b48Spatrick }
17609467b48Spatrick }
17709467b48Spatrick
17809467b48Spatrick bool const DisableSchedHeight = false;
17909467b48Spatrick if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
18009467b48Spatrick int spread = (int)left->getHeight() - (int)right->getHeight();
18109467b48Spatrick if (std::abs(spread) > MaxReorderWindow)
18209467b48Spatrick return left->getHeight() > right->getHeight() ? right : left;
18309467b48Spatrick }
18409467b48Spatrick
18509467b48Spatrick // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
18609467b48Spatrick unsigned LPriority = getNodePriority(left);
18709467b48Spatrick unsigned RPriority = getNodePriority(right);
18809467b48Spatrick
18909467b48Spatrick if (LPriority != RPriority)
19009467b48Spatrick return LPriority > RPriority ? right : left;
19109467b48Spatrick
19209467b48Spatrick // Try schedule def + use closer when Sethi-Ullman numbers are the same.
19309467b48Spatrick // e.g.
19409467b48Spatrick // t1 = op t2, c1
19509467b48Spatrick // t3 = op t4, c2
19609467b48Spatrick //
19709467b48Spatrick // and the following instructions are both ready.
19809467b48Spatrick // t2 = op c3
19909467b48Spatrick // t4 = op c4
20009467b48Spatrick //
20109467b48Spatrick // Then schedule t2 = op first.
20209467b48Spatrick // i.e.
20309467b48Spatrick // t4 = op c4
20409467b48Spatrick // t2 = op c3
20509467b48Spatrick // t1 = op t2, c1
20609467b48Spatrick // t3 = op t4, c2
20709467b48Spatrick //
20809467b48Spatrick // This creates more short live intervals.
20909467b48Spatrick unsigned LDist = closestSucc(left);
21009467b48Spatrick unsigned RDist = closestSucc(right);
21109467b48Spatrick if (LDist != RDist)
21209467b48Spatrick return LDist < RDist ? right : left;
21309467b48Spatrick
21409467b48Spatrick // How many registers becomes live when the node is scheduled.
21509467b48Spatrick unsigned LScratch = calcMaxScratches(left);
21609467b48Spatrick unsigned RScratch = calcMaxScratches(right);
21709467b48Spatrick if (LScratch != RScratch)
21809467b48Spatrick return LScratch > RScratch ? right : left;
21909467b48Spatrick
22009467b48Spatrick bool const DisableSchedCycles = false;
22109467b48Spatrick if (!DisableSchedCycles) {
22209467b48Spatrick int result = BUCompareLatency(left, right);
22309467b48Spatrick if (result != 0)
22409467b48Spatrick return result > 0 ? right : left;
22509467b48Spatrick return left;
22609467b48Spatrick }
22709467b48Spatrick else {
22809467b48Spatrick if (left->getHeight() != right->getHeight())
22909467b48Spatrick return (left->getHeight() > right->getHeight()) ? right : left;
23009467b48Spatrick
23109467b48Spatrick if (left->getDepth() != right->getDepth())
23209467b48Spatrick return (left->getDepth() < right->getDepth()) ? right : left;
23309467b48Spatrick }
23409467b48Spatrick
23509467b48Spatrick assert(left->NodeQueueId && right->NodeQueueId &&
23609467b48Spatrick "NodeQueueId cannot be zero");
23709467b48Spatrick return (left->NodeQueueId > right->NodeQueueId) ? right : left;
23809467b48Spatrick }
23909467b48Spatrick
pickCandidate()24009467b48Spatrick GCNILPScheduler::Candidate* GCNILPScheduler::pickCandidate() {
24109467b48Spatrick if (AvailQueue.empty())
24209467b48Spatrick return nullptr;
24309467b48Spatrick auto Best = AvailQueue.begin();
24409467b48Spatrick for (auto I = std::next(AvailQueue.begin()), E = AvailQueue.end(); I != E; ++I) {
24509467b48Spatrick auto NewBestSU = pickBest(Best->SU, I->SU);
24609467b48Spatrick if (NewBestSU != Best->SU) {
24709467b48Spatrick assert(NewBestSU == I->SU);
24809467b48Spatrick Best = I;
24909467b48Spatrick }
25009467b48Spatrick }
25109467b48Spatrick return &*Best;
25209467b48Spatrick }
25309467b48Spatrick
releasePending()25409467b48Spatrick void GCNILPScheduler::releasePending() {
25509467b48Spatrick // Check to see if any of the pending instructions are ready to issue. If
25609467b48Spatrick // so, add them to the available queue.
25709467b48Spatrick for(auto I = PendingQueue.begin(), E = PendingQueue.end(); I != E;) {
25809467b48Spatrick auto &C = *I++;
25909467b48Spatrick if (C.SU->getHeight() <= CurCycle) {
26009467b48Spatrick PendingQueue.remove(C);
26109467b48Spatrick AvailQueue.push_back(C);
26209467b48Spatrick C.SU->NodeQueueId = CurQueueId++;
26309467b48Spatrick }
26409467b48Spatrick }
26509467b48Spatrick }
26609467b48Spatrick
26709467b48Spatrick /// Move the scheduler state forward by the specified number of Cycles.
advanceToCycle(unsigned NextCycle)26809467b48Spatrick void GCNILPScheduler::advanceToCycle(unsigned NextCycle) {
26909467b48Spatrick if (NextCycle <= CurCycle)
27009467b48Spatrick return;
27109467b48Spatrick CurCycle = NextCycle;
27209467b48Spatrick releasePending();
27309467b48Spatrick }
27409467b48Spatrick
releasePredecessors(const SUnit * SU)27509467b48Spatrick void GCNILPScheduler::releasePredecessors(const SUnit* SU) {
27609467b48Spatrick for (const auto &PredEdge : SU->Preds) {
27709467b48Spatrick auto PredSU = PredEdge.getSUnit();
27809467b48Spatrick if (PredEdge.isWeak())
27909467b48Spatrick continue;
28009467b48Spatrick assert(PredSU->isBoundaryNode() || PredSU->NumSuccsLeft > 0);
28109467b48Spatrick
28209467b48Spatrick PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge.getLatency());
28309467b48Spatrick
28409467b48Spatrick if (!PredSU->isBoundaryNode() && --PredSU->NumSuccsLeft == 0)
28509467b48Spatrick PendingQueue.push_front(*new (Alloc.Allocate()) Candidate(PredSU));
28609467b48Spatrick }
28709467b48Spatrick }
28809467b48Spatrick
28909467b48Spatrick std::vector<const SUnit*>
schedule(ArrayRef<const SUnit * > BotRoots,const ScheduleDAG & DAG)29009467b48Spatrick GCNILPScheduler::schedule(ArrayRef<const SUnit*> BotRoots,
29109467b48Spatrick const ScheduleDAG &DAG) {
29209467b48Spatrick auto &SUnits = const_cast<ScheduleDAG&>(DAG).SUnits;
29309467b48Spatrick
29409467b48Spatrick std::vector<SUnit> SUSavedCopy;
29509467b48Spatrick SUSavedCopy.resize(SUnits.size());
29609467b48Spatrick
29709467b48Spatrick // we cannot save only those fields we touch: some of them are private
29809467b48Spatrick // so save units verbatim: this assumes SUnit should have value semantics
29909467b48Spatrick for (const SUnit &SU : SUnits)
30009467b48Spatrick SUSavedCopy[SU.NodeNum] = SU;
30109467b48Spatrick
30209467b48Spatrick SUNumbers.assign(SUnits.size(), 0);
30309467b48Spatrick for (const SUnit &SU : SUnits)
30409467b48Spatrick CalcNodeSethiUllmanNumber(&SU, SUNumbers);
30509467b48Spatrick
306*d415bd75Srobert for (const auto *SU : BotRoots) {
30709467b48Spatrick AvailQueue.push_back(
30809467b48Spatrick *new (Alloc.Allocate()) Candidate(const_cast<SUnit*>(SU)));
30909467b48Spatrick }
31009467b48Spatrick releasePredecessors(&DAG.ExitSU);
31109467b48Spatrick
31209467b48Spatrick std::vector<const SUnit*> Schedule;
31309467b48Spatrick Schedule.reserve(SUnits.size());
31409467b48Spatrick while (true) {
31509467b48Spatrick if (AvailQueue.empty() && !PendingQueue.empty()) {
31609467b48Spatrick auto EarliestSU = std::min_element(
31709467b48Spatrick PendingQueue.begin(), PendingQueue.end(),
31809467b48Spatrick [=](const Candidate& C1, const Candidate& C2) {
31909467b48Spatrick return C1.SU->getHeight() < C2.SU->getHeight();
32009467b48Spatrick })->SU;
32109467b48Spatrick advanceToCycle(std::max(CurCycle + 1, EarliestSU->getHeight()));
32209467b48Spatrick }
32309467b48Spatrick if (AvailQueue.empty())
32409467b48Spatrick break;
32509467b48Spatrick
32609467b48Spatrick LLVM_DEBUG(dbgs() << "\n=== Picking candidate\n"
32709467b48Spatrick "Ready queue:";
32809467b48Spatrick for (auto &C
32909467b48Spatrick : AvailQueue) dbgs()
33009467b48Spatrick << ' ' << C.SU->NodeNum;
33109467b48Spatrick dbgs() << '\n';);
33209467b48Spatrick
33309467b48Spatrick auto C = pickCandidate();
33409467b48Spatrick assert(C);
33509467b48Spatrick AvailQueue.remove(*C);
33609467b48Spatrick auto SU = C->SU;
33709467b48Spatrick LLVM_DEBUG(dbgs() << "Selected "; DAG.dumpNode(*SU));
33809467b48Spatrick
33909467b48Spatrick advanceToCycle(SU->getHeight());
34009467b48Spatrick
34109467b48Spatrick releasePredecessors(SU);
34209467b48Spatrick Schedule.push_back(SU);
34309467b48Spatrick SU->isScheduled = true;
34409467b48Spatrick }
34509467b48Spatrick assert(SUnits.size() == Schedule.size());
34609467b48Spatrick
34709467b48Spatrick std::reverse(Schedule.begin(), Schedule.end());
34809467b48Spatrick
34909467b48Spatrick // restore units
35009467b48Spatrick for (auto &SU : SUnits)
35109467b48Spatrick SU = SUSavedCopy[SU.NodeNum];
35209467b48Spatrick
35309467b48Spatrick return Schedule;
35409467b48Spatrick }
35509467b48Spatrick
35609467b48Spatrick namespace llvm {
makeGCNILPScheduler(ArrayRef<const SUnit * > BotRoots,const ScheduleDAG & DAG)35709467b48Spatrick std::vector<const SUnit*> makeGCNILPScheduler(ArrayRef<const SUnit*> BotRoots,
35809467b48Spatrick const ScheduleDAG &DAG) {
35909467b48Spatrick GCNILPScheduler S;
36009467b48Spatrick return S.schedule(BotRoots, DAG);
36109467b48Spatrick }
36209467b48Spatrick }
363