1 //===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the ResourcePriorityQueue class, which is a
10 // SchedulingPriorityQueue that prioritizes instructions using DFA state to
11 // reduce the length of the critical path through the basic block
12 // on VLIW platforms.
13 // The scheduler is basically a top-down adaptable list scheduler with DFA
14 // resource tracking added to the cost function.
15 // DFA is queried as a state machine to model "packets/bundles" during
16 // schedule. Currently packets/bundles are discarded at the end of
17 // scheduling, affecting only order of instructions.
18 //
19 //===----------------------------------------------------------------------===//
20 
21 #include "llvm/CodeGen/ResourcePriorityQueue.h"
22 #include "llvm/CodeGen/DFAPacketizer.h"
23 #include "llvm/CodeGen/SelectionDAGISel.h"
24 #include "llvm/CodeGen/SelectionDAGNodes.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSubtargetInfo.h"
29 #include "llvm/Support/CommandLine.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "scheduler"
34 
// Hidden command-line switch. When set, pop() skips the DFA-based cost
// function (SUSchedulingCost) and selects nodes with the resource_sort
// comparator instead.
static cl::opt<bool>
    DisableDFASched("disable-dfa-sched", cl::Hidden,
                    cl::desc("Disable use of DFA during scheduling"));

// Hidden command-line threshold (initial value 5). SUSchedulingCost() compares
// HorizontalVerticalBalance against it to choose between the register-pressure
// driven heuristic and the default critical-path driven heuristic.
static cl::opt<int> RegPressureThreshold(
    "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::init(5),
    cl::desc("Track reg pressure and switch priority to in-depth"));
42 
43 ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS)
44     : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) {
45   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
46   TRI = STI.getRegisterInfo();
47   TLI = IS->TLI;
48   TII = STI.getInstrInfo();
49   ResourcesModel.reset(TII->CreateTargetScheduleState(STI));
50   // This hard requirement could be relaxed, but for now
51   // do not let it proceed.
52   assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
53 
54   unsigned NumRC = TRI->getNumRegClasses();
55   RegLimit.resize(NumRC);
56   RegPressure.resize(NumRC);
57   std::fill(RegLimit.begin(), RegLimit.end(), 0);
58   std::fill(RegPressure.begin(), RegPressure.end(), 0);
59   for (const TargetRegisterClass *RC : TRI->regclasses())
60     RegLimit[RC->getID()] = TRI->getRegPressureLimit(RC, *IS->MF);
61 
62   ParallelLiveRanges = 0;
63   HorizontalVerticalBalance = 0;
64 }
65 
66 unsigned
67 ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
68   unsigned NumberDeps = 0;
69   for (SDep &Pred : SU->Preds) {
70     if (Pred.isCtrl())
71       continue;
72 
73     SUnit *PredSU = Pred.getSUnit();
74     const SDNode *ScegN = PredSU->getNode();
75 
76     if (!ScegN)
77       continue;
78 
79     // If value is passed to CopyToReg, it is probably
80     // live outside BB.
81     switch (ScegN->getOpcode()) {
82       default:  break;
83       case ISD::TokenFactor:    break;
84       case ISD::CopyFromReg:    NumberDeps++;  break;
85       case ISD::CopyToReg:      break;
86       case ISD::INLINEASM:      break;
87       case ISD::INLINEASM_BR:   break;
88     }
89     if (!ScegN->isMachineOpcode())
90       continue;
91 
92     for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
93       MVT VT = ScegN->getSimpleValueType(i);
94       if (TLI->isTypeLegal(VT)
95           && (TLI->getRegClassFor(VT)->getID() == RCId)) {
96         NumberDeps++;
97         break;
98       }
99     }
100   }
101   return NumberDeps;
102 }
103 
104 unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
105                                                     unsigned RCId) {
106   unsigned NumberDeps = 0;
107   for (const SDep &Succ : SU->Succs) {
108     if (Succ.isCtrl())
109       continue;
110 
111     SUnit *SuccSU = Succ.getSUnit();
112     const SDNode *ScegN = SuccSU->getNode();
113     if (!ScegN)
114       continue;
115 
116     // If value is passed to CopyToReg, it is probably
117     // live outside BB.
118     switch (ScegN->getOpcode()) {
119       default:  break;
120       case ISD::TokenFactor:    break;
121       case ISD::CopyFromReg:    break;
122       case ISD::CopyToReg:      NumberDeps++;  break;
123       case ISD::INLINEASM:      break;
124       case ISD::INLINEASM_BR:   break;
125     }
126     if (!ScegN->isMachineOpcode())
127       continue;
128 
129     for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
130       const SDValue &Op = ScegN->getOperand(i);
131       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
132       if (TLI->isTypeLegal(VT)
133           && (TLI->getRegClassFor(VT)->getID() == RCId)) {
134         NumberDeps++;
135         break;
136       }
137     }
138   }
139   return NumberDeps;
140 }
141 
142 static unsigned numberCtrlDepsInSU(SUnit *SU) {
143   unsigned NumberDeps = 0;
144   for (const SDep &Succ : SU->Succs)
145     if (Succ.isCtrl())
146       NumberDeps++;
147 
148   return NumberDeps;
149 }
150 
151 static unsigned numberCtrlPredInSU(SUnit *SU) {
152   unsigned NumberDeps = 0;
153   for (SDep &Pred : SU->Preds)
154     if (Pred.isCtrl())
155       NumberDeps++;
156 
157   return NumberDeps;
158 }
159 
160 ///
161 /// Initialize nodes.
162 ///
163 void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
164   SUnits = &sunits;
165   NumNodesSolelyBlocking.resize(SUnits->size(), 0);
166 
167   for (SUnit &SU : *SUnits) {
168     initNumRegDefsLeft(&SU);
169     SU.NodeQueueId = 0;
170   }
171 }
172 
173 /// This heuristic is used if DFA scheduling is not desired
174 /// for some VLIW platform.
175 bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
176   // The isScheduleHigh flag allows nodes with wraparound dependencies that
177   // cannot easily be modeled as edges with latencies to be scheduled as
178   // soon as possible in a top-down schedule.
179   if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
180     return false;
181 
182   if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
183     return true;
184 
185   unsigned LHSNum = LHS->NodeNum;
186   unsigned RHSNum = RHS->NodeNum;
187 
188   // The most important heuristic is scheduling the critical path.
189   unsigned LHSLatency = PQ->getLatency(LHSNum);
190   unsigned RHSLatency = PQ->getLatency(RHSNum);
191   if (LHSLatency < RHSLatency) return true;
192   if (LHSLatency > RHSLatency) return false;
193 
194   // After that, if two nodes have identical latencies, look to see if one will
195   // unblock more other nodes than the other.
196   unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
197   unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
198   if (LHSBlocked < RHSBlocked) return true;
199   if (LHSBlocked > RHSBlocked) return false;
200 
201   // Finally, just to provide a stable ordering, use the node number as a
202   // deciding factor.
203   return LHSNum < RHSNum;
204 }
205 
206 
207 /// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
208 /// of SU, return it, otherwise return null.
209 SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
210   SUnit *OnlyAvailablePred = nullptr;
211   for (const SDep &Pred : SU->Preds) {
212     SUnit &PredSU = *Pred.getSUnit();
213     if (!PredSU.isScheduled) {
214       // We found an available, but not scheduled, predecessor.  If it's the
215       // only one we have found, keep track of it... otherwise give up.
216       if (OnlyAvailablePred && OnlyAvailablePred != &PredSU)
217         return nullptr;
218       OnlyAvailablePred = &PredSU;
219     }
220   }
221   return OnlyAvailablePred;
222 }
223 
224 void ResourcePriorityQueue::push(SUnit *SU) {
225   // Look at all of the successors of this node.  Count the number of nodes that
226   // this node is the sole unscheduled node for.
227   unsigned NumNodesBlocking = 0;
228   for (const SDep &Succ : SU->Succs)
229     if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
230       ++NumNodesBlocking;
231 
232   NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
233   Queue.push_back(SU);
234 }
235 
236 /// Check if scheduling of this SU is possible
237 /// in the current packet.
238 bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
239   if (!SU || !SU->getNode())
240     return false;
241 
242   // If this is a compound instruction,
243   // it is likely to be a call. Do not delay it.
244   if (SU->getNode()->getGluedNode())
245     return true;
246 
247   // First see if the pipeline could receive this instruction
248   // in the current cycle.
249   if (SU->getNode()->isMachineOpcode())
250     switch (SU->getNode()->getMachineOpcode()) {
251     default:
252       if (!ResourcesModel->canReserveResources(&TII->get(
253           SU->getNode()->getMachineOpcode())))
254            return false;
255       break;
256     case TargetOpcode::EXTRACT_SUBREG:
257     case TargetOpcode::INSERT_SUBREG:
258     case TargetOpcode::SUBREG_TO_REG:
259     case TargetOpcode::REG_SEQUENCE:
260     case TargetOpcode::IMPLICIT_DEF:
261         break;
262     }
263 
264   // Now see if there are no other dependencies
265   // to instructions already in the packet.
266   for (const SUnit *S : Packet)
267     for (const SDep &Succ : S->Succs) {
268       // Since we do not add pseudos to packets, might as well
269       // ignore order deps.
270       if (Succ.isCtrl())
271         continue;
272 
273       if (Succ.getSUnit() == SU)
274         return false;
275     }
276 
277   return true;
278 }
279 
280 /// Keep track of available resources.
281 void ResourcePriorityQueue::reserveResources(SUnit *SU) {
282   // If this SU does not fit in the packet
283   // start a new one.
284   if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
285     ResourcesModel->clearResources();
286     Packet.clear();
287   }
288 
289   if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
290     switch (SU->getNode()->getMachineOpcode()) {
291     default:
292       ResourcesModel->reserveResources(&TII->get(
293         SU->getNode()->getMachineOpcode()));
294       break;
295     case TargetOpcode::EXTRACT_SUBREG:
296     case TargetOpcode::INSERT_SUBREG:
297     case TargetOpcode::SUBREG_TO_REG:
298     case TargetOpcode::REG_SEQUENCE:
299     case TargetOpcode::IMPLICIT_DEF:
300       break;
301     }
302     Packet.push_back(SU);
303   }
304   // Forcefully end packet for PseudoOps.
305   else {
306     ResourcesModel->clearResources();
307     Packet.clear();
308   }
309 
310   // If packet is now full, reset the state so in the next cycle
311   // we start fresh.
312   if (Packet.size() >= InstrItins->SchedModel.IssueWidth) {
313     ResourcesModel->clearResources();
314     Packet.clear();
315   }
316 }
317 
318 int ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
319   int RegBalance = 0;
320 
321   if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
322     return RegBalance;
323 
324   // Gen estimate.
325   for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
326       MVT VT = SU->getNode()->getSimpleValueType(i);
327       if (TLI->isTypeLegal(VT)
328           && TLI->getRegClassFor(VT)
329           && TLI->getRegClassFor(VT)->getID() == RCId)
330         RegBalance += numberRCValSuccInSU(SU, RCId);
331   }
332   // Kill estimate.
333   for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
334       const SDValue &Op = SU->getNode()->getOperand(i);
335       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
336       if (isa<ConstantSDNode>(Op.getNode()))
337         continue;
338 
339       if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
340           && TLI->getRegClassFor(VT)->getID() == RCId)
341         RegBalance -= numberRCValPredInSU(SU, RCId);
342   }
343   return RegBalance;
344 }
345 
346 /// Estimates change in reg pressure from this SU.
347 /// It is achieved by trivial tracking of defined
348 /// and used vregs in dependent instructions.
349 /// The RawPressure flag makes this function to ignore
350 /// existing reg file sizes, and report raw def/use
351 /// balance.
352 int ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
353   int RegBalance = 0;
354 
355   if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
356     return RegBalance;
357 
358   if (RawPressure) {
359     for (const TargetRegisterClass *RC : TRI->regclasses())
360       RegBalance += rawRegPressureDelta(SU, RC->getID());
361   }
362   else {
363     for (const TargetRegisterClass *RC : TRI->regclasses()) {
364       if ((RegPressure[RC->getID()] +
365            rawRegPressureDelta(SU, RC->getID()) > 0) &&
366           (RegPressure[RC->getID()] +
367            rawRegPressureDelta(SU, RC->getID())  >= RegLimit[RC->getID()]))
368         RegBalance += rawRegPressureDelta(SU, RC->getID());
369     }
370   }
371 
372   return RegBalance;
373 }
374 
// Constants used to denote relative importance of
// heuristic components for cost computation.
// All of them are consumed by SUSchedulingCost().
// Bonus for nodes flagged isScheduleHigh.
static const unsigned PriorityOne = 200;
// Base bonus for call instructions.
static const unsigned PriorityTwo = 50;
// Bonus for INLINEASM / INLINEASM_BR nodes.
static const unsigned PriorityThree = 15;
// Bonus for TokenFactor / CopyFromReg / CopyToReg nodes.
static const unsigned PriorityFour = 5;
// Weight of the raw register pressure delta in the pressure-driven mode.
static const unsigned ScaleOne = 20;
// Weight of node height, solely-blocked count, and the (non-raw) register
// pressure delta.
static const unsigned ScaleTwo = 10;
// Per-result-value weight added to the call bonus.
static const unsigned ScaleThree = 5;
// Left-shift amount applied to the cost when resources are available.
static const unsigned FactorOne = 2;
385 
386 /// Returns single number reflecting benefit of scheduling SU
387 /// in the current cycle.
388 int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
389   // Initial trivial priority.
390   int ResCount = 1;
391 
392   // Do not waste time on a node that is already scheduled.
393   if (SU->isScheduled)
394     return ResCount;
395 
396   // Forced priority is high.
397   if (SU->isScheduleHigh)
398     ResCount += PriorityOne;
399 
400   // Adaptable scheduling
401   // A small, but very parallel
402   // region, where reg pressure is an issue.
403   if (HorizontalVerticalBalance > RegPressureThreshold) {
404     // Critical path first
405     ResCount += (SU->getHeight() * ScaleTwo);
406     // If resources are available for it, multiply the
407     // chance of scheduling.
408     if (isResourceAvailable(SU))
409       ResCount <<= FactorOne;
410 
411     // Consider change to reg pressure from scheduling
412     // this SU.
413     ResCount -= (regPressureDelta(SU,true) * ScaleOne);
414   }
415   // Default heuristic, greeady and
416   // critical path driven.
417   else {
418     // Critical path first.
419     ResCount += (SU->getHeight() * ScaleTwo);
420     // Now see how many instructions is blocked by this SU.
421     ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
422     // If resources are available for it, multiply the
423     // chance of scheduling.
424     if (isResourceAvailable(SU))
425       ResCount <<= FactorOne;
426 
427     ResCount -= (regPressureDelta(SU) * ScaleTwo);
428   }
429 
430   // These are platform-specific things.
431   // Will need to go into the back end
432   // and accessed from here via a hook.
433   for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
434     if (N->isMachineOpcode()) {
435       const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
436       if (TID.isCall())
437         ResCount += (PriorityTwo + (ScaleThree*N->getNumValues()));
438     }
439     else
440       switch (N->getOpcode()) {
441       default:  break;
442       case ISD::TokenFactor:
443       case ISD::CopyFromReg:
444       case ISD::CopyToReg:
445         ResCount += PriorityFour;
446         break;
447 
448       case ISD::INLINEASM:
449       case ISD::INLINEASM_BR:
450         ResCount += PriorityThree;
451         break;
452       }
453   }
454   return ResCount;
455 }
456 
457 
458 /// Main resource tracking point.
459 void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
460   // Use NULL entry as an event marker to reset
461   // the DFA state.
462   if (!SU) {
463     ResourcesModel->clearResources();
464     Packet.clear();
465     return;
466   }
467 
468   const SDNode *ScegN = SU->getNode();
469   // Update reg pressure tracking.
470   // First update current node.
471   if (ScegN->isMachineOpcode()) {
472     // Estimate generated regs.
473     for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
474       MVT VT = ScegN->getSimpleValueType(i);
475 
476       if (TLI->isTypeLegal(VT)) {
477         const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
478         if (RC)
479           RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
480       }
481     }
482     // Estimate killed regs.
483     for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
484       const SDValue &Op = ScegN->getOperand(i);
485       MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
486 
487       if (TLI->isTypeLegal(VT)) {
488         const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
489         if (RC) {
490           if (RegPressure[RC->getID()] >
491             (numberRCValPredInSU(SU, RC->getID())))
492             RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
493           else RegPressure[RC->getID()] = 0;
494         }
495       }
496     }
497     for (SDep &Pred : SU->Preds) {
498       if (Pred.isCtrl() || (Pred.getSUnit()->NumRegDefsLeft == 0))
499         continue;
500       --Pred.getSUnit()->NumRegDefsLeft;
501     }
502   }
503 
504   // Reserve resources for this SU.
505   reserveResources(SU);
506 
507   // Adjust number of parallel live ranges.
508   // Heuristic is simple - node with no data successors reduces
509   // number of live ranges. All others, increase it.
510   unsigned NumberNonControlDeps = 0;
511 
512   for (const SDep &Succ : SU->Succs) {
513     adjustPriorityOfUnscheduledPreds(Succ.getSUnit());
514     if (!Succ.isCtrl())
515       NumberNonControlDeps++;
516   }
517 
518   if (!NumberNonControlDeps) {
519     if (ParallelLiveRanges >= SU->NumPreds)
520       ParallelLiveRanges -= SU->NumPreds;
521     else
522       ParallelLiveRanges = 0;
523 
524   }
525   else
526     ParallelLiveRanges += SU->NumRegDefsLeft;
527 
528   // Track parallel live chains.
529   HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
530   HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
531 }
532 
533 void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
534   unsigned  NodeNumDefs = 0;
535   for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
536     if (N->isMachineOpcode()) {
537       const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
538       // No register need be allocated for this.
539       if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
540         NodeNumDefs = 0;
541         break;
542       }
543       NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
544     }
545     else
546       switch(N->getOpcode()) {
547         default:     break;
548         case ISD::CopyFromReg:
549           NodeNumDefs++;
550           break;
551         case ISD::INLINEASM:
552         case ISD::INLINEASM_BR:
553           NodeNumDefs++;
554           break;
555       }
556 
557   SU->NumRegDefsLeft = NodeNumDefs;
558 }
559 
560 /// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
561 /// scheduled.  If SU is not itself available, then there is at least one
562 /// predecessor node that has not been scheduled yet.  If SU has exactly ONE
563 /// unscheduled predecessor, we want to increase its priority: it getting
564 /// scheduled will make this node available, so it is better than some other
565 /// node of the same priority that will not make a node available.
566 void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
567   if (SU->isAvailable) return;  // All preds scheduled.
568 
569   SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
570   if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
571     return;
572 
573   // Okay, we found a single predecessor that is available, but not scheduled.
574   // Since it is available, it must be in the priority queue.  First remove it.
575   remove(OnlyAvailablePred);
576 
577   // Reinsert the node into the priority queue, which recomputes its
578   // NumNodesSolelyBlocking value.
579   push(OnlyAvailablePred);
580 }
581 
582 
583 /// Main access point - returns next instructions
584 /// to be placed in scheduling sequence.
585 SUnit *ResourcePriorityQueue::pop() {
586   if (empty())
587     return nullptr;
588 
589   std::vector<SUnit *>::iterator Best = Queue.begin();
590   if (!DisableDFASched) {
591     int BestCost = SUSchedulingCost(*Best);
592     for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I) {
593 
594       if (SUSchedulingCost(*I) > BestCost) {
595         BestCost = SUSchedulingCost(*I);
596         Best = I;
597       }
598     }
599   }
600   // Use default TD scheduling mechanism.
601   else {
602     for (auto I = std::next(Queue.begin()), E = Queue.end(); I != E; ++I)
603       if (Picker(*Best, *I))
604         Best = I;
605   }
606 
607   SUnit *V = *Best;
608   if (Best != std::prev(Queue.end()))
609     std::swap(*Best, Queue.back());
610 
611   Queue.pop_back();
612 
613   return V;
614 }
615 
616 
617 void ResourcePriorityQueue::remove(SUnit *SU) {
618   assert(!Queue.empty() && "Queue is empty!");
619   std::vector<SUnit *>::iterator I = find(Queue, SU);
620   if (I != std::prev(Queue.end()))
621     std::swap(*I, Queue.back());
622 
623   Queue.pop_back();
624 }
625