1 //===- AMDGPUSetWavePriority.cpp - Set wave priority ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// Pass to temporarily raise the wave priority from the start of the
/// shader function until its last VMEM instructions, to allow younger
/// waves to issue their VMEM instructions as well.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIInstrInfo.h"
20 #include "llvm/ADT/PostOrderIterator.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/InitializePasses.h"
23 #include "llvm/Support/Allocator.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "amdgpu-set-wave-priority"
28 
29 static cl::opt<unsigned> DefaultVALUInstsThreshold(
30     "amdgpu-set-wave-priority-valu-insts-threshold",
31     cl::desc("VALU instruction count threshold for adjusting wave priority"),
32     cl::init(100), cl::Hidden);
33 
34 namespace {
35 
36 struct MBBInfo {
37   MBBInfo() = default;
38   unsigned NumVALUInstsAtStart = 0;
39   bool MayReachVMEMLoad = false;
40   MachineInstr *LastVMEMLoad = nullptr;
41 };
42 
43 using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;
44 
45 class AMDGPUSetWavePriority : public MachineFunctionPass {
46 public:
47   static char ID;
48 
AMDGPUSetWavePriority()49   AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}
50 
getPassName() const51   StringRef getPassName() const override { return "Set wave priority"; }
52 
53   bool runOnMachineFunction(MachineFunction &MF) override;
54 
55 private:
56   MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
57                                MachineBasicBlock::iterator I,
58                                unsigned priority) const;
59 
60   const SIInstrInfo *TII;
61 };
62 
63 } // End anonymous namespace.
64 
// Pass initialization boilerplate: associates the pass class with the
// DEBUG_TYPE argument string and the readable name "Set wave priority".
INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
                false)

// Definition of the static ID member that the constructor passes to
// MachineFunctionPass.
char AMDGPUSetWavePriority::ID = 0;
69 
createAMDGPUSetWavePriorityPass()70 FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
71   return new AMDGPUSetWavePriority();
72 }
73 
74 MachineInstr *
BuildSetprioMI(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,unsigned priority) const75 AMDGPUSetWavePriority::BuildSetprioMI(MachineBasicBlock &MBB,
76                                       MachineBasicBlock::iterator I,
77                                       unsigned priority) const {
78   return BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SETPRIO))
79       .addImm(priority);
80 }
81 
82 // Checks that for every predecessor Pred that can reach a VMEM load,
83 // none of Pred's successors can reach a VMEM load.
CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock & MBB,MBBInfoSet & MBBInfos)84 static bool CanLowerPriorityDirectlyInPredecessors(const MachineBasicBlock &MBB,
85                                                    MBBInfoSet &MBBInfos) {
86   for (const MachineBasicBlock *Pred : MBB.predecessors()) {
87     if (!MBBInfos[Pred].MayReachVMEMLoad)
88       continue;
89     for (const MachineBasicBlock *Succ : Pred->successors()) {
90       if (MBBInfos[Succ].MayReachVMEMLoad)
91         return false;
92     }
93   }
94   return true;
95 }
96 
isVMEMLoad(const MachineInstr & MI)97 static bool isVMEMLoad(const MachineInstr &MI) {
98   return SIInstrInfo::isVMEM(MI) && MI.mayLoad();
99 }
100 
runOnMachineFunction(MachineFunction & MF)101 bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {
102   const unsigned HighPriority = 3;
103   const unsigned LowPriority = 0;
104 
105   Function &F = MF.getFunction();
106   if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
107     return false;
108 
109   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
110   TII = ST.getInstrInfo();
111 
112   unsigned VALUInstsThreshold = DefaultVALUInstsThreshold;
113   Attribute A = F.getFnAttribute("amdgpu-wave-priority-threshold");
114   if (A.isValid())
115     A.getValueAsString().getAsInteger(0, VALUInstsThreshold);
116 
117   // Find VMEM loads that may be executed before long-enough sequences of
118   // VALU instructions. We currently assume that backedges/loops, branch
119   // probabilities and other details can be ignored, so we essentially
120   // determine the largest number of VALU instructions along every
121   // possible path from the start of the function that may potentially be
122   // executed provided no backedge is ever taken.
123   MBBInfoSet MBBInfos;
124   for (MachineBasicBlock *MBB : post_order(&MF)) {
125     bool AtStart = true;
126     unsigned MaxNumVALUInstsInMiddle = 0;
127     unsigned NumVALUInstsAtEnd = 0;
128     for (MachineInstr &MI : *MBB) {
129       if (isVMEMLoad(MI)) {
130         AtStart = false;
131         MBBInfo &Info = MBBInfos[MBB];
132         Info.NumVALUInstsAtStart = 0;
133         MaxNumVALUInstsInMiddle = 0;
134         NumVALUInstsAtEnd = 0;
135         Info.LastVMEMLoad = &MI;
136       } else if (SIInstrInfo::isDS(MI)) {
137         AtStart = false;
138         MaxNumVALUInstsInMiddle =
139             std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
140         NumVALUInstsAtEnd = 0;
141       } else if (SIInstrInfo::isVALU(MI)) {
142         if (AtStart)
143           ++MBBInfos[MBB].NumVALUInstsAtStart;
144         ++NumVALUInstsAtEnd;
145       }
146     }
147 
148     bool SuccsMayReachVMEMLoad = false;
149     unsigned NumFollowingVALUInsts = 0;
150     for (const MachineBasicBlock *Succ : MBB->successors()) {
151       SuccsMayReachVMEMLoad |= MBBInfos[Succ].MayReachVMEMLoad;
152       NumFollowingVALUInsts =
153           std::max(NumFollowingVALUInsts, MBBInfos[Succ].NumVALUInstsAtStart);
154     }
155     MBBInfo &Info = MBBInfos[MBB];
156     if (AtStart)
157       Info.NumVALUInstsAtStart += NumFollowingVALUInsts;
158     NumVALUInstsAtEnd += NumFollowingVALUInsts;
159 
160     unsigned MaxNumVALUInsts =
161         std::max(MaxNumVALUInstsInMiddle, NumVALUInstsAtEnd);
162     Info.MayReachVMEMLoad =
163         SuccsMayReachVMEMLoad ||
164         (Info.LastVMEMLoad && MaxNumVALUInsts >= VALUInstsThreshold);
165   }
166 
167   MachineBasicBlock &Entry = MF.front();
168   if (!MBBInfos[&Entry].MayReachVMEMLoad)
169     return false;
170 
171   // Raise the priority at the beginning of the shader.
172   MachineBasicBlock::iterator I = Entry.begin(), E = Entry.end();
173   while (I != E && !SIInstrInfo::isVALU(*I) && !I->isTerminator())
174     ++I;
175   BuildSetprioMI(Entry, I, HighPriority);
176 
177   // Lower the priority on edges where control leaves blocks from which
178   // the VMEM loads are reachable.
179   SmallSet<MachineBasicBlock *, 16> PriorityLoweringBlocks;
180   for (MachineBasicBlock &MBB : MF) {
181     if (MBBInfos[&MBB].MayReachVMEMLoad) {
182       if (MBB.succ_empty())
183         PriorityLoweringBlocks.insert(&MBB);
184       continue;
185     }
186 
187     if (CanLowerPriorityDirectlyInPredecessors(MBB, MBBInfos)) {
188       for (MachineBasicBlock *Pred : MBB.predecessors()) {
189         if (MBBInfos[Pred].MayReachVMEMLoad)
190           PriorityLoweringBlocks.insert(Pred);
191       }
192       continue;
193     }
194 
195     // Where lowering the priority in predecessors is not possible, the
196     // block receiving control either was not part of a loop in the first
197     // place or the loop simplification/canonicalization pass should have
198     // already tried to split the edge and insert a preheader, and if for
199     // whatever reason it failed to do so, then this leaves us with the
200     // only option of lowering the priority within the loop.
201     PriorityLoweringBlocks.insert(&MBB);
202   }
203 
204   for (MachineBasicBlock *MBB : PriorityLoweringBlocks) {
205     BuildSetprioMI(
206         *MBB,
207         MBBInfos[MBB].LastVMEMLoad
208             ? std::next(MachineBasicBlock::iterator(MBBInfos[MBB].LastVMEMLoad))
209             : MBB->begin(),
210         LowPriority);
211   }
212 
213   return true;
214 }
215