1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// Pass to pre-allocate WWM registers
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "GCNSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "llvm/ADT/PostOrderIterator.h"
19 #include "llvm/CodeGen/LiveIntervals.h"
20 #include "llvm/CodeGen/LiveRegMatrix.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/RegisterClassInfo.h"
24 #include "llvm/CodeGen/VirtRegMap.h"
25 #include "llvm/InitializePasses.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
30 
31 static cl::opt<bool>
32     EnablePreallocateSGPRSpillVGPRs("amdgpu-prealloc-sgpr-spill-vgprs",
33                                     cl::init(false), cl::Hidden);
34 
35 namespace {
36 
37 class SIPreAllocateWWMRegs : public MachineFunctionPass {
38 private:
39   const SIInstrInfo *TII;
40   const SIRegisterInfo *TRI;
41   MachineRegisterInfo *MRI;
42   LiveIntervals *LIS;
43   LiveRegMatrix *Matrix;
44   VirtRegMap *VRM;
45   RegisterClassInfo RegClassInfo;
46 
47   std::vector<unsigned> RegsToRewrite;
48 #ifndef NDEBUG
49   void printWWMInfo(const MachineInstr &MI);
50 #endif
51 
52 public:
53   static char ID;
54 
SIPreAllocateWWMRegs()55   SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
56     initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
57   }
58 
59   bool runOnMachineFunction(MachineFunction &MF) override;
60 
getAnalysisUsage(AnalysisUsage & AU) const61   void getAnalysisUsage(AnalysisUsage &AU) const override {
62     AU.addRequired<LiveIntervals>();
63     AU.addRequired<VirtRegMap>();
64     AU.addRequired<LiveRegMatrix>();
65     AU.setPreservesAll();
66     MachineFunctionPass::getAnalysisUsage(AU);
67   }
68 
69 private:
70   bool processDef(MachineOperand &MO);
71   void rewriteRegs(MachineFunction &MF);
72 };
73 
74 } // End anonymous namespace.
75 
76 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
77                 "SI Pre-allocate WWM Registers", false, false)
78 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
79 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
80 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
81 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
82                 "SI Pre-allocate WWM Registers", false, false)
83 
84 char SIPreAllocateWWMRegs::ID = 0;
85 
86 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
87 
createSIPreAllocateWWMRegsPass()88 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
89   return new SIPreAllocateWWMRegs();
90 }
91 
processDef(MachineOperand & MO)92 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
93   Register Reg = MO.getReg();
94   if (Reg.isPhysical())
95     return false;
96 
97   if (!TRI->isVGPR(*MRI, Reg))
98     return false;
99 
100   if (VRM->hasPhys(Reg))
101     return false;
102 
103   LiveInterval &LI = LIS->getInterval(Reg);
104 
105   for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
106     if (!MRI->isPhysRegUsed(PhysReg, /*SkipRegMaskTest=*/true) &&
107         Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
108       Matrix->assign(LI, PhysReg);
109       assert(PhysReg != 0);
110       RegsToRewrite.push_back(Reg);
111       return true;
112     }
113   }
114 
115   llvm_unreachable("physreg not found for WWM expression");
116 }
117 
rewriteRegs(MachineFunction & MF)118 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
119   for (MachineBasicBlock &MBB : MF) {
120     for (MachineInstr &MI : MBB) {
121       for (MachineOperand &MO : MI.operands()) {
122         if (!MO.isReg())
123           continue;
124 
125         const Register VirtReg = MO.getReg();
126         if (VirtReg.isPhysical())
127           continue;
128 
129         if (!VRM->hasPhys(VirtReg))
130           continue;
131 
132         Register PhysReg = VRM->getPhys(VirtReg);
133         const unsigned SubReg = MO.getSubReg();
134         if (SubReg != 0) {
135           PhysReg = TRI->getSubReg(PhysReg, SubReg);
136           MO.setSubReg(0);
137         }
138 
139         MO.setReg(PhysReg);
140         MO.setIsRenamable(false);
141       }
142     }
143   }
144 
145   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
146 
147   for (unsigned Reg : RegsToRewrite) {
148     LIS->removeInterval(Reg);
149 
150     const Register PhysReg = VRM->getPhys(Reg);
151     assert(PhysReg != 0);
152 
153     MFI->reserveWWMRegister(PhysReg);
154   }
155 
156   RegsToRewrite.clear();
157 
158   // Update the set of reserved registers to include WWM ones.
159   MRI->freezeReservedRegs(MF);
160 }
161 
162 #ifndef NDEBUG
163 LLVM_DUMP_METHOD void
printWWMInfo(const MachineInstr & MI)164 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
165 
166   unsigned Opc = MI.getOpcode();
167 
168   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM ||
169       Opc == AMDGPU::ENTER_PSEUDO_WM) {
170     dbgs() << "Entering ";
171   } else {
172     assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM ||
173            Opc == AMDGPU::EXIT_PSEUDO_WM);
174     dbgs() << "Exiting ";
175   }
176 
177   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
178     dbgs() << "Strict WWM ";
179   } else if (Opc == AMDGPU::ENTER_PSEUDO_WM || Opc == AMDGPU::EXIT_PSEUDO_WM) {
180     dbgs() << "Pseudo WWM/WQM ";
181   } else {
182     assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
183     dbgs() << "Strict WQM ";
184   }
185 
186   dbgs() << "region: " << MI;
187 }
188 
189 #endif
190 
runOnMachineFunction(MachineFunction & MF)191 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
192   LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
193 
194   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
195 
196   TII = ST.getInstrInfo();
197   TRI = &TII->getRegisterInfo();
198   MRI = &MF.getRegInfo();
199 
200   LIS = &getAnalysis<LiveIntervals>();
201   Matrix = &getAnalysis<LiveRegMatrix>();
202   VRM = &getAnalysis<VirtRegMap>();
203 
204   RegClassInfo.runOnMachineFunction(MF);
205 
206   bool PreallocateSGPRSpillVGPRs =
207       EnablePreallocateSGPRSpillVGPRs ||
208       MF.getFunction().hasFnAttribute("amdgpu-prealloc-sgpr-spill-vgprs");
209 
210   bool RegsAssigned = false;
211 
212   // We use a reverse post-order traversal of the control-flow graph to
213   // guarantee that we visit definitions in dominance order. Since WWM
214   // expressions are guaranteed to never involve phi nodes, and we can only
215   // escape WWM through the special WWM instruction, this means that this is a
216   // perfect elimination order, so we can never do any better.
217   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
218 
219   for (MachineBasicBlock *MBB : RPOT) {
220     bool InWWM = false;
221     for (MachineInstr &MI : *MBB) {
222       if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
223           MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
224         RegsAssigned |= processDef(MI.getOperand(0));
225 
226       if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR) {
227         if (!PreallocateSGPRSpillVGPRs)
228           continue;
229         RegsAssigned |= processDef(MI.getOperand(0));
230       }
231 
232       if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
233           MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM ||
234           MI.getOpcode() == AMDGPU::ENTER_PSEUDO_WM) {
235         LLVM_DEBUG(printWWMInfo(MI));
236         InWWM = true;
237         continue;
238       }
239 
240       if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
241           MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM ||
242           MI.getOpcode() == AMDGPU::EXIT_PSEUDO_WM) {
243         LLVM_DEBUG(printWWMInfo(MI));
244         InWWM = false;
245       }
246 
247       if (!InWWM)
248         continue;
249 
250       LLVM_DEBUG(dbgs() << "Processing " << MI);
251 
252       for (MachineOperand &DefOpnd : MI.defs()) {
253         RegsAssigned |= processDef(DefOpnd);
254       }
255     }
256   }
257 
258   if (!RegsAssigned)
259     return false;
260 
261   rewriteRegs(MF);
262   return true;
263 }
264