1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// Pass to pre-allocate WWM registers
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "GCNSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "llvm/ADT/PostOrderIterator.h"
19 #include "llvm/CodeGen/LiveIntervals.h"
20 #include "llvm/CodeGen/LiveRegMatrix.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/InitializePasses.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
27 
28 namespace {
29 
30 class SIPreAllocateWWMRegs : public MachineFunctionPass {
31 private:
32   const SIInstrInfo *TII;
33   const SIRegisterInfo *TRI;
34   MachineRegisterInfo *MRI;
35   LiveIntervals *LIS;
36   LiveRegMatrix *Matrix;
37   VirtRegMap *VRM;
38   RegisterClassInfo RegClassInfo;
39 
40   std::vector<unsigned> RegsToRewrite;
41 #ifndef NDEBUG
42   void printWWMInfo(const MachineInstr &MI);
43 #endif
44 
45 public:
46   static char ID;
47 
48   SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
49     initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
50   }
51 
52   bool runOnMachineFunction(MachineFunction &MF) override;
53 
54   void getAnalysisUsage(AnalysisUsage &AU) const override {
55     AU.addRequired<LiveIntervals>();
56     AU.addPreserved<LiveIntervals>();
57     AU.addRequired<VirtRegMap>();
58     AU.addRequired<LiveRegMatrix>();
59     AU.addPreserved<SlotIndexes>();
60     AU.setPreservesCFG();
61     MachineFunctionPass::getAnalysisUsage(AU);
62   }
63 
64 private:
65   bool processDef(MachineOperand &MO);
66   void rewriteRegs(MachineFunction &MF);
67 };
68 
69 } // End anonymous namespace.
70 
71 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
72                 "SI Pre-allocate WWM Registers", false, false)
73 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
74 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
75 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
76 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
77                 "SI Pre-allocate WWM Registers", false, false)
78 
79 char SIPreAllocateWWMRegs::ID = 0;
80 
81 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
82 
83 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
84   return new SIPreAllocateWWMRegs();
85 }
86 
87 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
88   if (!MO.isReg())
89     return false;
90 
91   Register Reg = MO.getReg();
92   if (Reg.isPhysical())
93     return false;
94 
95   if (!TRI->isVGPR(*MRI, Reg))
96     return false;
97 
98   if (VRM->hasPhys(Reg))
99     return false;
100 
101   LiveInterval &LI = LIS->getInterval(Reg);
102 
103   for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
104     if (!MRI->isPhysRegUsed(PhysReg) &&
105         Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
106       Matrix->assign(LI, PhysReg);
107       assert(PhysReg != 0);
108       RegsToRewrite.push_back(Reg);
109       return true;
110     }
111   }
112 
113   llvm_unreachable("physreg not found for WWM expression");
114   return false;
115 }
116 
117 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
118   for (MachineBasicBlock &MBB : MF) {
119     for (MachineInstr &MI : MBB) {
120       for (MachineOperand &MO : MI.operands()) {
121         if (!MO.isReg())
122           continue;
123 
124         const Register VirtReg = MO.getReg();
125         if (VirtReg.isPhysical())
126           continue;
127 
128         if (!VRM->hasPhys(VirtReg))
129           continue;
130 
131         Register PhysReg = VRM->getPhys(VirtReg);
132         const unsigned SubReg = MO.getSubReg();
133         if (SubReg != 0) {
134           PhysReg = TRI->getSubReg(PhysReg, SubReg);
135           MO.setSubReg(0);
136         }
137 
138         MO.setReg(PhysReg);
139         MO.setIsRenamable(false);
140       }
141     }
142   }
143 
144   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
145   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
146 
147   for (unsigned Reg : RegsToRewrite) {
148     LIS->removeInterval(Reg);
149 
150     const Register PhysReg = VRM->getPhys(Reg);
151     assert(PhysReg != 0);
152 
153     // Check if PhysReg is already reserved
154     if (!MFI->WWMReservedRegs.count(PhysReg)) {
155       Optional<int> FI;
156       if (!MFI->isEntryFunction()) {
157         // Create a stack object for a possible spill in the function prologue.
158         // Note: Non-CSR VGPR also need this as we may overwrite inactive lanes.
159         const TargetRegisterClass *RC = TRI->getPhysRegClass(PhysReg);
160         FI = FrameInfo.CreateSpillStackObject(TRI->getSpillSize(*RC),
161                                               TRI->getSpillAlign(*RC));
162       }
163       MFI->reserveWWMRegister(PhysReg, FI);
164     }
165   }
166 
167   RegsToRewrite.clear();
168 
169   // Update the set of reserved registers to include WWM ones.
170   MRI->freezeReservedRegs(MF);
171 }
172 
173 #ifndef NDEBUG
174 LLVM_DUMP_METHOD void
175 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
176 
177   unsigned Opc = MI.getOpcode();
178 
179   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM) {
180     dbgs() << "Entering ";
181   } else {
182     assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM);
183     dbgs() << "Exiting ";
184   }
185 
186   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
187     dbgs() << "Strict WWM ";
188   } else {
189     assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
190     dbgs() << "Strict WQM ";
191   }
192 
193   dbgs() << "region: " << MI;
194 }
195 
196 #endif
197 
198 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
199   LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
200 
201   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
202 
203   TII = ST.getInstrInfo();
204   TRI = &TII->getRegisterInfo();
205   MRI = &MF.getRegInfo();
206 
207   LIS = &getAnalysis<LiveIntervals>();
208   Matrix = &getAnalysis<LiveRegMatrix>();
209   VRM = &getAnalysis<VirtRegMap>();
210 
211   RegClassInfo.runOnMachineFunction(MF);
212 
213   bool RegsAssigned = false;
214 
215   // We use a reverse post-order traversal of the control-flow graph to
216   // guarantee that we visit definitions in dominance order. Since WWM
217   // expressions are guaranteed to never involve phi nodes, and we can only
218   // escape WWM through the special WWM instruction, this means that this is a
219   // perfect elimination order, so we can never do any better.
220   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
221 
222   for (MachineBasicBlock *MBB : RPOT) {
223     bool InWWM = false;
224     for (MachineInstr &MI : *MBB) {
225       if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
226           MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
227         RegsAssigned |= processDef(MI.getOperand(0));
228 
229       if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
230           MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM) {
231         LLVM_DEBUG(printWWMInfo(MI));
232         InWWM = true;
233         continue;
234       }
235 
236       if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
237           MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM) {
238         LLVM_DEBUG(printWWMInfo(MI));
239         InWWM = false;
240       }
241 
242       if (!InWWM)
243         continue;
244 
245       LLVM_DEBUG(dbgs() << "Processing " << MI);
246 
247       for (MachineOperand &DefOpnd : MI.defs()) {
248         RegsAssigned |= processDef(DefOpnd);
249       }
250     }
251   }
252 
253   if (!RegsAssigned)
254     return false;
255 
256   rewriteRegs(MF);
257   return true;
258 }
259