1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// Pass to pre-allocate WWM registers
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "GCNSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "llvm/ADT/PostOrderIterator.h"
19 #include "llvm/CodeGen/LiveIntervals.h"
20 #include "llvm/CodeGen/LiveRegMatrix.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineFunctionPass.h"
23 #include "llvm/CodeGen/RegisterClassInfo.h"
24 #include "llvm/CodeGen/VirtRegMap.h"
25 #include "llvm/InitializePasses.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
30 
31 namespace {
32 
33 class SIPreAllocateWWMRegs : public MachineFunctionPass {
34 private:
35   const SIInstrInfo *TII;
36   const SIRegisterInfo *TRI;
37   MachineRegisterInfo *MRI;
38   LiveIntervals *LIS;
39   LiveRegMatrix *Matrix;
40   VirtRegMap *VRM;
41   RegisterClassInfo RegClassInfo;
42 
43   std::vector<unsigned> RegsToRewrite;
44 #ifndef NDEBUG
45   void printWWMInfo(const MachineInstr &MI);
46 #endif
47 
48 public:
49   static char ID;
50 
51   SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
52     initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
53   }
54 
55   bool runOnMachineFunction(MachineFunction &MF) override;
56 
57   void getAnalysisUsage(AnalysisUsage &AU) const override {
58     AU.addRequired<LiveIntervals>();
59     AU.addPreserved<LiveIntervals>();
60     AU.addRequired<VirtRegMap>();
61     AU.addRequired<LiveRegMatrix>();
62     AU.addPreserved<SlotIndexes>();
63     AU.setPreservesCFG();
64     MachineFunctionPass::getAnalysisUsage(AU);
65   }
66 
67 private:
68   bool processDef(MachineOperand &MO);
69   void rewriteRegs(MachineFunction &MF);
70 };
71 
72 } // End anonymous namespace.
73 
74 INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
75                 "SI Pre-allocate WWM Registers", false, false)
76 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
77 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
78 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
79 INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
80                 "SI Pre-allocate WWM Registers", false, false)
81 
82 char SIPreAllocateWWMRegs::ID = 0;
83 
84 char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
85 
86 FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
87   return new SIPreAllocateWWMRegs();
88 }
89 
90 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
91   Register Reg = MO.getReg();
92   if (Reg.isPhysical())
93     return false;
94 
95   if (!TRI->isVGPR(*MRI, Reg))
96     return false;
97 
98   if (VRM->hasPhys(Reg))
99     return false;
100 
101   LiveInterval &LI = LIS->getInterval(Reg);
102 
103   for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
104     if (!MRI->isPhysRegUsed(PhysReg) &&
105         Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
106       Matrix->assign(LI, PhysReg);
107       assert(PhysReg != 0);
108       RegsToRewrite.push_back(Reg);
109       return true;
110     }
111   }
112 
113   llvm_unreachable("physreg not found for WWM expression");
114 }
115 
116 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
117   for (MachineBasicBlock &MBB : MF) {
118     for (MachineInstr &MI : MBB) {
119       for (MachineOperand &MO : MI.operands()) {
120         if (!MO.isReg())
121           continue;
122 
123         const Register VirtReg = MO.getReg();
124         if (VirtReg.isPhysical())
125           continue;
126 
127         if (!VRM->hasPhys(VirtReg))
128           continue;
129 
130         Register PhysReg = VRM->getPhys(VirtReg);
131         const unsigned SubReg = MO.getSubReg();
132         if (SubReg != 0) {
133           PhysReg = TRI->getSubReg(PhysReg, SubReg);
134           MO.setSubReg(0);
135         }
136 
137         MO.setReg(PhysReg);
138         MO.setIsRenamable(false);
139       }
140     }
141   }
142 
143   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
144 
145   for (unsigned Reg : RegsToRewrite) {
146     LIS->removeInterval(Reg);
147 
148     const Register PhysReg = VRM->getPhys(Reg);
149     assert(PhysReg != 0);
150 
151     MFI->reserveWWMRegister(PhysReg);
152   }
153 
154   RegsToRewrite.clear();
155 
156   // Update the set of reserved registers to include WWM ones.
157   MRI->freezeReservedRegs(MF);
158 }
159 
160 #ifndef NDEBUG
161 LLVM_DUMP_METHOD void
162 SIPreAllocateWWMRegs::printWWMInfo(const MachineInstr &MI) {
163 
164   unsigned Opc = MI.getOpcode();
165 
166   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::ENTER_STRICT_WQM ||
167       Opc == AMDGPU::ENTER_PSEUDO_WM) {
168     dbgs() << "Entering ";
169   } else {
170     assert(Opc == AMDGPU::EXIT_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WQM ||
171            Opc == AMDGPU::EXIT_PSEUDO_WM);
172     dbgs() << "Exiting ";
173   }
174 
175   if (Opc == AMDGPU::ENTER_STRICT_WWM || Opc == AMDGPU::EXIT_STRICT_WWM) {
176     dbgs() << "Strict WWM ";
177   } else if (Opc == AMDGPU::ENTER_PSEUDO_WM || Opc == AMDGPU::EXIT_PSEUDO_WM) {
178     dbgs() << "Pseudo WWM/WQM ";
179   } else {
180     assert(Opc == AMDGPU::ENTER_STRICT_WQM || Opc == AMDGPU::EXIT_STRICT_WQM);
181     dbgs() << "Strict WQM ";
182   }
183 
184   dbgs() << "region: " << MI;
185 }
186 
187 #endif
188 
189 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
190   LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
191 
192   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
193 
194   TII = ST.getInstrInfo();
195   TRI = &TII->getRegisterInfo();
196   MRI = &MF.getRegInfo();
197 
198   LIS = &getAnalysis<LiveIntervals>();
199   Matrix = &getAnalysis<LiveRegMatrix>();
200   VRM = &getAnalysis<VirtRegMap>();
201 
202   RegClassInfo.runOnMachineFunction(MF);
203 
204   bool RegsAssigned = false;
205 
206   // We use a reverse post-order traversal of the control-flow graph to
207   // guarantee that we visit definitions in dominance order. Since WWM
208   // expressions are guaranteed to never involve phi nodes, and we can only
209   // escape WWM through the special WWM instruction, this means that this is a
210   // perfect elimination order, so we can never do any better.
211   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
212 
213   for (MachineBasicBlock *MBB : RPOT) {
214     bool InWWM = false;
215     for (MachineInstr &MI : *MBB) {
216       if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
217           MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
218         RegsAssigned |= processDef(MI.getOperand(0));
219 
220       if (MI.getOpcode() == AMDGPU::ENTER_STRICT_WWM ||
221           MI.getOpcode() == AMDGPU::ENTER_STRICT_WQM ||
222           MI.getOpcode() == AMDGPU::ENTER_PSEUDO_WM) {
223         LLVM_DEBUG(printWWMInfo(MI));
224         InWWM = true;
225         continue;
226       }
227 
228       if (MI.getOpcode() == AMDGPU::EXIT_STRICT_WWM ||
229           MI.getOpcode() == AMDGPU::EXIT_STRICT_WQM ||
230           MI.getOpcode() == AMDGPU::EXIT_PSEUDO_WM) {
231         LLVM_DEBUG(printWWMInfo(MI));
232         InWWM = false;
233       }
234 
235       if (!InWWM)
236         continue;
237 
238       LLVM_DEBUG(dbgs() << "Processing " << MI);
239 
240       for (MachineOperand &DefOpnd : MI.defs()) {
241         RegsAssigned |= processDef(DefOpnd);
242       }
243     }
244   }
245 
246   if (!RegsAssigned)
247     return false;
248 
249   rewriteRegs(MF);
250   return true;
251 }
252