1 //===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// Pass to pre-allocate WWM registers
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "GCNSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIMachineFunctionInfo.h"
18 #include "llvm/ADT/PostOrderIterator.h"
19 #include "llvm/CodeGen/LiveIntervals.h"
20 #include "llvm/CodeGen/LiveRegMatrix.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/InitializePasses.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "si-pre-allocate-wwm-regs"
27 
28 namespace {
29 
30 class SIPreAllocateWWMRegs : public MachineFunctionPass {
31 private:
32   const SIInstrInfo *TII;
33   const SIRegisterInfo *TRI;
34   MachineRegisterInfo *MRI;
35   LiveIntervals *LIS;
36   LiveRegMatrix *Matrix;
37   VirtRegMap *VRM;
38   RegisterClassInfo RegClassInfo;
39 
40   std::vector<unsigned> RegsToRewrite;
41 
42 public:
43   static char ID;
44 
45   SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
46     initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
47   }
48 
49   bool runOnMachineFunction(MachineFunction &MF) override;
50 
51   void getAnalysisUsage(AnalysisUsage &AU) const override {
52     AU.addRequired<LiveIntervals>();
53     AU.addPreserved<LiveIntervals>();
54     AU.addRequired<VirtRegMap>();
55     AU.addRequired<LiveRegMatrix>();
56     AU.addPreserved<SlotIndexes>();
57     AU.setPreservesCFG();
58     MachineFunctionPass::getAnalysisUsage(AU);
59   }
60 
61 private:
62   bool processDef(MachineOperand &MO);
63   void rewriteRegs(MachineFunction &MF);
64 };
65 
66 } // End anonymous namespace.
67 
// Pass registration. The pass is registered under the DEBUG_TYPE string
// ("si-pre-allocate-wwm-regs") and lists the three analyses it requests in
// getAnalysisUsage (LiveIntervals, VirtRegMap, LiveRegMatrix) as dependencies.
INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
                "SI Pre-allocate WWM Registers", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
                "SI Pre-allocate WWM Registers", false, false)
75 
// Storage for the pass identifier that the constructor hands to
// MachineFunctionPass.
char SIPreAllocateWWMRegs::ID = 0;

// Reference to the pass identifier, defined in namespace llvm so that code
// outside this file can name the pass without seeing the class.
char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
79 
/// Factory for the pass. Returns a newly heap-allocated SIPreAllocateWWMRegs;
/// the caller receives the raw pointer (presumably a pass manager takes
/// ownership -- confirm at the call site).
FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
  return new SIPreAllocateWWMRegs();
}
83 
84 bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
85   if (!MO.isReg())
86     return false;
87 
88   Register Reg = MO.getReg();
89   if (Reg.isPhysical())
90     return false;
91 
92   if (!TRI->isVGPR(*MRI, Reg))
93     return false;
94 
95   if (VRM->hasPhys(Reg))
96     return false;
97 
98   LiveInterval &LI = LIS->getInterval(Reg);
99 
100   for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
101     if (!MRI->isPhysRegUsed(PhysReg) &&
102         Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
103       Matrix->assign(LI, PhysReg);
104       assert(PhysReg != 0);
105       RegsToRewrite.push_back(Reg);
106       return true;
107     }
108   }
109 
110   llvm_unreachable("physreg not found for WWM expression");
111   return false;
112 }
113 
114 void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
115   for (MachineBasicBlock &MBB : MF) {
116     for (MachineInstr &MI : MBB) {
117       for (MachineOperand &MO : MI.operands()) {
118         if (!MO.isReg())
119           continue;
120 
121         const Register VirtReg = MO.getReg();
122         if (VirtReg.isPhysical())
123           continue;
124 
125         if (!VRM->hasPhys(VirtReg))
126           continue;
127 
128         Register PhysReg = VRM->getPhys(VirtReg);
129         const unsigned SubReg = MO.getSubReg();
130         if (SubReg != 0) {
131           PhysReg = TRI->getSubReg(PhysReg, SubReg);
132           MO.setSubReg(0);
133         }
134 
135         MO.setReg(PhysReg);
136         MO.setIsRenamable(false);
137       }
138     }
139   }
140 
141   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
142 
143   for (unsigned Reg : RegsToRewrite) {
144     LIS->removeInterval(Reg);
145 
146     const Register PhysReg = VRM->getPhys(Reg);
147     assert(PhysReg != 0);
148     MFI->ReserveWWMRegister(PhysReg);
149   }
150 
151   RegsToRewrite.clear();
152 
153   // Update the set of reserved registers to include WWM ones.
154   MRI->freezeReservedRegs(MF);
155 }
156 
157 bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
158   LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
159 
160   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
161 
162   TII = ST.getInstrInfo();
163   TRI = &TII->getRegisterInfo();
164   MRI = &MF.getRegInfo();
165 
166   LIS = &getAnalysis<LiveIntervals>();
167   Matrix = &getAnalysis<LiveRegMatrix>();
168   VRM = &getAnalysis<VirtRegMap>();
169 
170   RegClassInfo.runOnMachineFunction(MF);
171 
172   bool RegsAssigned = false;
173 
174   // We use a reverse post-order traversal of the control-flow graph to
175   // guarantee that we visit definitions in dominance order. Since WWM
176   // expressions are guaranteed to never involve phi nodes, and we can only
177   // escape WWM through the special WWM instruction, this means that this is a
178   // perfect elimination order, so we can never do any better.
179   ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
180 
181   for (MachineBasicBlock *MBB : RPOT) {
182     bool InWWM = false;
183     for (MachineInstr &MI : *MBB) {
184       if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
185           MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
186         RegsAssigned |= processDef(MI.getOperand(0));
187 
188       if (MI.getOpcode() == AMDGPU::ENTER_WWM) {
189         LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n");
190         InWWM = true;
191         continue;
192       }
193 
194       if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
195         LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n");
196         InWWM = false;
197       }
198 
199       if (!InWWM)
200         continue;
201 
202       LLVM_DEBUG(dbgs() << "processing " << MI << "\n");
203 
204       for (MachineOperand &DefOpnd : MI.defs()) {
205         RegsAssigned |= processDef(DefOpnd);
206       }
207     }
208   }
209 
210   if (!RegsAssigned)
211     return false;
212 
213   rewriteRegs(MF);
214   return true;
215 }
216