//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10 // SGPR spills, so must insert CSR SGPR spills as well as expand them.
11 //
12 // This pass must never create new SGPR virtual registers.
13 //
14 // FIXME: Must stop RegScavenger spills in later passes.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "AMDGPU.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIInstrInfo.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/LiveIntervals.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/VirtRegMap.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Target/TargetMachine.h"
32 
33 using namespace llvm;
34 
35 #define DEBUG_TYPE "si-lower-sgpr-spills"
36 
37 using MBBVector = SmallVector<MachineBasicBlock *, 4>;
38 
39 namespace {
40 
41 static cl::opt<bool> EnableSpillVGPRToAGPR(
42   "amdgpu-spill-vgpr-to-agpr",
43   cl::desc("Enable spilling VGPRs to AGPRs"),
44   cl::ReallyHidden,
45   cl::init(true));
46 
47 class SILowerSGPRSpills : public MachineFunctionPass {
48 private:
49   const SIRegisterInfo *TRI = nullptr;
50   const SIInstrInfo *TII = nullptr;
51   VirtRegMap *VRM = nullptr;
52   LiveIntervals *LIS = nullptr;
53 
54   // Save and Restore blocks of the current function. Typically there is a
55   // single save block, unless Windows EH funclets are involved.
56   MBBVector SaveBlocks;
57   MBBVector RestoreBlocks;
58 
59 public:
60   static char ID;
61 
62   SILowerSGPRSpills() : MachineFunctionPass(ID) {}
63 
64   void calculateSaveRestoreBlocks(MachineFunction &MF);
65   bool spillCalleeSavedRegs(MachineFunction &MF);
66 
67   bool runOnMachineFunction(MachineFunction &MF) override;
68 
69   void getAnalysisUsage(AnalysisUsage &AU) const override {
70     AU.setPreservesAll();
71     MachineFunctionPass::getAnalysisUsage(AU);
72   }
73 };
74 
75 } // end anonymous namespace
76 
// Storage for the pass identification object declared in the class; it is
// passed to the MachineFunctionPass constructor.
char SILowerSGPRSpills::ID = 0;

// Pass registration. DEBUG_TYPE ("si-lower-sgpr-spills") is used as the pass
// argument string, and VirtRegMap is listed as an initialization dependency.
INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
                      "SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
                    "SI lower SGPR spill instructions", false, false)

// Reference to the pass ID exported in the llvm namespace so code outside
// this file can name the pass.
char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
86 
87 /// Insert restore code for the callee-saved registers used in the function.
88 static void insertCSRSaves(MachineBasicBlock &SaveBlock,
89                            ArrayRef<CalleeSavedInfo> CSI,
90                            LiveIntervals *LIS) {
91   MachineFunction &MF = *SaveBlock.getParent();
92   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
93   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
94   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
95 
96   MachineBasicBlock::iterator I = SaveBlock.begin();
97   if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
98     for (const CalleeSavedInfo &CS : CSI) {
99       // Insert the spill to the stack frame.
100       unsigned Reg = CS.getReg();
101 
102       MachineInstrSpan MIS(I, &SaveBlock);
103       const TargetRegisterClass *RC =
104         TRI->getMinimalPhysRegClass(Reg, MVT::i32);
105 
106       TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
107                               TRI);
108 
109       if (LIS) {
110         assert(std::distance(MIS.begin(), I) == 1);
111         MachineInstr &Inst = *std::prev(I);
112 
113         LIS->InsertMachineInstrInMaps(Inst);
114         LIS->removeAllRegUnitsForPhysReg(Reg);
115       }
116     }
117   }
118 }
119 
120 /// Insert restore code for the callee-saved registers used in the function.
121 static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
122                               MutableArrayRef<CalleeSavedInfo> CSI,
123                               LiveIntervals *LIS) {
124   MachineFunction &MF = *RestoreBlock.getParent();
125   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
126   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
127   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
128 
129   // Restore all registers immediately before the return and any
130   // terminators that precede it.
131   MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
132 
133   // FIXME: Just emit the readlane/writelane directly
134   if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
135     for (const CalleeSavedInfo &CI : reverse(CSI)) {
136       unsigned Reg = CI.getReg();
137       const TargetRegisterClass *RC =
138         TRI->getMinimalPhysRegClass(Reg, MVT::i32);
139 
140       TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
141       assert(I != RestoreBlock.begin() &&
142              "loadRegFromStackSlot didn't insert any code!");
143       // Insert in reverse order.  loadRegFromStackSlot can insert
144       // multiple instructions.
145 
146       if (LIS) {
147         MachineInstr &Inst = *std::prev(I);
148         LIS->InsertMachineInstrInMaps(Inst);
149         LIS->removeAllRegUnitsForPhysReg(Reg);
150       }
151     }
152   }
153 }
154 
155 /// Compute the sets of entry and return blocks for saving and restoring
156 /// callee-saved registers, and placing prolog and epilog code.
157 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
158   const MachineFrameInfo &MFI = MF.getFrameInfo();
159 
160   // Even when we do not change any CSR, we still want to insert the
161   // prologue and epilogue of the function.
162   // So set the save points for those.
163 
164   // Use the points found by shrink-wrapping, if any.
165   if (MFI.getSavePoint()) {
166     SaveBlocks.push_back(MFI.getSavePoint());
167     assert(MFI.getRestorePoint() && "Both restore and save must be set");
168     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
169     // If RestoreBlock does not have any successor and is not a return block
170     // then the end point is unreachable and we do not need to insert any
171     // epilogue.
172     if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
173       RestoreBlocks.push_back(RestoreBlock);
174     return;
175   }
176 
177   // Save refs to entry and return blocks.
178   SaveBlocks.push_back(&MF.front());
179   for (MachineBasicBlock &MBB : MF) {
180     if (MBB.isEHFuncletEntry())
181       SaveBlocks.push_back(&MBB);
182     if (MBB.isReturnBlock())
183       RestoreBlocks.push_back(&MBB);
184   }
185 }
186 
187 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
188   MachineRegisterInfo &MRI = MF.getRegInfo();
189   const Function &F = MF.getFunction();
190   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
191   const SIFrameLowering *TFI = ST.getFrameLowering();
192   MachineFrameInfo &MFI = MF.getFrameInfo();
193   RegScavenger *RS = nullptr;
194 
195   // Determine which of the registers in the callee save list should be saved.
196   BitVector SavedRegs;
197   TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
198 
199   // Add the code to save and restore the callee saved registers.
200   if (!F.hasFnAttribute(Attribute::Naked)) {
201     // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
202     // necessary for verifier liveness checks.
203     MFI.setCalleeSavedInfoValid(true);
204 
205     std::vector<CalleeSavedInfo> CSI;
206     const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
207 
208     for (unsigned I = 0; CSRegs[I]; ++I) {
209       unsigned Reg = CSRegs[I];
210       if (SavedRegs.test(Reg)) {
211         const TargetRegisterClass *RC =
212           TRI->getMinimalPhysRegClass(Reg, MVT::i32);
213         int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
214                                            TRI->getSpillAlign(*RC), true);
215 
216         CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
217       }
218     }
219 
220     if (!CSI.empty()) {
221       for (MachineBasicBlock *SaveBlock : SaveBlocks)
222         insertCSRSaves(*SaveBlock, CSI, LIS);
223 
224       for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
225         insertCSRRestores(*RestoreBlock, CSI, LIS);
226       return true;
227     }
228   }
229 
230   return false;
231 }
232 
233 // Find lowest available VGPR and use it as VGPR reserved for SGPR spills.
234 static bool lowerShiftReservedVGPR(MachineFunction &MF,
235                                    const GCNSubtarget &ST) {
236   MachineRegisterInfo &MRI = MF.getRegInfo();
237   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
238   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
239   Register LowestAvailableVGPR, ReservedVGPR;
240   ArrayRef<MCPhysReg> AllVGPR32s = ST.getRegisterInfo()->getAllVGPR32(MF);
241   for (MCPhysReg Reg : AllVGPR32s) {
242     if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg)) {
243       LowestAvailableVGPR = Reg;
244       break;
245     }
246   }
247 
248   if (!LowestAvailableVGPR)
249     return false;
250 
251   ReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill;
252   const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
253   int i = 0;
254 
255   for (MachineBasicBlock &MBB : MF) {
256     for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
257       if (Reg.VGPR == ReservedVGPR) {
258         MBB.removeLiveIn(ReservedVGPR);
259         MBB.addLiveIn(LowestAvailableVGPR);
260         Optional<int> FI;
261         if (FuncInfo->isCalleeSavedReg(CSRegs, LowestAvailableVGPR))
262           FI = FrameInfo.CreateSpillStackObject(4, Align(4));
263 
264         FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, i);
265       }
266       ++i;
267     }
268     MBB.sortUniqueLiveIns();
269   }
270 
271   return true;
272 }
273 
274 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
275   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
276   TII = ST.getInstrInfo();
277   TRI = &TII->getRegisterInfo();
278 
279   VRM = getAnalysisIfAvailable<VirtRegMap>();
280 
281   assert(SaveBlocks.empty() && RestoreBlocks.empty());
282 
283   // First, expose any CSR SGPR spills. This is mostly the same as what PEI
284   // does, but somewhat simpler.
285   calculateSaveRestoreBlocks(MF);
286   bool HasCSRs = spillCalleeSavedRegs(MF);
287 
288   MachineFrameInfo &MFI = MF.getFrameInfo();
289   if (!MFI.hasStackObjects() && !HasCSRs) {
290     SaveBlocks.clear();
291     RestoreBlocks.clear();
292     return false;
293   }
294 
295   MachineRegisterInfo &MRI = MF.getRegInfo();
296   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
297   const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
298     && EnableSpillVGPRToAGPR;
299 
300   bool MadeChange = false;
301 
302   const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
303 
304   // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
305   // handled as SpilledToReg in regular PrologEpilogInserter.
306   if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
307       SpillVGPRToAGPR) {
308     // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
309     // are spilled to VGPRs, in which case we can eliminate the stack usage.
310     //
311     // This operates under the assumption that only other SGPR spills are users
312     // of the frame index.
313 
314     lowerShiftReservedVGPR(MF, ST);
315 
316     for (MachineBasicBlock &MBB : MF) {
317       MachineBasicBlock::iterator Next;
318       for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
319         MachineInstr &MI = *I;
320         Next = std::next(I);
321 
322         if (SpillToAGPR && TII->isVGPRSpill(MI)) {
323           // Try to eliminate stack used by VGPR spills before frame
324           // finalization.
325           unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
326                                                      AMDGPU::OpName::vaddr);
327           int FI = MI.getOperand(FIOp).getIndex();
328           Register VReg =
329               TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
330           if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
331                                                 TRI->isAGPR(MRI, VReg))) {
332             TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
333             continue;
334           }
335         }
336 
337         if (!TII->isSGPRSpill(MI))
338           continue;
339 
340         int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
341         assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
342         if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
343           bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
344           (void)Spilled;
345           assert(Spilled && "failed to spill SGPR to VGPR when allocated");
346         }
347       }
348     }
349 
350     for (MachineBasicBlock &MBB : MF) {
351       for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
352         MBB.addLiveIn(SSpill.VGPR);
353 
354       for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
355         MBB.addLiveIn(Reg);
356 
357       for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
358         MBB.addLiveIn(Reg);
359 
360       MBB.sortUniqueLiveIns();
361     }
362 
363     MadeChange = true;
364   } else if (FuncInfo->VGPRReservedForSGPRSpill) {
365     FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
366   }
367 
368   SaveBlocks.clear();
369   RestoreBlocks.clear();
370 
371   return MadeChange;
372 }
373