//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
10 // SGPR spills, so must insert CSR SGPR spills as well as expand them.
11 //
12 // This pass must never create new SGPR virtual registers.
13 //
14 // FIXME: Must stop RegScavenger spills in later passes.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "AMDGPU.h"
19 #include "AMDGPUSubtarget.h"
20 #include "SIInstrInfo.h"
21 #include "SIMachineFunctionInfo.h"
22 #include "llvm/CodeGen/LiveIntervals.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/CodeGen/MachineInstr.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/VirtRegMap.h"
30 #include "llvm/InitializePasses.h"
31 #include "llvm/Target/TargetMachine.h"
32
33 using namespace llvm;
34
35 #define DEBUG_TYPE "si-lower-sgpr-spills"
36
37 using MBBVector = SmallVector<MachineBasicBlock *, 4>;
38
39 namespace {
40
// Command-line switch "amdgpu-spill-vgpr-to-agpr" (marked cl::ReallyHidden,
// initialized to true). runOnMachineFunction combines it with the subtarget's
// hasMAIInsts() query before it tries to place VGPR spills in AGPRs.
static cl::opt<bool> EnableSpillVGPRToAGPR(
  "amdgpu-spill-vgpr-to-agpr",
  cl::desc("Enable spilling VGPRs to AGPRs"),
  cl::ReallyHidden,
  cl::init(true));
46
47 class SILowerSGPRSpills : public MachineFunctionPass {
48 private:
49 const SIRegisterInfo *TRI = nullptr;
50 const SIInstrInfo *TII = nullptr;
51 VirtRegMap *VRM = nullptr;
52 LiveIntervals *LIS = nullptr;
53
54 // Save and Restore blocks of the current function. Typically there is a
55 // single save block, unless Windows EH funclets are involved.
56 MBBVector SaveBlocks;
57 MBBVector RestoreBlocks;
58
59 public:
60 static char ID;
61
SILowerSGPRSpills()62 SILowerSGPRSpills() : MachineFunctionPass(ID) {}
63
64 void calculateSaveRestoreBlocks(MachineFunction &MF);
65 bool spillCalleeSavedRegs(MachineFunction &MF);
66
67 bool runOnMachineFunction(MachineFunction &MF) override;
68
getAnalysisUsage(AnalysisUsage & AU) const69 void getAnalysisUsage(AnalysisUsage &AU) const override {
70 AU.setPreservesAll();
71 MachineFunctionPass::getAnalysisUsage(AU);
72 }
73 };
74
75 } // end anonymous namespace
76
// Out-of-line definition of the static pass ID declared in the class.
char SILowerSGPRSpills::ID = 0;

// Register the pass under the DEBUG_TYPE name ("si-lower-sgpr-spills") and
// record VirtRegMap as a pass that must be initialized alongside it.
INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
                      "SI lower SGPR spill instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
                    "SI lower SGPR spill instructions", false, false)

// Reference to the pass ID exposed through the llvm namespace so code outside
// this file can name the pass.
char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
86
87 /// Insert restore code for the callee-saved registers used in the function.
insertCSRSaves(MachineBasicBlock & SaveBlock,ArrayRef<CalleeSavedInfo> CSI,LiveIntervals * LIS)88 static void insertCSRSaves(MachineBasicBlock &SaveBlock,
89 ArrayRef<CalleeSavedInfo> CSI,
90 LiveIntervals *LIS) {
91 MachineFunction &MF = *SaveBlock.getParent();
92 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
93 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
94 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
95
96 MachineBasicBlock::iterator I = SaveBlock.begin();
97 if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
98 for (const CalleeSavedInfo &CS : CSI) {
99 // Insert the spill to the stack frame.
100 unsigned Reg = CS.getReg();
101
102 MachineInstrSpan MIS(I, &SaveBlock);
103 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
104
105 TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
106 TRI);
107
108 if (LIS) {
109 assert(std::distance(MIS.begin(), I) == 1);
110 MachineInstr &Inst = *std::prev(I);
111
112 LIS->InsertMachineInstrInMaps(Inst);
113 LIS->removeAllRegUnitsForPhysReg(Reg);
114 }
115 }
116 }
117 }
118
119 /// Insert restore code for the callee-saved registers used in the function.
insertCSRRestores(MachineBasicBlock & RestoreBlock,std::vector<CalleeSavedInfo> & CSI,LiveIntervals * LIS)120 static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
121 std::vector<CalleeSavedInfo> &CSI,
122 LiveIntervals *LIS) {
123 MachineFunction &MF = *RestoreBlock.getParent();
124 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
125 const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
126 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
127
128 // Restore all registers immediately before the return and any
129 // terminators that precede it.
130 MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
131
132 // FIXME: Just emit the readlane/writelane directly
133 if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
134 for (const CalleeSavedInfo &CI : reverse(CSI)) {
135 unsigned Reg = CI.getReg();
136 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
137
138 TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
139 assert(I != RestoreBlock.begin() &&
140 "loadRegFromStackSlot didn't insert any code!");
141 // Insert in reverse order. loadRegFromStackSlot can insert
142 // multiple instructions.
143
144 if (LIS) {
145 MachineInstr &Inst = *std::prev(I);
146 LIS->InsertMachineInstrInMaps(Inst);
147 LIS->removeAllRegUnitsForPhysReg(Reg);
148 }
149 }
150 }
151 }
152
153 /// Compute the sets of entry and return blocks for saving and restoring
154 /// callee-saved registers, and placing prolog and epilog code.
calculateSaveRestoreBlocks(MachineFunction & MF)155 void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
156 const MachineFrameInfo &MFI = MF.getFrameInfo();
157
158 // Even when we do not change any CSR, we still want to insert the
159 // prologue and epilogue of the function.
160 // So set the save points for those.
161
162 // Use the points found by shrink-wrapping, if any.
163 if (MFI.getSavePoint()) {
164 SaveBlocks.push_back(MFI.getSavePoint());
165 assert(MFI.getRestorePoint() && "Both restore and save must be set");
166 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
167 // If RestoreBlock does not have any successor and is not a return block
168 // then the end point is unreachable and we do not need to insert any
169 // epilogue.
170 if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
171 RestoreBlocks.push_back(RestoreBlock);
172 return;
173 }
174
175 // Save refs to entry and return blocks.
176 SaveBlocks.push_back(&MF.front());
177 for (MachineBasicBlock &MBB : MF) {
178 if (MBB.isEHFuncletEntry())
179 SaveBlocks.push_back(&MBB);
180 if (MBB.isReturnBlock())
181 RestoreBlocks.push_back(&MBB);
182 }
183 }
184
spillCalleeSavedRegs(MachineFunction & MF)185 bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
186 MachineRegisterInfo &MRI = MF.getRegInfo();
187 const Function &F = MF.getFunction();
188 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
189 const SIFrameLowering *TFI = ST.getFrameLowering();
190 MachineFrameInfo &MFI = MF.getFrameInfo();
191 RegScavenger *RS = nullptr;
192
193 // Determine which of the registers in the callee save list should be saved.
194 BitVector SavedRegs;
195 TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
196
197 // Add the code to save and restore the callee saved registers.
198 if (!F.hasFnAttribute(Attribute::Naked)) {
199 // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
200 // necessary for verifier liveness checks.
201 MFI.setCalleeSavedInfoValid(true);
202
203 std::vector<CalleeSavedInfo> CSI;
204 const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
205
206 for (unsigned I = 0; CSRegs[I]; ++I) {
207 unsigned Reg = CSRegs[I];
208 if (SavedRegs.test(Reg)) {
209 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
210 int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
211 TRI->getSpillAlignment(*RC),
212 true);
213
214 CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
215 }
216 }
217
218 if (!CSI.empty()) {
219 for (MachineBasicBlock *SaveBlock : SaveBlocks)
220 insertCSRSaves(*SaveBlock, CSI, LIS);
221
222 for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
223 insertCSRRestores(*RestoreBlock, CSI, LIS);
224 return true;
225 }
226 }
227
228 return false;
229 }
230
runOnMachineFunction(MachineFunction & MF)231 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
232 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
233 TII = ST.getInstrInfo();
234 TRI = &TII->getRegisterInfo();
235
236 VRM = getAnalysisIfAvailable<VirtRegMap>();
237
238 assert(SaveBlocks.empty() && RestoreBlocks.empty());
239
240 // First, expose any CSR SGPR spills. This is mostly the same as what PEI
241 // does, but somewhat simpler.
242 calculateSaveRestoreBlocks(MF);
243 bool HasCSRs = spillCalleeSavedRegs(MF);
244
245 MachineFrameInfo &MFI = MF.getFrameInfo();
246 if (!MFI.hasStackObjects() && !HasCSRs) {
247 SaveBlocks.clear();
248 RestoreBlocks.clear();
249 return false;
250 }
251
252 MachineRegisterInfo &MRI = MF.getRegInfo();
253 SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
254 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
255 && EnableSpillVGPRToAGPR;
256
257 bool MadeChange = false;
258
259 const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();
260
261 // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
262 // handled as SpilledToReg in regular PrologEpilogInserter.
263 if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
264 SpillVGPRToAGPR) {
265 // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
266 // are spilled to VGPRs, in which case we can eliminate the stack usage.
267 //
268 // This operates under the assumption that only other SGPR spills are users
269 // of the frame index.
270 for (MachineBasicBlock &MBB : MF) {
271 MachineBasicBlock::iterator Next;
272 for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
273 MachineInstr &MI = *I;
274 Next = std::next(I);
275
276 if (SpillToAGPR && TII->isVGPRSpill(MI)) {
277 // Try to eliminate stack used by VGPR spills before frame
278 // finalization.
279 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
280 AMDGPU::OpName::vaddr);
281 int FI = MI.getOperand(FIOp).getIndex();
282 Register VReg =
283 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
284 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
285 TRI->isAGPR(MRI, VReg))) {
286 TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
287 continue;
288 }
289 }
290
291 if (!TII->isSGPRSpill(MI))
292 continue;
293
294 int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
295 assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
296 if (FuncInfo->allocateSGPRSpillToVGPR(MF, FI)) {
297 bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
298 (void)Spilled;
299 assert(Spilled && "failed to spill SGPR to VGPR when allocated");
300 }
301 }
302 }
303
304 for (MachineBasicBlock &MBB : MF) {
305 for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
306 MBB.addLiveIn(SSpill.VGPR);
307
308 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
309 MBB.addLiveIn(Reg);
310
311 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
312 MBB.addLiveIn(Reg);
313
314 MBB.sortUniqueLiveIns();
315 }
316
317 MadeChange = true;
318 }
319
320 SaveBlocks.clear();
321 RestoreBlocks.clear();
322
323 return MadeChange;
324 }
325