//===----------------------- SIFrameLowering.cpp --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "SIFrameLowering.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableSpillVGPRToAGPR(
  "amdgpu-spill-vgpr-to-agpr",
  cl::desc("Enable spilling VGPRs to AGPRs"),
  cl::ReallyHidden,
  cl::init(true));
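
// Usage note: as a cl::opt, this can be toggled from the command line, e.g.
// `llc -amdgpu-spill-vgpr-to-agpr=0` to force VGPR spills to go to scratch
// memory even when free AGPRs are available.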

// Find a scratch register that we can use in the prologue. We avoid using
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
                                                   LivePhysRegs &LiveRegs,
                                                   const TargetRegisterClass &RC,
                                                   bool Unused = false) {
  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  if (Unused) {
    // We are looking for a register that can be used throughout the entire
    // function, so any use is unacceptable.
    for (MCRegister Reg : RC) {
      if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
        return Reg;
    }
  } else {
    for (MCRegister Reg : RC) {
      if (LiveRegs.available(MRI, Reg))
        return Reg;
    }
  }

  return MCRegister();
}

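// Decide where the FP or BP should be preserved across the function body.
// The cases below try, in order: (1) a free lane in a VGPR that is already
// being spilled for SGPR->VGPR spills, (2) an unused non-CSR SGPR held live
// for the whole function, (3) a lane in a newly spilled VGPR, and finally
// (4) a plain scratch memory slot. On return exactly one of TempSGPR or
// FrameIndex is set, and the caller uses whichever is valid.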
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
                                           LivePhysRegs &LiveRegs,
                                           Register &TempSGPR,
                                           Optional<int> &FrameIndex,
                                           bool IsFP) {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // We need to save and restore the current FP/BP.

  // 1: If there is already a VGPR with free lanes, use it. We
  // may already have to pay the penalty for spilling a CSR VGPR.
  if (MFI->haveFreeLanesForSGPRSpill(MF, 1)) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (!MFI->allocateSGPRSpillToVGPR(MF, NewFI))
      llvm_unreachable("allocate SGPR spill should have worked");

    FrameIndex = NewFI;

    LLVM_DEBUG(auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
               dbgs() << "Spilling " << (IsFP ? "FP" : "BP") << " to "
                      << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
                      << '\n');
    return;
  }

  // 2: Next, try to save the FP/BP in an unused SGPR.
  TempSGPR = findScratchNonCalleeSaveRegister(
      MF.getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass, true);

  if (!TempSGPR) {
    int NewFI = FrameInfo.CreateStackObject(4, Align(4), true, nullptr,
                                            TargetStackID::SGPRSpill);

    if (TRI->spillSGPRToVGPR() && MFI->allocateSGPRSpillToVGPR(MF, NewFI)) {
      // 3: There's no free lane to spill, and no free register to save FP/BP,
      // so we're forced to spill another VGPR to use for the spill.
      FrameIndex = NewFI;

      LLVM_DEBUG(
          auto Spill = MFI->getSGPRToVGPRSpills(NewFI).front();
          dbgs() << (IsFP ? "FP" : "BP") << " requires fallback spill to "
                 << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
    } else {
      // Remove the dead <NewFI> index.
      MF.getFrameInfo().RemoveStackObject(NewFI);
      // 4: If all else fails, spill the FP/BP to memory.
      FrameIndex = FrameInfo.CreateSpillStackObject(4, Align(4));
      LLVM_DEBUG(dbgs() << "Reserved FI " << FrameIndex << " for spilling "
                        << (IsFP ? "FP" : "BP") << '\n');
    }
  } else {
    LLVM_DEBUG(dbgs() << "Saving " << (IsFP ? "FP" : "BP") << " with copy to "
                      << printReg(TempSGPR, TRI) << '\n');
  }
}

// We need to emit stack operations specially here because the frame register
// used differs from the one used in the rest of the function (i.e. from what
// getFrameRegister would return).
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
                             const SIMachineFunctionInfo &FuncInfo,
                             LivePhysRegs &LiveRegs, MachineFunction &MF,
                             MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I, Register SpillReg,
                             int FI) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                        : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  LiveRegs.addReg(SpillReg);
  TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true,
                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                          &LiveRegs);
  LiveRegs.removeReg(SpillReg);
}

static void buildEpilogRestore(const GCNSubtarget &ST,
                               const SIRegisterInfo &TRI,
                               const SIMachineFunctionInfo &FuncInfo,
                               LivePhysRegs &LiveRegs, MachineFunction &MF,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I, Register SpillReg,
                               int FI) {
  unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                        : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
      FrameInfo.getObjectAlign(FI));
  TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false,
                          FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
                          &LiveRegs);
}

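// Materialize the 64-bit global information table (GIT) pointer in TargetReg.
// The high half comes from the amdgpu-git-ptr-high function attribute when it
// was supplied; otherwise it is taken from the current PC via S_GETPC_B64.
// The low half is copied from the SGPR that the GIT offset was preloaded
// into.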
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                        const DebugLoc &DL, const SIInstrInfo *TII,
                        Register TargetReg) {
  MachineFunction *MF = MBB.getParent();
  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);

  if (MFI->getGITPtrHigh() != 0xffffffff) {
    BuildMI(MBB, I, DL, SMovB32, TargetHi)
        .addImm(MFI->getGITPtrHigh())
        .addReg(TargetReg, RegState::ImplicitDefine);
  } else {
    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64);
    BuildMI(MBB, I, DL, GetPC64, TargetReg);
  }
  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
  MF->getRegInfo().addLiveIn(GitPtrLo);
  MBB.addLiveIn(GitPtrLo);
  BuildMI(MBB, I, DL, SMovB32, TargetLo)
    .addReg(GitPtrLo);
}

// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`.
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user-facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT.
    LivePhysRegs LiveRegs;
    LiveRegs.init(*TRI);
    LiveRegs.addLiveIns(MBB);

    // Find an unused register to load the flat scratch init into.
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addMemOperand(MMO);

    // Mask off everything but the offset in bits [47:0] of the descriptor.
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0xffff);
  } else {
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
        .addReg(FlatScrInitHi)
        .addImm(0);
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
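      // A sketch of the simm16 encoding assumed above (the usual hwreg
      // layout): bits [5:0] hold the hardware register id, bits [10:6] the
      // bit offset (0 here), and bits [15:11] width - 1 (31, i.e. all 32
      // bits), which is exactly what `id | (31 << WIDTH_M1_SHIFT_)` produces.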
      return;
    }

    // For GFX9.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitHi)
      .addImm(0);

    return;
  }

  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
    .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

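  // Worked example (hypothetical values): a private segment base of 0x1200
  // plus a wave offset of 0x100 gives 0x1300; the shift below then stores
  // 0x1300 >> 8 == 0x13 into FLAT_SCR_HI, i.e. the base in 256-byte units.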
  // Convert offset to 256-byte units.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
    .addReg(FlatScrInitLo, RegState::Kill)
    .addImm(8);
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I))
      return false;
  }

  return true;
}

// Shift down registers reserved for the scratch RSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. The only
  // ones we cannot eliminate are the resources required for scratch access.
  // For now we skip over user SGPRs and may leave unused holes.

  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(
      std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  return ScratchRsrcReg;
}

static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
  return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
}
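
// For example (wave64, MUBUF scratch): SP and FP hold swizzled byte offsets,
// so a 4-byte-per-lane stack object advances the stack pointer by
// 4 * 64 = 256. With flat scratch the offsets are per lane and no scaling is
// needed.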

void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks.

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
  // FIXME: Hack to not crash in situations which emitted an error.
  if (!PreloadedScratchWaveOffsetReg)
    return;

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found overlaps the scratch
  // wave offset, which may be in a fixed SGPR or a free SGPR chosen by
  // SITargetLowering::allocateSystemSGPRs, COPY the scratch wave offset to a
  // free SGPR.
  Register ScratchWaveOffsetReg;
  if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg);

  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  bool NeedsFlatScratchInit =
      MFI->hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}

// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoRegister`.
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC.
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // cpol
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of the
    // descriptor / bits 22:21 of the third sub-reg will be 0b11).
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor's 3rd sub-reg (bits 22:21) to 0b10 (stride=32).
    // The reason the driver does this is that there can be cases where it
    // presents 2 shaders with different wave sizes (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers.
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and set up the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // cpol
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}

bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
  switch (ID) {
  case TargetStackID::Default:
  case TargetStackID::NoAlloc:
  case TargetStackID::SGPRSpill:
    return true;
  case TargetStackID::ScalableVector:
  case TargetStackID::WasmLocal:
    return false;
  }
  llvm_unreachable("Invalid TargetStackID::Value");
}

static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
                         const SIMachineFunctionInfo *FuncInfo,
                         MachineFunction &MF, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, bool IsProlog) {
  if (LiveRegs.empty()) {
    LiveRegs.init(TRI);
    if (IsProlog) {
      LiveRegs.addLiveIns(MBB);
    } else {
      // In epilog.
      LiveRegs.addLiveOuts(MBB);
      LiveRegs.stepBackward(*MBBI);
    }
  }
}

// Activate all lanes, returns saved exec.
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     bool IsProlog) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  DebugLoc DL;

  initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  ScratchExecCopy = findScratchNonCalleeSaveRegister(
      MRI, LiveRegs, *TRI.getWaveMaskRegClass());
  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  LiveRegs.addReg(ScratchExecCopy);

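  // S_OR_SAVEEXEC first copies the live exec mask into ScratchExecCopy and
  // then ORs exec with its operand; with the -1 immediate below, every lane
  // becomes active for the whole-wave spills that follow, and the caller
  // later restores the saved mask.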
  const unsigned OrSaveExec =
      ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
  BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);

  return ScratchExecCopy;
}

// A StackID of SGPRSpill implies that this is a spill from SGPR to VGPR.
// Otherwise we are spilling to memory.
static bool spilledToMemory(const MachineFunction &MF, int SaveIndex) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return MFI.getStackID(SaveIndex) != TargetStackID::SGPRSpill;
}

void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LivePhysRegs LiveRegs;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  DebugLoc DL;

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;
  // To avoid clobbering VGPRs in lanes that weren't active on function entry,
  // turn on all lanes before doing the spill to memory.
  Register ScratchExecCopy;

  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;

  // VGPRs used for SGPR->VGPR spills
  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
                                             /*IsProlog*/ true);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
                     *Reg.FI);
  }

  // VGPRs used for Whole Wave Mode
  for (const auto &Reg : FuncInfo->WWMReservedRegs) {
    auto VGPR = Reg.first;
    auto FI = Reg.second;
    if (!FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveRegs.addReg(ScratchExecCopy);
  }

  if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));

    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(FramePtrReg);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                     FramePtrFI);
  }

  if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));

    initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);

    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
        .addReg(BasePtrReg);

    buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                     BasePtrFI);
  }

  // In this case, spill the FP to a reserved VGPR.
  if (FPSaveIndex && !spilledToMemory(MF, *FPSaveIndex)) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));

    assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
    assert(Spill.size() == 1);

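    // V_WRITELANE_B32 stores the 32-bit SGPR value into a single lane of the
    // VGPR; the extra Undef use of Spill[0].VGPR below tells the verifier
    // that the rest of the register need not be live here.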
    // Save FP before setting it up.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(FramePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // In this case, spill the BP to a reserved VGPR.
  if (BPSaveIndex && !spilledToMemory(MF, *BPSaveIndex)) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));

    assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
    ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
        FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
    assert(Spill.size() == 1);

    // Save BP before setting it up.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[0].VGPR)
        .addReg(BasePtrReg)
        .addImm(Spill[0].Lane)
        .addReg(Spill[0].VGPR, RegState::Undef);
  }

  // Emit the copy if we need an FP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForFPSaveRestoreCopy)
        .addReg(FramePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Emit the copy if we need a BP, and are using a free SGPR to save it.
  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY),
            FuncInfo->SGPRForBPSaveRestoreCopy)
        .addReg(BasePtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If a copy has been emitted for FP and/or BP, make the SGPRs
  // used in the copy instructions live throughout the function.
  SmallVector<MCPhysReg, 2> TempSGPRs;
  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForFPSaveRestoreCopy);

  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    TempSGPRs.push_back(FuncInfo->SGPRForBPSaveRestoreCopy);

  if (!TempSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : TempSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveRegs.empty()) {
      LiveRegs.addReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      LiveRegs.addReg(FuncInfo->SGPRForBPSaveRestoreCopy);
    }
  }

  if (TRI.hasStackRealignment(MF)) {
    HasFP = true;
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveRegs.empty()) {
      LiveRegs.init(TRI);
      LiveRegs.addLiveIns(MBB);
    }
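
    // Worked example (hypothetical values): with MaxAlign = 16 bytes per lane
    // on a wave64 MUBUF target, the add below contributes
    // (16 - 1) * 64 = 960 and the mask is -(16 * 64) = -1024, rounding s33 up
    // to the next 1024-byte swizzled boundary.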
    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
                     FuncInfo->FramePointerSaveIndex)) &&
         "Needed to save FP but didn't save it anywhere");

  assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
                    !FuncInfo->FramePointerSaveIndex)) &&
         "Saved FP but didn't need it");

  assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
                     FuncInfo->BasePointerSaveIndex)) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || (!FuncInfo->SGPRForBPSaveRestoreCopy &&
                    !FuncInfo->BasePointerSaveIndex)) &&
         "Saved BP but didn't need it");
}

void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  LivePhysRegs LiveRegs;
  DebugLoc DL;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  const Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  const Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();

  Optional<int> FPSaveIndex = FuncInfo->FramePointerSaveIndex;
  Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;

  if (RoundedSize != 0 && hasFP(MF)) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForFPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(FuncInfo->SGPRForFPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FuncInfo->SGPRForBPSaveRestoreCopy) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(FuncInfo->SGPRForBPSaveRestoreCopy)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  if (FPSaveIndex) {
    const int FramePtrFI = *FPSaveIndex;
    assert(!MFI.isDeadObjectIndex(FramePtrFI));
    if (spilledToMemory(MF, FramePtrFI)) {
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                         FramePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(FramePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), FramePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  if (BPSaveIndex) {
    const int BasePtrFI = *BPSaveIndex;
    assert(!MFI.isDeadObjectIndex(BasePtrFI));
    if (spilledToMemory(MF, BasePtrFI)) {
      initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);

      MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
          MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
      if (!TmpVGPR)
        report_fatal_error("failed to find free scratch register");
      buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
                         BasePtrFI);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
          .addReg(TmpVGPR, RegState::Kill);
    } else {
      // Reload from VGPR spill.
      assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
      ArrayRef<SIMachineFunctionInfo::SpilledReg> Spill =
          FuncInfo->getSGPRToVGPRSpills(BasePtrFI);
      assert(Spill.size() == 1);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READLANE_B32), BasePtrReg)
          .addReg(Spill[0].VGPR)
          .addImm(Spill[0].Lane);
    }
  }

  Register ScratchExecCopy;
  for (const SIMachineFunctionInfo::SGPRSpillVGPR &Reg :
       FuncInfo->getSGPRSpillVGPRs()) {
    if (!Reg.FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
                       *Reg.FI);
  }

  for (const auto &Reg : FuncInfo->WWMReservedRegs) {
    auto VGPR = Reg.first;
    auto FI = Reg.second;
    if (!FI)
      continue;

    if (!ScratchExecCopy)
      ScratchExecCopy =
          buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);

    buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
  }

  if (ScratchExecCopy) {
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}

#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
       I != E; ++I) {
    if (!MFI.isDeadObjectIndex(I) &&
        MFI.getStackID(I) == TargetStackID::SGPRSpill &&
        (I != FuncInfo->FramePointerSaveIndex &&
         I != FuncInfo->BasePointerSaveIndex)) {
      return false;
    }
  }

  return true;
}
#endif

StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                    int FI,
                                                    Register &FrameReg) const {
  const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();

  FrameReg = RI->getFrameRegister(MF);
  return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
}

void SIFrameLowering::processFunctionBeforeFrameFinalized(
  MachineFunction &MF,
  RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
                               && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      MachineBasicBlock::iterator Next;
      for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
        MachineInstr &MI = *I;
        Next = std::next(I);

        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
            TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            // FIXME: change to enterBasicBlockEnd()
            RS->enterBasicBlock(MBB);
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        }
      }
    }

    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead update them with the
        // correct register values. But it is not clear that the register value
        // alone is adequate to lower the DIExpression; this should be worked
        // out later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
              SpillFIs[MI.getOperand(0).getIndex()]) {
            MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
            MI.getOperand(0).setIsDebug();
          }
        }
      }
    }
  }

  FuncInfo->removeDeadFrameIndices(MFI);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot.
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
  }
}

// Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // gfx908 has no direct AGPR loads and stores, so spilling an AGPR also
  // requires a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // VGPRs used for SGPR spilling need to be specially inserted in the prolog,
  // so don't allow the default insertion to handle them.
  for (auto SSpill : MFI->getSGPRSpillVGPRs())
    SavedVGPRs.reset(SSpill.VGPR);

  LivePhysRegs LiveRegs;
  LiveRegs.init(*TRI);

  if (WillHaveFP || hasFP(MF)) {
    assert(!MFI->SGPRForFPSaveRestoreCopy && !MFI->FramePointerSaveIndex &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForFPSaveRestoreCopy,
                                   MFI->FramePointerSaveIndex, true);
  }

  if (TRI->hasBasePointer(MF)) {
    if (MFI->SGPRForFPSaveRestoreCopy)
      LiveRegs.addReg(MFI->SGPRForFPSaveRestoreCopy);

    assert(!MFI->SGPRForBPSaveRestoreCopy &&
           !MFI->BasePointerSaveIndex && "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveRegs, MFI->SGPRForBPSaveRestoreCopy,
                                   MFI->BasePointerSaveIndex, false);
  }
}

void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());

  // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
  const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;

  // We have to anticipate introducing CSR VGPR spills if we don't have any
  // stack objects already, since an FP is required if there is both a call
  // and a stack.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR;

  // FP will be specially managed like SP.
  if (WillHaveFP || hasFP(MF))
    SavedRegs.reset(MFI->getFrameOffsetReg());
}

bool SIFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (!FuncInfo->SGPRForFPSaveRestoreCopy &&
      !FuncInfo->SGPRForBPSaveRestoreCopy)
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *RI = ST.getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg = RI->getBaseRegister();
  unsigned NumModifiedRegs = 0;

  if (FuncInfo->SGPRForFPSaveRestoreCopy)
    NumModifiedRegs++;
  if (FuncInfo->SGPRForBPSaveRestoreCopy)
    NumModifiedRegs++;

  for (auto &CS : CSI) {
    if (CS.getReg() == FramePtrReg && FuncInfo->SGPRForFPSaveRestoreCopy) {
      CS.setDstReg(FuncInfo->SGPRForFPSaveRestoreCopy);
      if (--NumModifiedRegs == 0)
        break;
    } else if (CS.getReg() == BasePtrReg &&
               FuncInfo->SGPRForBPSaveRestoreCopy) {
      CS.setDstReg(FuncInfo->SGPRForBPSaveRestoreCopy);
      if (--NumModifiedRegs == 0)
        break;
    }
  }

  return false;
}

MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
  MachineFunction &MF,
  MachineBasicBlock &MBB,
  MachineBasicBlock::iterator I) const {
  int64_t Amount = I->getOperand(0).getImm();
  if (Amount == 0)
    return MBB.erase(I);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const DebugLoc &DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    Amount = alignTo(Amount, getStackAlign());
    assert(isUInt<32>(Amount) && "exceeded stack address space size");
    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
    Register SPReg = MFI->getStackPtrOffsetReg();

    Amount *= getScratchScaleFactor(ST);
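    // E.g. (hypothetical numbers): a call-frame setup of 32 bytes on a wave64
    // MUBUF target scales to a 32 * 64 = 2048 adjustment of the swizzled SP.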
    if (IsDestroy)
      Amount = -Amount;
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
        .addReg(SPReg)
        .addImm(Amount);
  } else if (CalleePopAmount != 0) {
    llvm_unreachable("is this used?");
  }

  return MBB.erase(I);
}

/// Returns true if the frame will require a reference to the stack pointer.
///
/// This is the set of conditions common to setting up the stack pointer in a
/// kernel, and for using a frame pointer in a callable function.
///
/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
/// references SP.
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
  return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
}

// The FP for kernels is always known to be 0, so we never really need to set
// up an explicit register for it. However, DisableFramePointerElim will force
// us to use a register for it.
bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // For entry functions we can use an immediate offset in most cases, so the
  // presence of calls doesn't imply we need a distinct frame pointer.
  if (MFI.hasCalls() &&
      !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
    // All offsets are unsigned, so need to be addressed in the same direction
    // as stack growth.

    // FIXME: This function is pretty broken, since it can be called before the
    // frame layout is determined or CSR spills are inserted.
    return MFI.getStackSize() != 0;
  }

  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
         MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
             MF) ||
         MF.getTarget().Options.DisableFramePointerElim(MF);
}

// This is essentially a reduced version of hasFP for entry functions. Since
// the stack pointer is known to be 0 on entry to kernels, we never really need
// an FP register. We may need to initialize the stack pointer depending on the
// frame properties, which logically overlaps many of the cases where an
// ordinary function would require an FP.
bool SIFrameLowering::requiresStackPointerReference(
    const MachineFunction &MF) const {
  // Callable functions always require a stack pointer reference.
  assert(MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
         "only expected to call this for entry points");

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Entry points ordinarily don't need to initialize SP. We have to set it up
  // for callees if there are any. Also note tail calls are impossible/don't
  // make any sense for kernels.
  if (MFI.hasCalls())
    return true;

  // We still need to initialize the SP if we're doing anything weird that
  // references the SP, like variable sized stack objects.
  return frameTriviallyRequiresSP(MFI);
}