10b57cec5SDimitry Andric //===----------------------- SIFrameLowering.cpp --------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //==-----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "SIFrameLowering.h"
10e8d8bef9SDimitry Andric #include "AMDGPU.h"
11e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
120b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13e8d8bef9SDimitry Andric #include "SIMachineFunctionInfo.h"
145f757f3fSDimitry Andric #include "llvm/CodeGen/LiveRegUnits.h"
150b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h"
17e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric using namespace llvm;
200b57cec5SDimitry Andric 
210b57cec5SDimitry Andric #define DEBUG_TYPE "frame-info"
220b57cec5SDimitry Andric 
23fe6060f1SDimitry Andric static cl::opt<bool> EnableSpillVGPRToAGPR(
24fe6060f1SDimitry Andric   "amdgpu-spill-vgpr-to-agpr",
25fe6060f1SDimitry Andric   cl::desc("Enable spilling VGPRs to AGPRs"),
26fe6060f1SDimitry Andric   cl::ReallyHidden,
27fe6060f1SDimitry Andric   cl::init(true));
280b57cec5SDimitry Andric 
295f757f3fSDimitry Andric // Find a register matching \p RC from \p LiveUnits which is unused and
305f757f3fSDimitry Andric // available throughout the function. On failure, returns AMDGPU::NoRegister.
315f757f3fSDimitry Andric // TODO: Rewrite the loop here to iterate over MCRegUnits instead of
325f757f3fSDimitry Andric // MCRegisters. This should reduce the number of iterations and avoid redundant
335f757f3fSDimitry Andric // checking.
findUnusedRegister(MachineRegisterInfo & MRI,const LiveRegUnits & LiveUnits,const TargetRegisterClass & RC)34bdd1243dSDimitry Andric static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
355f757f3fSDimitry Andric                                      const LiveRegUnits &LiveUnits,
36bdd1243dSDimitry Andric                                      const TargetRegisterClass &RC) {
37bdd1243dSDimitry Andric   for (MCRegister Reg : RC) {
385f757f3fSDimitry Andric     if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
395f757f3fSDimitry Andric         !MRI.isReserved(Reg))
40bdd1243dSDimitry Andric       return Reg;
41bdd1243dSDimitry Andric   }
42bdd1243dSDimitry Andric   return MCRegister();
43bdd1243dSDimitry Andric }
44bdd1243dSDimitry Andric 
45fe6060f1SDimitry Andric // Find a scratch register that we can use in the prologue. We avoid using
46fe6060f1SDimitry Andric // callee-save registers since they may appear to be free when this is called
47fe6060f1SDimitry Andric // from canUseAsPrologue (during shrink wrapping), but then no longer be free
48fe6060f1SDimitry Andric // when this is called from emitPrologue.
findScratchNonCalleeSaveRegister(MachineRegisterInfo & MRI,LiveRegUnits & LiveUnits,const TargetRegisterClass & RC,bool Unused=false)495f757f3fSDimitry Andric static MCRegister findScratchNonCalleeSaveRegister(
505f757f3fSDimitry Andric     MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
515f757f3fSDimitry Andric     const TargetRegisterClass &RC, bool Unused = false) {
520b57cec5SDimitry Andric   // Mark callee saved registers as used so we will not choose them.
530b57cec5SDimitry Andric   const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
540b57cec5SDimitry Andric   for (unsigned i = 0; CSRegs[i]; ++i)
555f757f3fSDimitry Andric     LiveUnits.addReg(CSRegs[i]);
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric   // We are looking for a register that can be used throughout the entire
580b57cec5SDimitry Andric   // function, so any use is unacceptable.
59bdd1243dSDimitry Andric   if (Unused)
605f757f3fSDimitry Andric     return findUnusedRegister(MRI, LiveUnits, RC);
61bdd1243dSDimitry Andric 
625ffd83dbSDimitry Andric   for (MCRegister Reg : RC) {
635f757f3fSDimitry Andric     if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
640b57cec5SDimitry Andric       return Reg;
650b57cec5SDimitry Andric   }
660b57cec5SDimitry Andric 
675ffd83dbSDimitry Andric   return MCRegister();
680b57cec5SDimitry Andric }
690b57cec5SDimitry Andric 
7006c3fb27SDimitry Andric /// Query target location for spilling SGPRs
7106c3fb27SDimitry Andric /// \p IncludeScratchCopy : Also look for free scratch SGPRs
getVGPRSpillLaneOrTempRegister(MachineFunction & MF,LiveRegUnits & LiveUnits,Register SGPR,const TargetRegisterClass & RC=AMDGPU::SReg_32_XM0_XEXECRegClass,bool IncludeScratchCopy=true)72bdd1243dSDimitry Andric static void getVGPRSpillLaneOrTempRegister(
735f757f3fSDimitry Andric     MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
7406c3fb27SDimitry Andric     const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
7506c3fb27SDimitry Andric     bool IncludeScratchCopy = true) {
765ffd83dbSDimitry Andric   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
775ffd83dbSDimitry Andric   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
785ffd83dbSDimitry Andric 
795ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
805ffd83dbSDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
81bdd1243dSDimitry Andric   unsigned Size = TRI->getSpillSize(RC);
82bdd1243dSDimitry Andric   Align Alignment = TRI->getSpillAlign(RC);
835ffd83dbSDimitry Andric 
84bdd1243dSDimitry Andric   // We need to save and restore the given SGPR.
855ffd83dbSDimitry Andric 
8606c3fb27SDimitry Andric   Register ScratchSGPR;
875f757f3fSDimitry Andric   // 1: Try to save the given register into an unused scratch SGPR. The
885f757f3fSDimitry Andric   // LiveUnits should have all the callee saved registers marked as used. For
895f757f3fSDimitry Andric   // certain cases we skip copy to scratch SGPR.
9006c3fb27SDimitry Andric   if (IncludeScratchCopy)
915f757f3fSDimitry Andric     ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
92bdd1243dSDimitry Andric 
93bdd1243dSDimitry Andric   if (!ScratchSGPR) {
94bdd1243dSDimitry Andric     int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
955ffd83dbSDimitry Andric                                          TargetStackID::SGPRSpill);
965ffd83dbSDimitry Andric 
97bdd1243dSDimitry Andric     if (TRI->spillSGPRToVGPR() &&
987a6dacacSDimitry Andric         MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
997a6dacacSDimitry Andric                                          /*IsPrologEpilog=*/true)) {
100bdd1243dSDimitry Andric       // 2: There's no free lane to spill, and no free register to save the
101bdd1243dSDimitry Andric       // SGPR, so we're forced to take another VGPR to use for the spill.
102bdd1243dSDimitry Andric       MFI->addToPrologEpilogSGPRSpills(
103bdd1243dSDimitry Andric           SGPR, PrologEpilogSGPRSaveRestoreInfo(
104bdd1243dSDimitry Andric                     SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));
105e8d8bef9SDimitry Andric 
1065f757f3fSDimitry Andric       LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
107bdd1243dSDimitry Andric                  dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
1085f757f3fSDimitry Andric                         << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
1095f757f3fSDimitry Andric                         << '\n';);
1105ffd83dbSDimitry Andric     } else {
111bdd1243dSDimitry Andric       // Remove dead <FI> index
112bdd1243dSDimitry Andric       MF.getFrameInfo().RemoveStackObject(FI);
113bdd1243dSDimitry Andric       // 3: If all else fails, spill the register to memory.
114bdd1243dSDimitry Andric       FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
115bdd1243dSDimitry Andric       MFI->addToPrologEpilogSGPRSpills(
116bdd1243dSDimitry Andric           SGPR,
117bdd1243dSDimitry Andric           PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI));
118bdd1243dSDimitry Andric       LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
119bdd1243dSDimitry Andric                         << printReg(SGPR, TRI) << '\n');
1205ffd83dbSDimitry Andric     }
1215ffd83dbSDimitry Andric   } else {
122bdd1243dSDimitry Andric     MFI->addToPrologEpilogSGPRSpills(
123bdd1243dSDimitry Andric         SGPR, PrologEpilogSGPRSaveRestoreInfo(
124bdd1243dSDimitry Andric                   SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
1255f757f3fSDimitry Andric     LiveUnits.addReg(ScratchSGPR);
126bdd1243dSDimitry Andric     LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
127bdd1243dSDimitry Andric                       << printReg(ScratchSGPR, TRI) << '\n');
1285ffd83dbSDimitry Andric   }
1290b57cec5SDimitry Andric }
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric // We need to specially emit stack operations here because a different frame
1320b57cec5SDimitry Andric // register is used than in the rest of the function, as getFrameRegister would
1330b57cec5SDimitry Andric // use.
buildPrologSpill(const GCNSubtarget & ST,const SIRegisterInfo & TRI,const SIMachineFunctionInfo & FuncInfo,LiveRegUnits & LiveUnits,MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,Register SpillReg,int FI,Register FrameReg,int64_t DwordOff=0)134fe6060f1SDimitry Andric static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
135fe6060f1SDimitry Andric                              const SIMachineFunctionInfo &FuncInfo,
1365f757f3fSDimitry Andric                              LiveRegUnits &LiveUnits, MachineFunction &MF,
137e8d8bef9SDimitry Andric                              MachineBasicBlock &MBB,
138349cc55cSDimitry Andric                              MachineBasicBlock::iterator I, const DebugLoc &DL,
139bdd1243dSDimitry Andric                              Register SpillReg, int FI, Register FrameReg,
140bdd1243dSDimitry Andric                              int64_t DwordOff = 0) {
141fe6060f1SDimitry Andric   unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
142fe6060f1SDimitry Andric                                         : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1430b57cec5SDimitry Andric 
144fe6060f1SDimitry Andric   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
145fe6060f1SDimitry Andric   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
146fe6060f1SDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
147fe6060f1SDimitry Andric       PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
148fe6060f1SDimitry Andric       FrameInfo.getObjectAlign(FI));
1495f757f3fSDimitry Andric   LiveUnits.addReg(SpillReg);
150bdd1243dSDimitry Andric   bool IsKill = !MBB.isLiveIn(SpillReg);
151bdd1243dSDimitry Andric   TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
1525f757f3fSDimitry Andric                           DwordOff, MMO, nullptr, &LiveUnits);
153bdd1243dSDimitry Andric   if (IsKill)
1545f757f3fSDimitry Andric     LiveUnits.removeReg(SpillReg);
155e8d8bef9SDimitry Andric }
156e8d8bef9SDimitry Andric 
buildEpilogRestore(const GCNSubtarget & ST,const SIRegisterInfo & TRI,const SIMachineFunctionInfo & FuncInfo,LiveRegUnits & LiveUnits,MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,Register SpillReg,int FI,Register FrameReg,int64_t DwordOff=0)157fe6060f1SDimitry Andric static void buildEpilogRestore(const GCNSubtarget &ST,
158fe6060f1SDimitry Andric                                const SIRegisterInfo &TRI,
159fe6060f1SDimitry Andric                                const SIMachineFunctionInfo &FuncInfo,
1605f757f3fSDimitry Andric                                LiveRegUnits &LiveUnits, MachineFunction &MF,
161e8d8bef9SDimitry Andric                                MachineBasicBlock &MBB,
162349cc55cSDimitry Andric                                MachineBasicBlock::iterator I,
163bdd1243dSDimitry Andric                                const DebugLoc &DL, Register SpillReg, int FI,
164bdd1243dSDimitry Andric                                Register FrameReg, int64_t DwordOff = 0) {
165fe6060f1SDimitry Andric   unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
166fe6060f1SDimitry Andric                                         : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1670b57cec5SDimitry Andric 
168fe6060f1SDimitry Andric   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
169fe6060f1SDimitry Andric   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
170fe6060f1SDimitry Andric   MachineMemOperand *MMO = MF.getMachineMemOperand(
171fe6060f1SDimitry Andric       PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
172fe6060f1SDimitry Andric       FrameInfo.getObjectAlign(FI));
173bdd1243dSDimitry Andric   TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
1745f757f3fSDimitry Andric                           DwordOff, MMO, nullptr, &LiveUnits);
1750b57cec5SDimitry Andric }
1760b57cec5SDimitry Andric 
buildGitPtr(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,const SIInstrInfo * TII,Register TargetReg)177e8d8bef9SDimitry Andric static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
178e8d8bef9SDimitry Andric                         const DebugLoc &DL, const SIInstrInfo *TII,
179e8d8bef9SDimitry Andric                         Register TargetReg) {
180e8d8bef9SDimitry Andric   MachineFunction *MF = MBB.getParent();
181e8d8bef9SDimitry Andric   const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
182e8d8bef9SDimitry Andric   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
183e8d8bef9SDimitry Andric   const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
184e8d8bef9SDimitry Andric   Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
185e8d8bef9SDimitry Andric   Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
186e8d8bef9SDimitry Andric 
187e8d8bef9SDimitry Andric   if (MFI->getGITPtrHigh() != 0xffffffff) {
188e8d8bef9SDimitry Andric     BuildMI(MBB, I, DL, SMovB32, TargetHi)
189e8d8bef9SDimitry Andric         .addImm(MFI->getGITPtrHigh())
190e8d8bef9SDimitry Andric         .addReg(TargetReg, RegState::ImplicitDefine);
191e8d8bef9SDimitry Andric   } else {
1927a6dacacSDimitry Andric     const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
193e8d8bef9SDimitry Andric     BuildMI(MBB, I, DL, GetPC64, TargetReg);
194e8d8bef9SDimitry Andric   }
195e8d8bef9SDimitry Andric   Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
196e8d8bef9SDimitry Andric   MF->getRegInfo().addLiveIn(GitPtrLo);
197e8d8bef9SDimitry Andric   MBB.addLiveIn(GitPtrLo);
198e8d8bef9SDimitry Andric   BuildMI(MBB, I, DL, SMovB32, TargetLo)
199e8d8bef9SDimitry Andric     .addReg(GitPtrLo);
200e8d8bef9SDimitry Andric }
201e8d8bef9SDimitry Andric 
initLiveUnits(LiveRegUnits & LiveUnits,const SIRegisterInfo & TRI,const SIMachineFunctionInfo * FuncInfo,MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,bool IsProlog)2025f757f3fSDimitry Andric static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
203bdd1243dSDimitry Andric                           const SIMachineFunctionInfo *FuncInfo,
204bdd1243dSDimitry Andric                           MachineFunction &MF, MachineBasicBlock &MBB,
205bdd1243dSDimitry Andric                           MachineBasicBlock::iterator MBBI, bool IsProlog) {
2065f757f3fSDimitry Andric   if (LiveUnits.empty()) {
2075f757f3fSDimitry Andric     LiveUnits.init(TRI);
208bdd1243dSDimitry Andric     if (IsProlog) {
2095f757f3fSDimitry Andric       LiveUnits.addLiveIns(MBB);
210bdd1243dSDimitry Andric     } else {
211bdd1243dSDimitry Andric       // In epilog.
2125f757f3fSDimitry Andric       LiveUnits.addLiveOuts(MBB);
2135f757f3fSDimitry Andric       LiveUnits.stepBackward(*MBBI);
214bdd1243dSDimitry Andric     }
215bdd1243dSDimitry Andric   }
216bdd1243dSDimitry Andric }
217bdd1243dSDimitry Andric 
218bdd1243dSDimitry Andric namespace llvm {
219bdd1243dSDimitry Andric 
220bdd1243dSDimitry Andric // SpillBuilder to save/restore special SGPR spills like the one needed for FP,
221bdd1243dSDimitry Andric // BP, etc. These spills are delayed until the current function's frame is
222bdd1243dSDimitry Andric // finalized. For a given register, the builder uses the
223bdd1243dSDimitry Andric // PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
224bdd1243dSDimitry Andric class PrologEpilogSGPRSpillBuilder {
225bdd1243dSDimitry Andric   MachineBasicBlock::iterator MI;
226bdd1243dSDimitry Andric   MachineBasicBlock &MBB;
227bdd1243dSDimitry Andric   MachineFunction &MF;
228bdd1243dSDimitry Andric   const GCNSubtarget &ST;
229bdd1243dSDimitry Andric   MachineFrameInfo &MFI;
230bdd1243dSDimitry Andric   SIMachineFunctionInfo *FuncInfo;
231bdd1243dSDimitry Andric   const SIInstrInfo *TII;
232bdd1243dSDimitry Andric   const SIRegisterInfo &TRI;
233bdd1243dSDimitry Andric   Register SuperReg;
234bdd1243dSDimitry Andric   const PrologEpilogSGPRSaveRestoreInfo SI;
2355f757f3fSDimitry Andric   LiveRegUnits &LiveUnits;
236bdd1243dSDimitry Andric   const DebugLoc &DL;
237bdd1243dSDimitry Andric   Register FrameReg;
238bdd1243dSDimitry Andric   ArrayRef<int16_t> SplitParts;
239bdd1243dSDimitry Andric   unsigned NumSubRegs;
240bdd1243dSDimitry Andric   unsigned EltSize = 4;
241bdd1243dSDimitry Andric 
saveToMemory(const int FI) const242bdd1243dSDimitry Andric   void saveToMemory(const int FI) const {
243bdd1243dSDimitry Andric     MachineRegisterInfo &MRI = MF.getRegInfo();
244bdd1243dSDimitry Andric     assert(!MFI.isDeadObjectIndex(FI));
245bdd1243dSDimitry Andric 
2465f757f3fSDimitry Andric     initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
247bdd1243dSDimitry Andric 
248bdd1243dSDimitry Andric     MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
2495f757f3fSDimitry Andric         MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
250bdd1243dSDimitry Andric     if (!TmpVGPR)
251bdd1243dSDimitry Andric       report_fatal_error("failed to find free scratch register");
252bdd1243dSDimitry Andric 
253bdd1243dSDimitry Andric     for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
254bdd1243dSDimitry Andric       Register SubReg = NumSubRegs == 1
255bdd1243dSDimitry Andric                             ? SuperReg
256bdd1243dSDimitry Andric                             : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
257bdd1243dSDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
258bdd1243dSDimitry Andric           .addReg(SubReg);
259bdd1243dSDimitry Andric 
2605f757f3fSDimitry Andric       buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
261bdd1243dSDimitry Andric                        FI, FrameReg, DwordOff);
262bdd1243dSDimitry Andric       DwordOff += 4;
263bdd1243dSDimitry Andric     }
264bdd1243dSDimitry Andric   }
265bdd1243dSDimitry Andric 
saveToVGPRLane(const int FI) const266bdd1243dSDimitry Andric   void saveToVGPRLane(const int FI) const {
267bdd1243dSDimitry Andric     assert(!MFI.isDeadObjectIndex(FI));
268bdd1243dSDimitry Andric 
269bdd1243dSDimitry Andric     assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
270bdd1243dSDimitry Andric     ArrayRef<SIRegisterInfo::SpilledReg> Spill =
2715f757f3fSDimitry Andric         FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
272bdd1243dSDimitry Andric     assert(Spill.size() == NumSubRegs);
273bdd1243dSDimitry Andric 
274bdd1243dSDimitry Andric     for (unsigned I = 0; I < NumSubRegs; ++I) {
275bdd1243dSDimitry Andric       Register SubReg = NumSubRegs == 1
276bdd1243dSDimitry Andric                             ? SuperReg
277bdd1243dSDimitry Andric                             : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
2785f757f3fSDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
2795f757f3fSDimitry Andric               Spill[I].VGPR)
280bdd1243dSDimitry Andric           .addReg(SubReg)
281bdd1243dSDimitry Andric           .addImm(Spill[I].Lane)
282bdd1243dSDimitry Andric           .addReg(Spill[I].VGPR, RegState::Undef);
283bdd1243dSDimitry Andric     }
284bdd1243dSDimitry Andric   }
285bdd1243dSDimitry Andric 
copyToScratchSGPR(Register DstReg) const286bdd1243dSDimitry Andric   void copyToScratchSGPR(Register DstReg) const {
287bdd1243dSDimitry Andric     BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
288bdd1243dSDimitry Andric         .addReg(SuperReg)
289bdd1243dSDimitry Andric         .setMIFlag(MachineInstr::FrameSetup);
290bdd1243dSDimitry Andric   }
291bdd1243dSDimitry Andric 
restoreFromMemory(const int FI)292bdd1243dSDimitry Andric   void restoreFromMemory(const int FI) {
293bdd1243dSDimitry Andric     MachineRegisterInfo &MRI = MF.getRegInfo();
294bdd1243dSDimitry Andric 
2955f757f3fSDimitry Andric     initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
296bdd1243dSDimitry Andric     MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
2975f757f3fSDimitry Andric         MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
298bdd1243dSDimitry Andric     if (!TmpVGPR)
299bdd1243dSDimitry Andric       report_fatal_error("failed to find free scratch register");
300bdd1243dSDimitry Andric 
301bdd1243dSDimitry Andric     for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
302bdd1243dSDimitry Andric       Register SubReg = NumSubRegs == 1
303bdd1243dSDimitry Andric                             ? SuperReg
304bdd1243dSDimitry Andric                             : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
305bdd1243dSDimitry Andric 
3065f757f3fSDimitry Andric       buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
3075f757f3fSDimitry Andric                          TmpVGPR, FI, FrameReg, DwordOff);
308bdd1243dSDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
309bdd1243dSDimitry Andric           .addReg(TmpVGPR, RegState::Kill);
310bdd1243dSDimitry Andric       DwordOff += 4;
311bdd1243dSDimitry Andric     }
312bdd1243dSDimitry Andric   }
313bdd1243dSDimitry Andric 
restoreFromVGPRLane(const int FI)314bdd1243dSDimitry Andric   void restoreFromVGPRLane(const int FI) {
315bdd1243dSDimitry Andric     assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
316bdd1243dSDimitry Andric     ArrayRef<SIRegisterInfo::SpilledReg> Spill =
3175f757f3fSDimitry Andric         FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
318bdd1243dSDimitry Andric     assert(Spill.size() == NumSubRegs);
319bdd1243dSDimitry Andric 
320bdd1243dSDimitry Andric     for (unsigned I = 0; I < NumSubRegs; ++I) {
321bdd1243dSDimitry Andric       Register SubReg = NumSubRegs == 1
322bdd1243dSDimitry Andric                             ? SuperReg
323bdd1243dSDimitry Andric                             : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
3245f757f3fSDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
325bdd1243dSDimitry Andric           .addReg(Spill[I].VGPR)
326bdd1243dSDimitry Andric           .addImm(Spill[I].Lane);
327bdd1243dSDimitry Andric     }
328bdd1243dSDimitry Andric   }
329bdd1243dSDimitry Andric 
copyFromScratchSGPR(Register SrcReg) const330bdd1243dSDimitry Andric   void copyFromScratchSGPR(Register SrcReg) const {
331bdd1243dSDimitry Andric     BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
332bdd1243dSDimitry Andric         .addReg(SrcReg)
333bdd1243dSDimitry Andric         .setMIFlag(MachineInstr::FrameDestroy);
334bdd1243dSDimitry Andric   }
335bdd1243dSDimitry Andric 
336bdd1243dSDimitry Andric public:
PrologEpilogSGPRSpillBuilder(Register Reg,const PrologEpilogSGPRSaveRestoreInfo SI,MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,const DebugLoc & DL,const SIInstrInfo * TII,const SIRegisterInfo & TRI,LiveRegUnits & LiveUnits,Register FrameReg)337bdd1243dSDimitry Andric   PrologEpilogSGPRSpillBuilder(Register Reg,
338bdd1243dSDimitry Andric                                const PrologEpilogSGPRSaveRestoreInfo SI,
339bdd1243dSDimitry Andric                                MachineBasicBlock &MBB,
340bdd1243dSDimitry Andric                                MachineBasicBlock::iterator MI,
341bdd1243dSDimitry Andric                                const DebugLoc &DL, const SIInstrInfo *TII,
342bdd1243dSDimitry Andric                                const SIRegisterInfo &TRI,
3435f757f3fSDimitry Andric                                LiveRegUnits &LiveUnits, Register FrameReg)
344bdd1243dSDimitry Andric       : MI(MI), MBB(MBB), MF(*MBB.getParent()),
345bdd1243dSDimitry Andric         ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
346bdd1243dSDimitry Andric         FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
3475f757f3fSDimitry Andric         SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
3485f757f3fSDimitry Andric         FrameReg(FrameReg) {
349bdd1243dSDimitry Andric     const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
350bdd1243dSDimitry Andric     SplitParts = TRI.getRegSplitParts(RC, EltSize);
351bdd1243dSDimitry Andric     NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
352bdd1243dSDimitry Andric 
353bdd1243dSDimitry Andric     assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
354bdd1243dSDimitry Andric   }
355bdd1243dSDimitry Andric 
save()356bdd1243dSDimitry Andric   void save() {
357bdd1243dSDimitry Andric     switch (SI.getKind()) {
358bdd1243dSDimitry Andric     case SGPRSaveKind::SPILL_TO_MEM:
359bdd1243dSDimitry Andric       return saveToMemory(SI.getIndex());
360bdd1243dSDimitry Andric     case SGPRSaveKind::SPILL_TO_VGPR_LANE:
361bdd1243dSDimitry Andric       return saveToVGPRLane(SI.getIndex());
362bdd1243dSDimitry Andric     case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
363bdd1243dSDimitry Andric       return copyToScratchSGPR(SI.getReg());
364bdd1243dSDimitry Andric     }
365bdd1243dSDimitry Andric   }
366bdd1243dSDimitry Andric 
restore()367bdd1243dSDimitry Andric   void restore() {
368bdd1243dSDimitry Andric     switch (SI.getKind()) {
369bdd1243dSDimitry Andric     case SGPRSaveKind::SPILL_TO_MEM:
370bdd1243dSDimitry Andric       return restoreFromMemory(SI.getIndex());
371bdd1243dSDimitry Andric     case SGPRSaveKind::SPILL_TO_VGPR_LANE:
372bdd1243dSDimitry Andric       return restoreFromVGPRLane(SI.getIndex());
373bdd1243dSDimitry Andric     case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
374bdd1243dSDimitry Andric       return copyFromScratchSGPR(SI.getReg());
375bdd1243dSDimitry Andric     }
376bdd1243dSDimitry Andric   }
377bdd1243dSDimitry Andric };
378bdd1243dSDimitry Andric 
379bdd1243dSDimitry Andric } // namespace llvm
380bdd1243dSDimitry Andric 
3815ffd83dbSDimitry Andric // Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
emitEntryFunctionFlatScratchInit(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,Register ScratchWaveOffsetReg) const3825ffd83dbSDimitry Andric void SIFrameLowering::emitEntryFunctionFlatScratchInit(
3835ffd83dbSDimitry Andric     MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
3845ffd83dbSDimitry Andric     const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
3855ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3860b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
3870b57cec5SDimitry Andric   const SIRegisterInfo *TRI = &TII->getRegisterInfo();
3880b57cec5SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
3890b57cec5SDimitry Andric 
3900b57cec5SDimitry Andric   // We don't need this if we only have spills since there is no user facing
3910b57cec5SDimitry Andric   // scratch.
3920b57cec5SDimitry Andric 
3930b57cec5SDimitry Andric   // TODO: If we know we don't have flat instructions earlier, we can omit
3940b57cec5SDimitry Andric   // this from the input registers.
3950b57cec5SDimitry Andric   //
3960b57cec5SDimitry Andric   // TODO: We only need to know if we access scratch space through a flat
3970b57cec5SDimitry Andric   // pointer. Because we only detect if flat instructions are used at all,
3980b57cec5SDimitry Andric   // this will be used more often than necessary on VI.
3990b57cec5SDimitry Andric 
400e8d8bef9SDimitry Andric   Register FlatScrInitLo;
401e8d8bef9SDimitry Andric   Register FlatScrInitHi;
402e8d8bef9SDimitry Andric 
403e8d8bef9SDimitry Andric   if (ST.isAmdPalOS()) {
404e8d8bef9SDimitry Andric     // Extract the scratch offset from the descriptor in the GIT
4055f757f3fSDimitry Andric     LiveRegUnits LiveUnits;
4065f757f3fSDimitry Andric     LiveUnits.init(*TRI);
4075f757f3fSDimitry Andric     LiveUnits.addLiveIns(MBB);
408e8d8bef9SDimitry Andric 
409e8d8bef9SDimitry Andric     // Find unused reg to load flat scratch init into
410e8d8bef9SDimitry Andric     MachineRegisterInfo &MRI = MF.getRegInfo();
411e8d8bef9SDimitry Andric     Register FlatScrInit = AMDGPU::NoRegister;
412e8d8bef9SDimitry Andric     ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
413e8d8bef9SDimitry Andric     unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
414e8d8bef9SDimitry Andric     AllSGPR64s = AllSGPR64s.slice(
415e8d8bef9SDimitry Andric         std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
416e8d8bef9SDimitry Andric     Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
417e8d8bef9SDimitry Andric     for (MCPhysReg Reg : AllSGPR64s) {
4185f757f3fSDimitry Andric       if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
4195f757f3fSDimitry Andric           MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
420e8d8bef9SDimitry Andric         FlatScrInit = Reg;
421e8d8bef9SDimitry Andric         break;
422e8d8bef9SDimitry Andric       }
423e8d8bef9SDimitry Andric     }
424e8d8bef9SDimitry Andric     assert(FlatScrInit && "Failed to find free register for scratch init");
425e8d8bef9SDimitry Andric 
426e8d8bef9SDimitry Andric     FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
427e8d8bef9SDimitry Andric     FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
428e8d8bef9SDimitry Andric 
429e8d8bef9SDimitry Andric     buildGitPtr(MBB, I, DL, TII, FlatScrInit);
430e8d8bef9SDimitry Andric 
431e8d8bef9SDimitry Andric     // We now have the GIT ptr - now get the scratch descriptor from the entry
432e8d8bef9SDimitry Andric     // at offset 0 (or offset 16 for a compute shader).
433e8d8bef9SDimitry Andric     MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
434e8d8bef9SDimitry Andric     const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
435e8d8bef9SDimitry Andric     auto *MMO = MF.getMachineMemOperand(
436e8d8bef9SDimitry Andric         PtrInfo,
437e8d8bef9SDimitry Andric         MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
438e8d8bef9SDimitry Andric             MachineMemOperand::MODereferenceable,
439e8d8bef9SDimitry Andric         8, Align(4));
440e8d8bef9SDimitry Andric     unsigned Offset =
441e8d8bef9SDimitry Andric         MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
442e8d8bef9SDimitry Andric     const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
443e8d8bef9SDimitry Andric     unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
444e8d8bef9SDimitry Andric     BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
445e8d8bef9SDimitry Andric         .addReg(FlatScrInit)
446e8d8bef9SDimitry Andric         .addImm(EncodedOffset) // offset
447fe6060f1SDimitry Andric         .addImm(0)             // cpol
448e8d8bef9SDimitry Andric         .addMemOperand(MMO);
449e8d8bef9SDimitry Andric 
450e8d8bef9SDimitry Andric     // Mask the offset in [47:0] of the descriptor
451e8d8bef9SDimitry Andric     const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
452349cc55cSDimitry Andric     auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
453e8d8bef9SDimitry Andric         .addReg(FlatScrInitHi)
454e8d8bef9SDimitry Andric         .addImm(0xffff);
455349cc55cSDimitry Andric     And->getOperand(3).setIsDead(); // Mark SCC as dead.
456e8d8bef9SDimitry Andric   } else {
4578bcb0991SDimitry Andric     Register FlatScratchInitReg =
4588bcb0991SDimitry Andric         MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
459e8d8bef9SDimitry Andric     assert(FlatScratchInitReg);
4600b57cec5SDimitry Andric 
4610b57cec5SDimitry Andric     MachineRegisterInfo &MRI = MF.getRegInfo();
4620b57cec5SDimitry Andric     MRI.addLiveIn(FlatScratchInitReg);
4630b57cec5SDimitry Andric     MBB.addLiveIn(FlatScratchInitReg);
4640b57cec5SDimitry Andric 
465e8d8bef9SDimitry Andric     FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
466e8d8bef9SDimitry Andric     FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
467e8d8bef9SDimitry Andric   }
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric   // Do a 64-bit pointer add.
4700b57cec5SDimitry Andric   if (ST.flatScratchIsPointer()) {
4710b57cec5SDimitry Andric     if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
4720b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
4730b57cec5SDimitry Andric         .addReg(FlatScrInitLo)
4740b57cec5SDimitry Andric         .addReg(ScratchWaveOffsetReg);
475349cc55cSDimitry Andric       auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
476349cc55cSDimitry Andric                           FlatScrInitHi)
4770b57cec5SDimitry Andric         .addReg(FlatScrInitHi)
4780b57cec5SDimitry Andric         .addImm(0);
479349cc55cSDimitry Andric       Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
480349cc55cSDimitry Andric 
4810b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
4820b57cec5SDimitry Andric         addReg(FlatScrInitLo).
4830b57cec5SDimitry Andric         addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
4840b57cec5SDimitry Andric                        (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
4850b57cec5SDimitry Andric       BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
4860b57cec5SDimitry Andric         addReg(FlatScrInitHi).
4870b57cec5SDimitry Andric         addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
4880b57cec5SDimitry Andric                        (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
4890b57cec5SDimitry Andric       return;
4900b57cec5SDimitry Andric     }
4910b57cec5SDimitry Andric 
492e8d8bef9SDimitry Andric     // For GFX9.
4930b57cec5SDimitry Andric     BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
4940b57cec5SDimitry Andric       .addReg(FlatScrInitLo)
4950b57cec5SDimitry Andric       .addReg(ScratchWaveOffsetReg);
496349cc55cSDimitry Andric     auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
497349cc55cSDimitry Andric                         AMDGPU::FLAT_SCR_HI)
4980b57cec5SDimitry Andric       .addReg(FlatScrInitHi)
4990b57cec5SDimitry Andric       .addImm(0);
500349cc55cSDimitry Andric     Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
5010b57cec5SDimitry Andric 
5020b57cec5SDimitry Andric     return;
5030b57cec5SDimitry Andric   }
5040b57cec5SDimitry Andric 
505e8d8bef9SDimitry Andric   assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
5060b57cec5SDimitry Andric 
5070b57cec5SDimitry Andric   // Copy the size in bytes.
5080b57cec5SDimitry Andric   BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
5090b57cec5SDimitry Andric     .addReg(FlatScrInitHi, RegState::Kill);
5100b57cec5SDimitry Andric 
5110b57cec5SDimitry Andric   // Add wave offset in bytes to private base offset.
5120b57cec5SDimitry Andric   // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
513fe6060f1SDimitry Andric   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
5140b57cec5SDimitry Andric       .addReg(FlatScrInitLo)
5150b57cec5SDimitry Andric       .addReg(ScratchWaveOffsetReg);
5160b57cec5SDimitry Andric 
5170b57cec5SDimitry Andric   // Convert offset to 256-byte units.
518349cc55cSDimitry Andric   auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
519349cc55cSDimitry Andric                       AMDGPU::FLAT_SCR_HI)
5200b57cec5SDimitry Andric     .addReg(FlatScrInitLo, RegState::Kill)
5210b57cec5SDimitry Andric     .addImm(8);
522bdd1243dSDimitry Andric   LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
5230b57cec5SDimitry Andric }
5240b57cec5SDimitry Andric 
525e8d8bef9SDimitry Andric // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
526e8d8bef9SDimitry Andric // memory. They should have been removed by now.
allStackObjectsAreDead(const MachineFrameInfo & MFI)527e8d8bef9SDimitry Andric static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
528e8d8bef9SDimitry Andric   for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
529e8d8bef9SDimitry Andric        I != E; ++I) {
530e8d8bef9SDimitry Andric     if (!MFI.isDeadObjectIndex(I))
531e8d8bef9SDimitry Andric       return false;
532e8d8bef9SDimitry Andric   }
533e8d8bef9SDimitry Andric 
534e8d8bef9SDimitry Andric   return true;
535e8d8bef9SDimitry Andric }
536e8d8bef9SDimitry Andric 
// Shift down registers reserved for the scratch RSRC.
//
// The scratch resource descriptor (SRSRC) is reserved at the top of the SGPR
// file by default. If it is actually needed, try to move it down next to the
// last SGPR that was really used, and update both MRI and the function info to
// point at the new register. Returns Register() when no SRSRC is needed at
// all; otherwise returns the (possibly relocated) SRSRC.
Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
    MachineFunction &MF) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  assert(MFI->isEntryFunction());

  Register ScratchRsrcReg = MFI->getScratchRSrcReg();

  // No SRSRC was assigned, or it is assigned but unused and every stack
  // object is dead — nothing to set up, report "no register".
  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
                          allStackObjectsAreDead(MF.getFrameInfo())))
    return Register();

  // Keep the current assignment when shifting is not applicable: either the
  // subtarget has the SGPR init bug, or the SRSRC is not in the default
  // reserved location anyway.
  if (ST.hasSGPRInitBug() ||
      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
    return ScratchRsrcReg;

  // We reserved the last registers for this. Shift it down to the end of those
  // which were actually used.
  //
  // FIXME: It might be safer to use a pseudoregister before replacement.

  // FIXME: We should be able to eliminate unused input registers. We only
  // cannot do this for the resources required for scratch access. For now we
  // skip over user SGPRs and may leave unused holes.

  // Preloaded SGPRs occupy the front of the file; round up to whole SGPR128
  // tuples so candidate quads never overlap them.
  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));

  // Skip the last N reserved elements because they should have already been
  // reserved for VCC etc.
  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
  for (MCPhysReg Reg : AllSGPR128s) {
    // Pick the first unallocated one. Make sure we don't clobber the other
    // reserved input we needed. Also for PAL, make sure we don't clobber
    // the GIT pointer passed in SGPR0 or SGPR8.
    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
      // Rewrite all uses of the old SRSRC to the shifted-down register.
      MRI.replaceRegWith(ScratchRsrcReg, Reg);
      MFI->setScratchRSrcReg(Reg);
      return Reg;
    }
  }

  // No better candidate found; keep the original reserved location.
  return ScratchRsrcReg;
}
5890b57cec5SDimitry Andric 
getScratchScaleFactor(const GCNSubtarget & ST)590e8d8bef9SDimitry Andric static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
591e8d8bef9SDimitry Andric   return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
592e8d8bef9SDimitry Andric }
593e8d8bef9SDimitry Andric 
// Emit the prologue for an entry function (kernel / shader entry point):
// pin down the scratch resource descriptor and scratch wave offset, set up
// SP/FP if required, and initialize flat scratch when it is needed.
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found is clobbering with
  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
  // wave offset to a free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    // Search the non-preloaded SGPRs for a free one that does not overlap the
    // SRSRC or the GIT pointer, and copy the wave offset into it.
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }
  } else {
    // No conflict: use the preloaded register directly.
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  // We may only fail to pick a wave offset register when none was preloaded.
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    // SP = frame size scaled per getScratchScaleFactor (1 with flat scratch,
    // wavefront size otherwise).
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    // Entry functions start with a zero frame offset.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  // Flat scratch must be initialized when the FLAT_SCR register is used, the
  // function makes calls, or (when flat scratch addressing is enabled) there
  // are live stack objects.
  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  // The wave offset input is consumed below, so mark it live-in (unless the
  // architecture provides flat scratch directly).
  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}
7210b57cec5SDimitry Andric 
// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`.
//
// Depending on the OS/ABI the descriptor is either loaded from the GIT
// (PAL), materialized from relocations or the implicit buffer pointer
// (Mesa GFX shaders / no preloaded SRD), or simply copied from the
// preloaded register (HSA/Mesa). In all cases the wave offset is then
// folded into the descriptor's base address.
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                           MachineMemOperand::MOInvariant |
                                           MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // cpol
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11)
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    // No preloaded descriptor: build the SRD manually.
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
      // The base pointer comes via the implicit buffer pointer user SGPR:
      // copied directly for compute, loaded through it otherwise.
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // cpol
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      // No implicit buffer pointer: resolve the base address via the
      // SCRATCH_RSRC_DWORD0/1 external symbols (relocations).
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    // Words 2 and 3 of the descriptor come from the precomputed constant.
    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    // HSA/Mesa: the descriptor was preloaded; move it into place if it is not
    // already in the chosen register.
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}
8655ffd83dbSDimitry Andric 
isSupportedStackID(TargetStackID::Value ID) const8660b57cec5SDimitry Andric bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
8670b57cec5SDimitry Andric   switch (ID) {
8680b57cec5SDimitry Andric   case TargetStackID::Default:
8690b57cec5SDimitry Andric   case TargetStackID::NoAlloc:
8700b57cec5SDimitry Andric   case TargetStackID::SGPRSpill:
8710b57cec5SDimitry Andric     return true;
872e8d8bef9SDimitry Andric   case TargetStackID::ScalableVector:
873fe6060f1SDimitry Andric   case TargetStackID::WasmLocal:
8748bcb0991SDimitry Andric     return false;
8750b57cec5SDimitry Andric   }
8760b57cec5SDimitry Andric   llvm_unreachable("Invalid TargetStackID::Value");
8770b57cec5SDimitry Andric }
8780b57cec5SDimitry Andric 
// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
//
// A free wave-mask SGPR (found via findScratchNonCalleeSaveRegister) receives
// the previous EXEC value; the caller is expected to restore EXEC from it.
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
                                     MachineFunction &MF,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, bool IsProlog,
                                     bool EnableInactiveLanes) {
  Register ScratchExecCopy;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // Seed liveness information at the insertion point before searching for a
  // free register.
  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

  ScratchExecCopy = findScratchNonCalleeSaveRegister(
      MRI, LiveUnits, *TRI.getWaveMaskRegClass());
  if (!ScratchExecCopy)
    report_fatal_error("failed to find free scratch register");

  // Keep the chosen register marked live so later searches don't reuse it.
  LiveUnits.addReg(ScratchExecCopy);

  // With an all-ones operand, XOR_SAVEEXEC flips EXEC to the previously
  // inactive lanes, while OR_SAVEEXEC enables every lane; both save the old
  // EXEC into ScratchExecCopy. Opcode width follows the wave size.
  const unsigned SaveExecOpc =
      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
                                           : AMDGPU::S_OR_SAVEEXEC_B32)
                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
                                           : AMDGPU::S_OR_SAVEEXEC_B64);
  auto SaveExec =
      BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.

  return ScratchExecCopy;
}
9145ffd83dbSDimitry Andric 
// Emit the prologue stores for all callee-saved state: first the whole-wave
// mode (WWM) VGPRs, then the prolog/epilog SGPR spills. \p FrameReg is the
// base register for the frame accesses (SP or FP, chosen by the caller).
// \p FramePtrRegScratchCopy, when valid, holds the incoming FP value that
// was moved aside before FP was rewritten; it is spilled in place of FP.
void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
  // might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);

  // Store each (VGPR, frame index) pair relative to FrameReg.
  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  StoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      // EXEC was already saved above; just enable all lanes for the
      // callee-saved VGPR stores.
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ true,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // Restore the original EXEC mask now that the WWM stores are done.
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP spill:
    // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
    // Otherwise, FP has been moved to a temporary register and spill it
    // instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    // Only update the local liveness tracker if it was initialized for this
    // insertion point.
    if (!LiveUnits.empty()) {
      for (MCPhysReg Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}
1000bdd1243dSDimitry Andric 
// Emit the epilogue restores for all callee-saved state: first the
// prolog/epilog SGPR spills, then the whole-wave mode (WWM) VGPRs — the
// mirror image of emitCSRSpillStores. \p FrameReg is the base register for
// the frame accesses. \p FramePtrRegScratchCopy, when valid, receives the
// reloaded FP value; the caller copies it into FP after all restores.
void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP restore:
    // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
    // the FP value to a temporary register. The frame pointer should be
    // overwritten only at the end when all other spills are restored from
    // current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
  // this, we might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ false, /*EnableInactiveLanes*/ true);

  // Reload each (VGPR, frame index) pair relative to FrameReg.
  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      // EXEC was already saved above; just enable all lanes for the
      // callee-saved VGPR reloads.
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ false,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // Restore the original EXEC mask now that the WWM reloads are done.
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}
1068fe6060f1SDimitry Andric 
// Emit the function prologue for non-entry functions: set up SP for chain
// functions, save callee-saved registers (via emitCSRSpillStores), establish
// the frame pointer (with realignment when required), set up the base
// pointer, and bump SP by the (rounded) frame size.
void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  // Kernels/entry functions have a completely different prologue.
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LiveRegUnits LiveUnits;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  // DebugLoc must be unknown since the first instruction with DebugLoc is used
  // to determine the end of the prologue.
  DebugLoc DL;

  if (FuncInfo->isChainFunction()) {
    // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but
    // are free to set one up if they need it.
    bool UseSP = requiresStackPointerReference(MF);
    if (UseSP) {
      assert(StackPtrReg != AMDGPU::SP_REG);

      // Initialize SP to the scaled stack size (no incoming SP to add to).
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
          .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
    }
  }

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;

  // Stack realignment forces the use of a frame pointer.
  if (TRI.hasStackRealignment(MF))
    HasFP = true;

  Register FramePtrRegScratchCopy;
  if (!HasFP && !hasFP(MF)) {
    // Emit the CSR spill stores with SP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
                       FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                       FramePtrRegScratchCopy);
  } else {
    // CSR spill stores will use FP as base register.
    Register SGPRForFPSaveRestoreCopy =
        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
    if (SGPRForFPSaveRestoreCopy) {
      // Copy FP to the scratch register now and emit the CFI entry. It avoids
      // the extra FP copy needed in the other two cases when FP is spilled to
      // memory or to a VGPR lane.
      PrologEpilogSGPRSpillBuilder SB(
          FramePtrReg,
          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
          DL, TII, TRI, LiveUnits, FramePtrReg);
      SB.save();
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      // Copy FP into a new scratch register so that its previous value can be
      // spilled after setting up the new frame.
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
          .addReg(FramePtrReg);
    }
  }

  if (HasFP) {
    // Realigned frame: reserve extra space for the alignment padding and
    // round FP up to the required alignment.
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveUnits.empty()) {
      LiveUnits.init(TRI);
      LiveUnits.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // No realignment needed: FP is simply the incoming SP.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If FP is used, emit the CSR spills with FP base register.
  if (HasFP) {
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                       FramePtrRegScratchCopy);
    // The old FP value has been spilled; its temporary home is free again.
    if (FramePtrRegScratchCopy)
      LiveUnits.removeReg(FramePtrRegScratchCopy);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (HasFP && RoundedSize != 0) {
    // Allocate the frame: advance SP by the scaled, rounded frame size.
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  // Sanity checks (debug builds only): FP/BP must be saved iff they are used.
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  (void)FPSaved;
  assert((!HasFP || FPSaved) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
  (void)BPSaved;
  assert((!HasBP || BPSaved) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
}
12200b57cec5SDimitry Andric 
// Emit the function epilogue for non-entry functions: restore callee-saved
// registers (via emitCSRSpillRestores), deallocate the stack frame by
// rewinding SP, and restore the frame pointer last.
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  // Entry functions have no epilogue work to do here.
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  LiveRegUnits LiveUnits;
  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.end();
  DebugLoc DL;
  if (!MBB.empty()) {
    // Take the DebugLoc from the last real instruction, but insert before
    // the terminators.
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  // Mirror the prologue: a realigned stack allocated extra alignment padding.
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);

  Register FramePtrRegScratchCopy;
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  if (FPSaved) {
    // CSR spill restores should use FP as base register. If
    // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
    // into a new scratch register and copy to FP later when other registers are
    // restored from the current stack frame.
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
    if (SGPRForFPSaveRestoreCopy) {
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
    }

    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                         FramePtrRegScratchCopy);
  }

  if (RoundedSize != 0 && hasFP(MF)) {
    // Deallocate the frame: rewind SP by the scaled, rounded frame size.
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
        .setMIFlag(MachineInstr::FrameDestroy);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  if (FPSaved) {
    // Insert the copy to restore FP.
    Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
                                               : FramePtrRegScratchCopy;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
            .addReg(SrcReg);
    if (SGPRForFPSaveRestoreCopy)
      MIB.setMIFlag(MachineInstr::FrameDestroy);
  } else {
    // Insert the CSR spill restores with SP as the base register.
    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
                         FramePtrRegScratchCopy);
  }
}
13000b57cec5SDimitry Andric 
13010b57cec5SDimitry Andric #ifndef NDEBUG
allSGPRSpillsAreDead(const MachineFunction & MF)1302e8d8bef9SDimitry Andric static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
1303e8d8bef9SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
1304e8d8bef9SDimitry Andric   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
13050b57cec5SDimitry Andric   for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
13060b57cec5SDimitry Andric        I != E; ++I) {
13070b57cec5SDimitry Andric     if (!MFI.isDeadObjectIndex(I) &&
13080b57cec5SDimitry Andric         MFI.getStackID(I) == TargetStackID::SGPRSpill &&
1309bdd1243dSDimitry Andric         !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
13100b57cec5SDimitry Andric       return false;
13110b57cec5SDimitry Andric     }
13120b57cec5SDimitry Andric   }
13130b57cec5SDimitry Andric 
13140b57cec5SDimitry Andric   return true;
13150b57cec5SDimitry Andric }
13160b57cec5SDimitry Andric #endif
13170b57cec5SDimitry Andric 
getFrameIndexReference(const MachineFunction & MF,int FI,Register & FrameReg) const1318e8d8bef9SDimitry Andric StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1319e8d8bef9SDimitry Andric                                                     int FI,
13205ffd83dbSDimitry Andric                                                     Register &FrameReg) const {
13210b57cec5SDimitry Andric   const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
13220b57cec5SDimitry Andric 
13230b57cec5SDimitry Andric   FrameReg = RI->getFrameRegister(MF);
1324e8d8bef9SDimitry Andric   return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
13250b57cec5SDimitry Andric }
13260b57cec5SDimitry Andric 
processFunctionBeforeFrameFinalized(MachineFunction & MF,RegScavenger * RS) const13270b57cec5SDimitry Andric void SIFrameLowering::processFunctionBeforeFrameFinalized(
13280b57cec5SDimitry Andric   MachineFunction &MF,
13290b57cec5SDimitry Andric   RegScavenger *RS) const {
13300b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
13310b57cec5SDimitry Andric 
13320b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1333fe6060f1SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
13340b57cec5SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
1335fe6060f1SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
13360b57cec5SDimitry Andric   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
13370b57cec5SDimitry Andric 
1338bdd1243dSDimitry Andric   // Allocate spill slots for WWM reserved VGPRs.
13395f757f3fSDimitry Andric   // For chain functions, we only need to do this if we have calls to
13405f757f3fSDimitry Andric   // llvm.amdgcn.cs.chain.
13415f757f3fSDimitry Andric   bool IsChainWithoutCalls =
13425f757f3fSDimitry Andric       FuncInfo->isChainFunction() && !MF.getFrameInfo().hasTailCall();
13435f757f3fSDimitry Andric   if (!FuncInfo->isEntryFunction() && !IsChainWithoutCalls) {
1344bdd1243dSDimitry Andric     for (Register Reg : FuncInfo->getWWMReservedRegs()) {
1345bdd1243dSDimitry Andric       const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1346bdd1243dSDimitry Andric       FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
1347bdd1243dSDimitry Andric                                  TRI->getSpillAlign(*RC));
1348bdd1243dSDimitry Andric     }
134981ad6265SDimitry Andric   }
135081ad6265SDimitry Andric 
1351fe6060f1SDimitry Andric   const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1352fe6060f1SDimitry Andric                                && EnableSpillVGPRToAGPR;
1353fe6060f1SDimitry Andric 
1354fe6060f1SDimitry Andric   if (SpillVGPRToAGPR) {
1355fe6060f1SDimitry Andric     // To track the spill frame indices handled in this pass.
1356fe6060f1SDimitry Andric     BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
13570eae32dcSDimitry Andric     BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
1358fe6060f1SDimitry Andric 
1359fe6060f1SDimitry Andric     bool SeenDbgInstr = false;
1360fe6060f1SDimitry Andric 
1361fe6060f1SDimitry Andric     for (MachineBasicBlock &MBB : MF) {
1362349cc55cSDimitry Andric       for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
13630eae32dcSDimitry Andric         int FrameIndex;
1364fe6060f1SDimitry Andric         if (MI.isDebugInstr())
1365fe6060f1SDimitry Andric           SeenDbgInstr = true;
1366fe6060f1SDimitry Andric 
1367fe6060f1SDimitry Andric         if (TII->isVGPRSpill(MI)) {
1368fe6060f1SDimitry Andric           // Try to eliminate stack used by VGPR spills before frame
1369fe6060f1SDimitry Andric           // finalization.
1370fe6060f1SDimitry Andric           unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1371fe6060f1SDimitry Andric                                                      AMDGPU::OpName::vaddr);
1372fe6060f1SDimitry Andric           int FI = MI.getOperand(FIOp).getIndex();
1373fe6060f1SDimitry Andric           Register VReg =
1374fe6060f1SDimitry Andric             TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1375fe6060f1SDimitry Andric           if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1376fe6060f1SDimitry Andric                                                 TRI->isAGPR(MRI, VReg))) {
137706c3fb27SDimitry Andric             assert(RS != nullptr);
13785f757f3fSDimitry Andric             RS->enterBasicBlockEnd(MBB);
13795f757f3fSDimitry Andric             RS->backward(std::next(MI.getIterator()));
1380fe6060f1SDimitry Andric             TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1381fe6060f1SDimitry Andric             SpillFIs.set(FI);
1382fe6060f1SDimitry Andric             continue;
1383fe6060f1SDimitry Andric           }
13840eae32dcSDimitry Andric         } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
13850eae32dcSDimitry Andric                    TII->isLoadFromStackSlot(MI, FrameIndex))
138604eeddc0SDimitry Andric           if (!MFI.isFixedObjectIndex(FrameIndex))
13870eae32dcSDimitry Andric             NonVGPRSpillFIs.set(FrameIndex);
1388fe6060f1SDimitry Andric       }
1389fe6060f1SDimitry Andric     }
13900eae32dcSDimitry Andric 
139181ad6265SDimitry Andric     // Stack slot coloring may assign different objects to the same stack slot.
13920eae32dcSDimitry Andric     // If not, then the VGPR to AGPR spill slot is dead.
13930eae32dcSDimitry Andric     for (unsigned FI : SpillFIs.set_bits())
13940eae32dcSDimitry Andric       if (!NonVGPRSpillFIs.test(FI))
13950eae32dcSDimitry Andric         FuncInfo->setVGPRToAGPRSpillDead(FI);
1396fe6060f1SDimitry Andric 
1397fe6060f1SDimitry Andric     for (MachineBasicBlock &MBB : MF) {
1398fe6060f1SDimitry Andric       for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1399fe6060f1SDimitry Andric         MBB.addLiveIn(Reg);
1400fe6060f1SDimitry Andric 
1401fe6060f1SDimitry Andric       for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1402fe6060f1SDimitry Andric         MBB.addLiveIn(Reg);
1403fe6060f1SDimitry Andric 
1404fe6060f1SDimitry Andric       MBB.sortUniqueLiveIns();
1405fe6060f1SDimitry Andric 
1406fe6060f1SDimitry Andric       if (!SpillFIs.empty() && SeenDbgInstr) {
1407fe6060f1SDimitry Andric         // FIXME: The dead frame indices are replaced with a null register from
1408fe6060f1SDimitry Andric         // the debug value instructions. We should instead, update it with the
1409fe6060f1SDimitry Andric         // correct register value. But not sure the register value alone is
1410fe6060f1SDimitry Andric         for (MachineInstr &MI : MBB) {
1411fe6060f1SDimitry Andric           if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
1412bdd1243dSDimitry Andric               !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) &&
1413fe6060f1SDimitry Andric               SpillFIs[MI.getOperand(0).getIndex()]) {
1414fe6060f1SDimitry Andric             MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
1415fe6060f1SDimitry Andric           }
1416fe6060f1SDimitry Andric         }
1417fe6060f1SDimitry Andric       }
1418fe6060f1SDimitry Andric     }
1419fe6060f1SDimitry Andric   }
1420fe6060f1SDimitry Andric 
142181ad6265SDimitry Andric   // At this point we've already allocated all spilled SGPRs to VGPRs if we
142281ad6265SDimitry Andric   // can. Any remaining SGPR spills will go to memory, so move them back to the
142381ad6265SDimitry Andric   // default stack.
142481ad6265SDimitry Andric   bool HaveSGPRToVMemSpill =
142581ad6265SDimitry Andric       FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
1426e8d8bef9SDimitry Andric   assert(allSGPRSpillsAreDead(MF) &&
14270b57cec5SDimitry Andric          "SGPR spill should have been removed in SILowerSGPRSpills");
14280b57cec5SDimitry Andric 
14290b57cec5SDimitry Andric   // FIXME: The other checks should be redundant with allStackObjectsAreDead,
14300b57cec5SDimitry Andric   // but currently hasNonSpillStackObjects is set only from source
14310b57cec5SDimitry Andric   // allocas. Stack temps produced from legalization are not counted currently.
14320b57cec5SDimitry Andric   if (!allStackObjectsAreDead(MFI)) {
14330b57cec5SDimitry Andric     assert(RS && "RegScavenger required if spilling");
14340b57cec5SDimitry Andric 
1435fe6060f1SDimitry Andric     // Add an emergency spill slot
1436fe6060f1SDimitry Andric     RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
143781ad6265SDimitry Andric 
143881ad6265SDimitry Andric     // If we are spilling SGPRs to memory with a large frame, we may need a
143981ad6265SDimitry Andric     // second VGPR emergency frame index.
144081ad6265SDimitry Andric     if (HaveSGPRToVMemSpill &&
144181ad6265SDimitry Andric         allocateScavengingFrameIndexesNearIncomingSP(MF)) {
144281ad6265SDimitry Andric       RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
144381ad6265SDimitry Andric     }
144481ad6265SDimitry Andric   }
144581ad6265SDimitry Andric }
144681ad6265SDimitry Andric 
processFunctionBeforeFrameIndicesReplaced(MachineFunction & MF,RegScavenger * RS) const144781ad6265SDimitry Andric void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
144881ad6265SDimitry Andric     MachineFunction &MF, RegScavenger *RS) const {
144981ad6265SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
145081ad6265SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
145181ad6265SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
145281ad6265SDimitry Andric   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
145381ad6265SDimitry Andric 
145481ad6265SDimitry Andric   if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
145581ad6265SDimitry Andric     // On gfx908, we had initially reserved highest available VGPR for AGPR
145681ad6265SDimitry Andric     // copy. Now since we are done with RA, check if there exist an unused VGPR
145781ad6265SDimitry Andric     // which is lower than the eariler reserved VGPR before RA. If one exist,
145881ad6265SDimitry Andric     // use it for AGPR copy instead of one reserved before RA.
145981ad6265SDimitry Andric     Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
146081ad6265SDimitry Andric     Register UnusedLowVGPR =
146181ad6265SDimitry Andric         TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
146281ad6265SDimitry Andric     if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
146381ad6265SDimitry Andric                           TRI->getHWRegIndex(VGPRForAGPRCopy))) {
146406c3fb27SDimitry Andric       // Reserve this newly identified VGPR (for AGPR copy)
146506c3fb27SDimitry Andric       // reserved registers should already be frozen at this point
146606c3fb27SDimitry Andric       // so we can avoid calling MRI.freezeReservedRegs and just use
146706c3fb27SDimitry Andric       // MRI.reserveReg
146881ad6265SDimitry Andric       FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
146906c3fb27SDimitry Andric       MRI.reserveReg(UnusedLowVGPR, TRI);
147081ad6265SDimitry Andric     }
14710b57cec5SDimitry Andric   }
147206c3fb27SDimitry Andric   // We initally reserved the highest available SGPR pair for long branches
147306c3fb27SDimitry Andric   // now, after RA, we shift down to a lower unused one if one exists
147406c3fb27SDimitry Andric   Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
147506c3fb27SDimitry Andric   Register UnusedLowSGPR =
147606c3fb27SDimitry Andric       TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
147706c3fb27SDimitry Andric   // If LongBranchReservedReg is null then we didn't find a long branch
147806c3fb27SDimitry Andric   // and never reserved a register to begin with so there is nothing to
147906c3fb27SDimitry Andric   // shift down. Then if UnusedLowSGPR is null, there isn't available lower
148006c3fb27SDimitry Andric   // register to use so just keep the original one we set.
148106c3fb27SDimitry Andric   if (LongBranchReservedReg && UnusedLowSGPR) {
148206c3fb27SDimitry Andric     FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
148306c3fb27SDimitry Andric     MRI.reserveReg(UnusedLowSGPR, TRI);
148406c3fb27SDimitry Andric   }
14850b57cec5SDimitry Andric }
14860b57cec5SDimitry Andric 
// The special SGPR spills, such as the ones needed for the FP, the BP, or any
// other reserved registers, are delayed until frame lowering.
void SIFrameLowering::determinePrologEpilogSGPRSaves(
    MachineFunction &MF, BitVector &SavedVGPRs,
    bool NeedExecCopyReservedReg) const {
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  // Track registers that are unavailable as scratch throughout the function.
  LiveRegUnits LiveUnits;
  LiveUnits.init(*TRI);
  // Initially mark callee saved registers as used so we will not choose them
  // while looking for scratch SGPRs.
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (unsigned I = 0; CSRegs[I]; ++I)
    LiveUnits.addReg(CSRegs[I]);

  // Register class used for EXEC-mask copies.
  const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();

  if (NeedExecCopyReservedReg) {
    Register ReservedReg = MFI->getSGPRForEXECCopy();
    assert(ReservedReg && "Should have reserved an SGPR for EXEC copy.");
    Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
    if (UnusedScratchReg) {
      // If found any unused scratch SGPR, reserve the register itself for Exec
      // copy and there is no need for any spills in that case.
      MFI->setSGPRForEXECCopy(UnusedScratchReg);
      // Mark it used so the FP/BP searches below cannot pick it as well.
      LiveUnits.addReg(UnusedScratchReg);
    } else {
      // Needs spill. Note IncludeScratchCopy=false: we already know no scratch
      // SGPR is free, so only a VGPR lane or a memory slot can work here.
      assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedReg) &&
             "Re-reserving spill slot for EXEC copy register");
      getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedReg, RC,
                                     /*IncludeScratchCopy=*/false);
    }
  }

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // Reserve a save/restore mechanism for the frame pointer if one is (or will
  // be) needed.
  if (WillHaveFP || hasFP(MF)) {
    Register FramePtrReg = MFI->getFrameOffsetReg();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
  }

  // Likewise for the base pointer, when stack realignment requires one.
  if (TRI->hasBasePointer(MF)) {
    Register BasePtrReg = TRI->getBaseRegister();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
           "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
  }
}
1551e8d8bef9SDimitry Andric 
1552bdd1243dSDimitry Andric // Only report VGPRs to generic code.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // If this is a function with the amdgpu_cs_chain[_preserve] calling
  // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
  // we don't need to save and restore anything.
  if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
    return;

  MFI->shiftSpillPhysVGPRsToLowestRange(MF);

  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  // Entry functions have no callers, so nothing needs to be saved for them.
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  bool NeedExecCopyReservedReg = false;

  // Last return-like instruction seen; its register operands carry the return
  // value and must not be clobbered by CSR restores.
  MachineInstr *ReturnMI = nullptr;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // WRITELANE instructions used for SGPR spills can overwrite the inactive
      // lanes of VGPRs and callee must spill and restore them even if they are
      // marked Caller-saved.

      // TODO: Handle this elsewhere at an early point. Walking through all MBBs
      // here would be a bad heuristic. A better way should be by calling
      // allocateWWMSpill during the regalloc pipeline whenever a physical
      // register is allocated for the intended virtual registers.
      if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
        MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
      else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
        MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
      else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
        // WWM spills need EXEC manipulated around them, which requires an SGPR
        // to hold a copy of the exec mask.
        NeedExecCopyReservedReg = true;
      else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
               MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
               (MFI->isChainFunction() &&
                TII->isChainCallOpcode(MI.getOpcode()))) {
        // We expect every return to carry the same number of register
        // operands, so it doesn't matter which one we remember.
        assert(!ReturnMI ||
               (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
                count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
        ReturnMI = &MI;
      }
    }
  }

  // Remove any VGPRs used in the return value because these do not need to be
  // saved. This prevents CSR restore from clobbering return VGPRs.
  if (ReturnMI) {
    for (auto &Op : ReturnMI->operands()) {
      if (Op.isReg())
        SavedVGPRs.reset(Op.getReg());
    }
  }

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // gfx908 has no direct AGPR loads and stores, so spilling an AGPR would also
  // require a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  // Handle the special SGPR saves (EXEC copy, FP, BP) now that we know what
  // else is live.
  determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);

  // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
  // allow the default insertion to handle them.
  for (auto &Reg : MFI->getWWMSpills())
    SavedVGPRs.reset(Reg.first);

  // Mark all lane VGPRs as BB LiveIns.
  for (MachineBasicBlock &MBB : MF) {
    for (auto &Reg : MFI->getWWMSpills())
      MBB.addLiveIn(Reg.first);

    MBB.sortUniqueLiveIns();
  }
}
16380b57cec5SDimitry Andric 
determineCalleeSavesSGPR(MachineFunction & MF,BitVector & SavedRegs,RegScavenger * RS) const16390b57cec5SDimitry Andric void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
16400b57cec5SDimitry Andric                                                BitVector &SavedRegs,
16410b57cec5SDimitry Andric                                                RegScavenger *RS) const {
16420b57cec5SDimitry Andric   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
16430b57cec5SDimitry Andric   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
16440b57cec5SDimitry Andric   if (MFI->isEntryFunction())
16450b57cec5SDimitry Andric     return;
16460b57cec5SDimitry Andric 
16470b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
16480b57cec5SDimitry Andric   const SIRegisterInfo *TRI = ST.getRegisterInfo();
16490b57cec5SDimitry Andric 
16500b57cec5SDimitry Andric   // The SP is specifically managed and we don't want extra spills of it.
16510b57cec5SDimitry Andric   SavedRegs.reset(MFI->getStackPtrOffsetReg());
1652e8d8bef9SDimitry Andric 
1653e8d8bef9SDimitry Andric   const BitVector AllSavedRegs = SavedRegs;
1654fe6060f1SDimitry Andric   SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1655e8d8bef9SDimitry Andric 
1656349cc55cSDimitry Andric   // We have to anticipate introducing CSR VGPR spills or spill of caller
1657349cc55cSDimitry Andric   // save VGPR reserved for SGPR spills as we now always create stack entry
165804eeddc0SDimitry Andric   // for it, if we don't have any stack objects already, since we require a FP
165904eeddc0SDimitry Andric   // if there is a call and stack. We will allocate a VGPR for SGPR spills if
166004eeddc0SDimitry Andric   // there are any SGPR spills. Whether they are CSR spills or otherwise.
1661e8d8bef9SDimitry Andric   MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1662349cc55cSDimitry Andric   const bool WillHaveFP =
166304eeddc0SDimitry Andric       FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1664e8d8bef9SDimitry Andric 
1665e8d8bef9SDimitry Andric   // FP will be specially managed like SP.
1666e8d8bef9SDimitry Andric   if (WillHaveFP || hasFP(MF))
1667e8d8bef9SDimitry Andric     SavedRegs.reset(MFI->getFrameOffsetReg());
166881ad6265SDimitry Andric 
166981ad6265SDimitry Andric   // Return address use with return instruction is hidden through the SI_RETURN
167081ad6265SDimitry Andric   // pseudo. Given that and since the IPRA computes actual register usage and
167181ad6265SDimitry Andric   // does not use CSR list, the clobbering of return address by function calls
167281ad6265SDimitry Andric   // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
167381ad6265SDimitry Andric   // usage collection. This will ensure save/restore of return address happens
167481ad6265SDimitry Andric   // in those scenarios.
167581ad6265SDimitry Andric   const MachineRegisterInfo &MRI = MF.getRegInfo();
167681ad6265SDimitry Andric   Register RetAddrReg = TRI->getReturnAddressReg(MF);
167781ad6265SDimitry Andric   if (!MFI->isEntryFunction() &&
167881ad6265SDimitry Andric       (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
167981ad6265SDimitry Andric     SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
168081ad6265SDimitry Andric     SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
168181ad6265SDimitry Andric   }
16820b57cec5SDimitry Andric }
16830b57cec5SDimitry Andric 
assignCalleeSavedSpillSlots(MachineFunction & MF,const TargetRegisterInfo * TRI,std::vector<CalleeSavedInfo> & CSI) const16840b57cec5SDimitry Andric bool SIFrameLowering::assignCalleeSavedSpillSlots(
16850b57cec5SDimitry Andric     MachineFunction &MF, const TargetRegisterInfo *TRI,
16860b57cec5SDimitry Andric     std::vector<CalleeSavedInfo> &CSI) const {
16870b57cec5SDimitry Andric   if (CSI.empty())
16880b57cec5SDimitry Andric     return true; // Early exit if no callee saved registers are modified!
16890b57cec5SDimitry Andric 
16900b57cec5SDimitry Andric   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
16915ffd83dbSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
16925ffd83dbSDimitry Andric   const SIRegisterInfo *RI = ST.getRegisterInfo();
16935ffd83dbSDimitry Andric   Register FramePtrReg = FuncInfo->getFrameOffsetReg();
16945ffd83dbSDimitry Andric   Register BasePtrReg = RI->getBaseRegister();
1695bdd1243dSDimitry Andric   Register SGPRForFPSaveRestoreCopy =
1696bdd1243dSDimitry Andric       FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1697bdd1243dSDimitry Andric   Register SGPRForBPSaveRestoreCopy =
1698bdd1243dSDimitry Andric       FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1699bdd1243dSDimitry Andric   if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1700bdd1243dSDimitry Andric     return false;
1701bdd1243dSDimitry Andric 
17025ffd83dbSDimitry Andric   unsigned NumModifiedRegs = 0;
17035ffd83dbSDimitry Andric 
1704bdd1243dSDimitry Andric   if (SGPRForFPSaveRestoreCopy)
17055ffd83dbSDimitry Andric     NumModifiedRegs++;
1706bdd1243dSDimitry Andric   if (SGPRForBPSaveRestoreCopy)
17075ffd83dbSDimitry Andric     NumModifiedRegs++;
17085ffd83dbSDimitry Andric 
17090b57cec5SDimitry Andric   for (auto &CS : CSI) {
1710bdd1243dSDimitry Andric     if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) {
1711bdd1243dSDimitry Andric       CS.setDstReg(SGPRForFPSaveRestoreCopy);
17125ffd83dbSDimitry Andric       if (--NumModifiedRegs)
17135ffd83dbSDimitry Andric         break;
1714bdd1243dSDimitry Andric     } else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) {
1715bdd1243dSDimitry Andric       CS.setDstReg(SGPRForBPSaveRestoreCopy);
17165ffd83dbSDimitry Andric       if (--NumModifiedRegs)
17170b57cec5SDimitry Andric         break;
17180b57cec5SDimitry Andric     }
17190b57cec5SDimitry Andric   }
17200b57cec5SDimitry Andric 
17210b57cec5SDimitry Andric   return false;
17220b57cec5SDimitry Andric }
17230b57cec5SDimitry Andric 
allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction & MF) const17244824e7fdSDimitry Andric bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
17254824e7fdSDimitry Andric   const MachineFunction &MF) const {
17264824e7fdSDimitry Andric 
17274824e7fdSDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
17284824e7fdSDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
17295f757f3fSDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
17304824e7fdSDimitry Andric   uint64_t EstStackSize = MFI.estimateStackSize(MF);
17314824e7fdSDimitry Andric   uint64_t MaxOffset = EstStackSize - 1;
17324824e7fdSDimitry Andric 
17334824e7fdSDimitry Andric   // We need the emergency stack slots to be allocated in range of the
17344824e7fdSDimitry Andric   // MUBUF/flat scratch immediate offset from the base register, so assign these
17354824e7fdSDimitry Andric   // first at the incoming SP position.
17364824e7fdSDimitry Andric   //
17374824e7fdSDimitry Andric   // TODO: We could try sorting the objects to find a hole in the first bytes
17384824e7fdSDimitry Andric   // rather than allocating as close to possible. This could save a lot of space
17394824e7fdSDimitry Andric   // on frames with alignment requirements.
17404824e7fdSDimitry Andric   if (ST.enableFlatScratch()) {
17414824e7fdSDimitry Andric     if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
17424824e7fdSDimitry Andric                                SIInstrFlags::FlatScratch))
17434824e7fdSDimitry Andric       return false;
17444824e7fdSDimitry Andric   } else {
17455f757f3fSDimitry Andric     if (TII->isLegalMUBUFImmOffset(MaxOffset))
17464824e7fdSDimitry Andric       return false;
17474824e7fdSDimitry Andric   }
17484824e7fdSDimitry Andric 
17494824e7fdSDimitry Andric   return true;
17504824e7fdSDimitry Andric }
17514824e7fdSDimitry Andric 
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const17520b57cec5SDimitry Andric MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
17530b57cec5SDimitry Andric   MachineFunction &MF,
17540b57cec5SDimitry Andric   MachineBasicBlock &MBB,
17550b57cec5SDimitry Andric   MachineBasicBlock::iterator I) const {
17560b57cec5SDimitry Andric   int64_t Amount = I->getOperand(0).getImm();
17570b57cec5SDimitry Andric   if (Amount == 0)
17580b57cec5SDimitry Andric     return MBB.erase(I);
17590b57cec5SDimitry Andric 
17600b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
17610b57cec5SDimitry Andric   const SIInstrInfo *TII = ST.getInstrInfo();
17620b57cec5SDimitry Andric   const DebugLoc &DL = I->getDebugLoc();
17630b57cec5SDimitry Andric   unsigned Opc = I->getOpcode();
17640b57cec5SDimitry Andric   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
17650b57cec5SDimitry Andric   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
17660b57cec5SDimitry Andric 
17670b57cec5SDimitry Andric   if (!hasReservedCallFrame(MF)) {
17685ffd83dbSDimitry Andric     Amount = alignTo(Amount, getStackAlign());
17690b57cec5SDimitry Andric     assert(isUInt<32>(Amount) && "exceeded stack address space size");
17700b57cec5SDimitry Andric     const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
17715ffd83dbSDimitry Andric     Register SPReg = MFI->getStackPtrOffsetReg();
17720b57cec5SDimitry Andric 
1773fe6060f1SDimitry Andric     Amount *= getScratchScaleFactor(ST);
1774fe6060f1SDimitry Andric     if (IsDestroy)
1775fe6060f1SDimitry Andric       Amount = -Amount;
1776349cc55cSDimitry Andric     auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
17770b57cec5SDimitry Andric         .addReg(SPReg)
1778fe6060f1SDimitry Andric         .addImm(Amount);
1779349cc55cSDimitry Andric     Add->getOperand(3).setIsDead(); // Mark SCC as dead.
17800b57cec5SDimitry Andric   } else if (CalleePopAmount != 0) {
17810b57cec5SDimitry Andric     llvm_unreachable("is this used?");
17820b57cec5SDimitry Andric   }
17830b57cec5SDimitry Andric 
17840b57cec5SDimitry Andric   return MBB.erase(I);
17850b57cec5SDimitry Andric }
17860b57cec5SDimitry Andric 
1787e8d8bef9SDimitry Andric /// Returns true if the frame will require a reference to the stack pointer.
1788e8d8bef9SDimitry Andric ///
1789e8d8bef9SDimitry Andric /// This is the set of conditions common to setting up the stack pointer in a
1790e8d8bef9SDimitry Andric /// kernel, and for using a frame pointer in a callable function.
1791e8d8bef9SDimitry Andric ///
1792e8d8bef9SDimitry Andric /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
1793e8d8bef9SDimitry Andric /// references SP.
frameTriviallyRequiresSP(const MachineFrameInfo & MFI)1794e8d8bef9SDimitry Andric static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
1795e8d8bef9SDimitry Andric   return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
1796e8d8bef9SDimitry Andric }
1797e8d8bef9SDimitry Andric 
1798e8d8bef9SDimitry Andric // The FP for kernels is always known 0, so we never really need to setup an
1799e8d8bef9SDimitry Andric // explicit register for it. However, DisableFramePointerElim will force us to
1800e8d8bef9SDimitry Andric // use a register for it.
hasFP(const MachineFunction & MF) const18010b57cec5SDimitry Andric bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
18020b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
18035ffd83dbSDimitry Andric 
18045f757f3fSDimitry Andric   // For entry & chain functions we can use an immediate offset in most cases,
18055f757f3fSDimitry Andric   // so the presence of calls doesn't imply we need a distinct frame pointer.
18065ffd83dbSDimitry Andric   if (MFI.hasCalls() &&
18075f757f3fSDimitry Andric       !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
18085f757f3fSDimitry Andric       !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
18090b57cec5SDimitry Andric     // All offsets are unsigned, so need to be addressed in the same direction
18100b57cec5SDimitry Andric     // as stack growth.
18110b57cec5SDimitry Andric 
18120b57cec5SDimitry Andric     // FIXME: This function is pretty broken, since it can be called before the
18130b57cec5SDimitry Andric     // frame layout is determined or CSR spills are inserted.
18145ffd83dbSDimitry Andric     return MFI.getStackSize() != 0;
18150b57cec5SDimitry Andric   }
18160b57cec5SDimitry Andric 
1817e8d8bef9SDimitry Andric   return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
1818fe6060f1SDimitry Andric          MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
1819fe6060f1SDimitry Andric              MF) ||
18200b57cec5SDimitry Andric          MF.getTarget().Options.DisableFramePointerElim(MF);
18210b57cec5SDimitry Andric }
1822e8d8bef9SDimitry Andric 
1823e8d8bef9SDimitry Andric // This is essentially a reduced version of hasFP for entry functions. Since the
1824e8d8bef9SDimitry Andric // stack pointer is known 0 on entry to kernels, we never really need an FP
1825e8d8bef9SDimitry Andric // register. We may need to initialize the stack pointer depending on the frame
1826e8d8bef9SDimitry Andric // properties, which logically overlaps many of the cases where an ordinary
1827e8d8bef9SDimitry Andric // function would require an FP.
18285f757f3fSDimitry Andric // Also used for chain functions. While not technically entry functions, chain
18295f757f3fSDimitry Andric // functions may need to set up a stack pointer in some situations.
requiresStackPointerReference(const MachineFunction & MF) const1830e8d8bef9SDimitry Andric bool SIFrameLowering::requiresStackPointerReference(
1831e8d8bef9SDimitry Andric     const MachineFunction &MF) const {
1832e8d8bef9SDimitry Andric   // Callable functions always require a stack pointer reference.
18335f757f3fSDimitry Andric   assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() ||
18345f757f3fSDimitry Andric           MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) &&
18355f757f3fSDimitry Andric          "only expected to call this for entry points and chain functions");
1836e8d8bef9SDimitry Andric 
1837e8d8bef9SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
1838e8d8bef9SDimitry Andric 
1839e8d8bef9SDimitry Andric   // Entry points ordinarily don't need to initialize SP. We have to set it up
1840e8d8bef9SDimitry Andric   // for callees if there are any. Also note tail calls are impossible/don't
1841e8d8bef9SDimitry Andric   // make any sense for kernels.
1842e8d8bef9SDimitry Andric   if (MFI.hasCalls())
1843e8d8bef9SDimitry Andric     return true;
1844e8d8bef9SDimitry Andric 
1845e8d8bef9SDimitry Andric   // We still need to initialize the SP if we're doing anything weird that
1846e8d8bef9SDimitry Andric   // references the SP, like variable sized stack objects.
1847e8d8bef9SDimitry Andric   return frameTriviallyRequiresSP(MFI);
1848e8d8bef9SDimitry Andric }
1849