10b57cec5SDimitry Andric //===----------------------- SIFrameLowering.cpp --------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //==-----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
90b57cec5SDimitry Andric #include "SIFrameLowering.h"
10e8d8bef9SDimitry Andric #include "AMDGPU.h"
11e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
120b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
13e8d8bef9SDimitry Andric #include "SIMachineFunctionInfo.h"
145f757f3fSDimitry Andric #include "llvm/CodeGen/LiveRegUnits.h"
150b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h"
17e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
180b57cec5SDimitry Andric
190b57cec5SDimitry Andric using namespace llvm;
200b57cec5SDimitry Andric
210b57cec5SDimitry Andric #define DEBUG_TYPE "frame-info"
220b57cec5SDimitry Andric
// Hidden command-line knob gating whether VGPRs may be spilled to AGPRs
// (defaults to enabled). ReallyHidden: intended for debugging/triage, not
// end users.
static cl::opt<bool> EnableSpillVGPRToAGPR(
  "amdgpu-spill-vgpr-to-agpr",
  cl::desc("Enable spilling VGPRs to AGPRs"),
  cl::ReallyHidden,
  cl::init(true));
280b57cec5SDimitry Andric
295f757f3fSDimitry Andric // Find a register matching \p RC from \p LiveUnits which is unused and
305f757f3fSDimitry Andric // available throughout the function. On failure, returns AMDGPU::NoRegister.
315f757f3fSDimitry Andric // TODO: Rewrite the loop here to iterate over MCRegUnits instead of
325f757f3fSDimitry Andric // MCRegisters. This should reduce the number of iterations and avoid redundant
335f757f3fSDimitry Andric // checking.
findUnusedRegister(MachineRegisterInfo & MRI,const LiveRegUnits & LiveUnits,const TargetRegisterClass & RC)34bdd1243dSDimitry Andric static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
355f757f3fSDimitry Andric const LiveRegUnits &LiveUnits,
36bdd1243dSDimitry Andric const TargetRegisterClass &RC) {
37bdd1243dSDimitry Andric for (MCRegister Reg : RC) {
385f757f3fSDimitry Andric if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
395f757f3fSDimitry Andric !MRI.isReserved(Reg))
40bdd1243dSDimitry Andric return Reg;
41bdd1243dSDimitry Andric }
42bdd1243dSDimitry Andric return MCRegister();
43bdd1243dSDimitry Andric }
44bdd1243dSDimitry Andric
45fe6060f1SDimitry Andric // Find a scratch register that we can use in the prologue. We avoid using
46fe6060f1SDimitry Andric // callee-save registers since they may appear to be free when this is called
47fe6060f1SDimitry Andric // from canUseAsPrologue (during shrink wrapping), but then no longer be free
48fe6060f1SDimitry Andric // when this is called from emitPrologue.
findScratchNonCalleeSaveRegister(MachineRegisterInfo & MRI,LiveRegUnits & LiveUnits,const TargetRegisterClass & RC,bool Unused=false)495f757f3fSDimitry Andric static MCRegister findScratchNonCalleeSaveRegister(
505f757f3fSDimitry Andric MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
515f757f3fSDimitry Andric const TargetRegisterClass &RC, bool Unused = false) {
520b57cec5SDimitry Andric // Mark callee saved registers as used so we will not choose them.
530b57cec5SDimitry Andric const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
540b57cec5SDimitry Andric for (unsigned i = 0; CSRegs[i]; ++i)
555f757f3fSDimitry Andric LiveUnits.addReg(CSRegs[i]);
560b57cec5SDimitry Andric
570b57cec5SDimitry Andric // We are looking for a register that can be used throughout the entire
580b57cec5SDimitry Andric // function, so any use is unacceptable.
59bdd1243dSDimitry Andric if (Unused)
605f757f3fSDimitry Andric return findUnusedRegister(MRI, LiveUnits, RC);
61bdd1243dSDimitry Andric
625ffd83dbSDimitry Andric for (MCRegister Reg : RC) {
635f757f3fSDimitry Andric if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
640b57cec5SDimitry Andric return Reg;
650b57cec5SDimitry Andric }
660b57cec5SDimitry Andric
675ffd83dbSDimitry Andric return MCRegister();
680b57cec5SDimitry Andric }
690b57cec5SDimitry Andric
7006c3fb27SDimitry Andric /// Query target location for spilling SGPRs
7106c3fb27SDimitry Andric /// \p IncludeScratchCopy : Also look for free scratch SGPRs
getVGPRSpillLaneOrTempRegister(MachineFunction & MF,LiveRegUnits & LiveUnits,Register SGPR,const TargetRegisterClass & RC=AMDGPU::SReg_32_XM0_XEXECRegClass,bool IncludeScratchCopy=true)72bdd1243dSDimitry Andric static void getVGPRSpillLaneOrTempRegister(
735f757f3fSDimitry Andric MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
7406c3fb27SDimitry Andric const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
7506c3fb27SDimitry Andric bool IncludeScratchCopy = true) {
765ffd83dbSDimitry Andric SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
775ffd83dbSDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo();
785ffd83dbSDimitry Andric
795ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
805ffd83dbSDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo();
81bdd1243dSDimitry Andric unsigned Size = TRI->getSpillSize(RC);
82bdd1243dSDimitry Andric Align Alignment = TRI->getSpillAlign(RC);
835ffd83dbSDimitry Andric
84bdd1243dSDimitry Andric // We need to save and restore the given SGPR.
855ffd83dbSDimitry Andric
8606c3fb27SDimitry Andric Register ScratchSGPR;
875f757f3fSDimitry Andric // 1: Try to save the given register into an unused scratch SGPR. The
885f757f3fSDimitry Andric // LiveUnits should have all the callee saved registers marked as used. For
895f757f3fSDimitry Andric // certain cases we skip copy to scratch SGPR.
9006c3fb27SDimitry Andric if (IncludeScratchCopy)
915f757f3fSDimitry Andric ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
92bdd1243dSDimitry Andric
93bdd1243dSDimitry Andric if (!ScratchSGPR) {
94bdd1243dSDimitry Andric int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
955ffd83dbSDimitry Andric TargetStackID::SGPRSpill);
965ffd83dbSDimitry Andric
97bdd1243dSDimitry Andric if (TRI->spillSGPRToVGPR() &&
987a6dacacSDimitry Andric MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
997a6dacacSDimitry Andric /*IsPrologEpilog=*/true)) {
100bdd1243dSDimitry Andric // 2: There's no free lane to spill, and no free register to save the
101bdd1243dSDimitry Andric // SGPR, so we're forced to take another VGPR to use for the spill.
102bdd1243dSDimitry Andric MFI->addToPrologEpilogSGPRSpills(
103bdd1243dSDimitry Andric SGPR, PrologEpilogSGPRSaveRestoreInfo(
104bdd1243dSDimitry Andric SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));
105e8d8bef9SDimitry Andric
1065f757f3fSDimitry Andric LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
107bdd1243dSDimitry Andric dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
1085f757f3fSDimitry Andric << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
1095f757f3fSDimitry Andric << '\n';);
1105ffd83dbSDimitry Andric } else {
111bdd1243dSDimitry Andric // Remove dead <FI> index
112bdd1243dSDimitry Andric MF.getFrameInfo().RemoveStackObject(FI);
113bdd1243dSDimitry Andric // 3: If all else fails, spill the register to memory.
114bdd1243dSDimitry Andric FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
115bdd1243dSDimitry Andric MFI->addToPrologEpilogSGPRSpills(
116bdd1243dSDimitry Andric SGPR,
117bdd1243dSDimitry Andric PrologEpilogSGPRSaveRestoreInfo(SGPRSaveKind::SPILL_TO_MEM, FI));
118bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
119bdd1243dSDimitry Andric << printReg(SGPR, TRI) << '\n');
1205ffd83dbSDimitry Andric }
1215ffd83dbSDimitry Andric } else {
122bdd1243dSDimitry Andric MFI->addToPrologEpilogSGPRSpills(
123bdd1243dSDimitry Andric SGPR, PrologEpilogSGPRSaveRestoreInfo(
124bdd1243dSDimitry Andric SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
1255f757f3fSDimitry Andric LiveUnits.addReg(ScratchSGPR);
126bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
127bdd1243dSDimitry Andric << printReg(ScratchSGPR, TRI) << '\n');
1285ffd83dbSDimitry Andric }
1290b57cec5SDimitry Andric }
1300b57cec5SDimitry Andric
1310b57cec5SDimitry Andric // We need to specially emit stack operations here because a different frame
1320b57cec5SDimitry Andric // register is used than in the rest of the function, as getFrameRegister would
1330b57cec5SDimitry Andric // use.
buildPrologSpill(const GCNSubtarget & ST,const SIRegisterInfo & TRI,const SIMachineFunctionInfo & FuncInfo,LiveRegUnits & LiveUnits,MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,Register SpillReg,int FI,Register FrameReg,int64_t DwordOff=0)134fe6060f1SDimitry Andric static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
135fe6060f1SDimitry Andric const SIMachineFunctionInfo &FuncInfo,
1365f757f3fSDimitry Andric LiveRegUnits &LiveUnits, MachineFunction &MF,
137e8d8bef9SDimitry Andric MachineBasicBlock &MBB,
138349cc55cSDimitry Andric MachineBasicBlock::iterator I, const DebugLoc &DL,
139bdd1243dSDimitry Andric Register SpillReg, int FI, Register FrameReg,
140bdd1243dSDimitry Andric int64_t DwordOff = 0) {
141fe6060f1SDimitry Andric unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
142fe6060f1SDimitry Andric : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1430b57cec5SDimitry Andric
144fe6060f1SDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo();
145fe6060f1SDimitry Andric MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
146fe6060f1SDimitry Andric MachineMemOperand *MMO = MF.getMachineMemOperand(
147fe6060f1SDimitry Andric PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
148fe6060f1SDimitry Andric FrameInfo.getObjectAlign(FI));
1495f757f3fSDimitry Andric LiveUnits.addReg(SpillReg);
150bdd1243dSDimitry Andric bool IsKill = !MBB.isLiveIn(SpillReg);
151bdd1243dSDimitry Andric TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
1525f757f3fSDimitry Andric DwordOff, MMO, nullptr, &LiveUnits);
153bdd1243dSDimitry Andric if (IsKill)
1545f757f3fSDimitry Andric LiveUnits.removeReg(SpillReg);
155e8d8bef9SDimitry Andric }
156e8d8bef9SDimitry Andric
buildEpilogRestore(const GCNSubtarget & ST,const SIRegisterInfo & TRI,const SIMachineFunctionInfo & FuncInfo,LiveRegUnits & LiveUnits,MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,Register SpillReg,int FI,Register FrameReg,int64_t DwordOff=0)157fe6060f1SDimitry Andric static void buildEpilogRestore(const GCNSubtarget &ST,
158fe6060f1SDimitry Andric const SIRegisterInfo &TRI,
159fe6060f1SDimitry Andric const SIMachineFunctionInfo &FuncInfo,
1605f757f3fSDimitry Andric LiveRegUnits &LiveUnits, MachineFunction &MF,
161e8d8bef9SDimitry Andric MachineBasicBlock &MBB,
162349cc55cSDimitry Andric MachineBasicBlock::iterator I,
163bdd1243dSDimitry Andric const DebugLoc &DL, Register SpillReg, int FI,
164bdd1243dSDimitry Andric Register FrameReg, int64_t DwordOff = 0) {
165fe6060f1SDimitry Andric unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
166fe6060f1SDimitry Andric : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1670b57cec5SDimitry Andric
168fe6060f1SDimitry Andric MachineFrameInfo &FrameInfo = MF.getFrameInfo();
169fe6060f1SDimitry Andric MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
170fe6060f1SDimitry Andric MachineMemOperand *MMO = MF.getMachineMemOperand(
171fe6060f1SDimitry Andric PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
172fe6060f1SDimitry Andric FrameInfo.getObjectAlign(FI));
173bdd1243dSDimitry Andric TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
1745f757f3fSDimitry Andric DwordOff, MMO, nullptr, &LiveUnits);
1750b57cec5SDimitry Andric }
1760b57cec5SDimitry Andric
buildGitPtr(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,const SIInstrInfo * TII,Register TargetReg)177e8d8bef9SDimitry Andric static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
178e8d8bef9SDimitry Andric const DebugLoc &DL, const SIInstrInfo *TII,
179e8d8bef9SDimitry Andric Register TargetReg) {
180e8d8bef9SDimitry Andric MachineFunction *MF = MBB.getParent();
181e8d8bef9SDimitry Andric const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
182e8d8bef9SDimitry Andric const SIRegisterInfo *TRI = &TII->getRegisterInfo();
183e8d8bef9SDimitry Andric const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
184e8d8bef9SDimitry Andric Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
185e8d8bef9SDimitry Andric Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
186e8d8bef9SDimitry Andric
187e8d8bef9SDimitry Andric if (MFI->getGITPtrHigh() != 0xffffffff) {
188e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, SMovB32, TargetHi)
189e8d8bef9SDimitry Andric .addImm(MFI->getGITPtrHigh())
190e8d8bef9SDimitry Andric .addReg(TargetReg, RegState::ImplicitDefine);
191e8d8bef9SDimitry Andric } else {
1927a6dacacSDimitry Andric const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
193e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, GetPC64, TargetReg);
194e8d8bef9SDimitry Andric }
195e8d8bef9SDimitry Andric Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
196e8d8bef9SDimitry Andric MF->getRegInfo().addLiveIn(GitPtrLo);
197e8d8bef9SDimitry Andric MBB.addLiveIn(GitPtrLo);
198e8d8bef9SDimitry Andric BuildMI(MBB, I, DL, SMovB32, TargetLo)
199e8d8bef9SDimitry Andric .addReg(GitPtrLo);
200e8d8bef9SDimitry Andric }
201e8d8bef9SDimitry Andric
initLiveUnits(LiveRegUnits & LiveUnits,const SIRegisterInfo & TRI,const SIMachineFunctionInfo * FuncInfo,MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,bool IsProlog)2025f757f3fSDimitry Andric static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
203bdd1243dSDimitry Andric const SIMachineFunctionInfo *FuncInfo,
204bdd1243dSDimitry Andric MachineFunction &MF, MachineBasicBlock &MBB,
205bdd1243dSDimitry Andric MachineBasicBlock::iterator MBBI, bool IsProlog) {
2065f757f3fSDimitry Andric if (LiveUnits.empty()) {
2075f757f3fSDimitry Andric LiveUnits.init(TRI);
208bdd1243dSDimitry Andric if (IsProlog) {
2095f757f3fSDimitry Andric LiveUnits.addLiveIns(MBB);
210bdd1243dSDimitry Andric } else {
211bdd1243dSDimitry Andric // In epilog.
2125f757f3fSDimitry Andric LiveUnits.addLiveOuts(MBB);
2135f757f3fSDimitry Andric LiveUnits.stepBackward(*MBBI);
214bdd1243dSDimitry Andric }
215bdd1243dSDimitry Andric }
216bdd1243dSDimitry Andric }
217bdd1243dSDimitry Andric
namespace llvm {

// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
// BP, etc. These spills are delayed until the current function's frame is
// finalized. For a given register, the builder uses the
// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
class PrologEpilogSGPRSpillBuilder {
  MachineBasicBlock::iterator MI; // Insertion point for emitted instructions.
  MachineBasicBlock &MBB;
  MachineFunction &MF;
  const GCNSubtarget &ST;
  MachineFrameInfo &MFI;
  SIMachineFunctionInfo *FuncInfo;
  const SIInstrInfo *TII;
  const SIRegisterInfo &TRI;
  Register SuperReg; // The (possibly multi-dword) SGPR being saved/restored.
  const PrologEpilogSGPRSaveRestoreInfo SI; // Strategy + target (FI or reg).
  LiveRegUnits &LiveUnits;
  const DebugLoc &DL;
  Register FrameReg;
  ArrayRef<int16_t> SplitParts; // Sub-register indices of SuperReg's pieces.
  unsigned NumSubRegs;          // Number of 32-bit pieces (1 if unsplit).
  unsigned EltSize = 4;         // Bytes per spilled element (one dword).

  // Save path for SGPRSaveKind::SPILL_TO_MEM: move each 32-bit piece of
  // SuperReg into a scratch VGPR and store it to stack slot \p FI at
  // increasing dword offsets.
  void saveToMemory(const int FI) const {
    MachineRegisterInfo &MRI = MF.getRegInfo();
    assert(!MFI.isDeadObjectIndex(FI));

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);

    // A temporary VGPR is needed to stage the SGPR value for the store.
    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
          .addReg(SubReg);

      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
                       FI, FrameReg, DwordOff);
      DwordOff += 4;
    }
  }

  // Save path for SGPRSaveKind::SPILL_TO_VGPR_LANE: write each 32-bit piece
  // into the physical-VGPR lane previously reserved for slot \p FI.
  void saveToVGPRLane(const int FI) const {
    assert(!MFI.isDeadObjectIndex(FI));

    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
              Spill[I].VGPR)
          .addReg(SubReg)
          .addImm(Spill[I].Lane)
          .addReg(Spill[I].VGPR, RegState::Undef);
    }
  }

  // Save path for SGPRSaveKind::COPY_TO_SCRATCH_SGPR: a plain register copy,
  // tagged as frame setup.
  void copyToScratchSGPR(Register DstReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
        .addReg(SuperReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Restore counterpart of saveToMemory: reload each dword from slot \p FI
  // into a scratch VGPR, then move it back to the SGPR piece with
  // V_READFIRSTLANE_B32.
  void restoreFromMemory(const int FI) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
    MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
    if (!TmpVGPR)
      report_fatal_error("failed to find free scratch register");

    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));

      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
                         TmpVGPR, FI, FrameReg, DwordOff);
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
          .addReg(TmpVGPR, RegState::Kill);
      DwordOff += 4;
    }
  }

  // Restore counterpart of saveToVGPRLane: read each dword back from its
  // reserved VGPR lane.
  void restoreFromVGPRLane(const int FI) {
    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
    ArrayRef<SIRegisterInfo::SpilledReg> Spill =
        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
    assert(Spill.size() == NumSubRegs);

    for (unsigned I = 0; I < NumSubRegs; ++I) {
      Register SubReg = NumSubRegs == 1
                            ? SuperReg
                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
          .addReg(Spill[I].VGPR)
          .addImm(Spill[I].Lane);
    }
  }

  // Restore counterpart of copyToScratchSGPR: a plain register copy, tagged
  // as frame destroy.
  void copyFromScratchSGPR(Register SrcReg) const {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
        .addReg(SrcReg)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

public:
  // \p Reg is the SGPR to save/restore; \p SI carries the chosen strategy and
  // its target (stack slot or scratch register). The register is split into
  // 32-bit pieces up front since every strategy works dword-by-dword.
  PrologEpilogSGPRSpillBuilder(Register Reg,
                               const PrologEpilogSGPRSaveRestoreInfo SI,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MI,
                               const DebugLoc &DL, const SIInstrInfo *TII,
                               const SIRegisterInfo &TRI,
                               LiveRegUnits &LiveUnits, Register FrameReg)
      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
        FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
        FrameReg(FrameReg) {
    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
  }

  // Emit the save sequence for the strategy recorded in SI.
  void save() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return saveToMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return saveToVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyToScratchSGPR(SI.getReg());
    }
  }

  // Emit the matching restore sequence for the strategy recorded in SI.
  void restore() {
    switch (SI.getKind()) {
    case SGPRSaveKind::SPILL_TO_MEM:
      return restoreFromMemory(SI.getIndex());
    case SGPRSaveKind::SPILL_TO_VGPR_LANE:
      return restoreFromVGPRLane(SI.getIndex());
    case SGPRSaveKind::COPY_TO_SCRATCH_SGPR:
      return copyFromScratchSGPR(SI.getReg());
    }
  }
};

} // namespace llvm
380bdd1243dSDimitry Andric
// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
void SIFrameLowering::emitEntryFunctionFlatScratchInit(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // We don't need this if we only have spills since there is no user facing
  // scratch.

  // TODO: If we know we don't have flat instructions earlier, we can omit
  // this from the input registers.
  //
  // TODO: We only need to know if we access scratch space through a flat
  // pointer. Because we only detect if flat instructions are used at all,
  // this will be used more often than necessary on VI.

  // Low/high 32-bit halves of the flat-scratch init value; populated by one
  // of the two paths below (GIT descriptor load on PAL, preloaded argument
  // register otherwise).
  Register FlatScrInitLo;
  Register FlatScrInitHi;

  if (ST.isAmdPalOS()) {
    // Extract the scratch offset from the descriptor in the GIT
    LiveRegUnits LiveUnits;
    LiveUnits.init(*TRI);
    LiveUnits.addLiveIns(MBB);

    // Find unused reg to load flat scratch init into
    MachineRegisterInfo &MRI = MF.getRegInfo();
    Register FlatScrInit = AMDGPU::NoRegister;
    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
    // Skip the SGPR64s that cover the preloaded SGPR arguments (two 32-bit
    // SGPRs per pair, rounded up).
    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
    AllSGPR64s = AllSGPR64s.slice(
        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    // Candidate must be free, allocatable, and must not alias the register
    // holding the low half of the GIT pointer.
    for (MCPhysReg Reg : AllSGPR64s) {
      if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
          MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
        FlatScrInit = Reg;
        break;
      }
    }
    assert(FlatScrInit && "Failed to find free register for scratch init");

    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);

    buildGitPtr(MBB, I, DL, TII, FlatScrInit);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
    auto *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
            MachineMemOperand::MODereferenceable,
        8, Align(4));
    unsigned Offset =
        MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    // Load overwrites FlatScrInit in place (base address is also FlatScrInit).
    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
        .addReg(FlatScrInit)
        .addImm(EncodedOffset) // offset
        .addImm(0)             // cpol
        .addMemOperand(MMO);

    // Mask the offset in [47:0] of the descriptor
    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
    auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
                   .addReg(FlatScrInitHi)
                   .addImm(0xffff);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
  } else {
    // Non-PAL: the flat scratch init value arrives as a preloaded kernel
    // argument register pair.
    Register FlatScratchInitReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
    assert(FlatScratchInitReg);

    MachineRegisterInfo &MRI = MF.getRegInfo();
    MRI.addLiveIn(FlatScratchInitReg);
    MBB.addLiveIn(FlatScratchInitReg);

    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
  }

  // Do a 64-bit pointer add.
  if (ST.flatScratchIsPointer()) {
    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
      // GFX10+: add in place, then write both halves to the FLAT_SCR hwreg
      // halves via S_SETREG_B32.
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
          .addReg(FlatScrInitLo)
          .addReg(ScratchWaveOffsetReg);
      auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                          FlatScrInitHi)
          .addReg(FlatScrInitHi)
          .addImm(0);
      Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitLo).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
        addReg(FlatScrInitHi).
        addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_HI |
                       (31 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_)));
      return;
    }

    // For GFX9.
    // Add directly into the architectural FLAT_SCR register pair.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
        .addReg(FlatScrInitLo)
        .addReg(ScratchWaveOffsetReg);
    auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
                        AMDGPU::FLAT_SCR_HI)
        .addReg(FlatScrInitHi)
        .addImm(0);
    Addc->getOperand(3).setIsDead(); // Mark SCC as dead.

    return;
  }

  // Pre-GFX9: flat scratch is not a pointer; FLAT_SCR_LO holds the size and
  // FLAT_SCR_HI the offset in 256-byte units.
  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);

  // Copy the size in bytes.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
      .addReg(FlatScrInitHi, RegState::Kill);

  // Add wave offset in bytes to private base offset.
  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
      .addReg(FlatScrInitLo)
      .addReg(ScratchWaveOffsetReg);

  // Convert offset to 256-byte units.
  auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
                      AMDGPU::FLAT_SCR_HI)
      .addReg(FlatScrInitLo, RegState::Kill)
      .addImm(8);
  LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
}
5240b57cec5SDimitry Andric
525e8d8bef9SDimitry Andric // Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
526e8d8bef9SDimitry Andric // memory. They should have been removed by now.
allStackObjectsAreDead(const MachineFrameInfo & MFI)527e8d8bef9SDimitry Andric static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
528e8d8bef9SDimitry Andric for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
529e8d8bef9SDimitry Andric I != E; ++I) {
530e8d8bef9SDimitry Andric if (!MFI.isDeadObjectIndex(I))
531e8d8bef9SDimitry Andric return false;
532e8d8bef9SDimitry Andric }
533e8d8bef9SDimitry Andric
534e8d8bef9SDimitry Andric return true;
535e8d8bef9SDimitry Andric }
536e8d8bef9SDimitry Andric
5375ffd83dbSDimitry Andric // Shift down registers reserved for the scratch RSRC.
getEntryFunctionReservedScratchRsrcReg(MachineFunction & MF) const5385ffd83dbSDimitry Andric Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
5390b57cec5SDimitry Andric MachineFunction &MF) const {
5400b57cec5SDimitry Andric
5415ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
5425ffd83dbSDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo();
5435ffd83dbSDimitry Andric const SIRegisterInfo *TRI = &TII->getRegisterInfo();
5445ffd83dbSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
5455ffd83dbSDimitry Andric SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
5465ffd83dbSDimitry Andric
5475ffd83dbSDimitry Andric assert(MFI->isEntryFunction());
5485ffd83dbSDimitry Andric
5495ffd83dbSDimitry Andric Register ScratchRsrcReg = MFI->getScratchRSrcReg();
5505ffd83dbSDimitry Andric
551e8d8bef9SDimitry Andric if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
552e8d8bef9SDimitry Andric allStackObjectsAreDead(MF.getFrameInfo())))
5535ffd83dbSDimitry Andric return Register();
5540b57cec5SDimitry Andric
5550b57cec5SDimitry Andric if (ST.hasSGPRInitBug() ||
5560b57cec5SDimitry Andric ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
5570b57cec5SDimitry Andric return ScratchRsrcReg;
5580b57cec5SDimitry Andric
5590b57cec5SDimitry Andric // We reserved the last registers for this. Shift it down to the end of those
5600b57cec5SDimitry Andric // which were actually used.
5610b57cec5SDimitry Andric //
5620b57cec5SDimitry Andric // FIXME: It might be safer to use a pseudoregister before replacement.
5630b57cec5SDimitry Andric
5640b57cec5SDimitry Andric // FIXME: We should be able to eliminate unused input registers. We only
5650b57cec5SDimitry Andric // cannot do this for the resources required for scratch access. For now we
5660b57cec5SDimitry Andric // skip over user SGPRs and may leave unused holes.
5670b57cec5SDimitry Andric
5680b57cec5SDimitry Andric unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
5695ffd83dbSDimitry Andric ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
5700b57cec5SDimitry Andric AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
5710b57cec5SDimitry Andric
5720b57cec5SDimitry Andric // Skip the last N reserved elements because they should have already been
5730b57cec5SDimitry Andric // reserved for VCC etc.
5745ffd83dbSDimitry Andric Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
5750b57cec5SDimitry Andric for (MCPhysReg Reg : AllSGPR128s) {
5760b57cec5SDimitry Andric // Pick the first unallocated one. Make sure we don't clobber the other
5775ffd83dbSDimitry Andric // reserved input we needed. Also for PAL, make sure we don't clobber
5785ffd83dbSDimitry Andric // the GIT pointer passed in SGPR0 or SGPR8.
5795ffd83dbSDimitry Andric if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
58006c3fb27SDimitry Andric (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
5810b57cec5SDimitry Andric MRI.replaceRegWith(ScratchRsrcReg, Reg);
5820b57cec5SDimitry Andric MFI->setScratchRSrcReg(Reg);
5830b57cec5SDimitry Andric return Reg;
5840b57cec5SDimitry Andric }
5850b57cec5SDimitry Andric }
5860b57cec5SDimitry Andric
5870b57cec5SDimitry Andric return ScratchRsrcReg;
5880b57cec5SDimitry Andric }
5890b57cec5SDimitry Andric
getScratchScaleFactor(const GCNSubtarget & ST)590e8d8bef9SDimitry Andric static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
591e8d8bef9SDimitry Andric return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
592e8d8bef9SDimitry Andric }
593e8d8bef9SDimitry Andric
// Emit the entry-function (kernel) prologue: select/shift the scratch RSRC
// register, move the preloaded scratch wave offset out of the way if it
// aliases the RSRC, materialize SP/FP, and emit flat scratch and/or scratch
// descriptor initialization as needed.
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
                                                MachineBasicBlock &MBB) const {
  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");

  // FIXME: If we only have SGPR spills, we won't actually be using scratch
  // memory since these spill to VGPRs. We should be cleaning up these unused
  // SGPR spill frame indices somewhere.

  // FIXME: We still have implicit uses on SGPR spill instructions in case they
  // need to spill to vector memory. It's likely that will not happen, but at
  // this point it appears we need the setup. This part of the prolog should be
  // emitted after frame indices are eliminated.

  // FIXME: Remove all of the isPhysRegUsed checks

  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();

  assert(MFI->isEntryFunction());

  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
      AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);

  // We need to do the replacement of the private segment buffer register even
  // if there are no stack objects. There could be stores to undef or a
  // constant without an associated object.
  //
  // This will return `Register()` in cases where there are no actual
  // uses of the SRSRC.
  Register ScratchRsrcReg;
  if (!ST.enableFlatScratch())
    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);

  // Make the selected register live throughout the function.
  if (ScratchRsrcReg) {
    for (MachineBasicBlock &OtherBB : MF) {
      if (&OtherBB != &MBB) {
        OtherBB.addLiveIn(ScratchRsrcReg);
      }
    }
  }

  // Now that we have fixed the reserved SRSRC we need to locate the
  // (potentially) preloaded SRSRC.
  Register PreloadedScratchRsrcReg;
  if (ST.isAmdHsaOrMesa(F)) {
    PreloadedScratchRsrcReg =
        MFI->getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
      // We added live-ins during argument lowering, but since they were not
      // used they were deleted. We're adding the uses now, so add them back.
      MRI.addLiveIn(PreloadedScratchRsrcReg);
      MBB.addLiveIn(PreloadedScratchRsrcReg);
    }
  }

  // Debug location must be unknown since the first debug location is used to
  // determine the end of the prologue.
  DebugLoc DL;
  MachineBasicBlock::iterator I = MBB.begin();

  // We found the SRSRC first because it needs four registers and has an
  // alignment requirement. If the SRSRC that we found is clobbering with
  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
  // wave offset to a free SGPR.
  Register ScratchWaveOffsetReg;
  if (PreloadedScratchWaveOffsetReg &&
      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
    // Search for a free SGPR past the preloaded inputs that does not overlap
    // the SRSRC or the GIT pointer register.
    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
    AllSGPRs = AllSGPRs.slice(
        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
    for (MCPhysReg Reg : AllSGPRs) {
      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
        ScratchWaveOffsetReg = Reg;
        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
        break;
      }
    }
  } else {
    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
  }
  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);

  if (requiresStackPointerReference(MF)) {
    Register SPReg = MFI->getStackPtrOffsetReg();
    assert(SPReg != AMDGPU::SP_REG);
    // SP starts at the static frame size, scaled per getScratchScaleFactor
    // (byte units for flat scratch, wave-scaled otherwise).
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg)
        .addImm(FrameInfo.getStackSize() * getScratchScaleFactor(ST));
  }

  if (hasFP(MF)) {
    Register FPReg = MFI->getFrameOffsetReg();
    assert(FPReg != AMDGPU::FP_REG);
    // The entry function frame starts at offset 0.
    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
  }

  // Flat scratch must be set up when the kernel has the flat-scratch-init
  // user SGPR and either FLAT_SCR is read, there are calls, or live stack
  // objects will be accessed via flat scratch.
  bool NeedsFlatScratchInit =
      MFI->getUserSGPRInfo().hasFlatScratchInit() &&
      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
       (!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));

  // The wave offset input is consumed by the flat scratch init and/or the
  // RSRC setup below; re-add the live-in that argument lowering dropped.
  // Not needed when flat scratch is architected.
  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
      PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
  }

  if (NeedsFlatScratchInit) {
    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
  }

  if (ScratchRsrcReg) {
    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
                                         PreloadedScratchRsrcReg,
                                         ScratchRsrcReg, ScratchWaveOffsetReg);
  }
}
7210b57cec5SDimitry Andric
// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`.
// Builds the 128-bit scratch resource descriptor in \p ScratchRsrcReg by one
// of three paths (PAL: load from the GIT; no preloaded SRSRC: relocations /
// implicit buffer pointer plus constant words 2-3; HSA/Mesa: copy the
// preloaded SRSRC), then folds \p ScratchWaveOffsetReg into the base address.
void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
    MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const Function &Fn = MF.getFunction();

  if (ST.isAmdPalOS()) {
    // The pointer to the GIT is formed from the offset passed in and either
    // the amdgpu-git-ptr-high function attribute or the top part of the PC
    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    buildGitPtr(MBB, I, DL, TII, Rsrc01);

    // We now have the GIT ptr - now get the scratch descriptor from the entry
    // at offset 0 (or offset 16 for a compute shader).
    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
    auto MMO = MF.getMachineMemOperand(PtrInfo,
                                       MachineMemOperand::MOLoad |
                                       MachineMemOperand::MOInvariant |
                                       MachineMemOperand::MODereferenceable,
                                       16, Align(4));
    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
      .addReg(Rsrc01)
      .addImm(EncodedOffset) // offset
      .addImm(0) // cpol
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
      .addMemOperand(MMO);

    // The driver will always set the SRD for wave 64 (bits 118:117 of
    // descriptor / bits 22:21 of third sub-reg will be 0b11)
    // If the shader is actually wave32 we have to modify the const_index_stride
    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
    // reason the driver does this is that there can be cases where it presents
    // 2 shaders with different wave size (e.g. VsFs).
    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
    if (ST.isWave32()) {
      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
          .addImm(21)
          .addReg(Rsrc03);
    }
  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
    // No descriptor was preloaded: construct one from relocated symbols (or
    // the implicit buffer pointer) for words 0-1 and constants for words 2-3.
    assert(!ST.isAmdHsaOrMesa(Fn));
    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);

    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);

    // Use relocations to get the pointer, and setup the other bits manually.
    uint64_t Rsrc23 = TII->getScratchRsrcWords23();

    if (MFI->getUserSGPRInfo().hasImplicitBufferPtr()) {
      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);

      if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {
        // Compute: the implicit buffer pointer SGPR pair can be copied
        // directly into descriptor words 0-1.
        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);

        BuildMI(MBB, I, DL, Mov64, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
      } else {
        // Graphics: load words 0-1 through the implicit buffer pointer.
        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);

        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
        auto MMO = MF.getMachineMemOperand(
            PtrInfo,
            MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
                MachineMemOperand::MODereferenceable,
            8, Align(4));
        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
          .addReg(MFI->getImplicitBufferPtrUserSGPR())
          .addImm(0) // offset
          .addImm(0) // cpol
          .addMemOperand(MMO)
          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

        // Add the as-if-preloaded SGPR pair back as live-ins; the load above
        // is its first use.
        MF.getRegInfo().addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
        MBB.addLiveIn(MFI->getImplicitBufferPtrUserSGPR());
      }
    } else {
      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

      // Words 0-1 come from external symbols resolved by relocation.
      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
        .addExternalSymbol("SCRATCH_RSRC_DWORD0")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
        .addExternalSymbol("SCRATCH_RSRC_DWORD1")
        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
    }

    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
      .addImm(Rsrc23 & 0xffffffff)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);

    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
      .addImm(Rsrc23 >> 32)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  } else if (ST.isAmdHsaOrMesa(Fn)) {
    // HSA/Mesa preloads a complete descriptor; just move it into place if it
    // is not already there.
    assert(PreloadedScratchRsrcReg);

    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
        .addReg(PreloadedScratchRsrcReg, RegState::Kill);
    }
  }

  // Add the scratch wave offset into the scratch RSRC.
  //
  // We only want to update the first 48 bits, which is the base address
  // pointer, without touching the adjacent 16 bits of flags. We know this add
  // cannot carry-out from bit 47, otherwise the scratch allocation would be
  // impossible to fit in the 48-bit global address space.
  //
  // TODO: Evaluate if it is better to just construct an SRD using the flat
  // scratch init and some constants rather than update the one we are passed.
  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);

  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
  // the kernel body via inreg arguments.
  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
      .addReg(ScratchRsrcSub0)
      .addReg(ScratchWaveOffsetReg)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
      .addReg(ScratchRsrcSub1)
      .addImm(0)
      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}
8655ffd83dbSDimitry Andric
isSupportedStackID(TargetStackID::Value ID) const8660b57cec5SDimitry Andric bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
8670b57cec5SDimitry Andric switch (ID) {
8680b57cec5SDimitry Andric case TargetStackID::Default:
8690b57cec5SDimitry Andric case TargetStackID::NoAlloc:
8700b57cec5SDimitry Andric case TargetStackID::SGPRSpill:
8710b57cec5SDimitry Andric return true;
872e8d8bef9SDimitry Andric case TargetStackID::ScalableVector:
873fe6060f1SDimitry Andric case TargetStackID::WasmLocal:
8748bcb0991SDimitry Andric return false;
8750b57cec5SDimitry Andric }
8760b57cec5SDimitry Andric llvm_unreachable("Invalid TargetStackID::Value");
8770b57cec5SDimitry Andric }
8780b57cec5SDimitry Andric
879bdd1243dSDimitry Andric // Activate only the inactive lanes when \p EnableInactiveLanes is true.
880bdd1243dSDimitry Andric // Otherwise, activate all lanes. It returns the saved exec.
buildScratchExecCopy(LiveRegUnits & LiveUnits,MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,bool IsProlog,bool EnableInactiveLanes)8815f757f3fSDimitry Andric static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
8825ffd83dbSDimitry Andric MachineFunction &MF,
8835ffd83dbSDimitry Andric MachineBasicBlock &MBB,
8845ffd83dbSDimitry Andric MachineBasicBlock::iterator MBBI,
885bdd1243dSDimitry Andric const DebugLoc &DL, bool IsProlog,
886bdd1243dSDimitry Andric bool EnableInactiveLanes) {
8875ffd83dbSDimitry Andric Register ScratchExecCopy;
8885ffd83dbSDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
8895ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
8905ffd83dbSDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo();
8915ffd83dbSDimitry Andric const SIRegisterInfo &TRI = TII->getRegisterInfo();
8925ffd83dbSDimitry Andric SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
8935ffd83dbSDimitry Andric
8945f757f3fSDimitry Andric initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
8955ffd83dbSDimitry Andric
8965ffd83dbSDimitry Andric ScratchExecCopy = findScratchNonCalleeSaveRegister(
8975f757f3fSDimitry Andric MRI, LiveUnits, *TRI.getWaveMaskRegClass());
898fe6060f1SDimitry Andric if (!ScratchExecCopy)
899fe6060f1SDimitry Andric report_fatal_error("failed to find free scratch register");
9005ffd83dbSDimitry Andric
9015f757f3fSDimitry Andric LiveUnits.addReg(ScratchExecCopy);
9025ffd83dbSDimitry Andric
903bdd1243dSDimitry Andric const unsigned SaveExecOpc =
904bdd1243dSDimitry Andric ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
905bdd1243dSDimitry Andric : AMDGPU::S_OR_SAVEEXEC_B32)
906bdd1243dSDimitry Andric : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
907bdd1243dSDimitry Andric : AMDGPU::S_OR_SAVEEXEC_B64);
908bdd1243dSDimitry Andric auto SaveExec =
909bdd1243dSDimitry Andric BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
910349cc55cSDimitry Andric SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
9115ffd83dbSDimitry Andric
9125ffd83dbSDimitry Andric return ScratchExecCopy;
9135ffd83dbSDimitry Andric }
9145ffd83dbSDimitry Andric
// Emit the prologue stores for callee-saved state: whole-wave-mode (WWM)
// VGPRs first (with the required EXEC manipulation), then the prolog/epilog
// SGPR spills, finally making any scratch SGPR copy destinations live
// throughout the function.
void SIFrameLowering::emitCSRSpillStores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
  // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
  // might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ true, /*EnableInactiveLanes*/ true);

  // Store each (VGPR, frame index) pair relative to FrameReg under the
  // currently active EXEC mask.
  auto StoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                           VGPR, FI, FrameReg);
        }
      };

  StoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      // EXEC currently holds only the previously-inactive lanes; switch to
      // all lanes for the callee-saved VGPR stores.
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ true,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  StoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // Restore the original EXEC mask saved by buildScratchExecCopy.
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
    LiveUnits.addReg(ScratchExecCopy);
  }

  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP spill:
    // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
    // Otherwise, FP has been moved to a temporary register and spill it
    // instead.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.save();
  }

  // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
  // such scratch registers live throughout the function.
  SmallVector<Register, 1> ScratchSGPRs;
  FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
  if (!ScratchSGPRs.empty()) {
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : ScratchSGPRs)
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();
    }
    if (!LiveUnits.empty()) {
      for (MCPhysReg Reg : ScratchSGPRs)
        LiveUnits.addReg(Reg);
    }
  }
}
1000bdd1243dSDimitry Andric
// Restore the registers saved by emitCSRSpillStores, mirroring it in reverse:
// first reload the prolog/epilog SGPR spills (the FP entry is redirected into
// \p FramePtrRegScratchCopy because FP may still be live as the spill base
// register at this point), then reload the WWM VGPRs with the appropriate
// EXEC mask manipulation.
void SIFrameLowering::emitCSRSpillRestores(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
    Register FrameReg, Register FramePtrRegScratchCopy) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();

  for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
    // Special handle FP restore:
    // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
    // the FP value to a temporary register. The frame pointer should be
    // overwritten only at the end when all other spills are restored from
    // current frame.
    Register Reg =
        Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
    if (!Reg)
      continue;

    PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
                                    LiveUnits, FrameReg);
    SB.restore();
  }

  // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
  // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
  // this, we might end up flipping the EXEC bits twice.
  Register ScratchExecCopy;
  SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
  FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
  if (!WWMScratchRegs.empty())
    ScratchExecCopy =
        buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                             /*IsProlog*/ false, /*EnableInactiveLanes*/ true);

  // Helper shared by both WWM register lists: reload each (VGPR, frame index)
  // pair from the stack using \p FrameReg as the access base.
  auto RestoreWWMRegisters =
      [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
        for (const auto &Reg : WWMRegs) {
          Register VGPR = Reg.first;
          int FI = Reg.second;
          buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
                             VGPR, FI, FrameReg);
        }
      };

  RestoreWWMRegisters(WWMScratchRegs);
  if (!WWMCalleeSavedRegs.empty()) {
    if (ScratchExecCopy) {
      // EXEC was already saved above for the scratch-WWM restores; just switch
      // all lanes on for the callee-saved restores.
      unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
      BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
    } else {
      // No scratch-WWM restores happened, so EXEC has not been saved yet.
      ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
                                             /*IsProlog*/ false,
                                             /*EnableInactiveLanes*/ false);
    }
  }

  RestoreWWMRegisters(WWMCalleeSavedRegs);
  if (ScratchExecCopy) {
    // Put the original EXEC mask back.
    // FIXME: Split block and make terminator.
    unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
    BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
        .addReg(ScratchExecCopy, RegState::Kill);
  }
}
1068fe6060f1SDimitry Andric
// Emit the prologue for a non-entry function: optionally materialize SP for
// chain functions, emit CSR/WWM spill stores, establish the frame pointer
// (with realignment when the stack must be over-aligned), set up the base
// pointer if needed, and finally bump SP by the (possibly rounded-up) frame
// size. Entry functions are handled by emitEntryFunctionPrologue instead.
void SIFrameLowering::emitPrologue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction()) {
    emitEntryFunctionPrologue(MF, MBB);
    return;
  }

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  Register BasePtrReg =
      TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
  LiveRegUnits LiveUnits;

  MachineBasicBlock::iterator MBBI = MBB.begin();
  // DebugLoc must be unknown since the first instruction with DebugLoc is used
  // to determine the end of the prologue.
  DebugLoc DL;

  if (FuncInfo->isChainFunction()) {
    // Functions with the amdgpu_cs_chain[_preserve] CC don't receive a SP, but
    // are free to set one up if they need it.
    bool UseSP = requiresStackPointerReference(MF);
    if (UseSP) {
      assert(StackPtrReg != AMDGPU::SP_REG);

      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
          .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
    }
  }

  bool HasFP = false;
  bool HasBP = false;
  uint32_t NumBytes = MFI.getStackSize();
  uint32_t RoundedSize = NumBytes;

  // Stack realignment forces the use of a frame pointer.
  if (TRI.hasStackRealignment(MF))
    HasFP = true;

  Register FramePtrRegScratchCopy;
  if (!HasFP && !hasFP(MF)) {
    // Emit the CSR spill stores with SP base register.
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits,
                       FuncInfo->isChainFunction() ? Register() : StackPtrReg,
                       FramePtrRegScratchCopy);
  } else {
    // CSR spill stores will use FP as base register.
    Register SGPRForFPSaveRestoreCopy =
        FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
    if (SGPRForFPSaveRestoreCopy) {
      // Copy FP to the scratch register now and emit the CFI entry. It avoids
      // the extra FP copy needed in the other two cases when FP is spilled to
      // memory or to a VGPR lane.
      PrologEpilogSGPRSpillBuilder SB(
          FramePtrReg,
          FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
          DL, TII, TRI, LiveUnits, FramePtrReg);
      SB.save();
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      // Copy FP into a new scratch register so that its previous value can be
      // spilled after setting up the new frame.
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
      BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
          .addReg(FramePtrReg);
    }
  }

  if (HasFP) {
    // Realignment path: over-allocate by the max alignment so the aligned FP
    // still leaves NumBytes of usable space, then round FP up to the
    // alignment boundary.
    const unsigned Alignment = MFI.getMaxAlign().value();

    RoundedSize += Alignment;
    if (LiveUnits.empty()) {
      LiveUnits.init(TRI);
      LiveUnits.addLiveIns(MBB);
    }

    // s_add_i32 s33, s32, NumBytes
    // s_and_b32 s33, s33, 0b111...0000
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
        .addReg(StackPtrReg)
        .addImm((Alignment - 1) * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
        .addReg(FramePtrReg, RegState::Kill)
        .addImm(-Alignment * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    And->getOperand(3).setIsDead(); // Mark SCC as dead.
    FuncInfo->setIsStackRealigned(true);
  } else if ((HasFP = hasFP(MF))) {
    // No realignment needed: FP is just a copy of the incoming SP.
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If FP is used, emit the CSR spills with FP base register.
  if (HasFP) {
    emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                       FramePtrRegScratchCopy);
    // The temporary FP copy is dead once the old FP value has been spilled.
    if (FramePtrRegScratchCopy)
      LiveUnits.removeReg(FramePtrRegScratchCopy);
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // the incoming arguments.
  if ((HasBP = TRI.hasBasePointer(MF))) {
    BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
        .addReg(StackPtrReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Allocate the frame: advance SP by the rounded size (scaled for scratch).
  if (HasFP && RoundedSize != 0) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(RoundedSize * getScratchScaleFactor(ST))
        .setMIFlag(MachineInstr::FrameSetup);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
  (void)FPSaved;
  assert((!HasFP || FPSaved) &&
         "Needed to save FP but didn't save it anywhere");

  // If we allow spilling to AGPRs we may have saved FP but then spill
  // everything into AGPRs instead of the stack.
  assert((HasFP || !FPSaved || EnableSpillVGPRToAGPR) &&
         "Saved FP but didn't need it");

  bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
  (void)BPSaved;
  assert((!HasBP || BPSaved) &&
         "Needed to save BP but didn't save it anywhere");

  assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
}
12200b57cec5SDimitry Andric
// Emit the epilogue for a non-entry function: restore CSR/WWM spills, unwind
// SP by the frame size, and restore the frame pointer last (from either the
// dedicated scratch SGPR chosen earlier or a freshly scavenged temporary).
// Entry functions need no epilogue.
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  if (FuncInfo->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  LiveRegUnits LiveUnits;
  // Get the insert location for the epilogue. If there were no terminators in
  // the block, get the last instruction.
  MachineBasicBlock::iterator MBBI = MBB.end();
  DebugLoc DL;
  if (!MBB.empty()) {
    MBBI = MBB.getLastNonDebugInstr();
    if (MBBI != MBB.end())
      DL = MBBI->getDebugLoc();

    MBBI = MBB.getFirstTerminator();
  }

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  uint32_t NumBytes = MFI.getStackSize();
  // Must mirror the prologue: if the stack was realigned, the allocation was
  // rounded up by the max alignment.
  uint32_t RoundedSize = FuncInfo->isStackRealigned()
                             ? NumBytes + MFI.getMaxAlign().value()
                             : NumBytes;
  const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
  Register FramePtrReg = FuncInfo->getFrameOffsetReg();
  bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);

  Register FramePtrRegScratchCopy;
  Register SGPRForFPSaveRestoreCopy =
      FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
  if (FPSaved) {
    // CSR spill restores should use FP as base register. If
    // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
    // into a new scratch register and copy to FP later when other registers are
    // restored from the current stack frame.
    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
    if (SGPRForFPSaveRestoreCopy) {
      LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
    } else {
      FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
          MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
      if (!FramePtrRegScratchCopy)
        report_fatal_error("failed to find free scratch register");

      LiveUnits.addReg(FramePtrRegScratchCopy);
    }

    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
                         FramePtrRegScratchCopy);
  }

  // Deallocate the frame: move SP back down by the amount the prologue added.
  if (RoundedSize != 0 && hasFP(MF)) {
    auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
        .addReg(StackPtrReg)
        .addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
        .setMIFlag(MachineInstr::FrameDestroy);
    Add->getOperand(3).setIsDead(); // Mark SCC as dead.
  }

  if (FPSaved) {
    // Insert the copy to restore FP.
    Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
                                               : FramePtrRegScratchCopy;
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
            .addReg(SrcReg);
    if (SGPRForFPSaveRestoreCopy)
      MIB.setMIFlag(MachineInstr::FrameDestroy);
  } else {
    // Insert the CSR spill restores with SP as the base register.
    emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
                         FramePtrRegScratchCopy);
  }
}
13000b57cec5SDimitry Andric
13010b57cec5SDimitry Andric #ifndef NDEBUG
allSGPRSpillsAreDead(const MachineFunction & MF)1302e8d8bef9SDimitry Andric static bool allSGPRSpillsAreDead(const MachineFunction &MF) {
1303e8d8bef9SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo();
1304e8d8bef9SDimitry Andric const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
13050b57cec5SDimitry Andric for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
13060b57cec5SDimitry Andric I != E; ++I) {
13070b57cec5SDimitry Andric if (!MFI.isDeadObjectIndex(I) &&
13080b57cec5SDimitry Andric MFI.getStackID(I) == TargetStackID::SGPRSpill &&
1309bdd1243dSDimitry Andric !FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
13100b57cec5SDimitry Andric return false;
13110b57cec5SDimitry Andric }
13120b57cec5SDimitry Andric }
13130b57cec5SDimitry Andric
13140b57cec5SDimitry Andric return true;
13150b57cec5SDimitry Andric }
13160b57cec5SDimitry Andric #endif
13170b57cec5SDimitry Andric
getFrameIndexReference(const MachineFunction & MF,int FI,Register & FrameReg) const1318e8d8bef9SDimitry Andric StackOffset SIFrameLowering::getFrameIndexReference(const MachineFunction &MF,
1319e8d8bef9SDimitry Andric int FI,
13205ffd83dbSDimitry Andric Register &FrameReg) const {
13210b57cec5SDimitry Andric const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
13220b57cec5SDimitry Andric
13230b57cec5SDimitry Andric FrameReg = RI->getFrameRegister(MF);
1324e8d8bef9SDimitry Andric return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI));
13250b57cec5SDimitry Andric }
13260b57cec5SDimitry Andric
// Runs before frame finalization: allocates stack slots for WWM reserved
// VGPRs, tries to fold VGPR spills into AGPRs (eliminating their stack use),
// moves any remaining SGPR spills back to the default stack, and sets up
// emergency scavenging slots when live stack objects remain.
void SIFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF,
    RegScavenger *RS) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

  // Allocate spill slots for WWM reserved VGPRs.
  // For chain functions, we only need to do this if we have calls to
  // llvm.amdgcn.cs.chain.
  bool IsChainWithoutCalls =
      FuncInfo->isChainFunction() && !MF.getFrameInfo().hasTailCall();
  if (!FuncInfo->isEntryFunction() && !IsChainWithoutCalls) {
    for (Register Reg : FuncInfo->getWWMReservedRegs()) {
      const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
      FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
                                 TRI->getSpillAlign(*RC));
    }
  }

  const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
    && EnableSpillVGPRToAGPR;

  if (SpillVGPRToAGPR) {
    // To track the spill frame indices handled in this pass.
    BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
    // Frame indices that are also used by non-VGPR-spill loads/stores; those
    // slots cannot be declared dead even if the VGPR spill was folded away.
    BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);

    bool SeenDbgInstr = false;

    for (MachineBasicBlock &MBB : MF) {
      // early_inc_range: eliminateFrameIndex may rewrite/erase MI.
      for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
        int FrameIndex;
        if (MI.isDebugInstr())
          SeenDbgInstr = true;

        if (TII->isVGPRSpill(MI)) {
          // Try to eliminate stack used by VGPR spills before frame
          // finalization.
          unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
                                                     AMDGPU::OpName::vaddr);
          int FI = MI.getOperand(FIOp).getIndex();
          Register VReg =
            TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
          if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
                                                TRI->isAGPR(MRI, VReg))) {
            assert(RS != nullptr);
            // Position the scavenger just past MI so eliminateFrameIndex can
            // scavenge registers with correct liveness at this point.
            RS->enterBasicBlockEnd(MBB);
            RS->backward(std::next(MI.getIterator()));
            TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
            SpillFIs.set(FI);
            continue;
          }
        } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
                   TII->isLoadFromStackSlot(MI, FrameIndex))
          if (!MFI.isFixedObjectIndex(FrameIndex))
            NonVGPRSpillFIs.set(FrameIndex);
      }
    }

    // Stack slot coloring may assign different objects to the same stack slot.
    // If not, then the VGPR to AGPR spill slot is dead.
    for (unsigned FI : SpillFIs.set_bits())
      if (!NonVGPRSpillFIs.test(FI))
        FuncInfo->setVGPRToAGPRSpillDead(FI);

    // The AGPRs (and the VGPRs used to spill AGPRs) used for spilling must be
    // live-in everywhere in the function.
    for (MachineBasicBlock &MBB : MF) {
      for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
        MBB.addLiveIn(Reg);

      for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
        MBB.addLiveIn(Reg);

      MBB.sortUniqueLiveIns();

      if (!SpillFIs.empty() && SeenDbgInstr) {
        // FIXME: The dead frame indices are replaced with a null register from
        // the debug value instructions. We should instead, update it with the
        // correct register value. But not sure the register value alone is
        // enough to lower the DIExpression; this should be worked out later.
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
              !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) &&
              SpillFIs[MI.getOperand(0).getIndex()]) {
            MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
          }
        }
      }
    }
  }

  // At this point we've already allocated all spilled SGPRs to VGPRs if we
  // can. Any remaining SGPR spills will go to memory, so move them back to the
  // default stack.
  bool HaveSGPRToVMemSpill =
      FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
  assert(allSGPRSpillsAreDead(MF) &&
         "SGPR spill should have been removed in SILowerSGPRSpills");

  // FIXME: The other checks should be redundant with allStackObjectsAreDead,
  // but currently hasNonSpillStackObjects is set only from source
  // allocas. Stack temps produced from legalization are not counted currently.
  if (!allStackObjectsAreDead(MFI)) {
    assert(RS && "RegScavenger required if spilling");

    // Add an emergency spill slot
    RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));

    // If we are spilling SGPRs to memory with a large frame, we may need a
    // second VGPR emergency frame index.
    if (HaveSGPRToVMemSpill &&
        allocateScavengingFrameIndexesNearIncomingSP(MF)) {
      RS->addScavengingFrameIndex(MFI.CreateStackObject(4, Align(4), false));
    }
  }
}
144681ad6265SDimitry Andric
processFunctionBeforeFrameIndicesReplaced(MachineFunction & MF,RegScavenger * RS) const144781ad6265SDimitry Andric void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
144881ad6265SDimitry Andric MachineFunction &MF, RegScavenger *RS) const {
144981ad6265SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
145081ad6265SDimitry Andric const SIRegisterInfo *TRI = ST.getRegisterInfo();
145181ad6265SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
145281ad6265SDimitry Andric SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
145381ad6265SDimitry Andric
145481ad6265SDimitry Andric if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
145581ad6265SDimitry Andric // On gfx908, we had initially reserved highest available VGPR for AGPR
145681ad6265SDimitry Andric // copy. Now since we are done with RA, check if there exist an unused VGPR
145781ad6265SDimitry Andric // which is lower than the eariler reserved VGPR before RA. If one exist,
145881ad6265SDimitry Andric // use it for AGPR copy instead of one reserved before RA.
145981ad6265SDimitry Andric Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
146081ad6265SDimitry Andric Register UnusedLowVGPR =
146181ad6265SDimitry Andric TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
146281ad6265SDimitry Andric if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
146381ad6265SDimitry Andric TRI->getHWRegIndex(VGPRForAGPRCopy))) {
146406c3fb27SDimitry Andric // Reserve this newly identified VGPR (for AGPR copy)
146506c3fb27SDimitry Andric // reserved registers should already be frozen at this point
146606c3fb27SDimitry Andric // so we can avoid calling MRI.freezeReservedRegs and just use
146706c3fb27SDimitry Andric // MRI.reserveReg
146881ad6265SDimitry Andric FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
146906c3fb27SDimitry Andric MRI.reserveReg(UnusedLowVGPR, TRI);
147081ad6265SDimitry Andric }
14710b57cec5SDimitry Andric }
147206c3fb27SDimitry Andric // We initally reserved the highest available SGPR pair for long branches
147306c3fb27SDimitry Andric // now, after RA, we shift down to a lower unused one if one exists
147406c3fb27SDimitry Andric Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
147506c3fb27SDimitry Andric Register UnusedLowSGPR =
147606c3fb27SDimitry Andric TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
147706c3fb27SDimitry Andric // If LongBranchReservedReg is null then we didn't find a long branch
147806c3fb27SDimitry Andric // and never reserved a register to begin with so there is nothing to
147906c3fb27SDimitry Andric // shift down. Then if UnusedLowSGPR is null, there isn't available lower
148006c3fb27SDimitry Andric // register to use so just keep the original one we set.
148106c3fb27SDimitry Andric if (LongBranchReservedReg && UnusedLowSGPR) {
148206c3fb27SDimitry Andric FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
148306c3fb27SDimitry Andric MRI.reserveReg(UnusedLowSGPR, TRI);
148406c3fb27SDimitry Andric }
14850b57cec5SDimitry Andric }
14860b57cec5SDimitry Andric
// The special SGPR spills like the one needed for FP, BP or any reserved
// registers delayed until frame lowering.
//
// For each such register, either find a scratch SGPR that is free for the
// whole function (so no spill is needed at all), or fall back to
// getVGPRSpillLaneOrTempRegister, which arranges a VGPR-lane or memory spill
// and records it in SIMachineFunctionInfo.
void SIFrameLowering::determinePrologEpilogSGPRSaves(
    MachineFunction &MF, BitVector &SavedVGPRs,
    bool NeedExecCopyReservedReg) const {
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  LiveRegUnits LiveUnits;
  LiveUnits.init(*TRI);
  // Initially mark callee saved registers as used so we will not choose them
  // while looking for scratch SGPRs.
  const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
  for (unsigned I = 0; CSRegs[I]; ++I)
    LiveUnits.addReg(CSRegs[I]);

  // Register class used to hold a copy of the EXEC (wave) mask.
  const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();

  if (NeedExecCopyReservedReg) {
    Register ReservedReg = MFI->getSGPRForEXECCopy();
    assert(ReservedReg && "Should have reserved an SGPR for EXEC copy.");
    Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
    if (UnusedScratchReg) {
      // If found any unused scratch SGPR, reserve the register itself for Exec
      // copy and there is no need for any spills in that case.
      MFI->setSGPRForEXECCopy(UnusedScratchReg);
      LiveUnits.addReg(UnusedScratchReg);
    } else {
      // Needs spill.
      assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedReg) &&
             "Re-reserving spill slot for EXEC copy register");
      // IncludeScratchCopy=false: the search above already failed to find a
      // free scratch SGPR, so asking for a scratch copy would be redundant.
      getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedReg, RC,
                                     /*IncludeScratchCopy=*/false);
    }
  }

  // hasFP only knows about stack objects that already exist. We're now
  // determining the stack slots that will be created, so we have to predict
  // them. Stack objects force FP usage with calls.
  //
  // Note a new VGPR CSR may be introduced if one is used for the spill, but we
  // don't want to report it here.
  //
  // FIXME: Is this really hasReservedCallFrame?
  const bool WillHaveFP =
      FrameInfo.hasCalls() &&
      (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));

  // Arrange a save location for the frame pointer if one will be needed.
  if (WillHaveFP || hasFP(MF)) {
    Register FramePtrReg = MFI->getFrameOffsetReg();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
           "Re-reserving spill slot for FP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
  }

  // Likewise for the base pointer, when stack realignment requires one.
  if (TRI->hasBasePointer(MF)) {
    Register BasePtrReg = TRI->getBaseRegister();
    assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
           "Re-reserving spill slot for BP");
    getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
  }
}
1551e8d8bef9SDimitry Andric
// Only report VGPRs to generic code.
//
// In addition to the generic callee-save analysis, this walks every
// instruction to:
//  * allocate WWM spill slots for VGPRs used as SGPR spill lanes,
//  * note whether any WWM register spill needs a reserved EXEC-copy SGPR,
//  * locate a return instruction so its register operands (the return value)
//    can be dropped from the save list.
void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                           BitVector &SavedVGPRs,
                                           RegScavenger *RS) const {
  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // If this is a function with the amdgpu_cs_chain[_preserve] calling
  // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
  // we don't need to save and restore anything.
  if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
    return;

  MFI->shiftSpillPhysVGPRsToLowestRange(MF);

  TargetFrameLowering::determineCalleeSaves(MF, SavedVGPRs, RS);
  // Entry functions have no callers, hence nothing to save for them.
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();
  bool NeedExecCopyReservedReg = false;

  MachineInstr *ReturnMI = nullptr;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // WRITELANE instructions used for SGPR spills can overwrite the inactive
      // lanes of VGPRs and callee must spill and restore them even if they are
      // marked Caller-saved.

      // TODO: Handle this elsewhere at an early point. Walking through all MBBs
      // here would be a bad heuristic. A better way should be by calling
      // allocateWWMSpill during the regalloc pipeline whenever a physical
      // register is allocated for the intended virtual registers.
      if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
        MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
      else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
        MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
      else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
        NeedExecCopyReservedReg = true;
      else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
               MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
               (MFI->isChainFunction() &&
                TII->isChainCallOpcode(MI.getOpcode()))) {
        // We expect all return to be the same size.
        assert(!ReturnMI ||
               (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
                count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
        ReturnMI = &MI;
      }
    }
  }

  // Remove any VGPRs used in the return value because these do not need to be
  // saved. This prevents CSR restore from clobbering return VGPRs.
  if (ReturnMI) {
    for (auto &Op : ReturnMI->operands()) {
      if (Op.isReg())
        SavedVGPRs.reset(Op.getReg());
    }
  }

  // Ignore the SGPRs the default implementation found.
  SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());

  // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
  // In gfx908 there were no AGPR loads and stores, and thus spilling also
  // requires a temporary VGPR.
  if (!ST.hasGFX90AInsts())
    SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

  // Decide how the special SGPRs (EXEC copy, FP, BP) get saved.
  determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);

  // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
  // allow the default insertion to handle them.
  for (auto &Reg : MFI->getWWMSpills())
    SavedVGPRs.reset(Reg.first);

  // Mark all lane VGPRs as BB LiveIns.
  for (MachineBasicBlock &MBB : MF) {
    for (auto &Reg : MFI->getWWMSpills())
      MBB.addLiveIn(Reg.first);

    MBB.sortUniqueLiveIns();
  }
}
16380b57cec5SDimitry Andric
// Determine which scalar (SGPR) callee-saved registers need saving; vector
// registers are filtered out here and reported by determineCalleeSaves
// instead. Also forces a save of the return address registers where IPRA
// would otherwise miss their clobbering.
void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
                                               BitVector &SavedRegs,
                                               RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  // Entry functions have no callers, hence nothing to save.
  if (MFI->isEntryFunction())
    return;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();

  // The SP is specifically managed and we don't want extra spills of it.
  SavedRegs.reset(MFI->getStackPtrOffsetReg());

  // Snapshot the full save set before dropping vector registers; the FP
  // prediction below must consider all of them.
  const BitVector AllSavedRegs = SavedRegs;
  SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());

  // We have to anticipate introducing CSR VGPR spills or spill of caller
  // save VGPR reserved for SGPR spills as we now always create stack entry
  // for it, if we don't have any stack objects already, since we require a FP
  // if there is a call and stack. We will allocate a VGPR for SGPR spills if
  // there are any SGPR spills. Whether they are CSR spills or otherwise.
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const bool WillHaveFP =
      FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());

  // FP will be specially managed like SP.
  if (WillHaveFP || hasFP(MF))
    SavedRegs.reset(MFI->getFrameOffsetReg());

  // Return address use with return instruction is hidden through the SI_RETURN
  // pseudo. Given that and since the IPRA computes actual register usage and
  // does not use CSR list, the clobbering of return address by function calls
  // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
  // usage collection. This will ensure save/restore of return address happens
  // in those scenarios.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  Register RetAddrReg = TRI->getReturnAddressReg(MF);
  if (!MFI->isEntryFunction() &&
      (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
    SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
  }
}
16830b57cec5SDimitry Andric
assignCalleeSavedSpillSlots(MachineFunction & MF,const TargetRegisterInfo * TRI,std::vector<CalleeSavedInfo> & CSI) const16840b57cec5SDimitry Andric bool SIFrameLowering::assignCalleeSavedSpillSlots(
16850b57cec5SDimitry Andric MachineFunction &MF, const TargetRegisterInfo *TRI,
16860b57cec5SDimitry Andric std::vector<CalleeSavedInfo> &CSI) const {
16870b57cec5SDimitry Andric if (CSI.empty())
16880b57cec5SDimitry Andric return true; // Early exit if no callee saved registers are modified!
16890b57cec5SDimitry Andric
16900b57cec5SDimitry Andric const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
16915ffd83dbSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
16925ffd83dbSDimitry Andric const SIRegisterInfo *RI = ST.getRegisterInfo();
16935ffd83dbSDimitry Andric Register FramePtrReg = FuncInfo->getFrameOffsetReg();
16945ffd83dbSDimitry Andric Register BasePtrReg = RI->getBaseRegister();
1695bdd1243dSDimitry Andric Register SGPRForFPSaveRestoreCopy =
1696bdd1243dSDimitry Andric FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1697bdd1243dSDimitry Andric Register SGPRForBPSaveRestoreCopy =
1698bdd1243dSDimitry Andric FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1699bdd1243dSDimitry Andric if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1700bdd1243dSDimitry Andric return false;
1701bdd1243dSDimitry Andric
17025ffd83dbSDimitry Andric unsigned NumModifiedRegs = 0;
17035ffd83dbSDimitry Andric
1704bdd1243dSDimitry Andric if (SGPRForFPSaveRestoreCopy)
17055ffd83dbSDimitry Andric NumModifiedRegs++;
1706bdd1243dSDimitry Andric if (SGPRForBPSaveRestoreCopy)
17075ffd83dbSDimitry Andric NumModifiedRegs++;
17085ffd83dbSDimitry Andric
17090b57cec5SDimitry Andric for (auto &CS : CSI) {
1710bdd1243dSDimitry Andric if (CS.getReg() == FramePtrReg && SGPRForFPSaveRestoreCopy) {
1711bdd1243dSDimitry Andric CS.setDstReg(SGPRForFPSaveRestoreCopy);
17125ffd83dbSDimitry Andric if (--NumModifiedRegs)
17135ffd83dbSDimitry Andric break;
1714bdd1243dSDimitry Andric } else if (CS.getReg() == BasePtrReg && SGPRForBPSaveRestoreCopy) {
1715bdd1243dSDimitry Andric CS.setDstReg(SGPRForBPSaveRestoreCopy);
17165ffd83dbSDimitry Andric if (--NumModifiedRegs)
17170b57cec5SDimitry Andric break;
17180b57cec5SDimitry Andric }
17190b57cec5SDimitry Andric }
17200b57cec5SDimitry Andric
17210b57cec5SDimitry Andric return false;
17220b57cec5SDimitry Andric }
17230b57cec5SDimitry Andric
allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction & MF) const17244824e7fdSDimitry Andric bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
17254824e7fdSDimitry Andric const MachineFunction &MF) const {
17264824e7fdSDimitry Andric
17274824e7fdSDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
17284824e7fdSDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo();
17295f757f3fSDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo();
17304824e7fdSDimitry Andric uint64_t EstStackSize = MFI.estimateStackSize(MF);
17314824e7fdSDimitry Andric uint64_t MaxOffset = EstStackSize - 1;
17324824e7fdSDimitry Andric
17334824e7fdSDimitry Andric // We need the emergency stack slots to be allocated in range of the
17344824e7fdSDimitry Andric // MUBUF/flat scratch immediate offset from the base register, so assign these
17354824e7fdSDimitry Andric // first at the incoming SP position.
17364824e7fdSDimitry Andric //
17374824e7fdSDimitry Andric // TODO: We could try sorting the objects to find a hole in the first bytes
17384824e7fdSDimitry Andric // rather than allocating as close to possible. This could save a lot of space
17394824e7fdSDimitry Andric // on frames with alignment requirements.
17404824e7fdSDimitry Andric if (ST.enableFlatScratch()) {
17414824e7fdSDimitry Andric if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
17424824e7fdSDimitry Andric SIInstrFlags::FlatScratch))
17434824e7fdSDimitry Andric return false;
17444824e7fdSDimitry Andric } else {
17455f757f3fSDimitry Andric if (TII->isLegalMUBUFImmOffset(MaxOffset))
17464824e7fdSDimitry Andric return false;
17474824e7fdSDimitry Andric }
17484824e7fdSDimitry Andric
17494824e7fdSDimitry Andric return true;
17504824e7fdSDimitry Andric }
17514824e7fdSDimitry Andric
eliminateCallFramePseudoInstr(MachineFunction & MF,MachineBasicBlock & MBB,MachineBasicBlock::iterator I) const17520b57cec5SDimitry Andric MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
17530b57cec5SDimitry Andric MachineFunction &MF,
17540b57cec5SDimitry Andric MachineBasicBlock &MBB,
17550b57cec5SDimitry Andric MachineBasicBlock::iterator I) const {
17560b57cec5SDimitry Andric int64_t Amount = I->getOperand(0).getImm();
17570b57cec5SDimitry Andric if (Amount == 0)
17580b57cec5SDimitry Andric return MBB.erase(I);
17590b57cec5SDimitry Andric
17600b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
17610b57cec5SDimitry Andric const SIInstrInfo *TII = ST.getInstrInfo();
17620b57cec5SDimitry Andric const DebugLoc &DL = I->getDebugLoc();
17630b57cec5SDimitry Andric unsigned Opc = I->getOpcode();
17640b57cec5SDimitry Andric bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
17650b57cec5SDimitry Andric uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
17660b57cec5SDimitry Andric
17670b57cec5SDimitry Andric if (!hasReservedCallFrame(MF)) {
17685ffd83dbSDimitry Andric Amount = alignTo(Amount, getStackAlign());
17690b57cec5SDimitry Andric assert(isUInt<32>(Amount) && "exceeded stack address space size");
17700b57cec5SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
17715ffd83dbSDimitry Andric Register SPReg = MFI->getStackPtrOffsetReg();
17720b57cec5SDimitry Andric
1773fe6060f1SDimitry Andric Amount *= getScratchScaleFactor(ST);
1774fe6060f1SDimitry Andric if (IsDestroy)
1775fe6060f1SDimitry Andric Amount = -Amount;
1776349cc55cSDimitry Andric auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
17770b57cec5SDimitry Andric .addReg(SPReg)
1778fe6060f1SDimitry Andric .addImm(Amount);
1779349cc55cSDimitry Andric Add->getOperand(3).setIsDead(); // Mark SCC as dead.
17800b57cec5SDimitry Andric } else if (CalleePopAmount != 0) {
17810b57cec5SDimitry Andric llvm_unreachable("is this used?");
17820b57cec5SDimitry Andric }
17830b57cec5SDimitry Andric
17840b57cec5SDimitry Andric return MBB.erase(I);
17850b57cec5SDimitry Andric }
17860b57cec5SDimitry Andric
1787e8d8bef9SDimitry Andric /// Returns true if the frame will require a reference to the stack pointer.
1788e8d8bef9SDimitry Andric ///
1789e8d8bef9SDimitry Andric /// This is the set of conditions common to setting up the stack pointer in a
1790e8d8bef9SDimitry Andric /// kernel, and for using a frame pointer in a callable function.
1791e8d8bef9SDimitry Andric ///
1792e8d8bef9SDimitry Andric /// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
1793e8d8bef9SDimitry Andric /// references SP.
frameTriviallyRequiresSP(const MachineFrameInfo & MFI)1794e8d8bef9SDimitry Andric static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI) {
1795e8d8bef9SDimitry Andric return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
1796e8d8bef9SDimitry Andric }
1797e8d8bef9SDimitry Andric
1798e8d8bef9SDimitry Andric // The FP for kernels is always known 0, so we never really need to setup an
1799e8d8bef9SDimitry Andric // explicit register for it. However, DisableFramePointerElim will force us to
1800e8d8bef9SDimitry Andric // use a register for it.
hasFP(const MachineFunction & MF) const18010b57cec5SDimitry Andric bool SIFrameLowering::hasFP(const MachineFunction &MF) const {
18020b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo();
18035ffd83dbSDimitry Andric
18045f757f3fSDimitry Andric // For entry & chain functions we can use an immediate offset in most cases,
18055f757f3fSDimitry Andric // so the presence of calls doesn't imply we need a distinct frame pointer.
18065ffd83dbSDimitry Andric if (MFI.hasCalls() &&
18075f757f3fSDimitry Andric !MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() &&
18085f757f3fSDimitry Andric !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) {
18090b57cec5SDimitry Andric // All offsets are unsigned, so need to be addressed in the same direction
18100b57cec5SDimitry Andric // as stack growth.
18110b57cec5SDimitry Andric
18120b57cec5SDimitry Andric // FIXME: This function is pretty broken, since it can be called before the
18130b57cec5SDimitry Andric // frame layout is determined or CSR spills are inserted.
18145ffd83dbSDimitry Andric return MFI.getStackSize() != 0;
18150b57cec5SDimitry Andric }
18160b57cec5SDimitry Andric
1817e8d8bef9SDimitry Andric return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
1818fe6060f1SDimitry Andric MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
1819fe6060f1SDimitry Andric MF) ||
18200b57cec5SDimitry Andric MF.getTarget().Options.DisableFramePointerElim(MF);
18210b57cec5SDimitry Andric }
1822e8d8bef9SDimitry Andric
1823e8d8bef9SDimitry Andric // This is essentially a reduced version of hasFP for entry functions. Since the
1824e8d8bef9SDimitry Andric // stack pointer is known 0 on entry to kernels, we never really need an FP
1825e8d8bef9SDimitry Andric // register. We may need to initialize the stack pointer depending on the frame
1826e8d8bef9SDimitry Andric // properties, which logically overlaps many of the cases where an ordinary
1827e8d8bef9SDimitry Andric // function would require an FP.
18285f757f3fSDimitry Andric // Also used for chain functions. While not technically entry functions, chain
18295f757f3fSDimitry Andric // functions may need to set up a stack pointer in some situations.
requiresStackPointerReference(const MachineFunction & MF) const1830e8d8bef9SDimitry Andric bool SIFrameLowering::requiresStackPointerReference(
1831e8d8bef9SDimitry Andric const MachineFunction &MF) const {
1832e8d8bef9SDimitry Andric // Callable functions always require a stack pointer reference.
18335f757f3fSDimitry Andric assert((MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction() ||
18345f757f3fSDimitry Andric MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) &&
18355f757f3fSDimitry Andric "only expected to call this for entry points and chain functions");
1836e8d8bef9SDimitry Andric
1837e8d8bef9SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo();
1838e8d8bef9SDimitry Andric
1839e8d8bef9SDimitry Andric // Entry points ordinarily don't need to initialize SP. We have to set it up
1840e8d8bef9SDimitry Andric // for callees if there are any. Also note tail calls are impossible/don't
1841e8d8bef9SDimitry Andric // make any sense for kernels.
1842e8d8bef9SDimitry Andric if (MFI.hasCalls())
1843e8d8bef9SDimitry Andric return true;
1844e8d8bef9SDimitry Andric
1845e8d8bef9SDimitry Andric // We still need to initialize the SP if we're doing anything weird that
1846e8d8bef9SDimitry Andric // references the SP, like variable sized stack objects.
1847e8d8bef9SDimitry Andric return frameTriviallyRequiresSP(MFI);
1848e8d8bef9SDimitry Andric }
1849