//===-- SILowerSGPRSpills.cpp ---------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
100b57cec5SDimitry Andric // SGPR spills, so must insert CSR SGPR spills as well as expand them.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric // This pass must never create new SGPR virtual registers.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric // FIXME: Must stop RegScavenger spills in later passes.
150b57cec5SDimitry Andric //
160b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
170b57cec5SDimitry Andric
180b57cec5SDimitry Andric #include "AMDGPU.h"
19e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
20e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
210b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
220b57cec5SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
2381ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
24e8d8bef9SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h"
25480093f4SDimitry Andric #include "llvm/InitializePasses.h"
260b57cec5SDimitry Andric
270b57cec5SDimitry Andric using namespace llvm;
280b57cec5SDimitry Andric
290b57cec5SDimitry Andric #define DEBUG_TYPE "si-lower-sgpr-spills"
300b57cec5SDimitry Andric
310b57cec5SDimitry Andric using MBBVector = SmallVector<MachineBasicBlock *, 4>;
320b57cec5SDimitry Andric
330b57cec5SDimitry Andric namespace {
340b57cec5SDimitry Andric
350b57cec5SDimitry Andric class SILowerSGPRSpills : public MachineFunctionPass {
360b57cec5SDimitry Andric private:
370b57cec5SDimitry Andric const SIRegisterInfo *TRI = nullptr;
380b57cec5SDimitry Andric const SIInstrInfo *TII = nullptr;
390b57cec5SDimitry Andric LiveIntervals *LIS = nullptr;
40bdd1243dSDimitry Andric SlotIndexes *Indexes = nullptr;
410b57cec5SDimitry Andric
420b57cec5SDimitry Andric // Save and Restore blocks of the current function. Typically there is a
430b57cec5SDimitry Andric // single save block, unless Windows EH funclets are involved.
440b57cec5SDimitry Andric MBBVector SaveBlocks;
450b57cec5SDimitry Andric MBBVector RestoreBlocks;
460b57cec5SDimitry Andric
470b57cec5SDimitry Andric public:
480b57cec5SDimitry Andric static char ID;
490b57cec5SDimitry Andric
SILowerSGPRSpills()500b57cec5SDimitry Andric SILowerSGPRSpills() : MachineFunctionPass(ID) {}
510b57cec5SDimitry Andric
520b57cec5SDimitry Andric void calculateSaveRestoreBlocks(MachineFunction &MF);
535f757f3fSDimitry Andric bool spillCalleeSavedRegs(MachineFunction &MF,
545f757f3fSDimitry Andric SmallVectorImpl<int> &CalleeSavedFIs);
555f757f3fSDimitry Andric void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS);
560b57cec5SDimitry Andric
570b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
580b57cec5SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const590b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
600b57cec5SDimitry Andric AU.setPreservesAll();
610b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
620b57cec5SDimitry Andric }
635f757f3fSDimitry Andric
getClearedProperties() const645f757f3fSDimitry Andric MachineFunctionProperties getClearedProperties() const override {
655f757f3fSDimitry Andric // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs.
665f757f3fSDimitry Andric return MachineFunctionProperties()
675f757f3fSDimitry Andric .set(MachineFunctionProperties::Property::IsSSA)
685f757f3fSDimitry Andric .set(MachineFunctionProperties::Property::NoVRegs);
695f757f3fSDimitry Andric }
700b57cec5SDimitry Andric };
710b57cec5SDimitry Andric
720b57cec5SDimitry Andric } // end anonymous namespace
730b57cec5SDimitry Andric
740b57cec5SDimitry Andric char SILowerSGPRSpills::ID = 0;
750b57cec5SDimitry Andric
760b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
770b57cec5SDimitry Andric "SI lower SGPR spill instructions", false, false)
78fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
790b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
800b57cec5SDimitry Andric INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
810b57cec5SDimitry Andric "SI lower SGPR spill instructions", false, false)
820b57cec5SDimitry Andric
830b57cec5SDimitry Andric char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
840b57cec5SDimitry Andric
85753f127fSDimitry Andric /// Insert spill code for the callee-saved registers used in the function.
insertCSRSaves(MachineBasicBlock & SaveBlock,ArrayRef<CalleeSavedInfo> CSI,SlotIndexes * Indexes,LiveIntervals * LIS)860b57cec5SDimitry Andric static void insertCSRSaves(MachineBasicBlock &SaveBlock,
87bdd1243dSDimitry Andric ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
880b57cec5SDimitry Andric LiveIntervals *LIS) {
890b57cec5SDimitry Andric MachineFunction &MF = *SaveBlock.getParent();
900b57cec5SDimitry Andric const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
910b57cec5SDimitry Andric const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
920b57cec5SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9381ad6265SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
9481ad6265SDimitry Andric const SIRegisterInfo *RI = ST.getRegisterInfo();
950b57cec5SDimitry Andric
960b57cec5SDimitry Andric MachineBasicBlock::iterator I = SaveBlock.begin();
970b57cec5SDimitry Andric if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
98fe6060f1SDimitry Andric const MachineRegisterInfo &MRI = MF.getRegInfo();
99fe6060f1SDimitry Andric
1000b57cec5SDimitry Andric for (const CalleeSavedInfo &CS : CSI) {
1010b57cec5SDimitry Andric // Insert the spill to the stack frame.
102e8d8bef9SDimitry Andric MCRegister Reg = CS.getReg();
1030b57cec5SDimitry Andric
1040b57cec5SDimitry Andric MachineInstrSpan MIS(I, &SaveBlock);
10581ad6265SDimitry Andric const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
10681ad6265SDimitry Andric Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
1070b57cec5SDimitry Andric
108fe6060f1SDimitry Andric // If this value was already livein, we probably have a direct use of the
109fe6060f1SDimitry Andric // incoming register value, so don't kill at the spill point. This happens
110fe6060f1SDimitry Andric // since we pass some special inputs (workgroup IDs) in the callee saved
111fe6060f1SDimitry Andric // range.
112fe6060f1SDimitry Andric const bool IsLiveIn = MRI.isLiveIn(Reg);
113fe6060f1SDimitry Andric TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
114bdd1243dSDimitry Andric RC, TRI, Register());
1150b57cec5SDimitry Andric
116bdd1243dSDimitry Andric if (Indexes) {
1170b57cec5SDimitry Andric assert(std::distance(MIS.begin(), I) == 1);
1180b57cec5SDimitry Andric MachineInstr &Inst = *std::prev(I);
119bdd1243dSDimitry Andric Indexes->insertMachineInstrInMaps(Inst);
1200b57cec5SDimitry Andric }
121bdd1243dSDimitry Andric
122bdd1243dSDimitry Andric if (LIS)
123bdd1243dSDimitry Andric LIS->removeAllRegUnitsForPhysReg(Reg);
1240b57cec5SDimitry Andric }
1250b57cec5SDimitry Andric }
1260b57cec5SDimitry Andric }
1270b57cec5SDimitry Andric
1280b57cec5SDimitry Andric /// Insert restore code for the callee-saved registers used in the function.
insertCSRRestores(MachineBasicBlock & RestoreBlock,MutableArrayRef<CalleeSavedInfo> CSI,SlotIndexes * Indexes,LiveIntervals * LIS)1290b57cec5SDimitry Andric static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
1305ffd83dbSDimitry Andric MutableArrayRef<CalleeSavedInfo> CSI,
131bdd1243dSDimitry Andric SlotIndexes *Indexes, LiveIntervals *LIS) {
1320b57cec5SDimitry Andric MachineFunction &MF = *RestoreBlock.getParent();
1330b57cec5SDimitry Andric const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1340b57cec5SDimitry Andric const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
1350b57cec5SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
13681ad6265SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
13781ad6265SDimitry Andric const SIRegisterInfo *RI = ST.getRegisterInfo();
1380b57cec5SDimitry Andric // Restore all registers immediately before the return and any
1390b57cec5SDimitry Andric // terminators that precede it.
1400b57cec5SDimitry Andric MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
1410b57cec5SDimitry Andric
1420b57cec5SDimitry Andric // FIXME: Just emit the readlane/writelane directly
1430b57cec5SDimitry Andric if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
1440b57cec5SDimitry Andric for (const CalleeSavedInfo &CI : reverse(CSI)) {
14504eeddc0SDimitry Andric Register Reg = CI.getReg();
14681ad6265SDimitry Andric const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
14781ad6265SDimitry Andric Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
1480b57cec5SDimitry Andric
149bdd1243dSDimitry Andric TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI,
150bdd1243dSDimitry Andric Register());
1510b57cec5SDimitry Andric assert(I != RestoreBlock.begin() &&
1520b57cec5SDimitry Andric "loadRegFromStackSlot didn't insert any code!");
1530b57cec5SDimitry Andric // Insert in reverse order. loadRegFromStackSlot can insert
1540b57cec5SDimitry Andric // multiple instructions.
1550b57cec5SDimitry Andric
156bdd1243dSDimitry Andric if (Indexes) {
1570b57cec5SDimitry Andric MachineInstr &Inst = *std::prev(I);
158bdd1243dSDimitry Andric Indexes->insertMachineInstrInMaps(Inst);
1590b57cec5SDimitry Andric }
160bdd1243dSDimitry Andric
161bdd1243dSDimitry Andric if (LIS)
162bdd1243dSDimitry Andric LIS->removeAllRegUnitsForPhysReg(Reg);
1630b57cec5SDimitry Andric }
1640b57cec5SDimitry Andric }
1650b57cec5SDimitry Andric }
1660b57cec5SDimitry Andric
1670b57cec5SDimitry Andric /// Compute the sets of entry and return blocks for saving and restoring
1680b57cec5SDimitry Andric /// callee-saved registers, and placing prolog and epilog code.
calculateSaveRestoreBlocks(MachineFunction & MF)1690b57cec5SDimitry Andric void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
1700b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo();
1710b57cec5SDimitry Andric
1720b57cec5SDimitry Andric // Even when we do not change any CSR, we still want to insert the
1730b57cec5SDimitry Andric // prologue and epilogue of the function.
1740b57cec5SDimitry Andric // So set the save points for those.
1750b57cec5SDimitry Andric
1760b57cec5SDimitry Andric // Use the points found by shrink-wrapping, if any.
1770b57cec5SDimitry Andric if (MFI.getSavePoint()) {
1780b57cec5SDimitry Andric SaveBlocks.push_back(MFI.getSavePoint());
1790b57cec5SDimitry Andric assert(MFI.getRestorePoint() && "Both restore and save must be set");
1800b57cec5SDimitry Andric MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1810b57cec5SDimitry Andric // If RestoreBlock does not have any successor and is not a return block
1820b57cec5SDimitry Andric // then the end point is unreachable and we do not need to insert any
1830b57cec5SDimitry Andric // epilogue.
1840b57cec5SDimitry Andric if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
1850b57cec5SDimitry Andric RestoreBlocks.push_back(RestoreBlock);
1860b57cec5SDimitry Andric return;
1870b57cec5SDimitry Andric }
1880b57cec5SDimitry Andric
1890b57cec5SDimitry Andric // Save refs to entry and return blocks.
1900b57cec5SDimitry Andric SaveBlocks.push_back(&MF.front());
1910b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) {
1920b57cec5SDimitry Andric if (MBB.isEHFuncletEntry())
1930b57cec5SDimitry Andric SaveBlocks.push_back(&MBB);
1940b57cec5SDimitry Andric if (MBB.isReturnBlock())
1950b57cec5SDimitry Andric RestoreBlocks.push_back(&MBB);
1960b57cec5SDimitry Andric }
1970b57cec5SDimitry Andric }
1980b57cec5SDimitry Andric
199e8d8bef9SDimitry Andric // TODO: To support shrink wrapping, this would need to copy
200e8d8bef9SDimitry Andric // PrologEpilogInserter's updateLiveness.
updateLiveness(MachineFunction & MF,ArrayRef<CalleeSavedInfo> CSI)201e8d8bef9SDimitry Andric static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
202e8d8bef9SDimitry Andric MachineBasicBlock &EntryBB = MF.front();
203e8d8bef9SDimitry Andric
204e8d8bef9SDimitry Andric for (const CalleeSavedInfo &CSIReg : CSI)
205e8d8bef9SDimitry Andric EntryBB.addLiveIn(CSIReg.getReg());
206e8d8bef9SDimitry Andric EntryBB.sortUniqueLiveIns();
207e8d8bef9SDimitry Andric }
208e8d8bef9SDimitry Andric
spillCalleeSavedRegs(MachineFunction & MF,SmallVectorImpl<int> & CalleeSavedFIs)2095f757f3fSDimitry Andric bool SILowerSGPRSpills::spillCalleeSavedRegs(
2105f757f3fSDimitry Andric MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
2110b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
2120b57cec5SDimitry Andric const Function &F = MF.getFunction();
2130b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2140b57cec5SDimitry Andric const SIFrameLowering *TFI = ST.getFrameLowering();
2150b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo();
2160b57cec5SDimitry Andric RegScavenger *RS = nullptr;
2170b57cec5SDimitry Andric
2180b57cec5SDimitry Andric // Determine which of the registers in the callee save list should be saved.
2190b57cec5SDimitry Andric BitVector SavedRegs;
2200b57cec5SDimitry Andric TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
2210b57cec5SDimitry Andric
2220b57cec5SDimitry Andric // Add the code to save and restore the callee saved registers.
2230b57cec5SDimitry Andric if (!F.hasFnAttribute(Attribute::Naked)) {
2240b57cec5SDimitry Andric // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
2250b57cec5SDimitry Andric // necessary for verifier liveness checks.
2260b57cec5SDimitry Andric MFI.setCalleeSavedInfoValid(true);
2270b57cec5SDimitry Andric
2280b57cec5SDimitry Andric std::vector<CalleeSavedInfo> CSI;
2290b57cec5SDimitry Andric const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
2300b57cec5SDimitry Andric
2310b57cec5SDimitry Andric for (unsigned I = 0; CSRegs[I]; ++I) {
232e8d8bef9SDimitry Andric MCRegister Reg = CSRegs[I];
233e8d8bef9SDimitry Andric
2340b57cec5SDimitry Andric if (SavedRegs.test(Reg)) {
2355ffd83dbSDimitry Andric const TargetRegisterClass *RC =
2365ffd83dbSDimitry Andric TRI->getMinimalPhysRegClass(Reg, MVT::i32);
2370b57cec5SDimitry Andric int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
2385ffd83dbSDimitry Andric TRI->getSpillAlign(*RC), true);
2390b57cec5SDimitry Andric
2400b57cec5SDimitry Andric CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
2415f757f3fSDimitry Andric CalleeSavedFIs.push_back(JunkFI);
2420b57cec5SDimitry Andric }
2430b57cec5SDimitry Andric }
2440b57cec5SDimitry Andric
2450b57cec5SDimitry Andric if (!CSI.empty()) {
2460b57cec5SDimitry Andric for (MachineBasicBlock *SaveBlock : SaveBlocks)
247bdd1243dSDimitry Andric insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);
2480b57cec5SDimitry Andric
249e8d8bef9SDimitry Andric // Add live ins to save blocks.
250e8d8bef9SDimitry Andric assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
251e8d8bef9SDimitry Andric updateLiveness(MF, CSI);
252e8d8bef9SDimitry Andric
2530b57cec5SDimitry Andric for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
254bdd1243dSDimitry Andric insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
2550b57cec5SDimitry Andric return true;
2560b57cec5SDimitry Andric }
2570b57cec5SDimitry Andric }
2580b57cec5SDimitry Andric
2590b57cec5SDimitry Andric return false;
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric
extendWWMVirtRegLiveness(MachineFunction & MF,LiveIntervals * LIS)2625f757f3fSDimitry Andric void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
2635f757f3fSDimitry Andric LiveIntervals *LIS) {
2645f757f3fSDimitry Andric // TODO: This is a workaround to avoid the unmodelled liveness computed with
2655f757f3fSDimitry Andric // whole-wave virtual registers when allocated together with the regular VGPR
2665f757f3fSDimitry Andric // virtual registers. Presently, the liveness computed during the regalloc is
2675f757f3fSDimitry Andric // only uniform (or single lane aware) and it doesn't take account of the
2685f757f3fSDimitry Andric // divergent control flow that exists for our GPUs. Since the WWM registers
2695f757f3fSDimitry Andric // can modify inactive lanes, the wave-aware liveness should be computed for
2705f757f3fSDimitry Andric // the virtual registers to accurately plot their interferences. Without
2715f757f3fSDimitry Andric // having the divergent CFG for the function, it is difficult to implement the
2725f757f3fSDimitry Andric // wave-aware liveness info. Until then, we conservatively extend the liveness
2735f757f3fSDimitry Andric // of the wwm registers into the entire function so that they won't be reused
2745f757f3fSDimitry Andric // without first spilling/splitting their liveranges.
2755f757f3fSDimitry Andric SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2765f757f3fSDimitry Andric
2775f757f3fSDimitry Andric // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks.
2785f757f3fSDimitry Andric for (auto Reg : MFI->getSGPRSpillVGPRs()) {
2795f757f3fSDimitry Andric for (MachineBasicBlock *SaveBlock : SaveBlocks) {
2805f757f3fSDimitry Andric MachineBasicBlock::iterator InsertBefore = SaveBlock->begin();
2815f757f3fSDimitry Andric auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(),
2825f757f3fSDimitry Andric TII->get(AMDGPU::IMPLICIT_DEF), Reg);
2835f757f3fSDimitry Andric MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
2845f757f3fSDimitry Andric // Set SGPR_SPILL asm printer flag
2855f757f3fSDimitry Andric MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
2865f757f3fSDimitry Andric if (LIS) {
2875f757f3fSDimitry Andric LIS->InsertMachineInstrInMaps(*MIB);
2885f757f3fSDimitry Andric }
2895f757f3fSDimitry Andric }
2905f757f3fSDimitry Andric }
2915f757f3fSDimitry Andric
2925f757f3fSDimitry Andric // Insert the KILL in the return blocks to extend their liveness untill the
2935f757f3fSDimitry Andric // end of function. Insert a separate KILL for each VGPR.
2945f757f3fSDimitry Andric for (MachineBasicBlock *RestoreBlock : RestoreBlocks) {
2955f757f3fSDimitry Andric MachineBasicBlock::iterator InsertBefore =
2965f757f3fSDimitry Andric RestoreBlock->getFirstTerminator();
2975f757f3fSDimitry Andric for (auto Reg : MFI->getSGPRSpillVGPRs()) {
2985f757f3fSDimitry Andric auto MIB =
2995f757f3fSDimitry Andric BuildMI(*RestoreBlock, *InsertBefore, InsertBefore->getDebugLoc(),
3005f757f3fSDimitry Andric TII->get(TargetOpcode::KILL));
3015f757f3fSDimitry Andric MIB.addReg(Reg);
3025f757f3fSDimitry Andric if (LIS)
3035f757f3fSDimitry Andric LIS->InsertMachineInstrInMaps(*MIB);
3045f757f3fSDimitry Andric }
3055f757f3fSDimitry Andric }
3065f757f3fSDimitry Andric }
3075f757f3fSDimitry Andric
runOnMachineFunction(MachineFunction & MF)3080b57cec5SDimitry Andric bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
3090b57cec5SDimitry Andric const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3100b57cec5SDimitry Andric TII = ST.getInstrInfo();
3110b57cec5SDimitry Andric TRI = &TII->getRegisterInfo();
3120b57cec5SDimitry Andric
313fe6060f1SDimitry Andric LIS = getAnalysisIfAvailable<LiveIntervals>();
314bdd1243dSDimitry Andric Indexes = getAnalysisIfAvailable<SlotIndexes>();
3150b57cec5SDimitry Andric
3160b57cec5SDimitry Andric assert(SaveBlocks.empty() && RestoreBlocks.empty());
3170b57cec5SDimitry Andric
3180b57cec5SDimitry Andric // First, expose any CSR SGPR spills. This is mostly the same as what PEI
3190b57cec5SDimitry Andric // does, but somewhat simpler.
3200b57cec5SDimitry Andric calculateSaveRestoreBlocks(MF);
3215f757f3fSDimitry Andric SmallVector<int> CalleeSavedFIs;
3225f757f3fSDimitry Andric bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);
3230b57cec5SDimitry Andric
3240b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo();
325fe6060f1SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
326fe6060f1SDimitry Andric SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
327fe6060f1SDimitry Andric
3280b57cec5SDimitry Andric if (!MFI.hasStackObjects() && !HasCSRs) {
3290b57cec5SDimitry Andric SaveBlocks.clear();
3300b57cec5SDimitry Andric RestoreBlocks.clear();
3310b57cec5SDimitry Andric return false;
3320b57cec5SDimitry Andric }
3330b57cec5SDimitry Andric
3340b57cec5SDimitry Andric bool MadeChange = false;
3355f757f3fSDimitry Andric bool SpilledToVirtVGPRLanes = false;
3360b57cec5SDimitry Andric
3370b57cec5SDimitry Andric // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
3380b57cec5SDimitry Andric // handled as SpilledToReg in regular PrologEpilogInserter.
339e8d8bef9SDimitry Andric const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
340e8d8bef9SDimitry Andric (HasCSRs || FuncInfo->hasSpilledSGPRs());
341fe6060f1SDimitry Andric if (HasSGPRSpillToVGPR) {
3420b57cec5SDimitry Andric // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
3430b57cec5SDimitry Andric // are spilled to VGPRs, in which case we can eliminate the stack usage.
3440b57cec5SDimitry Andric //
3450b57cec5SDimitry Andric // This operates under the assumption that only other SGPR spills are users
3460b57cec5SDimitry Andric // of the frame index.
3475ffd83dbSDimitry Andric
348fe6060f1SDimitry Andric // To track the spill frame indices handled in this pass.
349fe6060f1SDimitry Andric BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
350fe6060f1SDimitry Andric
3510b57cec5SDimitry Andric for (MachineBasicBlock &MBB : MF) {
352349cc55cSDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
3530b57cec5SDimitry Andric if (!TII->isSGPRSpill(MI))
3540b57cec5SDimitry Andric continue;
3550b57cec5SDimitry Andric
3560b57cec5SDimitry Andric int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
3570b57cec5SDimitry Andric assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
3585f757f3fSDimitry Andric
3595f757f3fSDimitry Andric bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
3605f757f3fSDimitry Andric if (IsCalleeSaveSGPRSpill) {
3615f757f3fSDimitry Andric // Spill callee-saved SGPRs into physical VGPR lanes.
3625f757f3fSDimitry Andric
3635f757f3fSDimitry Andric // TODO: This is to ensure the CFIs are static for efficient frame
3645f757f3fSDimitry Andric // unwinding in the debugger. Spilling them into virtual VGPR lanes
3655f757f3fSDimitry Andric // involve regalloc to allocate the physical VGPRs and that might
3665f757f3fSDimitry Andric // cause intermediate spill/split of such liveranges for successful
3675f757f3fSDimitry Andric // allocation. This would result in broken CFI encoding unless the
3685f757f3fSDimitry Andric // regalloc aware CFI generation to insert new CFIs along with the
3695f757f3fSDimitry Andric // intermediate spills is implemented. There is no such support
3705f757f3fSDimitry Andric // currently exist in the LLVM compiler.
3717a6dacacSDimitry Andric if (FuncInfo->allocateSGPRSpillToVGPRLane(
3727a6dacacSDimitry Andric MF, FI, /*SpillToPhysVGPRLane=*/true)) {
373bdd1243dSDimitry Andric bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
3745f757f3fSDimitry Andric MI, FI, nullptr, Indexes, LIS, true);
3755f757f3fSDimitry Andric if (!Spilled)
3765f757f3fSDimitry Andric llvm_unreachable(
3775f757f3fSDimitry Andric "failed to spill SGPR to physical VGPR lane when allocated");
3785f757f3fSDimitry Andric }
3795f757f3fSDimitry Andric } else {
3805f757f3fSDimitry Andric if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
3815f757f3fSDimitry Andric bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
382bdd1243dSDimitry Andric MI, FI, nullptr, Indexes, LIS);
3835f757f3fSDimitry Andric if (!Spilled)
3845f757f3fSDimitry Andric llvm_unreachable(
3855f757f3fSDimitry Andric "failed to spill SGPR to virtual VGPR lane when allocated");
386fe6060f1SDimitry Andric SpillFIs.set(FI);
3875f757f3fSDimitry Andric SpilledToVirtVGPRLanes = true;
3885f757f3fSDimitry Andric }
3890b57cec5SDimitry Andric }
3900b57cec5SDimitry Andric }
3910b57cec5SDimitry Andric }
3920b57cec5SDimitry Andric
3935f757f3fSDimitry Andric if (SpilledToVirtVGPRLanes) {
3945f757f3fSDimitry Andric extendWWMVirtRegLiveness(MF, LIS);
3955f757f3fSDimitry Andric if (LIS) {
3965f757f3fSDimitry Andric // Compute the LiveInterval for the newly created virtual registers.
3978a4dda33SDimitry Andric for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
3985f757f3fSDimitry Andric LIS->createAndComputeVirtRegInterval(Reg);
3995f757f3fSDimitry Andric }
4005f757f3fSDimitry Andric }
4018a4dda33SDimitry Andric
4025f757f3fSDimitry Andric for (MachineBasicBlock &MBB : MF) {
403fe6060f1SDimitry Andric // FIXME: The dead frame indices are replaced with a null register from
404fe6060f1SDimitry Andric // the debug value instructions. We should instead, update it with the
405fe6060f1SDimitry Andric // correct register value. But not sure the register value alone is
406fe6060f1SDimitry Andric // adequate to lower the DIExpression. It should be worked out later.
407fe6060f1SDimitry Andric for (MachineInstr &MI : MBB) {
408fe6060f1SDimitry Andric if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
409bdd1243dSDimitry Andric !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) &&
410fe6060f1SDimitry Andric SpillFIs[MI.getOperand(0).getIndex()]) {
411fe6060f1SDimitry Andric MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
412fe6060f1SDimitry Andric }
413fe6060f1SDimitry Andric }
4140b57cec5SDimitry Andric }
4150b57cec5SDimitry Andric
416349cc55cSDimitry Andric // All those frame indices which are dead by now should be removed from the
417349cc55cSDimitry Andric // function frame. Otherwise, there is a side effect such as re-mapping of
418349cc55cSDimitry Andric // free frame index ids by the later pass(es) like "stack slot coloring"
419349cc55cSDimitry Andric // which in turn could mess-up with the book keeping of "frame index to VGPR
420349cc55cSDimitry Andric // lane".
42181ad6265SDimitry Andric FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
422349cc55cSDimitry Andric
4235f757f3fSDimitry Andric MadeChange = true;
4245f757f3fSDimitry Andric }
4255f757f3fSDimitry Andric
4265f757f3fSDimitry Andric if (SpilledToVirtVGPRLanes) {
42706c3fb27SDimitry Andric const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
42806c3fb27SDimitry Andric // Shift back the reserved SGPR for EXEC copy into the lowest range.
42906c3fb27SDimitry Andric // This SGPR is reserved to handle the whole-wave spill/copy operations
43006c3fb27SDimitry Andric // that might get inserted during vgpr regalloc.
43106c3fb27SDimitry Andric Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
43206c3fb27SDimitry Andric if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
43306c3fb27SDimitry Andric TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
43406c3fb27SDimitry Andric FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
43506c3fb27SDimitry Andric } else {
4365f757f3fSDimitry Andric // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
4375f757f3fSDimitry Andric // spills/copies. Reset the SGPR reserved for EXEC copy.
43806c3fb27SDimitry Andric FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
43906c3fb27SDimitry Andric }
44006c3fb27SDimitry Andric
4410b57cec5SDimitry Andric SaveBlocks.clear();
4420b57cec5SDimitry Andric RestoreBlocks.clear();
4430b57cec5SDimitry Andric
4440b57cec5SDimitry Andric return MadeChange;
4450b57cec5SDimitry Andric }
446