10b57cec5SDimitry Andric //===-- SILowerSGPRSPills.cpp ---------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // Handle SGPR spills. This pass takes the place of PrologEpilogInserter for all
100b57cec5SDimitry Andric // SGPR spills, so must insert CSR SGPR spills as well as expand them.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric // This pass must never create new SGPR virtual registers.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric // FIXME: Must stop RegScavenger spills in later passes.
150b57cec5SDimitry Andric //
160b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
170b57cec5SDimitry Andric 
180b57cec5SDimitry Andric #include "AMDGPU.h"
19e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
20e8d8bef9SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
210b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
220b57cec5SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
2381ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
24e8d8bef9SDimitry Andric #include "llvm/CodeGen/RegisterScavenging.h"
25480093f4SDimitry Andric #include "llvm/InitializePasses.h"
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric using namespace llvm;
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric #define DEBUG_TYPE "si-lower-sgpr-spills"
300b57cec5SDimitry Andric 
310b57cec5SDimitry Andric using MBBVector = SmallVector<MachineBasicBlock *, 4>;
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric namespace {
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric class SILowerSGPRSpills : public MachineFunctionPass {
360b57cec5SDimitry Andric private:
370b57cec5SDimitry Andric   const SIRegisterInfo *TRI = nullptr;
380b57cec5SDimitry Andric   const SIInstrInfo *TII = nullptr;
390b57cec5SDimitry Andric   LiveIntervals *LIS = nullptr;
40bdd1243dSDimitry Andric   SlotIndexes *Indexes = nullptr;
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric   // Save and Restore blocks of the current function. Typically there is a
430b57cec5SDimitry Andric   // single save block, unless Windows EH funclets are involved.
440b57cec5SDimitry Andric   MBBVector SaveBlocks;
450b57cec5SDimitry Andric   MBBVector RestoreBlocks;
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric public:
480b57cec5SDimitry Andric   static char ID;
490b57cec5SDimitry Andric 
SILowerSGPRSpills()500b57cec5SDimitry Andric   SILowerSGPRSpills() : MachineFunctionPass(ID) {}
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric   void calculateSaveRestoreBlocks(MachineFunction &MF);
535f757f3fSDimitry Andric   bool spillCalleeSavedRegs(MachineFunction &MF,
545f757f3fSDimitry Andric                             SmallVectorImpl<int> &CalleeSavedFIs);
555f757f3fSDimitry Andric   void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS);
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
580b57cec5SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const590b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
600b57cec5SDimitry Andric     AU.setPreservesAll();
610b57cec5SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
620b57cec5SDimitry Andric   }
635f757f3fSDimitry Andric 
getClearedProperties() const645f757f3fSDimitry Andric   MachineFunctionProperties getClearedProperties() const override {
655f757f3fSDimitry Andric     // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs.
665f757f3fSDimitry Andric     return MachineFunctionProperties()
675f757f3fSDimitry Andric         .set(MachineFunctionProperties::Property::IsSSA)
685f757f3fSDimitry Andric         .set(MachineFunctionProperties::Property::NoVRegs);
695f757f3fSDimitry Andric   }
700b57cec5SDimitry Andric };
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric } // end anonymous namespace
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric char SILowerSGPRSpills::ID = 0;
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(SILowerSGPRSpills, DEBUG_TYPE,
770b57cec5SDimitry Andric                       "SI lower SGPR spill instructions", false, false)
78fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
790b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
800b57cec5SDimitry Andric INITIALIZE_PASS_END(SILowerSGPRSpills, DEBUG_TYPE,
810b57cec5SDimitry Andric                     "SI lower SGPR spill instructions", false, false)
820b57cec5SDimitry Andric 
830b57cec5SDimitry Andric char &llvm::SILowerSGPRSpillsID = SILowerSGPRSpills::ID;
840b57cec5SDimitry Andric 
85753f127fSDimitry Andric /// Insert spill code for the callee-saved registers used in the function.
insertCSRSaves(MachineBasicBlock & SaveBlock,ArrayRef<CalleeSavedInfo> CSI,SlotIndexes * Indexes,LiveIntervals * LIS)860b57cec5SDimitry Andric static void insertCSRSaves(MachineBasicBlock &SaveBlock,
87bdd1243dSDimitry Andric                            ArrayRef<CalleeSavedInfo> CSI, SlotIndexes *Indexes,
880b57cec5SDimitry Andric                            LiveIntervals *LIS) {
890b57cec5SDimitry Andric   MachineFunction &MF = *SaveBlock.getParent();
900b57cec5SDimitry Andric   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
910b57cec5SDimitry Andric   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
920b57cec5SDimitry Andric   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9381ad6265SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
9481ad6265SDimitry Andric   const SIRegisterInfo *RI = ST.getRegisterInfo();
950b57cec5SDimitry Andric 
960b57cec5SDimitry Andric   MachineBasicBlock::iterator I = SaveBlock.begin();
970b57cec5SDimitry Andric   if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
98fe6060f1SDimitry Andric     const MachineRegisterInfo &MRI = MF.getRegInfo();
99fe6060f1SDimitry Andric 
1000b57cec5SDimitry Andric     for (const CalleeSavedInfo &CS : CSI) {
1010b57cec5SDimitry Andric       // Insert the spill to the stack frame.
102e8d8bef9SDimitry Andric       MCRegister Reg = CS.getReg();
1030b57cec5SDimitry Andric 
1040b57cec5SDimitry Andric       MachineInstrSpan MIS(I, &SaveBlock);
10581ad6265SDimitry Andric       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
10681ad6265SDimitry Andric           Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
1070b57cec5SDimitry Andric 
108fe6060f1SDimitry Andric       // If this value was already livein, we probably have a direct use of the
109fe6060f1SDimitry Andric       // incoming register value, so don't kill at the spill point. This happens
110fe6060f1SDimitry Andric       // since we pass some special inputs (workgroup IDs) in the callee saved
111fe6060f1SDimitry Andric       // range.
112fe6060f1SDimitry Andric       const bool IsLiveIn = MRI.isLiveIn(Reg);
113fe6060f1SDimitry Andric       TII.storeRegToStackSlot(SaveBlock, I, Reg, !IsLiveIn, CS.getFrameIdx(),
114bdd1243dSDimitry Andric                               RC, TRI, Register());
1150b57cec5SDimitry Andric 
116bdd1243dSDimitry Andric       if (Indexes) {
1170b57cec5SDimitry Andric         assert(std::distance(MIS.begin(), I) == 1);
1180b57cec5SDimitry Andric         MachineInstr &Inst = *std::prev(I);
119bdd1243dSDimitry Andric         Indexes->insertMachineInstrInMaps(Inst);
1200b57cec5SDimitry Andric       }
121bdd1243dSDimitry Andric 
122bdd1243dSDimitry Andric       if (LIS)
123bdd1243dSDimitry Andric         LIS->removeAllRegUnitsForPhysReg(Reg);
1240b57cec5SDimitry Andric     }
1250b57cec5SDimitry Andric   }
1260b57cec5SDimitry Andric }
1270b57cec5SDimitry Andric 
1280b57cec5SDimitry Andric /// Insert restore code for the callee-saved registers used in the function.
insertCSRRestores(MachineBasicBlock & RestoreBlock,MutableArrayRef<CalleeSavedInfo> CSI,SlotIndexes * Indexes,LiveIntervals * LIS)1290b57cec5SDimitry Andric static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
1305ffd83dbSDimitry Andric                               MutableArrayRef<CalleeSavedInfo> CSI,
131bdd1243dSDimitry Andric                               SlotIndexes *Indexes, LiveIntervals *LIS) {
1320b57cec5SDimitry Andric   MachineFunction &MF = *RestoreBlock.getParent();
1330b57cec5SDimitry Andric   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1340b57cec5SDimitry Andric   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
1350b57cec5SDimitry Andric   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
13681ad6265SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
13781ad6265SDimitry Andric   const SIRegisterInfo *RI = ST.getRegisterInfo();
1380b57cec5SDimitry Andric   // Restore all registers immediately before the return and any
1390b57cec5SDimitry Andric   // terminators that precede it.
1400b57cec5SDimitry Andric   MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
1410b57cec5SDimitry Andric 
1420b57cec5SDimitry Andric   // FIXME: Just emit the readlane/writelane directly
1430b57cec5SDimitry Andric   if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
1440b57cec5SDimitry Andric     for (const CalleeSavedInfo &CI : reverse(CSI)) {
14504eeddc0SDimitry Andric       Register Reg = CI.getReg();
14681ad6265SDimitry Andric       const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
14781ad6265SDimitry Andric           Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
1480b57cec5SDimitry Andric 
149bdd1243dSDimitry Andric       TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI,
150bdd1243dSDimitry Andric                                Register());
1510b57cec5SDimitry Andric       assert(I != RestoreBlock.begin() &&
1520b57cec5SDimitry Andric              "loadRegFromStackSlot didn't insert any code!");
1530b57cec5SDimitry Andric       // Insert in reverse order.  loadRegFromStackSlot can insert
1540b57cec5SDimitry Andric       // multiple instructions.
1550b57cec5SDimitry Andric 
156bdd1243dSDimitry Andric       if (Indexes) {
1570b57cec5SDimitry Andric         MachineInstr &Inst = *std::prev(I);
158bdd1243dSDimitry Andric         Indexes->insertMachineInstrInMaps(Inst);
1590b57cec5SDimitry Andric       }
160bdd1243dSDimitry Andric 
161bdd1243dSDimitry Andric       if (LIS)
162bdd1243dSDimitry Andric         LIS->removeAllRegUnitsForPhysReg(Reg);
1630b57cec5SDimitry Andric     }
1640b57cec5SDimitry Andric   }
1650b57cec5SDimitry Andric }
1660b57cec5SDimitry Andric 
1670b57cec5SDimitry Andric /// Compute the sets of entry and return blocks for saving and restoring
1680b57cec5SDimitry Andric /// callee-saved registers, and placing prolog and epilog code.
calculateSaveRestoreBlocks(MachineFunction & MF)1690b57cec5SDimitry Andric void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
1700b57cec5SDimitry Andric   const MachineFrameInfo &MFI = MF.getFrameInfo();
1710b57cec5SDimitry Andric 
1720b57cec5SDimitry Andric   // Even when we do not change any CSR, we still want to insert the
1730b57cec5SDimitry Andric   // prologue and epilogue of the function.
1740b57cec5SDimitry Andric   // So set the save points for those.
1750b57cec5SDimitry Andric 
1760b57cec5SDimitry Andric   // Use the points found by shrink-wrapping, if any.
1770b57cec5SDimitry Andric   if (MFI.getSavePoint()) {
1780b57cec5SDimitry Andric     SaveBlocks.push_back(MFI.getSavePoint());
1790b57cec5SDimitry Andric     assert(MFI.getRestorePoint() && "Both restore and save must be set");
1800b57cec5SDimitry Andric     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1810b57cec5SDimitry Andric     // If RestoreBlock does not have any successor and is not a return block
1820b57cec5SDimitry Andric     // then the end point is unreachable and we do not need to insert any
1830b57cec5SDimitry Andric     // epilogue.
1840b57cec5SDimitry Andric     if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
1850b57cec5SDimitry Andric       RestoreBlocks.push_back(RestoreBlock);
1860b57cec5SDimitry Andric     return;
1870b57cec5SDimitry Andric   }
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric   // Save refs to entry and return blocks.
1900b57cec5SDimitry Andric   SaveBlocks.push_back(&MF.front());
1910b57cec5SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
1920b57cec5SDimitry Andric     if (MBB.isEHFuncletEntry())
1930b57cec5SDimitry Andric       SaveBlocks.push_back(&MBB);
1940b57cec5SDimitry Andric     if (MBB.isReturnBlock())
1950b57cec5SDimitry Andric       RestoreBlocks.push_back(&MBB);
1960b57cec5SDimitry Andric   }
1970b57cec5SDimitry Andric }
1980b57cec5SDimitry Andric 
199e8d8bef9SDimitry Andric // TODO: To support shrink wrapping, this would need to copy
200e8d8bef9SDimitry Andric // PrologEpilogInserter's updateLiveness.
updateLiveness(MachineFunction & MF,ArrayRef<CalleeSavedInfo> CSI)201e8d8bef9SDimitry Andric static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
202e8d8bef9SDimitry Andric   MachineBasicBlock &EntryBB = MF.front();
203e8d8bef9SDimitry Andric 
204e8d8bef9SDimitry Andric   for (const CalleeSavedInfo &CSIReg : CSI)
205e8d8bef9SDimitry Andric     EntryBB.addLiveIn(CSIReg.getReg());
206e8d8bef9SDimitry Andric   EntryBB.sortUniqueLiveIns();
207e8d8bef9SDimitry Andric }
208e8d8bef9SDimitry Andric 
spillCalleeSavedRegs(MachineFunction & MF,SmallVectorImpl<int> & CalleeSavedFIs)2095f757f3fSDimitry Andric bool SILowerSGPRSpills::spillCalleeSavedRegs(
2105f757f3fSDimitry Andric     MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
2110b57cec5SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
2120b57cec5SDimitry Andric   const Function &F = MF.getFunction();
2130b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2140b57cec5SDimitry Andric   const SIFrameLowering *TFI = ST.getFrameLowering();
2150b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
2160b57cec5SDimitry Andric   RegScavenger *RS = nullptr;
2170b57cec5SDimitry Andric 
2180b57cec5SDimitry Andric   // Determine which of the registers in the callee save list should be saved.
2190b57cec5SDimitry Andric   BitVector SavedRegs;
2200b57cec5SDimitry Andric   TFI->determineCalleeSavesSGPR(MF, SavedRegs, RS);
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric   // Add the code to save and restore the callee saved registers.
2230b57cec5SDimitry Andric   if (!F.hasFnAttribute(Attribute::Naked)) {
2240b57cec5SDimitry Andric     // FIXME: This is a lie. The CalleeSavedInfo is incomplete, but this is
2250b57cec5SDimitry Andric     // necessary for verifier liveness checks.
2260b57cec5SDimitry Andric     MFI.setCalleeSavedInfoValid(true);
2270b57cec5SDimitry Andric 
2280b57cec5SDimitry Andric     std::vector<CalleeSavedInfo> CSI;
2290b57cec5SDimitry Andric     const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
2300b57cec5SDimitry Andric 
2310b57cec5SDimitry Andric     for (unsigned I = 0; CSRegs[I]; ++I) {
232e8d8bef9SDimitry Andric       MCRegister Reg = CSRegs[I];
233e8d8bef9SDimitry Andric 
2340b57cec5SDimitry Andric       if (SavedRegs.test(Reg)) {
2355ffd83dbSDimitry Andric         const TargetRegisterClass *RC =
2365ffd83dbSDimitry Andric           TRI->getMinimalPhysRegClass(Reg, MVT::i32);
2370b57cec5SDimitry Andric         int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC),
2385ffd83dbSDimitry Andric                                            TRI->getSpillAlign(*RC), true);
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric         CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
2415f757f3fSDimitry Andric         CalleeSavedFIs.push_back(JunkFI);
2420b57cec5SDimitry Andric       }
2430b57cec5SDimitry Andric     }
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric     if (!CSI.empty()) {
2460b57cec5SDimitry Andric       for (MachineBasicBlock *SaveBlock : SaveBlocks)
247bdd1243dSDimitry Andric         insertCSRSaves(*SaveBlock, CSI, Indexes, LIS);
2480b57cec5SDimitry Andric 
249e8d8bef9SDimitry Andric       // Add live ins to save blocks.
250e8d8bef9SDimitry Andric       assert(SaveBlocks.size() == 1 && "shrink wrapping not fully implemented");
251e8d8bef9SDimitry Andric       updateLiveness(MF, CSI);
252e8d8bef9SDimitry Andric 
2530b57cec5SDimitry Andric       for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
254bdd1243dSDimitry Andric         insertCSRRestores(*RestoreBlock, CSI, Indexes, LIS);
2550b57cec5SDimitry Andric       return true;
2560b57cec5SDimitry Andric     }
2570b57cec5SDimitry Andric   }
2580b57cec5SDimitry Andric 
2590b57cec5SDimitry Andric   return false;
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric 
extendWWMVirtRegLiveness(MachineFunction & MF,LiveIntervals * LIS)2625f757f3fSDimitry Andric void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
2635f757f3fSDimitry Andric                                                  LiveIntervals *LIS) {
2645f757f3fSDimitry Andric   // TODO: This is a workaround to avoid the unmodelled liveness computed with
2655f757f3fSDimitry Andric   // whole-wave virtual registers when allocated together with the regular VGPR
2665f757f3fSDimitry Andric   // virtual registers. Presently, the liveness computed during the regalloc is
2675f757f3fSDimitry Andric   // only uniform (or single lane aware) and it doesn't take account of the
2685f757f3fSDimitry Andric   // divergent control flow that exists for our GPUs. Since the WWM registers
2695f757f3fSDimitry Andric   // can modify inactive lanes, the wave-aware liveness should be computed for
2705f757f3fSDimitry Andric   // the virtual registers to accurately plot their interferences. Without
2715f757f3fSDimitry Andric   // having the divergent CFG for the function, it is difficult to implement the
2725f757f3fSDimitry Andric   // wave-aware liveness info. Until then, we conservatively extend the liveness
2735f757f3fSDimitry Andric   // of the wwm registers into the entire function so that they won't be reused
2745f757f3fSDimitry Andric   // without first spilling/splitting their liveranges.
2755f757f3fSDimitry Andric   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2765f757f3fSDimitry Andric 
2775f757f3fSDimitry Andric   // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks.
2785f757f3fSDimitry Andric   for (auto Reg : MFI->getSGPRSpillVGPRs()) {
2795f757f3fSDimitry Andric     for (MachineBasicBlock *SaveBlock : SaveBlocks) {
2805f757f3fSDimitry Andric       MachineBasicBlock::iterator InsertBefore = SaveBlock->begin();
2815f757f3fSDimitry Andric       auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(),
2825f757f3fSDimitry Andric                          TII->get(AMDGPU::IMPLICIT_DEF), Reg);
2835f757f3fSDimitry Andric       MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
2845f757f3fSDimitry Andric       // Set SGPR_SPILL asm printer flag
2855f757f3fSDimitry Andric       MIB->setAsmPrinterFlag(AMDGPU::SGPR_SPILL);
2865f757f3fSDimitry Andric       if (LIS) {
2875f757f3fSDimitry Andric         LIS->InsertMachineInstrInMaps(*MIB);
2885f757f3fSDimitry Andric       }
2895f757f3fSDimitry Andric     }
2905f757f3fSDimitry Andric   }
2915f757f3fSDimitry Andric 
2925f757f3fSDimitry Andric   // Insert the KILL in the return blocks to extend their liveness untill the
2935f757f3fSDimitry Andric   // end of function. Insert a separate KILL for each VGPR.
2945f757f3fSDimitry Andric   for (MachineBasicBlock *RestoreBlock : RestoreBlocks) {
2955f757f3fSDimitry Andric     MachineBasicBlock::iterator InsertBefore =
2965f757f3fSDimitry Andric         RestoreBlock->getFirstTerminator();
2975f757f3fSDimitry Andric     for (auto Reg : MFI->getSGPRSpillVGPRs()) {
2985f757f3fSDimitry Andric       auto MIB =
2995f757f3fSDimitry Andric           BuildMI(*RestoreBlock, *InsertBefore, InsertBefore->getDebugLoc(),
3005f757f3fSDimitry Andric                   TII->get(TargetOpcode::KILL));
3015f757f3fSDimitry Andric       MIB.addReg(Reg);
3025f757f3fSDimitry Andric       if (LIS)
3035f757f3fSDimitry Andric         LIS->InsertMachineInstrInMaps(*MIB);
3045f757f3fSDimitry Andric     }
3055f757f3fSDimitry Andric   }
3065f757f3fSDimitry Andric }
3075f757f3fSDimitry Andric 
runOnMachineFunction(MachineFunction & MF)3080b57cec5SDimitry Andric bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
3090b57cec5SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
3100b57cec5SDimitry Andric   TII = ST.getInstrInfo();
3110b57cec5SDimitry Andric   TRI = &TII->getRegisterInfo();
3120b57cec5SDimitry Andric 
313fe6060f1SDimitry Andric   LIS = getAnalysisIfAvailable<LiveIntervals>();
314bdd1243dSDimitry Andric   Indexes = getAnalysisIfAvailable<SlotIndexes>();
3150b57cec5SDimitry Andric 
3160b57cec5SDimitry Andric   assert(SaveBlocks.empty() && RestoreBlocks.empty());
3170b57cec5SDimitry Andric 
3180b57cec5SDimitry Andric   // First, expose any CSR SGPR spills. This is mostly the same as what PEI
3190b57cec5SDimitry Andric   // does, but somewhat simpler.
3200b57cec5SDimitry Andric   calculateSaveRestoreBlocks(MF);
3215f757f3fSDimitry Andric   SmallVector<int> CalleeSavedFIs;
3225f757f3fSDimitry Andric   bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);
3230b57cec5SDimitry Andric 
3240b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
325fe6060f1SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
326fe6060f1SDimitry Andric   SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
327fe6060f1SDimitry Andric 
3280b57cec5SDimitry Andric   if (!MFI.hasStackObjects() && !HasCSRs) {
3290b57cec5SDimitry Andric     SaveBlocks.clear();
3300b57cec5SDimitry Andric     RestoreBlocks.clear();
3310b57cec5SDimitry Andric     return false;
3320b57cec5SDimitry Andric   }
3330b57cec5SDimitry Andric 
3340b57cec5SDimitry Andric   bool MadeChange = false;
3355f757f3fSDimitry Andric   bool SpilledToVirtVGPRLanes = false;
3360b57cec5SDimitry Andric 
3370b57cec5SDimitry Andric   // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
3380b57cec5SDimitry Andric   // handled as SpilledToReg in regular PrologEpilogInserter.
339e8d8bef9SDimitry Andric   const bool HasSGPRSpillToVGPR = TRI->spillSGPRToVGPR() &&
340e8d8bef9SDimitry Andric                                   (HasCSRs || FuncInfo->hasSpilledSGPRs());
341fe6060f1SDimitry Andric   if (HasSGPRSpillToVGPR) {
3420b57cec5SDimitry Andric     // Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
3430b57cec5SDimitry Andric     // are spilled to VGPRs, in which case we can eliminate the stack usage.
3440b57cec5SDimitry Andric     //
3450b57cec5SDimitry Andric     // This operates under the assumption that only other SGPR spills are users
3460b57cec5SDimitry Andric     // of the frame index.
3475ffd83dbSDimitry Andric 
348fe6060f1SDimitry Andric     // To track the spill frame indices handled in this pass.
349fe6060f1SDimitry Andric     BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
350fe6060f1SDimitry Andric 
3510b57cec5SDimitry Andric     for (MachineBasicBlock &MBB : MF) {
352349cc55cSDimitry Andric       for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
3530b57cec5SDimitry Andric         if (!TII->isSGPRSpill(MI))
3540b57cec5SDimitry Andric           continue;
3550b57cec5SDimitry Andric 
3560b57cec5SDimitry Andric         int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
3570b57cec5SDimitry Andric         assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
3585f757f3fSDimitry Andric 
3595f757f3fSDimitry Andric         bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);
3605f757f3fSDimitry Andric         if (IsCalleeSaveSGPRSpill) {
3615f757f3fSDimitry Andric           // Spill callee-saved SGPRs into physical VGPR lanes.
3625f757f3fSDimitry Andric 
3635f757f3fSDimitry Andric           // TODO: This is to ensure the CFIs are static for efficient frame
3645f757f3fSDimitry Andric           // unwinding in the debugger. Spilling them into virtual VGPR lanes
3655f757f3fSDimitry Andric           // involve regalloc to allocate the physical VGPRs and that might
3665f757f3fSDimitry Andric           // cause intermediate spill/split of such liveranges for successful
3675f757f3fSDimitry Andric           // allocation. This would result in broken CFI encoding unless the
3685f757f3fSDimitry Andric           // regalloc aware CFI generation to insert new CFIs along with the
3695f757f3fSDimitry Andric           // intermediate spills is implemented. There is no such support
3705f757f3fSDimitry Andric           // currently exist in the LLVM compiler.
3717a6dacacSDimitry Andric           if (FuncInfo->allocateSGPRSpillToVGPRLane(
3727a6dacacSDimitry Andric                   MF, FI, /*SpillToPhysVGPRLane=*/true)) {
373bdd1243dSDimitry Andric             bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
3745f757f3fSDimitry Andric                 MI, FI, nullptr, Indexes, LIS, true);
3755f757f3fSDimitry Andric             if (!Spilled)
3765f757f3fSDimitry Andric               llvm_unreachable(
3775f757f3fSDimitry Andric                   "failed to spill SGPR to physical VGPR lane when allocated");
3785f757f3fSDimitry Andric           }
3795f757f3fSDimitry Andric         } else {
3805f757f3fSDimitry Andric           if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
3815f757f3fSDimitry Andric             bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
382bdd1243dSDimitry Andric                 MI, FI, nullptr, Indexes, LIS);
3835f757f3fSDimitry Andric             if (!Spilled)
3845f757f3fSDimitry Andric               llvm_unreachable(
3855f757f3fSDimitry Andric                   "failed to spill SGPR to virtual VGPR lane when allocated");
386fe6060f1SDimitry Andric             SpillFIs.set(FI);
3875f757f3fSDimitry Andric             SpilledToVirtVGPRLanes = true;
3885f757f3fSDimitry Andric           }
3890b57cec5SDimitry Andric         }
3900b57cec5SDimitry Andric       }
3910b57cec5SDimitry Andric     }
3920b57cec5SDimitry Andric 
3935f757f3fSDimitry Andric     if (SpilledToVirtVGPRLanes) {
3945f757f3fSDimitry Andric       extendWWMVirtRegLiveness(MF, LIS);
3955f757f3fSDimitry Andric       if (LIS) {
3965f757f3fSDimitry Andric         // Compute the LiveInterval for the newly created virtual registers.
3978a4dda33SDimitry Andric         for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
3985f757f3fSDimitry Andric           LIS->createAndComputeVirtRegInterval(Reg);
3995f757f3fSDimitry Andric       }
4005f757f3fSDimitry Andric     }
4018a4dda33SDimitry Andric 
4025f757f3fSDimitry Andric     for (MachineBasicBlock &MBB : MF) {
403fe6060f1SDimitry Andric       // FIXME: The dead frame indices are replaced with a null register from
404fe6060f1SDimitry Andric       // the debug value instructions. We should instead, update it with the
405fe6060f1SDimitry Andric       // correct register value. But not sure the register value alone is
406fe6060f1SDimitry Andric       // adequate to lower the DIExpression. It should be worked out later.
407fe6060f1SDimitry Andric       for (MachineInstr &MI : MBB) {
408fe6060f1SDimitry Andric         if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
409bdd1243dSDimitry Andric             !MFI.isFixedObjectIndex(MI.getOperand(0).getIndex()) &&
410fe6060f1SDimitry Andric             SpillFIs[MI.getOperand(0).getIndex()]) {
411fe6060f1SDimitry Andric           MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
412fe6060f1SDimitry Andric         }
413fe6060f1SDimitry Andric       }
4140b57cec5SDimitry Andric     }
4150b57cec5SDimitry Andric 
416349cc55cSDimitry Andric     // All those frame indices which are dead by now should be removed from the
417349cc55cSDimitry Andric     // function frame. Otherwise, there is a side effect such as re-mapping of
418349cc55cSDimitry Andric     // free frame index ids by the later pass(es) like "stack slot coloring"
419349cc55cSDimitry Andric     // which in turn could mess-up with the book keeping of "frame index to VGPR
420349cc55cSDimitry Andric     // lane".
42181ad6265SDimitry Andric     FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
422349cc55cSDimitry Andric 
4235f757f3fSDimitry Andric     MadeChange = true;
4245f757f3fSDimitry Andric   }
4255f757f3fSDimitry Andric 
4265f757f3fSDimitry Andric   if (SpilledToVirtVGPRLanes) {
42706c3fb27SDimitry Andric     const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
42806c3fb27SDimitry Andric     // Shift back the reserved SGPR for EXEC copy into the lowest range.
42906c3fb27SDimitry Andric     // This SGPR is reserved to handle the whole-wave spill/copy operations
43006c3fb27SDimitry Andric     // that might get inserted during vgpr regalloc.
43106c3fb27SDimitry Andric     Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
43206c3fb27SDimitry Andric     if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
43306c3fb27SDimitry Andric                              TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
43406c3fb27SDimitry Andric       FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
43506c3fb27SDimitry Andric   } else {
4365f757f3fSDimitry Andric     // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
4375f757f3fSDimitry Andric     // spills/copies. Reset the SGPR reserved for EXEC copy.
43806c3fb27SDimitry Andric     FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
43906c3fb27SDimitry Andric   }
44006c3fb27SDimitry Andric 
4410b57cec5SDimitry Andric   SaveBlocks.clear();
4420b57cec5SDimitry Andric   RestoreBlocks.clear();
4430b57cec5SDimitry Andric 
4440b57cec5SDimitry Andric   return MadeChange;
4450b57cec5SDimitry Andric }
446