//===-- SILateBranchLowering.cpp - Final preparation of branches ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass mainly lowers early terminate pseudo instructions.
//
//===----------------------------------------------------------------------===//

14fe6060f1SDimitry Andric #include "AMDGPU.h"
15fe6060f1SDimitry Andric #include "GCNSubtarget.h"
16fe6060f1SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17fe6060f1SDimitry Andric #include "SIMachineFunctionInfo.h"
18fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
19fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
20fe6060f1SDimitry Andric 
21fe6060f1SDimitry Andric using namespace llvm;
22fe6060f1SDimitry Andric 
23fe6060f1SDimitry Andric #define DEBUG_TYPE "si-late-branch-lowering"
24fe6060f1SDimitry Andric 
25fe6060f1SDimitry Andric namespace {
26fe6060f1SDimitry Andric 
27fe6060f1SDimitry Andric class SILateBranchLowering : public MachineFunctionPass {
28fe6060f1SDimitry Andric private:
29fe6060f1SDimitry Andric   const SIRegisterInfo *TRI = nullptr;
30fe6060f1SDimitry Andric   const SIInstrInfo *TII = nullptr;
31fe6060f1SDimitry Andric   MachineDominatorTree *MDT = nullptr;
32fe6060f1SDimitry Andric 
33*5f757f3fSDimitry Andric   void expandChainCall(MachineInstr &MI);
34fe6060f1SDimitry Andric   void earlyTerm(MachineInstr &MI, MachineBasicBlock *EarlyExitBlock);
35fe6060f1SDimitry Andric 
36fe6060f1SDimitry Andric public:
37fe6060f1SDimitry Andric   static char ID;
38fe6060f1SDimitry Andric 
39fe6060f1SDimitry Andric   unsigned MovOpc;
40fe6060f1SDimitry Andric   Register ExecReg;
41fe6060f1SDimitry Andric 
SILateBranchLowering()42fe6060f1SDimitry Andric   SILateBranchLowering() : MachineFunctionPass(ID) {}
43fe6060f1SDimitry Andric 
44fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
45fe6060f1SDimitry Andric 
getPassName() const46fe6060f1SDimitry Andric   StringRef getPassName() const override {
47fe6060f1SDimitry Andric     return "SI Final Branch Preparation";
48fe6060f1SDimitry Andric   }
49fe6060f1SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const50fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
51fe6060f1SDimitry Andric     AU.addRequired<MachineDominatorTree>();
52fe6060f1SDimitry Andric     AU.addPreserved<MachineDominatorTree>();
53fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
54fe6060f1SDimitry Andric   }
55fe6060f1SDimitry Andric };
56fe6060f1SDimitry Andric 
57fe6060f1SDimitry Andric } // end anonymous namespace
58fe6060f1SDimitry Andric 
59fe6060f1SDimitry Andric char SILateBranchLowering::ID = 0;
60fe6060f1SDimitry Andric 
61fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(SILateBranchLowering, DEBUG_TYPE,
62fe6060f1SDimitry Andric                       "SI insert s_cbranch_execz instructions", false, false)
63fe6060f1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
64fe6060f1SDimitry Andric INITIALIZE_PASS_END(SILateBranchLowering, DEBUG_TYPE,
65fe6060f1SDimitry Andric                     "SI insert s_cbranch_execz instructions", false, false)
66fe6060f1SDimitry Andric 
67fe6060f1SDimitry Andric char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;
68fe6060f1SDimitry Andric 
generateEndPgm(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,DebugLoc DL,const SIInstrInfo * TII,MachineFunction & MF)69fe6060f1SDimitry Andric static void generateEndPgm(MachineBasicBlock &MBB,
70fe6060f1SDimitry Andric                            MachineBasicBlock::iterator I, DebugLoc DL,
71fe6060f1SDimitry Andric                            const SIInstrInfo *TII, MachineFunction &MF) {
72fe6060f1SDimitry Andric   const Function &F = MF.getFunction();
73fe6060f1SDimitry Andric   bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
74fe6060f1SDimitry Andric 
75fe6060f1SDimitry Andric   // Check if hardware has been configured to expect color or depth exports.
7681ad6265SDimitry Andric   bool HasColorExports = AMDGPU::getHasColorExport(F);
7781ad6265SDimitry Andric   bool HasDepthExports = AMDGPU::getHasDepthExport(F);
7881ad6265SDimitry Andric   bool HasExports = HasColorExports || HasDepthExports;
79fe6060f1SDimitry Andric 
80fe6060f1SDimitry Andric   // Prior to GFX10, hardware always expects at least one export for PS.
81fe6060f1SDimitry Andric   bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());
82fe6060f1SDimitry Andric 
83fe6060f1SDimitry Andric   if (IsPS && (HasExports || MustExport)) {
84fe6060f1SDimitry Andric     // Generate "null export" if hardware is expecting PS to export.
8581ad6265SDimitry Andric     const GCNSubtarget &ST = MBB.getParent()->getSubtarget<GCNSubtarget>();
8681ad6265SDimitry Andric     int Target =
8781ad6265SDimitry Andric         ST.hasNullExportTarget()
8881ad6265SDimitry Andric             ? AMDGPU::Exp::ET_NULL
8981ad6265SDimitry Andric             : (HasColorExports ? AMDGPU::Exp::ET_MRT0 : AMDGPU::Exp::ET_MRTZ);
90fe6060f1SDimitry Andric     BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
9181ad6265SDimitry Andric         .addImm(Target)
92fe6060f1SDimitry Andric         .addReg(AMDGPU::VGPR0, RegState::Undef)
93fe6060f1SDimitry Andric         .addReg(AMDGPU::VGPR0, RegState::Undef)
94fe6060f1SDimitry Andric         .addReg(AMDGPU::VGPR0, RegState::Undef)
95fe6060f1SDimitry Andric         .addReg(AMDGPU::VGPR0, RegState::Undef)
96fe6060f1SDimitry Andric         .addImm(1)  // vm
97fe6060f1SDimitry Andric         .addImm(0)  // compr
98fe6060f1SDimitry Andric         .addImm(0); // en
99fe6060f1SDimitry Andric   }
100fe6060f1SDimitry Andric 
101fe6060f1SDimitry Andric   // s_endpgm
102fe6060f1SDimitry Andric   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
103fe6060f1SDimitry Andric }
104fe6060f1SDimitry Andric 
splitBlock(MachineBasicBlock & MBB,MachineInstr & MI,MachineDominatorTree * MDT)105fe6060f1SDimitry Andric static void splitBlock(MachineBasicBlock &MBB, MachineInstr &MI,
106fe6060f1SDimitry Andric                        MachineDominatorTree *MDT) {
107fe6060f1SDimitry Andric   MachineBasicBlock *SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/ true);
108fe6060f1SDimitry Andric 
109fe6060f1SDimitry Andric   // Update dominator tree
110fe6060f1SDimitry Andric   using DomTreeT = DomTreeBase<MachineBasicBlock>;
111fe6060f1SDimitry Andric   SmallVector<DomTreeT::UpdateType, 16> DTUpdates;
112fe6060f1SDimitry Andric   for (MachineBasicBlock *Succ : SplitBB->successors()) {
113fe6060f1SDimitry Andric     DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});
114fe6060f1SDimitry Andric     DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ});
115fe6060f1SDimitry Andric   }
116fe6060f1SDimitry Andric   DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
117fe6060f1SDimitry Andric   MDT->getBase().applyUpdates(DTUpdates);
118fe6060f1SDimitry Andric }
119fe6060f1SDimitry Andric 
expandChainCall(MachineInstr & MI)120*5f757f3fSDimitry Andric void SILateBranchLowering::expandChainCall(MachineInstr &MI) {
121*5f757f3fSDimitry Andric   // This is a tail call that needs to be expanded into at least
122*5f757f3fSDimitry Andric   // 2 instructions, one for setting EXEC and one for the actual tail call.
123*5f757f3fSDimitry Andric   constexpr unsigned ExecIdx = 3;
124*5f757f3fSDimitry Andric 
125*5f757f3fSDimitry Andric   BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(MovOpc), ExecReg)
126*5f757f3fSDimitry Andric       ->addOperand(MI.getOperand(ExecIdx));
127*5f757f3fSDimitry Andric   MI.removeOperand(ExecIdx);
128*5f757f3fSDimitry Andric 
129*5f757f3fSDimitry Andric   MI.setDesc(TII->get(AMDGPU::SI_TCRETURN));
130*5f757f3fSDimitry Andric }
131*5f757f3fSDimitry Andric 
earlyTerm(MachineInstr & MI,MachineBasicBlock * EarlyExitBlock)132fe6060f1SDimitry Andric void SILateBranchLowering::earlyTerm(MachineInstr &MI,
133fe6060f1SDimitry Andric                                      MachineBasicBlock *EarlyExitBlock) {
134fe6060f1SDimitry Andric   MachineBasicBlock &MBB = *MI.getParent();
135fe6060f1SDimitry Andric   const DebugLoc DL = MI.getDebugLoc();
136fe6060f1SDimitry Andric 
137fe6060f1SDimitry Andric   auto BranchMI = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0))
138fe6060f1SDimitry Andric                       .addMBB(EarlyExitBlock);
139fe6060f1SDimitry Andric   auto Next = std::next(MI.getIterator());
140fe6060f1SDimitry Andric 
141fe6060f1SDimitry Andric   if (Next != MBB.end() && !Next->isTerminator())
142fe6060f1SDimitry Andric     splitBlock(MBB, *BranchMI, MDT);
143fe6060f1SDimitry Andric 
144fe6060f1SDimitry Andric   MBB.addSuccessor(EarlyExitBlock);
145fe6060f1SDimitry Andric   MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
146fe6060f1SDimitry Andric }
147fe6060f1SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)148fe6060f1SDimitry Andric bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
149fe6060f1SDimitry Andric   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
150fe6060f1SDimitry Andric   TII = ST.getInstrInfo();
151fe6060f1SDimitry Andric   TRI = &TII->getRegisterInfo();
152fe6060f1SDimitry Andric   MDT = &getAnalysis<MachineDominatorTree>();
153fe6060f1SDimitry Andric 
154fe6060f1SDimitry Andric   MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
155fe6060f1SDimitry Andric   ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
156fe6060f1SDimitry Andric 
157fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> EarlyTermInstrs;
158fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 1> EpilogInstrs;
159fe6060f1SDimitry Andric   bool MadeChange = false;
160fe6060f1SDimitry Andric 
161fe6060f1SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
162349cc55cSDimitry Andric     for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
163fe6060f1SDimitry Andric       switch (MI.getOpcode()) {
164fe6060f1SDimitry Andric       case AMDGPU::S_BRANCH:
165fe6060f1SDimitry Andric         // Optimize out branches to the next block.
166fe6060f1SDimitry Andric         // This only occurs in -O0 when BranchFolding is not executed.
167fe6060f1SDimitry Andric         if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB())) {
168fe6060f1SDimitry Andric           assert(&MI == &MBB.back());
169fe6060f1SDimitry Andric           MI.eraseFromParent();
170fe6060f1SDimitry Andric           MadeChange = true;
171fe6060f1SDimitry Andric         }
172fe6060f1SDimitry Andric         break;
173fe6060f1SDimitry Andric 
174*5f757f3fSDimitry Andric       case AMDGPU::SI_CS_CHAIN_TC_W32:
175*5f757f3fSDimitry Andric       case AMDGPU::SI_CS_CHAIN_TC_W64:
176*5f757f3fSDimitry Andric         expandChainCall(MI);
177*5f757f3fSDimitry Andric         MadeChange = true;
178*5f757f3fSDimitry Andric         break;
179*5f757f3fSDimitry Andric 
180fe6060f1SDimitry Andric       case AMDGPU::SI_EARLY_TERMINATE_SCC0:
181fe6060f1SDimitry Andric         EarlyTermInstrs.push_back(&MI);
182fe6060f1SDimitry Andric         break;
183fe6060f1SDimitry Andric 
184fe6060f1SDimitry Andric       case AMDGPU::SI_RETURN_TO_EPILOG:
185fe6060f1SDimitry Andric         EpilogInstrs.push_back(&MI);
186fe6060f1SDimitry Andric         break;
187fe6060f1SDimitry Andric 
188fe6060f1SDimitry Andric       default:
189fe6060f1SDimitry Andric         break;
190fe6060f1SDimitry Andric       }
191fe6060f1SDimitry Andric     }
192fe6060f1SDimitry Andric   }
193fe6060f1SDimitry Andric 
194fe6060f1SDimitry Andric   // Lower any early exit branches first
195fe6060f1SDimitry Andric   if (!EarlyTermInstrs.empty()) {
196fe6060f1SDimitry Andric     MachineBasicBlock *EarlyExitBlock = MF.CreateMachineBasicBlock();
197fe6060f1SDimitry Andric     DebugLoc DL;
198fe6060f1SDimitry Andric 
199fe6060f1SDimitry Andric     MF.insert(MF.end(), EarlyExitBlock);
200fe6060f1SDimitry Andric     BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc),
201fe6060f1SDimitry Andric             ExecReg)
202fe6060f1SDimitry Andric         .addImm(0);
203fe6060f1SDimitry Andric     generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);
204fe6060f1SDimitry Andric 
205fe6060f1SDimitry Andric     for (MachineInstr *Instr : EarlyTermInstrs) {
206fe6060f1SDimitry Andric       // Early termination in GS does nothing
207fe6060f1SDimitry Andric       if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS)
208fe6060f1SDimitry Andric         earlyTerm(*Instr, EarlyExitBlock);
209fe6060f1SDimitry Andric       Instr->eraseFromParent();
210fe6060f1SDimitry Andric     }
211fe6060f1SDimitry Andric 
212fe6060f1SDimitry Andric     EarlyTermInstrs.clear();
213fe6060f1SDimitry Andric     MadeChange = true;
214fe6060f1SDimitry Andric   }
215fe6060f1SDimitry Andric 
216fe6060f1SDimitry Andric   // Now check return to epilog instructions occur at function end
217fe6060f1SDimitry Andric   if (!EpilogInstrs.empty()) {
218fe6060f1SDimitry Andric     MachineBasicBlock *EmptyMBBAtEnd = nullptr;
219fe6060f1SDimitry Andric     assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
220fe6060f1SDimitry Andric 
221fe6060f1SDimitry Andric     // If there are multiple returns to epilog then all will
222fe6060f1SDimitry Andric     // become jumps to new empty end block.
223fe6060f1SDimitry Andric     if (EpilogInstrs.size() > 1) {
224fe6060f1SDimitry Andric       EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
225fe6060f1SDimitry Andric       MF.insert(MF.end(), EmptyMBBAtEnd);
226fe6060f1SDimitry Andric     }
227fe6060f1SDimitry Andric 
228bdd1243dSDimitry Andric     for (auto *MI : EpilogInstrs) {
229fe6060f1SDimitry Andric       auto MBB = MI->getParent();
230fe6060f1SDimitry Andric       if (MBB == &MF.back() && MI == &MBB->back())
231fe6060f1SDimitry Andric         continue;
232fe6060f1SDimitry Andric 
233fe6060f1SDimitry Andric       // SI_RETURN_TO_EPILOG is not the last instruction.
234fe6060f1SDimitry Andric       // Jump to empty block at function end.
235fe6060f1SDimitry Andric       if (!EmptyMBBAtEnd) {
236fe6060f1SDimitry Andric         EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
237fe6060f1SDimitry Andric         MF.insert(MF.end(), EmptyMBBAtEnd);
238fe6060f1SDimitry Andric       }
239fe6060f1SDimitry Andric 
240fe6060f1SDimitry Andric       MBB->addSuccessor(EmptyMBBAtEnd);
241fe6060f1SDimitry Andric       MDT->getBase().insertEdge(MBB, EmptyMBBAtEnd);
242fe6060f1SDimitry Andric       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
243fe6060f1SDimitry Andric           .addMBB(EmptyMBBAtEnd);
244fe6060f1SDimitry Andric       MI->eraseFromParent();
245fe6060f1SDimitry Andric       MadeChange = true;
246fe6060f1SDimitry Andric     }
247fe6060f1SDimitry Andric 
248fe6060f1SDimitry Andric     EpilogInstrs.clear();
249fe6060f1SDimitry Andric   }
250fe6060f1SDimitry Andric 
251fe6060f1SDimitry Andric   return MadeChange;
252fe6060f1SDimitry Andric }
253