//===-- GCNPreRALongBranchReg.cpp ----------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // \file // \brief Pass to estimate pre RA branch size and reserve a pair of SGPRs if // there is a long branch. Branch size at this point is difficult to track since // we have no idea what spills will be inserted later on. We just assume 8 bytes // per instruction to compute approximations without computing the actual // instruction size to see if we're in the neighborhood of the maximum branch // distrance threshold tuning of what is considered "long" is handled through // amdgpu-long-branch-factor cl argument which sets LongBranchFactor. //===----------------------------------------------------------------------===// #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/InitializePasses.h" using namespace llvm; #define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg" namespace { static cl::opt LongBranchFactor( "amdgpu-long-branch-factor", cl::init(1.0), cl::Hidden, cl::desc("Factor to apply to what qualifies as a long branch " "to reserve a pair of scalar registers. If this value " "is 0 the long branch registers are never reserved. As this " "value grows the greater chance the branch distance will fall " "within the threshold and the registers will be marked to be " "reserved. We lean towards always reserving a register for " "long jumps")); class GCNPreRALongBranchReg : public MachineFunctionPass { struct BasicBlockInfo { // Offset - Distance from the beginning of the function to the beginning // of this basic block. uint64_t Offset = 0; // Size - Size of the basic block in bytes uint64_t Size = 0; }; void generateBlockInfo(MachineFunction &MF, SmallVectorImpl &BlockInfo); public: static char ID; GCNPreRALongBranchReg() : MachineFunctionPass(ID) { initializeGCNPreRALongBranchRegPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "AMDGPU Pre-RA Long Branch Reg"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); MachineFunctionPass::getAnalysisUsage(AU); } }; } // End anonymous namespace. char GCNPreRALongBranchReg::ID = 0; INITIALIZE_PASS(GCNPreRALongBranchReg, DEBUG_TYPE, "AMDGPU Pre-RA Long Branch Reg", false, false) char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchReg::ID; void GCNPreRALongBranchReg::generateBlockInfo( MachineFunction &MF, SmallVectorImpl &BlockInfo) { BlockInfo.resize(MF.getNumBlockIDs()); // Approximate the size of all basic blocks by just // assuming 8 bytes per instruction for (const MachineBasicBlock &MBB : MF) { uint64_t NumInstr = 0; // Loop through the basic block and add up all non-debug // non-meta instructions for (const MachineInstr &MI : MBB) { // isMetaInstruction is a superset of isDebugIstr if (MI.isMetaInstruction()) continue; NumInstr += 1; } // Approximate size as just 8 bytes per instruction BlockInfo[MBB.getNumber()].Size = 8 * NumInstr; } uint64_t PrevNum = (&MF)->begin()->getNumber(); for (auto &MBB : make_range(std::next(MachineFunction::iterator((&MF)->begin())), (&MF)->end())) { uint64_t Num = MBB.getNumber(); // Compute the offset immediately following this block. BlockInfo[Num].Offset = BlockInfo[PrevNum].Offset + BlockInfo[PrevNum].Size; PrevNum = Num; } } bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) { const GCNSubtarget &STM = MF.getSubtarget(); const SIInstrInfo *TII = STM.getInstrInfo(); const SIRegisterInfo *TRI = STM.getRegisterInfo(); SIMachineFunctionInfo *MFI = MF.getInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); // For now, reserve highest available SGPR pair. After RA, // shift down to a lower unused pair of SGPRs // If all registers are used, then findUnusedRegister will return // AMDGPU::NoRegister. constexpr bool ReserveHighestRegister = true; Register LongBranchReservedReg = TRI->findUnusedRegister( MRI, &AMDGPU::SGPR_64RegClass, MF, ReserveHighestRegister); if (!LongBranchReservedReg) return false; // Approximate code size and offsets of each basic block SmallVector BlockInfo; generateBlockInfo(MF, BlockInfo); for (const MachineBasicBlock &MBB : MF) { MachineBasicBlock::const_iterator Last = MBB.getLastNonDebugInstr(); if (Last == MBB.end() || !Last->isUnconditionalBranch()) continue; MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last); uint64_t BlockDistance = static_cast( LongBranchFactor * BlockInfo[DestBB->getNumber()].Offset); // If the distance falls outside the threshold assume it is a long branch // and we need to reserve the registers if (!TII->isBranchOffsetInRange(Last->getOpcode(), BlockDistance)) { MFI->setLongBranchReservedReg(LongBranchReservedReg); return true; } } return false; }