173471bf0Spatrick //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===// 273471bf0Spatrick // 373471bf0Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 473471bf0Spatrick // See https://llvm.org/LICENSE.txt for license information. 573471bf0Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 673471bf0Spatrick // 773471bf0Spatrick //==-----------------------------------------------------------------------===// 873471bf0Spatrick // 973471bf0Spatrick /// \file 1073471bf0Spatrick /// AMD GCN specific subclass of TargetSubtarget. 1173471bf0Spatrick // 1273471bf0Spatrick //===----------------------------------------------------------------------===// 1373471bf0Spatrick 1473471bf0Spatrick #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 1573471bf0Spatrick #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 1673471bf0Spatrick 1773471bf0Spatrick #include "AMDGPUCallLowering.h" 1873471bf0Spatrick #include "AMDGPUSubtarget.h" 1973471bf0Spatrick #include "SIFrameLowering.h" 2073471bf0Spatrick #include "SIISelLowering.h" 2173471bf0Spatrick #include "SIInstrInfo.h" 2273471bf0Spatrick #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 2373471bf0Spatrick 2473471bf0Spatrick #define GET_SUBTARGETINFO_HEADER 2573471bf0Spatrick #include "AMDGPUGenSubtargetInfo.inc" 2673471bf0Spatrick 2773471bf0Spatrick namespace llvm { 2873471bf0Spatrick 2973471bf0Spatrick class GCNTargetMachine; 3073471bf0Spatrick 3173471bf0Spatrick class GCNSubtarget final : public AMDGPUGenSubtargetInfo, 3273471bf0Spatrick public AMDGPUSubtarget { 33*d415bd75Srobert public: 3473471bf0Spatrick using AMDGPUSubtarget::getMaxWavesPerEU; 3573471bf0Spatrick 3673471bf0Spatrick // Following 2 enums are documented at: 3773471bf0Spatrick // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi 3873471bf0Spatrick enum class TrapHandlerAbi { 3973471bf0Spatrick NONE = 0x00, 4073471bf0Spatrick AMDHSA = 0x01, 4173471bf0Spatrick }; 4273471bf0Spatrick 
4373471bf0Spatrick enum class TrapID { 4473471bf0Spatrick LLVMAMDHSATrap = 0x02, 4573471bf0Spatrick LLVMAMDHSADebugTrap = 0x03, 4673471bf0Spatrick }; 4773471bf0Spatrick 4873471bf0Spatrick private: 4973471bf0Spatrick /// GlobalISel related APIs. 5073471bf0Spatrick std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; 5173471bf0Spatrick std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; 5273471bf0Spatrick std::unique_ptr<InstructionSelector> InstSelector; 5373471bf0Spatrick std::unique_ptr<LegalizerInfo> Legalizer; 5473471bf0Spatrick std::unique_ptr<RegisterBankInfo> RegBankInfo; 5573471bf0Spatrick 5673471bf0Spatrick protected: 5773471bf0Spatrick // Basic subtarget description. 5873471bf0Spatrick Triple TargetTriple; 5973471bf0Spatrick AMDGPU::IsaInfo::AMDGPUTargetID TargetID; 60*d415bd75Srobert unsigned Gen = INVALID; 6173471bf0Spatrick InstrItineraryData InstrItins; 62*d415bd75Srobert int LDSBankCount = 0; 63*d415bd75Srobert unsigned MaxPrivateElementSize = 0; 6473471bf0Spatrick 6573471bf0Spatrick // Possibly statically set by tablegen, but may want to be overridden. 66*d415bd75Srobert bool FastFMAF32 = false; 67*d415bd75Srobert bool FastDenormalF32 = false; 68*d415bd75Srobert bool HalfRate64Ops = false; 69*d415bd75Srobert bool FullRate64Ops = false; 7073471bf0Spatrick 7173471bf0Spatrick // Dynamically set bits that enable features. 72*d415bd75Srobert bool FlatForGlobal = false; 73*d415bd75Srobert bool AutoWaitcntBeforeBarrier = false; 74*d415bd75Srobert bool BackOffBarrier = false; 75*d415bd75Srobert bool UnalignedScratchAccess = false; 76*d415bd75Srobert bool UnalignedAccessMode = false; 77*d415bd75Srobert bool HasApertureRegs = false; 78*d415bd75Srobert bool SupportsXNACK = false; 7973471bf0Spatrick 8073471bf0Spatrick // This should not be used directly. 'TargetID' tracks the dynamic settings 8173471bf0Spatrick // for XNACK. 
bool EnableXNACK = false;

bool EnableTgSplit = false;
bool EnableCuMode = false;
bool TrapHandler = false;

// Used as options.
bool EnableLoadStoreOpt = false;
bool EnableUnsafeDSOffsetFolding = false;
bool EnableSIScheduler = false;
bool EnableDS128 = false;
bool EnablePRTStrictNull = false;
bool DumpCode = false;

// Subtarget properties statically set by tablegen.
bool FP64 = false;
bool FMA = false;
bool MIMG_R128 = false;
bool CIInsts = false;
bool GFX8Insts = false;
bool GFX9Insts = false;
bool GFX90AInsts = false;
bool GFX940Insts = false;
bool GFX10Insts = false;
bool GFX11Insts = false;
bool GFX10_3Insts = false;
bool GFX7GFX8GFX9Insts = false;
bool SGPRInitBug = false;
bool UserSGPRInit16Bug = false;
bool NegativeScratchOffsetBug = false;
bool NegativeUnalignedScratchOffsetBug = false;
bool HasSMemRealTime = false;
bool HasIntClamp = false;
bool HasFmaMixInsts = false;
bool HasMovrel = false;
bool HasVGPRIndexMode = false;
bool HasScalarStores = false;
bool HasScalarAtomics = false;
bool HasSDWAOmod = false;
bool HasSDWAScalar = false;
bool HasSDWASdst = false;
bool HasSDWAMac = false;
bool HasSDWAOutModsVOPC = false;
bool HasDPP = false;
bool HasDPP8 = false;
bool Has64BitDPP = false;
bool HasPackedFP32Ops = false;
bool HasImageInsts = false;
bool HasExtendedImageInsts = false;
bool HasR128A16 = false;
bool HasA16 = false;
bool HasG16 = false;
bool HasNSAEncoding = false;
unsigned NSAMaxSize = 0;
bool GFX10_AEncoding = false;
bool GFX10_BEncoding = false;
bool HasDLInsts = false;
bool HasFmacF64Inst = false;
bool HasDot1Insts = false;
bool HasDot2Insts = false;
bool HasDot3Insts = false;
bool HasDot4Insts = false;
bool HasDot5Insts = false;
bool HasDot6Insts = false;
bool HasDot7Insts = false;
bool HasDot8Insts = false;
bool HasDot9Insts = false;
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasPkFmacF16Inst = false;
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
bool HasAtomicPkFaddNoRtnInsts = false;
bool HasFlatAtomicFaddF32Inst = false;
bool SupportsSRAMECC = false;

// This should not be used directly. 'TargetID' tracks the dynamic settings
// for SRAMECC.
bool EnableSRAMECC = false;

bool HasNoSdstCMPX = false;
bool HasVscnt = false;
bool HasGetWaveIdInst = false;
bool HasSMemTimeInst = false;
bool HasShaderCyclesRegister = false;
bool HasVOP3Literal = false;
bool HasNoDataDepHazard = false;
bool FlatAddressSpace = false;
bool FlatInstOffsets = false;
bool FlatGlobalInsts = false;
bool FlatScratchInsts = false;
bool ScalarFlatScratchInsts = false;
bool HasArchitectedFlatScratch = false;
bool EnableFlatScratch = false;
bool AddNoCarryInsts = false;
bool HasUnpackedD16VMem = false;
bool LDSMisalignedBug = false;
bool HasMFMAInlineLiteralBug = false;
bool UnalignedBufferAccess = false;
bool UnalignedDSAccess = false;
bool HasPackedTID = false;
bool ScalarizeGlobal = false;

bool HasVcmpxPermlaneHazard = false;
bool HasVMEMtoScalarWriteHazard = false;
bool HasSMEMtoVectorWriteHazard = false;
bool HasInstFwdPrefetchBug = false;
bool HasVcmpxExecWARHazard = false;
bool HasLdsBranchVmemWARHazard = false;
bool HasNSAtoVMEMBug = false;
bool HasNSAClauseBug = false;
bool HasOffset3fBug = false;
bool HasFlatSegmentOffsetBug = false;
bool HasImageStoreD16Bug = false;
bool HasImageGather4D16Bug = false;
bool HasGFX11FullVGPRs = false;
bool HasMADIntraFwdBug = false;
199*d415bd75Srobert bool HasVOPDInsts = false; 200*d415bd75Srobert bool HasVALUTransUseHazard = false; 20173471bf0Spatrick 20273471bf0Spatrick // Dummy feature to use for assembler in tablegen. 203*d415bd75Srobert bool FeatureDisable = false; 20473471bf0Spatrick 20573471bf0Spatrick SelectionDAGTargetInfo TSInfo; 20673471bf0Spatrick private: 20773471bf0Spatrick SIInstrInfo InstrInfo; 20873471bf0Spatrick SITargetLowering TLInfo; 20973471bf0Spatrick SIFrameLowering FrameLowering; 21073471bf0Spatrick 21173471bf0Spatrick public: 21273471bf0Spatrick GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 21373471bf0Spatrick const GCNTargetMachine &TM); 21473471bf0Spatrick ~GCNSubtarget() override; 21573471bf0Spatrick 21673471bf0Spatrick GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, 21773471bf0Spatrick StringRef GPU, StringRef FS); 21873471bf0Spatrick getInstrInfo()21973471bf0Spatrick const SIInstrInfo *getInstrInfo() const override { 22073471bf0Spatrick return &InstrInfo; 22173471bf0Spatrick } 22273471bf0Spatrick getFrameLowering()22373471bf0Spatrick const SIFrameLowering *getFrameLowering() const override { 22473471bf0Spatrick return &FrameLowering; 22573471bf0Spatrick } 22673471bf0Spatrick getTargetLowering()22773471bf0Spatrick const SITargetLowering *getTargetLowering() const override { 22873471bf0Spatrick return &TLInfo; 22973471bf0Spatrick } 23073471bf0Spatrick getRegisterInfo()23173471bf0Spatrick const SIRegisterInfo *getRegisterInfo() const override { 23273471bf0Spatrick return &InstrInfo.getRegisterInfo(); 23373471bf0Spatrick } 23473471bf0Spatrick getCallLowering()23573471bf0Spatrick const CallLowering *getCallLowering() const override { 23673471bf0Spatrick return CallLoweringInfo.get(); 23773471bf0Spatrick } 23873471bf0Spatrick getInlineAsmLowering()23973471bf0Spatrick const InlineAsmLowering *getInlineAsmLowering() const override { 24073471bf0Spatrick return InlineAsmLoweringInfo.get(); 24173471bf0Spatrick } 24273471bf0Spatrick 
getInstructionSelector()24373471bf0Spatrick InstructionSelector *getInstructionSelector() const override { 24473471bf0Spatrick return InstSelector.get(); 24573471bf0Spatrick } 24673471bf0Spatrick getLegalizerInfo()24773471bf0Spatrick const LegalizerInfo *getLegalizerInfo() const override { 24873471bf0Spatrick return Legalizer.get(); 24973471bf0Spatrick } 25073471bf0Spatrick getRegBankInfo()25173471bf0Spatrick const RegisterBankInfo *getRegBankInfo() const override { 25273471bf0Spatrick return RegBankInfo.get(); 25373471bf0Spatrick } 25473471bf0Spatrick getTargetID()25573471bf0Spatrick const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const { 25673471bf0Spatrick return TargetID; 25773471bf0Spatrick } 25873471bf0Spatrick 25973471bf0Spatrick // Nothing implemented, just prevent crashes on use. getSelectionDAGInfo()26073471bf0Spatrick const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { 26173471bf0Spatrick return &TSInfo; 26273471bf0Spatrick } 26373471bf0Spatrick getInstrItineraryData()26473471bf0Spatrick const InstrItineraryData *getInstrItineraryData() const override { 26573471bf0Spatrick return &InstrItins; 26673471bf0Spatrick } 26773471bf0Spatrick 26873471bf0Spatrick void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); 26973471bf0Spatrick getGeneration()27073471bf0Spatrick Generation getGeneration() const { 27173471bf0Spatrick return (Generation)Gen; 27273471bf0Spatrick } 27373471bf0Spatrick getMaxWaveScratchSize()274*d415bd75Srobert unsigned getMaxWaveScratchSize() const { 275*d415bd75Srobert // See COMPUTE_TMPRING_SIZE.WAVESIZE. 276*d415bd75Srobert if (getGeneration() < GFX11) { 277*d415bd75Srobert // 13-bit field in units of 256-dword. 278*d415bd75Srobert return (256 * 4) * ((1 << 13) - 1); 279*d415bd75Srobert } 280*d415bd75Srobert // 15-bit field in units of 64-dword. 
281*d415bd75Srobert return (64 * 4) * ((1 << 15) - 1); 282*d415bd75Srobert } 283*d415bd75Srobert 284*d415bd75Srobert /// Return the number of high bits known to be zero for a frame index. getKnownHighZeroBitsForFrameIndex()28573471bf0Spatrick unsigned getKnownHighZeroBitsForFrameIndex() const { 286*d415bd75Srobert return countLeadingZeros(getMaxWaveScratchSize()) + getWavefrontSizeLog2(); 28773471bf0Spatrick } 28873471bf0Spatrick getLDSBankCount()28973471bf0Spatrick int getLDSBankCount() const { 29073471bf0Spatrick return LDSBankCount; 29173471bf0Spatrick } 29273471bf0Spatrick 29373471bf0Spatrick unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { 29473471bf0Spatrick return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16; 29573471bf0Spatrick } 29673471bf0Spatrick 29773471bf0Spatrick unsigned getConstantBusLimit(unsigned Opcode) const; 29873471bf0Spatrick 29973471bf0Spatrick /// Returns if the result of this instruction with a 16-bit result returned in 30073471bf0Spatrick /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve 30173471bf0Spatrick /// the original value. 
30273471bf0Spatrick bool zeroesHigh16BitsOfDest(unsigned Opcode) const; 30373471bf0Spatrick supportsWGP()304*d415bd75Srobert bool supportsWGP() const { return getGeneration() >= GFX10; } 305*d415bd75Srobert hasIntClamp()30673471bf0Spatrick bool hasIntClamp() const { 30773471bf0Spatrick return HasIntClamp; 30873471bf0Spatrick } 30973471bf0Spatrick hasFP64()31073471bf0Spatrick bool hasFP64() const { 31173471bf0Spatrick return FP64; 31273471bf0Spatrick } 31373471bf0Spatrick hasMIMG_R128()31473471bf0Spatrick bool hasMIMG_R128() const { 31573471bf0Spatrick return MIMG_R128; 31673471bf0Spatrick } 31773471bf0Spatrick hasHWFP64()31873471bf0Spatrick bool hasHWFP64() const { 31973471bf0Spatrick return FP64; 32073471bf0Spatrick } 32173471bf0Spatrick hasFastFMAF32()32273471bf0Spatrick bool hasFastFMAF32() const { 32373471bf0Spatrick return FastFMAF32; 32473471bf0Spatrick } 32573471bf0Spatrick hasHalfRate64Ops()32673471bf0Spatrick bool hasHalfRate64Ops() const { 32773471bf0Spatrick return HalfRate64Ops; 32873471bf0Spatrick } 32973471bf0Spatrick hasFullRate64Ops()33073471bf0Spatrick bool hasFullRate64Ops() const { 33173471bf0Spatrick return FullRate64Ops; 33273471bf0Spatrick } 33373471bf0Spatrick hasAddr64()33473471bf0Spatrick bool hasAddr64() const { 33573471bf0Spatrick return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); 33673471bf0Spatrick } 33773471bf0Spatrick hasFlat()33873471bf0Spatrick bool hasFlat() const { 33973471bf0Spatrick return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS); 34073471bf0Spatrick } 34173471bf0Spatrick 34273471bf0Spatrick // Return true if the target only has the reverse operand versions of VALU 34373471bf0Spatrick // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32). 
hasOnlyRevVALUShifts()34473471bf0Spatrick bool hasOnlyRevVALUShifts() const { 34573471bf0Spatrick return getGeneration() >= VOLCANIC_ISLANDS; 34673471bf0Spatrick } 34773471bf0Spatrick hasFractBug()34873471bf0Spatrick bool hasFractBug() const { 34973471bf0Spatrick return getGeneration() == SOUTHERN_ISLANDS; 35073471bf0Spatrick } 35173471bf0Spatrick hasBFE()35273471bf0Spatrick bool hasBFE() const { 35373471bf0Spatrick return true; 35473471bf0Spatrick } 35573471bf0Spatrick hasBFI()35673471bf0Spatrick bool hasBFI() const { 35773471bf0Spatrick return true; 35873471bf0Spatrick } 35973471bf0Spatrick hasBFM()36073471bf0Spatrick bool hasBFM() const { 36173471bf0Spatrick return hasBFE(); 36273471bf0Spatrick } 36373471bf0Spatrick hasBCNT(unsigned Size)36473471bf0Spatrick bool hasBCNT(unsigned Size) const { 36573471bf0Spatrick return true; 36673471bf0Spatrick } 36773471bf0Spatrick hasFFBL()36873471bf0Spatrick bool hasFFBL() const { 36973471bf0Spatrick return true; 37073471bf0Spatrick } 37173471bf0Spatrick hasFFBH()37273471bf0Spatrick bool hasFFBH() const { 37373471bf0Spatrick return true; 37473471bf0Spatrick } 37573471bf0Spatrick hasMed3_16()37673471bf0Spatrick bool hasMed3_16() const { 37773471bf0Spatrick return getGeneration() >= AMDGPUSubtarget::GFX9; 37873471bf0Spatrick } 37973471bf0Spatrick hasMin3Max3_16()38073471bf0Spatrick bool hasMin3Max3_16() const { 38173471bf0Spatrick return getGeneration() >= AMDGPUSubtarget::GFX9; 38273471bf0Spatrick } 38373471bf0Spatrick hasFmaMixInsts()38473471bf0Spatrick bool hasFmaMixInsts() const { 38573471bf0Spatrick return HasFmaMixInsts; 38673471bf0Spatrick } 38773471bf0Spatrick hasCARRY()38873471bf0Spatrick bool hasCARRY() const { 38973471bf0Spatrick return true; 39073471bf0Spatrick } 39173471bf0Spatrick hasFMA()39273471bf0Spatrick bool hasFMA() const { 39373471bf0Spatrick return FMA; 39473471bf0Spatrick } 39573471bf0Spatrick hasSwap()39673471bf0Spatrick bool hasSwap() const { 39773471bf0Spatrick return GFX9Insts; 39873471bf0Spatrick } 
39973471bf0Spatrick hasScalarPackInsts()40073471bf0Spatrick bool hasScalarPackInsts() const { 40173471bf0Spatrick return GFX9Insts; 40273471bf0Spatrick } 40373471bf0Spatrick hasScalarMulHiInsts()40473471bf0Spatrick bool hasScalarMulHiInsts() const { 40573471bf0Spatrick return GFX9Insts; 40673471bf0Spatrick } 40773471bf0Spatrick getTrapHandlerAbi()40873471bf0Spatrick TrapHandlerAbi getTrapHandlerAbi() const { 40973471bf0Spatrick return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE; 41073471bf0Spatrick } 41173471bf0Spatrick supportsGetDoorbellID()41273471bf0Spatrick bool supportsGetDoorbellID() const { 41373471bf0Spatrick // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets. 41473471bf0Spatrick return getGeneration() >= GFX9; 41573471bf0Spatrick } 41673471bf0Spatrick 41773471bf0Spatrick /// True if the offset field of DS instructions works as expected. On SI, the 41873471bf0Spatrick /// offset uses a 16-bit adder and does not always wrap properly. hasUsableDSOffset()41973471bf0Spatrick bool hasUsableDSOffset() const { 42073471bf0Spatrick return getGeneration() >= SEA_ISLANDS; 42173471bf0Spatrick } 42273471bf0Spatrick unsafeDSOffsetFoldingEnabled()42373471bf0Spatrick bool unsafeDSOffsetFoldingEnabled() const { 42473471bf0Spatrick return EnableUnsafeDSOffsetFolding; 42573471bf0Spatrick } 42673471bf0Spatrick 42773471bf0Spatrick /// Condition output from div_scale is usable. hasUsableDivScaleConditionOutput()42873471bf0Spatrick bool hasUsableDivScaleConditionOutput() const { 42973471bf0Spatrick return getGeneration() != SOUTHERN_ISLANDS; 43073471bf0Spatrick } 43173471bf0Spatrick 43273471bf0Spatrick /// Extra wait hazard is needed in some cases before 43373471bf0Spatrick /// s_cbranch_vccnz/s_cbranch_vccz. hasReadVCCZBug()43473471bf0Spatrick bool hasReadVCCZBug() const { 43573471bf0Spatrick return getGeneration() <= SEA_ISLANDS; 43673471bf0Spatrick } 43773471bf0Spatrick 43873471bf0Spatrick /// Writes to VCC_LO/VCC_HI update the VCCZ flag. 
partialVCCWritesUpdateVCCZ()43973471bf0Spatrick bool partialVCCWritesUpdateVCCZ() const { 44073471bf0Spatrick return getGeneration() >= GFX10; 44173471bf0Spatrick } 44273471bf0Spatrick 44373471bf0Spatrick /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR 44473471bf0Spatrick /// was written by a VALU instruction. hasSMRDReadVALUDefHazard()44573471bf0Spatrick bool hasSMRDReadVALUDefHazard() const { 44673471bf0Spatrick return getGeneration() == SOUTHERN_ISLANDS; 44773471bf0Spatrick } 44873471bf0Spatrick 44973471bf0Spatrick /// A read of an SGPR by a VMEM instruction requires 5 wait states when the 45073471bf0Spatrick /// SGPR was written by a VALU Instruction. hasVMEMReadSGPRVALUDefHazard()45173471bf0Spatrick bool hasVMEMReadSGPRVALUDefHazard() const { 45273471bf0Spatrick return getGeneration() >= VOLCANIC_ISLANDS; 45373471bf0Spatrick } 45473471bf0Spatrick hasRFEHazards()45573471bf0Spatrick bool hasRFEHazards() const { 45673471bf0Spatrick return getGeneration() >= VOLCANIC_ISLANDS; 45773471bf0Spatrick } 45873471bf0Spatrick 45973471bf0Spatrick /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. getSetRegWaitStates()46073471bf0Spatrick unsigned getSetRegWaitStates() const { 46173471bf0Spatrick return getGeneration() <= SEA_ISLANDS ? 1 : 2; 46273471bf0Spatrick } 46373471bf0Spatrick dumpCode()46473471bf0Spatrick bool dumpCode() const { 46573471bf0Spatrick return DumpCode; 46673471bf0Spatrick } 46773471bf0Spatrick 46873471bf0Spatrick /// Return the amount of LDS that can be used that will not restrict the 46973471bf0Spatrick /// occupancy lower than WaveCount. 
47073471bf0Spatrick unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 47173471bf0Spatrick const Function &) const; 47273471bf0Spatrick supportsMinMaxDenormModes()47373471bf0Spatrick bool supportsMinMaxDenormModes() const { 47473471bf0Spatrick return getGeneration() >= AMDGPUSubtarget::GFX9; 47573471bf0Spatrick } 47673471bf0Spatrick 47773471bf0Spatrick /// \returns If target supports S_DENORM_MODE. hasDenormModeInst()47873471bf0Spatrick bool hasDenormModeInst() const { 47973471bf0Spatrick return getGeneration() >= AMDGPUSubtarget::GFX10; 48073471bf0Spatrick } 48173471bf0Spatrick useFlatForGlobal()48273471bf0Spatrick bool useFlatForGlobal() const { 48373471bf0Spatrick return FlatForGlobal; 48473471bf0Spatrick } 48573471bf0Spatrick 48673471bf0Spatrick /// \returns If target supports ds_read/write_b128 and user enables generation 48773471bf0Spatrick /// of ds_read/write_b128. useDS128()48873471bf0Spatrick bool useDS128() const { 48973471bf0Spatrick return CIInsts && EnableDS128; 49073471bf0Spatrick } 49173471bf0Spatrick 49273471bf0Spatrick /// \return If target supports ds_read/write_b96/128. hasDS96AndDS128()49373471bf0Spatrick bool hasDS96AndDS128() const { 49473471bf0Spatrick return CIInsts; 49573471bf0Spatrick } 49673471bf0Spatrick 49773471bf0Spatrick /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 haveRoundOpsF64()49873471bf0Spatrick bool haveRoundOpsF64() const { 49973471bf0Spatrick return CIInsts; 50073471bf0Spatrick } 50173471bf0Spatrick 50273471bf0Spatrick /// \returns If MUBUF instructions always perform range checking, even for 50373471bf0Spatrick /// buffer resources used for private memory access. 
privateMemoryResourceIsRangeChecked()50473471bf0Spatrick bool privateMemoryResourceIsRangeChecked() const { 50573471bf0Spatrick return getGeneration() < AMDGPUSubtarget::GFX9; 50673471bf0Spatrick } 50773471bf0Spatrick 50873471bf0Spatrick /// \returns If target requires PRT Struct NULL support (zero result registers 50973471bf0Spatrick /// for sparse texture support). usePRTStrictNull()51073471bf0Spatrick bool usePRTStrictNull() const { 51173471bf0Spatrick return EnablePRTStrictNull; 51273471bf0Spatrick } 51373471bf0Spatrick hasAutoWaitcntBeforeBarrier()51473471bf0Spatrick bool hasAutoWaitcntBeforeBarrier() const { 51573471bf0Spatrick return AutoWaitcntBeforeBarrier; 51673471bf0Spatrick } 51773471bf0Spatrick 518*d415bd75Srobert /// \returns true if the target supports backing off of s_barrier instructions 519*d415bd75Srobert /// when an exception is raised. supportsBackOffBarrier()520*d415bd75Srobert bool supportsBackOffBarrier() const { 521*d415bd75Srobert return BackOffBarrier; 522*d415bd75Srobert } 523*d415bd75Srobert hasUnalignedBufferAccess()52473471bf0Spatrick bool hasUnalignedBufferAccess() const { 52573471bf0Spatrick return UnalignedBufferAccess; 52673471bf0Spatrick } 52773471bf0Spatrick hasUnalignedBufferAccessEnabled()52873471bf0Spatrick bool hasUnalignedBufferAccessEnabled() const { 52973471bf0Spatrick return UnalignedBufferAccess && UnalignedAccessMode; 53073471bf0Spatrick } 53173471bf0Spatrick hasUnalignedDSAccess()53273471bf0Spatrick bool hasUnalignedDSAccess() const { 53373471bf0Spatrick return UnalignedDSAccess; 53473471bf0Spatrick } 53573471bf0Spatrick hasUnalignedDSAccessEnabled()53673471bf0Spatrick bool hasUnalignedDSAccessEnabled() const { 53773471bf0Spatrick return UnalignedDSAccess && UnalignedAccessMode; 53873471bf0Spatrick } 53973471bf0Spatrick hasUnalignedScratchAccess()54073471bf0Spatrick bool hasUnalignedScratchAccess() const { 54173471bf0Spatrick return UnalignedScratchAccess; 54273471bf0Spatrick } 54373471bf0Spatrick 
hasUnalignedAccessMode()54473471bf0Spatrick bool hasUnalignedAccessMode() const { 54573471bf0Spatrick return UnalignedAccessMode; 54673471bf0Spatrick } 54773471bf0Spatrick hasApertureRegs()54873471bf0Spatrick bool hasApertureRegs() const { 54973471bf0Spatrick return HasApertureRegs; 55073471bf0Spatrick } 55173471bf0Spatrick isTrapHandlerEnabled()55273471bf0Spatrick bool isTrapHandlerEnabled() const { 55373471bf0Spatrick return TrapHandler; 55473471bf0Spatrick } 55573471bf0Spatrick isXNACKEnabled()55673471bf0Spatrick bool isXNACKEnabled() const { 55773471bf0Spatrick return TargetID.isXnackOnOrAny(); 55873471bf0Spatrick } 55973471bf0Spatrick isTgSplitEnabled()56073471bf0Spatrick bool isTgSplitEnabled() const { 56173471bf0Spatrick return EnableTgSplit; 56273471bf0Spatrick } 56373471bf0Spatrick isCuModeEnabled()56473471bf0Spatrick bool isCuModeEnabled() const { 56573471bf0Spatrick return EnableCuMode; 56673471bf0Spatrick } 56773471bf0Spatrick hasFlatAddressSpace()56873471bf0Spatrick bool hasFlatAddressSpace() const { 56973471bf0Spatrick return FlatAddressSpace; 57073471bf0Spatrick } 57173471bf0Spatrick hasFlatScrRegister()57273471bf0Spatrick bool hasFlatScrRegister() const { 57373471bf0Spatrick return hasFlatAddressSpace(); 57473471bf0Spatrick } 57573471bf0Spatrick hasFlatInstOffsets()57673471bf0Spatrick bool hasFlatInstOffsets() const { 57773471bf0Spatrick return FlatInstOffsets; 57873471bf0Spatrick } 57973471bf0Spatrick hasFlatGlobalInsts()58073471bf0Spatrick bool hasFlatGlobalInsts() const { 58173471bf0Spatrick return FlatGlobalInsts; 58273471bf0Spatrick } 58373471bf0Spatrick hasFlatScratchInsts()58473471bf0Spatrick bool hasFlatScratchInsts() const { 58573471bf0Spatrick return FlatScratchInsts; 58673471bf0Spatrick } 58773471bf0Spatrick 58873471bf0Spatrick // Check if target supports ST addressing mode with FLAT scratch instructions. 
58973471bf0Spatrick // The ST addressing mode means no registers are used, either VGPR or SGPR, 59073471bf0Spatrick // but only immediate offset is swizzled and added to the FLAT scratch base. hasFlatScratchSTMode()59173471bf0Spatrick bool hasFlatScratchSTMode() const { 592*d415bd75Srobert return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts()); 59373471bf0Spatrick } 59473471bf0Spatrick hasFlatScratchSVSMode()595*d415bd75Srobert bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; } 596*d415bd75Srobert hasScalarFlatScratchInsts()59773471bf0Spatrick bool hasScalarFlatScratchInsts() const { 59873471bf0Spatrick return ScalarFlatScratchInsts; 59973471bf0Spatrick } 60073471bf0Spatrick enableFlatScratch()601*d415bd75Srobert bool enableFlatScratch() const { 602*d415bd75Srobert return flatScratchIsArchitected() || 603*d415bd75Srobert (EnableFlatScratch && hasFlatScratchInsts()); 604*d415bd75Srobert } 605*d415bd75Srobert hasGlobalAddTidInsts()60673471bf0Spatrick bool hasGlobalAddTidInsts() const { 60773471bf0Spatrick return GFX10_BEncoding; 60873471bf0Spatrick } 60973471bf0Spatrick hasAtomicCSub()61073471bf0Spatrick bool hasAtomicCSub() const { 61173471bf0Spatrick return GFX10_BEncoding; 61273471bf0Spatrick } 61373471bf0Spatrick hasMultiDwordFlatScratchAddressing()61473471bf0Spatrick bool hasMultiDwordFlatScratchAddressing() const { 61573471bf0Spatrick return getGeneration() >= GFX9; 61673471bf0Spatrick } 61773471bf0Spatrick hasFlatSegmentOffsetBug()61873471bf0Spatrick bool hasFlatSegmentOffsetBug() const { 61973471bf0Spatrick return HasFlatSegmentOffsetBug; 62073471bf0Spatrick } 62173471bf0Spatrick hasFlatLgkmVMemCountInOrder()62273471bf0Spatrick bool hasFlatLgkmVMemCountInOrder() const { 62373471bf0Spatrick return getGeneration() > GFX9; 62473471bf0Spatrick } 62573471bf0Spatrick hasD16LoadStore()62673471bf0Spatrick bool hasD16LoadStore() const { 62773471bf0Spatrick return getGeneration() >= GFX9; 62873471bf0Spatrick } 62973471bf0Spatrick 
d16PreservesUnusedBits()63073471bf0Spatrick bool d16PreservesUnusedBits() const { 63173471bf0Spatrick return hasD16LoadStore() && !TargetID.isSramEccOnOrAny(); 63273471bf0Spatrick } 63373471bf0Spatrick hasD16Images()63473471bf0Spatrick bool hasD16Images() const { 63573471bf0Spatrick return getGeneration() >= VOLCANIC_ISLANDS; 63673471bf0Spatrick } 63773471bf0Spatrick 63873471bf0Spatrick /// Return if most LDS instructions have an m0 use that require m0 to be 639*d415bd75Srobert /// initialized. ldsRequiresM0Init()64073471bf0Spatrick bool ldsRequiresM0Init() const { 64173471bf0Spatrick return getGeneration() < GFX9; 64273471bf0Spatrick } 64373471bf0Spatrick 64473471bf0Spatrick // True if the hardware rewinds and replays GWS operations if a wave is 64573471bf0Spatrick // preempted. 64673471bf0Spatrick // 64773471bf0Spatrick // If this is false, a GWS operation requires testing if a nack set the 64873471bf0Spatrick // MEM_VIOL bit, and repeating if so. hasGWSAutoReplay()64973471bf0Spatrick bool hasGWSAutoReplay() const { 65073471bf0Spatrick return getGeneration() >= GFX9; 65173471bf0Spatrick } 65273471bf0Spatrick 65373471bf0Spatrick /// \returns if target has ds_gws_sema_release_all instruction. hasGWSSemaReleaseAll()65473471bf0Spatrick bool hasGWSSemaReleaseAll() const { 65573471bf0Spatrick return CIInsts; 65673471bf0Spatrick } 65773471bf0Spatrick 65873471bf0Spatrick /// \returns true if the target has integer add/sub instructions that do not 65973471bf0Spatrick /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32, 66073471bf0Spatrick /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier 66173471bf0Spatrick /// for saturation. 
hasAddNoCarry()66273471bf0Spatrick bool hasAddNoCarry() const { 66373471bf0Spatrick return AddNoCarryInsts; 66473471bf0Spatrick } 66573471bf0Spatrick hasUnpackedD16VMem()66673471bf0Spatrick bool hasUnpackedD16VMem() const { 66773471bf0Spatrick return HasUnpackedD16VMem; 66873471bf0Spatrick } 66973471bf0Spatrick 67073471bf0Spatrick // Covers VS/PS/CS graphics shaders isMesaGfxShader(const Function & F)67173471bf0Spatrick bool isMesaGfxShader(const Function &F) const { 67273471bf0Spatrick return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); 67373471bf0Spatrick } 67473471bf0Spatrick hasMad64_32()67573471bf0Spatrick bool hasMad64_32() const { 67673471bf0Spatrick return getGeneration() >= SEA_ISLANDS; 67773471bf0Spatrick } 67873471bf0Spatrick hasSDWAOmod()67973471bf0Spatrick bool hasSDWAOmod() const { 68073471bf0Spatrick return HasSDWAOmod; 68173471bf0Spatrick } 68273471bf0Spatrick hasSDWAScalar()68373471bf0Spatrick bool hasSDWAScalar() const { 68473471bf0Spatrick return HasSDWAScalar; 68573471bf0Spatrick } 68673471bf0Spatrick hasSDWASdst()68773471bf0Spatrick bool hasSDWASdst() const { 68873471bf0Spatrick return HasSDWASdst; 68973471bf0Spatrick } 69073471bf0Spatrick hasSDWAMac()69173471bf0Spatrick bool hasSDWAMac() const { 69273471bf0Spatrick return HasSDWAMac; 69373471bf0Spatrick } 69473471bf0Spatrick hasSDWAOutModsVOPC()69573471bf0Spatrick bool hasSDWAOutModsVOPC() const { 69673471bf0Spatrick return HasSDWAOutModsVOPC; 69773471bf0Spatrick } 69873471bf0Spatrick hasDLInsts()69973471bf0Spatrick bool hasDLInsts() const { 70073471bf0Spatrick return HasDLInsts; 70173471bf0Spatrick } 70273471bf0Spatrick hasFmacF64Inst()703*d415bd75Srobert bool hasFmacF64Inst() const { return HasFmacF64Inst; } 704*d415bd75Srobert hasDot1Insts()70573471bf0Spatrick bool hasDot1Insts() const { 70673471bf0Spatrick return HasDot1Insts; 70773471bf0Spatrick } 70873471bf0Spatrick hasDot2Insts()70973471bf0Spatrick bool hasDot2Insts() const { 71073471bf0Spatrick return HasDot2Insts; 
71173471bf0Spatrick } 71273471bf0Spatrick hasDot3Insts()71373471bf0Spatrick bool hasDot3Insts() const { 71473471bf0Spatrick return HasDot3Insts; 71573471bf0Spatrick } 71673471bf0Spatrick hasDot4Insts()71773471bf0Spatrick bool hasDot4Insts() const { 71873471bf0Spatrick return HasDot4Insts; 71973471bf0Spatrick } 72073471bf0Spatrick hasDot5Insts()72173471bf0Spatrick bool hasDot5Insts() const { 72273471bf0Spatrick return HasDot5Insts; 72373471bf0Spatrick } 72473471bf0Spatrick hasDot6Insts()72573471bf0Spatrick bool hasDot6Insts() const { 72673471bf0Spatrick return HasDot6Insts; 72773471bf0Spatrick } 72873471bf0Spatrick hasDot7Insts()72973471bf0Spatrick bool hasDot7Insts() const { 73073471bf0Spatrick return HasDot7Insts; 73173471bf0Spatrick } 73273471bf0Spatrick hasDot8Insts()733*d415bd75Srobert bool hasDot8Insts() const { 734*d415bd75Srobert return HasDot8Insts; 735*d415bd75Srobert } 736*d415bd75Srobert hasDot9Insts()737*d415bd75Srobert bool hasDot9Insts() const { 738*d415bd75Srobert return HasDot9Insts; 739*d415bd75Srobert } 740*d415bd75Srobert hasMAIInsts()74173471bf0Spatrick bool hasMAIInsts() const { 74273471bf0Spatrick return HasMAIInsts; 74373471bf0Spatrick } 74473471bf0Spatrick hasFP8Insts()745*d415bd75Srobert bool hasFP8Insts() const { 746*d415bd75Srobert return HasFP8Insts; 747*d415bd75Srobert } 748*d415bd75Srobert hasPkFmacF16Inst()74973471bf0Spatrick bool hasPkFmacF16Inst() const { 75073471bf0Spatrick return HasPkFmacF16Inst; 75173471bf0Spatrick } 75273471bf0Spatrick hasAtomicFaddInsts()75373471bf0Spatrick bool hasAtomicFaddInsts() const { 754*d415bd75Srobert return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts; 75573471bf0Spatrick } 75673471bf0Spatrick hasAtomicFaddRtnInsts()757*d415bd75Srobert bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; } 758*d415bd75Srobert hasAtomicFaddNoRtnInsts()759*d415bd75Srobert bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; } 760*d415bd75Srobert 
hasAtomicPkFaddNoRtnInsts()761*d415bd75Srobert bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; } 762*d415bd75Srobert hasFlatAtomicFaddF32Inst()763*d415bd75Srobert bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; } 764*d415bd75Srobert hasNoSdstCMPX()76573471bf0Spatrick bool hasNoSdstCMPX() const { 76673471bf0Spatrick return HasNoSdstCMPX; 76773471bf0Spatrick } 76873471bf0Spatrick hasVscnt()76973471bf0Spatrick bool hasVscnt() const { 77073471bf0Spatrick return HasVscnt; 77173471bf0Spatrick } 77273471bf0Spatrick hasGetWaveIdInst()77373471bf0Spatrick bool hasGetWaveIdInst() const { 77473471bf0Spatrick return HasGetWaveIdInst; 77573471bf0Spatrick } 77673471bf0Spatrick hasSMemTimeInst()77773471bf0Spatrick bool hasSMemTimeInst() const { 77873471bf0Spatrick return HasSMemTimeInst; 77973471bf0Spatrick } 78073471bf0Spatrick hasShaderCyclesRegister()78173471bf0Spatrick bool hasShaderCyclesRegister() const { 78273471bf0Spatrick return HasShaderCyclesRegister; 78373471bf0Spatrick } 78473471bf0Spatrick hasVOP3Literal()78573471bf0Spatrick bool hasVOP3Literal() const { 78673471bf0Spatrick return HasVOP3Literal; 78773471bf0Spatrick } 78873471bf0Spatrick hasNoDataDepHazard()78973471bf0Spatrick bool hasNoDataDepHazard() const { 79073471bf0Spatrick return HasNoDataDepHazard; 79173471bf0Spatrick } 79273471bf0Spatrick vmemWriteNeedsExpWaitcnt()79373471bf0Spatrick bool vmemWriteNeedsExpWaitcnt() const { 79473471bf0Spatrick return getGeneration() < SEA_ISLANDS; 79573471bf0Spatrick } 79673471bf0Spatrick hasInstPrefetch()797*d415bd75Srobert bool hasInstPrefetch() const { return getGeneration() >= GFX10; } 798*d415bd75Srobert 79973471bf0Spatrick // Scratch is allocated in 256 dword per wave blocks for the entire 800*d415bd75Srobert // wavefront. When viewed from the perspective of an arbitrary workitem, this 80173471bf0Spatrick // is 4-byte aligned. 
80273471bf0Spatrick // 80373471bf0Spatrick // Only 4-byte alignment is really needed to access anything. Transformations 80473471bf0Spatrick // on the pointer value itself may rely on the alignment / known low bits of 80573471bf0Spatrick // the pointer. Set this to something above the minimum to avoid needing 80673471bf0Spatrick // dynamic realignment in common cases. getStackAlignment()80773471bf0Spatrick Align getStackAlignment() const { return Align(16); } 80873471bf0Spatrick enableMachineScheduler()80973471bf0Spatrick bool enableMachineScheduler() const override { 81073471bf0Spatrick return true; 81173471bf0Spatrick } 81273471bf0Spatrick 81373471bf0Spatrick bool useAA() const override; 81473471bf0Spatrick enableSubRegLiveness()81573471bf0Spatrick bool enableSubRegLiveness() const override { 81673471bf0Spatrick return true; 81773471bf0Spatrick } 81873471bf0Spatrick setScalarizeGlobalBehavior(bool b)81973471bf0Spatrick void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } getScalarizeGlobalBehavior()82073471bf0Spatrick bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } 82173471bf0Spatrick 82273471bf0Spatrick // static wrappers 82373471bf0Spatrick static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); 82473471bf0Spatrick 82573471bf0Spatrick // XXX - Why is this here if it isn't in the default pass set? 
enableEarlyIfConversion()82673471bf0Spatrick bool enableEarlyIfConversion() const override { 82773471bf0Spatrick return true; 82873471bf0Spatrick } 82973471bf0Spatrick 83073471bf0Spatrick void overrideSchedPolicy(MachineSchedPolicy &Policy, 83173471bf0Spatrick unsigned NumRegionInstrs) const override; 83273471bf0Spatrick getMaxNumUserSGPRs()83373471bf0Spatrick unsigned getMaxNumUserSGPRs() const { 83473471bf0Spatrick return 16; 83573471bf0Spatrick } 83673471bf0Spatrick hasSMemRealTime()83773471bf0Spatrick bool hasSMemRealTime() const { 83873471bf0Spatrick return HasSMemRealTime; 83973471bf0Spatrick } 84073471bf0Spatrick hasMovrel()84173471bf0Spatrick bool hasMovrel() const { 84273471bf0Spatrick return HasMovrel; 84373471bf0Spatrick } 84473471bf0Spatrick hasVGPRIndexMode()84573471bf0Spatrick bool hasVGPRIndexMode() const { 84673471bf0Spatrick return HasVGPRIndexMode; 84773471bf0Spatrick } 84873471bf0Spatrick 84973471bf0Spatrick bool useVGPRIndexMode() const; 85073471bf0Spatrick hasScalarCompareEq64()85173471bf0Spatrick bool hasScalarCompareEq64() const { 85273471bf0Spatrick return getGeneration() >= VOLCANIC_ISLANDS; 85373471bf0Spatrick } 85473471bf0Spatrick hasScalarStores()85573471bf0Spatrick bool hasScalarStores() const { 85673471bf0Spatrick return HasScalarStores; 85773471bf0Spatrick } 85873471bf0Spatrick hasScalarAtomics()85973471bf0Spatrick bool hasScalarAtomics() const { 86073471bf0Spatrick return HasScalarAtomics; 86173471bf0Spatrick } 86273471bf0Spatrick hasLDSFPAtomicAdd()863*d415bd75Srobert bool hasLDSFPAtomicAdd() const { return GFX8Insts; } 86473471bf0Spatrick 86573471bf0Spatrick /// \returns true if the subtarget has the v_permlanex16_b32 instruction. hasPermLaneX16()86673471bf0Spatrick bool hasPermLaneX16() const { return getGeneration() >= GFX10; } 86773471bf0Spatrick 868*d415bd75Srobert /// \returns true if the subtarget has the v_permlane64_b32 instruction. 
hasPermLane64()869*d415bd75Srobert bool hasPermLane64() const { return getGeneration() >= GFX11; } 870*d415bd75Srobert hasDPP()87173471bf0Spatrick bool hasDPP() const { 87273471bf0Spatrick return HasDPP; 87373471bf0Spatrick } 87473471bf0Spatrick hasDPPBroadcasts()87573471bf0Spatrick bool hasDPPBroadcasts() const { 87673471bf0Spatrick return HasDPP && getGeneration() < GFX10; 87773471bf0Spatrick } 87873471bf0Spatrick hasDPPWavefrontShifts()87973471bf0Spatrick bool hasDPPWavefrontShifts() const { 88073471bf0Spatrick return HasDPP && getGeneration() < GFX10; 88173471bf0Spatrick } 88273471bf0Spatrick hasDPP8()88373471bf0Spatrick bool hasDPP8() const { 88473471bf0Spatrick return HasDPP8; 88573471bf0Spatrick } 88673471bf0Spatrick has64BitDPP()88773471bf0Spatrick bool has64BitDPP() const { 88873471bf0Spatrick return Has64BitDPP; 88973471bf0Spatrick } 89073471bf0Spatrick hasPackedFP32Ops()89173471bf0Spatrick bool hasPackedFP32Ops() const { 89273471bf0Spatrick return HasPackedFP32Ops; 89373471bf0Spatrick } 89473471bf0Spatrick hasFmaakFmamkF32Insts()89573471bf0Spatrick bool hasFmaakFmamkF32Insts() const { 896*d415bd75Srobert return getGeneration() >= GFX10 || hasGFX940Insts(); 897*d415bd75Srobert } 898*d415bd75Srobert hasImageInsts()899*d415bd75Srobert bool hasImageInsts() const { 900*d415bd75Srobert return HasImageInsts; 90173471bf0Spatrick } 90273471bf0Spatrick hasExtendedImageInsts()90373471bf0Spatrick bool hasExtendedImageInsts() const { 90473471bf0Spatrick return HasExtendedImageInsts; 90573471bf0Spatrick } 90673471bf0Spatrick hasR128A16()90773471bf0Spatrick bool hasR128A16() const { 90873471bf0Spatrick return HasR128A16; 90973471bf0Spatrick } 91073471bf0Spatrick hasA16()911*d415bd75Srobert bool hasA16() const { return HasA16; } 91273471bf0Spatrick hasG16()91373471bf0Spatrick bool hasG16() const { return HasG16; } 91473471bf0Spatrick hasOffset3fBug()91573471bf0Spatrick bool hasOffset3fBug() const { 91673471bf0Spatrick return HasOffset3fBug; 91773471bf0Spatrick } 
91873471bf0Spatrick hasImageStoreD16Bug()91973471bf0Spatrick bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } 92073471bf0Spatrick hasImageGather4D16Bug()92173471bf0Spatrick bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } 92273471bf0Spatrick hasMADIntraFwdBug()923*d415bd75Srobert bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; } 924*d415bd75Srobert hasNSAEncoding()92573471bf0Spatrick bool hasNSAEncoding() const { return HasNSAEncoding; } 92673471bf0Spatrick getNSAMaxSize()92773471bf0Spatrick unsigned getNSAMaxSize() const { return NSAMaxSize; } 92873471bf0Spatrick hasGFX10_AEncoding()92973471bf0Spatrick bool hasGFX10_AEncoding() const { 93073471bf0Spatrick return GFX10_AEncoding; 93173471bf0Spatrick } 93273471bf0Spatrick hasGFX10_BEncoding()93373471bf0Spatrick bool hasGFX10_BEncoding() const { 93473471bf0Spatrick return GFX10_BEncoding; 93573471bf0Spatrick } 93673471bf0Spatrick hasGFX10_3Insts()93773471bf0Spatrick bool hasGFX10_3Insts() const { 93873471bf0Spatrick return GFX10_3Insts; 93973471bf0Spatrick } 94073471bf0Spatrick 94173471bf0Spatrick bool hasMadF16() const; 94273471bf0Spatrick hasMovB64()943*d415bd75Srobert bool hasMovB64() const { return GFX940Insts; } 944*d415bd75Srobert hasLshlAddB64()945*d415bd75Srobert bool hasLshlAddB64() const { return GFX940Insts; } 946*d415bd75Srobert enableSIScheduler()94773471bf0Spatrick bool enableSIScheduler() const { 94873471bf0Spatrick return EnableSIScheduler; 94973471bf0Spatrick } 95073471bf0Spatrick loadStoreOptEnabled()95173471bf0Spatrick bool loadStoreOptEnabled() const { 95273471bf0Spatrick return EnableLoadStoreOpt; 95373471bf0Spatrick } 95473471bf0Spatrick hasSGPRInitBug()95573471bf0Spatrick bool hasSGPRInitBug() const { 95673471bf0Spatrick return SGPRInitBug; 95773471bf0Spatrick } 95873471bf0Spatrick hasUserSGPRInit16Bug()959*d415bd75Srobert bool hasUserSGPRInit16Bug() const { 960*d415bd75Srobert return UserSGPRInit16Bug && isWave32(); 961*d415bd75Srobert } 
962*d415bd75Srobert hasNegativeScratchOffsetBug()96373471bf0Spatrick bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; } 96473471bf0Spatrick hasNegativeUnalignedScratchOffsetBug()96573471bf0Spatrick bool hasNegativeUnalignedScratchOffsetBug() const { 96673471bf0Spatrick return NegativeUnalignedScratchOffsetBug; 96773471bf0Spatrick } 96873471bf0Spatrick hasMFMAInlineLiteralBug()96973471bf0Spatrick bool hasMFMAInlineLiteralBug() const { 97073471bf0Spatrick return HasMFMAInlineLiteralBug; 97173471bf0Spatrick } 97273471bf0Spatrick has12DWordStoreHazard()97373471bf0Spatrick bool has12DWordStoreHazard() const { 97473471bf0Spatrick return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; 97573471bf0Spatrick } 97673471bf0Spatrick 97773471bf0Spatrick // \returns true if the subtarget supports DWORDX3 load/store instructions. hasDwordx3LoadStores()97873471bf0Spatrick bool hasDwordx3LoadStores() const { 97973471bf0Spatrick return CIInsts; 98073471bf0Spatrick } 98173471bf0Spatrick hasReadM0MovRelInterpHazard()98273471bf0Spatrick bool hasReadM0MovRelInterpHazard() const { 98373471bf0Spatrick return getGeneration() == AMDGPUSubtarget::GFX9; 98473471bf0Spatrick } 98573471bf0Spatrick hasReadM0SendMsgHazard()98673471bf0Spatrick bool hasReadM0SendMsgHazard() const { 98773471bf0Spatrick return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && 98873471bf0Spatrick getGeneration() <= AMDGPUSubtarget::GFX9; 98973471bf0Spatrick } 99073471bf0Spatrick hasReadM0LdsDmaHazard()991*d415bd75Srobert bool hasReadM0LdsDmaHazard() const { 992*d415bd75Srobert return getGeneration() == AMDGPUSubtarget::GFX9; 993*d415bd75Srobert } 994*d415bd75Srobert hasReadM0LdsDirectHazard()995*d415bd75Srobert bool hasReadM0LdsDirectHazard() const { 996*d415bd75Srobert return getGeneration() == AMDGPUSubtarget::GFX9; 997*d415bd75Srobert } 998*d415bd75Srobert hasVcmpxPermlaneHazard()99973471bf0Spatrick bool hasVcmpxPermlaneHazard() const { 100073471bf0Spatrick return 
HasVcmpxPermlaneHazard; 100173471bf0Spatrick } 100273471bf0Spatrick hasVMEMtoScalarWriteHazard()100373471bf0Spatrick bool hasVMEMtoScalarWriteHazard() const { 100473471bf0Spatrick return HasVMEMtoScalarWriteHazard; 100573471bf0Spatrick } 100673471bf0Spatrick hasSMEMtoVectorWriteHazard()100773471bf0Spatrick bool hasSMEMtoVectorWriteHazard() const { 100873471bf0Spatrick return HasSMEMtoVectorWriteHazard; 100973471bf0Spatrick } 101073471bf0Spatrick hasLDSMisalignedBug()101173471bf0Spatrick bool hasLDSMisalignedBug() const { 101273471bf0Spatrick return LDSMisalignedBug && !EnableCuMode; 101373471bf0Spatrick } 101473471bf0Spatrick hasInstFwdPrefetchBug()101573471bf0Spatrick bool hasInstFwdPrefetchBug() const { 101673471bf0Spatrick return HasInstFwdPrefetchBug; 101773471bf0Spatrick } 101873471bf0Spatrick hasVcmpxExecWARHazard()101973471bf0Spatrick bool hasVcmpxExecWARHazard() const { 102073471bf0Spatrick return HasVcmpxExecWARHazard; 102173471bf0Spatrick } 102273471bf0Spatrick hasLdsBranchVmemWARHazard()102373471bf0Spatrick bool hasLdsBranchVmemWARHazard() const { 102473471bf0Spatrick return HasLdsBranchVmemWARHazard; 102573471bf0Spatrick } 102673471bf0Spatrick 1027*d415bd75Srobert // Shift amount of a 64 bit shift cannot be a highest allocated register 1028*d415bd75Srobert // if also at the end of the allocation block. hasShift64HighRegBug()1029*d415bd75Srobert bool hasShift64HighRegBug() const { 1030*d415bd75Srobert return GFX90AInsts && !GFX940Insts; 1031*d415bd75Srobert } 1032*d415bd75Srobert 1033*d415bd75Srobert // Has one cycle hazard on transcendental instruction feeding a 1034*d415bd75Srobert // non transcendental VALU. hasTransForwardingHazard()1035*d415bd75Srobert bool hasTransForwardingHazard() const { return GFX940Insts; } 1036*d415bd75Srobert 1037*d415bd75Srobert // Has one cycle hazard on a VALU instruction partially writing dst with 1038*d415bd75Srobert // a shift of result bits feeding another VALU instruction. 
hasDstSelForwardingHazard()1039*d415bd75Srobert bool hasDstSelForwardingHazard() const { return GFX940Insts; } 1040*d415bd75Srobert 1041*d415bd75Srobert // Cannot use op_sel with v_dot instructions. hasDOTOpSelHazard()1042*d415bd75Srobert bool hasDOTOpSelHazard() const { return GFX940Insts; } 1043*d415bd75Srobert 1044*d415bd75Srobert // Does not have HW interlocs for VALU writing and then reading SGPRs. hasVDecCoExecHazard()1045*d415bd75Srobert bool hasVDecCoExecHazard() const { 1046*d415bd75Srobert return GFX940Insts; 1047*d415bd75Srobert } 1048*d415bd75Srobert hasNSAtoVMEMBug()104973471bf0Spatrick bool hasNSAtoVMEMBug() const { 105073471bf0Spatrick return HasNSAtoVMEMBug; 105173471bf0Spatrick } 105273471bf0Spatrick hasNSAClauseBug()105373471bf0Spatrick bool hasNSAClauseBug() const { return HasNSAClauseBug; } 105473471bf0Spatrick hasHardClauses()105573471bf0Spatrick bool hasHardClauses() const { return getGeneration() >= GFX10; } 105673471bf0Spatrick hasGFX90AInsts()105773471bf0Spatrick bool hasGFX90AInsts() const { return GFX90AInsts; } 105873471bf0Spatrick hasFPAtomicToDenormModeHazard()1059*d415bd75Srobert bool hasFPAtomicToDenormModeHazard() const { 1060*d415bd75Srobert return getGeneration() == GFX10; 1061*d415bd75Srobert } 1062*d415bd75Srobert hasVOP3DPP()1063*d415bd75Srobert bool hasVOP3DPP() const { return getGeneration() >= GFX11; } 1064*d415bd75Srobert hasLdsDirect()1065*d415bd75Srobert bool hasLdsDirect() const { return getGeneration() >= GFX11; } 1066*d415bd75Srobert hasVALUPartialForwardingHazard()1067*d415bd75Srobert bool hasVALUPartialForwardingHazard() const { 1068*d415bd75Srobert return getGeneration() >= GFX11; 1069*d415bd75Srobert } 1070*d415bd75Srobert hasVALUTransUseHazard()1071*d415bd75Srobert bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; } 1072*d415bd75Srobert hasVALUMaskWriteHazard()1073*d415bd75Srobert bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; } 1074*d415bd75Srobert 107573471bf0Spatrick /// 
Return if operations acting on VGPR tuples require even alignment. needsAlignedVGPRs()107673471bf0Spatrick bool needsAlignedVGPRs() const { return GFX90AInsts; } 107773471bf0Spatrick 1078*d415bd75Srobert /// Return true if the target has the S_PACK_HL_B32_B16 instruction. hasSPackHL()1079*d415bd75Srobert bool hasSPackHL() const { return GFX11Insts; } 1080*d415bd75Srobert 1081*d415bd75Srobert /// Return true if the target's EXP instruction has the COMPR flag, which 1082*d415bd75Srobert /// affects the meaning of the EN (enable) bits. hasCompressedExport()1083*d415bd75Srobert bool hasCompressedExport() const { return !GFX11Insts; } 1084*d415bd75Srobert 1085*d415bd75Srobert /// Return true if the target's EXP instruction supports the NULL export 1086*d415bd75Srobert /// target. hasNullExportTarget()1087*d415bd75Srobert bool hasNullExportTarget() const { return !GFX11Insts; } 1088*d415bd75Srobert hasGFX11FullVGPRs()1089*d415bd75Srobert bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; } 1090*d415bd75Srobert hasVOPDInsts()1091*d415bd75Srobert bool hasVOPDInsts() const { return HasVOPDInsts; } 1092*d415bd75Srobert hasFlatScratchSVSSwizzleBug()1093*d415bd75Srobert bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; } 1094*d415bd75Srobert 1095*d415bd75Srobert /// Return true if the target has the S_DELAY_ALU instruction. hasDelayAlu()1096*d415bd75Srobert bool hasDelayAlu() const { return GFX11Insts; } 1097*d415bd75Srobert hasPackedTID()109873471bf0Spatrick bool hasPackedTID() const { return HasPackedTID; } 109973471bf0Spatrick 1100*d415bd75Srobert // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that 1101*d415bd75Srobert // hasGFX90AInsts is also true. 
hasGFX940Insts()1102*d415bd75Srobert bool hasGFX940Insts() const { return GFX940Insts; } 1103*d415bd75Srobert 110473471bf0Spatrick /// Return the maximum number of waves per SIMD for kernels using \p SGPRs 110573471bf0Spatrick /// SGPRs 110673471bf0Spatrick unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; 110773471bf0Spatrick 110873471bf0Spatrick /// Return the maximum number of waves per SIMD for kernels using \p VGPRs 110973471bf0Spatrick /// VGPRs 111073471bf0Spatrick unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; 111173471bf0Spatrick 111273471bf0Spatrick /// Return occupancy for the given function. Used LDS and a number of 111373471bf0Spatrick /// registers if provided. 111473471bf0Spatrick /// Note, occupancy can be affected by the scratch allocation as well, but 111573471bf0Spatrick /// we do not have enough information to compute it. 111673471bf0Spatrick unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, 111773471bf0Spatrick unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; 111873471bf0Spatrick 111973471bf0Spatrick /// \returns true if the flat_scratch register should be initialized with the 112073471bf0Spatrick /// pointer to the wave's scratch memory rather than a size and offset. flatScratchIsPointer()112173471bf0Spatrick bool flatScratchIsPointer() const { 112273471bf0Spatrick return getGeneration() >= AMDGPUSubtarget::GFX9; 112373471bf0Spatrick } 112473471bf0Spatrick 112573471bf0Spatrick /// \returns true if the flat_scratch register is initialized by the HW. 112673471bf0Spatrick /// In this case it is readonly. 
flatScratchIsArchitected()112773471bf0Spatrick bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; } 112873471bf0Spatrick 112973471bf0Spatrick /// \returns true if the machine has merged shaders in which s0-s7 are 113073471bf0Spatrick /// reserved by the hardware and user SGPRs start at s8 hasMergedShaders()113173471bf0Spatrick bool hasMergedShaders() const { 113273471bf0Spatrick return getGeneration() >= GFX9; 113373471bf0Spatrick } 113473471bf0Spatrick 1135*d415bd75Srobert // \returns true if the target supports the pre-NGG legacy geometry path. hasLegacyGeometry()1136*d415bd75Srobert bool hasLegacyGeometry() const { return getGeneration() < GFX11; } 1137*d415bd75Srobert 113873471bf0Spatrick /// \returns SGPR allocation granularity supported by the subtarget. getSGPRAllocGranule()113973471bf0Spatrick unsigned getSGPRAllocGranule() const { 114073471bf0Spatrick return AMDGPU::IsaInfo::getSGPRAllocGranule(this); 114173471bf0Spatrick } 114273471bf0Spatrick 114373471bf0Spatrick /// \returns SGPR encoding granularity supported by the subtarget. getSGPREncodingGranule()114473471bf0Spatrick unsigned getSGPREncodingGranule() const { 114573471bf0Spatrick return AMDGPU::IsaInfo::getSGPREncodingGranule(this); 114673471bf0Spatrick } 114773471bf0Spatrick 114873471bf0Spatrick /// \returns Total number of SGPRs supported by the subtarget. getTotalNumSGPRs()114973471bf0Spatrick unsigned getTotalNumSGPRs() const { 115073471bf0Spatrick return AMDGPU::IsaInfo::getTotalNumSGPRs(this); 115173471bf0Spatrick } 115273471bf0Spatrick 115373471bf0Spatrick /// \returns Addressable number of SGPRs supported by the subtarget. 
getAddressableNumSGPRs()115473471bf0Spatrick unsigned getAddressableNumSGPRs() const { 115573471bf0Spatrick return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); 115673471bf0Spatrick } 115773471bf0Spatrick 115873471bf0Spatrick /// \returns Minimum number of SGPRs that meets the given number of waves per 115973471bf0Spatrick /// execution unit requirement supported by the subtarget. getMinNumSGPRs(unsigned WavesPerEU)116073471bf0Spatrick unsigned getMinNumSGPRs(unsigned WavesPerEU) const { 116173471bf0Spatrick return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); 116273471bf0Spatrick } 116373471bf0Spatrick 116473471bf0Spatrick /// \returns Maximum number of SGPRs that meets the given number of waves per 116573471bf0Spatrick /// execution unit requirement supported by the subtarget. getMaxNumSGPRs(unsigned WavesPerEU,bool Addressable)116673471bf0Spatrick unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { 116773471bf0Spatrick return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); 116873471bf0Spatrick } 116973471bf0Spatrick 117073471bf0Spatrick /// \returns Reserved number of SGPRs. This is common 117173471bf0Spatrick /// utility function called by MachineFunction and 117273471bf0Spatrick /// Function variants of getReservedNumSGPRs. 1173*d415bd75Srobert unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const; 117473471bf0Spatrick /// \returns Reserved number of SGPRs for given machine function \p MF. 117573471bf0Spatrick unsigned getReservedNumSGPRs(const MachineFunction &MF) const; 117673471bf0Spatrick 117773471bf0Spatrick /// \returns Reserved number of SGPRs for given function \p F. 117873471bf0Spatrick unsigned getReservedNumSGPRs(const Function &F) const; 117973471bf0Spatrick 118073471bf0Spatrick /// \returns max num SGPRs. This is the common utility 118173471bf0Spatrick /// function called by MachineFunction and Function 118273471bf0Spatrick /// variants of getMaxNumSGPRs. 
118373471bf0Spatrick unsigned getBaseMaxNumSGPRs(const Function &F, 118473471bf0Spatrick std::pair<unsigned, unsigned> WavesPerEU, 118573471bf0Spatrick unsigned PreloadedSGPRs, 118673471bf0Spatrick unsigned ReservedNumSGPRs) const; 118773471bf0Spatrick 118873471bf0Spatrick /// \returns Maximum number of SGPRs that meets number of waves per execution 118973471bf0Spatrick /// unit requirement for function \p MF, or number of SGPRs explicitly 119073471bf0Spatrick /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. 119173471bf0Spatrick /// 119273471bf0Spatrick /// \returns Value that meets number of waves per execution unit requirement 119373471bf0Spatrick /// if explicitly requested value cannot be converted to integer, violates 119473471bf0Spatrick /// subtarget's specifications, or does not meet number of waves per execution 119573471bf0Spatrick /// unit requirement. 119673471bf0Spatrick unsigned getMaxNumSGPRs(const MachineFunction &MF) const; 119773471bf0Spatrick 119873471bf0Spatrick /// \returns Maximum number of SGPRs that meets number of waves per execution 119973471bf0Spatrick /// unit requirement for function \p F, or number of SGPRs explicitly 120073471bf0Spatrick /// requested using "amdgpu-num-sgpr" attribute attached to function \p F. 120173471bf0Spatrick /// 120273471bf0Spatrick /// \returns Value that meets number of waves per execution unit requirement 120373471bf0Spatrick /// if explicitly requested value cannot be converted to integer, violates 120473471bf0Spatrick /// subtarget's specifications, or does not meet number of waves per execution 120573471bf0Spatrick /// unit requirement. 120673471bf0Spatrick unsigned getMaxNumSGPRs(const Function &F) const; 120773471bf0Spatrick 120873471bf0Spatrick /// \returns VGPR allocation granularity supported by the subtarget. 
getVGPRAllocGranule()120973471bf0Spatrick unsigned getVGPRAllocGranule() const { 121073471bf0Spatrick return AMDGPU::IsaInfo::getVGPRAllocGranule(this); 121173471bf0Spatrick } 121273471bf0Spatrick 121373471bf0Spatrick /// \returns VGPR encoding granularity supported by the subtarget. getVGPREncodingGranule()121473471bf0Spatrick unsigned getVGPREncodingGranule() const { 121573471bf0Spatrick return AMDGPU::IsaInfo::getVGPREncodingGranule(this); 121673471bf0Spatrick } 121773471bf0Spatrick 121873471bf0Spatrick /// \returns Total number of VGPRs supported by the subtarget. getTotalNumVGPRs()121973471bf0Spatrick unsigned getTotalNumVGPRs() const { 122073471bf0Spatrick return AMDGPU::IsaInfo::getTotalNumVGPRs(this); 122173471bf0Spatrick } 122273471bf0Spatrick 122373471bf0Spatrick /// \returns Addressable number of VGPRs supported by the subtarget. getAddressableNumVGPRs()122473471bf0Spatrick unsigned getAddressableNumVGPRs() const { 122573471bf0Spatrick return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); 122673471bf0Spatrick } 122773471bf0Spatrick 1228*d415bd75Srobert /// \returns the minimum number of VGPRs that will prevent achieving more than 1229*d415bd75Srobert /// the specified number of waves \p WavesPerEU. getMinNumVGPRs(unsigned WavesPerEU)123073471bf0Spatrick unsigned getMinNumVGPRs(unsigned WavesPerEU) const { 123173471bf0Spatrick return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); 123273471bf0Spatrick } 123373471bf0Spatrick 1234*d415bd75Srobert /// \returns the maximum number of VGPRs that can be used and still achieved 1235*d415bd75Srobert /// at least the specified number of waves \p WavesPerEU. getMaxNumVGPRs(unsigned WavesPerEU)123673471bf0Spatrick unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { 123773471bf0Spatrick return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); 123873471bf0Spatrick } 123973471bf0Spatrick 124073471bf0Spatrick /// \returns max num VGPRs. 
This is the common utility function 124173471bf0Spatrick /// called by MachineFunction and Function variants of getMaxNumVGPRs. 124273471bf0Spatrick unsigned getBaseMaxNumVGPRs(const Function &F, 124373471bf0Spatrick std::pair<unsigned, unsigned> WavesPerEU) const; 124473471bf0Spatrick /// \returns Maximum number of VGPRs that meets number of waves per execution 124573471bf0Spatrick /// unit requirement for function \p F, or number of VGPRs explicitly 124673471bf0Spatrick /// requested using "amdgpu-num-vgpr" attribute attached to function \p F. 124773471bf0Spatrick /// 124873471bf0Spatrick /// \returns Value that meets number of waves per execution unit requirement 124973471bf0Spatrick /// if explicitly requested value cannot be converted to integer, violates 125073471bf0Spatrick /// subtarget's specifications, or does not meet number of waves per execution 125173471bf0Spatrick /// unit requirement. 125273471bf0Spatrick unsigned getMaxNumVGPRs(const Function &F) const; 125373471bf0Spatrick getMaxNumAGPRs(const Function & F)1254*d415bd75Srobert unsigned getMaxNumAGPRs(const Function &F) const { 1255*d415bd75Srobert return getMaxNumVGPRs(F); 1256*d415bd75Srobert } 1257*d415bd75Srobert 125873471bf0Spatrick /// \returns Maximum number of VGPRs that meets number of waves per execution 125973471bf0Spatrick /// unit requirement for function \p MF, or number of VGPRs explicitly 126073471bf0Spatrick /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. 126173471bf0Spatrick /// 126273471bf0Spatrick /// \returns Value that meets number of waves per execution unit requirement 126373471bf0Spatrick /// if explicitly requested value cannot be converted to integer, violates 126473471bf0Spatrick /// subtarget's specifications, or does not meet number of waves per execution 126573471bf0Spatrick /// unit requirement. 
126673471bf0Spatrick unsigned getMaxNumVGPRs(const MachineFunction &MF) const; 126773471bf0Spatrick 126873471bf0Spatrick void getPostRAMutations( 126973471bf0Spatrick std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) 127073471bf0Spatrick const override; 127173471bf0Spatrick 1272*d415bd75Srobert std::unique_ptr<ScheduleDAGMutation> 1273*d415bd75Srobert createFillMFMAShadowMutation(const TargetInstrInfo *TII) const; 1274*d415bd75Srobert isWave32()127573471bf0Spatrick bool isWave32() const { 127673471bf0Spatrick return getWavefrontSize() == 32; 127773471bf0Spatrick } 127873471bf0Spatrick isWave64()127973471bf0Spatrick bool isWave64() const { 128073471bf0Spatrick return getWavefrontSize() == 64; 128173471bf0Spatrick } 128273471bf0Spatrick getBoolRC()128373471bf0Spatrick const TargetRegisterClass *getBoolRC() const { 128473471bf0Spatrick return getRegisterInfo()->getBoolRC(); 128573471bf0Spatrick } 128673471bf0Spatrick 128773471bf0Spatrick /// \returns Maximum number of work groups per compute unit supported by the 128873471bf0Spatrick /// subtarget and limited by given \p FlatWorkGroupSize. getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize)128973471bf0Spatrick unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { 129073471bf0Spatrick return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); 129173471bf0Spatrick } 129273471bf0Spatrick 129373471bf0Spatrick /// \returns Minimum flat work group size supported by the subtarget. getMinFlatWorkGroupSize()129473471bf0Spatrick unsigned getMinFlatWorkGroupSize() const override { 129573471bf0Spatrick return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); 129673471bf0Spatrick } 129773471bf0Spatrick 129873471bf0Spatrick /// \returns Maximum flat work group size supported by the subtarget. 
getMaxFlatWorkGroupSize()129973471bf0Spatrick unsigned getMaxFlatWorkGroupSize() const override { 130073471bf0Spatrick return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); 130173471bf0Spatrick } 130273471bf0Spatrick 130373471bf0Spatrick /// \returns Number of waves per execution unit required to support the given 130473471bf0Spatrick /// \p FlatWorkGroupSize. 130573471bf0Spatrick unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize)130673471bf0Spatrick getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { 130773471bf0Spatrick return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); 130873471bf0Spatrick } 130973471bf0Spatrick 131073471bf0Spatrick /// \returns Minimum number of waves per execution unit supported by the 131173471bf0Spatrick /// subtarget. getMinWavesPerEU()131273471bf0Spatrick unsigned getMinWavesPerEU() const override { 131373471bf0Spatrick return AMDGPU::IsaInfo::getMinWavesPerEU(this); 131473471bf0Spatrick } 131573471bf0Spatrick 131673471bf0Spatrick void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, 131773471bf0Spatrick SDep &Dep) const override; 1318*d415bd75Srobert 1319*d415bd75Srobert // \returns true if it's beneficial on this subtarget for the scheduler to 1320*d415bd75Srobert // cluster stores as well as loads. shouldClusterStores()1321*d415bd75Srobert bool shouldClusterStores() const { return getGeneration() >= GFX11; } 1322*d415bd75Srobert 1323*d415bd75Srobert // \returns the number of address arguments from which to enable MIMG NSA 1324*d415bd75Srobert // on supported architectures. 1325*d415bd75Srobert unsigned getNSAThreshold(const MachineFunction &MF) const; 132673471bf0Spatrick }; 132773471bf0Spatrick 132873471bf0Spatrick } // end namespace llvm 132973471bf0Spatrick 133073471bf0Spatrick #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 1331