173471bf0Spatrick //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//
273471bf0Spatrick //
373471bf0Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
473471bf0Spatrick // See https://llvm.org/LICENSE.txt for license information.
573471bf0Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
673471bf0Spatrick //
773471bf0Spatrick //==-----------------------------------------------------------------------===//
873471bf0Spatrick //
973471bf0Spatrick /// \file
1073471bf0Spatrick /// AMD GCN specific subclass of TargetSubtarget.
1173471bf0Spatrick //
1273471bf0Spatrick //===----------------------------------------------------------------------===//
1373471bf0Spatrick 
1473471bf0Spatrick #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
1573471bf0Spatrick #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
1673471bf0Spatrick 
1773471bf0Spatrick #include "AMDGPUCallLowering.h"
1873471bf0Spatrick #include "AMDGPUSubtarget.h"
1973471bf0Spatrick #include "SIFrameLowering.h"
2073471bf0Spatrick #include "SIISelLowering.h"
2173471bf0Spatrick #include "SIInstrInfo.h"
2273471bf0Spatrick #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
2373471bf0Spatrick 
2473471bf0Spatrick #define GET_SUBTARGETINFO_HEADER
2573471bf0Spatrick #include "AMDGPUGenSubtargetInfo.inc"
2673471bf0Spatrick 
2773471bf0Spatrick namespace llvm {
2873471bf0Spatrick 
2973471bf0Spatrick class GCNTargetMachine;
3073471bf0Spatrick 
3173471bf0Spatrick class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
3273471bf0Spatrick                            public AMDGPUSubtarget {
33*d415bd75Srobert public:
3473471bf0Spatrick   using AMDGPUSubtarget::getMaxWavesPerEU;
3573471bf0Spatrick 
3673471bf0Spatrick   // Following 2 enums are documented at:
3773471bf0Spatrick   //   - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi
/// Trap handler ABI selector. The numeric values are assigned explicitly.
enum class TrapHandlerAbi {
  NONE = 0x00,
  AMDHSA = 0x01,
};
4273471bf0Spatrick 
/// Trap identifiers. The numeric values are assigned explicitly.
enum class TrapID {
  LLVMAMDHSATrap = 0x02,
  LLVMAMDHSADebugTrap = 0x03,
};
4773471bf0Spatrick 
4873471bf0Spatrick private:
4973471bf0Spatrick   /// GlobalISel related APIs.
5073471bf0Spatrick   std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
5173471bf0Spatrick   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
5273471bf0Spatrick   std::unique_ptr<InstructionSelector> InstSelector;
5373471bf0Spatrick   std::unique_ptr<LegalizerInfo> Legalizer;
5473471bf0Spatrick   std::unique_ptr<RegisterBankInfo> RegBankInfo;
5573471bf0Spatrick 
5673471bf0Spatrick protected:
5773471bf0Spatrick   // Basic subtarget description.
5873471bf0Spatrick   Triple TargetTriple;
5973471bf0Spatrick   AMDGPU::IsaInfo::AMDGPUTargetID TargetID;
60*d415bd75Srobert   unsigned Gen = INVALID;
6173471bf0Spatrick   InstrItineraryData InstrItins;
62*d415bd75Srobert   int LDSBankCount = 0;
63*d415bd75Srobert   unsigned MaxPrivateElementSize = 0;
6473471bf0Spatrick 
// Possibly statically set by tablegen, but may want to be overridden.
// All of these default to false.
bool FastFMAF32 = false;
bool FastDenormalF32 = false;
bool HalfRate64Ops = false;
bool FullRate64Ops = false;
7073471bf0Spatrick 
// Dynamically set bits that enable features. All default to false.
bool FlatForGlobal = false;
bool AutoWaitcntBeforeBarrier = false;
bool BackOffBarrier = false;
bool UnalignedScratchAccess = false;
bool UnalignedAccessMode = false;
bool HasApertureRegs = false;
bool SupportsXNACK = false;
7973471bf0Spatrick 
// This should not be used directly. 'TargetID' tracks the dynamic settings
// for XNACK (isXNACKEnabled() queries TargetID, not this flag).
bool EnableXNACK = false;

bool EnableTgSplit = false;
bool EnableCuMode = false;
bool TrapHandler = false;
8773471bf0Spatrick 
// Used as options. All default to false.
bool EnableLoadStoreOpt = false;
bool EnableUnsafeDSOffsetFolding = false;
bool EnableSIScheduler = false;
bool EnableDS128 = false;
bool EnablePRTStrictNull = false;
bool DumpCode = false;
9573471bf0Spatrick 
// Subtarget properties statically set by tablegen. Every flag defaults to
// false and NSAMaxSize defaults to 0.
bool FP64 = false;
bool FMA = false;
bool MIMG_R128 = false;
bool CIInsts = false;
bool GFX8Insts = false;
bool GFX9Insts = false;
bool GFX90AInsts = false;
bool GFX940Insts = false;
bool GFX10Insts = false;
bool GFX11Insts = false;
bool GFX10_3Insts = false;
bool GFX7GFX8GFX9Insts = false;
bool SGPRInitBug = false;
bool UserSGPRInit16Bug = false;
bool NegativeScratchOffsetBug = false;
bool NegativeUnalignedScratchOffsetBug = false;
bool HasSMemRealTime = false;
bool HasIntClamp = false;
bool HasFmaMixInsts = false;
bool HasMovrel = false;
bool HasVGPRIndexMode = false;
bool HasScalarStores = false;
bool HasScalarAtomics = false;
bool HasSDWAOmod = false;
bool HasSDWAScalar = false;
bool HasSDWASdst = false;
bool HasSDWAMac = false;
bool HasSDWAOutModsVOPC = false;
bool HasDPP = false;
bool HasDPP8 = false;
bool Has64BitDPP = false;
bool HasPackedFP32Ops = false;
bool HasImageInsts = false;
bool HasExtendedImageInsts = false;
bool HasR128A16 = false;
bool HasA16 = false;
bool HasG16 = false;
bool HasNSAEncoding = false;
unsigned NSAMaxSize = 0;
bool GFX10_AEncoding = false;
bool GFX10_BEncoding = false;
bool HasDLInsts = false;
bool HasFmacF64Inst = false;
bool HasDot1Insts = false;
bool HasDot2Insts = false;
bool HasDot3Insts = false;
bool HasDot4Insts = false;
bool HasDot5Insts = false;
bool HasDot6Insts = false;
bool HasDot7Insts = false;
bool HasDot8Insts = false;
bool HasDot9Insts = false;
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasPkFmacF16Inst = false;
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
bool HasAtomicPkFaddNoRtnInsts = false;
bool HasFlatAtomicFaddF32Inst = false;
bool SupportsSRAMECC = false;
15773471bf0Spatrick 
// This should not be used directly. 'TargetID' tracks the dynamic settings
// for SRAMECC (d16PreservesUnusedBits() queries TargetID, not this flag).
bool EnableSRAMECC = false;

bool HasNoSdstCMPX = false;
bool HasVscnt = false;
bool HasGetWaveIdInst = false;
bool HasSMemTimeInst = false;
bool HasShaderCyclesRegister = false;
bool HasVOP3Literal = false;
bool HasNoDataDepHazard = false;
bool FlatAddressSpace = false;
bool FlatInstOffsets = false;
bool FlatGlobalInsts = false;
bool FlatScratchInsts = false;
bool ScalarFlatScratchInsts = false;
bool HasArchitectedFlatScratch = false;
bool EnableFlatScratch = false;
bool AddNoCarryInsts = false;
bool HasUnpackedD16VMem = false;
bool LDSMisalignedBug = false;
bool HasMFMAInlineLiteralBug = false;
bool UnalignedBufferAccess = false;
bool UnalignedDSAccess = false;
bool HasPackedTID = false;
bool ScalarizeGlobal = false;
18473471bf0Spatrick 
// Hazard and hardware-bug related flags (going by their names). All default
// to false.
bool HasVcmpxPermlaneHazard = false;
bool HasVMEMtoScalarWriteHazard = false;
bool HasSMEMtoVectorWriteHazard = false;
bool HasInstFwdPrefetchBug = false;
bool HasVcmpxExecWARHazard = false;
bool HasLdsBranchVmemWARHazard = false;
bool HasNSAtoVMEMBug = false;
bool HasNSAClauseBug = false;
bool HasOffset3fBug = false;
bool HasFlatSegmentOffsetBug = false;
bool HasImageStoreD16Bug = false;
bool HasImageGather4D16Bug = false;
bool HasGFX11FullVGPRs = false;
bool HasMADIntraFwdBug = false;
bool HasVOPDInsts = false;
bool HasVALUTransUseHazard = false;
20173471bf0Spatrick 
20273471bf0Spatrick   // Dummy feature to use for assembler in tablegen.
203*d415bd75Srobert   bool FeatureDisable = false;
20473471bf0Spatrick 
20573471bf0Spatrick   SelectionDAGTargetInfo TSInfo;
20673471bf0Spatrick private:
20773471bf0Spatrick   SIInstrInfo InstrInfo;
20873471bf0Spatrick   SITargetLowering TLInfo;
20973471bf0Spatrick   SIFrameLowering FrameLowering;
21073471bf0Spatrick 
21173471bf0Spatrick public:
21273471bf0Spatrick   GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
21373471bf0Spatrick                const GCNTargetMachine &TM);
21473471bf0Spatrick   ~GCNSubtarget() override;
21573471bf0Spatrick 
21673471bf0Spatrick   GCNSubtarget &initializeSubtargetDependencies(const Triple &TT,
21773471bf0Spatrick                                                    StringRef GPU, StringRef FS);
21873471bf0Spatrick 
getInstrInfo()21973471bf0Spatrick   const SIInstrInfo *getInstrInfo() const override {
22073471bf0Spatrick     return &InstrInfo;
22173471bf0Spatrick   }
22273471bf0Spatrick 
getFrameLowering()22373471bf0Spatrick   const SIFrameLowering *getFrameLowering() const override {
22473471bf0Spatrick     return &FrameLowering;
22573471bf0Spatrick   }
22673471bf0Spatrick 
getTargetLowering()22773471bf0Spatrick   const SITargetLowering *getTargetLowering() const override {
22873471bf0Spatrick     return &TLInfo;
22973471bf0Spatrick   }
23073471bf0Spatrick 
getRegisterInfo()23173471bf0Spatrick   const SIRegisterInfo *getRegisterInfo() const override {
23273471bf0Spatrick     return &InstrInfo.getRegisterInfo();
23373471bf0Spatrick   }
23473471bf0Spatrick 
getCallLowering()23573471bf0Spatrick   const CallLowering *getCallLowering() const override {
23673471bf0Spatrick     return CallLoweringInfo.get();
23773471bf0Spatrick   }
23873471bf0Spatrick 
getInlineAsmLowering()23973471bf0Spatrick   const InlineAsmLowering *getInlineAsmLowering() const override {
24073471bf0Spatrick     return InlineAsmLoweringInfo.get();
24173471bf0Spatrick   }
24273471bf0Spatrick 
getInstructionSelector()24373471bf0Spatrick   InstructionSelector *getInstructionSelector() const override {
24473471bf0Spatrick     return InstSelector.get();
24573471bf0Spatrick   }
24673471bf0Spatrick 
getLegalizerInfo()24773471bf0Spatrick   const LegalizerInfo *getLegalizerInfo() const override {
24873471bf0Spatrick     return Legalizer.get();
24973471bf0Spatrick   }
25073471bf0Spatrick 
getRegBankInfo()25173471bf0Spatrick   const RegisterBankInfo *getRegBankInfo() const override {
25273471bf0Spatrick     return RegBankInfo.get();
25373471bf0Spatrick   }
25473471bf0Spatrick 
getTargetID()25573471bf0Spatrick   const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const {
25673471bf0Spatrick     return TargetID;
25773471bf0Spatrick   }
25873471bf0Spatrick 
25973471bf0Spatrick   // Nothing implemented, just prevent crashes on use.
getSelectionDAGInfo()26073471bf0Spatrick   const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
26173471bf0Spatrick     return &TSInfo;
26273471bf0Spatrick   }
26373471bf0Spatrick 
getInstrItineraryData()26473471bf0Spatrick   const InstrItineraryData *getInstrItineraryData() const override {
26573471bf0Spatrick     return &InstrItins;
26673471bf0Spatrick   }
26773471bf0Spatrick 
26873471bf0Spatrick   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
26973471bf0Spatrick 
getGeneration()27073471bf0Spatrick   Generation getGeneration() const {
27173471bf0Spatrick     return (Generation)Gen;
27273471bf0Spatrick   }
27373471bf0Spatrick 
getMaxWaveScratchSize()274*d415bd75Srobert   unsigned getMaxWaveScratchSize() const {
275*d415bd75Srobert     // See COMPUTE_TMPRING_SIZE.WAVESIZE.
276*d415bd75Srobert     if (getGeneration() < GFX11) {
277*d415bd75Srobert       // 13-bit field in units of 256-dword.
278*d415bd75Srobert       return (256 * 4) * ((1 << 13) - 1);
279*d415bd75Srobert     }
280*d415bd75Srobert     // 15-bit field in units of 64-dword.
281*d415bd75Srobert     return (64 * 4) * ((1 << 15) - 1);
282*d415bd75Srobert   }
283*d415bd75Srobert 
284*d415bd75Srobert   /// Return the number of high bits known to be zero for a frame index.
getKnownHighZeroBitsForFrameIndex()28573471bf0Spatrick   unsigned getKnownHighZeroBitsForFrameIndex() const {
286*d415bd75Srobert     return countLeadingZeros(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
28773471bf0Spatrick   }
28873471bf0Spatrick 
getLDSBankCount()28973471bf0Spatrick   int getLDSBankCount() const {
29073471bf0Spatrick     return LDSBankCount;
29173471bf0Spatrick   }
29273471bf0Spatrick 
29373471bf0Spatrick   unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
29473471bf0Spatrick     return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16;
29573471bf0Spatrick   }
29673471bf0Spatrick 
29773471bf0Spatrick   unsigned getConstantBusLimit(unsigned Opcode) const;
29873471bf0Spatrick 
/// Returns true if an instruction that produces a 16-bit result in a 32-bit
/// register implicitly zeroes the high 16 bits of that register, rather than
/// preserving their original value.
30273471bf0Spatrick   bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
30373471bf0Spatrick 
supportsWGP()304*d415bd75Srobert   bool supportsWGP() const { return getGeneration() >= GFX10; }
305*d415bd75Srobert 
hasIntClamp()30673471bf0Spatrick   bool hasIntClamp() const {
30773471bf0Spatrick     return HasIntClamp;
30873471bf0Spatrick   }
30973471bf0Spatrick 
hasFP64()31073471bf0Spatrick   bool hasFP64() const {
31173471bf0Spatrick     return FP64;
31273471bf0Spatrick   }
31373471bf0Spatrick 
hasMIMG_R128()31473471bf0Spatrick   bool hasMIMG_R128() const {
31573471bf0Spatrick     return MIMG_R128;
31673471bf0Spatrick   }
31773471bf0Spatrick 
hasHWFP64()31873471bf0Spatrick   bool hasHWFP64() const {
31973471bf0Spatrick     return FP64;
32073471bf0Spatrick   }
32173471bf0Spatrick 
hasFastFMAF32()32273471bf0Spatrick   bool hasFastFMAF32() const {
32373471bf0Spatrick     return FastFMAF32;
32473471bf0Spatrick   }
32573471bf0Spatrick 
hasHalfRate64Ops()32673471bf0Spatrick   bool hasHalfRate64Ops() const {
32773471bf0Spatrick     return HalfRate64Ops;
32873471bf0Spatrick   }
32973471bf0Spatrick 
hasFullRate64Ops()33073471bf0Spatrick   bool hasFullRate64Ops() const {
33173471bf0Spatrick     return FullRate64Ops;
33273471bf0Spatrick   }
33373471bf0Spatrick 
hasAddr64()33473471bf0Spatrick   bool hasAddr64() const {
33573471bf0Spatrick     return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
33673471bf0Spatrick   }
33773471bf0Spatrick 
hasFlat()33873471bf0Spatrick   bool hasFlat() const {
33973471bf0Spatrick     return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);
34073471bf0Spatrick   }
34173471bf0Spatrick 
34273471bf0Spatrick   // Return true if the target only has the reverse operand versions of VALU
34373471bf0Spatrick   // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).
hasOnlyRevVALUShifts()34473471bf0Spatrick   bool hasOnlyRevVALUShifts() const {
34573471bf0Spatrick     return getGeneration() >= VOLCANIC_ISLANDS;
34673471bf0Spatrick   }
34773471bf0Spatrick 
hasFractBug()34873471bf0Spatrick   bool hasFractBug() const {
34973471bf0Spatrick     return getGeneration() == SOUTHERN_ISLANDS;
35073471bf0Spatrick   }
35173471bf0Spatrick 
hasBFE()35273471bf0Spatrick   bool hasBFE() const {
35373471bf0Spatrick     return true;
35473471bf0Spatrick   }
35573471bf0Spatrick 
hasBFI()35673471bf0Spatrick   bool hasBFI() const {
35773471bf0Spatrick     return true;
35873471bf0Spatrick   }
35973471bf0Spatrick 
hasBFM()36073471bf0Spatrick   bool hasBFM() const {
36173471bf0Spatrick     return hasBFE();
36273471bf0Spatrick   }
36373471bf0Spatrick 
hasBCNT(unsigned Size)36473471bf0Spatrick   bool hasBCNT(unsigned Size) const {
36573471bf0Spatrick     return true;
36673471bf0Spatrick   }
36773471bf0Spatrick 
hasFFBL()36873471bf0Spatrick   bool hasFFBL() const {
36973471bf0Spatrick     return true;
37073471bf0Spatrick   }
37173471bf0Spatrick 
hasFFBH()37273471bf0Spatrick   bool hasFFBH() const {
37373471bf0Spatrick     return true;
37473471bf0Spatrick   }
37573471bf0Spatrick 
hasMed3_16()37673471bf0Spatrick   bool hasMed3_16() const {
37773471bf0Spatrick     return getGeneration() >= AMDGPUSubtarget::GFX9;
37873471bf0Spatrick   }
37973471bf0Spatrick 
hasMin3Max3_16()38073471bf0Spatrick   bool hasMin3Max3_16() const {
38173471bf0Spatrick     return getGeneration() >= AMDGPUSubtarget::GFX9;
38273471bf0Spatrick   }
38373471bf0Spatrick 
hasFmaMixInsts()38473471bf0Spatrick   bool hasFmaMixInsts() const {
38573471bf0Spatrick     return HasFmaMixInsts;
38673471bf0Spatrick   }
38773471bf0Spatrick 
hasCARRY()38873471bf0Spatrick   bool hasCARRY() const {
38973471bf0Spatrick     return true;
39073471bf0Spatrick   }
39173471bf0Spatrick 
hasFMA()39273471bf0Spatrick   bool hasFMA() const {
39373471bf0Spatrick     return FMA;
39473471bf0Spatrick   }
39573471bf0Spatrick 
hasSwap()39673471bf0Spatrick   bool hasSwap() const {
39773471bf0Spatrick     return GFX9Insts;
39873471bf0Spatrick   }
39973471bf0Spatrick 
hasScalarPackInsts()40073471bf0Spatrick   bool hasScalarPackInsts() const {
40173471bf0Spatrick     return GFX9Insts;
40273471bf0Spatrick   }
40373471bf0Spatrick 
hasScalarMulHiInsts()40473471bf0Spatrick   bool hasScalarMulHiInsts() const {
40573471bf0Spatrick     return GFX9Insts;
40673471bf0Spatrick   }
40773471bf0Spatrick 
getTrapHandlerAbi()40873471bf0Spatrick   TrapHandlerAbi getTrapHandlerAbi() const {
40973471bf0Spatrick     return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE;
41073471bf0Spatrick   }
41173471bf0Spatrick 
supportsGetDoorbellID()41273471bf0Spatrick   bool supportsGetDoorbellID() const {
41373471bf0Spatrick     // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets.
41473471bf0Spatrick     return getGeneration() >= GFX9;
41573471bf0Spatrick   }
41673471bf0Spatrick 
41773471bf0Spatrick   /// True if the offset field of DS instructions works as expected. On SI, the
41873471bf0Spatrick   /// offset uses a 16-bit adder and does not always wrap properly.
hasUsableDSOffset()41973471bf0Spatrick   bool hasUsableDSOffset() const {
42073471bf0Spatrick     return getGeneration() >= SEA_ISLANDS;
42173471bf0Spatrick   }
42273471bf0Spatrick 
unsafeDSOffsetFoldingEnabled()42373471bf0Spatrick   bool unsafeDSOffsetFoldingEnabled() const {
42473471bf0Spatrick     return EnableUnsafeDSOffsetFolding;
42573471bf0Spatrick   }
42673471bf0Spatrick 
42773471bf0Spatrick   /// Condition output from div_scale is usable.
hasUsableDivScaleConditionOutput()42873471bf0Spatrick   bool hasUsableDivScaleConditionOutput() const {
42973471bf0Spatrick     return getGeneration() != SOUTHERN_ISLANDS;
43073471bf0Spatrick   }
43173471bf0Spatrick 
43273471bf0Spatrick   /// Extra wait hazard is needed in some cases before
43373471bf0Spatrick   /// s_cbranch_vccnz/s_cbranch_vccz.
hasReadVCCZBug()43473471bf0Spatrick   bool hasReadVCCZBug() const {
43573471bf0Spatrick     return getGeneration() <= SEA_ISLANDS;
43673471bf0Spatrick   }
43773471bf0Spatrick 
43873471bf0Spatrick   /// Writes to VCC_LO/VCC_HI update the VCCZ flag.
partialVCCWritesUpdateVCCZ()43973471bf0Spatrick   bool partialVCCWritesUpdateVCCZ() const {
44073471bf0Spatrick     return getGeneration() >= GFX10;
44173471bf0Spatrick   }
44273471bf0Spatrick 
44373471bf0Spatrick   /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR
44473471bf0Spatrick   /// was written by a VALU instruction.
hasSMRDReadVALUDefHazard()44573471bf0Spatrick   bool hasSMRDReadVALUDefHazard() const {
44673471bf0Spatrick     return getGeneration() == SOUTHERN_ISLANDS;
44773471bf0Spatrick   }
44873471bf0Spatrick 
44973471bf0Spatrick   /// A read of an SGPR by a VMEM instruction requires 5 wait states when the
45073471bf0Spatrick   /// SGPR was written by a VALU Instruction.
hasVMEMReadSGPRVALUDefHazard()45173471bf0Spatrick   bool hasVMEMReadSGPRVALUDefHazard() const {
45273471bf0Spatrick     return getGeneration() >= VOLCANIC_ISLANDS;
45373471bf0Spatrick   }
45473471bf0Spatrick 
hasRFEHazards()45573471bf0Spatrick   bool hasRFEHazards() const {
45673471bf0Spatrick     return getGeneration() >= VOLCANIC_ISLANDS;
45773471bf0Spatrick   }
45873471bf0Spatrick 
45973471bf0Spatrick   /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
getSetRegWaitStates()46073471bf0Spatrick   unsigned getSetRegWaitStates() const {
46173471bf0Spatrick     return getGeneration() <= SEA_ISLANDS ? 1 : 2;
46273471bf0Spatrick   }
46373471bf0Spatrick 
dumpCode()46473471bf0Spatrick   bool dumpCode() const {
46573471bf0Spatrick     return DumpCode;
46673471bf0Spatrick   }
46773471bf0Spatrick 
/// Return the amount of LDS that can be used without restricting the
/// occupancy to fewer than \p WaveCount waves.
47073471bf0Spatrick   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
47173471bf0Spatrick                                            const Function &) const;
47273471bf0Spatrick 
supportsMinMaxDenormModes()47373471bf0Spatrick   bool supportsMinMaxDenormModes() const {
47473471bf0Spatrick     return getGeneration() >= AMDGPUSubtarget::GFX9;
47573471bf0Spatrick   }
47673471bf0Spatrick 
/// \returns true if the target supports S_DENORM_MODE.
hasDenormModeInst()47873471bf0Spatrick   bool hasDenormModeInst() const {
47973471bf0Spatrick     return getGeneration() >= AMDGPUSubtarget::GFX10;
48073471bf0Spatrick   }
48173471bf0Spatrick 
useFlatForGlobal()48273471bf0Spatrick   bool useFlatForGlobal() const {
48373471bf0Spatrick     return FlatForGlobal;
48473471bf0Spatrick   }
48573471bf0Spatrick 
/// \returns true if the target supports ds_read/write_b128 and the user has
/// enabled generation of ds_read/write_b128.
useDS128()48873471bf0Spatrick   bool useDS128() const {
48973471bf0Spatrick     return CIInsts && EnableDS128;
49073471bf0Spatrick   }
49173471bf0Spatrick 
/// \returns true if the target supports ds_read/write_b96/128.
hasDS96AndDS128()49373471bf0Spatrick   bool hasDS96AndDS128() const {
49473471bf0Spatrick     return CIInsts;
49573471bf0Spatrick   }
49673471bf0Spatrick 
49773471bf0Spatrick   /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64
haveRoundOpsF64()49873471bf0Spatrick   bool haveRoundOpsF64() const {
49973471bf0Spatrick     return CIInsts;
50073471bf0Spatrick   }
50173471bf0Spatrick 
/// \returns true if MUBUF instructions always perform range checking, even
/// for buffer resources used for private memory access.
privateMemoryResourceIsRangeChecked()50473471bf0Spatrick   bool privateMemoryResourceIsRangeChecked() const {
50573471bf0Spatrick     return getGeneration() < AMDGPUSubtarget::GFX9;
50673471bf0Spatrick   }
50773471bf0Spatrick 
/// \returns true if the target requires PRT Strict NULL support (zero result
/// registers for sparse texture support).
usePRTStrictNull()51073471bf0Spatrick   bool usePRTStrictNull() const {
51173471bf0Spatrick     return EnablePRTStrictNull;
51273471bf0Spatrick   }
51373471bf0Spatrick 
hasAutoWaitcntBeforeBarrier()51473471bf0Spatrick   bool hasAutoWaitcntBeforeBarrier() const {
51573471bf0Spatrick     return AutoWaitcntBeforeBarrier;
51673471bf0Spatrick   }
51773471bf0Spatrick 
518*d415bd75Srobert   /// \returns true if the target supports backing off of s_barrier instructions
519*d415bd75Srobert   /// when an exception is raised.
supportsBackOffBarrier()520*d415bd75Srobert   bool supportsBackOffBarrier() const {
521*d415bd75Srobert     return BackOffBarrier;
522*d415bd75Srobert   }
523*d415bd75Srobert 
hasUnalignedBufferAccess()52473471bf0Spatrick   bool hasUnalignedBufferAccess() const {
52573471bf0Spatrick     return UnalignedBufferAccess;
52673471bf0Spatrick   }
52773471bf0Spatrick 
hasUnalignedBufferAccessEnabled()52873471bf0Spatrick   bool hasUnalignedBufferAccessEnabled() const {
52973471bf0Spatrick     return UnalignedBufferAccess && UnalignedAccessMode;
53073471bf0Spatrick   }
53173471bf0Spatrick 
hasUnalignedDSAccess()53273471bf0Spatrick   bool hasUnalignedDSAccess() const {
53373471bf0Spatrick     return UnalignedDSAccess;
53473471bf0Spatrick   }
53573471bf0Spatrick 
hasUnalignedDSAccessEnabled()53673471bf0Spatrick   bool hasUnalignedDSAccessEnabled() const {
53773471bf0Spatrick     return UnalignedDSAccess && UnalignedAccessMode;
53873471bf0Spatrick   }
53973471bf0Spatrick 
hasUnalignedScratchAccess()54073471bf0Spatrick   bool hasUnalignedScratchAccess() const {
54173471bf0Spatrick     return UnalignedScratchAccess;
54273471bf0Spatrick   }
54373471bf0Spatrick 
hasUnalignedAccessMode()54473471bf0Spatrick   bool hasUnalignedAccessMode() const {
54573471bf0Spatrick     return UnalignedAccessMode;
54673471bf0Spatrick   }
54773471bf0Spatrick 
hasApertureRegs()54873471bf0Spatrick   bool hasApertureRegs() const {
54973471bf0Spatrick     return HasApertureRegs;
55073471bf0Spatrick   }
55173471bf0Spatrick 
isTrapHandlerEnabled()55273471bf0Spatrick   bool isTrapHandlerEnabled() const {
55373471bf0Spatrick     return TrapHandler;
55473471bf0Spatrick   }
55573471bf0Spatrick 
isXNACKEnabled()55673471bf0Spatrick   bool isXNACKEnabled() const {
55773471bf0Spatrick     return TargetID.isXnackOnOrAny();
55873471bf0Spatrick   }
55973471bf0Spatrick 
isTgSplitEnabled()56073471bf0Spatrick   bool isTgSplitEnabled() const {
56173471bf0Spatrick     return EnableTgSplit;
56273471bf0Spatrick   }
56373471bf0Spatrick 
isCuModeEnabled()56473471bf0Spatrick   bool isCuModeEnabled() const {
56573471bf0Spatrick     return EnableCuMode;
56673471bf0Spatrick   }
56773471bf0Spatrick 
hasFlatAddressSpace()56873471bf0Spatrick   bool hasFlatAddressSpace() const {
56973471bf0Spatrick     return FlatAddressSpace;
57073471bf0Spatrick   }
57173471bf0Spatrick 
hasFlatScrRegister()57273471bf0Spatrick   bool hasFlatScrRegister() const {
57373471bf0Spatrick     return hasFlatAddressSpace();
57473471bf0Spatrick   }
57573471bf0Spatrick 
hasFlatInstOffsets()57673471bf0Spatrick   bool hasFlatInstOffsets() const {
57773471bf0Spatrick     return FlatInstOffsets;
57873471bf0Spatrick   }
57973471bf0Spatrick 
hasFlatGlobalInsts()58073471bf0Spatrick   bool hasFlatGlobalInsts() const {
58173471bf0Spatrick     return FlatGlobalInsts;
58273471bf0Spatrick   }
58373471bf0Spatrick 
hasFlatScratchInsts()58473471bf0Spatrick   bool hasFlatScratchInsts() const {
58573471bf0Spatrick     return FlatScratchInsts;
58673471bf0Spatrick   }
58773471bf0Spatrick 
58873471bf0Spatrick   // Check if target supports ST addressing mode with FLAT scratch instructions.
58973471bf0Spatrick   // The ST addressing mode means no registers are used, either VGPR or SGPR,
59073471bf0Spatrick   // but only immediate offset is swizzled and added to the FLAT scratch base.
hasFlatScratchSTMode()59173471bf0Spatrick   bool hasFlatScratchSTMode() const {
592*d415bd75Srobert     return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts());
59373471bf0Spatrick   }
59473471bf0Spatrick 
hasFlatScratchSVSMode()595*d415bd75Srobert   bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
596*d415bd75Srobert 
hasScalarFlatScratchInsts()59773471bf0Spatrick   bool hasScalarFlatScratchInsts() const {
59873471bf0Spatrick     return ScalarFlatScratchInsts;
59973471bf0Spatrick   }
60073471bf0Spatrick 
enableFlatScratch()601*d415bd75Srobert   bool enableFlatScratch() const {
602*d415bd75Srobert     return flatScratchIsArchitected() ||
603*d415bd75Srobert            (EnableFlatScratch && hasFlatScratchInsts());
604*d415bd75Srobert   }
605*d415bd75Srobert 
hasGlobalAddTidInsts()60673471bf0Spatrick   bool hasGlobalAddTidInsts() const {
60773471bf0Spatrick     return GFX10_BEncoding;
60873471bf0Spatrick   }
60973471bf0Spatrick 
hasAtomicCSub()61073471bf0Spatrick   bool hasAtomicCSub() const {
61173471bf0Spatrick     return GFX10_BEncoding;
61273471bf0Spatrick   }
61373471bf0Spatrick 
hasMultiDwordFlatScratchAddressing()61473471bf0Spatrick   bool hasMultiDwordFlatScratchAddressing() const {
61573471bf0Spatrick     return getGeneration() >= GFX9;
61673471bf0Spatrick   }
61773471bf0Spatrick 
hasFlatSegmentOffsetBug()61873471bf0Spatrick   bool hasFlatSegmentOffsetBug() const {
61973471bf0Spatrick     return HasFlatSegmentOffsetBug;
62073471bf0Spatrick   }
62173471bf0Spatrick 
hasFlatLgkmVMemCountInOrder()62273471bf0Spatrick   bool hasFlatLgkmVMemCountInOrder() const {
62373471bf0Spatrick     return getGeneration() > GFX9;
62473471bf0Spatrick   }
62573471bf0Spatrick 
hasD16LoadStore()62673471bf0Spatrick   bool hasD16LoadStore() const {
62773471bf0Spatrick     return getGeneration() >= GFX9;
62873471bf0Spatrick   }
62973471bf0Spatrick 
d16PreservesUnusedBits()63073471bf0Spatrick   bool d16PreservesUnusedBits() const {
63173471bf0Spatrick     return hasD16LoadStore() && !TargetID.isSramEccOnOrAny();
63273471bf0Spatrick   }
63373471bf0Spatrick 
hasD16Images()63473471bf0Spatrick   bool hasD16Images() const {
63573471bf0Spatrick     return getGeneration() >= VOLCANIC_ISLANDS;
63673471bf0Spatrick   }
63773471bf0Spatrick 
/// Return true if most LDS instructions have an m0 use that requires m0 to
/// be initialized.
ldsRequiresM0Init()64073471bf0Spatrick   bool ldsRequiresM0Init() const {
64173471bf0Spatrick     return getGeneration() < GFX9;
64273471bf0Spatrick   }
64373471bf0Spatrick 
64473471bf0Spatrick   // True if the hardware rewinds and replays GWS operations if a wave is
64573471bf0Spatrick   // preempted.
64673471bf0Spatrick   //
64773471bf0Spatrick   // If this is false, a GWS operation requires testing if a nack set the
64873471bf0Spatrick   // MEM_VIOL bit, and repeating if so.
hasGWSAutoReplay()64973471bf0Spatrick   bool hasGWSAutoReplay() const {
65073471bf0Spatrick     return getGeneration() >= GFX9;
65173471bf0Spatrick   }
65273471bf0Spatrick 
/// \returns true if the target has the ds_gws_sema_release_all instruction.
hasGWSSemaReleaseAll()65473471bf0Spatrick   bool hasGWSSemaReleaseAll() const {
65573471bf0Spatrick     return CIInsts;
65673471bf0Spatrick   }
65773471bf0Spatrick 
65873471bf0Spatrick   /// \returns true if the target has integer add/sub instructions that do not
65973471bf0Spatrick   /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32,
66073471bf0Spatrick   /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier
66173471bf0Spatrick   /// for saturation.
hasAddNoCarry()66273471bf0Spatrick   bool hasAddNoCarry() const {
66373471bf0Spatrick     return AddNoCarryInsts;
66473471bf0Spatrick   }
66573471bf0Spatrick 
hasUnpackedD16VMem()66673471bf0Spatrick   bool hasUnpackedD16VMem() const {
66773471bf0Spatrick     return HasUnpackedD16VMem;
66873471bf0Spatrick   }
66973471bf0Spatrick 
67073471bf0Spatrick   // Covers VS/PS/CS graphics shaders
isMesaGfxShader(const Function & F)67173471bf0Spatrick   bool isMesaGfxShader(const Function &F) const {
67273471bf0Spatrick     return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv());
67373471bf0Spatrick   }
67473471bf0Spatrick 
hasMad64_32()67573471bf0Spatrick   bool hasMad64_32() const {
67673471bf0Spatrick     return getGeneration() >= SEA_ISLANDS;
67773471bf0Spatrick   }
67873471bf0Spatrick 
hasSDWAOmod()67973471bf0Spatrick   bool hasSDWAOmod() const {
68073471bf0Spatrick     return HasSDWAOmod;
68173471bf0Spatrick   }
68273471bf0Spatrick 
hasSDWAScalar()68373471bf0Spatrick   bool hasSDWAScalar() const {
68473471bf0Spatrick     return HasSDWAScalar;
68573471bf0Spatrick   }
68673471bf0Spatrick 
hasSDWASdst()68773471bf0Spatrick   bool hasSDWASdst() const {
68873471bf0Spatrick     return HasSDWASdst;
68973471bf0Spatrick   }
69073471bf0Spatrick 
hasSDWAMac()69173471bf0Spatrick   bool hasSDWAMac() const {
69273471bf0Spatrick     return HasSDWAMac;
69373471bf0Spatrick   }
69473471bf0Spatrick 
hasSDWAOutModsVOPC()69573471bf0Spatrick   bool hasSDWAOutModsVOPC() const {
69673471bf0Spatrick     return HasSDWAOutModsVOPC;
69773471bf0Spatrick   }
69873471bf0Spatrick 
hasDLInsts()69973471bf0Spatrick   bool hasDLInsts() const {
70073471bf0Spatrick     return HasDLInsts;
70173471bf0Spatrick   }
70273471bf0Spatrick 
hasFmacF64Inst()703*d415bd75Srobert   bool hasFmacF64Inst() const { return HasFmacF64Inst; }
704*d415bd75Srobert 
hasDot1Insts()70573471bf0Spatrick   bool hasDot1Insts() const {
70673471bf0Spatrick     return HasDot1Insts;
70773471bf0Spatrick   }
70873471bf0Spatrick 
hasDot2Insts()70973471bf0Spatrick   bool hasDot2Insts() const {
71073471bf0Spatrick     return HasDot2Insts;
71173471bf0Spatrick   }
71273471bf0Spatrick 
hasDot3Insts()71373471bf0Spatrick   bool hasDot3Insts() const {
71473471bf0Spatrick     return HasDot3Insts;
71573471bf0Spatrick   }
71673471bf0Spatrick 
hasDot4Insts()71773471bf0Spatrick   bool hasDot4Insts() const {
71873471bf0Spatrick     return HasDot4Insts;
71973471bf0Spatrick   }
72073471bf0Spatrick 
hasDot5Insts()72173471bf0Spatrick   bool hasDot5Insts() const {
72273471bf0Spatrick     return HasDot5Insts;
72373471bf0Spatrick   }
72473471bf0Spatrick 
hasDot6Insts()72573471bf0Spatrick   bool hasDot6Insts() const {
72673471bf0Spatrick     return HasDot6Insts;
72773471bf0Spatrick   }
72873471bf0Spatrick 
hasDot7Insts()72973471bf0Spatrick   bool hasDot7Insts() const {
73073471bf0Spatrick     return HasDot7Insts;
73173471bf0Spatrick   }
73273471bf0Spatrick 
hasDot8Insts()733*d415bd75Srobert   bool hasDot8Insts() const {
734*d415bd75Srobert     return HasDot8Insts;
735*d415bd75Srobert   }
736*d415bd75Srobert 
hasDot9Insts()737*d415bd75Srobert   bool hasDot9Insts() const {
738*d415bd75Srobert     return HasDot9Insts;
739*d415bd75Srobert   }
740*d415bd75Srobert 
hasMAIInsts()74173471bf0Spatrick   bool hasMAIInsts() const {
74273471bf0Spatrick     return HasMAIInsts;
74373471bf0Spatrick   }
74473471bf0Spatrick 
hasFP8Insts()745*d415bd75Srobert   bool hasFP8Insts() const {
746*d415bd75Srobert     return HasFP8Insts;
747*d415bd75Srobert   }
748*d415bd75Srobert 
hasPkFmacF16Inst()74973471bf0Spatrick   bool hasPkFmacF16Inst() const {
75073471bf0Spatrick     return HasPkFmacF16Inst;
75173471bf0Spatrick   }
75273471bf0Spatrick 
hasAtomicFaddInsts()75373471bf0Spatrick   bool hasAtomicFaddInsts() const {
754*d415bd75Srobert     return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
75573471bf0Spatrick   }
75673471bf0Spatrick 
hasAtomicFaddRtnInsts()757*d415bd75Srobert   bool hasAtomicFaddRtnInsts() const { return HasAtomicFaddRtnInsts; }
758*d415bd75Srobert 
hasAtomicFaddNoRtnInsts()759*d415bd75Srobert   bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }
760*d415bd75Srobert 
hasAtomicPkFaddNoRtnInsts()761*d415bd75Srobert   bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; }
762*d415bd75Srobert 
hasFlatAtomicFaddF32Inst()763*d415bd75Srobert   bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
764*d415bd75Srobert 
hasNoSdstCMPX()76573471bf0Spatrick   bool hasNoSdstCMPX() const {
76673471bf0Spatrick     return HasNoSdstCMPX;
76773471bf0Spatrick   }
76873471bf0Spatrick 
hasVscnt()76973471bf0Spatrick   bool hasVscnt() const {
77073471bf0Spatrick     return HasVscnt;
77173471bf0Spatrick   }
77273471bf0Spatrick 
hasGetWaveIdInst()77373471bf0Spatrick   bool hasGetWaveIdInst() const {
77473471bf0Spatrick     return HasGetWaveIdInst;
77573471bf0Spatrick   }
77673471bf0Spatrick 
hasSMemTimeInst()77773471bf0Spatrick   bool hasSMemTimeInst() const {
77873471bf0Spatrick     return HasSMemTimeInst;
77973471bf0Spatrick   }
78073471bf0Spatrick 
hasShaderCyclesRegister()78173471bf0Spatrick   bool hasShaderCyclesRegister() const {
78273471bf0Spatrick     return HasShaderCyclesRegister;
78373471bf0Spatrick   }
78473471bf0Spatrick 
hasVOP3Literal()78573471bf0Spatrick   bool hasVOP3Literal() const {
78673471bf0Spatrick     return HasVOP3Literal;
78773471bf0Spatrick   }
78873471bf0Spatrick 
hasNoDataDepHazard()78973471bf0Spatrick   bool hasNoDataDepHazard() const {
79073471bf0Spatrick     return HasNoDataDepHazard;
79173471bf0Spatrick   }
79273471bf0Spatrick 
vmemWriteNeedsExpWaitcnt()79373471bf0Spatrick   bool vmemWriteNeedsExpWaitcnt() const {
79473471bf0Spatrick     return getGeneration() < SEA_ISLANDS;
79573471bf0Spatrick   }
79673471bf0Spatrick 
hasInstPrefetch()797*d415bd75Srobert   bool hasInstPrefetch() const { return getGeneration() >= GFX10; }
798*d415bd75Srobert 
79973471bf0Spatrick   // Scratch is allocated in 256 dword per wave blocks for the entire
800*d415bd75Srobert   // wavefront. When viewed from the perspective of an arbitrary workitem, this
80173471bf0Spatrick   // is 4-byte aligned.
80273471bf0Spatrick   //
80373471bf0Spatrick   // Only 4-byte alignment is really needed to access anything. Transformations
80473471bf0Spatrick   // on the pointer value itself may rely on the alignment / known low bits of
80573471bf0Spatrick   // the pointer. Set this to something above the minimum to avoid needing
80673471bf0Spatrick   // dynamic realignment in common cases.
getStackAlignment()80773471bf0Spatrick   Align getStackAlignment() const { return Align(16); }
80873471bf0Spatrick 
enableMachineScheduler()80973471bf0Spatrick   bool enableMachineScheduler() const override {
81073471bf0Spatrick     return true;
81173471bf0Spatrick   }
81273471bf0Spatrick 
81373471bf0Spatrick   bool useAA() const override;
81473471bf0Spatrick 
enableSubRegLiveness()81573471bf0Spatrick   bool enableSubRegLiveness() const override {
81673471bf0Spatrick     return true;
81773471bf0Spatrick   }
81873471bf0Spatrick 
setScalarizeGlobalBehavior(bool b)81973471bf0Spatrick   void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
getScalarizeGlobalBehavior()82073471bf0Spatrick   bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
82173471bf0Spatrick 
82273471bf0Spatrick   // static wrappers
82373471bf0Spatrick   static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
82473471bf0Spatrick 
82573471bf0Spatrick   // XXX - Why is this here if it isn't in the default pass set?
enableEarlyIfConversion()82673471bf0Spatrick   bool enableEarlyIfConversion() const override {
82773471bf0Spatrick     return true;
82873471bf0Spatrick   }
82973471bf0Spatrick 
83073471bf0Spatrick   void overrideSchedPolicy(MachineSchedPolicy &Policy,
83173471bf0Spatrick                            unsigned NumRegionInstrs) const override;
83273471bf0Spatrick 
getMaxNumUserSGPRs()83373471bf0Spatrick   unsigned getMaxNumUserSGPRs() const {
83473471bf0Spatrick     return 16;
83573471bf0Spatrick   }
83673471bf0Spatrick 
hasSMemRealTime()83773471bf0Spatrick   bool hasSMemRealTime() const {
83873471bf0Spatrick     return HasSMemRealTime;
83973471bf0Spatrick   }
84073471bf0Spatrick 
hasMovrel()84173471bf0Spatrick   bool hasMovrel() const {
84273471bf0Spatrick     return HasMovrel;
84373471bf0Spatrick   }
84473471bf0Spatrick 
hasVGPRIndexMode()84573471bf0Spatrick   bool hasVGPRIndexMode() const {
84673471bf0Spatrick     return HasVGPRIndexMode;
84773471bf0Spatrick   }
84873471bf0Spatrick 
84973471bf0Spatrick   bool useVGPRIndexMode() const;
85073471bf0Spatrick 
hasScalarCompareEq64()85173471bf0Spatrick   bool hasScalarCompareEq64() const {
85273471bf0Spatrick     return getGeneration() >= VOLCANIC_ISLANDS;
85373471bf0Spatrick   }
85473471bf0Spatrick 
hasScalarStores()85573471bf0Spatrick   bool hasScalarStores() const {
85673471bf0Spatrick     return HasScalarStores;
85773471bf0Spatrick   }
85873471bf0Spatrick 
hasScalarAtomics()85973471bf0Spatrick   bool hasScalarAtomics() const {
86073471bf0Spatrick     return HasScalarAtomics;
86173471bf0Spatrick   }
86273471bf0Spatrick 
hasLDSFPAtomicAdd()863*d415bd75Srobert   bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
86473471bf0Spatrick 
86573471bf0Spatrick   /// \returns true if the subtarget has the v_permlanex16_b32 instruction.
hasPermLaneX16()86673471bf0Spatrick   bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
86773471bf0Spatrick 
868*d415bd75Srobert   /// \returns true if the subtarget has the v_permlane64_b32 instruction.
hasPermLane64()869*d415bd75Srobert   bool hasPermLane64() const { return getGeneration() >= GFX11; }
870*d415bd75Srobert 
hasDPP()87173471bf0Spatrick   bool hasDPP() const {
87273471bf0Spatrick     return HasDPP;
87373471bf0Spatrick   }
87473471bf0Spatrick 
hasDPPBroadcasts()87573471bf0Spatrick   bool hasDPPBroadcasts() const {
87673471bf0Spatrick     return HasDPP && getGeneration() < GFX10;
87773471bf0Spatrick   }
87873471bf0Spatrick 
hasDPPWavefrontShifts()87973471bf0Spatrick   bool hasDPPWavefrontShifts() const {
88073471bf0Spatrick     return HasDPP && getGeneration() < GFX10;
88173471bf0Spatrick   }
88273471bf0Spatrick 
hasDPP8()88373471bf0Spatrick   bool hasDPP8() const {
88473471bf0Spatrick     return HasDPP8;
88573471bf0Spatrick   }
88673471bf0Spatrick 
has64BitDPP()88773471bf0Spatrick   bool has64BitDPP() const {
88873471bf0Spatrick     return Has64BitDPP;
88973471bf0Spatrick   }
89073471bf0Spatrick 
hasPackedFP32Ops()89173471bf0Spatrick   bool hasPackedFP32Ops() const {
89273471bf0Spatrick     return HasPackedFP32Ops;
89373471bf0Spatrick   }
89473471bf0Spatrick 
hasFmaakFmamkF32Insts()89573471bf0Spatrick   bool hasFmaakFmamkF32Insts() const {
896*d415bd75Srobert     return getGeneration() >= GFX10 || hasGFX940Insts();
897*d415bd75Srobert   }
898*d415bd75Srobert 
hasImageInsts()899*d415bd75Srobert   bool hasImageInsts() const {
900*d415bd75Srobert     return HasImageInsts;
90173471bf0Spatrick   }
90273471bf0Spatrick 
hasExtendedImageInsts()90373471bf0Spatrick   bool hasExtendedImageInsts() const {
90473471bf0Spatrick     return HasExtendedImageInsts;
90573471bf0Spatrick   }
90673471bf0Spatrick 
hasR128A16()90773471bf0Spatrick   bool hasR128A16() const {
90873471bf0Spatrick     return HasR128A16;
90973471bf0Spatrick   }
91073471bf0Spatrick 
hasA16()911*d415bd75Srobert   bool hasA16() const { return HasA16; }
91273471bf0Spatrick 
hasG16()91373471bf0Spatrick   bool hasG16() const { return HasG16; }
91473471bf0Spatrick 
hasOffset3fBug()91573471bf0Spatrick   bool hasOffset3fBug() const {
91673471bf0Spatrick     return HasOffset3fBug;
91773471bf0Spatrick   }
91873471bf0Spatrick 
hasImageStoreD16Bug()91973471bf0Spatrick   bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; }
92073471bf0Spatrick 
hasImageGather4D16Bug()92173471bf0Spatrick   bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; }
92273471bf0Spatrick 
hasMADIntraFwdBug()923*d415bd75Srobert   bool hasMADIntraFwdBug() const { return HasMADIntraFwdBug; }
924*d415bd75Srobert 
hasNSAEncoding()92573471bf0Spatrick   bool hasNSAEncoding() const { return HasNSAEncoding; }
92673471bf0Spatrick 
getNSAMaxSize()92773471bf0Spatrick   unsigned getNSAMaxSize() const { return NSAMaxSize; }
92873471bf0Spatrick 
hasGFX10_AEncoding()92973471bf0Spatrick   bool hasGFX10_AEncoding() const {
93073471bf0Spatrick     return GFX10_AEncoding;
93173471bf0Spatrick   }
93273471bf0Spatrick 
hasGFX10_BEncoding()93373471bf0Spatrick   bool hasGFX10_BEncoding() const {
93473471bf0Spatrick     return GFX10_BEncoding;
93573471bf0Spatrick   }
93673471bf0Spatrick 
hasGFX10_3Insts()93773471bf0Spatrick   bool hasGFX10_3Insts() const {
93873471bf0Spatrick     return GFX10_3Insts;
93973471bf0Spatrick   }
94073471bf0Spatrick 
94173471bf0Spatrick   bool hasMadF16() const;
94273471bf0Spatrick 
hasMovB64()943*d415bd75Srobert   bool hasMovB64() const { return GFX940Insts; }
944*d415bd75Srobert 
hasLshlAddB64()945*d415bd75Srobert   bool hasLshlAddB64() const { return GFX940Insts; }
946*d415bd75Srobert 
enableSIScheduler()94773471bf0Spatrick   bool enableSIScheduler() const {
94873471bf0Spatrick     return EnableSIScheduler;
94973471bf0Spatrick   }
95073471bf0Spatrick 
loadStoreOptEnabled()95173471bf0Spatrick   bool loadStoreOptEnabled() const {
95273471bf0Spatrick     return EnableLoadStoreOpt;
95373471bf0Spatrick   }
95473471bf0Spatrick 
hasSGPRInitBug()95573471bf0Spatrick   bool hasSGPRInitBug() const {
95673471bf0Spatrick     return SGPRInitBug;
95773471bf0Spatrick   }
95873471bf0Spatrick 
hasUserSGPRInit16Bug()959*d415bd75Srobert   bool hasUserSGPRInit16Bug() const {
960*d415bd75Srobert     return UserSGPRInit16Bug && isWave32();
961*d415bd75Srobert   }
962*d415bd75Srobert 
hasNegativeScratchOffsetBug()96373471bf0Spatrick   bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; }
96473471bf0Spatrick 
hasNegativeUnalignedScratchOffsetBug()96573471bf0Spatrick   bool hasNegativeUnalignedScratchOffsetBug() const {
96673471bf0Spatrick     return NegativeUnalignedScratchOffsetBug;
96773471bf0Spatrick   }
96873471bf0Spatrick 
hasMFMAInlineLiteralBug()96973471bf0Spatrick   bool hasMFMAInlineLiteralBug() const {
97073471bf0Spatrick     return HasMFMAInlineLiteralBug;
97173471bf0Spatrick   }
97273471bf0Spatrick 
has12DWordStoreHazard()97373471bf0Spatrick   bool has12DWordStoreHazard() const {
97473471bf0Spatrick     return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
97573471bf0Spatrick   }
97673471bf0Spatrick 
97773471bf0Spatrick   // \returns true if the subtarget supports DWORDX3 load/store instructions.
hasDwordx3LoadStores()97873471bf0Spatrick   bool hasDwordx3LoadStores() const {
97973471bf0Spatrick     return CIInsts;
98073471bf0Spatrick   }
98173471bf0Spatrick 
hasReadM0MovRelInterpHazard()98273471bf0Spatrick   bool hasReadM0MovRelInterpHazard() const {
98373471bf0Spatrick     return getGeneration() == AMDGPUSubtarget::GFX9;
98473471bf0Spatrick   }
98573471bf0Spatrick 
hasReadM0SendMsgHazard()98673471bf0Spatrick   bool hasReadM0SendMsgHazard() const {
98773471bf0Spatrick     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
98873471bf0Spatrick            getGeneration() <= AMDGPUSubtarget::GFX9;
98973471bf0Spatrick   }
99073471bf0Spatrick 
hasReadM0LdsDmaHazard()991*d415bd75Srobert   bool hasReadM0LdsDmaHazard() const {
992*d415bd75Srobert     return getGeneration() == AMDGPUSubtarget::GFX9;
993*d415bd75Srobert   }
994*d415bd75Srobert 
hasReadM0LdsDirectHazard()995*d415bd75Srobert   bool hasReadM0LdsDirectHazard() const {
996*d415bd75Srobert     return getGeneration() == AMDGPUSubtarget::GFX9;
997*d415bd75Srobert   }
998*d415bd75Srobert 
hasVcmpxPermlaneHazard()99973471bf0Spatrick   bool hasVcmpxPermlaneHazard() const {
100073471bf0Spatrick     return HasVcmpxPermlaneHazard;
100173471bf0Spatrick   }
100273471bf0Spatrick 
hasVMEMtoScalarWriteHazard()100373471bf0Spatrick   bool hasVMEMtoScalarWriteHazard() const {
100473471bf0Spatrick     return HasVMEMtoScalarWriteHazard;
100573471bf0Spatrick   }
100673471bf0Spatrick 
hasSMEMtoVectorWriteHazard()100773471bf0Spatrick   bool hasSMEMtoVectorWriteHazard() const {
100873471bf0Spatrick     return HasSMEMtoVectorWriteHazard;
100973471bf0Spatrick   }
101073471bf0Spatrick 
hasLDSMisalignedBug()101173471bf0Spatrick   bool hasLDSMisalignedBug() const {
101273471bf0Spatrick     return LDSMisalignedBug && !EnableCuMode;
101373471bf0Spatrick   }
101473471bf0Spatrick 
hasInstFwdPrefetchBug()101573471bf0Spatrick   bool hasInstFwdPrefetchBug() const {
101673471bf0Spatrick     return HasInstFwdPrefetchBug;
101773471bf0Spatrick   }
101873471bf0Spatrick 
hasVcmpxExecWARHazard()101973471bf0Spatrick   bool hasVcmpxExecWARHazard() const {
102073471bf0Spatrick     return HasVcmpxExecWARHazard;
102173471bf0Spatrick   }
102273471bf0Spatrick 
hasLdsBranchVmemWARHazard()102373471bf0Spatrick   bool hasLdsBranchVmemWARHazard() const {
102473471bf0Spatrick     return HasLdsBranchVmemWARHazard;
102573471bf0Spatrick   }
102673471bf0Spatrick 
1027*d415bd75Srobert   // Shift amount of a 64 bit shift cannot be a highest allocated register
1028*d415bd75Srobert   // if also at the end of the allocation block.
hasShift64HighRegBug()1029*d415bd75Srobert   bool hasShift64HighRegBug() const {
1030*d415bd75Srobert     return GFX90AInsts && !GFX940Insts;
1031*d415bd75Srobert   }
1032*d415bd75Srobert 
1033*d415bd75Srobert   // Has one cycle hazard on transcendental instruction feeding a
1034*d415bd75Srobert   // non transcendental VALU.
hasTransForwardingHazard()1035*d415bd75Srobert   bool hasTransForwardingHazard() const { return GFX940Insts; }
1036*d415bd75Srobert 
1037*d415bd75Srobert   // Has one cycle hazard on a VALU instruction partially writing dst with
1038*d415bd75Srobert   // a shift of result bits feeding another VALU instruction.
hasDstSelForwardingHazard()1039*d415bd75Srobert   bool hasDstSelForwardingHazard() const { return GFX940Insts; }
1040*d415bd75Srobert 
1041*d415bd75Srobert   // Cannot use op_sel with v_dot instructions.
hasDOTOpSelHazard()1042*d415bd75Srobert   bool hasDOTOpSelHazard() const { return GFX940Insts; }
1043*d415bd75Srobert 
1044*d415bd75Srobert   // Does not have HW interlocs for VALU writing and then reading SGPRs.
hasVDecCoExecHazard()1045*d415bd75Srobert   bool hasVDecCoExecHazard() const {
1046*d415bd75Srobert     return GFX940Insts;
1047*d415bd75Srobert   }
1048*d415bd75Srobert 
hasNSAtoVMEMBug()104973471bf0Spatrick   bool hasNSAtoVMEMBug() const {
105073471bf0Spatrick     return HasNSAtoVMEMBug;
105173471bf0Spatrick   }
105273471bf0Spatrick 
hasNSAClauseBug()105373471bf0Spatrick   bool hasNSAClauseBug() const { return HasNSAClauseBug; }
105473471bf0Spatrick 
hasHardClauses()105573471bf0Spatrick   bool hasHardClauses() const { return getGeneration() >= GFX10; }
105673471bf0Spatrick 
hasGFX90AInsts()105773471bf0Spatrick   bool hasGFX90AInsts() const { return GFX90AInsts; }
105873471bf0Spatrick 
hasFPAtomicToDenormModeHazard()1059*d415bd75Srobert   bool hasFPAtomicToDenormModeHazard() const {
1060*d415bd75Srobert     return getGeneration() == GFX10;
1061*d415bd75Srobert   }
1062*d415bd75Srobert 
hasVOP3DPP()1063*d415bd75Srobert   bool hasVOP3DPP() const { return getGeneration() >= GFX11; }
1064*d415bd75Srobert 
hasLdsDirect()1065*d415bd75Srobert   bool hasLdsDirect() const { return getGeneration() >= GFX11; }
1066*d415bd75Srobert 
hasVALUPartialForwardingHazard()1067*d415bd75Srobert   bool hasVALUPartialForwardingHazard() const {
1068*d415bd75Srobert     return getGeneration() >= GFX11;
1069*d415bd75Srobert   }
1070*d415bd75Srobert 
hasVALUTransUseHazard()1071*d415bd75Srobert   bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; }
1072*d415bd75Srobert 
hasVALUMaskWriteHazard()1073*d415bd75Srobert   bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; }
1074*d415bd75Srobert 
107573471bf0Spatrick   /// Return if operations acting on VGPR tuples require even alignment.
needsAlignedVGPRs()107673471bf0Spatrick   bool needsAlignedVGPRs() const { return GFX90AInsts; }
107773471bf0Spatrick 
1078*d415bd75Srobert   /// Return true if the target has the S_PACK_HL_B32_B16 instruction.
hasSPackHL()1079*d415bd75Srobert   bool hasSPackHL() const { return GFX11Insts; }
1080*d415bd75Srobert 
1081*d415bd75Srobert   /// Return true if the target's EXP instruction has the COMPR flag, which
1082*d415bd75Srobert   /// affects the meaning of the EN (enable) bits.
hasCompressedExport()1083*d415bd75Srobert   bool hasCompressedExport() const { return !GFX11Insts; }
1084*d415bd75Srobert 
1085*d415bd75Srobert   /// Return true if the target's EXP instruction supports the NULL export
1086*d415bd75Srobert   /// target.
hasNullExportTarget()1087*d415bd75Srobert   bool hasNullExportTarget() const { return !GFX11Insts; }
1088*d415bd75Srobert 
hasGFX11FullVGPRs()1089*d415bd75Srobert   bool hasGFX11FullVGPRs() const { return HasGFX11FullVGPRs; }
1090*d415bd75Srobert 
hasVOPDInsts()1091*d415bd75Srobert   bool hasVOPDInsts() const { return HasVOPDInsts; }
1092*d415bd75Srobert 
hasFlatScratchSVSSwizzleBug()1093*d415bd75Srobert   bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; }
1094*d415bd75Srobert 
1095*d415bd75Srobert   /// Return true if the target has the S_DELAY_ALU instruction.
hasDelayAlu()1096*d415bd75Srobert   bool hasDelayAlu() const { return GFX11Insts; }
1097*d415bd75Srobert 
hasPackedTID()109873471bf0Spatrick   bool hasPackedTID() const { return HasPackedTID; }
109973471bf0Spatrick 
1100*d415bd75Srobert   // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
1101*d415bd75Srobert   // hasGFX90AInsts is also true.
hasGFX940Insts()1102*d415bd75Srobert   bool hasGFX940Insts() const { return GFX940Insts; }
1103*d415bd75Srobert 
110473471bf0Spatrick   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
110573471bf0Spatrick   /// SGPRs
110673471bf0Spatrick   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
110773471bf0Spatrick 
110873471bf0Spatrick   /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
110973471bf0Spatrick   /// VGPRs
111073471bf0Spatrick   unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
111173471bf0Spatrick 
/// Return the occupancy for the given function \p F. The LDS size and the
/// numbers of SGPRs and VGPRs are taken into account if provided.
/// Note that occupancy can be affected by the scratch allocation as well,
/// but we do not have enough information to compute it.
111673471bf0Spatrick   unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0,
111773471bf0Spatrick                             unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
111873471bf0Spatrick 
111973471bf0Spatrick   /// \returns true if the flat_scratch register should be initialized with the
112073471bf0Spatrick   /// pointer to the wave's scratch memory rather than a size and offset.
flatScratchIsPointer()112173471bf0Spatrick   bool flatScratchIsPointer() const {
112273471bf0Spatrick     return getGeneration() >= AMDGPUSubtarget::GFX9;
112373471bf0Spatrick   }
112473471bf0Spatrick 
112573471bf0Spatrick   /// \returns true if the flat_scratch register is initialized by the HW.
112673471bf0Spatrick   /// In this case it is readonly.
flatScratchIsArchitected()112773471bf0Spatrick   bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; }
112873471bf0Spatrick 
112973471bf0Spatrick   /// \returns true if the machine has merged shaders in which s0-s7 are
113073471bf0Spatrick   /// reserved by the hardware and user SGPRs start at s8
hasMergedShaders()113173471bf0Spatrick   bool hasMergedShaders() const {
113273471bf0Spatrick     return getGeneration() >= GFX9;
113373471bf0Spatrick   }
113473471bf0Spatrick 
1135*d415bd75Srobert   // \returns true if the target supports the pre-NGG legacy geometry path.
hasLegacyGeometry()1136*d415bd75Srobert   bool hasLegacyGeometry() const { return getGeneration() < GFX11; }
1137*d415bd75Srobert 
113873471bf0Spatrick   /// \returns SGPR allocation granularity supported by the subtarget.
getSGPRAllocGranule()113973471bf0Spatrick   unsigned getSGPRAllocGranule() const {
114073471bf0Spatrick     return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
114173471bf0Spatrick   }
114273471bf0Spatrick 
114373471bf0Spatrick   /// \returns SGPR encoding granularity supported by the subtarget.
getSGPREncodingGranule()114473471bf0Spatrick   unsigned getSGPREncodingGranule() const {
114573471bf0Spatrick     return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
114673471bf0Spatrick   }
114773471bf0Spatrick 
114873471bf0Spatrick   /// \returns Total number of SGPRs supported by the subtarget.
getTotalNumSGPRs()114973471bf0Spatrick   unsigned getTotalNumSGPRs() const {
115073471bf0Spatrick     return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
115173471bf0Spatrick   }
115273471bf0Spatrick 
115373471bf0Spatrick   /// \returns Addressable number of SGPRs supported by the subtarget.
getAddressableNumSGPRs()115473471bf0Spatrick   unsigned getAddressableNumSGPRs() const {
115573471bf0Spatrick     return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
115673471bf0Spatrick   }
115773471bf0Spatrick 
115873471bf0Spatrick   /// \returns Minimum number of SGPRs that meets the given number of waves per
115973471bf0Spatrick   /// execution unit requirement supported by the subtarget.
getMinNumSGPRs(unsigned WavesPerEU)116073471bf0Spatrick   unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
116173471bf0Spatrick     return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
116273471bf0Spatrick   }
116373471bf0Spatrick 
116473471bf0Spatrick   /// \returns Maximum number of SGPRs that meets the given number of waves per
116573471bf0Spatrick   /// execution unit requirement supported by the subtarget.
getMaxNumSGPRs(unsigned WavesPerEU,bool Addressable)116673471bf0Spatrick   unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
116773471bf0Spatrick     return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
116873471bf0Spatrick   }
116973471bf0Spatrick 
/// \returns Reserved number of SGPRs. This is the common utility function
/// called by the MachineFunction and Function variants of
/// getReservedNumSGPRs.
1173*d415bd75Srobert   unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;
117473471bf0Spatrick   /// \returns Reserved number of SGPRs for given machine function \p MF.
117573471bf0Spatrick   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
117673471bf0Spatrick 
117773471bf0Spatrick   /// \returns Reserved number of SGPRs for given function \p F.
117873471bf0Spatrick   unsigned getReservedNumSGPRs(const Function &F) const;
117973471bf0Spatrick 
118073471bf0Spatrick   /// \returns max num SGPRs. This is the common utility
118173471bf0Spatrick   /// function called by MachineFunction and Function
118273471bf0Spatrick   /// variants of getMaxNumSGPRs.
118373471bf0Spatrick   unsigned getBaseMaxNumSGPRs(const Function &F,
118473471bf0Spatrick                               std::pair<unsigned, unsigned> WavesPerEU,
118573471bf0Spatrick                               unsigned PreloadedSGPRs,
118673471bf0Spatrick                               unsigned ReservedNumSGPRs) const;
118773471bf0Spatrick 
118873471bf0Spatrick   /// \returns Maximum number of SGPRs that meets number of waves per execution
118973471bf0Spatrick   /// unit requirement for function \p MF, or number of SGPRs explicitly
119073471bf0Spatrick   /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
119173471bf0Spatrick   ///
119273471bf0Spatrick   /// \returns Value that meets number of waves per execution unit requirement
119373471bf0Spatrick   /// if explicitly requested value cannot be converted to integer, violates
119473471bf0Spatrick   /// subtarget's specifications, or does not meet number of waves per execution
119573471bf0Spatrick   /// unit requirement.
119673471bf0Spatrick   unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
119773471bf0Spatrick 
119873471bf0Spatrick   /// \returns Maximum number of SGPRs that meets number of waves per execution
119973471bf0Spatrick   /// unit requirement for function \p F, or number of SGPRs explicitly
120073471bf0Spatrick   /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.
120173471bf0Spatrick   ///
120273471bf0Spatrick   /// \returns Value that meets number of waves per execution unit requirement
120373471bf0Spatrick   /// if explicitly requested value cannot be converted to integer, violates
120473471bf0Spatrick   /// subtarget's specifications, or does not meet number of waves per execution
120573471bf0Spatrick   /// unit requirement.
120673471bf0Spatrick   unsigned getMaxNumSGPRs(const Function &F) const;
120773471bf0Spatrick 
120873471bf0Spatrick   /// \returns VGPR allocation granularity supported by the subtarget.
getVGPRAllocGranule()120973471bf0Spatrick   unsigned getVGPRAllocGranule() const {
121073471bf0Spatrick     return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
121173471bf0Spatrick   }
121273471bf0Spatrick 
121373471bf0Spatrick   /// \returns VGPR encoding granularity supported by the subtarget.
getVGPREncodingGranule()121473471bf0Spatrick   unsigned getVGPREncodingGranule() const {
121573471bf0Spatrick     return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
121673471bf0Spatrick   }
121773471bf0Spatrick 
121873471bf0Spatrick   /// \returns Total number of VGPRs supported by the subtarget.
getTotalNumVGPRs()121973471bf0Spatrick   unsigned getTotalNumVGPRs() const {
122073471bf0Spatrick     return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
122173471bf0Spatrick   }
122273471bf0Spatrick 
122373471bf0Spatrick   /// \returns Addressable number of VGPRs supported by the subtarget.
getAddressableNumVGPRs()122473471bf0Spatrick   unsigned getAddressableNumVGPRs() const {
122573471bf0Spatrick     return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
122673471bf0Spatrick   }
122773471bf0Spatrick 
1228*d415bd75Srobert   /// \returns the minimum number of VGPRs that will prevent achieving more than
1229*d415bd75Srobert   /// the specified number of waves \p WavesPerEU.
getMinNumVGPRs(unsigned WavesPerEU)123073471bf0Spatrick   unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
123173471bf0Spatrick     return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
123273471bf0Spatrick   }
123373471bf0Spatrick 
1234*d415bd75Srobert   /// \returns the maximum number of VGPRs that can be used and still achieved
1235*d415bd75Srobert   /// at least the specified number of waves \p WavesPerEU.
getMaxNumVGPRs(unsigned WavesPerEU)123673471bf0Spatrick   unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
123773471bf0Spatrick     return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
123873471bf0Spatrick   }
123973471bf0Spatrick 
124073471bf0Spatrick   /// \returns max num VGPRs. This is the common utility function
124173471bf0Spatrick   /// called by MachineFunction and Function variants of getMaxNumVGPRs.
124273471bf0Spatrick   unsigned getBaseMaxNumVGPRs(const Function &F,
124373471bf0Spatrick                               std::pair<unsigned, unsigned> WavesPerEU) const;
124473471bf0Spatrick   /// \returns Maximum number of VGPRs that meets number of waves per execution
124573471bf0Spatrick   /// unit requirement for function \p F, or number of VGPRs explicitly
124673471bf0Spatrick   /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
124773471bf0Spatrick   ///
124873471bf0Spatrick   /// \returns Value that meets number of waves per execution unit requirement
124973471bf0Spatrick   /// if explicitly requested value cannot be converted to integer, violates
125073471bf0Spatrick   /// subtarget's specifications, or does not meet number of waves per execution
125173471bf0Spatrick   /// unit requirement.
125273471bf0Spatrick   unsigned getMaxNumVGPRs(const Function &F) const;
125373471bf0Spatrick 
getMaxNumAGPRs(const Function & F)1254*d415bd75Srobert   unsigned getMaxNumAGPRs(const Function &F) const {
1255*d415bd75Srobert     return getMaxNumVGPRs(F);
1256*d415bd75Srobert   }
1257*d415bd75Srobert 
125873471bf0Spatrick   /// \returns Maximum number of VGPRs that meets number of waves per execution
125973471bf0Spatrick   /// unit requirement for function \p MF, or number of VGPRs explicitly
126073471bf0Spatrick   /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
126173471bf0Spatrick   ///
126273471bf0Spatrick   /// \returns Value that meets number of waves per execution unit requirement
126373471bf0Spatrick   /// if explicitly requested value cannot be converted to integer, violates
126473471bf0Spatrick   /// subtarget's specifications, or does not meet number of waves per execution
126573471bf0Spatrick   /// unit requirement.
126673471bf0Spatrick   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
126773471bf0Spatrick 
126873471bf0Spatrick   void getPostRAMutations(
126973471bf0Spatrick       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
127073471bf0Spatrick       const override;
127173471bf0Spatrick 
1272*d415bd75Srobert   std::unique_ptr<ScheduleDAGMutation>
1273*d415bd75Srobert   createFillMFMAShadowMutation(const TargetInstrInfo *TII) const;
1274*d415bd75Srobert 
isWave32()127573471bf0Spatrick   bool isWave32() const {
127673471bf0Spatrick     return getWavefrontSize() == 32;
127773471bf0Spatrick   }
127873471bf0Spatrick 
isWave64()127973471bf0Spatrick   bool isWave64() const {
128073471bf0Spatrick     return getWavefrontSize() == 64;
128173471bf0Spatrick   }
128273471bf0Spatrick 
getBoolRC()128373471bf0Spatrick   const TargetRegisterClass *getBoolRC() const {
128473471bf0Spatrick     return getRegisterInfo()->getBoolRC();
128573471bf0Spatrick   }
128673471bf0Spatrick 
128773471bf0Spatrick   /// \returns Maximum number of work groups per compute unit supported by the
128873471bf0Spatrick   /// subtarget and limited by given \p FlatWorkGroupSize.
getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize)128973471bf0Spatrick   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
129073471bf0Spatrick     return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
129173471bf0Spatrick   }
129273471bf0Spatrick 
129373471bf0Spatrick   /// \returns Minimum flat work group size supported by the subtarget.
getMinFlatWorkGroupSize()129473471bf0Spatrick   unsigned getMinFlatWorkGroupSize() const override {
129573471bf0Spatrick     return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
129673471bf0Spatrick   }
129773471bf0Spatrick 
129873471bf0Spatrick   /// \returns Maximum flat work group size supported by the subtarget.
getMaxFlatWorkGroupSize()129973471bf0Spatrick   unsigned getMaxFlatWorkGroupSize() const override {
130073471bf0Spatrick     return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
130173471bf0Spatrick   }
130273471bf0Spatrick 
130373471bf0Spatrick   /// \returns Number of waves per execution unit required to support the given
130473471bf0Spatrick   /// \p FlatWorkGroupSize.
130573471bf0Spatrick   unsigned
getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize)130673471bf0Spatrick   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
130773471bf0Spatrick     return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize);
130873471bf0Spatrick   }
130973471bf0Spatrick 
131073471bf0Spatrick   /// \returns Minimum number of waves per execution unit supported by the
131173471bf0Spatrick   /// subtarget.
getMinWavesPerEU()131273471bf0Spatrick   unsigned getMinWavesPerEU() const override {
131373471bf0Spatrick     return AMDGPU::IsaInfo::getMinWavesPerEU(this);
131473471bf0Spatrick   }
131573471bf0Spatrick 
131673471bf0Spatrick   void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
131773471bf0Spatrick                              SDep &Dep) const override;
1318*d415bd75Srobert 
1319*d415bd75Srobert   // \returns true if it's beneficial on this subtarget for the scheduler to
1320*d415bd75Srobert   // cluster stores as well as loads.
shouldClusterStores()1321*d415bd75Srobert   bool shouldClusterStores() const { return getGeneration() >= GFX11; }
1322*d415bd75Srobert 
1323*d415bd75Srobert   // \returns the number of address arguments from which to enable MIMG NSA
1324*d415bd75Srobert   // on supported architectures.
1325*d415bd75Srobert   unsigned getNSAThreshold(const MachineFunction &MF) const;
132673471bf0Spatrick };
132773471bf0Spatrick 
132873471bf0Spatrick } // end namespace llvm
132973471bf0Spatrick 
133073471bf0Spatrick #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
1331