1*da58b97aSjoerg //=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===// 2*da58b97aSjoerg // 3*da58b97aSjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*da58b97aSjoerg // See https://llvm.org/LICENSE.txt for license information. 5*da58b97aSjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*da58b97aSjoerg // 7*da58b97aSjoerg //==-----------------------------------------------------------------------===// 8*da58b97aSjoerg // 9*da58b97aSjoerg /// \file 10*da58b97aSjoerg /// AMD GCN specific subclass of TargetSubtarget. 11*da58b97aSjoerg // 12*da58b97aSjoerg //===----------------------------------------------------------------------===// 13*da58b97aSjoerg 14*da58b97aSjoerg #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 15*da58b97aSjoerg #define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 16*da58b97aSjoerg 17*da58b97aSjoerg #include "AMDGPUCallLowering.h" 18*da58b97aSjoerg #include "AMDGPUSubtarget.h" 19*da58b97aSjoerg #include "SIFrameLowering.h" 20*da58b97aSjoerg #include "SIISelLowering.h" 21*da58b97aSjoerg #include "SIInstrInfo.h" 22*da58b97aSjoerg #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 23*da58b97aSjoerg 24*da58b97aSjoerg namespace llvm { 25*da58b97aSjoerg 26*da58b97aSjoerg class MCInst; 27*da58b97aSjoerg class MCInstrInfo; 28*da58b97aSjoerg 29*da58b97aSjoerg } // namespace llvm 30*da58b97aSjoerg 31*da58b97aSjoerg #define GET_SUBTARGETINFO_HEADER 32*da58b97aSjoerg #include "AMDGPUGenSubtargetInfo.inc" 33*da58b97aSjoerg 34*da58b97aSjoerg namespace llvm { 35*da58b97aSjoerg 36*da58b97aSjoerg class GCNTargetMachine; 37*da58b97aSjoerg 38*da58b97aSjoerg class GCNSubtarget final : public AMDGPUGenSubtargetInfo, 39*da58b97aSjoerg public AMDGPUSubtarget { 40*da58b97aSjoerg 41*da58b97aSjoerg using AMDGPUSubtarget::getMaxWavesPerEU; 42*da58b97aSjoerg 43*da58b97aSjoerg public: 44*da58b97aSjoerg // Following 2 enums are documented at: 45*da58b97aSjoerg // - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi 46*da58b97aSjoerg enum class TrapHandlerAbi { 47*da58b97aSjoerg NONE = 0x00, 48*da58b97aSjoerg AMDHSA = 0x01, 49*da58b97aSjoerg }; 50*da58b97aSjoerg 51*da58b97aSjoerg enum class TrapID { 52*da58b97aSjoerg LLVMAMDHSATrap = 0x02, 53*da58b97aSjoerg LLVMAMDHSADebugTrap = 0x03, 54*da58b97aSjoerg }; 55*da58b97aSjoerg 56*da58b97aSjoerg private: 57*da58b97aSjoerg /// GlobalISel related APIs. 58*da58b97aSjoerg std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; 59*da58b97aSjoerg std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo; 60*da58b97aSjoerg std::unique_ptr<InstructionSelector> InstSelector; 61*da58b97aSjoerg std::unique_ptr<LegalizerInfo> Legalizer; 62*da58b97aSjoerg std::unique_ptr<RegisterBankInfo> RegBankInfo; 63*da58b97aSjoerg 64*da58b97aSjoerg protected: 65*da58b97aSjoerg // Basic subtarget description. 66*da58b97aSjoerg Triple TargetTriple; 67*da58b97aSjoerg AMDGPU::IsaInfo::AMDGPUTargetID TargetID; 68*da58b97aSjoerg unsigned Gen; 69*da58b97aSjoerg InstrItineraryData InstrItins; 70*da58b97aSjoerg int LDSBankCount; 71*da58b97aSjoerg unsigned MaxPrivateElementSize; 72*da58b97aSjoerg 73*da58b97aSjoerg // Possibly statically set by tablegen, but may want to be overridden. 74*da58b97aSjoerg bool FastFMAF32; 75*da58b97aSjoerg bool FastDenormalF32; 76*da58b97aSjoerg bool HalfRate64Ops; 77*da58b97aSjoerg bool FullRate64Ops; 78*da58b97aSjoerg 79*da58b97aSjoerg // Dynamically set bits that enable features. 80*da58b97aSjoerg bool FlatForGlobal; 81*da58b97aSjoerg bool AutoWaitcntBeforeBarrier; 82*da58b97aSjoerg bool UnalignedScratchAccess; 83*da58b97aSjoerg bool UnalignedAccessMode; 84*da58b97aSjoerg bool HasApertureRegs; 85*da58b97aSjoerg bool SupportsXNACK; 86*da58b97aSjoerg 87*da58b97aSjoerg // This should not be used directly. 'TargetID' tracks the dynamic settings 88*da58b97aSjoerg // for XNACK. 89*da58b97aSjoerg bool EnableXNACK; 90*da58b97aSjoerg 91*da58b97aSjoerg bool EnableTgSplit; 92*da58b97aSjoerg bool EnableCuMode; 93*da58b97aSjoerg bool TrapHandler; 94*da58b97aSjoerg 95*da58b97aSjoerg // Used as options. 96*da58b97aSjoerg bool EnableLoadStoreOpt; 97*da58b97aSjoerg bool EnableUnsafeDSOffsetFolding; 98*da58b97aSjoerg bool EnableSIScheduler; 99*da58b97aSjoerg bool EnableDS128; 100*da58b97aSjoerg bool EnablePRTStrictNull; 101*da58b97aSjoerg bool DumpCode; 102*da58b97aSjoerg 103*da58b97aSjoerg // Subtarget statically properties set by tablegen 104*da58b97aSjoerg bool FP64; 105*da58b97aSjoerg bool FMA; 106*da58b97aSjoerg bool MIMG_R128; 107*da58b97aSjoerg bool IsGCN; 108*da58b97aSjoerg bool CIInsts; 109*da58b97aSjoerg bool GFX8Insts; 110*da58b97aSjoerg bool GFX9Insts; 111*da58b97aSjoerg bool GFX90AInsts; 112*da58b97aSjoerg bool GFX10Insts; 113*da58b97aSjoerg bool GFX10_3Insts; 114*da58b97aSjoerg bool GFX7GFX8GFX9Insts; 115*da58b97aSjoerg bool SGPRInitBug; 116*da58b97aSjoerg bool NegativeScratchOffsetBug; 117*da58b97aSjoerg bool NegativeUnalignedScratchOffsetBug; 118*da58b97aSjoerg bool HasSMemRealTime; 119*da58b97aSjoerg bool HasIntClamp; 120*da58b97aSjoerg bool HasFmaMixInsts; 121*da58b97aSjoerg bool HasMovrel; 122*da58b97aSjoerg bool HasVGPRIndexMode; 123*da58b97aSjoerg bool HasScalarStores; 124*da58b97aSjoerg bool HasScalarAtomics; 125*da58b97aSjoerg bool HasSDWAOmod; 126*da58b97aSjoerg bool HasSDWAScalar; 127*da58b97aSjoerg bool HasSDWASdst; 128*da58b97aSjoerg bool HasSDWAMac; 129*da58b97aSjoerg bool HasSDWAOutModsVOPC; 130*da58b97aSjoerg bool HasDPP; 131*da58b97aSjoerg bool HasDPP8; 132*da58b97aSjoerg bool Has64BitDPP; 133*da58b97aSjoerg bool HasPackedFP32Ops; 134*da58b97aSjoerg bool HasExtendedImageInsts; 135*da58b97aSjoerg bool HasR128A16; 136*da58b97aSjoerg bool HasGFX10A16; 137*da58b97aSjoerg bool HasG16; 138*da58b97aSjoerg bool HasNSAEncoding; 139*da58b97aSjoerg bool GFX10_BEncoding; 140*da58b97aSjoerg bool HasDLInsts; 141*da58b97aSjoerg bool HasDot1Insts; 142*da58b97aSjoerg bool HasDot2Insts; 143*da58b97aSjoerg bool HasDot3Insts; 144*da58b97aSjoerg bool HasDot4Insts; 145*da58b97aSjoerg bool HasDot5Insts; 146*da58b97aSjoerg bool HasDot6Insts; 147*da58b97aSjoerg bool HasDot7Insts; 148*da58b97aSjoerg bool HasMAIInsts; 149*da58b97aSjoerg bool HasPkFmacF16Inst; 150*da58b97aSjoerg bool HasAtomicFaddInsts; 151*da58b97aSjoerg bool SupportsSRAMECC; 152*da58b97aSjoerg 153*da58b97aSjoerg // This should not be used directly. 'TargetID' tracks the dynamic settings 154*da58b97aSjoerg // for SRAMECC. 155*da58b97aSjoerg bool EnableSRAMECC; 156*da58b97aSjoerg 157*da58b97aSjoerg bool HasNoSdstCMPX; 158*da58b97aSjoerg bool HasVscnt; 159*da58b97aSjoerg bool HasGetWaveIdInst; 160*da58b97aSjoerg bool HasSMemTimeInst; 161*da58b97aSjoerg bool HasShaderCyclesRegister; 162*da58b97aSjoerg bool HasRegisterBanking; 163*da58b97aSjoerg bool HasVOP3Literal; 164*da58b97aSjoerg bool HasNoDataDepHazard; 165*da58b97aSjoerg bool FlatAddressSpace; 166*da58b97aSjoerg bool FlatInstOffsets; 167*da58b97aSjoerg bool FlatGlobalInsts; 168*da58b97aSjoerg bool FlatScratchInsts; 169*da58b97aSjoerg bool ScalarFlatScratchInsts; 170*da58b97aSjoerg bool HasArchitectedFlatScratch; 171*da58b97aSjoerg bool AddNoCarryInsts; 172*da58b97aSjoerg bool HasUnpackedD16VMem; 173*da58b97aSjoerg bool R600ALUInst; 174*da58b97aSjoerg bool CaymanISA; 175*da58b97aSjoerg bool CFALUBug; 176*da58b97aSjoerg bool LDSMisalignedBug; 177*da58b97aSjoerg bool HasMFMAInlineLiteralBug; 178*da58b97aSjoerg bool HasVertexCache; 179*da58b97aSjoerg short TexVTXClauseSize; 180*da58b97aSjoerg bool UnalignedBufferAccess; 181*da58b97aSjoerg bool UnalignedDSAccess; 182*da58b97aSjoerg bool HasPackedTID; 183*da58b97aSjoerg bool ScalarizeGlobal; 184*da58b97aSjoerg 185*da58b97aSjoerg bool HasVcmpxPermlaneHazard; 186*da58b97aSjoerg bool HasVMEMtoScalarWriteHazard; 187*da58b97aSjoerg bool HasSMEMtoVectorWriteHazard; 188*da58b97aSjoerg bool HasInstFwdPrefetchBug; 189*da58b97aSjoerg bool HasVcmpxExecWARHazard; 190*da58b97aSjoerg bool HasLdsBranchVmemWARHazard; 191*da58b97aSjoerg bool HasNSAtoVMEMBug; 192*da58b97aSjoerg bool HasNSAClauseBug; 193*da58b97aSjoerg bool HasOffset3fBug; 194*da58b97aSjoerg bool HasFlatSegmentOffsetBug; 195*da58b97aSjoerg bool HasImageStoreD16Bug; 196*da58b97aSjoerg bool HasImageGather4D16Bug; 197*da58b97aSjoerg 198*da58b97aSjoerg // Dummy feature to use for assembler in tablegen. 199*da58b97aSjoerg bool FeatureDisable; 200*da58b97aSjoerg 201*da58b97aSjoerg SelectionDAGTargetInfo TSInfo; 202*da58b97aSjoerg private: 203*da58b97aSjoerg SIInstrInfo InstrInfo; 204*da58b97aSjoerg SITargetLowering TLInfo; 205*da58b97aSjoerg SIFrameLowering FrameLowering; 206*da58b97aSjoerg 207*da58b97aSjoerg public: 208*da58b97aSjoerg // See COMPUTE_TMPRING_SIZE.WAVESIZE, 13-bit field in units of 256-dword. 209*da58b97aSjoerg static const unsigned MaxWaveScratchSize = (256 * 4) * ((1 << 13) - 1); 210*da58b97aSjoerg 211*da58b97aSjoerg GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, 212*da58b97aSjoerg const GCNTargetMachine &TM); 213*da58b97aSjoerg ~GCNSubtarget() override; 214*da58b97aSjoerg 215*da58b97aSjoerg GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, 216*da58b97aSjoerg StringRef GPU, StringRef FS); 217*da58b97aSjoerg getInstrInfo()218*da58b97aSjoerg const SIInstrInfo *getInstrInfo() const override { 219*da58b97aSjoerg return &InstrInfo; 220*da58b97aSjoerg } 221*da58b97aSjoerg getFrameLowering()222*da58b97aSjoerg const SIFrameLowering *getFrameLowering() const override { 223*da58b97aSjoerg return &FrameLowering; 224*da58b97aSjoerg } 225*da58b97aSjoerg getTargetLowering()226*da58b97aSjoerg const SITargetLowering *getTargetLowering() const override { 227*da58b97aSjoerg return &TLInfo; 228*da58b97aSjoerg } 229*da58b97aSjoerg getRegisterInfo()230*da58b97aSjoerg const SIRegisterInfo *getRegisterInfo() const override { 231*da58b97aSjoerg return &InstrInfo.getRegisterInfo(); 232*da58b97aSjoerg } 233*da58b97aSjoerg getCallLowering()234*da58b97aSjoerg const CallLowering *getCallLowering() const override { 235*da58b97aSjoerg return CallLoweringInfo.get(); 236*da58b97aSjoerg } 237*da58b97aSjoerg getInlineAsmLowering()238*da58b97aSjoerg const InlineAsmLowering *getInlineAsmLowering() const override { 239*da58b97aSjoerg return InlineAsmLoweringInfo.get(); 240*da58b97aSjoerg } 241*da58b97aSjoerg getInstructionSelector()242*da58b97aSjoerg InstructionSelector *getInstructionSelector() const override { 243*da58b97aSjoerg return InstSelector.get(); 244*da58b97aSjoerg } 245*da58b97aSjoerg getLegalizerInfo()246*da58b97aSjoerg const LegalizerInfo *getLegalizerInfo() const override { 247*da58b97aSjoerg return Legalizer.get(); 248*da58b97aSjoerg } 249*da58b97aSjoerg getRegBankInfo()250*da58b97aSjoerg const RegisterBankInfo *getRegBankInfo() const override { 251*da58b97aSjoerg return RegBankInfo.get(); 252*da58b97aSjoerg } 253*da58b97aSjoerg getTargetID()254*da58b97aSjoerg const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const { 255*da58b97aSjoerg return TargetID; 256*da58b97aSjoerg } 257*da58b97aSjoerg 258*da58b97aSjoerg // Nothing implemented, just prevent crashes on use. getSelectionDAGInfo()259*da58b97aSjoerg const SelectionDAGTargetInfo *getSelectionDAGInfo() const override { 260*da58b97aSjoerg return &TSInfo; 261*da58b97aSjoerg } 262*da58b97aSjoerg getInstrItineraryData()263*da58b97aSjoerg const InstrItineraryData *getInstrItineraryData() const override { 264*da58b97aSjoerg return &InstrItins; 265*da58b97aSjoerg } 266*da58b97aSjoerg 267*da58b97aSjoerg void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); 268*da58b97aSjoerg getGeneration()269*da58b97aSjoerg Generation getGeneration() const { 270*da58b97aSjoerg return (Generation)Gen; 271*da58b97aSjoerg } 272*da58b97aSjoerg 273*da58b97aSjoerg /// Return the number of high bits known to be zero fror a frame index. getKnownHighZeroBitsForFrameIndex()274*da58b97aSjoerg unsigned getKnownHighZeroBitsForFrameIndex() const { 275*da58b97aSjoerg return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2(); 276*da58b97aSjoerg } 277*da58b97aSjoerg getLDSBankCount()278*da58b97aSjoerg int getLDSBankCount() const { 279*da58b97aSjoerg return LDSBankCount; 280*da58b97aSjoerg } 281*da58b97aSjoerg 282*da58b97aSjoerg unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const { 283*da58b97aSjoerg return (ForBufferRSrc || !enableFlatScratch()) ? MaxPrivateElementSize : 16; 284*da58b97aSjoerg } 285*da58b97aSjoerg 286*da58b97aSjoerg unsigned getConstantBusLimit(unsigned Opcode) const; 287*da58b97aSjoerg hasIntClamp()288*da58b97aSjoerg bool hasIntClamp() const { 289*da58b97aSjoerg return HasIntClamp; 290*da58b97aSjoerg } 291*da58b97aSjoerg hasFP64()292*da58b97aSjoerg bool hasFP64() const { 293*da58b97aSjoerg return FP64; 294*da58b97aSjoerg } 295*da58b97aSjoerg hasMIMG_R128()296*da58b97aSjoerg bool hasMIMG_R128() const { 297*da58b97aSjoerg return MIMG_R128; 298*da58b97aSjoerg } 299*da58b97aSjoerg hasHWFP64()300*da58b97aSjoerg bool hasHWFP64() const { 301*da58b97aSjoerg return FP64; 302*da58b97aSjoerg } 303*da58b97aSjoerg hasFastFMAF32()304*da58b97aSjoerg bool hasFastFMAF32() const { 305*da58b97aSjoerg return FastFMAF32; 306*da58b97aSjoerg } 307*da58b97aSjoerg hasHalfRate64Ops()308*da58b97aSjoerg bool hasHalfRate64Ops() const { 309*da58b97aSjoerg return HalfRate64Ops; 310*da58b97aSjoerg } 311*da58b97aSjoerg hasFullRate64Ops()312*da58b97aSjoerg bool hasFullRate64Ops() const { 313*da58b97aSjoerg return FullRate64Ops; 314*da58b97aSjoerg } 315*da58b97aSjoerg hasAddr64()316*da58b97aSjoerg bool hasAddr64() const { 317*da58b97aSjoerg return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS); 318*da58b97aSjoerg } 319*da58b97aSjoerg hasFlat()320*da58b97aSjoerg bool hasFlat() const { 321*da58b97aSjoerg return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS); 322*da58b97aSjoerg } 323*da58b97aSjoerg 324*da58b97aSjoerg // Return true if the target only has the reverse operand versions of VALU 325*da58b97aSjoerg // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32). hasOnlyRevVALUShifts()326*da58b97aSjoerg bool hasOnlyRevVALUShifts() const { 327*da58b97aSjoerg return getGeneration() >= VOLCANIC_ISLANDS; 328*da58b97aSjoerg } 329*da58b97aSjoerg hasFractBug()330*da58b97aSjoerg bool hasFractBug() const { 331*da58b97aSjoerg return getGeneration() == SOUTHERN_ISLANDS; 332*da58b97aSjoerg } 333*da58b97aSjoerg hasBFE()334*da58b97aSjoerg bool hasBFE() const { 335*da58b97aSjoerg return true; 336*da58b97aSjoerg } 337*da58b97aSjoerg hasBFI()338*da58b97aSjoerg bool hasBFI() const { 339*da58b97aSjoerg return true; 340*da58b97aSjoerg } 341*da58b97aSjoerg hasBFM()342*da58b97aSjoerg bool hasBFM() const { 343*da58b97aSjoerg return hasBFE(); 344*da58b97aSjoerg } 345*da58b97aSjoerg hasBCNT(unsigned Size)346*da58b97aSjoerg bool hasBCNT(unsigned Size) const { 347*da58b97aSjoerg return true; 348*da58b97aSjoerg } 349*da58b97aSjoerg hasFFBL()350*da58b97aSjoerg bool hasFFBL() const { 351*da58b97aSjoerg return true; 352*da58b97aSjoerg } 353*da58b97aSjoerg hasFFBH()354*da58b97aSjoerg bool hasFFBH() const { 355*da58b97aSjoerg return true; 356*da58b97aSjoerg } 357*da58b97aSjoerg hasMed3_16()358*da58b97aSjoerg bool hasMed3_16() const { 359*da58b97aSjoerg return getGeneration() >= AMDGPUSubtarget::GFX9; 360*da58b97aSjoerg } 361*da58b97aSjoerg hasMin3Max3_16()362*da58b97aSjoerg bool hasMin3Max3_16() const { 363*da58b97aSjoerg return getGeneration() >= AMDGPUSubtarget::GFX9; 364*da58b97aSjoerg } 365*da58b97aSjoerg hasFmaMixInsts()366*da58b97aSjoerg bool hasFmaMixInsts() const { 367*da58b97aSjoerg return HasFmaMixInsts; 368*da58b97aSjoerg } 369*da58b97aSjoerg hasCARRY()370*da58b97aSjoerg bool hasCARRY() const { 371*da58b97aSjoerg return true; 372*da58b97aSjoerg } 373*da58b97aSjoerg hasFMA()374*da58b97aSjoerg bool hasFMA() const { 375*da58b97aSjoerg return FMA; 376*da58b97aSjoerg } 377*da58b97aSjoerg hasSwap()378*da58b97aSjoerg bool hasSwap() const { 379*da58b97aSjoerg return GFX9Insts; 380*da58b97aSjoerg } 381*da58b97aSjoerg hasScalarPackInsts()382*da58b97aSjoerg bool hasScalarPackInsts() const { 383*da58b97aSjoerg return GFX9Insts; 384*da58b97aSjoerg } 385*da58b97aSjoerg hasScalarMulHiInsts()386*da58b97aSjoerg bool hasScalarMulHiInsts() const { 387*da58b97aSjoerg return GFX9Insts; 388*da58b97aSjoerg } 389*da58b97aSjoerg getTrapHandlerAbi()390*da58b97aSjoerg TrapHandlerAbi getTrapHandlerAbi() const { 391*da58b97aSjoerg return isAmdHsaOS() ? TrapHandlerAbi::AMDHSA : TrapHandlerAbi::NONE; 392*da58b97aSjoerg } 393*da58b97aSjoerg supportsGetDoorbellID()394*da58b97aSjoerg bool supportsGetDoorbellID() const { 395*da58b97aSjoerg // The S_GETREG DOORBELL_ID is supported by all GFX9 onward targets. 396*da58b97aSjoerg return getGeneration() >= GFX9; 397*da58b97aSjoerg } 398*da58b97aSjoerg 399*da58b97aSjoerg /// True if the offset field of DS instructions works as expected. On SI, the 400*da58b97aSjoerg /// offset uses a 16-bit adder and does not always wrap properly. hasUsableDSOffset()401*da58b97aSjoerg bool hasUsableDSOffset() const { 402*da58b97aSjoerg return getGeneration() >= SEA_ISLANDS; 403*da58b97aSjoerg } 404*da58b97aSjoerg unsafeDSOffsetFoldingEnabled()405*da58b97aSjoerg bool unsafeDSOffsetFoldingEnabled() const { 406*da58b97aSjoerg return EnableUnsafeDSOffsetFolding; 407*da58b97aSjoerg } 408*da58b97aSjoerg 409*da58b97aSjoerg /// Condition output from div_scale is usable. hasUsableDivScaleConditionOutput()410*da58b97aSjoerg bool hasUsableDivScaleConditionOutput() const { 411*da58b97aSjoerg return getGeneration() != SOUTHERN_ISLANDS; 412*da58b97aSjoerg } 413*da58b97aSjoerg 414*da58b97aSjoerg /// Extra wait hazard is needed in some cases before 415*da58b97aSjoerg /// s_cbranch_vccnz/s_cbranch_vccz. hasReadVCCZBug()416*da58b97aSjoerg bool hasReadVCCZBug() const { 417*da58b97aSjoerg return getGeneration() <= SEA_ISLANDS; 418*da58b97aSjoerg } 419*da58b97aSjoerg 420*da58b97aSjoerg /// Writes to VCC_LO/VCC_HI update the VCCZ flag. partialVCCWritesUpdateVCCZ()421*da58b97aSjoerg bool partialVCCWritesUpdateVCCZ() const { 422*da58b97aSjoerg return getGeneration() >= GFX10; 423*da58b97aSjoerg } 424*da58b97aSjoerg 425*da58b97aSjoerg /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR 426*da58b97aSjoerg /// was written by a VALU instruction. hasSMRDReadVALUDefHazard()427*da58b97aSjoerg bool hasSMRDReadVALUDefHazard() const { 428*da58b97aSjoerg return getGeneration() == SOUTHERN_ISLANDS; 429*da58b97aSjoerg } 430*da58b97aSjoerg 431*da58b97aSjoerg /// A read of an SGPR by a VMEM instruction requires 5 wait states when the 432*da58b97aSjoerg /// SGPR was written by a VALU Instruction. hasVMEMReadSGPRVALUDefHazard()433*da58b97aSjoerg bool hasVMEMReadSGPRVALUDefHazard() const { 434*da58b97aSjoerg return getGeneration() >= VOLCANIC_ISLANDS; 435*da58b97aSjoerg } 436*da58b97aSjoerg hasRFEHazards()437*da58b97aSjoerg bool hasRFEHazards() const { 438*da58b97aSjoerg return getGeneration() >= VOLCANIC_ISLANDS; 439*da58b97aSjoerg } 440*da58b97aSjoerg 441*da58b97aSjoerg /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32. getSetRegWaitStates()442*da58b97aSjoerg unsigned getSetRegWaitStates() const { 443*da58b97aSjoerg return getGeneration() <= SEA_ISLANDS ? 1 : 2; 444*da58b97aSjoerg } 445*da58b97aSjoerg dumpCode()446*da58b97aSjoerg bool dumpCode() const { 447*da58b97aSjoerg return DumpCode; 448*da58b97aSjoerg } 449*da58b97aSjoerg 450*da58b97aSjoerg /// Return the amount of LDS that can be used that will not restrict the 451*da58b97aSjoerg /// occupancy lower than WaveCount. 452*da58b97aSjoerg unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 453*da58b97aSjoerg const Function &) const; 454*da58b97aSjoerg supportsMinMaxDenormModes()455*da58b97aSjoerg bool supportsMinMaxDenormModes() const { 456*da58b97aSjoerg return getGeneration() >= AMDGPUSubtarget::GFX9; 457*da58b97aSjoerg } 458*da58b97aSjoerg 459*da58b97aSjoerg /// \returns If target supports S_DENORM_MODE. hasDenormModeInst()460*da58b97aSjoerg bool hasDenormModeInst() const { 461*da58b97aSjoerg return getGeneration() >= AMDGPUSubtarget::GFX10; 462*da58b97aSjoerg } 463*da58b97aSjoerg useFlatForGlobal()464*da58b97aSjoerg bool useFlatForGlobal() const { 465*da58b97aSjoerg return FlatForGlobal; 466*da58b97aSjoerg } 467*da58b97aSjoerg 468*da58b97aSjoerg /// \returns If target supports ds_read/write_b128 and user enables generation 469*da58b97aSjoerg /// of ds_read/write_b128. useDS128()470*da58b97aSjoerg bool useDS128() const { 471*da58b97aSjoerg return CIInsts && EnableDS128; 472*da58b97aSjoerg } 473*da58b97aSjoerg 474*da58b97aSjoerg /// \return If target supports ds_read/write_b96/128. hasDS96AndDS128()475*da58b97aSjoerg bool hasDS96AndDS128() const { 476*da58b97aSjoerg return CIInsts; 477*da58b97aSjoerg } 478*da58b97aSjoerg 479*da58b97aSjoerg /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64 haveRoundOpsF64()480*da58b97aSjoerg bool haveRoundOpsF64() const { 481*da58b97aSjoerg return CIInsts; 482*da58b97aSjoerg } 483*da58b97aSjoerg 484*da58b97aSjoerg /// \returns If MUBUF instructions always perform range checking, even for 485*da58b97aSjoerg /// buffer resources used for private memory access. privateMemoryResourceIsRangeChecked()486*da58b97aSjoerg bool privateMemoryResourceIsRangeChecked() const { 487*da58b97aSjoerg return getGeneration() < AMDGPUSubtarget::GFX9; 488*da58b97aSjoerg } 489*da58b97aSjoerg 490*da58b97aSjoerg /// \returns If target requires PRT Struct NULL support (zero result registers 491*da58b97aSjoerg /// for sparse texture support). usePRTStrictNull()492*da58b97aSjoerg bool usePRTStrictNull() const { 493*da58b97aSjoerg return EnablePRTStrictNull; 494*da58b97aSjoerg } 495*da58b97aSjoerg hasAutoWaitcntBeforeBarrier()496*da58b97aSjoerg bool hasAutoWaitcntBeforeBarrier() const { 497*da58b97aSjoerg return AutoWaitcntBeforeBarrier; 498*da58b97aSjoerg } 499*da58b97aSjoerg hasUnalignedBufferAccess()500*da58b97aSjoerg bool hasUnalignedBufferAccess() const { 501*da58b97aSjoerg return UnalignedBufferAccess; 502*da58b97aSjoerg } 503*da58b97aSjoerg hasUnalignedBufferAccessEnabled()504*da58b97aSjoerg bool hasUnalignedBufferAccessEnabled() const { 505*da58b97aSjoerg return UnalignedBufferAccess && UnalignedAccessMode; 506*da58b97aSjoerg } 507*da58b97aSjoerg hasUnalignedDSAccess()508*da58b97aSjoerg bool hasUnalignedDSAccess() const { 509*da58b97aSjoerg return UnalignedDSAccess; 510*da58b97aSjoerg } 511*da58b97aSjoerg hasUnalignedDSAccessEnabled()512*da58b97aSjoerg bool hasUnalignedDSAccessEnabled() const { 513*da58b97aSjoerg return UnalignedDSAccess && UnalignedAccessMode; 514*da58b97aSjoerg } 515*da58b97aSjoerg hasUnalignedScratchAccess()516*da58b97aSjoerg bool hasUnalignedScratchAccess() const { 517*da58b97aSjoerg return UnalignedScratchAccess; 518*da58b97aSjoerg } 519*da58b97aSjoerg hasUnalignedAccessMode()520*da58b97aSjoerg bool hasUnalignedAccessMode() const { 521*da58b97aSjoerg return UnalignedAccessMode; 522*da58b97aSjoerg } 523*da58b97aSjoerg hasApertureRegs()524*da58b97aSjoerg bool hasApertureRegs() const { 525*da58b97aSjoerg return HasApertureRegs; 526*da58b97aSjoerg } 527*da58b97aSjoerg isTrapHandlerEnabled()528*da58b97aSjoerg bool isTrapHandlerEnabled() const { 529*da58b97aSjoerg return TrapHandler; 530*da58b97aSjoerg } 531*da58b97aSjoerg isXNACKEnabled()532*da58b97aSjoerg bool isXNACKEnabled() const { 533*da58b97aSjoerg return TargetID.isXnackOnOrAny(); 534*da58b97aSjoerg } 535*da58b97aSjoerg isTgSplitEnabled()536*da58b97aSjoerg bool isTgSplitEnabled() const { 537*da58b97aSjoerg return EnableTgSplit; 538*da58b97aSjoerg } 539*da58b97aSjoerg isCuModeEnabled()540*da58b97aSjoerg bool isCuModeEnabled() const { 541*da58b97aSjoerg return EnableCuMode; 542*da58b97aSjoerg } 543*da58b97aSjoerg hasFlatAddressSpace()544*da58b97aSjoerg bool hasFlatAddressSpace() const { 545*da58b97aSjoerg return FlatAddressSpace; 546*da58b97aSjoerg } 547*da58b97aSjoerg hasFlatScrRegister()548*da58b97aSjoerg bool hasFlatScrRegister() const { 549*da58b97aSjoerg return hasFlatAddressSpace(); 550*da58b97aSjoerg } 551*da58b97aSjoerg hasFlatInstOffsets()552*da58b97aSjoerg bool hasFlatInstOffsets() const { 553*da58b97aSjoerg return FlatInstOffsets; 554*da58b97aSjoerg } 555*da58b97aSjoerg hasFlatGlobalInsts()556*da58b97aSjoerg bool hasFlatGlobalInsts() const { 557*da58b97aSjoerg return FlatGlobalInsts; 558*da58b97aSjoerg } 559*da58b97aSjoerg hasFlatScratchInsts()560*da58b97aSjoerg bool hasFlatScratchInsts() const { 561*da58b97aSjoerg return FlatScratchInsts; 562*da58b97aSjoerg } 563*da58b97aSjoerg 564*da58b97aSjoerg // Check if target supports ST addressing mode with FLAT scratch instructions. 565*da58b97aSjoerg // The ST addressing mode means no registers are used, either VGPR or SGPR, 566*da58b97aSjoerg // but only immediate offset is swizzled and added to the FLAT scratch base. hasFlatScratchSTMode()567*da58b97aSjoerg bool hasFlatScratchSTMode() const { 568*da58b97aSjoerg return hasFlatScratchInsts() && hasGFX10_3Insts(); 569*da58b97aSjoerg } 570*da58b97aSjoerg hasScalarFlatScratchInsts()571*da58b97aSjoerg bool hasScalarFlatScratchInsts() const { 572*da58b97aSjoerg return ScalarFlatScratchInsts; 573*da58b97aSjoerg } 574*da58b97aSjoerg hasGlobalAddTidInsts()575*da58b97aSjoerg bool hasGlobalAddTidInsts() const { 576*da58b97aSjoerg return GFX10_BEncoding; 577*da58b97aSjoerg } 578*da58b97aSjoerg hasAtomicCSub()579*da58b97aSjoerg bool hasAtomicCSub() const { 580*da58b97aSjoerg return GFX10_BEncoding; 581*da58b97aSjoerg } 582*da58b97aSjoerg hasMultiDwordFlatScratchAddressing()583*da58b97aSjoerg bool hasMultiDwordFlatScratchAddressing() const { 584*da58b97aSjoerg return getGeneration() >= GFX9; 585*da58b97aSjoerg } 586*da58b97aSjoerg hasFlatSegmentOffsetBug()587*da58b97aSjoerg bool hasFlatSegmentOffsetBug() const { 588*da58b97aSjoerg return HasFlatSegmentOffsetBug; 589*da58b97aSjoerg } 590*da58b97aSjoerg hasFlatLgkmVMemCountInOrder()591*da58b97aSjoerg bool hasFlatLgkmVMemCountInOrder() const { 592*da58b97aSjoerg return getGeneration() > GFX9; 593*da58b97aSjoerg } 594*da58b97aSjoerg hasD16LoadStore()595*da58b97aSjoerg bool hasD16LoadStore() const { 596*da58b97aSjoerg return getGeneration() >= GFX9; 597*da58b97aSjoerg } 598*da58b97aSjoerg d16PreservesUnusedBits()599*da58b97aSjoerg bool d16PreservesUnusedBits() const { 600*da58b97aSjoerg return hasD16LoadStore() && !TargetID.isSramEccOnOrAny(); 601*da58b97aSjoerg } 602*da58b97aSjoerg hasD16Images()603*da58b97aSjoerg bool hasD16Images() const { 604*da58b97aSjoerg return getGeneration() >= VOLCANIC_ISLANDS; 605*da58b97aSjoerg } 606*da58b97aSjoerg 607*da58b97aSjoerg /// Return if most LDS instructions have an m0 use that require m0 to be 608*da58b97aSjoerg /// iniitalized. ldsRequiresM0Init()609*da58b97aSjoerg bool ldsRequiresM0Init() const { 610*da58b97aSjoerg return getGeneration() < GFX9; 611*da58b97aSjoerg } 612*da58b97aSjoerg 613*da58b97aSjoerg // True if the hardware rewinds and replays GWS operations if a wave is 614*da58b97aSjoerg // preempted. 615*da58b97aSjoerg // 616*da58b97aSjoerg // If this is false, a GWS operation requires testing if a nack set the 617*da58b97aSjoerg // MEM_VIOL bit, and repeating if so. hasGWSAutoReplay()618*da58b97aSjoerg bool hasGWSAutoReplay() const { 619*da58b97aSjoerg return getGeneration() >= GFX9; 620*da58b97aSjoerg } 621*da58b97aSjoerg 622*da58b97aSjoerg /// \returns if target has ds_gws_sema_release_all instruction. hasGWSSemaReleaseAll()623*da58b97aSjoerg bool hasGWSSemaReleaseAll() const { 624*da58b97aSjoerg return CIInsts; 625*da58b97aSjoerg } 626*da58b97aSjoerg 627*da58b97aSjoerg /// \returns true if the target has integer add/sub instructions that do not 628*da58b97aSjoerg /// produce a carry-out. This includes v_add_[iu]32, v_sub_[iu]32, 629*da58b97aSjoerg /// v_add_[iu]16, and v_sub_[iu]16, all of which support the clamp modifier 630*da58b97aSjoerg /// for saturation. hasAddNoCarry()631*da58b97aSjoerg bool hasAddNoCarry() const { 632*da58b97aSjoerg return AddNoCarryInsts; 633*da58b97aSjoerg } 634*da58b97aSjoerg hasUnpackedD16VMem()635*da58b97aSjoerg bool hasUnpackedD16VMem() const { 636*da58b97aSjoerg return HasUnpackedD16VMem; 637*da58b97aSjoerg } 638*da58b97aSjoerg 639*da58b97aSjoerg // Covers VS/PS/CS graphics shaders isMesaGfxShader(const Function & F)640*da58b97aSjoerg bool isMesaGfxShader(const Function &F) const { 641*da58b97aSjoerg return isMesa3DOS() && AMDGPU::isShader(F.getCallingConv()); 642*da58b97aSjoerg } 643*da58b97aSjoerg hasMad64_32()644*da58b97aSjoerg bool hasMad64_32() const { 645*da58b97aSjoerg return getGeneration() >= SEA_ISLANDS; 646*da58b97aSjoerg } 647*da58b97aSjoerg hasSDWAOmod()648*da58b97aSjoerg bool hasSDWAOmod() const { 649*da58b97aSjoerg return HasSDWAOmod; 650*da58b97aSjoerg } 651*da58b97aSjoerg hasSDWAScalar()652*da58b97aSjoerg bool hasSDWAScalar() const { 653*da58b97aSjoerg return HasSDWAScalar; 654*da58b97aSjoerg } 655*da58b97aSjoerg hasSDWASdst()656*da58b97aSjoerg bool hasSDWASdst() const { 657*da58b97aSjoerg return HasSDWASdst; 658*da58b97aSjoerg } 659*da58b97aSjoerg hasSDWAMac()660*da58b97aSjoerg bool hasSDWAMac() const { 661*da58b97aSjoerg return HasSDWAMac; 662*da58b97aSjoerg } 663*da58b97aSjoerg hasSDWAOutModsVOPC()664*da58b97aSjoerg bool hasSDWAOutModsVOPC() const { 665*da58b97aSjoerg return HasSDWAOutModsVOPC; 666*da58b97aSjoerg } 667*da58b97aSjoerg hasDLInsts()668*da58b97aSjoerg bool hasDLInsts() const { 669*da58b97aSjoerg return HasDLInsts; 670*da58b97aSjoerg } 671*da58b97aSjoerg hasDot1Insts()672*da58b97aSjoerg bool hasDot1Insts() const { 673*da58b97aSjoerg return HasDot1Insts; 674*da58b97aSjoerg } 675*da58b97aSjoerg hasDot2Insts()676*da58b97aSjoerg bool hasDot2Insts() const { 677*da58b97aSjoerg return HasDot2Insts; 678*da58b97aSjoerg } 679*da58b97aSjoerg hasDot3Insts()680*da58b97aSjoerg bool hasDot3Insts() const { 681*da58b97aSjoerg return HasDot3Insts; 682*da58b97aSjoerg } 683*da58b97aSjoerg hasDot4Insts()684*da58b97aSjoerg bool hasDot4Insts() const { 685*da58b97aSjoerg return HasDot4Insts; 686*da58b97aSjoerg } 687*da58b97aSjoerg hasDot5Insts()688*da58b97aSjoerg bool hasDot5Insts() const { 689*da58b97aSjoerg return HasDot5Insts; 690*da58b97aSjoerg } 691*da58b97aSjoerg hasDot6Insts()692*da58b97aSjoerg bool hasDot6Insts() const { 693*da58b97aSjoerg return HasDot6Insts; 694*da58b97aSjoerg } 695*da58b97aSjoerg hasDot7Insts()696*da58b97aSjoerg bool hasDot7Insts() const { 697*da58b97aSjoerg return HasDot7Insts; 698*da58b97aSjoerg } 699*da58b97aSjoerg hasMAIInsts()700*da58b97aSjoerg bool hasMAIInsts() const { 701*da58b97aSjoerg return HasMAIInsts; 702*da58b97aSjoerg } 703*da58b97aSjoerg hasPkFmacF16Inst()704*da58b97aSjoerg bool hasPkFmacF16Inst() const { 705*da58b97aSjoerg return HasPkFmacF16Inst; 706*da58b97aSjoerg } 707*da58b97aSjoerg hasAtomicFaddInsts()708*da58b97aSjoerg bool hasAtomicFaddInsts() const { 709*da58b97aSjoerg return HasAtomicFaddInsts; 710*da58b97aSjoerg } 711*da58b97aSjoerg hasNoSdstCMPX()712*da58b97aSjoerg bool hasNoSdstCMPX() const { 713*da58b97aSjoerg return HasNoSdstCMPX; 714*da58b97aSjoerg } 715*da58b97aSjoerg hasVscnt()716*da58b97aSjoerg bool hasVscnt() const { 717*da58b97aSjoerg return HasVscnt; 718*da58b97aSjoerg } 719*da58b97aSjoerg hasGetWaveIdInst()720*da58b97aSjoerg bool hasGetWaveIdInst() const { 721*da58b97aSjoerg return HasGetWaveIdInst; 722*da58b97aSjoerg } 723*da58b97aSjoerg hasSMemTimeInst()724*da58b97aSjoerg bool hasSMemTimeInst() const { 725*da58b97aSjoerg return HasSMemTimeInst; 726*da58b97aSjoerg } 727*da58b97aSjoerg hasShaderCyclesRegister()728*da58b97aSjoerg bool hasShaderCyclesRegister() const { 729*da58b97aSjoerg return HasShaderCyclesRegister; 730*da58b97aSjoerg } 731*da58b97aSjoerg hasRegisterBanking()732*da58b97aSjoerg bool hasRegisterBanking() const { 733*da58b97aSjoerg return HasRegisterBanking; 734*da58b97aSjoerg } 735*da58b97aSjoerg hasVOP3Literal()736*da58b97aSjoerg bool hasVOP3Literal() const { 737*da58b97aSjoerg return HasVOP3Literal; 738*da58b97aSjoerg } 739*da58b97aSjoerg hasNoDataDepHazard()740*da58b97aSjoerg bool hasNoDataDepHazard() const { 741*da58b97aSjoerg return HasNoDataDepHazard; 742*da58b97aSjoerg } 743*da58b97aSjoerg vmemWriteNeedsExpWaitcnt()744*da58b97aSjoerg bool vmemWriteNeedsExpWaitcnt() const { 745*da58b97aSjoerg return getGeneration() < SEA_ISLANDS; 746*da58b97aSjoerg } 747*da58b97aSjoerg 748*da58b97aSjoerg // Scratch is allocated in 256 dword per wave blocks for the entire 749*da58b97aSjoerg // wavefront. When viewed from the perspecive of an arbitrary workitem, this 750*da58b97aSjoerg // is 4-byte aligned. 751*da58b97aSjoerg // 752*da58b97aSjoerg // Only 4-byte alignment is really needed to access anything. Transformations 753*da58b97aSjoerg // on the pointer value itself may rely on the alignment / known low bits of 754*da58b97aSjoerg // the pointer. Set this to something above the minimum to avoid needing 755*da58b97aSjoerg // dynamic realignment in common cases. getStackAlignment()756*da58b97aSjoerg Align getStackAlignment() const { return Align(16); } 757*da58b97aSjoerg enableMachineScheduler()758*da58b97aSjoerg bool enableMachineScheduler() const override { 759*da58b97aSjoerg return true; 760*da58b97aSjoerg } 761*da58b97aSjoerg 762*da58b97aSjoerg bool useAA() const override; 763*da58b97aSjoerg enableSubRegLiveness()764*da58b97aSjoerg bool enableSubRegLiveness() const override { 765*da58b97aSjoerg return true; 766*da58b97aSjoerg } 767*da58b97aSjoerg setScalarizeGlobalBehavior(bool b)768*da58b97aSjoerg void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; } getScalarizeGlobalBehavior()769*da58b97aSjoerg bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; } 770*da58b97aSjoerg 771*da58b97aSjoerg // static wrappers 772*da58b97aSjoerg static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI); 773*da58b97aSjoerg 774*da58b97aSjoerg // XXX - Why is this here if it isn't in the default pass set? enableEarlyIfConversion()775*da58b97aSjoerg bool enableEarlyIfConversion() const override { 776*da58b97aSjoerg return true; 777*da58b97aSjoerg } 778*da58b97aSjoerg 779*da58b97aSjoerg bool enableFlatScratch() const; 780*da58b97aSjoerg 781*da58b97aSjoerg void overrideSchedPolicy(MachineSchedPolicy &Policy, 782*da58b97aSjoerg unsigned NumRegionInstrs) const override; 783*da58b97aSjoerg getMaxNumUserSGPRs()784*da58b97aSjoerg unsigned getMaxNumUserSGPRs() const { 785*da58b97aSjoerg return 16; 786*da58b97aSjoerg } 787*da58b97aSjoerg hasSMemRealTime()788*da58b97aSjoerg bool hasSMemRealTime() const { 789*da58b97aSjoerg return HasSMemRealTime; 790*da58b97aSjoerg } 791*da58b97aSjoerg hasMovrel()792*da58b97aSjoerg bool hasMovrel() const { 793*da58b97aSjoerg return HasMovrel; 794*da58b97aSjoerg } 795*da58b97aSjoerg hasVGPRIndexMode()796*da58b97aSjoerg bool hasVGPRIndexMode() const { 797*da58b97aSjoerg return HasVGPRIndexMode; 798*da58b97aSjoerg } 799*da58b97aSjoerg 800*da58b97aSjoerg bool useVGPRIndexMode() const; 801*da58b97aSjoerg hasScalarCompareEq64()802*da58b97aSjoerg bool hasScalarCompareEq64() const { 803*da58b97aSjoerg return getGeneration() >= VOLCANIC_ISLANDS; 804*da58b97aSjoerg } 805*da58b97aSjoerg hasScalarStores()806*da58b97aSjoerg bool hasScalarStores() const { 807*da58b97aSjoerg return HasScalarStores; 808*da58b97aSjoerg } 809*da58b97aSjoerg hasScalarAtomics()810*da58b97aSjoerg bool hasScalarAtomics() const { 811*da58b97aSjoerg return HasScalarAtomics; 812*da58b97aSjoerg } 813*da58b97aSjoerg hasLDSFPAtomics()814*da58b97aSjoerg bool hasLDSFPAtomics() const { 815*da58b97aSjoerg return GFX8Insts; 816*da58b97aSjoerg } 817*da58b97aSjoerg 818*da58b97aSjoerg /// \returns true if the subtarget has the v_permlanex16_b32 instruction. hasPermLaneX16()819*da58b97aSjoerg bool hasPermLaneX16() const { return getGeneration() >= GFX10; } 820*da58b97aSjoerg hasDPP()821*da58b97aSjoerg bool hasDPP() const { 822*da58b97aSjoerg return HasDPP; 823*da58b97aSjoerg } 824*da58b97aSjoerg hasDPPBroadcasts()825*da58b97aSjoerg bool hasDPPBroadcasts() const { 826*da58b97aSjoerg return HasDPP && getGeneration() < GFX10; 827*da58b97aSjoerg } 828*da58b97aSjoerg hasDPPWavefrontShifts()829*da58b97aSjoerg bool hasDPPWavefrontShifts() const { 830*da58b97aSjoerg return HasDPP && getGeneration() < GFX10; 831*da58b97aSjoerg } 832*da58b97aSjoerg hasDPP8()833*da58b97aSjoerg bool hasDPP8() const { 834*da58b97aSjoerg return HasDPP8; 835*da58b97aSjoerg } 836*da58b97aSjoerg has64BitDPP()837*da58b97aSjoerg bool has64BitDPP() const { 838*da58b97aSjoerg return Has64BitDPP; 839*da58b97aSjoerg } 840*da58b97aSjoerg hasPackedFP32Ops()841*da58b97aSjoerg bool hasPackedFP32Ops() const { 842*da58b97aSjoerg return HasPackedFP32Ops; 843*da58b97aSjoerg } 844*da58b97aSjoerg hasFmaakFmamkF32Insts()845*da58b97aSjoerg bool hasFmaakFmamkF32Insts() const { 846*da58b97aSjoerg return getGeneration() >= GFX10; 847*da58b97aSjoerg } 848*da58b97aSjoerg hasExtendedImageInsts()849*da58b97aSjoerg bool hasExtendedImageInsts() const { 850*da58b97aSjoerg return HasExtendedImageInsts; 851*da58b97aSjoerg } 852*da58b97aSjoerg hasR128A16()853*da58b97aSjoerg bool hasR128A16() const { 854*da58b97aSjoerg return HasR128A16; 855*da58b97aSjoerg } 856*da58b97aSjoerg hasGFX10A16()857*da58b97aSjoerg bool hasGFX10A16() const { 858*da58b97aSjoerg return HasGFX10A16; 859*da58b97aSjoerg } 860*da58b97aSjoerg hasA16()861*da58b97aSjoerg bool hasA16() const { return hasR128A16() || hasGFX10A16(); } 862*da58b97aSjoerg hasG16()863*da58b97aSjoerg bool hasG16() const { return HasG16; } 864*da58b97aSjoerg hasOffset3fBug()865*da58b97aSjoerg bool hasOffset3fBug() const { 866*da58b97aSjoerg return HasOffset3fBug; 867*da58b97aSjoerg } 868*da58b97aSjoerg hasImageStoreD16Bug()869*da58b97aSjoerg bool hasImageStoreD16Bug() const { return HasImageStoreD16Bug; } 870*da58b97aSjoerg hasImageGather4D16Bug()871*da58b97aSjoerg bool hasImageGather4D16Bug() const { return HasImageGather4D16Bug; } 872*da58b97aSjoerg hasNSAEncoding()873*da58b97aSjoerg bool hasNSAEncoding() const { return HasNSAEncoding; } 874*da58b97aSjoerg hasGFX10_BEncoding()875*da58b97aSjoerg bool hasGFX10_BEncoding() const { 876*da58b97aSjoerg return GFX10_BEncoding; 877*da58b97aSjoerg } 878*da58b97aSjoerg hasGFX10_3Insts()879*da58b97aSjoerg bool hasGFX10_3Insts() const { 880*da58b97aSjoerg return GFX10_3Insts; 881*da58b97aSjoerg } 882*da58b97aSjoerg 883*da58b97aSjoerg bool hasMadF16() const; 884*da58b97aSjoerg enableSIScheduler()885*da58b97aSjoerg bool enableSIScheduler() const { 886*da58b97aSjoerg return EnableSIScheduler; 887*da58b97aSjoerg } 888*da58b97aSjoerg loadStoreOptEnabled()889*da58b97aSjoerg bool loadStoreOptEnabled() const { 890*da58b97aSjoerg return EnableLoadStoreOpt; 891*da58b97aSjoerg } 892*da58b97aSjoerg hasSGPRInitBug()893*da58b97aSjoerg bool hasSGPRInitBug() const { 894*da58b97aSjoerg return SGPRInitBug; 895*da58b97aSjoerg } 896*da58b97aSjoerg hasNegativeScratchOffsetBug()897*da58b97aSjoerg bool hasNegativeScratchOffsetBug() const { return NegativeScratchOffsetBug; } 898*da58b97aSjoerg hasNegativeUnalignedScratchOffsetBug()899*da58b97aSjoerg bool hasNegativeUnalignedScratchOffsetBug() const { 900*da58b97aSjoerg return NegativeUnalignedScratchOffsetBug; 901*da58b97aSjoerg } 902*da58b97aSjoerg hasMFMAInlineLiteralBug()903*da58b97aSjoerg bool hasMFMAInlineLiteralBug() const { 904*da58b97aSjoerg return HasMFMAInlineLiteralBug; 905*da58b97aSjoerg } 906*da58b97aSjoerg has12DWordStoreHazard()907*da58b97aSjoerg bool has12DWordStoreHazard() const { 908*da58b97aSjoerg return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS; 909*da58b97aSjoerg } 910*da58b97aSjoerg 911*da58b97aSjoerg // \returns true if the subtarget supports DWORDX3 load/store instructions. hasDwordx3LoadStores()912*da58b97aSjoerg bool hasDwordx3LoadStores() const { 913*da58b97aSjoerg return CIInsts; 914*da58b97aSjoerg } 915*da58b97aSjoerg hasReadM0MovRelInterpHazard()916*da58b97aSjoerg bool hasReadM0MovRelInterpHazard() const { 917*da58b97aSjoerg return getGeneration() == AMDGPUSubtarget::GFX9; 918*da58b97aSjoerg } 919*da58b97aSjoerg hasReadM0SendMsgHazard()920*da58b97aSjoerg bool hasReadM0SendMsgHazard() const { 921*da58b97aSjoerg return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS && 922*da58b97aSjoerg getGeneration() <= AMDGPUSubtarget::GFX9; 923*da58b97aSjoerg } 924*da58b97aSjoerg hasVcmpxPermlaneHazard()925*da58b97aSjoerg bool hasVcmpxPermlaneHazard() const { 926*da58b97aSjoerg return HasVcmpxPermlaneHazard; 927*da58b97aSjoerg } 928*da58b97aSjoerg hasVMEMtoScalarWriteHazard()929*da58b97aSjoerg bool hasVMEMtoScalarWriteHazard() const { 930*da58b97aSjoerg return HasVMEMtoScalarWriteHazard; 931*da58b97aSjoerg } 932*da58b97aSjoerg hasSMEMtoVectorWriteHazard()933*da58b97aSjoerg bool hasSMEMtoVectorWriteHazard() const { 934*da58b97aSjoerg return HasSMEMtoVectorWriteHazard; 935*da58b97aSjoerg } 936*da58b97aSjoerg hasLDSMisalignedBug()937*da58b97aSjoerg bool hasLDSMisalignedBug() const { 938*da58b97aSjoerg return LDSMisalignedBug && !EnableCuMode; 939*da58b97aSjoerg } 940*da58b97aSjoerg hasInstFwdPrefetchBug()941*da58b97aSjoerg bool hasInstFwdPrefetchBug() const { 942*da58b97aSjoerg return HasInstFwdPrefetchBug; 943*da58b97aSjoerg } 944*da58b97aSjoerg hasVcmpxExecWARHazard()945*da58b97aSjoerg bool hasVcmpxExecWARHazard() const { 946*da58b97aSjoerg return HasVcmpxExecWARHazard; 947*da58b97aSjoerg } 948*da58b97aSjoerg hasLdsBranchVmemWARHazard()949*da58b97aSjoerg bool hasLdsBranchVmemWARHazard() const { 950*da58b97aSjoerg return HasLdsBranchVmemWARHazard; 951*da58b97aSjoerg } 952*da58b97aSjoerg hasNSAtoVMEMBug()953*da58b97aSjoerg bool hasNSAtoVMEMBug() const { 954*da58b97aSjoerg return HasNSAtoVMEMBug; 955*da58b97aSjoerg } 956*da58b97aSjoerg hasNSAClauseBug()957*da58b97aSjoerg bool hasNSAClauseBug() const { return HasNSAClauseBug; } 958*da58b97aSjoerg hasHardClauses()959*da58b97aSjoerg bool hasHardClauses() const { return getGeneration() >= GFX10; } 960*da58b97aSjoerg hasGFX90AInsts()961*da58b97aSjoerg bool hasGFX90AInsts() const { return GFX90AInsts; } 962*da58b97aSjoerg 963*da58b97aSjoerg /// Return if operations acting on VGPR tuples require even alignment. needsAlignedVGPRs()964*da58b97aSjoerg bool needsAlignedVGPRs() const { return GFX90AInsts; } 965*da58b97aSjoerg hasPackedTID()966*da58b97aSjoerg bool hasPackedTID() const { return HasPackedTID; } 967*da58b97aSjoerg 968*da58b97aSjoerg /// Return the maximum number of waves per SIMD for kernels using \p SGPRs 969*da58b97aSjoerg /// SGPRs 970*da58b97aSjoerg unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const; 971*da58b97aSjoerg 972*da58b97aSjoerg /// Return the maximum number of waves per SIMD for kernels using \p VGPRs 973*da58b97aSjoerg /// VGPRs 974*da58b97aSjoerg unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const; 975*da58b97aSjoerg 976*da58b97aSjoerg /// Return occupancy for the given function. Used LDS and a number of 977*da58b97aSjoerg /// registers if provided. 978*da58b97aSjoerg /// Note, occupancy can be affected by the scratch allocation as well, but 979*da58b97aSjoerg /// we do not have enough information to compute it. 980*da58b97aSjoerg unsigned computeOccupancy(const Function &F, unsigned LDSSize = 0, 981*da58b97aSjoerg unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const; 982*da58b97aSjoerg 983*da58b97aSjoerg /// \returns true if the flat_scratch register should be initialized with the 984*da58b97aSjoerg /// pointer to the wave's scratch memory rather than a size and offset. flatScratchIsPointer()985*da58b97aSjoerg bool flatScratchIsPointer() const { 986*da58b97aSjoerg return getGeneration() >= AMDGPUSubtarget::GFX9; 987*da58b97aSjoerg } 988*da58b97aSjoerg 989*da58b97aSjoerg /// \returns true if the flat_scratch register is initialized by the HW. 990*da58b97aSjoerg /// In this case it is readonly. flatScratchIsArchitected()991*da58b97aSjoerg bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; } 992*da58b97aSjoerg 993*da58b97aSjoerg /// \returns true if the machine has merged shaders in which s0-s7 are 994*da58b97aSjoerg /// reserved by the hardware and user SGPRs start at s8 hasMergedShaders()995*da58b97aSjoerg bool hasMergedShaders() const { 996*da58b97aSjoerg return getGeneration() >= GFX9; 997*da58b97aSjoerg } 998*da58b97aSjoerg 999*da58b97aSjoerg /// \returns SGPR allocation granularity supported by the subtarget. getSGPRAllocGranule()1000*da58b97aSjoerg unsigned getSGPRAllocGranule() const { 1001*da58b97aSjoerg return AMDGPU::IsaInfo::getSGPRAllocGranule(this); 1002*da58b97aSjoerg } 1003*da58b97aSjoerg 1004*da58b97aSjoerg /// \returns SGPR encoding granularity supported by the subtarget. getSGPREncodingGranule()1005*da58b97aSjoerg unsigned getSGPREncodingGranule() const { 1006*da58b97aSjoerg return AMDGPU::IsaInfo::getSGPREncodingGranule(this); 1007*da58b97aSjoerg } 1008*da58b97aSjoerg 1009*da58b97aSjoerg /// \returns Total number of SGPRs supported by the subtarget. getTotalNumSGPRs()1010*da58b97aSjoerg unsigned getTotalNumSGPRs() const { 1011*da58b97aSjoerg return AMDGPU::IsaInfo::getTotalNumSGPRs(this); 1012*da58b97aSjoerg } 1013*da58b97aSjoerg 1014*da58b97aSjoerg /// \returns Addressable number of SGPRs supported by the subtarget. getAddressableNumSGPRs()1015*da58b97aSjoerg unsigned getAddressableNumSGPRs() const { 1016*da58b97aSjoerg return AMDGPU::IsaInfo::getAddressableNumSGPRs(this); 1017*da58b97aSjoerg } 1018*da58b97aSjoerg 1019*da58b97aSjoerg /// \returns Minimum number of SGPRs that meets the given number of waves per 1020*da58b97aSjoerg /// execution unit requirement supported by the subtarget. getMinNumSGPRs(unsigned WavesPerEU)1021*da58b97aSjoerg unsigned getMinNumSGPRs(unsigned WavesPerEU) const { 1022*da58b97aSjoerg return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU); 1023*da58b97aSjoerg } 1024*da58b97aSjoerg 1025*da58b97aSjoerg /// \returns Maximum number of SGPRs that meets the given number of waves per 1026*da58b97aSjoerg /// execution unit requirement supported by the subtarget. getMaxNumSGPRs(unsigned WavesPerEU,bool Addressable)1027*da58b97aSjoerg unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const { 1028*da58b97aSjoerg return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable); 1029*da58b97aSjoerg } 1030*da58b97aSjoerg 1031*da58b97aSjoerg /// \returns Reserved number of SGPRs for given function \p MF. 1032*da58b97aSjoerg unsigned getReservedNumSGPRs(const MachineFunction &MF) const; 1033*da58b97aSjoerg 1034*da58b97aSjoerg /// \returns Maximum number of SGPRs that meets number of waves per execution 1035*da58b97aSjoerg /// unit requirement for function \p MF, or number of SGPRs explicitly 1036*da58b97aSjoerg /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF. 1037*da58b97aSjoerg /// 1038*da58b97aSjoerg /// \returns Value that meets number of waves per execution unit requirement 1039*da58b97aSjoerg /// if explicitly requested value cannot be converted to integer, violates 1040*da58b97aSjoerg /// subtarget's specifications, or does not meet number of waves per execution 1041*da58b97aSjoerg /// unit requirement. 1042*da58b97aSjoerg unsigned getMaxNumSGPRs(const MachineFunction &MF) const; 1043*da58b97aSjoerg 1044*da58b97aSjoerg /// \returns VGPR allocation granularity supported by the subtarget. getVGPRAllocGranule()1045*da58b97aSjoerg unsigned getVGPRAllocGranule() const { 1046*da58b97aSjoerg return AMDGPU::IsaInfo::getVGPRAllocGranule(this); 1047*da58b97aSjoerg } 1048*da58b97aSjoerg 1049*da58b97aSjoerg /// \returns VGPR encoding granularity supported by the subtarget. getVGPREncodingGranule()1050*da58b97aSjoerg unsigned getVGPREncodingGranule() const { 1051*da58b97aSjoerg return AMDGPU::IsaInfo::getVGPREncodingGranule(this); 1052*da58b97aSjoerg } 1053*da58b97aSjoerg 1054*da58b97aSjoerg /// \returns Total number of VGPRs supported by the subtarget. getTotalNumVGPRs()1055*da58b97aSjoerg unsigned getTotalNumVGPRs() const { 1056*da58b97aSjoerg return AMDGPU::IsaInfo::getTotalNumVGPRs(this); 1057*da58b97aSjoerg } 1058*da58b97aSjoerg 1059*da58b97aSjoerg /// \returns Addressable number of VGPRs supported by the subtarget. getAddressableNumVGPRs()1060*da58b97aSjoerg unsigned getAddressableNumVGPRs() const { 1061*da58b97aSjoerg return AMDGPU::IsaInfo::getAddressableNumVGPRs(this); 1062*da58b97aSjoerg } 1063*da58b97aSjoerg 1064*da58b97aSjoerg /// \returns Minimum number of VGPRs that meets given number of waves per 1065*da58b97aSjoerg /// execution unit requirement supported by the subtarget. getMinNumVGPRs(unsigned WavesPerEU)1066*da58b97aSjoerg unsigned getMinNumVGPRs(unsigned WavesPerEU) const { 1067*da58b97aSjoerg return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU); 1068*da58b97aSjoerg } 1069*da58b97aSjoerg 1070*da58b97aSjoerg /// \returns Maximum number of VGPRs that meets given number of waves per 1071*da58b97aSjoerg /// execution unit requirement supported by the subtarget. getMaxNumVGPRs(unsigned WavesPerEU)1072*da58b97aSjoerg unsigned getMaxNumVGPRs(unsigned WavesPerEU) const { 1073*da58b97aSjoerg return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU); 1074*da58b97aSjoerg } 1075*da58b97aSjoerg 1076*da58b97aSjoerg /// \returns Maximum number of VGPRs that meets number of waves per execution 1077*da58b97aSjoerg /// unit requirement for function \p MF, or number of VGPRs explicitly 1078*da58b97aSjoerg /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF. 1079*da58b97aSjoerg /// 1080*da58b97aSjoerg /// \returns Value that meets number of waves per execution unit requirement 1081*da58b97aSjoerg /// if explicitly requested value cannot be converted to integer, violates 1082*da58b97aSjoerg /// subtarget's specifications, or does not meet number of waves per execution 1083*da58b97aSjoerg /// unit requirement. 1084*da58b97aSjoerg unsigned getMaxNumVGPRs(const MachineFunction &MF) const; 1085*da58b97aSjoerg 1086*da58b97aSjoerg void getPostRAMutations( 1087*da58b97aSjoerg std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) 1088*da58b97aSjoerg const override; 1089*da58b97aSjoerg isWave32()1090*da58b97aSjoerg bool isWave32() const { 1091*da58b97aSjoerg return getWavefrontSize() == 32; 1092*da58b97aSjoerg } 1093*da58b97aSjoerg isWave64()1094*da58b97aSjoerg bool isWave64() const { 1095*da58b97aSjoerg return getWavefrontSize() == 64; 1096*da58b97aSjoerg } 1097*da58b97aSjoerg getBoolRC()1098*da58b97aSjoerg const TargetRegisterClass *getBoolRC() const { 1099*da58b97aSjoerg return getRegisterInfo()->getBoolRC(); 1100*da58b97aSjoerg } 1101*da58b97aSjoerg 1102*da58b97aSjoerg /// \returns Maximum number of work groups per compute unit supported by the 1103*da58b97aSjoerg /// subtarget and limited by given \p FlatWorkGroupSize. getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize)1104*da58b97aSjoerg unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override { 1105*da58b97aSjoerg return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize); 1106*da58b97aSjoerg } 1107*da58b97aSjoerg 1108*da58b97aSjoerg /// \returns Minimum flat work group size supported by the subtarget. getMinFlatWorkGroupSize()1109*da58b97aSjoerg unsigned getMinFlatWorkGroupSize() const override { 1110*da58b97aSjoerg return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this); 1111*da58b97aSjoerg } 1112*da58b97aSjoerg 1113*da58b97aSjoerg /// \returns Maximum flat work group size supported by the subtarget. getMaxFlatWorkGroupSize()1114*da58b97aSjoerg unsigned getMaxFlatWorkGroupSize() const override { 1115*da58b97aSjoerg return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this); 1116*da58b97aSjoerg } 1117*da58b97aSjoerg 1118*da58b97aSjoerg /// \returns Number of waves per execution unit required to support the given 1119*da58b97aSjoerg /// \p FlatWorkGroupSize. 1120*da58b97aSjoerg unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize)1121*da58b97aSjoerg getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override { 1122*da58b97aSjoerg return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(this, FlatWorkGroupSize); 1123*da58b97aSjoerg } 1124*da58b97aSjoerg 1125*da58b97aSjoerg /// \returns Minimum number of waves per execution unit supported by the 1126*da58b97aSjoerg /// subtarget. getMinWavesPerEU()1127*da58b97aSjoerg unsigned getMinWavesPerEU() const override { 1128*da58b97aSjoerg return AMDGPU::IsaInfo::getMinWavesPerEU(this); 1129*da58b97aSjoerg } 1130*da58b97aSjoerg 1131*da58b97aSjoerg void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, 1132*da58b97aSjoerg SDep &Dep) const override; 1133*da58b97aSjoerg }; 1134*da58b97aSjoerg 1135*da58b97aSjoerg } // end namespace llvm 1136*da58b97aSjoerg 1137*da58b97aSjoerg #endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H 1138