1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Base class for AMDGPU specific classes of TargetSubtarget. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 16 17 #include "llvm/ADT/Triple.h" 18 #include "llvm/IR/CallingConv.h" 19 #include "llvm/Support/Alignment.h" 20 21 namespace llvm { 22 23 enum AMDGPUDwarfFlavour : unsigned; 24 class Function; 25 class Instruction; 26 class MachineFunction; 27 class TargetMachine; 28 29 class AMDGPUSubtarget { 30 public: 31 enum Generation { 32 INVALID = 0, 33 R600 = 1, 34 R700 = 2, 35 EVERGREEN = 3, 36 NORTHERN_ISLANDS = 4, 37 SOUTHERN_ISLANDS = 5, 38 SEA_ISLANDS = 6, 39 VOLCANIC_ISLANDS = 7, 40 GFX9 = 8, 41 GFX10 = 9 42 }; 43 44 private: 45 Triple TargetTriple; 46 47 protected: 48 bool GCN3Encoding; 49 bool Has16BitInsts; 50 bool HasMadMixInsts; 51 bool HasMadMacF32Insts; 52 bool HasDsSrc2Insts; 53 bool HasSDWA; 54 bool HasVOP3PInsts; 55 bool HasMulI24; 56 bool HasMulU24; 57 bool HasInv2PiInlineImm; 58 bool HasFminFmaxLegacy; 59 bool EnablePromoteAlloca; 60 bool HasTrigReducedRange; 61 unsigned MaxWavesPerEU; 62 unsigned LocalMemorySize; 63 char WavefrontSizeLog2; 64 65 public: 66 AMDGPUSubtarget(const Triple &TT); 67 68 static const AMDGPUSubtarget &get(const MachineFunction &MF); 69 static const AMDGPUSubtarget &get(const TargetMachine &TM, 70 const Function &F); 71 72 /// \returns Default range flat work group size for a calling convention. 73 std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const; 74 75 /// \returns Subtarget's default pair of minimum/maximum flat work group sizes 76 /// for function \p F, or minimum/maximum flat work group sizes explicitly 77 /// requested using "amdgpu-flat-work-group-size" attribute attached to 78 /// function \p F. 79 /// 80 /// \returns Subtarget's default values if explicitly requested values cannot 81 /// be converted to integer, or violate subtarget's specifications. 82 std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const; 83 84 /// \returns Subtarget's default pair of minimum/maximum number of waves per 85 /// execution unit for function \p F, or minimum/maximum number of waves per 86 /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute 87 /// attached to function \p F. 88 /// 89 /// \returns Subtarget's default values if explicitly requested values cannot 90 /// be converted to integer, violate subtarget's specifications, or are not 91 /// compatible with minimum/maximum number of waves limited by flat work group 92 /// size, register usage, and/or lds usage. 93 std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const; 94 95 /// Return the amount of LDS that can be used that will not restrict the 96 /// occupancy lower than WaveCount. 97 unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, 98 const Function &) const; 99 100 /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if 101 /// the given LDS memory size is the only constraint. 102 unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const; 103 104 unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const; 105 isAmdHsaOS()106 bool isAmdHsaOS() const { 107 return TargetTriple.getOS() == Triple::AMDHSA; 108 } 109 isAmdPalOS()110 bool isAmdPalOS() const { 111 return TargetTriple.getOS() == Triple::AMDPAL; 112 } 113 isMesa3DOS()114 bool isMesa3DOS() const { 115 return TargetTriple.getOS() == Triple::Mesa3D; 116 } 117 118 bool isMesaKernel(const Function &F) const; 119 isAmdHsaOrMesa(const Function & F)120 bool isAmdHsaOrMesa(const Function &F) const { 121 return isAmdHsaOS() || isMesaKernel(F); 122 } 123 isGCN()124 bool isGCN() const { 125 return TargetTriple.getArch() == Triple::amdgcn; 126 } 127 isGCN3Encoding()128 bool isGCN3Encoding() const { 129 return GCN3Encoding; 130 } 131 has16BitInsts()132 bool has16BitInsts() const { 133 return Has16BitInsts; 134 } 135 hasMadMixInsts()136 bool hasMadMixInsts() const { 137 return HasMadMixInsts; 138 } 139 hasMadMacF32Insts()140 bool hasMadMacF32Insts() const { 141 return HasMadMacF32Insts || !isGCN(); 142 } 143 hasDsSrc2Insts()144 bool hasDsSrc2Insts() const { 145 return HasDsSrc2Insts; 146 } 147 hasSDWA()148 bool hasSDWA() const { 149 return HasSDWA; 150 } 151 hasVOP3PInsts()152 bool hasVOP3PInsts() const { 153 return HasVOP3PInsts; 154 } 155 hasMulI24()156 bool hasMulI24() const { 157 return HasMulI24; 158 } 159 hasMulU24()160 bool hasMulU24() const { 161 return HasMulU24; 162 } 163 hasInv2PiInlineImm()164 bool hasInv2PiInlineImm() const { 165 return HasInv2PiInlineImm; 166 } 167 hasFminFmaxLegacy()168 bool hasFminFmaxLegacy() const { 169 return HasFminFmaxLegacy; 170 } 171 hasTrigReducedRange()172 bool hasTrigReducedRange() const { 173 return HasTrigReducedRange; 174 } 175 isPromoteAllocaEnabled()176 bool isPromoteAllocaEnabled() const { 177 return EnablePromoteAlloca; 178 } 179 getWavefrontSize()180 unsigned getWavefrontSize() const { 181 return 1 << WavefrontSizeLog2; 182 } 183 getWavefrontSizeLog2()184 unsigned getWavefrontSizeLog2() const { 185 return WavefrontSizeLog2; 186 } 187 getLocalMemorySize()188 unsigned getLocalMemorySize() const { 189 return LocalMemorySize; 190 } 191 getAlignmentForImplicitArgPtr()192 Align getAlignmentForImplicitArgPtr() const { 193 return isAmdHsaOS() ? Align(8) : Align(4); 194 } 195 196 /// Returns the offset in bytes from the start of the input buffer 197 /// of the first explicit kernel argument. getExplicitKernelArgOffset(const Function & F)198 unsigned getExplicitKernelArgOffset(const Function &F) const { 199 return isAmdHsaOrMesa(F) ? 0 : 36; 200 } 201 202 /// \returns Maximum number of work groups per compute unit supported by the 203 /// subtarget and limited by given \p FlatWorkGroupSize. 204 virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0; 205 206 /// \returns Minimum flat work group size supported by the subtarget. 207 virtual unsigned getMinFlatWorkGroupSize() const = 0; 208 209 /// \returns Maximum flat work group size supported by the subtarget. 210 virtual unsigned getMaxFlatWorkGroupSize() const = 0; 211 212 /// \returns Number of waves per execution unit required to support the given 213 /// \p FlatWorkGroupSize. 214 virtual unsigned 215 getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0; 216 217 /// \returns Minimum number of waves per execution unit supported by the 218 /// subtarget. 219 virtual unsigned getMinWavesPerEU() const = 0; 220 221 /// \returns Maximum number of waves per execution unit supported by the 222 /// subtarget without any kind of limitation. getMaxWavesPerEU()223 unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; } 224 225 /// Return the maximum workitem ID value in the function, for the given (0, 1, 226 /// 2) dimension. 227 unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const; 228 229 /// Creates value range metadata on an workitemid.* intrinsic call or load. 230 bool makeLIDRangeMetadata(Instruction *I) const; 231 232 /// \returns Number of bytes of arguments that are passed to a shader or 233 /// kernel in addition to the explicit ones declared for the function. 234 unsigned getImplicitArgNumBytes(const Function &F) const; 235 uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const; 236 unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const; 237 238 /// \returns Corresponsing DWARF register number mapping flavour for the 239 /// \p WavefrontSize. 240 AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const; 241 ~AMDGPUSubtarget()242 virtual ~AMDGPUSubtarget() {} 243 }; 244 245 } // end namespace llvm 246 247 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H 248