1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Base class for AMDGPU specific classes of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "llvm/ADT/Triple.h"
18 #include "llvm/IR/CallingConv.h"
19 #include "llvm/Support/Alignment.h"
20 
21 namespace llvm {
22 
23 enum AMDGPUDwarfFlavour : unsigned;
24 class Function;
25 class Instruction;
26 class MachineFunction;
27 class TargetMachine;
28 
29 class AMDGPUSubtarget {
30 public:
31   enum Generation {
32     INVALID = 0,
33     R600 = 1,
34     R700 = 2,
35     EVERGREEN = 3,
36     NORTHERN_ISLANDS = 4,
37     SOUTHERN_ISLANDS = 5,
38     SEA_ISLANDS = 6,
39     VOLCANIC_ISLANDS = 7,
40     GFX9 = 8,
41     GFX10 = 9
42   };
43 
44 private:
45   Triple TargetTriple;
46 
47 protected:
48   bool GCN3Encoding;
49   bool Has16BitInsts;
50   bool HasMadMixInsts;
51   bool HasMadMacF32Insts;
52   bool HasDsSrc2Insts;
53   bool HasSDWA;
54   bool HasVOP3PInsts;
55   bool HasMulI24;
56   bool HasMulU24;
57   bool HasInv2PiInlineImm;
58   bool HasFminFmaxLegacy;
59   bool EnablePromoteAlloca;
60   bool HasTrigReducedRange;
61   unsigned MaxWavesPerEU;
62   unsigned LocalMemorySize;
63   char WavefrontSizeLog2;
64 
65 public:
66   AMDGPUSubtarget(const Triple &TT);
67 
68   static const AMDGPUSubtarget &get(const MachineFunction &MF);
69   static const AMDGPUSubtarget &get(const TargetMachine &TM,
70                                     const Function &F);
71 
72   /// \returns Default range flat work group size for a calling convention.
73   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
74 
75   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
76   /// for function \p F, or minimum/maximum flat work group sizes explicitly
77   /// requested using "amdgpu-flat-work-group-size" attribute attached to
78   /// function \p F.
79   ///
80   /// \returns Subtarget's default values if explicitly requested values cannot
81   /// be converted to integer, or violate subtarget's specifications.
82   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
83 
84   /// \returns Subtarget's default pair of minimum/maximum number of waves per
85   /// execution unit for function \p F, or minimum/maximum number of waves per
86   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
87   /// attached to function \p F.
88   ///
89   /// \returns Subtarget's default values if explicitly requested values cannot
90   /// be converted to integer, violate subtarget's specifications, or are not
91   /// compatible with minimum/maximum number of waves limited by flat work group
92   /// size, register usage, and/or lds usage.
93   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
94 
95   /// Return the amount of LDS that can be used that will not restrict the
96   /// occupancy lower than WaveCount.
97   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
98                                            const Function &) const;
99 
100   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
101   /// the given LDS memory size is the only constraint.
102   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
103 
104   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
105 
isAmdHsaOS()106   bool isAmdHsaOS() const {
107     return TargetTriple.getOS() == Triple::AMDHSA;
108   }
109 
isAmdPalOS()110   bool isAmdPalOS() const {
111     return TargetTriple.getOS() == Triple::AMDPAL;
112   }
113 
isMesa3DOS()114   bool isMesa3DOS() const {
115     return TargetTriple.getOS() == Triple::Mesa3D;
116   }
117 
118   bool isMesaKernel(const Function &F) const;
119 
isAmdHsaOrMesa(const Function & F)120   bool isAmdHsaOrMesa(const Function &F) const {
121     return isAmdHsaOS() || isMesaKernel(F);
122   }
123 
isGCN()124   bool isGCN() const {
125     return TargetTriple.getArch() == Triple::amdgcn;
126   }
127 
isGCN3Encoding()128   bool isGCN3Encoding() const {
129     return GCN3Encoding;
130   }
131 
has16BitInsts()132   bool has16BitInsts() const {
133     return Has16BitInsts;
134   }
135 
hasMadMixInsts()136   bool hasMadMixInsts() const {
137     return HasMadMixInsts;
138   }
139 
hasMadMacF32Insts()140   bool hasMadMacF32Insts() const {
141     return HasMadMacF32Insts || !isGCN();
142   }
143 
hasDsSrc2Insts()144   bool hasDsSrc2Insts() const {
145     return HasDsSrc2Insts;
146   }
147 
hasSDWA()148   bool hasSDWA() const {
149     return HasSDWA;
150   }
151 
hasVOP3PInsts()152   bool hasVOP3PInsts() const {
153     return HasVOP3PInsts;
154   }
155 
hasMulI24()156   bool hasMulI24() const {
157     return HasMulI24;
158   }
159 
hasMulU24()160   bool hasMulU24() const {
161     return HasMulU24;
162   }
163 
hasInv2PiInlineImm()164   bool hasInv2PiInlineImm() const {
165     return HasInv2PiInlineImm;
166   }
167 
hasFminFmaxLegacy()168   bool hasFminFmaxLegacy() const {
169     return HasFminFmaxLegacy;
170   }
171 
hasTrigReducedRange()172   bool hasTrigReducedRange() const {
173     return HasTrigReducedRange;
174   }
175 
isPromoteAllocaEnabled()176   bool isPromoteAllocaEnabled() const {
177     return EnablePromoteAlloca;
178   }
179 
getWavefrontSize()180   unsigned getWavefrontSize() const {
181     return 1 << WavefrontSizeLog2;
182   }
183 
getWavefrontSizeLog2()184   unsigned getWavefrontSizeLog2() const {
185     return WavefrontSizeLog2;
186   }
187 
getLocalMemorySize()188   unsigned getLocalMemorySize() const {
189     return LocalMemorySize;
190   }
191 
getAlignmentForImplicitArgPtr()192   Align getAlignmentForImplicitArgPtr() const {
193     return isAmdHsaOS() ? Align(8) : Align(4);
194   }
195 
196   /// Returns the offset in bytes from the start of the input buffer
197   ///        of the first explicit kernel argument.
getExplicitKernelArgOffset(const Function & F)198   unsigned getExplicitKernelArgOffset(const Function &F) const {
199     return isAmdHsaOrMesa(F) ? 0 : 36;
200   }
201 
202   /// \returns Maximum number of work groups per compute unit supported by the
203   /// subtarget and limited by given \p FlatWorkGroupSize.
204   virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
205 
206   /// \returns Minimum flat work group size supported by the subtarget.
207   virtual unsigned getMinFlatWorkGroupSize() const = 0;
208 
209   /// \returns Maximum flat work group size supported by the subtarget.
210   virtual unsigned getMaxFlatWorkGroupSize() const = 0;
211 
212   /// \returns Number of waves per execution unit required to support the given
213   /// \p FlatWorkGroupSize.
214   virtual unsigned
215   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
216 
217   /// \returns Minimum number of waves per execution unit supported by the
218   /// subtarget.
219   virtual unsigned getMinWavesPerEU() const = 0;
220 
221   /// \returns Maximum number of waves per execution unit supported by the
222   /// subtarget without any kind of limitation.
getMaxWavesPerEU()223   unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
224 
225   /// Return the maximum workitem ID value in the function, for the given (0, 1,
226   /// 2) dimension.
227   unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
228 
229   /// Creates value range metadata on an workitemid.* intrinsic call or load.
230   bool makeLIDRangeMetadata(Instruction *I) const;
231 
232   /// \returns Number of bytes of arguments that are passed to a shader or
233   /// kernel in addition to the explicit ones declared for the function.
234   unsigned getImplicitArgNumBytes(const Function &F) const;
235   uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
236   unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
237 
238   /// \returns Corresponsing DWARF register number mapping flavour for the
239   /// \p WavefrontSize.
240   AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
241 
~AMDGPUSubtarget()242   virtual ~AMDGPUSubtarget() {}
243 };
244 
245 } // end namespace llvm
246 
247 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
248