1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Base class for AMDGPU specific classes of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
17 #include "llvm/ADT/Triple.h"
18 #include "llvm/IR/CallingConv.h"
19 #include "llvm/Support/Alignment.h"
20 
21 namespace llvm {
22 
23 enum AMDGPUDwarfFlavour : unsigned;
24 class Function;
25 class Instruction;
26 class MachineFunction;
27 class TargetMachine;
28 
29 class AMDGPUSubtarget {
30 public:
31   enum Generation {
32     INVALID = 0,
33     R600 = 1,
34     R700 = 2,
35     EVERGREEN = 3,
36     NORTHERN_ISLANDS = 4,
37     SOUTHERN_ISLANDS = 5,
38     SEA_ISLANDS = 6,
39     VOLCANIC_ISLANDS = 7,
40     GFX9 = 8,
41     GFX10 = 9,
42     GFX11 = 10
43   };
44 
45 private:
46   Triple TargetTriple;
47 
48 protected:
49   bool GCN3Encoding = false;
50   bool Has16BitInsts = false;
51   bool HasTrue16BitInsts = false;
52   bool HasMadMixInsts = false;
53   bool HasMadMacF32Insts = false;
54   bool HasDsSrc2Insts = false;
55   bool HasSDWA = false;
56   bool HasVOP3PInsts = false;
57   bool HasMulI24 = true;
58   bool HasMulU24 = true;
59   bool HasSMulHi = false;
60   bool HasInv2PiInlineImm = false;
61   bool HasFminFmaxLegacy = true;
62   bool EnablePromoteAlloca = false;
63   bool HasTrigReducedRange = false;
64   unsigned MaxWavesPerEU = 10;
65   unsigned LocalMemorySize = 0;
66   char WavefrontSizeLog2 = 0;
67 
68 public:
69   AMDGPUSubtarget(const Triple &TT);
70 
71   static const AMDGPUSubtarget &get(const MachineFunction &MF);
72   static const AMDGPUSubtarget &get(const TargetMachine &TM,
73                                     const Function &F);
74 
75   /// \returns Default range flat work group size for a calling convention.
76   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
77 
78   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
79   /// for function \p F, or minimum/maximum flat work group sizes explicitly
80   /// requested using "amdgpu-flat-work-group-size" attribute attached to
81   /// function \p F.
82   ///
83   /// \returns Subtarget's default values if explicitly requested values cannot
84   /// be converted to integer, or violate subtarget's specifications.
85   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
86 
87   /// \returns Subtarget's default pair of minimum/maximum number of waves per
88   /// execution unit for function \p F, or minimum/maximum number of waves per
89   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
90   /// attached to function \p F.
91   ///
92   /// \returns Subtarget's default values if explicitly requested values cannot
93   /// be converted to integer, violate subtarget's specifications, or are not
94   /// compatible with minimum/maximum number of waves limited by flat work group
95   /// size, register usage, and/or lds usage.
96   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const {
97     // Default/requested minimum/maximum flat work group sizes.
98     std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
99     return getWavesPerEU(F, FlatWorkGroupSizes);
100   }
101 
102   /// Overload which uses the specified values for the flat work group sizes,
103   /// rather than querying the function itself. \p FlatWorkGroupSizes Should
104   /// correspond to the function's value for getFlatWorkGroupSizes.
105   std::pair<unsigned, unsigned>
106   getWavesPerEU(const Function &F,
107                 std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
108 
109   /// Return the amount of LDS that can be used that will not restrict the
110   /// occupancy lower than WaveCount.
111   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
112                                            const Function &) const;
113 
114   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
115   /// the given LDS memory size is the only constraint.
116   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
117 
118   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
119 
120   bool isAmdHsaOS() const {
121     return TargetTriple.getOS() == Triple::AMDHSA;
122   }
123 
124   bool isAmdPalOS() const {
125     return TargetTriple.getOS() == Triple::AMDPAL;
126   }
127 
128   bool isMesa3DOS() const {
129     return TargetTriple.getOS() == Triple::Mesa3D;
130   }
131 
132   bool isMesaKernel(const Function &F) const;
133 
134   bool isAmdHsaOrMesa(const Function &F) const {
135     return isAmdHsaOS() || isMesaKernel(F);
136   }
137 
138   bool isGCN() const {
139     return TargetTriple.getArch() == Triple::amdgcn;
140   }
141 
142   bool isGCN3Encoding() const {
143     return GCN3Encoding;
144   }
145 
146   bool has16BitInsts() const {
147     return Has16BitInsts;
148   }
149 
150   bool hasTrue16BitInsts() const { return HasTrue16BitInsts; }
151 
152   bool hasMadMixInsts() const {
153     return HasMadMixInsts;
154   }
155 
156   bool hasMadMacF32Insts() const {
157     return HasMadMacF32Insts || !isGCN();
158   }
159 
160   bool hasDsSrc2Insts() const {
161     return HasDsSrc2Insts;
162   }
163 
164   bool hasSDWA() const {
165     return HasSDWA;
166   }
167 
168   bool hasVOP3PInsts() const {
169     return HasVOP3PInsts;
170   }
171 
172   bool hasMulI24() const {
173     return HasMulI24;
174   }
175 
176   bool hasMulU24() const {
177     return HasMulU24;
178   }
179 
180   bool hasSMulHi() const {
181     return HasSMulHi;
182   }
183 
184   bool hasInv2PiInlineImm() const {
185     return HasInv2PiInlineImm;
186   }
187 
188   bool hasFminFmaxLegacy() const {
189     return HasFminFmaxLegacy;
190   }
191 
192   bool hasTrigReducedRange() const {
193     return HasTrigReducedRange;
194   }
195 
196   bool isPromoteAllocaEnabled() const {
197     return EnablePromoteAlloca;
198   }
199 
200   unsigned getWavefrontSize() const {
201     return 1 << WavefrontSizeLog2;
202   }
203 
204   unsigned getWavefrontSizeLog2() const {
205     return WavefrontSizeLog2;
206   }
207 
208   unsigned getLocalMemorySize() const {
209     return LocalMemorySize;
210   }
211 
212   Align getAlignmentForImplicitArgPtr() const {
213     return isAmdHsaOS() ? Align(8) : Align(4);
214   }
215 
216   /// Returns the offset in bytes from the start of the input buffer
217   ///        of the first explicit kernel argument.
218   unsigned getExplicitKernelArgOffset(const Function &F) const {
219     switch (TargetTriple.getOS()) {
220     case Triple::AMDHSA:
221     case Triple::AMDPAL:
222     case Triple::Mesa3D:
223       return 0;
224     case Triple::UnknownOS:
225     default:
226       // For legacy reasons unknown/other is treated as a different version of
227       // mesa.
228       return 36;
229     }
230 
231     llvm_unreachable("invalid triple OS");
232   }
233 
234   /// \returns Maximum number of work groups per compute unit supported by the
235   /// subtarget and limited by given \p FlatWorkGroupSize.
236   virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
237 
238   /// \returns Minimum flat work group size supported by the subtarget.
239   virtual unsigned getMinFlatWorkGroupSize() const = 0;
240 
241   /// \returns Maximum flat work group size supported by the subtarget.
242   virtual unsigned getMaxFlatWorkGroupSize() const = 0;
243 
244   /// \returns Number of waves per execution unit required to support the given
245   /// \p FlatWorkGroupSize.
246   virtual unsigned
247   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
248 
249   /// \returns Minimum number of waves per execution unit supported by the
250   /// subtarget.
251   virtual unsigned getMinWavesPerEU() const = 0;
252 
253   /// \returns Maximum number of waves per execution unit supported by the
254   /// subtarget without any kind of limitation.
255   unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
256 
257   /// Return the maximum workitem ID value in the function, for the given (0, 1,
258   /// 2) dimension.
259   unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
260 
261   /// Creates value range metadata on an workitemid.* intrinsic call or load.
262   bool makeLIDRangeMetadata(Instruction *I) const;
263 
264   /// \returns Number of bytes of arguments that are passed to a shader or
265   /// kernel in addition to the explicit ones declared for the function.
266   unsigned getImplicitArgNumBytes(const Function &F) const;
267   uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
268   unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
269 
270   /// \returns Corresponding DWARF register number mapping flavour for the
271   /// \p WavefrontSize.
272   AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
273 
274   virtual ~AMDGPUSubtarget() = default;
275 };
276 
277 } // end namespace llvm
278 
279 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
280