1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Base class for AMDGPU specific classes of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"
#include <cstdint>
#include <utility>
20 
21 namespace llvm {
22 
23 enum AMDGPUDwarfFlavour : unsigned;
24 class Function;
25 class Instruction;
26 class MachineFunction;
27 class TargetMachine;
28 
29 class AMDGPUSubtarget {
30 public:
31   enum Generation {
32     INVALID = 0,
33     R600 = 1,
34     R700 = 2,
35     EVERGREEN = 3,
36     NORTHERN_ISLANDS = 4,
37     SOUTHERN_ISLANDS = 5,
38     SEA_ISLANDS = 6,
39     VOLCANIC_ISLANDS = 7,
40     GFX9 = 8,
41     GFX10 = 9
42   };
43 
44 private:
45   Triple TargetTriple;
46 
47 protected:
48   bool GCN3Encoding;
49   bool Has16BitInsts;
50   bool HasMadMixInsts;
51   bool HasMadMacF32Insts;
52   bool HasDsSrc2Insts;
53   bool HasSDWA;
54   bool HasVOP3PInsts;
55   bool HasMulI24;
56   bool HasMulU24;
57   bool HasSMulHi;
58   bool HasInv2PiInlineImm;
59   bool HasFminFmaxLegacy;
60   bool EnablePromoteAlloca;
61   bool HasTrigReducedRange;
62   unsigned MaxWavesPerEU;
63   unsigned LocalMemorySize;
64   char WavefrontSizeLog2;
65 
66 public:
67   AMDGPUSubtarget(const Triple &TT);
68 
69   static const AMDGPUSubtarget &get(const MachineFunction &MF);
70   static const AMDGPUSubtarget &get(const TargetMachine &TM,
71                                     const Function &F);
72 
73   /// \returns Default range flat work group size for a calling convention.
74   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
75 
76   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
77   /// for function \p F, or minimum/maximum flat work group sizes explicitly
78   /// requested using "amdgpu-flat-work-group-size" attribute attached to
79   /// function \p F.
80   ///
81   /// \returns Subtarget's default values if explicitly requested values cannot
82   /// be converted to integer, or violate subtarget's specifications.
83   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
84 
85   /// \returns Subtarget's default pair of minimum/maximum number of waves per
86   /// execution unit for function \p F, or minimum/maximum number of waves per
87   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
88   /// attached to function \p F.
89   ///
90   /// \returns Subtarget's default values if explicitly requested values cannot
91   /// be converted to integer, violate subtarget's specifications, or are not
92   /// compatible with minimum/maximum number of waves limited by flat work group
93   /// size, register usage, and/or lds usage.
94   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
95 
96   /// Return the amount of LDS that can be used that will not restrict the
97   /// occupancy lower than WaveCount.
98   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
99                                            const Function &) const;
100 
101   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
102   /// the given LDS memory size is the only constraint.
103   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
104 
105   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
106 
107   bool isAmdHsaOS() const {
108     return TargetTriple.getOS() == Triple::AMDHSA;
109   }
110 
111   bool isAmdPalOS() const {
112     return TargetTriple.getOS() == Triple::AMDPAL;
113   }
114 
115   bool isMesa3DOS() const {
116     return TargetTriple.getOS() == Triple::Mesa3D;
117   }
118 
119   bool isMesaKernel(const Function &F) const;
120 
121   bool isAmdHsaOrMesa(const Function &F) const {
122     return isAmdHsaOS() || isMesaKernel(F);
123   }
124 
125   bool isGCN() const {
126     return TargetTriple.getArch() == Triple::amdgcn;
127   }
128 
129   bool isGCN3Encoding() const {
130     return GCN3Encoding;
131   }
132 
133   bool has16BitInsts() const {
134     return Has16BitInsts;
135   }
136 
137   bool hasMadMixInsts() const {
138     return HasMadMixInsts;
139   }
140 
141   bool hasMadMacF32Insts() const {
142     return HasMadMacF32Insts || !isGCN();
143   }
144 
145   bool hasDsSrc2Insts() const {
146     return HasDsSrc2Insts;
147   }
148 
149   bool hasSDWA() const {
150     return HasSDWA;
151   }
152 
153   bool hasVOP3PInsts() const {
154     return HasVOP3PInsts;
155   }
156 
157   bool hasMulI24() const {
158     return HasMulI24;
159   }
160 
161   bool hasMulU24() const {
162     return HasMulU24;
163   }
164 
165   bool hasSMulHi() const {
166     return HasSMulHi;
167   }
168 
169   bool hasInv2PiInlineImm() const {
170     return HasInv2PiInlineImm;
171   }
172 
173   bool hasFminFmaxLegacy() const {
174     return HasFminFmaxLegacy;
175   }
176 
177   bool hasTrigReducedRange() const {
178     return HasTrigReducedRange;
179   }
180 
181   bool isPromoteAllocaEnabled() const {
182     return EnablePromoteAlloca;
183   }
184 
185   unsigned getWavefrontSize() const {
186     return 1 << WavefrontSizeLog2;
187   }
188 
189   unsigned getWavefrontSizeLog2() const {
190     return WavefrontSizeLog2;
191   }
192 
193   unsigned getLocalMemorySize() const {
194     return LocalMemorySize;
195   }
196 
197   Align getAlignmentForImplicitArgPtr() const {
198     return isAmdHsaOS() ? Align(8) : Align(4);
199   }
200 
201   /// Returns the offset in bytes from the start of the input buffer
202   ///        of the first explicit kernel argument.
203   unsigned getExplicitKernelArgOffset(const Function &F) const {
204     return isAmdHsaOrMesa(F) ? 0 : 36;
205   }
206 
207   /// \returns Maximum number of work groups per compute unit supported by the
208   /// subtarget and limited by given \p FlatWorkGroupSize.
209   virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
210 
211   /// \returns Minimum flat work group size supported by the subtarget.
212   virtual unsigned getMinFlatWorkGroupSize() const = 0;
213 
214   /// \returns Maximum flat work group size supported by the subtarget.
215   virtual unsigned getMaxFlatWorkGroupSize() const = 0;
216 
217   /// \returns Number of waves per execution unit required to support the given
218   /// \p FlatWorkGroupSize.
219   virtual unsigned
220   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
221 
222   /// \returns Minimum number of waves per execution unit supported by the
223   /// subtarget.
224   virtual unsigned getMinWavesPerEU() const = 0;
225 
226   /// \returns Maximum number of waves per execution unit supported by the
227   /// subtarget without any kind of limitation.
228   unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
229 
230   /// Return the maximum workitem ID value in the function, for the given (0, 1,
231   /// 2) dimension.
232   unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
233 
234   /// Creates value range metadata on an workitemid.* intrinsic call or load.
235   bool makeLIDRangeMetadata(Instruction *I) const;
236 
237   /// \returns Number of bytes of arguments that are passed to a shader or
238   /// kernel in addition to the explicit ones declared for the function.
239   unsigned getImplicitArgNumBytes(const Function &F) const;
240   uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
241   unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
242 
243   /// \returns Corresponsing DWARF register number mapping flavour for the
244   /// \p WavefrontSize.
245   AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
246 
247   virtual ~AMDGPUSubtarget() {}
248 };
249 
250 } // end namespace llvm
251 
252 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
253