1 //=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Base class for AMDGPU specific classes of TargetSubtarget.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
15 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
16 
#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"
#include <cstdint>
#include <utility>
20 
21 namespace llvm {
22 
// Forward declarations: this header only uses these types by reference, by
// pointer, or as the return type of a declared (not defined) function, so
// their full definitions are not required here.
enum AMDGPUDwarfFlavour : unsigned;
class Function;
class Instruction;
class MachineFunction;
class TargetMachine;
28 
29 class AMDGPUSubtarget {
30 public:
31   enum Generation {
32     INVALID = 0,
33     R600 = 1,
34     R700 = 2,
35     EVERGREEN = 3,
36     NORTHERN_ISLANDS = 4,
37     SOUTHERN_ISLANDS = 5,
38     SEA_ISLANDS = 6,
39     VOLCANIC_ISLANDS = 7,
40     GFX9 = 8,
41     GFX10 = 9
42   };
43 
44 private:
45   Triple TargetTriple;
46 
47 protected:
48   bool Has16BitInsts;
49   bool HasMadMixInsts;
50   bool HasMadMacF32Insts;
51   bool HasDsSrc2Insts;
52   bool HasSDWA;
53   bool HasVOP3PInsts;
54   bool HasMulI24;
55   bool HasMulU24;
56   bool HasInv2PiInlineImm;
57   bool HasFminFmaxLegacy;
58   bool EnablePromoteAlloca;
59   bool HasTrigReducedRange;
60   unsigned MaxWavesPerEU;
61   unsigned LocalMemorySize;
62   char WavefrontSizeLog2;
63 
64 public:
65   AMDGPUSubtarget(const Triple &TT);
66 
67   static const AMDGPUSubtarget &get(const MachineFunction &MF);
68   static const AMDGPUSubtarget &get(const TargetMachine &TM,
69                                     const Function &F);
70 
71   /// \returns Default range flat work group size for a calling convention.
72   std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
73 
74   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
75   /// for function \p F, or minimum/maximum flat work group sizes explicitly
76   /// requested using "amdgpu-flat-work-group-size" attribute attached to
77   /// function \p F.
78   ///
79   /// \returns Subtarget's default values if explicitly requested values cannot
80   /// be converted to integer, or violate subtarget's specifications.
81   std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
82 
83   /// \returns Subtarget's default pair of minimum/maximum number of waves per
84   /// execution unit for function \p F, or minimum/maximum number of waves per
85   /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
86   /// attached to function \p F.
87   ///
88   /// \returns Subtarget's default values if explicitly requested values cannot
89   /// be converted to integer, violate subtarget's specifications, or are not
90   /// compatible with minimum/maximum number of waves limited by flat work group
91   /// size, register usage, and/or lds usage.
92   std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
93 
94   /// Return the amount of LDS that can be used that will not restrict the
95   /// occupancy lower than WaveCount.
96   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
97                                            const Function &) const;
98 
99   /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
100   /// the given LDS memory size is the only constraint.
101   unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
102 
103   unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
104 
105   bool isAmdHsaOS() const {
106     return TargetTriple.getOS() == Triple::AMDHSA;
107   }
108 
109   bool isAmdPalOS() const {
110     return TargetTriple.getOS() == Triple::AMDPAL;
111   }
112 
113   bool isMesa3DOS() const {
114     return TargetTriple.getOS() == Triple::Mesa3D;
115   }
116 
117   bool isMesaKernel(const Function &F) const;
118 
119   bool isAmdHsaOrMesa(const Function &F) const {
120     return isAmdHsaOS() || isMesaKernel(F);
121   }
122 
123   bool isGCN() const {
124     return TargetTriple.getArch() == Triple::amdgcn;
125   }
126 
127   bool has16BitInsts() const {
128     return Has16BitInsts;
129   }
130 
131   bool hasMadMixInsts() const {
132     return HasMadMixInsts;
133   }
134 
135   bool hasMadMacF32Insts() const {
136     return HasMadMacF32Insts || !isGCN();
137   }
138 
139   bool hasDsSrc2Insts() const {
140     return HasDsSrc2Insts;
141   }
142 
143   bool hasSDWA() const {
144     return HasSDWA;
145   }
146 
147   bool hasVOP3PInsts() const {
148     return HasVOP3PInsts;
149   }
150 
151   bool hasMulI24() const {
152     return HasMulI24;
153   }
154 
155   bool hasMulU24() const {
156     return HasMulU24;
157   }
158 
159   bool hasInv2PiInlineImm() const {
160     return HasInv2PiInlineImm;
161   }
162 
163   bool hasFminFmaxLegacy() const {
164     return HasFminFmaxLegacy;
165   }
166 
167   bool hasTrigReducedRange() const {
168     return HasTrigReducedRange;
169   }
170 
171   bool isPromoteAllocaEnabled() const {
172     return EnablePromoteAlloca;
173   }
174 
175   unsigned getWavefrontSize() const {
176     return 1 << WavefrontSizeLog2;
177   }
178 
179   unsigned getWavefrontSizeLog2() const {
180     return WavefrontSizeLog2;
181   }
182 
183   unsigned getLocalMemorySize() const {
184     return LocalMemorySize;
185   }
186 
187   Align getAlignmentForImplicitArgPtr() const {
188     return isAmdHsaOS() ? Align(8) : Align(4);
189   }
190 
191   /// Returns the offset in bytes from the start of the input buffer
192   ///        of the first explicit kernel argument.
193   unsigned getExplicitKernelArgOffset(const Function &F) const {
194     return isAmdHsaOrMesa(F) ? 0 : 36;
195   }
196 
197   /// \returns Maximum number of work groups per compute unit supported by the
198   /// subtarget and limited by given \p FlatWorkGroupSize.
199   virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
200 
201   /// \returns Minimum flat work group size supported by the subtarget.
202   virtual unsigned getMinFlatWorkGroupSize() const = 0;
203 
204   /// \returns Maximum flat work group size supported by the subtarget.
205   virtual unsigned getMaxFlatWorkGroupSize() const = 0;
206 
207   /// \returns Number of waves per execution unit required to support the given
208   /// \p FlatWorkGroupSize.
209   virtual unsigned
210   getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const = 0;
211 
212   /// \returns Minimum number of waves per execution unit supported by the
213   /// subtarget.
214   virtual unsigned getMinWavesPerEU() const = 0;
215 
216   /// \returns Maximum number of waves per execution unit supported by the
217   /// subtarget without any kind of limitation.
218   unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
219 
220   /// Return the maximum workitem ID value in the function, for the given (0, 1,
221   /// 2) dimension.
222   unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
223 
224   /// Creates value range metadata on an workitemid.* intrinsic call or load.
225   bool makeLIDRangeMetadata(Instruction *I) const;
226 
227   /// \returns Number of bytes of arguments that are passed to a shader or
228   /// kernel in addition to the explicit ones declared for the function.
229   unsigned getImplicitArgNumBytes(const Function &F) const;
230   uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
231   unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
232 
233   /// \returns Corresponsing DWARF register number mapping flavour for the
234   /// \p WavefrontSize.
235   AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
236 
237   virtual ~AMDGPUSubtarget() {}
238 };
239 
240 } // end namespace llvm
241 
242 #endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
243