//===- AMDGPUTargetTransformInfo.h - AMDGPU specific TTI --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AMDGPU target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/BasicTTIImpl.h"

namespace llvm {

class AMDGPUTargetLowering;
class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
class R600Subtarget;
class ScalarEvolution;
class SITargetLowering;
class Type;
class Value;

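/// Common TTI logic shared by the GCN and R600 implementations below, each of
/// which holds an instance as its CommonTTI member. For now this only
/// provides the loop unrolling and peeling preferences.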
class AMDGPUTTIImpl final : public BasicTTIImplBase<AMDGPUTTIImpl> {
  using BaseT = BasicTTIImplBase<AMDGPUTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  Triple TargetTriple;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
};

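/// TTI implementation for the GCN subtargets (Southern Islands and newer).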
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
  using BaseT = BasicTTIImplBase<GCNTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const GCNSubtarget *ST;
  const SITargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;
  bool IsGraphics;
  bool HasFP32Denormals;
  bool HasFP64FP16Denormals;
  unsigned MaxVGPRs;

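  /// Subtarget features that are ignored when deciding whether a callee's
  /// feature set is compatible with the caller's for inlining purposes.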
  static const FeatureBitset InlineFeatureIgnoreList;

  const GCNSubtarget *getST() const { return ST; }
  const SITargetLowering *getTLI() const { return TLI; }

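  // Instruction costs below are expressed as multiples of TCC_Basic according
  // to the instruction's issue rate: full-rate ops cost 1, half-rate ops 2,
  // and quarter-rate ops 4.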
  static inline int getFullRateInstrCost() {
    return TargetTransformInfo::TCC_Basic;
  }

  static inline int getHalfRateInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 2 * TargetTransformInfo::TCC_Basic;
  }

  // TODO: The size is usually 8 bytes, but these take 4x as many cycles.
  // Maybe the cost should be 2 or 4.
  static inline int getQuarterRateInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
    return CostKind == TTI::TCK_CodeSize ? 2
                                         : 4 * TargetTransformInfo::TCC_Basic;
  }

  // On some subtargets, normal fp64 operations are half rate; on others they
  // are quarter rate. This also applies to some integer operations.
  int get64BitInstrCost(
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

public:
  explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  bool hasBranchDivergence() { return true; }
  bool useGPUDivergenceAnalysis() const;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
    assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
    return TTI::PSK_FastHardware;
  }

  unsigned getHardwareNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(bool Vector) const;
  unsigned getNumberOfRegisters(unsigned RCID) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const;

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAddrSpace,
                                         unsigned DestAddrSpace,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const;
  unsigned getMaxInterleaveFactor(unsigned VF);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  bool isInlineAsmSourceOfDivergence(const CallInst *CI,
                                     ArrayRef<unsigned> Indices = {}) const;

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     unsigned Index);
  bool isSourceOfDivergence(const Value *V) const;
  bool isAlwaysUniform(const Value *V) const;

  unsigned getFlatAddressSpace() const {
    // Don't bother running InferAddressSpaces pass on graphics shaders which
    // don't use flat addressing.
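    // Returning -1 reports that no flat address space is available here.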
    if (IsGraphics)
      return -1;
    return AMDGPUAS::FLAT_ADDRESS;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
                                 InstCombiner &IC) const;
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;

  InstructionCost getVectorSplitCost() { return 0; }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  unsigned getInliningThresholdMultiplier() { return 11; }
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  int getInlinerVectorBonusPercent() { return 0; }

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);
  InstructionCost getMinMaxReductionCost(
      VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
};

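/// TTI implementation for the pre-GCN R600 family of subtargets.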
class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
  using BaseT = BasicTTIImplBase<R600TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const R600Subtarget *ST;
  const AMDGPUTargetLowering *TLI;
  AMDGPUTTIImpl CommonTTI;

public:
  explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F);

  const R600Subtarget *getST() const { return ST; }
  const AMDGPUTargetLowering *getTLI() const { return TLI; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  unsigned getHardwareNumberOfRegisters(bool Vec) const;
  unsigned getNumberOfRegisters(bool Vec) const;
  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
  unsigned getMinVectorRegisterBitWidth() const;
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
  bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
                                  unsigned AddrSpace) const;
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;
  unsigned getMaxInterleaveFactor(unsigned VF);
  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                     unsigned Index);
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H