1 //===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
11 /// provide more precise answers to certain TTI queries, while letting the
12 /// target independent and default TTI implementations handle the rest.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
17 #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
18 
19 #include "AArch64.h"
20 #include "AArch64Subtarget.h"
21 #include "AArch64TargetMachine.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Analysis/TargetTransformInfo.h"
24 #include "llvm/CodeGen/BasicTTIImpl.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Intrinsics.h"
27 #include <cstdint>
28 
29 namespace llvm {
30 
31 class APInt;
32 class Instruction;
33 class IntrinsicInst;
34 class Loop;
35 class SCEV;
36 class ScalarEvolution;
37 class Type;
38 class Value;
39 class VectorType;
40 
41 class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
42   using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
43   using TTI = TargetTransformInfo;
44 
45   friend BaseT;
46 
47   const AArch64Subtarget *ST;
48   const AArch64TargetLowering *TLI;
49 
getST()50   const AArch64Subtarget *getST() const { return ST; }
getTLI()51   const AArch64TargetLowering *getTLI() const { return TLI; }
52 
53   enum MemIntrinsicType {
54     VECTOR_LDST_TWO_ELEMENTS,
55     VECTOR_LDST_THREE_ELEMENTS,
56     VECTOR_LDST_FOUR_ELEMENTS
57   };
58 
59   bool isWideningInstruction(Type *Ty, unsigned Opcode,
60                              ArrayRef<const Value *> Args);
61 
62 public:
AArch64TTIImpl(const AArch64TargetMachine * TM,const Function & F)63   explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
64       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
65         TLI(ST->getTargetLowering()) {}
66 
67   bool areInlineCompatible(const Function *Caller,
68                            const Function *Callee) const;
69 
70   /// \name Scalar TTI Implementations
71   /// @{
72 
73   using BaseT::getIntImmCost;
74   InstructionCost getIntImmCost(int64_t Val);
75   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
76                                 TTI::TargetCostKind CostKind);
77   InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
78                                     const APInt &Imm, Type *Ty,
79                                     TTI::TargetCostKind CostKind,
80                                     Instruction *Inst = nullptr);
81   InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
82                                       const APInt &Imm, Type *Ty,
83                                       TTI::TargetCostKind CostKind);
84   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
85 
86   /// @}
87 
88   /// \name Vector TTI Implementations
89   /// @{
90 
enableInterleavedAccessVectorization()91   bool enableInterleavedAccessVectorization() { return true; }
92 
getNumberOfRegisters(unsigned ClassID)93   unsigned getNumberOfRegisters(unsigned ClassID) const {
94     bool Vector = (ClassID == 1);
95     if (Vector) {
96       if (ST->hasNEON())
97         return 32;
98       return 0;
99     }
100     return 31;
101   }
102 
103   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
104                                         TTI::TargetCostKind CostKind);
105 
106   Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
107                                                IntrinsicInst &II) const;
108 
getRegisterBitWidth(TargetTransformInfo::RegisterKind K)109   TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
110     switch (K) {
111     case TargetTransformInfo::RGK_Scalar:
112       return TypeSize::getFixed(64);
113     case TargetTransformInfo::RGK_FixedWidthVector:
114       if (ST->hasSVE())
115         return TypeSize::getFixed(
116             std::max(ST->getMinSVEVectorSizeInBits(), 128u));
117       return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
118     case TargetTransformInfo::RGK_ScalableVector:
119       return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
120     }
121     llvm_unreachable("Unsupported register kind");
122   }
123 
getMinVectorRegisterBitWidth()124   unsigned getMinVectorRegisterBitWidth() {
125     return ST->getMinVectorRegisterBitWidth();
126   }
127 
getMaxVScale()128   Optional<unsigned> getMaxVScale() const {
129     if (ST->hasSVE())
130       return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
131     return BaseT::getMaxVScale();
132   }
133 
134   unsigned getMaxInterleaveFactor(unsigned VF);
135 
136   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
137                                         Align Alignment, unsigned AddressSpace,
138                                         TTI::TargetCostKind CostKind);
139 
140   InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
141                                          const Value *Ptr, bool VariableMask,
142                                          Align Alignment,
143                                          TTI::TargetCostKind CostKind,
144                                          const Instruction *I = nullptr);
145 
146   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
147                                    TTI::CastContextHint CCH,
148                                    TTI::TargetCostKind CostKind,
149                                    const Instruction *I = nullptr);
150 
151   InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
152                                            VectorType *VecTy, unsigned Index);
153 
154   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
155                                  const Instruction *I = nullptr);
156 
157   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
158                                      unsigned Index);
159 
160   InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
161                                          bool IsPairwise, bool IsUnsigned,
162                                          TTI::TargetCostKind CostKind);
163 
164   InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
165                                                 VectorType *ValTy,
166                                                 bool IsPairwiseForm,
167                                                 TTI::TargetCostKind CostKind);
168 
169   InstructionCost getArithmeticInstrCost(
170       unsigned Opcode, Type *Ty,
171       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
172       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
173       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
174       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
175       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
176       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
177       const Instruction *CxtI = nullptr);
178 
179   InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
180                                             const SCEV *Ptr);
181 
182   InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
183                                      CmpInst::Predicate VecPred,
184                                      TTI::TargetCostKind CostKind,
185                                      const Instruction *I = nullptr);
186 
187   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
188                                                     bool IsZeroCmp) const;
189   bool useNeonVector(const Type *Ty) const;
190 
191   InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
192                                   MaybeAlign Alignment, unsigned AddressSpace,
193                                   TTI::TargetCostKind CostKind,
194                                   const Instruction *I = nullptr);
195 
196   InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
197 
198   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
199                                TTI::UnrollingPreferences &UP);
200 
201   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
202                              TTI::PeelingPreferences &PP);
203 
204   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
205                                            Type *ExpectedType);
206 
207   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
208 
isLegalElementTypeForSVE(Type * Ty)209   bool isLegalElementTypeForSVE(Type *Ty) const {
210     if (Ty->isPointerTy())
211       return true;
212 
213     if (Ty->isBFloatTy() && ST->hasBF16())
214       return true;
215 
216     if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
217       return true;
218 
219     if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
220         Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
221       return true;
222 
223     return false;
224   }
225 
isLegalMaskedLoadStore(Type * DataType,Align Alignment)226   bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
227     if (!ST->hasSVE())
228       return false;
229 
230     // For fixed vectors, avoid scalarization if using SVE for them.
231     if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
232       return false; // Fall back to scalarization of masked operations.
233 
234     return isLegalElementTypeForSVE(DataType->getScalarType());
235   }
236 
isLegalMaskedLoad(Type * DataType,Align Alignment)237   bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
238     return isLegalMaskedLoadStore(DataType, Alignment);
239   }
240 
isLegalMaskedStore(Type * DataType,Align Alignment)241   bool isLegalMaskedStore(Type *DataType, Align Alignment) {
242     return isLegalMaskedLoadStore(DataType, Alignment);
243   }
244 
isLegalMaskedGatherScatter(Type * DataType)245   bool isLegalMaskedGatherScatter(Type *DataType) const {
246     if (isa<FixedVectorType>(DataType) || !ST->hasSVE())
247       return false;
248 
249     return isLegalElementTypeForSVE(DataType->getScalarType());
250   }
251 
isLegalMaskedGather(Type * DataType,Align Alignment)252   bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
253     return isLegalMaskedGatherScatter(DataType);
254   }
isLegalMaskedScatter(Type * DataType,Align Alignment)255   bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
256     return isLegalMaskedGatherScatter(DataType);
257   }
258 
isLegalNTStore(Type * DataType,Align Alignment)259   bool isLegalNTStore(Type *DataType, Align Alignment) {
260     // NOTE: The logic below is mostly geared towards LV, which calls it with
261     //       vectors with 2 elements. We might want to improve that, if other
262     //       users show up.
263     // Nontemporal vector stores can be directly lowered to STNP, if the vector
264     // can be halved so that each half fits into a register. That's the case if
265     // the element type fits into a register and the number of elements is a
266     // power of 2 > 1.
267     if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
268       unsigned NumElements =
269           cast<FixedVectorType>(DataTypeVTy)->getNumElements();
270       unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
271       return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
272              EltSize <= 128 && isPowerOf2_64(EltSize);
273     }
274     return BaseT::isLegalNTStore(DataType, Alignment);
275   }
276 
277   InstructionCost getInterleavedMemoryOpCost(
278       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
279       Align Alignment, unsigned AddressSpace,
280       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
281       bool UseMaskForCond = false, bool UseMaskForGaps = false);
282 
283   bool
284   shouldConsiderAddressTypePromotion(const Instruction &I,
285                                      bool &AllowPromotionWithoutCommonHeader);
286 
shouldExpandReduction(const IntrinsicInst * II)287   bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
288 
getGISelRematGlobalCost()289   unsigned getGISelRematGlobalCost() const {
290     return 2;
291   }
292 
supportsScalableVectors()293   bool supportsScalableVectors() const { return ST->hasSVE(); }
294 
295   bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc,
296                                    ElementCount VF) const;
297 
298   InstructionCost getArithmeticReductionCost(
299       unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
300       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
301 
302   InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
303                                  ArrayRef<int> Mask, int Index,
304                                  VectorType *SubTp);
305   /// @}
306 };
307 
308 } // end namespace llvm
309 
310 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
311