1 //===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file a TargetTransformInfo::Concept conforming object specific to the
10 /// AArch64 target machine. It uses the target's detailed information to
11 /// provide more precise answers to certain TTI queries, while letting the
12 /// target independent and default TTI implementations handle the rest.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
17 #define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
18 
19 #include "AArch64.h"
20 #include "AArch64Subtarget.h"
21 #include "AArch64TargetMachine.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Analysis/TargetTransformInfo.h"
24 #include "llvm/CodeGen/BasicTTIImpl.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/Intrinsics.h"
27 #include <cstdint>
28 
29 namespace llvm {
30 
31 class APInt;
32 class Instruction;
33 class IntrinsicInst;
34 class Loop;
35 class SCEV;
36 class ScalarEvolution;
37 class Type;
38 class Value;
39 class VectorType;
40 
41 class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
42   using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
43   using TTI = TargetTransformInfo;
44 
45   friend BaseT;
46 
47   const AArch64Subtarget *ST;
48   const AArch64TargetLowering *TLI;
49 
50   const AArch64Subtarget *getST() const { return ST; }
51   const AArch64TargetLowering *getTLI() const { return TLI; }
52 
53   enum MemIntrinsicType {
54     VECTOR_LDST_TWO_ELEMENTS,
55     VECTOR_LDST_THREE_ELEMENTS,
56     VECTOR_LDST_FOUR_ELEMENTS
57   };
58 
59   bool isWideningInstruction(Type *Ty, unsigned Opcode,
60                              ArrayRef<const Value *> Args);
61 
62 public:
63   explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
64       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
65         TLI(ST->getTargetLowering()) {}
66 
67   bool areInlineCompatible(const Function *Caller,
68                            const Function *Callee) const;
69 
70   /// \name Scalar TTI Implementations
71   /// @{
72 
73   using BaseT::getIntImmCost;
74   int getIntImmCost(int64_t Val);
75   int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
76   int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
77                         Type *Ty, TTI::TargetCostKind CostKind,
78                         Instruction *Inst = nullptr);
79   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
80                           Type *Ty, TTI::TargetCostKind CostKind);
81   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
82 
83   /// @}
84 
85   /// \name Vector TTI Implementations
86   /// @{
87 
88   bool enableInterleavedAccessVectorization() { return true; }
89 
90   unsigned getNumberOfRegisters(unsigned ClassID) const {
91     bool Vector = (ClassID == 1);
92     if (Vector) {
93       if (ST->hasNEON())
94         return 32;
95       return 0;
96     }
97     return 31;
98   }
99 
100   unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
101                                  TTI::TargetCostKind CostKind);
102 
103   unsigned getRegisterBitWidth(bool Vector) const {
104     if (Vector) {
105       if (ST->hasSVE())
106         return std::max(ST->getMinSVEVectorSizeInBits(), 128u);
107       if (ST->hasNEON())
108         return 128;
109       return 0;
110     }
111     return 64;
112   }
113 
114   unsigned getMinVectorRegisterBitWidth() {
115     return ST->getMinVectorRegisterBitWidth();
116   }
117 
118   Optional<unsigned> getMaxVScale() const {
119     if (ST->hasSVE())
120       return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
121     return BaseT::getMaxVScale();
122   }
123 
124   unsigned getMaxInterleaveFactor(unsigned VF);
125 
126   unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
127                                   const Value *Ptr, bool VariableMask,
128                                   Align Alignment, TTI::TargetCostKind CostKind,
129                                   const Instruction *I = nullptr);
130 
131   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
132                        TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
133                        const Instruction *I = nullptr);
134 
135   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
136                                unsigned Index);
137 
138   unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind);
139 
140   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
141 
142   int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
143                              bool IsPairwise, bool IsUnsigned,
144                              TTI::TargetCostKind CostKind);
145 
146   int getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy,
147                                     bool IsPairwiseForm,
148                                     TTI::TargetCostKind CostKind);
149 
150   int getArithmeticInstrCost(
151       unsigned Opcode, Type *Ty,
152       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
153       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
154       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
155       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
156       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
157       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
158       const Instruction *CxtI = nullptr);
159 
160   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr);
161 
162   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
163                          CmpInst::Predicate VecPred,
164                          TTI::TargetCostKind CostKind,
165                          const Instruction *I = nullptr);
166 
167   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
168                                                     bool IsZeroCmp) const;
169   bool useNeonVector(const Type *Ty) const;
170 
171   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
172                       unsigned AddressSpace,
173                       TTI::TargetCostKind CostKind,
174                       const Instruction *I = nullptr);
175 
176   int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
177 
178   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
179                                TTI::UnrollingPreferences &UP);
180 
181   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
182                              TTI::PeelingPreferences &PP);
183 
184   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
185                                            Type *ExpectedType);
186 
187   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
188 
189   bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
190     if (!isa<ScalableVectorType>(DataType) || !ST->hasSVE())
191       return false;
192 
193     Type *Ty = cast<ScalableVectorType>(DataType)->getElementType();
194     if (Ty->isPointerTy())
195       return true;
196 
197     if (Ty->isBFloatTy() || Ty->isHalfTy() ||
198         Ty->isFloatTy() || Ty->isDoubleTy())
199       return true;
200 
201     if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
202         Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
203       return true;
204 
205     return false;
206   }
207 
208   bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
209     return isLegalMaskedLoadStore(DataType, Alignment);
210   }
211 
212   bool isLegalMaskedStore(Type *DataType, Align Alignment) {
213     return isLegalMaskedLoadStore(DataType, Alignment);
214   }
215 
216   bool isLegalNTStore(Type *DataType, Align Alignment) {
217     // NOTE: The logic below is mostly geared towards LV, which calls it with
218     //       vectors with 2 elements. We might want to improve that, if other
219     //       users show up.
220     // Nontemporal vector stores can be directly lowered to STNP, if the vector
221     // can be halved so that each half fits into a register. That's the case if
222     // the element type fits into a register and the number of elements is a
223     // power of 2 > 1.
224     if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
225       unsigned NumElements =
226           cast<FixedVectorType>(DataTypeVTy)->getNumElements();
227       unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
228       return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
229              EltSize <= 128 && isPowerOf2_64(EltSize);
230     }
231     return BaseT::isLegalNTStore(DataType, Alignment);
232   }
233 
234   int getInterleavedMemoryOpCost(
235       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
236       Align Alignment, unsigned AddressSpace,
237       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
238       bool UseMaskForCond = false, bool UseMaskForGaps = false);
239 
240   bool
241   shouldConsiderAddressTypePromotion(const Instruction &I,
242                                      bool &AllowPromotionWithoutCommonHeader);
243 
244   bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }
245 
246   unsigned getGISelRematGlobalCost() const {
247     return 2;
248   }
249 
250   bool supportsScalableVectors() const { return ST->hasSVE(); }
251 
252   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
253                              TTI::ReductionFlags Flags) const;
254 
255   int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
256                                  bool IsPairwiseForm,
257                                  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
258 
259   int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
260                      VectorType *SubTp);
261   /// @}
262 };
263 
264 } // end namespace llvm
265 
266 #endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
267