//===- AArch64TargetTransformInfo.h - AArch64 specific TTI ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the AArch64 target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target-independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H

#include "AArch64.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include <cstdint>

namespace llvm {

class APInt;
class Instruction;
class IntrinsicInst;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;
class VectorType;

class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
  using BaseT = BasicTTIImplBase<AArch64TTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const AArch64Subtarget *ST;
  const AArch64TargetLowering *TLI;

  const AArch64Subtarget *getST() const { return ST; }
  const AArch64TargetLowering *getTLI() const { return TLI; }

  enum MemIntrinsicType {
    VECTOR_LDST_TWO_ELEMENTS,
    VECTOR_LDST_THREE_ELEMENTS,
    VECTOR_LDST_FOUR_ELEMENTS
  };

  bool isWideningInstruction(Type *Ty, unsigned Opcode,
                             ArrayRef<const Value *> Args);

public:
  explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \name Scalar TTI Implementations
  /// @{

  using BaseT::getIntImmCost;
  InstructionCost getIntImmCost(int64_t Val);
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind);
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr);
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind);
  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  bool enableInterleavedAccessVectorization() { return true; }

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 32;
      return 0;
    }
    return 31;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind);

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    switch (K) {
    case TargetTransformInfo::RGK_Scalar:
      return TypeSize::getFixed(64);
    case TargetTransformInfo::RGK_FixedWidthVector:
      if (ST->hasSVE())
        return TypeSize::getFixed(
            std::max(ST->getMinSVEVectorSizeInBits(), 128u));
      return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
    case TargetTransformInfo::RGK_ScalableVector:
      return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
    }
    llvm_unreachable("Unsupported register kind");
  }
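  // For example, on a subtarget whose minimum SVE vector width is 512 bits,
  // the query above reports TypeSize::getFixed(512) for fixed-width vectors,
  // while a NEON-only subtarget reports the fixed 128-bit Q-register width
  // and scalable vectors are sized in multiples of the 128-bit SVE granule.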

  unsigned getMinVectorRegisterBitWidth() {
    return ST->getMinVectorRegisterBitWidth();
  }

  Optional<unsigned> getMaxVScale() const {
    if (ST->hasSVE())
      return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
    return BaseT::getMaxVScale();
  }
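  // At the time of writing, AArch64::SVEMaxBitsPerVector is 2048 and
  // AArch64::SVEBitsPerBlock is 128, so the SVE path above yields the
  // architectural maximum vscale of 2048 / 128 = 16.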

  unsigned getMaxInterleaveFactor(unsigned VF);

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind);

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr);

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr);

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index);

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr);

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index);

  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                         bool IsPairwise, bool IsUnsigned,
                                         TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
                                                VectorType *ValTy,
                                                bool IsPairwiseForm,
                                                TTI::TargetCostKind CostKind);

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr);

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                     Type *CondTy, CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr);

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const;
  bool useNeonVector(const Type *Ty) const;

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType);

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);

  bool isLegalElementTypeForSVE(Type *Ty) const {
    if (Ty->isPointerTy())
      return true;

    if (Ty->isBFloatTy() && ST->hasBF16())
      return true;

    if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
      return true;

    if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
        Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
      return true;

    return false;
  }

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
    if (!ST->hasSVE())
      return false;

    // For fixed vectors, avoid scalarization if using SVE for them.
    if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
      return false; // Fall back to scalarization of masked operations.

    return isLegalElementTypeForSVE(DataType->getScalarType());
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedGatherScatter(Type *DataType) const {
    if (isa<FixedVectorType>(DataType) || !ST->hasSVE())
      return false;

    return isLegalElementTypeForSVE(DataType->getScalarType());
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return isLegalMaskedGatherScatter(DataType);
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    // NOTE: The logic below is mostly geared towards LV, which calls it with
    // vectors with 2 elements. We might want to improve that, if other
    // users show up.
    // Nontemporal vector stores can be directly lowered to STNP, if the
    // vector can be halved so that each half fits into a register. That's
    // the case if the element type fits into a register and the number of
    // elements is a power of 2 > 1.
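    // For example (illustrative, not exhaustive):
    //   <2 x i64>: two 64-bit halves, each fits in a register  -> legal
    //   <4 x i32>: two <2 x i32> halves of 64 bits each        -> legal
    //   <3 x i32>: element count is not a power of 2 > 1       -> not legal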
    if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
      unsigned NumElements =
          cast<FixedVectorType>(DataTypeVTy)->getNumElements();
      unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
      return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
             EltSize <= 128 && isPowerOf2_64(EltSize);
    }
    return BaseT::isLegalNTStore(DataType, Alignment);
  }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader);

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  unsigned getGISelRematGlobalCost() const {
    return 2;
  }

  bool supportsScalableVectors() const { return ST->hasSVE(); }

  bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc,
                                   ElementCount VF) const;

  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp);
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AARCH64_AARCH64TARGETTRANSFORMINFO_H