//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands,
                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
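    // For illustration: a GEP such as
    //   getelementptr inbounds %struct.S, %struct.S* @g, i32 0, i32 1
    // has only constant indices, so the loop below finds no non-constant
    // operand and the GEP is reported as TCC_Free.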
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }

  int getInlinerVectorBonusPercent() const { return 150; }

  unsigned getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  bool hasBranchDivergence() const { return false; }

  bool useGPUDivergenceAnalysis() const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
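    // (For instance, pow(x, 2.0) is commonly simplified to x*x, and abs()
    // typically lowers to a couple of instructions rather than a call.)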
128 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || 129 Name == "exp2l" || Name == "exp2f" || Name == "floor" || 130 Name == "floorf" || Name == "ceil" || Name == "round" || 131 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" || 132 Name == "llabs") 133 return false; 134 135 return true; 136 } 137 138 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, 139 AssumptionCache &AC, TargetLibraryInfo *LibInfo, 140 HardwareLoopInfo &HWLoopInfo) const { 141 return false; 142 } 143 144 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, 145 AssumptionCache &AC, TargetLibraryInfo *TLI, 146 DominatorTree *DT, 147 const LoopAccessInfo *LAI) const { 148 return false; 149 } 150 151 bool emitGetActiveLaneMask() const { 152 return false; 153 } 154 155 Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, 156 IntrinsicInst &II) const { 157 return None; 158 } 159 160 Optional<Value *> 161 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, 162 APInt DemandedMask, KnownBits &Known, 163 bool &KnownBitsComputed) const { 164 return None; 165 } 166 167 Optional<Value *> simplifyDemandedVectorEltsIntrinsic( 168 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 169 APInt &UndefElts2, APInt &UndefElts3, 170 std::function<void(Instruction *, unsigned, APInt, APInt &)> 171 SimplifyAndSetOp) const { 172 return None; 173 } 174 175 void getUnrollingPreferences(Loop *, ScalarEvolution &, 176 TTI::UnrollingPreferences &) const {} 177 178 void getPeelingPreferences(Loop *, ScalarEvolution &, 179 TTI::PeelingPreferences &) const {} 180 181 bool isLegalAddImmediate(int64_t Imm) const { return false; } 182 183 bool isLegalICmpImmediate(int64_t Imm) const { return false; } 184 185 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 186 bool HasBaseReg, int64_t Scale, unsigned AddrSpace, 187 Instruction *I = nullptr) const { 188 // Guess that only reg and reg+reg addressing is allowed. This heuristic is 189 // taken from the implementation of LSR. 190 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); 191 } 192 193 bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const { 194 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, 195 C1.ScaleCost, C1.ImmCost, C1.SetupCost) < 196 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, 197 C2.ScaleCost, C2.ImmCost, C2.SetupCost); 198 } 199 200 bool isNumRegsMajorCostOfLSR() const { return true; } 201 202 bool isProfitableLSRChainElement(Instruction *I) const { return false; } 203 204 bool canMacroFuseCmp() const { return false; } 205 206 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, 207 DominatorTree *DT, AssumptionCache *AC, 208 TargetLibraryInfo *LibInfo) const { 209 return false; 210 } 211 212 bool shouldFavorPostInc() const { return false; } 213 214 bool shouldFavorBackedgeIndex(const Loop *L) const { return false; } 215 216 bool isLegalMaskedStore(Type *DataType, Align Alignment) const { 217 return false; 218 } 219 220 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const { 221 return false; 222 } 223 224 bool isLegalNTStore(Type *DataType, Align Alignment) const { 225 // By default, assume nontemporal memory stores are available for stores 226 // that are aligned and have a size that is a power of 2. 
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  unsigned getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
                                    bool Insert, bool Extract) const {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, unsigned Alignment,
                                      bool *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  unsigned getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) const {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty,
                         TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }
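  // The two hooks below default to TCC_Free: absent target knowledge,
  // assume an immediate can be folded into the instruction or intrinsic
  // that consumes it.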
  unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                             Type *Ty, TTI::TargetCostKind CostKind,
                             Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                               const APInt &Imm, Type *Ty,
                               TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  Optional<unsigned> getMaxVScale() const { return None; }

  bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }

  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }

  llvm::Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }

  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }

  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::TargetCostKind CostKind,
                                  TTI::OperandValueKind Opd1Info,
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args,
                                  const Instruction *CxtI = nullptr) const {
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd given how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
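      // Division and remainder rarely lower to a single cheap operation on
      // real targets, so even this minimal model charges TCC_Expensive.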
      return TTI::TCC_Expensive;
    }
    return 1;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index,
                          VectorType *SubTp) const {
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            TTI::CastContextHint CCH,
                            TTI::TargetCostKind CostKind,
                            const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedSize()))
        return 0;
      break;
    }
    }
    return 1;
  }

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) const {
    return 1;
  }

  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              CmpInst::Predicate VecPred,
                              TTI::TargetCostKind CostKind,
                              const Instruction *I) const {
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                              unsigned Index) const {
    return 1;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                           unsigned AddressSpace, TTI::TargetCostKind CostKind,
                           const Instruction *I) const {
    return 1;
  }

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                 unsigned AddressSpace,
                                 TTI::TargetCostKind CostKind) const {
    return 1;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                  const Value *Ptr, bool VariableMask,
                                  Align Alignment, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr) const {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
                            TTI::TargetCostKind CostKind) const {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) const { return 0; }

  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                     const SCEV *) const {
    return 0;
  }

  unsigned getArithmeticReductionCost(unsigned, VectorType *, bool,
                                      TTI::TargetCostKind) const {
    return 1;
  }

  unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool,
                                  TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedAddReductionCost(
      bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const {
    return 1;
  }

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call.
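    // (For example, a target returning 8 is expected to have lib calls
    // such as __llvm_memcpy_element_unordered_atomic_{1,2,4,8}.)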
    // These runtime lib call definitions can be found
    // in RuntimeLibcalls.h.
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  bool supportsScalableVectors() const { return false; }

  bool hasActiveVectorLength() const { return false; }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In the case of a vector, it returns the min required size for one element.
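  // For example, for <2 x i32> <i32 3, i32 -4> this returns 2 with
  // isSigned == true: each element fits in 3 signed bits, i.e. 2 value
  // bits plus a sign bit.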
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In the case of a vector, we need to pick the max of the min
      // required sizes of the elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedSize();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // If one element is signed then the whole vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element; be conservative.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands,
                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    // TODO: Remove this when pointers have an opaque type.
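    // E.g. for "getelementptr i32, i32* %p, i64 %i" the PointeeType
    // argument must be i32; that is what the assert below checks.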
    assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
               PointeeType &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP has no index operands, only the base
    // pointer; TargetType is then left as nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: Handle scalable vectors.
        if (isa<ScalableVectorType>(TargetType))
          return TTI::TCC_Basic;
        int64_t ElementSize =
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
                  TTI::TargetCostKind CostKind) {
    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered.

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    Type *OpTy =
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr;
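    // Note that U may be a bare ConstantExpr rather than an Instruction;
    // Operator::getOpcode handles both, so constant expressions get costed
    // like the corresponding instructions.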
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
      return TargetTTI->getCFInstrCost(Opcode, CostKind);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const GEPOperator *GEP = cast<GEPOperator>(U);
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   GEP->getPointerOperand(),
                                   Operands.drop_front());
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      TTI::OperandValueProperties Op1VP = TTI::OP_None;
      TTI::OperandValueProperties Op2VP = TTI::OP_None;
      TTI::OperandValueKind Op1VK =
          TTI::getOperandInfo(U->getOperand(0), Op1VP);
      TTI::OperandValueKind Op2VK = Opcode != Instruction::FNeg ?
          TTI::getOperandInfo(U->getOperand(1), Op2VP) : TTI::OK_AnyValue;
      SmallVector<const Value *, 2> Operands(U->operand_values());
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind,
                                               Op1VK, Op2VK,
                                               Op1VP, Op2VP, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast:
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Load: {
      auto *LI = cast<LoadInst>(U);
      return TargetTTI->getMemoryOpCost(Opcode, U->getType(), LI->getAlign(),
                                        LI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Select: {
      Type *CondTy = U->getOperand(0)->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = U->getOperand(0)->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
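    // For the vector cases below, a constant lane index is forwarded when
    // one is present; otherwise -1 signals an unknown lane. E.g. the cost
    // of "insertelement <4 x float> %v, float %f, i32 2" is queried with
    // Idx == 2.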
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
      unsigned Idx = CI ? CI->getZExtValue() : -1;
      return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME
      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());

      // TODO: Identify and add costs for insert subvector, etc.
      int SubIndex;
      if (Shuffle->isExtractSubvectorMask(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                         SubIndex, VecTy);
      else if (Shuffle->changesLength())
        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      else if (Shuffle->isIdentity())
        return 0;
      else if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, 0, nullptr);
      else if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, 0, nullptr);
      else if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, 0, nullptr);
      else if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, 0, nullptr);
      else if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, 0,
                                         nullptr);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, 0,
                                       nullptr);
    }
    case Instruction::ExtractElement: {
      unsigned Idx = -1;
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME

      auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
      if (CI)
        Idx = CI->getZExtValue();

      // Try to match a reduction (a series of shufflevector and vector ops
      // followed by an extractelement).
      unsigned RdxOpcode;
      VectorType *RdxType;
      bool IsPairwise;
      switch (TTI::matchVectorReduction(EEI, RdxOpcode, RdxType, IsPairwise)) {
      case TTI::RK_Arithmetic:
        return TargetTTI->getArithmeticReductionCost(RdxOpcode, RdxType,
                                                     IsPairwise, CostKind);
      case TTI::RK_MinMax:
        return TargetTTI->getMinMaxReductionCost(
            RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
            IsPairwise, /*IsUnsigned=*/false, CostKind);
      case TTI::RK_UnsignedMinMax:
        return TargetTTI->getMinMaxReductionCost(
            RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
            IsPairwise, /*IsUnsigned=*/true, CostKind);
      case TTI::RK_None:
        break;
      }
      return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(),
                                           Idx);
    }
    }
    // By default, just classify everything as 'basic'.
    return TTI::TCC_Basic;
  }

  int getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->operand_values());
    if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
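    // E.g. anything that truly lowers to a call is charged latency 40
    // below, while a target-recognized intrinsic falls through and is
    // costed like an ordinary instruction.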
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide its latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};
} // namespace llvm

#endif