//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"

using namespace llvm::PatternMatch;

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  InstructionCost
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands,
             TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }

  int getInlinerVectorBonusPercent() const { return 150; }

  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
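  // For example, with the 99/100 default returned below, only a condition
  // that profile data shows evaluating the same way at least 99% of the time
  // is treated as predictable enough to keep as a branch; less biased
  // conditions are candidates for being flattened into select-style code.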
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }

  bool hasBranchDivergence() const { return false; }

  bool useGPUDivergenceAnalysis() const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const {
    return false;
  }

  bool emitGetActiveLaneMask() const {
    return false;
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const {
    return None;
  }

  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return None;
  }

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return None;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }

  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }

  bool isProfitableLSRChainElement(Instruction *I) const { return false; }

  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
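    // For example, a 16-byte (power-of-2 sized) store is only treated as a
    // legal nontemporal store by this default when its alignment is at least
    // 16 bytes.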
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  InstructionCost getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }

  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) const {
    return 0;
  }

  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      bool *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  Optional<unsigned> getMaxVScale() const { return None; }

  bool shouldMaximizeVectorBandwidth() const { return false; }

  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }

  llvm::Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }

  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
      TTI::OperandValueProperties Opd1PropInfo,
      TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
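    // Both integer and floating-point division/remainder are reported as
    // expensive below; every other opcode falls through to a unit cost.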
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    }
    return 1;
  }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp) const {
    return 1;
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedSize()))
        return 0;
      break;
    }
    }
    return 1;
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index) const {
    return 1;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) const { return 0; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedAddReductionCost(
      bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  bool supportsScalableVectors() const { return false; }

  bool hasActiveVectorLength() const { return false; }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign)
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector need to pick the max between the min
      // required size for each element
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements
      isSigned = false;

      // The max required size is the size of the vector element type
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedSize();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // not an int constant element
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  InstructionCost
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands,
             TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    assert(cast<PointerType>(Ptr->getType()->getScalarType())
               ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the basis, therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always splat or scalar constant
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: handle scalable vectors
        if (isa<ScalableVectorType>(TargetType))
          return TTI::TCC_Basic;
        int64_t ElementSize =
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
                              TTI::TargetCostKind CostKind) {
    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    Type *OpTy =
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr;
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const GEPOperator *GEP = cast<GEPOperator>(U);
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   GEP->getPointerOperand(),
                                   Operands.drop_front());
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      TTI::OperandValueProperties Op1VP = TTI::OP_None;
      TTI::OperandValueProperties Op2VP = TTI::OP_None;
      TTI::OperandValueKind Op1VK =
          TTI::getOperandInfo(U->getOperand(0), Op1VP);
      TTI::OperandValueKind Op2VK = Opcode != Instruction::FNeg ?
          TTI::getOperandInfo(U->getOperand(1), Op2VP) : TTI::OK_AnyValue;
      SmallVector<const Value *, 2> Operands(U->operand_values());
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind,
                                               Op1VK, Op2VK,
                                               Op1VP, Op2VP, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast:
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Load: {
      auto *LI = cast<LoadInst>(U);
      return TargetTTI->getMemoryOpCost(Opcode, U->getType(), LI->getAlign(),
                                        LI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        TTI::OperandValueProperties Op1VP = TTI::OP_None;
        TTI::OperandValueProperties Op2VP = TTI::OP_None;
        TTI::OperandValueKind Op1VK = TTI::getOperandInfo(Op0, Op1VP);
        TTI::OperandValueKind Op2VK = TTI::getOperandInfo(Op1, Op2VP);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1VK, Op2VK, Op1VP, Op2VP, Operands, I);
      }
      Type *CondTy = U->getOperand(0)->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = U->getOperand(0)->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
      unsigned Idx = CI ? CI->getZExtValue() : -1;
      return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME
      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());

      // TODO: Identify and add costs for insert subvector, etc.
      int SubIndex;
      if (Shuffle->isExtractSubvectorMask(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                         Shuffle->getShuffleMask(), SubIndex,
                                         VecTy);
      else if (Shuffle->changesLength())
        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      else if (Shuffle->isIdentity())
        return 0;
      else if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);
      else if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
                                       Shuffle->getShuffleMask(), 0, nullptr);
    }
    case Instruction::ExtractElement: {
      unsigned Idx = -1;
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME

      auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
      if (CI)
        Idx = CI->getZExtValue();

      return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(),
                                           Idx);
    }
    }
    // By default, just classify everything as 'basic'.
    return TTI::TCC_Basic;
  }

  InstructionCost getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->operand_values());
    if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag, we use the value type
      // to decide its latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};
} // namespace llvm

#endif