//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (const Value *Operand : Operands)
      if (!isa<Constant>(Operand))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
  unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
    return 0;
  };

  int getInlinerVectorBonusPercent() const { return 150; }

  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
    return 64;
  }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
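  // For example, with the default of 99/100, a client pass would treat a
  // branch as predictable only if it goes the same way at least 99% of the
  // time, e.g. (illustrative use only, not an API defined in this file):
  //
  //   BranchProbability Likely = TTI.getPredictableBranchThreshold();
  //   bool Predictable = BPI.getEdgeProbability(BB, Succ) >= Likely;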
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }

  bool hasBranchDivergence(const Function *F = nullptr) const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return false;
  }

  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
    return true;
  }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  };

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  bool isSingleThreaded() const { return false; }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
163 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || 164 Name == "exp2l" || Name == "exp2f" || Name == "floor" || 165 Name == "floorf" || Name == "ceil" || Name == "round" || 166 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" || 167 Name == "llabs") 168 return false; 169 170 return true; 171 } 172 173 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, 174 AssumptionCache &AC, TargetLibraryInfo *LibInfo, 175 HardwareLoopInfo &HWLoopInfo) const { 176 return false; 177 } 178 179 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; } 180 181 TailFoldingStyle 182 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const { 183 return TailFoldingStyle::DataWithoutLaneMask; 184 } 185 186 std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, 187 IntrinsicInst &II) const { 188 return std::nullopt; 189 } 190 191 std::optional<Value *> 192 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, 193 APInt DemandedMask, KnownBits &Known, 194 bool &KnownBitsComputed) const { 195 return std::nullopt; 196 } 197 198 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( 199 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 200 APInt &UndefElts2, APInt &UndefElts3, 201 std::function<void(Instruction *, unsigned, APInt, APInt &)> 202 SimplifyAndSetOp) const { 203 return std::nullopt; 204 } 205 206 void getUnrollingPreferences(Loop *, ScalarEvolution &, 207 TTI::UnrollingPreferences &, 208 OptimizationRemarkEmitter *) const {} 209 210 void getPeelingPreferences(Loop *, ScalarEvolution &, 211 TTI::PeelingPreferences &) const {} 212 213 bool isLegalAddImmediate(int64_t Imm) const { return false; } 214 215 bool isLegalICmpImmediate(int64_t Imm) const { return false; } 216 217 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 218 bool HasBaseReg, int64_t Scale, unsigned AddrSpace, 219 Instruction *I = nullptr) const { 220 // Guess that only reg and reg+reg addressing is allowed. This heuristic is 221 // taken from the implementation of LSR. 222 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); 223 } 224 225 bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const { 226 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, 227 C1.ScaleCost, C1.ImmCost, C1.SetupCost) < 228 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, 229 C2.ScaleCost, C2.ImmCost, C2.SetupCost); 230 } 231 232 bool isNumRegsMajorCostOfLSR() const { return true; } 233 234 bool isProfitableLSRChainElement(Instruction *I) const { return false; } 235 236 bool canMacroFuseCmp() const { return false; } 237 238 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, 239 DominatorTree *DT, AssumptionCache *AC, 240 TargetLibraryInfo *LibInfo) const { 241 return false; 242 } 243 244 TTI::AddressingModeKind 245 getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const { 246 return TTI::AMK_None; 247 } 248 249 bool isLegalMaskedStore(Type *DataType, Align Alignment) const { 250 return false; 251 } 252 253 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const { 254 return false; 255 } 256 257 bool isLegalNTStore(Type *DataType, Align Alignment) const { 258 // By default, assume nontemporal memory stores are available for stores 259 // that are aligned and have a size that is a power of 2. 
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    return false;
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const {
    return false;
  }

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool enableOrderedReductions() const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  unsigned getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }

  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind) const {
    return 0;
  }

  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool supportsTailCalls() const { return true; }

  bool supportsTailCallFor(const CallBase *CB) const {
    return supportsTailCalls();
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableSelectOptimize() const { return true; }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  };

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
  bool isVScaleKnownToBeAPowerOfTwo() const { return false; }

  bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }
  bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

  unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {
    // Widenable conditions will eventually lower into constants, so some
    // operations with them will be trivially optimized away.
    auto IsWidenableCondition = [](const Value *V) {
      if (auto *II = dyn_cast<IntrinsicInst>(V))
        if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
          return true;
      return false;
    };
    // FIXME: A number of transformation tests seem to require these values,
    // which seems a little odd given how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    case Instruction::And:
    case Instruction::Or:
      if (any_of(Args, IsWidenableCondition))
        return TTI::TCC_Free;
      break;
    }

    // Assume a 3cy latency for fp arithmetic ops.
    if (CostKind == TTI::TCK_Latency)
      if (Ty->getScalarType()->isFloatingPointTy())
        return 3;

    return 1;
  }

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
                 TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
                 ArrayRef<const Value *> Args = std::nullopt) const {
    return 1;
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
    }
    return 1;
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) const {
    return 1;
  }

  unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                                     const APInt &DemandedDstElts,
                                     TTI::TargetCostKind CostKind) {
    return 1;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  TTI::OperandValueInfo OpInfo,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_assign:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
    case Intrinsic::experimental_widenable_condition:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
  unsigned getNumberOfParts(Type *Tp) const { return 1; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *,
                                         FastMathFlags,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *Ty,
                                           FastMathFlags FMF,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *Ty,
                                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            unsigned SrcAlign, unsigned DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferEpilogueVectorization() const {
    return true;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  bool supportsScalableVectors() const { return false; }

  bool enableScalableVectorization() const { return false; }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const {
    return false;
  }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

  bool hasArmWideBranch(bool) const { return false; }

  unsigned getMaxNumArgs() const { return UINT_MAX; }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
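  // For example (illustrative): a ConstantInt i32 7 yields 3 with isSigned set
  // to false, while i32 -8 also yields 3 but sets isSigned to true.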
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the maximum of the minimum
      // required sizes of the elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getSignificantBits() - 1;
          // If one element is signed, the whole vector is treated as signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getSignificantBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};
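
// Example (a minimal sketch; "MyTargetTTIImpl" and its override are
// hypothetical, not part of LLVM): a target typically specializes the CRTP
// helper below, providing its own versions of only the hooks it cares about
// and inheriting the conservative defaults above for everything else.
//
//   class MyTargetTTIImpl final
//       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
//     using BaseT = TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>;
//
//   public:
//     explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}
//
//     // Report wider registers than the 32-bit default.
//     TypeSize getRegisterBitWidth(TTI::RegisterKind K) const {
//       return TypeSize::getFixed(128);
//     }
//   };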

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands, Type *AccessType,
                             TTI::TargetCostKind CostKind) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the base pointer, and therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: handle scalable vectors
        if (isa<ScalableVectorType>(TargetType))
          return TTI::TCC_Basic;
        int64_t ElementSize =
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // If we haven't been provided a hint, use the target type for now.
    //
    // TODO: Take a look at potentially removing this: This is *slightly* wrong
    // as it's possible to have a GEP with a foldable target type but a memory
    // access that isn't foldable. For example, this load isn't foldable on
    // RISC-V:
    //
    // %p = getelementptr i32, ptr %base, i32 42
    // %x = load <2 x i32>, ptr %p
    if (!AccessType)
      AccessType = TargetType;

    // If the final address of the GEP is a legal addressing mode for the given
    // access type, then we can fold it into its users.
    if (static_cast<T *>(this)->isLegalAddressingMode(
            AccessType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;

    // TODO: Instead of returning TCC_Basic here, we should use
    // getArithmeticInstrCost. Or better yet, provide a hook to let the target
    // model it.
    return TTI::TCC_Basic;
  }

  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const Value *Base,
                                       const TTI::PointersChainInfo &Info,
                                       Type *AccessTy,
                                       TTI::TargetCostKind CostKind) {
    InstructionCost Cost = TTI::TCC_Free;
    // In the basic model we only take GEP instructions into account (although
    // the pointers here may also be alloca instructions, plain values,
    // constants and/or constant expressions, PHIs, or bitcasts: anything that
    // is allowed to be used as a pointer). Typically, if Base is not a GEP
    // instruction and all the pointers are relative to the same base address,
    // the rest are either GEP instructions, PHIs, bitcasts or constants. When
    // we have the same base, we just calculate the cost of each non-Base GEP
    // as an ADD operation if any of its indices is non-constant.
    // If there are no known dependencies between the pointers, the cost is
    // calculated as a sum of the costs of the GEP instructions.
    for (const Value *V : Ptrs) {
      const auto *GEP = dyn_cast<GetElementPtrInst>(V);
      if (!GEP)
        continue;
      if (Info.isSameBase() && V != Base) {
        if (GEP->hasAllConstantIndices())
          continue;
        Cost += static_cast<T *>(this)->getArithmeticInstrCost(
            Instruction::Add, GEP->getType(), CostKind,
            {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
            std::nullopt);
      } else {
        SmallVector<const Value *> Indices(GEP->indices());
        Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                   GEP->getPointerOperand(),
                                                   Indices, AccessTy, CostKind);
      }
    }
    return Cost;
  }

  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TTI::TargetCostKind CostKind) {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      Type *AccessType = nullptr;
      // For now, only provide the AccessType in the simple case where the GEP
      // only has one user.
      if (GEP->hasOneUser() && I)
        AccessType = I->user_back()->getAccessType();

      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   Operands.front(), Operands.drop_front(),
                                   AccessType, CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(Operands[1]);
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = Operands[0]->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = Operands[0]->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If there is a non-register sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target. But, if the only use of the load is a trunc instruction to a
      // register sized type, the instruction selector can combine these
      // instructions to be a single load. So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
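      // For instance (illustrative IR), the pair below is typically selected
      // as a single narrowing load, so costing it as an i32 load is more
      // accurate for code size:
      //   %wide = load i64, ptr %p
      //   %v = trunc i64 %wide to i32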
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        const auto Op1Info = TTI::getOperandInfo(Op0);
        const auto Op2Info = TTI::getOperandInfo(Op1);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      }
      Type *CondTy = Operands[0]->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = Operands[0]->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
      int NumSubElts, SubIndex;

      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                           Shuffle->getShuffleMask(), CostKind,
                                           SubIndex, VecTy, Operands);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
              CostKind, SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts =
              APInt::getZero(Shuffle->getShuffleMask().size());
          for (auto I : enumerate(Shuffle->getShuffleMask())) {
            if (I.value() != PoisonMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      }

      if (Shuffle->isIdentity())
        return 0;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
            SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
            Operands);

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy,
                                         Shuffle->getShuffleMask(), CostKind,
                                         SubIndex, nullptr, Operands);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
                                       Shuffle->getShuffleMask(), CostKind, 0,
                                       nullptr, Operands);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = Operands[0]->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }

    // By default, just classify everything as 'basic' or -1 to represent that
    // we don't know the throughput cost.
    return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
  }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
    auto *TargetTTI = static_cast<T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }
};
} // namespace llvm

#endif