//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <optional>
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (const Value *Operand : Operands)
      if (!isa<Constant>(Operand))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }

  int getInlinerVectorBonusPercent() const { return 150; }

  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
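  // In effect, with the default of 99/100 below, only a branch that is
  // expected to go the same way in roughly 99% (or more) of executions is
  // treated as predictable.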
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }

  bool hasBranchDivergence() const { return false; }

  bool useGPUDivergenceAnalysis() const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  }

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  bool isSingleThreaded() const { return false; }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL,
                                   InterleavedAccessInfo *IAI) const {
    return false;
  }

  PredicationStyle emitGetActiveLaneMask() const {
    return PredicationStyle::None;
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) const {
    return std::nullopt;
  }

  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return std::nullopt;
  }

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return std::nullopt;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &,
                               OptimizationRemarkEmitter *) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }

  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
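    // In other words, the conservative default accepts only a plain base
    // register or a base register plus an unscaled index register; any global
    // base, nonzero immediate offset, or scale other than 0 or 1 is rejected.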
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }

  bool isProfitableLSRChainElement(Instruction *I) const { return false; }

  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    return false;
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const {
    return false;
  }

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool enableOrderedReductions() const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  unsigned getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }

  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind) const {
    return 0;
  }

  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool supportsTailCalls() const { return true; }

  bool supportsTailCallFor(const CallBase *CB) const {
    return supportsTailCalls();
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableSelectOptimize() const { return true; }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }

  bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }
  std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      [[fallthrough]];
    case TargetTransformInfo::CacheLevel::L2D:
      return std::nullopt;
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }
  bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }

  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    }

    // Assume a 3cy latency for fp arithmetic ops.
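    // e.g. an fadd on float (or on a vector of float) queried with
    // TCK_Latency is costed at 3, while the same operation under any other
    // cost kind falls through to the unit cost below.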
    if (CostKind == TTI::TCK_Latency)
      if (Ty->getScalarType()->isFloatingPointTy())
        return 3;

    return 1;
  }

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
                 TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
                 ArrayRef<const Value *> Args = std::nullopt) const {
    return 1;
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
        return 0;
      break;
    }
    }
    return 1;
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) const {
    return 1;
  }

  unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                                     const APInt &DemandedDstElts,
                                     TTI::TargetCostKind CostKind) {
    return 1;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  TTI::OperandValueInfo OpInfo,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
    case Intrinsic::threadlocal_address:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
  unsigned getNumberOfParts(Type *Tp) const { return 1; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
                                             std::optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
                                           Type *ResTy, VectorType *Ty,
                                           std::optional<FastMathFlags> FMF,
                                           TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
                                         VectorType *Ty,
                                         TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            unsigned SrcAlign, unsigned DestAlign,
                            std::optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferEpilogueVectorization() const {
    return true;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  bool supportsScalableVectors() const { return false; }

  bool enableScalableVectorization() const { return false; }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const {
    return false;
  }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
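  // For example, the constant vector <4 x i32> <i32 1, i32 2, i32 -3, i32 4>
  // yields 3 with isSigned set (the negative element makes the whole vector
  // signed), while a zext from i8 yields 8 with isSigned cleared.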
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the max of the min required
      // sizes of the elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // If any element is signed, the whole vector is treated as signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    assert(cast<PointerType>(Ptr->getType()->getScalarType())
               ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP has only a single operand, the base
    // pointer, in which case TargetType remains nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: handle scalable vectors
        if (isa<ScalableVectorType>(TargetType))
          return TTI::TCC_Basic;
        int64_t ElementSize =
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TTI::TargetCostKind CostKind) {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
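    // Direct calls that really lower to calls are charged TCC_Basic per
    // formal parameter plus one; indirect and other calls are charged per
    // call-site argument in the same way.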
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                                 // later.

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   GEP->getPointerOperand(),
                                   Operands.drop_front(), CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      const TTI::OperandValueInfo Op1Info =
          TTI::getOperandInfo(U->getOperand(0));
      TTI::OperandValueInfo Op2Info;
      if (Opcode != Instruction::FNeg)
        Op2Info = TTI::getOperandInfo(U->getOperand(1));
      SmallVector<const Value *, 2> Operands(U->operand_values());
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
                                               Op2Info, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = U->getOperand(0)->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(U->getOperand(0));
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(), CostKind,
                                        OpInfo, I);
    }
    case Instruction::Load: {
      // FIXME: Arbitrary cost which could come from the backend.
      if (CostKind == TTI::TCK_Latency)
        return 4;
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If there is a non-register sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target. But, if the only use of the load is a trunc instruction to a
      // register sized type, the instruction selector can combine these
      // instructions to be a single load. So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(), CostKind,
                                        {TTI::OK_AnyValue, TTI::OP_None}, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        const auto Op1Info = TTI::getOperandInfo(Op0);
        const auto Op2Info = TTI::getOperandInfo(Op1);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1Info, Op2Info, Operands, I);
      }
      Type *CondTy = U->getOperand(0)->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = U->getOperand(0)->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
      int NumSubElts, SubIndex;

      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
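        // e.g. a mask such as <0, 1, 2, 3, -1, -1, -1, -1> that simply
        // appends undef lanes to the first source vector.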
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                           Shuffle->getShuffleMask(), CostKind,
                                           SubIndex, VecTy, Operands);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
              CostKind, SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts =
              APInt::getNullValue(Shuffle->getShuffleMask().size());
          for (auto I : enumerate(Shuffle->getShuffleMask())) {
            if (I.value() != UndefMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      }

      if (Shuffle->isIdentity())
        return 0;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         Shuffle->getShuffleMask(), CostKind, 0,
                                         nullptr, Operands);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
            SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
            Operands);

      if (Shuffle->isSplice(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy,
                                         Shuffle->getShuffleMask(), CostKind,
                                         SubIndex, nullptr, Operands);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
                                       Shuffle->getShuffleMask(), CostKind, 0,
                                       nullptr, Operands);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = U->getOperand(0)->getType();
      return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
    }
    }

    // By default, just classify everything as 'basic' or -1 to represent that
    // we don't know the throughput cost.
    return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
  }

  bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
    auto *TargetTTI = static_cast<T *>(this);
    SmallVector<const Value *, 4> Ops(I->operand_values());
    InstructionCost Cost = TargetTTI->getInstructionCost(
        I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
    return Cost >= TargetTransformInfo::TCC_Expensive;
  }
};
} // namespace llvm

#endif