//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include <utility>

namespace llvm {

class Function;

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }

  int getInlinerVectorBonusPercent() const { return 150; }

  InstructionCost getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }
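  // A minimal sketch of the intended use of this mix-in ("MyTargetTTIImpl"
  // is hypothetical, not an in-tree backend): derive from the base and
  // override only the hooks whose conservative defaults are wrong for the
  // target, e.g.
  //
  //   class MyTargetTTIImpl : public TargetTransformInfoImplBase {
  //   public:
  //     explicit MyTargetTTIImpl(const DataLayout &DL)
  //         : TargetTransformInfoImplBase(DL) {}
  //     // Inlining is comparatively cheap on this target.
  //     unsigned getInliningThresholdMultiplier() const { return 2; }
  //   };
  //
  // Real targets usually go through BasicTTIImplBase, which layers
  // codegen-aware defaults on top of the CRTP helper defined at the end of
  // this file.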
  // Although this default value is arbitrary, it is not random. It is assumed
  // that a condition that evaluates the same way by a higher percentage than
  // this is best represented as control flow. Therefore, the default value N
  // should be set such that the win from N% correct executions is greater than
  // the loss from (100 - N)% mispredicted executions for the majority of
  // intended targets.
  BranchProbability getPredictableBranchThreshold() const {
    return BranchProbability(99, 100);
  }

  bool hasBranchDivergence() const { return false; }

  bool useGPUDivergenceAnalysis() const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
  bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
    return AS == 0;
  }

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return std::make_pair(nullptr, -1);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL) const {
    return false;
  }

  PredicationStyle emitGetActiveLaneMask() const {
    return PredicationStyle::None;
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const {
    return None;
  }

  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return None;
  }

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return None;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &,
                               OptimizationRemarkEmitter *) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }

  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }

  bool isProfitableLSRChainElement(Instruction *I) const { return false; }

  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
    return TTI::AMK_None;
  }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }
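  // For illustration, a hypothetical override (not in-tree code): a target
  // with native predicated memory operations would report legality here so
  // the vectorizers emit llvm.masked.load/store rather than scalarized,
  // branchy fallback code, e.g.
  //
  //   bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
  //     // Assume lanes of 32 bits and wider can be predicated.
  //     return DataType->getScalarSizeInBits() >= 32;
  //   }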
  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
    return false;
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
    return false;
  }

  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const {
    return false;
  }

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool enableOrderedReductions() const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  unsigned getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }

  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool shouldBuildRelLookupTables() const { return false; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) const {
    return 0;
  }

  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool supportsTailCalls() const { return true; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      bool *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  InstructionCost getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty) const {
    return 0;
  }

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }
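  // A sketch of how a target with more than the two generic register classes
  // might extend this scheme (the class ID and the FP distinction here are
  // hypothetical):
  //
  //   unsigned getRegisterClassForType(bool Vector, Type *Ty) const {
  //     if (Ty && Ty->isFloatingPointTy())
  //       return 2; // Separate FP register file.
  //     return Vector ? 1 : 0;
  //   }
  //
  // Any class ID returned here must also be understood by the overridden
  // getRegisterClassName() and getNumberOfRegisters() so that register
  // pressure heuristics stay consistent.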
  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  Optional<unsigned> getMaxVScale() const { return None; }
  Optional<unsigned> getVScaleForTuning() const { return None; }

  bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
    return false;
  }

  ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
    return ElementCount::get(0, IsScalable);
  }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
  unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }

  llvm::Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }

  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
      TTI::OperandValueProperties Opd1PropInfo,
      TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) const {
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    }
    return 1;
  }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = None) const {
    return 1;
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedSize()))
        return 0;
      break;
    }
    }
    return 1;
  }
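  // Worked example of the defaults above, assuming a data layout with legal
  // i64 and 64-bit pointers: "ptrtoint ptr %p to i64" and "inttoptr i64 %x
  // to ptr" both cost 0 because they are no-ops at the machine level, while
  // "ptrtoint ptr %p to i128" falls through to the generic cost of 1 since
  // i128 is not a legal integer type in that layout.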
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const {
    return 1;
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) const {
    // A phi would be free, unless we're costing the throughput because it
    // will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) const {
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index) const {
    return 1;
  }

  unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                                     const APInt &DemandedDstElts,
                                     TTI::TargetCostKind CostKind) {
    return 1;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I) const {
    return 1;
  }

  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) const {
    return 1;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) const {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_align:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }
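  // For example, with the table above a query for llvm.assume reports a cost
  // of 0 because the intrinsic vanishes during lowering, whereas an intrinsic
  // with real codegen consequences (say, llvm.ctpop) falls through to the
  // generic cost of 1. Callers typically reach this hook the way getUserCost()
  // below does, by wrapping the call site in IntrinsicCostAttributes.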
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) const {
    return 1;
  }

  // Assume that we have a register of the right size for the type.
  unsigned getNumberOfParts(Type *Tp) const { return 1; }

  InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                            const SCEV *) const {
    return 0;
  }

  InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
                                             Optional<FastMathFlags> FMF,
                                             TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
                                         TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost
  getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy,
                              VectorType *Ty,
                              TTI::TargetCostKind CostKind) const {
    return 1;
  }

  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h.
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign,
                                  Optional<uint32_t> AtomicElementSize) const {
    return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
                             : Type::getInt8Ty(Context);
  }
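  // Concretely: for a plain memcpy the default loop operates on i8 (one byte
  // per iteration), while an element-atomic memcpy with a 4-byte element size
  // gets an i32 loop type, so each iteration moves exactly one atomic element.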
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      Optional<uint32_t> AtomicCpySize) const {
    unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
    Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
    for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
      OpsOut.push_back(OpType);
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const {
    return true;
  }

  bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  unsigned getMinTripCountTailFoldingThreshold() const { return 0; }

  bool supportsScalableVectors() const { return false; }

  bool enableScalableVectorization() const { return false; }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const {
    return false;
  }

  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const {
    return TargetTransformInfo::VPLegalization(
        /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
        /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
  }
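  // That is, the conservative default asks the expansion pass to fold the
  // explicit vector length into the mask (Discard) and then rewrite the VP
  // intrinsic as its unpredicated equivalent (Convert); e.g. a vp.add is
  // turned into a plain add with the masked-off lanes handled by the
  // surrounding code.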
protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector we need to pick the max between the min
      // required size for each element.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the size of the vector element type.
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedSize();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};
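// A brief illustration of the CRTP idiom used below ("MyTargetTTIImpl" is
// hypothetical): the base is parameterized on its most-derived type so shared
// logic can reach overridden hooks without virtual dispatch, e.g.
//
//   class MyTargetTTIImpl
//       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
//     // getGEPCost() below will consult this override through
//     // static_cast<MyTargetTTIImpl *>(this)->isLegalAddressingMode(...).
//     bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
//                                int64_t BaseOffset, bool HasBaseReg,
//                                int64_t Scale, unsigned AddrSpace,
//                                Instruction *I = nullptr) const;
//   };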
/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    assert(cast<PointerType>(Ptr->getType()->getScalarType())
               ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the basis, therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: handle scalable vectors
        if (isa<ScalableVectorType>(TargetType))
          return TTI::TCC_Basic;
        int64_t ElementSize =
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }
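  // Worked example: for "getelementptr inbounds i32, ptr %p, i64 4" the loop
  // above accumulates BaseOffset = 16 with no scale register, so under the
  // default isLegalAddressingMode() (reg and reg+reg only) the nonzero offset
  // yields TCC_Basic; a target whose override accepts reg+imm addressing
  // would report TCC_Free instead.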
  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
                              TTI::TargetCostKind CostKind) {
    using namespace llvm::PatternMatch;

    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
    case Instruction::Switch:
      return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const auto *GEP = cast<GEPOperator>(U);
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   GEP->getPointerOperand(),
                                   Operands.drop_front(), CostKind);
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      TTI::OperandValueProperties Op1VP = TTI::OP_None;
      TTI::OperandValueProperties Op2VP = TTI::OP_None;
      TTI::OperandValueKind Op1VK =
          TTI::getOperandInfo(U->getOperand(0), Op1VP);
      TTI::OperandValueKind Op2VK = Opcode != Instruction::FNeg ?
          TTI::getOperandInfo(U->getOperand(1), Op2VP) : TTI::OK_AnyValue;
      SmallVector<const Value *, 2> Operands(U->operand_values());
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind,
                                               Op1VK, Op2VK,
                                               Op1VP, Op2VP, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast: {
      Type *OpTy = U->getOperand(0)->getType();
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    }
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Load: {
      auto *LI = cast<LoadInst>(U);
      Type *LoadType = U->getType();
      // If there is a non-register sized type, the cost estimation may expand
      // it to be several instructions to load into multiple registers on the
      // target. But, if the only use of the load is a trunc instruction to a
      // register sized type, the instruction selector can combine these
      // instructions to be a single load. So, in this case, we use the
      // destination type of the trunc instruction rather than the load to
      // accurately estimate the cost of this load instruction.
      if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
          !LoadType->isVectorTy()) {
        if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
          LoadType = TI->getDestTy();
      }
      return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
                                        LI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Select: {
      const Value *Op0, *Op1;
      if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
          match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
        // select x, y, false --> x & y
        // select x, true, y --> x | y
        TTI::OperandValueProperties Op1VP = TTI::OP_None;
        TTI::OperandValueProperties Op2VP = TTI::OP_None;
        TTI::OperandValueKind Op1VK = TTI::getOperandInfo(Op0, Op1VP);
        TTI::OperandValueKind Op2VK = TTI::getOperandInfo(Op1, Op2VP);
        assert(Op0->getType()->getScalarSizeInBits() == 1 &&
               Op1->getType()->getScalarSizeInBits() == 1);

        SmallVector<const Value *, 2> Operands{Op0, Op1};
        return TargetTTI->getArithmeticInstrCost(
            match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
            CostKind, Op1VK, Op2VK, Op1VP, Op2VP, Operands, I);
      }
      Type *CondTy = U->getOperand(0)->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = U->getOperand(0)->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME

      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
      int NumSubElts, SubIndex;

      if (Shuffle->changesLength()) {
        // Treat a 'subvector widening' as a free shuffle.
        if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
          return 0;

        if (Shuffle->isExtractSubvectorMask(SubIndex))
          return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                           Shuffle->getShuffleMask(), SubIndex,
                                           VecTy, Operands);

        if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
          return TargetTTI->getShuffleCost(
              TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
              SubIndex,
              FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
              Operands);

        int ReplicationFactor, VF;
        if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
          APInt DemandedDstElts =
              APInt::getNullValue(Shuffle->getShuffleMask().size());
          for (auto I : enumerate(Shuffle->getShuffleMask())) {
            if (I.value() != UndefMaskElem)
              DemandedDstElts.setBit(I.index());
          }
          return TargetTTI->getReplicationShuffleCost(
              VecSrcTy->getElementType(), ReplicationFactor, VF,
              DemandedDstElts, CostKind);
        }

        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      }

      if (Shuffle->isIdentity())
        return 0;

      if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr,
                                         Operands);

      if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr,
                                         Operands);

      if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr,
                                         Operands);

      if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr,
                                         Operands);

      if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
                                         Shuffle->getShuffleMask(), 0, nullptr,
                                         Operands);

      if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
        return TargetTTI->getShuffleCost(
            TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex,
            FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
                                       Shuffle->getShuffleMask(), 0, nullptr,
                                       Operands);
    }
    case Instruction::ExtractElement: {
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME
      unsigned Idx = -1;
      if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
        if (CI->getValue().getActiveBits() <= 32)
          Idx = CI->getZExtValue();
      Type *DstTy = U->getOperand(0)->getType();
      return TargetTTI->getVectorInstrCost(Opcode, DstTy, Idx);
    }
    }
    // By default, just classify everything as 'basic'.
    return TTI::TCC_Basic;
  }

  InstructionCost getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->operand_values());
    if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide its latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};
} // namespace llvm

#endif