1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This pass exposes codegen information to IR-level passes. Every 10 /// transformation that uses codegen information is broken into three parts: 11 /// 1. The IR-level analysis pass. 12 /// 2. The IR-level transformation interface which provides the needed 13 /// information. 14 /// 3. Codegen-level implementation which uses target-specific hooks. 15 /// 16 /// This file defines #2, which is the interface that IR-level transformations 17 /// use for querying the codegen. 18 /// 19 //===----------------------------------------------------------------------===// 20 21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 23 24 #include "llvm/ADT/SmallBitVector.h" 25 #include "llvm/IR/FMF.h" 26 #include "llvm/IR/InstrTypes.h" 27 #include "llvm/IR/PassManager.h" 28 #include "llvm/Pass.h" 29 #include "llvm/Support/AtomicOrdering.h" 30 #include "llvm/Support/BranchProbability.h" 31 #include "llvm/Support/InstructionCost.h" 32 #include <functional> 33 #include <utility> 34 35 namespace llvm { 36 37 namespace Intrinsic { 38 typedef unsigned ID; 39 } 40 41 class AssumptionCache; 42 class BlockFrequencyInfo; 43 class DominatorTree; 44 class BranchInst; 45 class CallBase; 46 class Function; 47 class GlobalValue; 48 class InstCombiner; 49 class OptimizationRemarkEmitter; 50 class IntrinsicInst; 51 class LoadInst; 52 class Loop; 53 class LoopInfo; 54 class LoopVectorizationLegality; 55 class ProfileSummaryInfo; 56 class RecurrenceDescriptor; 57 class SCEV; 58 class ScalarEvolution; 59 class StoreInst; 60 class SwitchInst; 61 class TargetLibraryInfo; 62 class Type; 63 class User; 64 class Value; 65 class VPIntrinsic; 66 struct KnownBits; 67 template <typename T> class Optional; 68 69 /// Information about a load/store intrinsic defined by the target. 70 struct MemIntrinsicInfo { 71 /// This is the pointer that the intrinsic is loading from or storing to. 72 /// If this is non-null, then analysis/optimization passes can assume that 73 /// this intrinsic is functionally equivalent to a load/store from this 74 /// pointer. 75 Value *PtrVal = nullptr; 76 77 // Ordering for atomic operations. 78 AtomicOrdering Ordering = AtomicOrdering::NotAtomic; 79 80 // Same Id is set by the target for corresponding load/store intrinsics. 81 unsigned short MatchingId = 0; 82 83 bool ReadMem = false; 84 bool WriteMem = false; 85 bool IsVolatile = false; 86 87 bool isUnordered() const { 88 return (Ordering == AtomicOrdering::NotAtomic || 89 Ordering == AtomicOrdering::Unordered) && 90 !IsVolatile; 91 } 92 }; 93 94 /// Attributes of a target dependent hardware loop. 95 struct HardwareLoopInfo { 96 HardwareLoopInfo() = delete; 97 HardwareLoopInfo(Loop *L) : L(L) {} 98 Loop *L = nullptr; 99 BasicBlock *ExitBlock = nullptr; 100 BranchInst *ExitBranch = nullptr; 101 const SCEV *ExitCount = nullptr; 102 IntegerType *CountType = nullptr; 103 Value *LoopDecrement = nullptr; // Decrement the loop counter by this 104 // value in every iteration. 105 bool IsNestingLegal = false; // Can a hardware loop be a parent to 106 // another hardware loop? 
107 bool CounterInReg = false; // Should loop counter be updated in 108 // the loop via a phi? 109 bool PerformEntryTest = false; // Generate the intrinsic which also performs 110 // icmp ne zero on the loop counter value and 111 // produces an i1 to guard the loop entry. 112 bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, 113 DominatorTree &DT, bool ForceNestedLoop = false, 114 bool ForceHardwareLoopPHI = false); 115 bool canAnalyze(LoopInfo &LI); 116 }; 117 118 class IntrinsicCostAttributes { 119 const IntrinsicInst *II = nullptr; 120 Type *RetTy = nullptr; 121 Intrinsic::ID IID; 122 SmallVector<Type *, 4> ParamTys; 123 SmallVector<const Value *, 4> Arguments; 124 FastMathFlags FMF; 125 // If ScalarizationCost is UINT_MAX, the cost of scalarizing the 126 // arguments and the return value will be computed based on types. 127 InstructionCost ScalarizationCost = InstructionCost::getInvalid(); 128 129 public: 130 IntrinsicCostAttributes( 131 Intrinsic::ID Id, const CallBase &CI, 132 InstructionCost ScalarCost = InstructionCost::getInvalid(), 133 bool TypeBasedOnly = false); 134 135 IntrinsicCostAttributes( 136 Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, 137 FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr, 138 InstructionCost ScalarCost = InstructionCost::getInvalid()); 139 140 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, 141 ArrayRef<const Value *> Args); 142 143 IntrinsicCostAttributes( 144 Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args, 145 ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(), 146 const IntrinsicInst *I = nullptr, 147 InstructionCost ScalarCost = InstructionCost::getInvalid()); 148 149 Intrinsic::ID getID() const { return IID; } 150 const IntrinsicInst *getInst() const { return II; } 151 Type *getReturnType() const { return RetTy; } 152 FastMathFlags getFlags() const { return FMF; } 153 InstructionCost getScalarizationCost() const { return ScalarizationCost; } 154 const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; } 155 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; } 156 157 bool isTypeBasedOnly() const { 158 return Arguments.empty(); 159 } 160 161 bool skipScalarizationCost() const { return ScalarizationCost.isValid(); } 162 }; 163 164 enum class PredicationStyle { None, Data, DataAndControlFlow }; 165 166 class TargetTransformInfo; 167 typedef TargetTransformInfo TTI; 168 169 /// This pass provides access to the codegen interfaces that are needed 170 /// for IR-level transformations. 171 class TargetTransformInfo { 172 public: 173 /// Construct a TTI object using a type implementing the \c Concept 174 /// API below. 175 /// 176 /// This is used by targets to construct a TTI wrapping their target-specific 177 /// implementation that encodes appropriate costs for their target. 178 template <typename T> TargetTransformInfo(T Impl); 179 180 /// Construct a baseline TTI object using a minimal implementation of 181 /// the \c Concept API below. 182 /// 183 /// The TTI implementation will reflect the information in the DataLayout 184 /// provided if non-null. 185 explicit TargetTransformInfo(const DataLayout &DL); 186 187 // Provide move semantics. 188 TargetTransformInfo(TargetTransformInfo &&Arg); 189 TargetTransformInfo &operator=(TargetTransformInfo &&RHS); 190 191 // We need to define the destructor out-of-line to define our sub-classes 192 // out-of-line. 193 ~TargetTransformInfo(); 194 195 /// Handle the invalidation of this information. 
196 /// 197 /// When used as a result of \c TargetIRAnalysis this method will be called 198 /// when the function this was computed for changes. When it returns false, 199 /// the information is preserved across those changes. 200 bool invalidate(Function &, const PreservedAnalyses &, 201 FunctionAnalysisManager::Invalidator &) { 202 // FIXME: We should probably in some way ensure that the subtarget 203 // information for a function hasn't changed. 204 return false; 205 } 206 207 /// \name Generic Target Information 208 /// @{ 209 210 /// The kind of cost model. 211 /// 212 /// There are several different cost models that can be customized by the 213 /// target. The normalization of each cost model may be target specific. 214 enum TargetCostKind { 215 TCK_RecipThroughput, ///< Reciprocal throughput. 216 TCK_Latency, ///< The latency of instruction. 217 TCK_CodeSize, ///< Instruction code size. 218 TCK_SizeAndLatency ///< The weighted sum of size and latency. 219 }; 220 221 /// Query the cost of a specified instruction. 222 /// 223 /// Clients should use this interface to query the cost of an existing 224 /// instruction. The instruction must have a valid parent (basic block). 225 /// 226 /// Note, this method does not cache the cost calculation and it 227 /// can be expensive in some cases. 228 InstructionCost getInstructionCost(const Instruction *I, 229 enum TargetCostKind kind) const { 230 InstructionCost Cost; 231 switch (kind) { 232 case TCK_RecipThroughput: 233 Cost = getInstructionThroughput(I); 234 break; 235 case TCK_Latency: 236 Cost = getInstructionLatency(I); 237 break; 238 case TCK_CodeSize: 239 case TCK_SizeAndLatency: 240 Cost = getUserCost(I, kind); 241 break; 242 } 243 return Cost; 244 } 245 246 /// Underlying constants for 'cost' values in this interface. 247 /// 248 /// Many APIs in this interface return a cost. This enum defines the 249 /// fundamental values that should be used to interpret (and produce) those 250 /// costs. The costs are returned as an int rather than a member of this 251 /// enumeration because it is expected that the cost of one IR instruction 252 /// may have a multiplicative factor to it or otherwise won't fit directly 253 /// into the enum. Moreover, it is common to sum or average costs which works 254 /// better as simple integral values. Thus this enum only provides constants. 255 /// Also note that the returned costs are signed integers to make it natural 256 /// to add, subtract, and test with zero (a common boundary condition). It is 257 /// not expected that 2^32 is a realistic cost to be modeling at any point. 258 /// 259 /// Note that these costs should usually reflect the intersection of code-size 260 /// cost and execution cost. A free instruction is typically one that folds 261 /// into another instruction. For example, reg-to-reg moves can often be 262 /// skipped by renaming the registers in the CPU, but they still are encoded 263 /// and thus wouldn't be considered 'free' here. 264 enum TargetCostConstants { 265 TCC_Free = 0, ///< Expected to fold away in lowering. 266 TCC_Basic = 1, ///< The cost of a typical 'add' instruction. 267 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. 268 }; 269 270 /// Estimate the cost of a GEP operation when lowered. 271 InstructionCost 272 getGEPCost(Type *PointeeType, const Value *Ptr, 273 ArrayRef<const Value *> Operands, 274 TargetCostKind CostKind = TCK_SizeAndLatency) const; 275 276 /// \returns A value by which our inlining threshold should be multiplied. 
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \returns A value to be added to the inlining threshold.
  unsigned adjustInliningThreshold(const CallBase *CB) const;

  /// \returns Vector bonus in percent.
  ///
  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
  /// and apply this bonus based on the percentage of vector instructions. A
  /// bonus is applied if the vector instructions exceed 50% and half that
  /// amount is applied if they exceed 10%. Note that these bonuses are
  /// somewhat arbitrary and evolved over time by accident as much as because
  /// they are principled bonuses.
  /// FIXME: It would be nice to base the bonus values on something more
  /// scientific. A target may have no bonus on vector instructions.
  int getInlinerVectorBonusPercent() const;

  /// \return the expected cost of a memcpy, which could e.g. depend on the
  /// source/destination type and alignment and the number of bytes copied.
  InstructionCost getMemcpyCost(const Instruction *I) const;

  /// \return The estimated number of case clusters when lowering \p 'SI'.
  /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
  /// table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const;

  /// Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or Instruction when
  /// lowered.
  ///
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must be equal
  /// to the number of current operands the IR user has, and their order on the
  /// list must match the order of the current operands the IR user has.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
                              TargetCostKind CostKind) const;

  /// This is a helper function which calls the two-argument getUserCost
  /// with \p Operands which are the current operands U has.
  InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const {
    SmallVector<const Value *, 4> Operands(U->operand_values());
    return getUserCost(U, Operands, CostKind);
  }

  /// If a branch or a select condition is skewed in one direction by more than
  /// this factor, it is very likely to be predicted correctly.
  BranchProbability getPredictableBranchThreshold() const;

  /// Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to conditional
  /// branches.
  bool hasBranchDivergence() const;

  /// Return true if the target prefers to use GPU divergence analysis to
  /// replace the legacy version.
  bool useGPUDivergenceAnalysis() const;

  /// Returns whether V is a source of divergence.
350 /// 351 /// This function provides the target-dependent information for 352 /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis 353 /// first builds the dependency graph, and then runs the reachability 354 /// algorithm starting with the sources of divergence. 355 bool isSourceOfDivergence(const Value *V) const; 356 357 // Returns true for the target specific 358 // set of operations which produce uniform result 359 // even taking non-uniform arguments 360 bool isAlwaysUniform(const Value *V) const; 361 362 /// Returns the address space ID for a target's 'flat' address space. Note 363 /// this is not necessarily the same as addrspace(0), which LLVM sometimes 364 /// refers to as the generic address space. The flat address space is a 365 /// generic address space that can be used access multiple segments of memory 366 /// with different address spaces. Access of a memory location through a 367 /// pointer with this address space is expected to be legal but slower 368 /// compared to the same memory location accessed through a pointer with a 369 /// different address space. 370 // 371 /// This is for targets with different pointer representations which can 372 /// be converted with the addrspacecast instruction. If a pointer is converted 373 /// to this address space, optimizations should attempt to replace the access 374 /// with the source address space. 375 /// 376 /// \returns ~0u if the target does not have such a flat address space to 377 /// optimize away. 378 unsigned getFlatAddressSpace() const; 379 380 /// Return any intrinsic address operand indexes which may be rewritten if 381 /// they use a flat address space pointer. 382 /// 383 /// \returns true if the intrinsic was handled. 384 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, 385 Intrinsic::ID IID) const; 386 387 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; 388 389 /// Return true if globals in this address space can have initializers other 390 /// than `undef`. 391 bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const; 392 393 unsigned getAssumedAddrSpace(const Value *V) const; 394 395 std::pair<const Value *, unsigned> 396 getPredicatedAddrSpace(const Value *V) const; 397 398 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p 399 /// NewV, which has a different address space. This should happen for every 400 /// operand index that collectFlatAddressOperands returned for the intrinsic. 401 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the 402 /// new value (which may be the original \p II with modified operands). 403 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, 404 Value *NewV) const; 405 406 /// Test whether calls to a function lower to actual program function 407 /// calls. 408 /// 409 /// The idea is to test whether the program is likely to require a 'call' 410 /// instruction or equivalent in order to call the given function. 411 /// 412 /// FIXME: It's not clear that this is a good or useful query API. Client's 413 /// should probably move to simpler cost metrics using the above. 414 /// Alternatively, we could split the cost interface into distinct code-size 415 /// and execution-speed costs. This would allow modelling the core of this 416 /// query more accurately as a call is a single small instruction, but 417 /// incurs significant execution cost. 
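  ///
  /// For example, a size estimator might count only calls that are expected to
  /// survive lowering (a minimal sketch; \c F and \c TTI are assumed to be a
  /// Function and a TargetTransformInfo already available to the caller):
  /// \code
  ///   unsigned NumRealCalls = 0;
  ///   for (const Instruction &I : instructions(F))       // llvm/IR/InstIterator.h
  ///     if (const auto *CB = dyn_cast<CallBase>(&I))
  ///       if (const Function *Callee = CB->getCalledFunction())
  ///         if (TTI.isLoweredToCall(Callee))              // counts real call instructions
  ///           ++NumRealCalls;
  /// \endcode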
418 bool isLoweredToCall(const Function *F) const; 419 420 struct LSRCost { 421 /// TODO: Some of these could be merged. Also, a lexical ordering 422 /// isn't always optimal. 423 unsigned Insns; 424 unsigned NumRegs; 425 unsigned AddRecCost; 426 unsigned NumIVMuls; 427 unsigned NumBaseAdds; 428 unsigned ImmCost; 429 unsigned SetupCost; 430 unsigned ScaleCost; 431 }; 432 433 /// Parameters that control the generic loop unrolling transformation. 434 struct UnrollingPreferences { 435 /// The cost threshold for the unrolled loop. Should be relative to the 436 /// getUserCost values returned by this API, and the expectation is that 437 /// the unrolled loop's instructions when run through that interface should 438 /// not exceed this cost. However, this is only an estimate. Also, specific 439 /// loops may be unrolled even with a cost above this threshold if deemed 440 /// profitable. Set this to UINT_MAX to disable the loop body cost 441 /// restriction. 442 unsigned Threshold; 443 /// If complete unrolling will reduce the cost of the loop, we will boost 444 /// the Threshold by a certain percent to allow more aggressive complete 445 /// unrolling. This value provides the maximum boost percentage that we 446 /// can apply to Threshold (The value should be no less than 100). 447 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost, 448 /// MaxPercentThresholdBoost / 100) 449 /// E.g. if complete unrolling reduces the loop execution time by 50% 450 /// then we boost the threshold by the factor of 2x. If unrolling is not 451 /// expected to reduce the running time, then we do not increase the 452 /// threshold. 453 unsigned MaxPercentThresholdBoost; 454 /// The cost threshold for the unrolled loop when optimizing for size (set 455 /// to UINT_MAX to disable). 456 unsigned OptSizeThreshold; 457 /// The cost threshold for the unrolled loop, like Threshold, but used 458 /// for partial/runtime unrolling (set to UINT_MAX to disable). 459 unsigned PartialThreshold; 460 /// The cost threshold for the unrolled loop when optimizing for size, like 461 /// OptSizeThreshold, but used for partial/runtime unrolling (set to 462 /// UINT_MAX to disable). 463 unsigned PartialOptSizeThreshold; 464 /// A forced unrolling factor (the number of concatenated bodies of the 465 /// original loop in the unrolled loop body). When set to 0, the unrolling 466 /// transformation will select an unrolling factor based on the current cost 467 /// threshold and other factors. 468 unsigned Count; 469 /// Default unroll count for loops with run-time trip count. 470 unsigned DefaultUnrollRuntimeCount; 471 // Set the maximum unrolling factor. The unrolling factor may be selected 472 // using the appropriate cost threshold, but may not exceed this number 473 // (set to UINT_MAX to disable). This does not apply in cases where the 474 // loop is being fully unrolled. 475 unsigned MaxCount; 476 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but 477 /// applies even if full unrolling is selected. This allows a target to fall 478 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. 479 unsigned FullUnrollMaxCount; 480 // Represents number of instructions optimized when "back edge" 481 // becomes "fall through" in unrolled loop. 482 // For now we count a conditional branch on a backedge and a comparison 483 // feeding it. 
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when computing
    /// the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
    /// Allow unroll and jam. Used to enable unroll and jam for the target.
    bool UnrollAndJam;
    /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
    /// value above is used during unroll and jam for the outer loop size.
    /// This value is used in the same manner to limit the size of the inner
    /// loop.
    unsigned UnrollAndJamInnerLoopThreshold;
    /// Don't allow loop unrolling to simulate more than this number of
    /// iterations when checking full unroll profitability.
    unsigned MaxIterationsCountToAnalyze;
  };

  /// Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const;

  /// Query the target whether it would be profitable to convert the given loop
  /// into a hardware loop.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const;

  /// Query the target whether it would be preferred to create a predicated
  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL) const;

  /// Query the target whether lowering of the llvm.get.active.lane.mask
  /// intrinsic is supported and how the mask should be used. A return value
  /// of PredicationStyle::Data indicates the mask is used as data only,
  /// whereas PredicationStyle::DataAndControlFlow indicates we should also use
  /// the mask for control flow in the loop. If unsupported, the return value
  /// is PredicationStyle::None.
  PredicationStyle emitGetActiveLaneMask() const;

  // Parameters that control the loop peeling transformation.
  struct PeelingPreferences {
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, a
    /// peeling factor is chosen based on profile information and other
    /// factors.
    unsigned PeelCount;
    /// Allow peeling off loop iterations.
    bool AllowPeeling;
    /// Allow peeling off loop iterations for loop nests.
    bool AllowLoopNestsPeeling;
    /// Allow peeling based on profile.
    /// Used to enable peeling off all iterations based on the provided
    /// profile. If the value is true, the peeling cost model can decide to
    /// peel only some iterations; in that case it will set this to false.
    bool PeelProfiledIterations;
  };

  /// Get target-customized preferences for the generic loop peeling
  /// transformation. The caller will initialize \p PP with the current
  /// target-independent defaults with information from \p L and \p SE.
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) const;

  /// Targets can implement their own combinations for target-specific
  /// intrinsics. This function will be called from the InstCombine pass every
  /// time a target-specific intrinsic is encountered.
  ///
  /// \returns None to not do anything target specific, or a value that will be
  /// returned from the InstCombiner. It is also possible to stop further
  /// processing of the intrinsic by returning nullptr.
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const;
  /// Can be used to implement target-specific instruction combining.
  /// \see instCombineIntrinsic
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const;
  /// @}

  /// \name Scalar Target Information
  /// @{

  /// Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on a par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If the target returns true from LSRWithInstrQueries(), \p I may be valid.
  /// TODO: Handle pre/postinc as well.
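  ///
  /// For example, to ask whether a [BaseReg + 4*IndexReg + 16] mode is legal
  /// for an i32 access in the default address space (a minimal sketch; \c TTI
  /// and the LLVMContext \c Ctx are assumed to be in scope):
  /// \code
  ///   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
  ///                                          /*BaseGV=*/nullptr,
  ///                                          /*BaseOffset=*/16,
  ///                                          /*HasBaseReg=*/true,
  ///                                          /*Scale=*/4);
  /// \endcode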
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;

  /// Return true if the LSR cost of C1 is lower than the cost of C2.
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) const;

  /// Return true if LSR's major cost is the number of registers. Targets which
  /// implement their own isLSRCostLess and unset number of registers as major
  /// cost should return false, otherwise return true.
  bool isNumRegsMajorCostOfLSR() const;

  /// \returns true if LSR should not optimize a chain that includes \p I.
  bool isProfitableLSRChainElement(Instruction *I) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// Return true if the target can save a compare for loop count, for example
  /// a hardware loop saves a compare.
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const;

  enum AddressingModeKind {
    AMK_PreIndexed,
    AMK_PostIndexed,
    AMK_None
  };

  /// Return the preferred addressing mode LSR should make efforts to generate.
  AddressingModeKind getPreferredAddressingMode(const Loop *L,
                                                ScalarEvolution *SE) const;

  /// Return true if the target supports masked store.
  bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked load.
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports nontemporal store.
  bool isLegalNTStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports nontemporal load.
  bool isLegalNTLoad(Type *DataType, Align Alignment) const;

  /// \returns true if the target supports broadcasting a load to a vector of
  /// type <NumElements x ElementTy>.
  bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;

  /// Return true if the target supports masked scatter.
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked gather.
  bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
  /// Return true if the target forces scalarizing of llvm.masked.gather
  /// intrinsics.
  bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
  /// Return true if the target forces scalarizing of llvm.masked.scatter
  /// intrinsics.
  bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;

  /// Return true if the target supports masked compress store.
  bool isLegalMaskedCompressStore(Type *DataType) const;
  /// Return true if the target supports masked expand load.
  bool isLegalMaskedExpandLoad(Type *DataType) const;

  /// Return true if this is an alternating opcode pattern that can be lowered
  /// to a single instruction on the target. On X86 this is for the addsub
  /// instruction which corresponds to a Shuffle + FAdd + FSub pattern in IR.
  /// This function expects two opcodes, \p Opcode0 and \p Opcode1, being
  /// selected by \p OpcodeMask.
The mask contains one bit per lane and is a `0` 694 /// when \p Opcode0 is selected and `1` when Opcode1 is selected. 695 /// \p VecTy is the vector type of the instruction to be generated. 696 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, 697 const SmallBitVector &OpcodeMask) const; 698 699 /// Return true if we should be enabling ordered reductions for the target. 700 bool enableOrderedReductions() const; 701 702 /// Return true if the target has a unified operation to calculate division 703 /// and remainder. If so, the additional implicit multiplication and 704 /// subtraction required to calculate a remainder from division are free. This 705 /// can enable more aggressive transformations for division and remainder than 706 /// would typically be allowed using throughput or size cost models. 707 bool hasDivRemOp(Type *DataType, bool IsSigned) const; 708 709 /// Return true if the given instruction (assumed to be a memory access 710 /// instruction) has a volatile variant. If that's the case then we can avoid 711 /// addrspacecast to generic AS for volatile loads/stores. Default 712 /// implementation returns false, which prevents address space inference for 713 /// volatile loads/stores. 714 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const; 715 716 /// Return true if target doesn't mind addresses in vectors. 717 bool prefersVectorizedAddressing() const; 718 719 /// Return the cost of the scaling factor used in the addressing 720 /// mode represented by AM for this target, for a load/store 721 /// of the specified type. 722 /// If the AM is supported, the return value must be >= 0. 723 /// If the AM is not supported, it returns a negative value. 724 /// TODO: Handle pre/postinc as well. 725 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, 726 int64_t BaseOffset, bool HasBaseReg, 727 int64_t Scale, 728 unsigned AddrSpace = 0) const; 729 730 /// Return true if the loop strength reduce pass should make 731 /// Instruction* based TTI queries to isLegalAddressingMode(). This is 732 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned 733 /// immediate offset and no index register. 734 bool LSRWithInstrQueries() const; 735 736 /// Return true if it's free to truncate a value of type Ty1 to type 737 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 738 /// by referencing its sub-register AX. 739 bool isTruncateFree(Type *Ty1, Type *Ty2) const; 740 741 /// Return true if it is profitable to hoist instruction in the 742 /// then/else to before if. 743 bool isProfitableToHoist(Instruction *I) const; 744 745 bool useAA() const; 746 747 /// Return true if this type is legal. 748 bool isTypeLegal(Type *Ty) const; 749 750 /// Returns the estimated number of registers required to represent \p Ty. 751 unsigned getRegUsageForType(Type *Ty) const; 752 753 /// Return true if switches should be turned into lookup tables for the 754 /// target. 755 bool shouldBuildLookupTables() const; 756 757 /// Return true if switches should be turned into lookup tables 758 /// containing this constant value for the target. 759 bool shouldBuildLookupTablesForConstant(Constant *C) const; 760 761 /// Return true if lookup tables should be turned into relative lookup tables. 762 bool shouldBuildRelLookupTables() const; 763 764 /// Return true if the input function which is cold at all call sites, 765 /// should use coldcc calling convention. 
  bool useColdCCForColdCall(Function &F) const;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) const;

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each
  /// argument are passed via \p Tys.
  InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                   ArrayRef<Type *> Tys) const;

  /// If the target has efficient vector element load/store instructions, it
  /// can return true here so that insertion/extraction costs are not added to
  /// the scalarization cost of a load/store.
  bool supportsEfficientVectorElementLoadStore() const;

  /// Return true if the target supports tail calls.
  bool supportsTailCalls() const;

  /// Don't restrict interleaved unrolling to small loops.
  bool enableAggressiveInterleaving(bool LoopHasReductions) const;

  /// Returns options for expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
  struct MemCmpExpansionOptions {
    // Return true if memcmp expansion is enabled.
    operator bool() const { return MaxNumLoads > 0; }

    // Maximum number of load operations.
    unsigned MaxNumLoads = 0;

    // The list of available load sizes (in bytes), sorted in decreasing order.
    SmallVector<unsigned, 8> LoadSizes;

    // For memcmp expansion when the memcmp result is only compared equal or
    // not-equal to 0, allow up to this number of load pairs per block. As an
    // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
    //   a0 = load2bytes &a[0]
    //   b0 = load2bytes &b[0]
    //   a2 = load1byte  &a[2]
    //   b2 = load1byte  &b[2]
    //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
    unsigned NumLoadsPerBlock = 1;

    // Set to true to allow overlapping loads. For example, 7-byte compares can
    // be done with two 4-byte compares instead of 4+2+1-byte compares. This
    // requires all loads in LoadSizes to be doable in an unaligned way.
    bool AllowOverlappingLoads = false;
  };
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const;

  /// Enable matching of interleaved access groups.
  bool enableInterleavedAccessVectorization() const;

  /// Enable matching of interleaved access groups that contain predicated
  /// accesses or gaps and are therefore vectorized using masked
  /// vector loads/stores.
  bool enableMaskedInterleavedAccessVectorization() const;

  /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because vector and scalar floating-point
  /// semantics may differ. For example, ARM NEON v7 SIMD math does not
  /// support IEEE-754 denormal numbers, while depending on the platform,
  /// scalar floating-point math does.
  /// This applies to floating-point math operations and calls, not memory
  /// operations, shuffles, or casts.
  bool isFPVectorizationPotentiallyUnsafe() const;

  /// Determine if the target supports unaligned memory accesses.
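  ///
  /// For example, a vectorizer can check whether an unaligned 128-bit access
  /// in the default address space is allowed, and whether it is also fast
  /// (a minimal sketch; \c TTI and the LLVMContext \c Ctx are assumed to be
  /// in scope):
  /// \code
  ///   bool Fast = false;
  ///   bool Allowed = TTI.allowsMisalignedMemoryAccesses(Ctx, /*BitWidth=*/128,
  ///                                                     /*AddressSpace=*/0,
  ///                                                     Align(1), &Fast);
  /// \endcode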
840 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, 841 unsigned AddressSpace = 0, 842 Align Alignment = Align(1), 843 bool *Fast = nullptr) const; 844 845 /// Return hardware support for population count. 846 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; 847 848 /// Return true if the hardware has a fast square-root instruction. 849 bool haveFastSqrt(Type *Ty) const; 850 851 /// Return true if it is faster to check if a floating-point value is NaN 852 /// (or not-NaN) versus a comparison against a constant FP zero value. 853 /// Targets should override this if materializing a 0.0 for comparison is 854 /// generally as cheap as checking for ordered/unordered. 855 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const; 856 857 /// Return the expected cost of supporting the floating point operation 858 /// of the specified type. 859 InstructionCost getFPOpCost(Type *Ty) const; 860 861 /// Return the expected cost of materializing for the given integer 862 /// immediate of the specified type. 863 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, 864 TargetCostKind CostKind) const; 865 866 /// Return the expected cost of materialization for the given integer 867 /// immediate of the specified type for a given instruction. The cost can be 868 /// zero if the immediate can be folded into the specified instruction. 869 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, 870 const APInt &Imm, Type *Ty, 871 TargetCostKind CostKind, 872 Instruction *Inst = nullptr) const; 873 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, 874 const APInt &Imm, Type *Ty, 875 TargetCostKind CostKind) const; 876 877 /// Return the expected cost for the given integer when optimising 878 /// for size. This is different than the other integer immediate cost 879 /// functions in that it is subtarget agnostic. This is useful when you e.g. 880 /// target one ISA such as Aarch32 but smaller encodings could be possible 881 /// with another such as Thumb. This return value is used as a penalty when 882 /// the total costs for a constant is calculated (the bigger the cost, the 883 /// more beneficial constant hoisting is). 884 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, 885 const APInt &Imm, Type *Ty) const; 886 /// @} 887 888 /// \name Vector Target Information 889 /// @{ 890 891 /// The various kinds of shuffle patterns for vector queries. 892 enum ShuffleKind { 893 SK_Broadcast, ///< Broadcast element 0 to all other elements. 894 SK_Reverse, ///< Reverse the order of the vector. 895 SK_Select, ///< Selects elements from the corresponding lane of 896 ///< either source operand. This is equivalent to a 897 ///< vector select with a constant condition operand. 898 SK_Transpose, ///< Transpose two vectors. 899 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. 900 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset. 901 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one 902 ///< with any shuffle mask. 903 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any 904 ///< shuffle mask. 905 SK_Splice ///< Concatenates elements from the first input vector 906 ///< with elements of the second input vector. Returning 907 ///< a vector of the same type as the input vectors. 908 }; 909 910 /// Additional information about an operand's possible values. 911 enum OperandValueKind { 912 OK_AnyValue, // Operand can have any value. 
913 OK_UniformValue, // Operand is uniform (splat of a value). 914 OK_UniformConstantValue, // Operand is uniform constant. 915 OK_NonUniformConstantValue // Operand is a non uniform constant value. 916 }; 917 918 /// Additional properties of an operand's values. 919 enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; 920 921 /// \return the number of registers in the target-provided register class. 922 unsigned getNumberOfRegisters(unsigned ClassID) const; 923 924 /// \return the target-provided register class ID for the provided type, 925 /// accounting for type promotion and other type-legalization techniques that 926 /// the target might apply. However, it specifically does not account for the 927 /// scalarization or splitting of vector types. Should a vector type require 928 /// scalarization or splitting into multiple underlying vector registers, that 929 /// type should be mapped to a register class containing no registers. 930 /// Specifically, this is designed to provide a simple, high-level view of the 931 /// register allocation later performed by the backend. These register classes 932 /// don't necessarily map onto the register classes used by the backend. 933 /// FIXME: It's not currently possible to determine how many registers 934 /// are used by the provided type. 935 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; 936 937 /// \return the target-provided register class name 938 const char *getRegisterClassName(unsigned ClassID) const; 939 940 enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector }; 941 942 /// \return The width of the largest scalar or vector register type. 943 TypeSize getRegisterBitWidth(RegisterKind K) const; 944 945 /// \return The width of the smallest vector register type. 946 unsigned getMinVectorRegisterBitWidth() const; 947 948 /// \return The maximum value of vscale if the target specifies an 949 /// architectural maximum vector length, and None otherwise. 950 Optional<unsigned> getMaxVScale() const; 951 952 /// \return the value of vscale to tune the cost model for. 953 Optional<unsigned> getVScaleForTuning() const; 954 955 /// \return True if the vectorization factor should be chosen to 956 /// make the vector of the smallest element type match the size of a 957 /// vector register. For wider element types, this could result in 958 /// creating vectors that span multiple vector registers. 959 /// If false, the vectorization factor will be chosen based on the 960 /// size of the widest element type. 961 /// \p K Register Kind for vectorization. 962 bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const; 963 964 /// \return The minimum vectorization factor for types of given element 965 /// bit width, or 0 if there is no minimum VF. The returned value only 966 /// applies when shouldMaximizeVectorBandwidth returns true. 967 /// If IsScalable is true, the returned ElementCount must be a scalable VF. 968 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const; 969 970 /// \return The maximum vectorization factor for types of given element 971 /// bit width and opcode, or 0 if there is no maximum VF. 972 /// Currently only used by the SLP vectorizer. 973 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const; 974 975 /// \return The minimum vectorization factor for the store instruction. 
Given 976 /// the initial estimation of the minimum vector factor and store value type, 977 /// it tries to find possible lowest VF, which still might be profitable for 978 /// the vectorization. 979 /// \param VF Initial estimation of the minimum vector factor. 980 /// \param ScalarMemTy Scalar memory type of the store operation. 981 /// \param ScalarValTy Scalar type of the stored value. 982 /// Currently only used by the SLP vectorizer. 983 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, 984 Type *ScalarValTy) const; 985 986 /// \return True if it should be considered for address type promotion. 987 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is 988 /// profitable without finding other extensions fed by the same input. 989 bool shouldConsiderAddressTypePromotion( 990 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const; 991 992 /// \return The size of a cache line in bytes. 993 unsigned getCacheLineSize() const; 994 995 /// The possible cache levels 996 enum class CacheLevel { 997 L1D, // The L1 data cache 998 L2D, // The L2 data cache 999 1000 // We currently do not model L3 caches, as their sizes differ widely between 1001 // microarchitectures. Also, we currently do not have a use for L3 cache 1002 // size modeling yet. 1003 }; 1004 1005 /// \return The size of the cache level in bytes, if available. 1006 Optional<unsigned> getCacheSize(CacheLevel Level) const; 1007 1008 /// \return The associativity of the cache level, if available. 1009 Optional<unsigned> getCacheAssociativity(CacheLevel Level) const; 1010 1011 /// \return How much before a load we should place the prefetch 1012 /// instruction. This is currently measured in number of 1013 /// instructions. 1014 unsigned getPrefetchDistance() const; 1015 1016 /// Some HW prefetchers can handle accesses up to a certain constant stride. 1017 /// Sometimes prefetching is beneficial even below the HW prefetcher limit, 1018 /// and the arguments provided are meant to serve as a basis for deciding this 1019 /// for a particular loop. 1020 /// 1021 /// \param NumMemAccesses Number of memory accesses in the loop. 1022 /// \param NumStridedMemAccesses Number of the memory accesses that 1023 /// ScalarEvolution could find a known stride 1024 /// for. 1025 /// \param NumPrefetches Number of software prefetches that will be 1026 /// emitted as determined by the addresses 1027 /// involved and the cache line size. 1028 /// \param HasCall True if the loop contains a call. 1029 /// 1030 /// \return This is the minimum stride in bytes where it makes sense to start 1031 /// adding SW prefetches. The default is 1, i.e. prefetch with any 1032 /// stride. 1033 unsigned getMinPrefetchStride(unsigned NumMemAccesses, 1034 unsigned NumStridedMemAccesses, 1035 unsigned NumPrefetches, bool HasCall) const; 1036 1037 /// \return The maximum number of iterations to prefetch ahead. If 1038 /// the required number of iterations is more than this number, no 1039 /// prefetching is performed. 1040 unsigned getMaxPrefetchIterationsAhead() const; 1041 1042 /// \return True if prefetching should also be done for writes. 1043 bool enableWritePrefetching() const; 1044 1045 /// \return The maximum interleave factor that any transform should try to 1046 /// perform for this target. This number depends on the level of parallelism 1047 /// and the number of execution units in the CPU. 1048 unsigned getMaxInterleaveFactor(unsigned VF) const; 1049 1050 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2. 
1051 static OperandValueKind getOperandInfo(const Value *V, 1052 OperandValueProperties &OpProps); 1053 1054 /// This is an approximation of reciprocal throughput of a math/logic op. 1055 /// A higher cost indicates less expected throughput. 1056 /// From Agner Fog's guides, reciprocal throughput is "the average number of 1057 /// clock cycles per instruction when the instructions are not part of a 1058 /// limiting dependency chain." 1059 /// Therefore, costs should be scaled to account for multiple execution units 1060 /// on the target that can process this type of instruction. For example, if 1061 /// there are 5 scalar integer units and 2 vector integer units that can 1062 /// calculate an 'add' in a single cycle, this model should indicate that the 1063 /// cost of the vector add instruction is 2.5 times the cost of the scalar 1064 /// add instruction. 1065 /// \p Args is an optional argument which holds the instruction operands 1066 /// values so the TTI can analyze those values searching for special 1067 /// cases or optimizations based on those values. 1068 /// \p CxtI is the optional original context instruction, if one exists, to 1069 /// provide even more information. 1070 InstructionCost getArithmeticInstrCost( 1071 unsigned Opcode, Type *Ty, 1072 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, 1073 OperandValueKind Opd1Info = OK_AnyValue, 1074 OperandValueKind Opd2Info = OK_AnyValue, 1075 OperandValueProperties Opd1PropInfo = OP_None, 1076 OperandValueProperties Opd2PropInfo = OP_None, 1077 ArrayRef<const Value *> Args = ArrayRef<const Value *>(), 1078 const Instruction *CxtI = nullptr) const; 1079 1080 /// \return The cost of a shuffle instruction of kind Kind and of type Tp. 1081 /// The exact mask may be passed as Mask, or else the array will be empty. 1082 /// The index and subtype parameters are used by the subvector insertion and 1083 /// extraction shuffle kinds to show the insert/extract point and the type of 1084 /// the subvector being inserted/extracted. The operands of the shuffle can be 1085 /// passed through \p Args, which helps improve the cost estimation in some 1086 /// cases, like in broadcast loads. 1087 /// NOTE: For subvector extractions Tp represents the source type. 1088 InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, 1089 ArrayRef<int> Mask = None, int Index = 0, 1090 VectorType *SubTp = nullptr, 1091 ArrayRef<const Value *> Args = None) const; 1092 1093 /// Represents a hint about the context in which a cast is used. 1094 /// 1095 /// For zext/sext, the context of the cast is the operand, which must be a 1096 /// load of some kind. For trunc, the context is of the cast is the single 1097 /// user of the instruction, which must be a store of some kind. 1098 /// 1099 /// This enum allows the vectorizer to give getCastInstrCost an idea of the 1100 /// type of cast it's dealing with, as not every cast is equal. For instance, 1101 /// the zext of a load may be free, but the zext of an interleaving load can 1102 //// be (very) expensive! 1103 /// 1104 /// See \c getCastContextHint to compute a CastContextHint from a cast 1105 /// Instruction*. Callers can use it if they don't need to override the 1106 /// context and just want it to be calculated from the instruction. 1107 /// 1108 /// FIXME: This handles the types of load/store that the vectorizer can 1109 /// produce, which are the cases where the context instruction is most 1110 /// likely to be incorrect. 
There are other situations where that can happen 1111 /// too, which might be handled here but in the long run a more general 1112 /// solution of costing multiple instructions at the same times may be better. 1113 enum class CastContextHint : uint8_t { 1114 None, ///< The cast is not used with a load/store of any kind. 1115 Normal, ///< The cast is used with a normal load/store. 1116 Masked, ///< The cast is used with a masked load/store. 1117 GatherScatter, ///< The cast is used with a gather/scatter. 1118 Interleave, ///< The cast is used with an interleaved load/store. 1119 Reversed, ///< The cast is used with a reversed load/store. 1120 }; 1121 1122 /// Calculates a CastContextHint from \p I. 1123 /// This should be used by callers of getCastInstrCost if they wish to 1124 /// determine the context from some instruction. 1125 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr, 1126 /// or if it's another type of cast. 1127 static CastContextHint getCastContextHint(const Instruction *I); 1128 1129 /// \return The expected cost of cast instructions, such as bitcast, trunc, 1130 /// zext, etc. If there is an existing instruction that holds Opcode, it 1131 /// may be passed in the 'I' parameter. 1132 InstructionCost 1133 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, 1134 TTI::CastContextHint CCH, 1135 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, 1136 const Instruction *I = nullptr) const; 1137 1138 /// \return The expected cost of a sign- or zero-extended vector extract. Use 1139 /// -1 to indicate that there is no information about the index value. 1140 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, 1141 VectorType *VecTy, 1142 unsigned Index = -1) const; 1143 1144 /// \return The expected cost of control-flow related instructions such as 1145 /// Phi, Ret, Br, Switch. 1146 InstructionCost 1147 getCFInstrCost(unsigned Opcode, 1148 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, 1149 const Instruction *I = nullptr) const; 1150 1151 /// \returns The expected cost of compare and select instructions. If there 1152 /// is an existing instruction that holds Opcode, it may be passed in the 1153 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select 1154 /// is using a compare with the specified predicate as condition. When vector 1155 /// types are passed, \p VecPred must be used for all lanes. 1156 InstructionCost 1157 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, 1158 CmpInst::Predicate VecPred, 1159 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, 1160 const Instruction *I = nullptr) const; 1161 1162 /// \return The expected cost of vector Insert and Extract. 1163 /// Use -1 to indicate that there is no information on the index value. 1164 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, 1165 unsigned Index = -1) const; 1166 1167 /// \return The cost of replication shuffle of \p VF elements typed \p EltTy 1168 /// \p ReplicationFactor times. 1169 /// 1170 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: 1171 /// <0,0,0,1,1,1,2,2,2,3,3,3> 1172 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, 1173 int VF, 1174 const APInt &DemandedDstElts, 1175 TTI::TargetCostKind CostKind); 1176 1177 /// \return The cost of Load and Store instructions. 
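  ///
  /// For example, the throughput cost of a naturally aligned i32 store to the
  /// default address space could be queried as (a minimal sketch; \c TTI and
  /// the LLVMContext \c Ctx are assumed to be in scope):
  /// \code
  ///   InstructionCost Cost = TTI.getMemoryOpCost(Instruction::Store,
  ///                                              Type::getInt32Ty(Ctx),
  ///                                              Align(4),
  ///                                              /*AddressSpace=*/0);
  /// \endcode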
1178 InstructionCost 1179 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, 1180 unsigned AddressSpace, 1181 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, 1182 const Instruction *I = nullptr) const; 1183 1184 /// \return The cost of VP Load and Store instructions. 1185 InstructionCost 1186 getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, 1187 unsigned AddressSpace, 1188 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, 1189 const Instruction *I = nullptr) const; 1190 1191 /// \return The cost of masked Load and Store instructions. 1192 InstructionCost getMaskedMemoryOpCost( 1193 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, 1194 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; 1195 1196 /// \return The cost of Gather or Scatter operation 1197 /// \p Opcode - is a type of memory access Load or Store 1198 /// \p DataTy - a vector type of the data to be loaded or stored 1199 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory 1200 /// \p VariableMask - true when the memory access is predicated with a mask 1201 /// that is not a compile-time constant 1202 /// \p Alignment - alignment of single element 1203 /// \p I - the optional original context instruction, if one exists, e.g. the 1204 /// load/store to transform or the call to the gather/scatter intrinsic 1205 InstructionCost getGatherScatterOpCost( 1206 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, 1207 Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, 1208 const Instruction *I = nullptr) const; 1209 1210 /// \return The cost of the interleaved memory operation. 1211 /// \p Opcode is the memory operation code 1212 /// \p VecTy is the vector type of the interleaved access. 1213 /// \p Factor is the interleave factor 1214 /// \p Indices is the indices for interleaved load members (as interleaved 1215 /// load allows gaps) 1216 /// \p Alignment is the alignment of the memory operation 1217 /// \p AddressSpace is address space of the pointer. 1218 /// \p UseMaskForCond indicates if the memory access is predicated. 1219 /// \p UseMaskForGaps indicates if gaps should be masked. 1220 InstructionCost getInterleavedMemoryOpCost( 1221 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, 1222 Align Alignment, unsigned AddressSpace, 1223 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, 1224 bool UseMaskForCond = false, bool UseMaskForGaps = false) const; 1225 1226 /// A helper function to determine the type of reduction algorithm used 1227 /// for a given \p Opcode and set of FastMathFlags \p FMF. 1228 static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) { 1229 return FMF != None && !(*FMF).allowReassoc(); 1230 } 1231 1232 /// Calculate the cost of vector reduction intrinsics. 1233 /// 1234 /// This is the cost of reducing the vector value of type \p Ty to a scalar 1235 /// value using the operation denoted by \p Opcode. The FastMathFlags 1236 /// parameter \p FMF indicates what type of reduction we are performing: 1237 /// 1. Tree-wise. This is the typical 'fast' reduction performed that 1238 /// involves successively splitting a vector into half and doing the 1239 /// operation on the pair of halves until you have a scalar value. 
  /// Calculate the cost of vector reduction intrinsics.
  ///
  /// This is the cost of reducing the vector value of type \p Ty to a scalar
  /// value using the operation denoted by \p Opcode. The FastMathFlags
  /// parameter \p FMF indicates what type of reduction we are performing:
  ///   1. Tree-wise. This is the typical 'fast' reduction performed that
  ///   involves successively splitting a vector into half and doing the
  ///   operation on the pair of halves until you have a scalar value. For
  ///   example:
  ///     (v0, v1, v2, v3)
  ///     ((v0+v2), (v1+v3), undef, undef)
  ///     ((v0+v2+v1+v3), undef, undef, undef)
  ///   This is the default behaviour for integer operations, whereas for
  ///   floating point we only do this if \p FMF indicates that
  ///   reassociation is allowed.
  ///   2. Ordered. For a vector with N elements this involves performing N
  ///   operations in lane order, starting with an initial scalar value, i.e.
  ///     result = InitVal + v0
  ///     result = result + v1
  ///     result = result + v2
  ///     result = result + v3
  ///   This is only the case for FP operations and when reassociation is not
  ///   allowed.
  ///
  InstructionCost getArithmeticReductionCost(
      unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  InstructionCost getMinMaxReductionCost(
      VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  /// Calculate the cost of an extended reduction pattern, similar to
  /// getArithmeticReductionCost of an Add reduction with an extension and
  /// optional multiply. This is the cost of an expression of the form:
  ///   ResTy vecreduce.add(ext(Ty A)), or if IsMLA flag is set then:
  ///   ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). The reduction happens
  /// on a VectorType with ResTy elements and Ty lanes.
  InstructionCost getExtendedAddReductionCost(
      bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

  /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
  /// Three cases are handled: 1. scalar instruction 2. vector instruction
  /// 3. scalar instruction which is to be vectorized.
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) const;

  /// \returns The cost of Call instructions.
  InstructionCost getCallInstrCost(
      Function *F, Type *RetTy, ArrayRef<Type *> Tys,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;

  /// \returns The number of pieces into which the provided type must be
  /// split during legalization. Zero is returned when the answer is unknown.
  unsigned getNumberOfParts(Type *Tp) const;

  /// \returns The cost of the address computation. For most targets this can
  /// be merged into the instruction indexing mode. Some targets might want to
  /// distinguish between address computation for memory operations on vector
  /// types and scalar types. Such targets should override this function.
  /// The 'SE' parameter holds a pointer to the scalar evolution object which
  /// is used in order to get the Ptr step value in case of constant stride.
  /// The 'Ptr' parameter holds the SCEV of the access pointer.
  InstructionCost getAddressComputationCost(Type *Ty,
                                            ScalarEvolution *SE = nullptr,
                                            const SCEV *Ptr = nullptr) const;

  /// \returns The cost, if any, of keeping values of the given types alive
  /// over a callsite.
  ///
  /// Some types may require the use of register classes that do not have
  /// any callee-saved registers, so would require a spill and fill.
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
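  // Illustrative sketch: a type-based intrinsic cost query for llvm.fmuladd on
  // some vector type Ty, using the (ID, return type, argument types) form of
  // IntrinsicCostAttributes. Assumes llvm/IR/Intrinsics.h is available; the
  // helper name 'costFMulAdd' is hypothetical.
  //
  //   InstructionCost costFMulAdd(const TargetTransformInfo &TTI, Type *Ty) {
  //     IntrinsicCostAttributes ICA(Intrinsic::fmuladd, Ty, {Ty, Ty, Ty});
  //     return TTI.getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
  //   }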
  /// \returns True if the intrinsic is a supported memory intrinsic. Info
  /// will contain additional information - whether the intrinsic may write
  /// or read to memory, volatility and the pointer. Info is undefined
  /// if false is returned.
  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

  /// \returns The maximum element size, in bytes, for an element
  /// unordered-atomic memory intrinsic.
  unsigned getAtomicMemIntrinsicMaxElementSize() const;

  /// \returns A value which is the result of the given memory intrinsic. New
  /// instructions may be created to extract the result from the given
  /// intrinsic memory operation. Returns nullptr if the target cannot create
  /// a result from the given intrinsic.
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const;

  /// \returns The type to use in a loop expansion of a memcpy call.
  Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            unsigned SrcAlign, unsigned DestAlign,
                            Optional<uint32_t> AtomicElementSize = None) const;

  /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
  /// \param RemainingBytes The number of bytes to copy.
  ///
  /// Calculates the operand types to use when copying \p RemainingBytes of
  /// memory, where source and destination alignments are \p SrcAlign and
  /// \p DestAlign respectively.
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      Optional<uint32_t> AtomicCpySize = None) const;

  /// \returns True if the two functions have compatible attributes for
  /// inlining purposes.
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  /// \returns True if the caller and callee agree on how \p Types will be
  /// passed to or returned from the callee.
  /// \param Types List of types to check.
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const;

  /// The type of load/store indexing.
  enum MemIndexedMode {
    MIM_Unindexed, ///< No indexing.
    MIM_PreInc,    ///< Pre-incrementing.
    MIM_PreDec,    ///< Pre-decrementing.
    MIM_PostInc,   ///< Post-incrementing.
    MIM_PostDec    ///< Post-decrementing.
  };

  /// \returns True if the specified indexed load for the given type is legal.
  bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns True if the specified indexed store for the given type is legal.
  bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;

  /// \returns The bitwidth of the largest vector type that should be used to
  /// load/store in the given address space.
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

  /// \returns True if the load instruction is legal to vectorize.
  bool isLegalToVectorizeLoad(LoadInst *LI) const;

  /// \returns True if the store instruction is legal to vectorize.
  bool isLegalToVectorizeStore(StoreInst *SI) const;
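  // Illustrative sketch of a load-vectorizer-style legality check combining
  // the queries above with isLegalToVectorizeLoadChain (declared below). The
  // helper name and the idea that ChainBytes describes a chain of adjacent
  // loads are assumptions of the example.
  //
  //   bool chainIsVectorizable(const TargetTransformInfo &TTI, LoadInst *Lead,
  //                            unsigned ChainBytes, Align A, unsigned AS) {
  //     return TTI.isLegalToVectorizeLoad(Lead) &&
  //            ChainBytes * 8 <= TTI.getLoadStoreVecRegBitWidth(AS) &&
  //            TTI.isLegalToVectorizeLoadChain(ChainBytes, A, AS);
  //   }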
  /// \returns True if it is legal to vectorize the given load chain.
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const;

  /// \returns True if it is legal to vectorize the given store chain.
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const;

  /// \returns True if it is legal to vectorize the given reduction kind.
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const;

  /// \returns True if the given type is supported for scalable vectors.
  bool isElementTypeLegalForScalableVector(Type *Ty) const;

  /// \returns The new vector factor value if the target doesn't support \p
  /// SizeInBytes loads or has a better vector factor.
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const;

  /// \returns The new vector factor value if the target doesn't support \p
  /// SizeInBytes stores or has a better vector factor.
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const;

  /// Flags describing the kind of vector reduction.
  struct ReductionFlags {
    ReductionFlags() = default;
    bool IsMaxOp =
        false; ///< If the op is a min/max kind, true if it's a max operation.
    bool IsSigned = false; ///< Whether the operation is a signed int reduction.
    bool NoNaN =
        false; ///< If op is an fp min/max, whether NaNs may be present.
  };

  /// \returns True if the target prefers reductions to be performed in the
  /// loop.
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const;

  /// \returns True if the target prefers the reduction select to be kept in
  /// the loop when tail folding, i.e.
  ///   loop:
  ///     p = phi (0, s)
  ///     a = add (p, x)
  ///     s = select (mask, a, p)
  ///   vecreduce.add(s)
  ///
  /// As opposed to the normal scheme of p = phi (0, a) which allows the select
  /// to be pulled out of the loop. If the select(.., add, ..) can be
  /// predicated by the target, this can lead to cleaner code generation.
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const;

  /// \returns True if the target wants to expand the given reduction intrinsic
  /// into a shuffle sequence.
  bool shouldExpandReduction(const IntrinsicInst *II) const;

  /// \returns the size cost of rematerializing a GlobalValue address relative
  /// to a stack reload.
  unsigned getGISelRematGlobalCost() const;

  /// \returns True if the target supports scalable vectors.
  bool supportsScalableVectors() const;

  /// \return true when scalable vectorization is preferred.
  bool enableScalableVectorization() const;
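  // Sketch only: how a vectorizer-style client might ask whether an integer
  // add reduction should stay in the loop. The default-constructed
  // ReductionFlags and the use of Instruction::Add (from llvm/IR/Instruction.h)
  // are assumptions of the example; the helper name is hypothetical.
  //
  //   bool useInLoopAddReduction(const TargetTransformInfo &TTI, Type *Ty) {
  //     return TTI.preferInLoopReduction(Instruction::Add, Ty,
  //                                      TTI::ReductionFlags());
  //   }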
  /// \name Vector Predication Information
  /// @{
  /// Whether the target supports the %evl parameter of VP intrinsics
  /// efficiently in hardware, for the given opcode and type/alignment. (see
  /// LLVM Language Reference - "Vector Predication Intrinsics").
  /// Use of %evl is discouraged when that is not the case.
  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const;

  struct VPLegalization {
    enum VPTransform {
      // keep the predicating parameter
      Legal = 0,
      // where legal, discard the predicate parameter
      Discard = 1,
      // transform into something else that is also predicating
      Convert = 2
    };

    // How to transform the EVL parameter.
    // Legal:   keep the EVL parameter as it is.
    // Discard: Ignore the EVL parameter where it is safe to do so.
    // Convert: Fold the EVL into the mask parameter.
    VPTransform EVLParamStrategy;

    // How to transform the operator.
    // Legal:   The target supports this operator.
    // Convert: Convert this to a non-VP operation.
    // The 'Discard' strategy is invalid.
    VPTransform OpStrategy;

    bool shouldDoNothing() const {
      return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
    }
    VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
        : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
  };

  /// \returns How the target needs this vector-predicated operation to be
  /// transformed.
  VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
  /// @}

  /// @}
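  // Illustrative use by a hypothetical VP expansion pass: query the strategy
  // and bail out early when nothing needs to change. The function name and the
  // rewrite step are assumptions of the sketch.
  //
  //   void legalizeVPIntrinsic(const TargetTransformInfo &TTI,
  //                            VPIntrinsic &VPI) {
  //     TTI::VPLegalization VPL = TTI.getVPLegalizationStrategy(VPI);
  //     if (VPL.shouldDoNothing())
  //       return;
  //     // ...rewrite VPI according to VPL.EVLParamStrategy / VPL.OpStrategy.
  //   }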
private:
  /// Estimate the latency of specified instruction.
  /// Returns 1 as the default value.
  InstructionCost getInstructionLatency(const Instruction *I) const;

  /// Returns the expected throughput cost of the instruction.
  /// Returns -1 if the cost is unknown.
  InstructionCost getInstructionThroughput(const Instruction *I) const;

  /// The abstract base class used to type erase specific TTI
  /// implementations.
  class Concept;

  /// The template model for the base class which wraps a concrete
  /// implementation in a type erased interface.
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
};

class TargetTransformInfo::Concept {
public:
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                                     ArrayRef<const Value *> Operands,
                                     TTI::TargetCostKind CostKind) = 0;
  virtual unsigned getInliningThresholdMultiplier() = 0;
  virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
  virtual int getInlinerVectorBonusPercent() = 0;
  virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
  virtual unsigned
  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
                                   ProfileSummaryInfo *PSI,
                                   BlockFrequencyInfo *BFI) = 0;
  virtual InstructionCost getUserCost(const User *U,
                                      ArrayRef<const Value *> Operands,
                                      TargetCostKind CostKind) = 0;
  virtual BranchProbability getPredictableBranchThreshold() = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool useGPUDivergenceAnalysis() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isAlwaysUniform(const Value *V) = 0;
  virtual unsigned getFlatAddressSpace() = 0;
  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                          Intrinsic::ID IID) const = 0;
  virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
  virtual bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
  virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
  virtual std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const = 0;
  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                  Value *OldV,
                                                  Value *NewV) const = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                       UnrollingPreferences &UP,
                                       OptimizationRemarkEmitter *ORE) = 0;
  virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                     PeelingPreferences &PP) = 0;
  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                        AssumptionCache &AC,
                                        TargetLibraryInfo *LibInfo,
                                        HardwareLoopInfo &HWLoopInfo) = 0;
  virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
                                           ScalarEvolution &SE,
                                           AssumptionCache &AC,
                                           TargetLibraryInfo *TLI,
                                           DominatorTree *DT,
                                           LoopVectorizationLegality *LVL) = 0;
  virtual PredicationStyle emitGetActiveLaneMask() = 0;
  virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                       IntrinsicInst &II) = 0;
  virtual Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) = 0;
  virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale, unsigned AddrSpace,
                                     Instruction *I) = 0;
  virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                             const TargetTransformInfo::LSRCost &C2) = 0;
  virtual bool isNumRegsMajorCostOfLSR() = 0;
  virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
  virtual bool canMacroFuseCmp() = 0;
  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                          TargetLibraryInfo *LibInfo) = 0;
  virtual AddressingModeKind
  getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalBroadcastLoad(Type *ElementTy,
                                    ElementCount NumElements) const = 0;
  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
  virtual bool forceScalarizeMaskedGather(VectorType *DataType,
                                          Align Alignment) = 0;
  virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
                                           Align Alignment) = 0;
  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
  virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
                               unsigned Opcode1,
                               const SmallBitVector &OpcodeMask) const = 0;
  virtual bool enableOrderedReductions() = 0;
  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  virtual bool prefersVectorizedAddressing() = 0;
  virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                               int64_t BaseOffset,
                                               bool HasBaseReg, int64_t Scale,
                                               unsigned AddrSpace) = 0;
  virtual bool LSRWithInstrQueries() = 0;
  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  virtual bool isProfitableToHoist(Instruction *I) = 0;
  virtual bool useAA() = 0;
  virtual bool isTypeLegal(Type *Ty) = 0;
  virtual unsigned getRegUsageForType(Type *Ty) = 0;
  virtual bool shouldBuildLookupTables() = 0;
  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
  virtual bool shouldBuildRelLookupTables() = 0;
  virtual bool useColdCCForColdCall(Function &F) = 0;
  virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
                                                   const APInt &DemandedElts,
                                                   bool Insert,
                                                   bool Extract) = 0;
  virtual InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys) = 0;
  virtual bool supportsEfficientVectorElementLoadStore() = 0;
  virtual bool supportsTailCalls() = 0;
  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  virtual MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
  virtual bool enableInterleavedAccessVectorization() = 0;
  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
                                              unsigned AddressSpace,
                                              Align Alignment,
                                              bool *Fast) = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
  virtual InstructionCost getFPOpCost(Type *Ty) = 0;
  virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                                const APInt &Imm,
                                                Type *Ty) = 0;
  virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                        TargetCostKind CostKind) = 0;
  virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                            const APInt &Imm, Type *Ty,
                                            TargetCostKind CostKind,
                                            Instruction *Inst = nullptr) = 0;
  virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                              const APInt &Imm, Type *Ty,
                                              TargetCostKind CostKind) = 0;
  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
  virtual unsigned getRegisterClassForType(bool Vector,
                                           Type *Ty = nullptr) const = 0;
  virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
  virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
  virtual unsigned getMinVectorRegisterBitWidth() const = 0;
  virtual Optional<unsigned> getMaxVScale() const = 0;
  virtual Optional<unsigned> getVScaleForTuning() const = 0;
  virtual bool
  shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
  virtual ElementCount getMinimumVF(unsigned ElemWidth,
                                    bool IsScalable) const = 0;
  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
  virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                                     Type *ScalarValTy) const = 0;
  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  virtual unsigned getCacheLineSize() const = 0;
  virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
  virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;

  /// \return How much before a load we should place the prefetch
  /// instruction. This is currently measured in number of
  /// instructions.
  virtual unsigned getPrefetchDistance() const = 0;

  /// \return Some HW prefetchers can handle accesses up to a certain
  /// constant stride. This is the minimum stride in bytes where it
  /// makes sense to start adding SW prefetches. The default is 1,
  /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
  /// even below the HW prefetcher limit, and the arguments provided are
  /// meant to serve as a basis for deciding this for a particular loop.
  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const = 0;

  /// \return The maximum number of iterations to prefetch ahead. If
  /// the required number of iterations is more than this number, no
  /// prefetching is performed.
  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
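  // Illustrative sketch (comments only): how a loop prefetching pass might
  // combine these hooks, assumed to be reachable through the public
  // TargetTransformInfo wrappers of the same names. The surrounding loop
  // analysis values and 'emitPrefetch' are hypothetical.
  //
  //   if (StrideInBytes >= TTI.getMinPrefetchStride(NumMemAccesses,
  //                                                 NumStridedMemAccesses,
  //                                                 NumPrefetches, HasCall) &&
  //       ItersAhead <= TTI.getMaxPrefetchIterationsAhead())
  //     emitPrefetch(/*AheadBy=*/TTI.getPrefetchDistance());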
  /// \return True if prefetching should also be done for writes.
  virtual bool enableWritePrefetching() const = 0;

  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
  virtual InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      OperandValueKind Opd1Info, OperandValueKind Opd2Info,
      OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
      ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
  virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                         ArrayRef<int> Mask, int Index,
                                         VectorType *SubTp,
                                         ArrayRef<const Value *> Args) = 0;
  virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
                                           Type *Src, CastContextHint CCH,
                                           TTI::TargetCostKind CostKind,
                                           const Instruction *I) = 0;
  virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                   VectorType *VecTy,
                                                   unsigned Index) = 0;
  virtual InstructionCost getCFInstrCost(unsigned Opcode,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) = 0;
  virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                             Type *CondTy,
                                             CmpInst::Predicate VecPred,
                                             TTI::TargetCostKind CostKind,
                                             const Instruction *I) = 0;
  virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                             unsigned Index) = 0;

  virtual InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TTI::TargetCostKind CostKind) = 0;

  virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                          Align Alignment,
                                          unsigned AddressSpace,
                                          TTI::TargetCostKind CostKind,
                                          const Instruction *I) = 0;
  virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
                                            Align Alignment,
                                            unsigned AddressSpace,
                                            TTI::TargetCostKind CostKind,
                                            const Instruction *I) = 0;
  virtual InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) = 0;
  virtual InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) = 0;

  virtual InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
  virtual InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             Optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) = 0;
  virtual InstructionCost
  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
                         TTI::TargetCostKind CostKind) = 0;
  virtual InstructionCost getExtendedAddReductionCost(
      bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
  virtual InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) = 0;
  virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                           ArrayRef<Type *> Tys,
                                           TTI::TargetCostKind CostKind) = 0;
  virtual unsigned getNumberOfParts(Type *Tp) = 0;
  virtual InstructionCost
  getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                            const SCEV *Ptr) = 0;
  virtual InstructionCost
  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) = 0;
  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                   Type *ExpectedType) = 0;
  virtual Type *
  getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                            unsigned SrcAddrSpace, unsigned DestAddrSpace,
                            unsigned SrcAlign, unsigned DestAlign,
                            Optional<uint32_t> AtomicElementSize) const = 0;

  virtual void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      Optional<uint32_t> AtomicCpySize) const = 0;
  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const = 0;
  virtual bool areTypesABICompatible(const Function *Caller,
                                     const Function *Callee,
                                     const ArrayRef<Type *> &Types) const = 0;
  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                           Align Alignment,
                                           unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                            Align Alignment,
                                            unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                           ElementCount VF) const = 0;
  virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                       unsigned ChainSizeInBytes,
                                       VectorType *VecTy) const = 0;
  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                        unsigned ChainSizeInBytes,
                                        VectorType *VecTy) const = 0;
  virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                                     ReductionFlags) const = 0;
  virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                               ReductionFlags) const = 0;
  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
  virtual unsigned getGISelRematGlobalCost() const = 0;
  virtual bool enableScalableVectorization() const = 0;
  virtual bool supportsScalableVectors() const = 0;
  virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                                     Align Alignment) const = 0;
  virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
  virtual VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
};

template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override = default;

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  InstructionCost
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands,
             TargetTransformInfo::TargetCostKind CostKind) override {
    return Impl.getGEPCost(PointeeType, Ptr,
                           Operands, CostKind);
  }
  unsigned getInliningThresholdMultiplier() override {
    return Impl.getInliningThresholdMultiplier();
  }
  unsigned adjustInliningThreshold(const CallBase *CB) override {
    return Impl.adjustInliningThreshold(CB);
  }
  int getInlinerVectorBonusPercent() override {
    return Impl.getInlinerVectorBonusPercent();
  }
  InstructionCost getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
                              TargetCostKind CostKind) override {
    return Impl.getUserCost(U, Operands, CostKind);
  }
  BranchProbability getPredictableBranchThreshold() override {
    return Impl.getPredictableBranchThreshold();
  }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  bool useGPUDivergenceAnalysis() override {
    return Impl.useGPUDivergenceAnalysis();
  }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }

  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }

  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override {
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  }

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
  }

  bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
  }

  unsigned getAssumedAddrSpace(const Value *V) const override {
    return Impl.getAssumedAddrSpace(V);
  }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const override {
    return Impl.getPredicatedAddrSpace(V);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }

  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) override {
    return Impl.getUnrollingPreferences(L, SE, UP, ORE);
  }
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) override {
    return Impl.getPeelingPreferences(L, SE, PP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL) override {
    return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL);
  }
  PredicationStyle emitGetActiveLaneMask() override {
    return Impl.emitGetActiveLaneMask();
  }
  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) override {
    return Impl.instCombineIntrinsic(IC, II);
  }
  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) override {
    return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                 KnownBitsComputed);
  }
  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) override {
    return Impl.simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                      AddrSpace, I);
  }
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool isNumRegsMajorCostOfLSR() override {
    return Impl.isNumRegsMajorCostOfLSR();
  }
  bool isProfitableLSRChainElement(Instruction *I) override {
    return Impl.isProfitableLSRChainElement(I);
  }
  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  AddressingModeKind
  getPreferredAddressingMode(const Loop *L,
                             ScalarEvolution *SE) const override {
    return Impl.getPreferredAddressingMode(L, SE);
  }
  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedGather(VectorType *DataType,
                                  Align Alignment) override {
    return Impl.forceScalarizeMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) override {
    return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType) override {
    return Impl.isLegalMaskedCompressStore(DataType);
  }
  bool isLegalMaskedExpandLoad(Type *DataType) override {
    return Impl.isLegalMaskedExpandLoad(DataType);
  }
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override {
    return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
  }
  bool enableOrderedReductions() override {
    return Impl.enableOrderedReductions();
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                     AddrSpace);
  }
  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getRegUsageForType(Type *Ty) override {
    return Impl.getRegUsageForType(Ty);
  }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool shouldBuildRelLookupTables() override {
    return Impl.shouldBuildRelLookupTables();
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }

  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) override {
    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
  }
  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys) override {
    return Impl.getOperandsScalarizationOverhead(Args, Tys);
  }

  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }

  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }

  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      bool *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }

  InstructionCost getFPOpCost(Type *Ty) override {
    return Impl.getFPOpCost(Ty);
  }

  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TargetCostKind CostKind) override {
    return Impl.getIntImmCost(Imm, Ty, CostKind);
  }
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
  }
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TargetCostKind CostKind) override {
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char *getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  TypeSize getRegisterBitWidth(RegisterKind K) const override {
    return Impl.getRegisterBitWidth(K);
  }
  unsigned getMinVectorRegisterBitWidth() const override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  Optional<unsigned> getMaxVScale() const override {
    return Impl.getMaxVScale();
  }
  Optional<unsigned> getVScaleForTuning() const override {
    return Impl.getVScaleForTuning();
  }
  bool shouldMaximizeVectorBandwidth(
      TargetTransformInfo::RegisterKind K) const override {
    return Impl.shouldMaximizeVectorBandwidth(K);
  }
  ElementCount getMinimumVF(unsigned ElemWidth,
                            bool IsScalable) const override {
    return Impl.getMinimumVF(ElemWidth, IsScalable);
  }
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
    return Impl.getMaximumVF(ElemWidth, Opcode);
  }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  Optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }

  /// Return the preferred prefetch distance in terms of instructions.
  ///
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }

  /// Return the minimum stride necessary to trigger software
  /// prefetching.
  ///
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }

  /// Return the maximum prefetch distance in terms of loop
  /// iterations.
  ///
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }

  /// \return True if prefetching should also be done for writes.
  bool enableWritePrefetching() const override {
    return Impl.enableWritePrefetching();
  }

  unsigned getMaxInterleaveFactor(unsigned VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      OperandValueKind Opd1Info, OperandValueKind Opd2Info,
      OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo, Args, CxtI);
  }
  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args) override {
    return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp, Args);
  }
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) override {
    return Impl.getCFInstrCost(Opcode, CostKind, I);
  }
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  }
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index) override {
    return Impl.getVectorInstrCost(Opcode, Val, Index);
  }
  InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TTI::TargetCostKind CostKind) override {
    return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
                                          DemandedDstElts, CostKind);
  }
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                CostKind, I);
  }
  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind,
                                    const Instruction *I) override {
    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, I);
  }
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                      CostKind);
  }
  InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
  }
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
  }
  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             Optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
  }
  InstructionCost
  getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
                         TTI::TargetCostKind CostKind) override {
    return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
  }
  InstructionCost getExtendedAddReductionCost(
      bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
    return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
                                            CostKind);
  }
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) override {
    return Impl.getIntrinsicInstrCost(ICA, CostKind);
  }
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) override {
    return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      Optional<uint32_t> AtomicElementSize) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign,
                                          AtomicElementSize);
  }
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      Optional<uint32_t> AtomicCpySize) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign, AtomicCpySize);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override {
    return Impl.areTypesABICompatible(Caller, Callee, Types);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override {
    return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
  }
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    return Impl.isElementTypeLegalForScalableVector(Ty);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
  }
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const override {
    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }

  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }
  bool supportsScalableVectors() const override {
    return Impl.supportsScalableVectors();
  }

  bool enableScalableVectorization() const override {
    return Impl.enableScalableVectorization();
  }

  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const override {
    return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
  }

  InstructionCost getInstructionLatency(const Instruction *I) override {
    return Impl.getInstructionLatency(I);
  }

  VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    return Impl.getVPLegalizationStrategy(PI);
  }
};

template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}

/// Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  typedef TargetTransformInfo Result;

  /// Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  Result run(const Function &F, FunctionAnalysisManager &);

private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  static AnalysisKey Key;

  /// The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};
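// Example (illustrative only): obtaining a TargetTransformInfo for a function
// through the new pass manager. A pass would normally just request the result
// from the analysis manager it is handed; 'F' and 'FAM' are assumed to exist.
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([] { return TargetIRAnalysis(); });
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);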
/// Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
  TargetIRAnalysis TIRA;
  Optional<TargetTransformInfo> TTI;

  virtual void anchor();

public:
  static char ID;

  /// We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  TargetTransformInfo &getTTI(const Function &F);
};

/// Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

} // namespace llvm

#endif