1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This pass exposes codegen information to IR-level passes. Every 10 /// transformation that uses codegen information is broken into three parts: 11 /// 1. The IR-level analysis pass. 12 /// 2. The IR-level transformation interface which provides the needed 13 /// information. 14 /// 3. Codegen-level implementation which uses target-specific hooks. 15 /// 16 /// This file defines #2, which is the interface that IR-level transformations 17 /// use for querying the codegen. 18 /// 19 //===----------------------------------------------------------------------===// 20 21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H 23 24 #include "llvm/IR/Operator.h" 25 #include "llvm/IR/PassManager.h" 26 #include "llvm/Pass.h" 27 #include "llvm/Support/AtomicOrdering.h" 28 #include "llvm/Support/DataTypes.h" 29 #include <functional> 30 31 namespace llvm { 32 33 namespace Intrinsic { 34 typedef unsigned ID; 35 } 36 37 class AssumptionCache; 38 class BlockFrequencyInfo; 39 class DominatorTree; 40 class BranchInst; 41 class CallBase; 42 class ExtractElementInst; 43 class Function; 44 class GlobalValue; 45 class IntrinsicInst; 46 class LoadInst; 47 class LoopAccessInfo; 48 class Loop; 49 class LoopInfo; 50 class ProfileSummaryInfo; 51 class SCEV; 52 class ScalarEvolution; 53 class StoreInst; 54 class SwitchInst; 55 class TargetLibraryInfo; 56 class Type; 57 class User; 58 class Value; 59 template <typename T> class Optional; 60 61 /// Information about a load/store intrinsic defined by the target. 62 struct MemIntrinsicInfo { 63 /// This is the pointer that the intrinsic is loading from or storing to. 64 /// If this is non-null, then analysis/optimization passes can assume that 65 /// this intrinsic is functionally equivalent to a load/store from this 66 /// pointer. 67 Value *PtrVal = nullptr; 68 69 // Ordering for atomic operations. 70 AtomicOrdering Ordering = AtomicOrdering::NotAtomic; 71 72 // Same Id is set by the target for corresponding load/store intrinsics. 73 unsigned short MatchingId = 0; 74 75 bool ReadMem = false; 76 bool WriteMem = false; 77 bool IsVolatile = false; 78 79 bool isUnordered() const { 80 return (Ordering == AtomicOrdering::NotAtomic || 81 Ordering == AtomicOrdering::Unordered) && 82 !IsVolatile; 83 } 84 }; 85 86 /// Attributes of a target dependent hardware loop. 87 struct HardwareLoopInfo { 88 HardwareLoopInfo() = delete; 89 HardwareLoopInfo(Loop *L) : L(L) {} 90 Loop *L = nullptr; 91 BasicBlock *ExitBlock = nullptr; 92 BranchInst *ExitBranch = nullptr; 93 const SCEV *ExitCount = nullptr; 94 IntegerType *CountType = nullptr; 95 Value *LoopDecrement = nullptr; // Decrement the loop counter by this 96 // value in every iteration. 97 bool IsNestingLegal = false; // Can a hardware loop be a parent to 98 // another hardware loop? 99 bool CounterInReg = false; // Should loop counter be updated in 100 // the loop via a phi? 101 bool PerformEntryTest = false; // Generate the intrinsic which also performs 102 // icmp ne zero on the loop counter value and 103 // produces an i1 to guard the loop entry. 
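  /// A hypothetical usage sketch (illustrative only, not part of this
  /// interface): a pass that wants to form a hardware loop first checks that
  /// the loop is analyzable and then asks whether it is a candidate.
  /// \code
  ///   HardwareLoopInfo HWLoopInfo(L);
  ///   if (HWLoopInfo.canAnalyze(LI) &&
  ///       HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
  ///     ; // consult the target and form the hardware loop here
  /// \endcode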
104 bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, 105 DominatorTree &DT, bool ForceNestedLoop = false, 106 bool ForceHardwareLoopPHI = false); 107 bool canAnalyze(LoopInfo &LI); 108 }; 109 110 class IntrinsicCostAttributes { 111 const IntrinsicInst *II = nullptr; 112 Type *RetTy = nullptr; 113 Intrinsic::ID IID; 114 SmallVector<Type *, 4> ParamTys; 115 SmallVector<const Value *, 4> Arguments; 116 FastMathFlags FMF; 117 unsigned VF = 1; 118 // If ScalarizationCost is UINT_MAX, the cost of scalarizing the 119 // arguments and the return value will be computed based on types. 120 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); 121 122 public: 123 IntrinsicCostAttributes(const IntrinsicInst &I); 124 125 IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI); 126 127 IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, 128 unsigned Factor); 129 130 IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, 131 unsigned Factor, unsigned ScalarCost); 132 133 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, 134 ArrayRef<Type *> Tys, FastMathFlags Flags); 135 136 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, 137 ArrayRef<Type *> Tys, FastMathFlags Flags, 138 unsigned ScalarCost); 139 140 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, 141 ArrayRef<Type *> Tys, FastMathFlags Flags, 142 unsigned ScalarCost, 143 const IntrinsicInst *I); 144 145 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, 146 ArrayRef<Type *> Tys); 147 148 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, 149 ArrayRef<const Value *> Args); 150 151 Intrinsic::ID getID() const { return IID; } 152 const IntrinsicInst *getInst() const { return II; } 153 Type *getReturnType() const { return RetTy; } 154 unsigned getVectorFactor() const { return VF; } 155 FastMathFlags getFlags() const { return FMF; } 156 unsigned getScalarizationCost() const { return ScalarizationCost; } 157 const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; } 158 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; } 159 160 bool isTypeBasedOnly() const { 161 return Arguments.empty(); 162 } 163 164 bool skipScalarizationCost() const { 165 return ScalarizationCost != std::numeric_limits<unsigned>::max(); 166 } 167 }; 168 169 class TargetTransformInfo; 170 typedef TargetTransformInfo TTI; 171 172 /// This pass provides access to the codegen interfaces that are needed 173 /// for IR-level transformations. 174 class TargetTransformInfo { 175 public: 176 /// Construct a TTI object using a type implementing the \c Concept 177 /// API below. 178 /// 179 /// This is used by targets to construct a TTI wrapping their target-specific 180 /// implementation that encodes appropriate costs for their target. 181 template <typename T> TargetTransformInfo(T Impl); 182 183 /// Construct a baseline TTI object using a minimal implementation of 184 /// the \c Concept API below. 185 /// 186 /// The TTI implementation will reflect the information in the DataLayout 187 /// provided if non-null. 188 explicit TargetTransformInfo(const DataLayout &DL); 189 190 // Provide move semantics. 191 TargetTransformInfo(TargetTransformInfo &&Arg); 192 TargetTransformInfo &operator=(TargetTransformInfo &&RHS); 193 194 // We need to define the destructor out-of-line to define our sub-classes 195 // out-of-line. 196 ~TargetTransformInfo(); 197 198 /// Handle the invalidation of this information. 
199 /// 200 /// When used as a result of \c TargetIRAnalysis this method will be called 201 /// when the function this was computed for changes. When it returns false, 202 /// the information is preserved across those changes. 203 bool invalidate(Function &, const PreservedAnalyses &, 204 FunctionAnalysisManager::Invalidator &) { 205 // FIXME: We should probably in some way ensure that the subtarget 206 // information for a function hasn't changed. 207 return false; 208 } 209 210 /// \name Generic Target Information 211 /// @{ 212 213 /// The kind of cost model. 214 /// 215 /// There are several different cost models that can be customized by the 216 /// target. The normalization of each cost model may be target specific. 217 enum TargetCostKind { 218 TCK_RecipThroughput, ///< Reciprocal throughput. 219 TCK_Latency, ///< The latency of instruction. 220 TCK_CodeSize, ///< Instruction code size. 221 TCK_SizeAndLatency ///< The weighted sum of size and latency. 222 }; 223 224 /// Query the cost of a specified instruction. 225 /// 226 /// Clients should use this interface to query the cost of an existing 227 /// instruction. The instruction must have a valid parent (basic block). 228 /// 229 /// Note, this method does not cache the cost calculation and it 230 /// can be expensive in some cases. 231 int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const { 232 switch (kind) { 233 case TCK_RecipThroughput: 234 return getInstructionThroughput(I); 235 236 case TCK_Latency: 237 return getInstructionLatency(I); 238 239 case TCK_CodeSize: 240 case TCK_SizeAndLatency: 241 return getUserCost(I, kind); 242 } 243 llvm_unreachable("Unknown instruction cost kind"); 244 } 245 246 /// Underlying constants for 'cost' values in this interface. 247 /// 248 /// Many APIs in this interface return a cost. This enum defines the 249 /// fundamental values that should be used to interpret (and produce) those 250 /// costs. The costs are returned as an int rather than a member of this 251 /// enumeration because it is expected that the cost of one IR instruction 252 /// may have a multiplicative factor to it or otherwise won't fit directly 253 /// into the enum. Moreover, it is common to sum or average costs which works 254 /// better as simple integral values. Thus this enum only provides constants. 255 /// Also note that the returned costs are signed integers to make it natural 256 /// to add, subtract, and test with zero (a common boundary condition). It is 257 /// not expected that 2^32 is a realistic cost to be modeling at any point. 258 /// 259 /// Note that these costs should usually reflect the intersection of code-size 260 /// cost and execution cost. A free instruction is typically one that folds 261 /// into another instruction. For example, reg-to-reg moves can often be 262 /// skipped by renaming the registers in the CPU, but they still are encoded 263 /// and thus wouldn't be considered 'free' here. 264 enum TargetCostConstants { 265 TCC_Free = 0, ///< Expected to fold away in lowering. 266 TCC_Basic = 1, ///< The cost of a typical 'add' instruction. 267 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. 268 }; 269 270 /// Estimate the cost of a GEP operation when lowered. 271 int getGEPCost(Type *PointeeType, const Value *Ptr, 272 ArrayRef<const Value *> Operands, 273 TargetCostKind CostKind = TCK_SizeAndLatency) const; 274 275 /// \returns A value by which our inlining threshold should be multiplied. 
  /// This is primarily used to bump up the inlining threshold wholesale on
  /// targets where calls are unusually expensive.
  ///
  /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
  /// individual classes of instructions would be better.
  unsigned getInliningThresholdMultiplier() const;

  /// \returns Vector bonus in percent.
  ///
  /// Vector bonuses: We want to more aggressively inline vector-dense kernels
  /// and apply this bonus based on the percentage of vector instructions. A
  /// bonus is applied if the vector instructions exceed 50% and half that
  /// amount is applied if they exceed 10%. Note that these bonuses are
  /// somewhat arbitrary and evolved over time by accident as much as because
  /// they are principled bonuses.
  /// FIXME: It would be nice to base the bonus values on something more
  /// scientific. A target may have no bonus on vector instructions.
  int getInlinerVectorBonusPercent() const;

  /// \return the expected cost of a memcpy, which could e.g. depend on the
  /// source/destination type and alignment and the number of bytes copied.
  int getMemcpyCost(const Instruction *I) const;

  /// \return The estimated number of case clusters when lowering \p SI.
  /// \p JTSize is set to the jump table size only when \p SI is suitable for
  /// a jump table.
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const;

  /// Estimate the cost of a given IR user when lowered.
  ///
  /// This can estimate the cost of either a ConstantExpr or an Instruction
  /// when lowered.
  ///
  /// \p Operands is a list of operands which can be the result of
  /// transformations of the current operands. The list must contain the same
  /// number of operands as the IR user currently has, and in the same order.
  ///
  /// The returned cost is defined in terms of \c TargetCostConstants, see its
  /// comments for a detailed explanation of the cost values.
  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
                  TargetCostKind CostKind) const;

  /// This is a helper function which calls the three-argument getUserCost
  /// with \p Operands which are the current operands U has.
  int getUserCost(const User *U, TargetCostKind CostKind) const {
    SmallVector<const Value *, 4> Operands(U->value_op_begin(),
                                           U->value_op_end());
    return getUserCost(U, Operands, CostKind);
  }

  /// Return true if branch divergence exists.
  ///
  /// Branch divergence has a significantly negative impact on GPU performance
  /// when threads in the same wavefront take different paths due to
  /// conditional branches.
  bool hasBranchDivergence() const;

  /// Return true if the target prefers to use GPU divergence analysis to
  /// replace the legacy version.
  bool useGPUDivergenceAnalysis() const;

  /// Returns whether V is a source of divergence.
  ///
  /// This function provides the target-dependent information for
  /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
  /// first builds the dependency graph, and then runs the reachability
  /// algorithm starting with the sources of divergence.
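  ///
  /// A minimal sketch of a hypothetical client (illustrative only):
  /// \code
  ///   if (TTI.hasBranchDivergence() && TTI.isSourceOfDivergence(V))
  ///     ; // treat users of V as potentially divergent
  /// \endcode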
  bool isSourceOfDivergence(const Value *V) const;

  /// Returns true for the target-specific set of operations that produce a
  /// uniform result even when given non-uniform arguments.
  bool isAlwaysUniform(const Value *V) const;

  /// Returns the address space ID for a target's 'flat' address space. Note
  /// this is not necessarily the same as addrspace(0), which LLVM sometimes
  /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location
  /// through a pointer with this address space is expected to be legal but
  /// slower compared to the same memory location accessed through a pointer
  /// with a different address space.
  ///
  /// This is for targets with different pointer representations which can
  /// be converted with the addrspacecast instruction. If a pointer is
  /// converted to this address space, optimizations should attempt to replace
  /// the access with the source address space.
  ///
  /// \returns ~0u if the target does not have such a flat address space to
  /// optimize away.
  unsigned getFlatAddressSpace() const;

  /// Return any intrinsic address operand indexes which may be rewritten if
  /// they use a flat address space pointer.
  ///
  /// \returns true if the intrinsic was handled.
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const;

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;

  /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
  /// NewV, which has a different address space. This should happen for every
  /// operand index that collectFlatAddressOperands returned for the intrinsic.
  /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
  /// new value (which may be the original \p II with modified operands).
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const;

  /// Test whether calls to a function lower to actual program function
  /// calls.
  ///
  /// The idea is to test whether the program is likely to require a 'call'
  /// instruction or equivalent in order to call the given function.
  ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
  /// should probably move to simpler cost metrics using the above.
  /// Alternatively, we could split the cost interface into distinct code-size
  /// and execution-speed costs. This would allow modelling the core of this
  /// query more accurately as a call is a single small instruction, but
  /// incurs significant execution cost.
  bool isLoweredToCall(const Function *F) const;

  struct LSRCost {
    /// TODO: Some of these could be merged. Also, a lexical ordering
    /// isn't always optimal.
    unsigned Insns;
    unsigned NumRegs;
    unsigned AddRecCost;
    unsigned NumIVMuls;
    unsigned NumBaseAdds;
    unsigned ImmCost;
    unsigned SetupCost;
    unsigned ScaleCost;
  };

  /// Parameters that control the generic loop unrolling transformation.
  struct UnrollingPreferences {
    /// The cost threshold for the unrolled loop.
    /// Should be relative to the getUserCost values returned by this API, and
    /// the expectation is that the unrolled loop's instructions when run
    /// through that interface should not exceed this cost. However, this is
    /// only an estimate. Also, specific loops may be unrolled even with a cost
    /// above this threshold if deemed profitable. Set this to UINT_MAX to
    /// disable the loop body cost restriction.
    unsigned Threshold;
    /// If complete unrolling will reduce the cost of the loop, we will boost
    /// the Threshold by a certain percent to allow more aggressive complete
    /// unrolling. This value provides the maximum boost percentage that we
    /// can apply to Threshold (the value should be no less than 100).
    ///   BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
    ///                                      MaxPercentThresholdBoost / 100)
    /// E.g. if complete unrolling reduces the loop execution time by 50%
    /// then we boost the threshold by the factor of 2x. If unrolling is not
    /// expected to reduce the running time, then we do not increase the
    /// threshold.
    unsigned MaxPercentThresholdBoost;
    /// The cost threshold for the unrolled loop when optimizing for size (set
    /// to UINT_MAX to disable).
    unsigned OptSizeThreshold;
    /// The cost threshold for the unrolled loop, like Threshold, but used
    /// for partial/runtime unrolling (set to UINT_MAX to disable).
    unsigned PartialThreshold;
    /// The cost threshold for the unrolled loop when optimizing for size, like
    /// OptSizeThreshold, but used for partial/runtime unrolling (set to
    /// UINT_MAX to disable).
    unsigned PartialOptSizeThreshold;
    /// A forced unrolling factor (the number of concatenated bodies of the
    /// original loop in the unrolled loop body). When set to 0, the unrolling
    /// transformation will select an unrolling factor based on the current
    /// cost threshold and other factors.
    unsigned Count;
    /// Default unroll count for loops with run-time trip count.
    unsigned DefaultUnrollRuntimeCount;
    /// Set the maximum unrolling factor. The unrolling factor may be selected
    /// using the appropriate cost threshold, but may not exceed this number
    /// (set to UINT_MAX to disable). This does not apply in cases where the
    /// loop is being fully unrolled.
    unsigned MaxCount;
    /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
    /// applies even if full unrolling is selected. This allows a target to
    /// fall back to Partial unrolling if full unrolling is above
    /// FullUnrollMaxCount.
    unsigned FullUnrollMaxCount;
    /// Represents the number of instructions optimized away when the "back
    /// edge" becomes a "fall through" in the unrolled loop.
    /// For now we count a conditional branch on a backedge and a comparison
    /// feeding it.
    unsigned BEInsns;
    /// Allow partial unrolling (unrolling of loops to expand the size of the
    /// loop body, not only to eliminate small constant-trip-count loops).
    bool Partial;
    /// Allow runtime unrolling (unrolling of loops to expand the size of the
    /// loop body even when the number of loop iterations is not known at
    /// compile time).
    bool Runtime;
    /// Allow generation of a loop remainder (extra iterations after unroll).
    bool AllowRemainder;
    /// Allow emitting expensive instructions (such as divisions) when
    /// computing the trip count of a loop for runtime unrolling.
    bool AllowExpensiveTripCount;
    /// Apply loop unroll on any kind of loop
    /// (mainly to loops that fail runtime unrolling).
    bool Force;
    /// Allow using trip count upper bound to unroll loops.
    bool UpperBound;
    /// Allow unrolling of all the iterations of the runtime loop remainder.
    bool UnrollRemainder;
    /// Allow unroll and jam. Used to enable unroll and jam for the target.
    bool UnrollAndJam;
    /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
    /// value above is used during unroll and jam for the outer loop size.
    /// This value is used in the same manner to limit the size of the inner
    /// loop.
    unsigned UnrollAndJamInnerLoopThreshold;
    /// Don't allow loop unrolling to simulate more than this number of
    /// iterations when checking full unroll profitability.
    unsigned MaxIterationsCountToAnalyze;
  };

  /// Get target-customized preferences for the generic loop unrolling
  /// transformation. The caller will initialize \p UP with the current
  /// target-independent defaults.
  void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                               UnrollingPreferences &UP) const;

  /// Query the target whether it would be profitable to convert the given
  /// loop into a hardware loop.
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const;

  /// Query the target whether it would be preferred to create a predicated
  /// vector loop, which can avoid the need to emit a scalar epilogue loop.
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const;

  /// Query the target whether lowering of the llvm.get.active.lane.mask
  /// intrinsic is supported.
  bool emitGetActiveLaneMask() const;

  /// Parameters that control the loop peeling transformation.
  struct PeelingPreferences {
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, a
    /// peeling factor is selected based on profile information and other
    /// factors.
    unsigned PeelCount;
    /// Allow peeling off loop iterations.
    bool AllowPeeling;
    /// Allow peeling off loop iterations for loop nests.
    bool AllowLoopNestsPeeling;
    /// Allow peeling based on profile. Used to enable peeling off all
    /// iterations based on the provided profile.
    /// If the value is true, the peeling cost model can decide to peel only
    /// some iterations, and in this case it will set this to false.
    bool PeelProfiledIterations;
  };

  /// Get target-customized preferences for the generic loop peeling
  /// transformation. The caller will initialize \p PP with the current
  /// target-independent defaults, using information from \p L and \p SE.
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) const;
  /// @}

  /// \name Scalar Target Information
  /// @{

  /// Flags indicating the kind of support for population count.
  ///
  /// Compared to the SW implementation, HW support is supposed to
  /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse.
  /// Hardware support is considered "Fast" if it can outperform, or is on par
  /// with, the software implementation when the population is sparse;
  /// otherwise, it is considered "Slow".
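  ///
  /// A hypothetical query (illustrative only): a transform might only form a
  /// population-count idiom when hardware support is fast.
  /// \code
  ///   if (TTI.getPopcntSupport(64) == TTI::PSK_FastHardware)
  ///     ; // emit llvm.ctpop.i64
  /// \endcode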
  enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };

  /// Return true if the specified immediate is a legal add immediate, that
  /// is, the target has add instructions which can add a register with the
  /// immediate without having to materialize the immediate into a register.
  bool isLegalAddImmediate(int64_t Imm) const;

  /// Return true if the specified immediate is a legal icmp immediate,
  /// that is, the target has icmp instructions which can compare a register
  /// against the immediate without having to materialize the immediate into a
  /// register.
  bool isLegalICmpImmediate(int64_t Imm) const;

  /// Return true if the addressing mode represented by AM is legal for
  /// this target, for a load/store of the specified type.
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type.
  /// If the target returns true in LSRWithInstrQueries(), I may be valid.
  /// TODO: Handle pre/postinc as well.
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace = 0,
                             Instruction *I = nullptr) const;

  /// Return true if the LSR cost of C1 is lower than the cost of C2.
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) const;

  /// \returns true if LSR should not optimize a chain that includes \p I.
  bool isProfitableLSRChainElement(Instruction *I) const;

  /// Return true if the target can fuse a compare and branch.
  /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
  /// calculation for the instructions in a loop.
  bool canMacroFuseCmp() const;

  /// Return true if the target can save a compare for loop count, for example
  /// a hardware loop saves a compare.
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const;

  /// \return True if LSR should make efforts to create/preserve post-inc
  /// addressing mode expressions.
  bool shouldFavorPostInc() const;

  /// Return true if LSR should make efforts to generate indexed addressing
  /// modes that operate across loop iterations.
  bool shouldFavorBackedgeIndex(const Loop *L) const;

  /// Return true if the target supports masked store.
  bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked load.
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports nontemporal store.
  bool isLegalNTStore(Type *DataType, Align Alignment) const;
  /// Return true if the target supports nontemporal load.
  bool isLegalNTLoad(Type *DataType, Align Alignment) const;

  /// Return true if the target supports masked scatter.
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
  /// Return true if the target supports masked gather.
  bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
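  // A sketch of how a vectorizer-like client might consult the legality
  // hooks above (hypothetical code, for illustration only):
  //   if (TTI.isLegalMaskedLoad(VecTy, Align(16)))
  //     ... emit a llvm.masked.load ...
  //   else if (TTI.isLegalMaskedGather(VecTy, Align(16)))
  //     ... fall back to a gather ...
  //   else
  //     ... scalarize the conditional loads ...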
  /// Return true if the target supports masked compress store.
  bool isLegalMaskedCompressStore(Type *DataType) const;
  /// Return true if the target supports masked expand load.
  bool isLegalMaskedExpandLoad(Type *DataType) const;

  /// Return true if the target has a unified operation to calculate division
  /// and remainder. If so, the additional implicit multiplication and
  /// subtraction required to calculate a remainder from division are free.
  /// This can enable more aggressive transformations for division and
  /// remainder than would typically be allowed using throughput or size cost
  /// models.
  bool hasDivRemOp(Type *DataType, bool IsSigned) const;

  /// Return true if the given instruction (assumed to be a memory access
  /// instruction) has a volatile variant. If that's the case then we can avoid
  /// addrspacecast to generic AS for volatile loads/stores. The default
  /// implementation returns false, which prevents address space inference for
  /// volatile loads/stores.
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

  /// Return true if the target doesn't mind addresses in vectors.
  bool prefersVectorizedAddressing() const;

  /// Return the cost of the scaling factor used in the addressing
  /// mode represented by AM for this target, for a load/store
  /// of the specified type.
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace = 0) const;

  /// Return true if the loop strength reduce pass should make
  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
  /// immediate offset and no index register.
  bool LSRWithInstrQueries() const;

  /// Return true if it's free to truncate a value of type Ty1 to type
  /// Ty2. E.g. on x86 it's free to truncate an i32 value in register EAX to
  /// i16 by referencing its sub-register AX.
  bool isTruncateFree(Type *Ty1, Type *Ty2) const;

  /// Return true if it is profitable to hoist an instruction from the
  /// then/else blocks to before the if.
  bool isProfitableToHoist(Instruction *I) const;

  bool useAA() const;

  /// Return true if this type is legal.
  bool isTypeLegal(Type *Ty) const;

  /// Return true if switches should be turned into lookup tables for the
  /// target.
  bool shouldBuildLookupTables() const;

  /// Return true if switches should be turned into lookup tables
  /// containing this constant value for the target.
  bool shouldBuildLookupTablesForConstant(Constant *C) const;

  /// Return true if a function that is cold at all call sites should use the
  /// coldcc calling convention.
  bool useColdCCForColdCall(Function &F) const;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
                                    bool Insert, bool Extract) const;

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The types of the arguments are ordinarily
  /// scalar, in which case the costs are multiplied with VF.
696 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, 697 unsigned VF) const; 698 699 /// If target has efficient vector element load/store instructions, it can 700 /// return true here so that insertion/extraction costs are not added to 701 /// the scalarization cost of a load/store. 702 bool supportsEfficientVectorElementLoadStore() const; 703 704 /// Don't restrict interleaved unrolling to small loops. 705 bool enableAggressiveInterleaving(bool LoopHasReductions) const; 706 707 /// Returns options for expansion of memcmp. IsZeroCmp is 708 // true if this is the expansion of memcmp(p1, p2, s) == 0. 709 struct MemCmpExpansionOptions { 710 // Return true if memcmp expansion is enabled. 711 operator bool() const { return MaxNumLoads > 0; } 712 713 // Maximum number of load operations. 714 unsigned MaxNumLoads = 0; 715 716 // The list of available load sizes (in bytes), sorted in decreasing order. 717 SmallVector<unsigned, 8> LoadSizes; 718 719 // For memcmp expansion when the memcmp result is only compared equal or 720 // not-equal to 0, allow up to this number of load pairs per block. As an 721 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block: 722 // a0 = load2bytes &a[0] 723 // b0 = load2bytes &b[0] 724 // a2 = load1byte &a[2] 725 // b2 = load1byte &b[2] 726 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0 727 unsigned NumLoadsPerBlock = 1; 728 729 // Set to true to allow overlapping loads. For example, 7-byte compares can 730 // be done with two 4-byte compares instead of 4+2+1-byte compares. This 731 // requires all loads in LoadSizes to be doable in an unaligned way. 732 bool AllowOverlappingLoads = false; 733 }; 734 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, 735 bool IsZeroCmp) const; 736 737 /// Enable matching of interleaved access groups. 738 bool enableInterleavedAccessVectorization() const; 739 740 /// Enable matching of interleaved access groups that contain predicated 741 /// accesses or gaps and therefore vectorized using masked 742 /// vector loads/stores. 743 bool enableMaskedInterleavedAccessVectorization() const; 744 745 /// Indicate that it is potentially unsafe to automatically vectorize 746 /// floating-point operations because the semantics of vector and scalar 747 /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math 748 /// does not support IEEE-754 denormal numbers, while depending on the 749 /// platform, scalar floating-point math does. 750 /// This applies to floating-point math operations and calls, not memory 751 /// operations, shuffles, or casts. 752 bool isFPVectorizationPotentiallyUnsafe() const; 753 754 /// Determine if the target supports unaligned memory accesses. 755 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, 756 unsigned AddressSpace = 0, 757 unsigned Alignment = 1, 758 bool *Fast = nullptr) const; 759 760 /// Return hardware support for population count. 761 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; 762 763 /// Return true if the hardware has a fast square-root instruction. 764 bool haveFastSqrt(Type *Ty) const; 765 766 /// Return true if it is faster to check if a floating-point value is NaN 767 /// (or not-NaN) versus a comparison against a constant FP zero value. 768 /// Targets should override this if materializing a 0.0 for comparison is 769 /// generally as cheap as checking for ordered/unordered. 
770 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const; 771 772 /// Return the expected cost of supporting the floating point operation 773 /// of the specified type. 774 int getFPOpCost(Type *Ty) const; 775 776 /// Return the expected cost of materializing for the given integer 777 /// immediate of the specified type. 778 int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const; 779 780 /// Return the expected cost of materialization for the given integer 781 /// immediate of the specified type for a given instruction. The cost can be 782 /// zero if the immediate can be folded into the specified instruction. 783 int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, 784 Type *Ty, TargetCostKind CostKind) const; 785 int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, 786 Type *Ty, TargetCostKind CostKind) const; 787 788 /// Return the expected cost for the given integer when optimising 789 /// for size. This is different than the other integer immediate cost 790 /// functions in that it is subtarget agnostic. This is useful when you e.g. 791 /// target one ISA such as Aarch32 but smaller encodings could be possible 792 /// with another such as Thumb. This return value is used as a penalty when 793 /// the total costs for a constant is calculated (the bigger the cost, the 794 /// more beneficial constant hoisting is). 795 int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, 796 Type *Ty) const; 797 /// @} 798 799 /// \name Vector Target Information 800 /// @{ 801 802 /// The various kinds of shuffle patterns for vector queries. 803 enum ShuffleKind { 804 SK_Broadcast, ///< Broadcast element 0 to all other elements. 805 SK_Reverse, ///< Reverse the order of the vector. 806 SK_Select, ///< Selects elements from the corresponding lane of 807 ///< either source operand. This is equivalent to a 808 ///< vector select with a constant condition operand. 809 SK_Transpose, ///< Transpose two vectors. 810 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. 811 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset. 812 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one 813 ///< with any shuffle mask. 814 SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any 815 ///< shuffle mask. 816 }; 817 818 /// Kind of the reduction data. 819 enum ReductionKind { 820 RK_None, /// Not a reduction. 821 RK_Arithmetic, /// Binary reduction data. 822 RK_MinMax, /// Min/max reduction data. 823 RK_UnsignedMinMax, /// Unsigned min/max reduction data. 824 }; 825 826 /// Contains opcode + LHS/RHS parts of the reduction operations. 827 struct ReductionData { 828 ReductionData() = delete; 829 ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS) 830 : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) { 831 assert(Kind != RK_None && "expected binary or min/max reduction only."); 832 } 833 unsigned Opcode = 0; 834 Value *LHS = nullptr; 835 Value *RHS = nullptr; 836 ReductionKind Kind = RK_None; 837 bool hasSameData(ReductionData &RD) const { 838 return Kind == RD.Kind && Opcode == RD.Opcode; 839 } 840 }; 841 842 static ReductionKind matchPairwiseReduction( 843 const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty); 844 845 static ReductionKind matchVectorSplittingReduction( 846 const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty); 847 848 /// Additional information about an operand's possible values. 
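  ///
  /// For example, a cost query for a division whose divisor is a splat of a
  /// constant could describe that operand as OK_UniformConstantValue.
  /// getOperandInfo() (declared below) derives this classification from a
  /// value; a hypothetical use (illustrative only):
  /// \code
  ///   TTI::OperandValueProperties Props = TTI::OP_None;
  ///   TTI::OperandValueKind Kind = TTI::getOperandInfo(Op, Props);
  /// \endcode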
849 enum OperandValueKind { 850 OK_AnyValue, // Operand can have any value. 851 OK_UniformValue, // Operand is uniform (splat of a value). 852 OK_UniformConstantValue, // Operand is uniform constant. 853 OK_NonUniformConstantValue // Operand is a non uniform constant value. 854 }; 855 856 /// Additional properties of an operand's values. 857 enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; 858 859 /// \return the number of registers in the target-provided register class. 860 unsigned getNumberOfRegisters(unsigned ClassID) const; 861 862 /// \return the target-provided register class ID for the provided type, 863 /// accounting for type promotion and other type-legalization techniques that 864 /// the target might apply. However, it specifically does not account for the 865 /// scalarization or splitting of vector types. Should a vector type require 866 /// scalarization or splitting into multiple underlying vector registers, that 867 /// type should be mapped to a register class containing no registers. 868 /// Specifically, this is designed to provide a simple, high-level view of the 869 /// register allocation later performed by the backend. These register classes 870 /// don't necessarily map onto the register classes used by the backend. 871 /// FIXME: It's not currently possible to determine how many registers 872 /// are used by the provided type. 873 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; 874 875 /// \return the target-provided register class name 876 const char *getRegisterClassName(unsigned ClassID) const; 877 878 /// \return The width of the largest scalar or vector register type. 879 unsigned getRegisterBitWidth(bool Vector) const; 880 881 /// \return The width of the smallest vector register type. 882 unsigned getMinVectorRegisterBitWidth() const; 883 884 /// \return True if the vectorization factor should be chosen to 885 /// make the vector of the smallest element type match the size of a 886 /// vector register. For wider element types, this could result in 887 /// creating vectors that span multiple vector registers. 888 /// If false, the vectorization factor will be chosen based on the 889 /// size of the widest element type. 890 bool shouldMaximizeVectorBandwidth(bool OptSize) const; 891 892 /// \return The minimum vectorization factor for types of given element 893 /// bit width, or 0 if there is no minimum VF. The returned value only 894 /// applies when shouldMaximizeVectorBandwidth returns true. 895 unsigned getMinimumVF(unsigned ElemWidth) const; 896 897 /// \return True if it should be considered for address type promotion. 898 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is 899 /// profitable without finding other extensions fed by the same input. 900 bool shouldConsiderAddressTypePromotion( 901 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const; 902 903 /// \return The size of a cache line in bytes. 904 unsigned getCacheLineSize() const; 905 906 /// The possible cache levels 907 enum class CacheLevel { 908 L1D, // The L1 data cache 909 L2D, // The L2 data cache 910 911 // We currently do not model L3 caches, as their sizes differ widely between 912 // microarchitectures. Also, we currently do not have a use for L3 cache 913 // size modeling yet. 914 }; 915 916 /// \return The size of the cache level in bytes, if available. 917 Optional<unsigned> getCacheSize(CacheLevel Level) const; 918 919 /// \return The associativity of the cache level, if available. 
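  ///
  /// Both cache queries return None when the target does not provide a model;
  /// a hypothetical client (illustrative only):
  /// \code
  ///   if (Optional<unsigned> Size = TTI.getCacheSize(TTI::CacheLevel::L1D))
  ///     ; // use *Size as the L1D capacity in bytes
  /// \endcode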
920 Optional<unsigned> getCacheAssociativity(CacheLevel Level) const; 921 922 /// \return How much before a load we should place the prefetch 923 /// instruction. This is currently measured in number of 924 /// instructions. 925 unsigned getPrefetchDistance() const; 926 927 /// Some HW prefetchers can handle accesses up to a certain constant stride. 928 /// Sometimes prefetching is beneficial even below the HW prefetcher limit, 929 /// and the arguments provided are meant to serve as a basis for deciding this 930 /// for a particular loop. 931 /// 932 /// \param NumMemAccesses Number of memory accesses in the loop. 933 /// \param NumStridedMemAccesses Number of the memory accesses that 934 /// ScalarEvolution could find a known stride 935 /// for. 936 /// \param NumPrefetches Number of software prefetches that will be 937 /// emitted as determined by the addresses 938 /// involved and the cache line size. 939 /// \param HasCall True if the loop contains a call. 940 /// 941 /// \return This is the minimum stride in bytes where it makes sense to start 942 /// adding SW prefetches. The default is 1, i.e. prefetch with any 943 /// stride. 944 unsigned getMinPrefetchStride(unsigned NumMemAccesses, 945 unsigned NumStridedMemAccesses, 946 unsigned NumPrefetches, bool HasCall) const; 947 948 /// \return The maximum number of iterations to prefetch ahead. If 949 /// the required number of iterations is more than this number, no 950 /// prefetching is performed. 951 unsigned getMaxPrefetchIterationsAhead() const; 952 953 /// \return True if prefetching should also be done for writes. 954 bool enableWritePrefetching() const; 955 956 /// \return The maximum interleave factor that any transform should try to 957 /// perform for this target. This number depends on the level of parallelism 958 /// and the number of execution units in the CPU. 959 unsigned getMaxInterleaveFactor(unsigned VF) const; 960 961 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2. 962 static OperandValueKind getOperandInfo(const Value *V, 963 OperandValueProperties &OpProps); 964 965 /// This is an approximation of reciprocal throughput of a math/logic op. 966 /// A higher cost indicates less expected throughput. 967 /// From Agner Fog's guides, reciprocal throughput is "the average number of 968 /// clock cycles per instruction when the instructions are not part of a 969 /// limiting dependency chain." 970 /// Therefore, costs should be scaled to account for multiple execution units 971 /// on the target that can process this type of instruction. For example, if 972 /// there are 5 scalar integer units and 2 vector integer units that can 973 /// calculate an 'add' in a single cycle, this model should indicate that the 974 /// cost of the vector add instruction is 2.5 times the cost of the scalar 975 /// add instruction. 976 /// \p Args is an optional argument which holds the instruction operands 977 /// values so the TTI can analyze those values searching for special 978 /// cases or optimizations based on those values. 979 /// \p CxtI is the optional original context instruction, if one exists, to 980 /// provide even more information. 
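  ///
  /// A hypothetical throughput query (illustrative only):
  /// \code
  ///   int VecAddCost = TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
  ///   int ScalarAddCost = TTI.getArithmeticInstrCost(Instruction::Add, EltTy);
  /// \endcode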
981 int getArithmeticInstrCost( 982 unsigned Opcode, Type *Ty, 983 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, 984 OperandValueKind Opd1Info = OK_AnyValue, 985 OperandValueKind Opd2Info = OK_AnyValue, 986 OperandValueProperties Opd1PropInfo = OP_None, 987 OperandValueProperties Opd2PropInfo = OP_None, 988 ArrayRef<const Value *> Args = ArrayRef<const Value *>(), 989 const Instruction *CxtI = nullptr) const; 990 991 /// \return The cost of a shuffle instruction of kind Kind and of type Tp. 992 /// The index and subtype parameters are used by the subvector insertion and 993 /// extraction shuffle kinds to show the insert/extract point and the type of 994 /// the subvector being inserted/extracted. 995 /// NOTE: For subvector extractions Tp represents the source type. 996 int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0, 997 VectorType *SubTp = nullptr) const; 998 999 /// \return The expected cost of cast instructions, such as bitcast, trunc, 1000 /// zext, etc. If there is an existing instruction that holds Opcode, it 1001 /// may be passed in the 'I' parameter. 1002 int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, 1003 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, 1004 const Instruction *I = nullptr) const; 1005 1006 /// \return The expected cost of a sign- or zero-extended vector extract. Use 1007 /// -1 to indicate that there is no information about the index value. 1008 int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, 1009 unsigned Index = -1) const; 1010 1011 /// \return The expected cost of control-flow related instructions such as 1012 /// Phi, Ret, Br. 1013 int getCFInstrCost(unsigned Opcode, 1014 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; 1015 1016 /// \returns The expected cost of compare and select instructions. If there 1017 /// is an existing instruction that holds Opcode, it may be passed in the 1018 /// 'I' parameter. 1019 int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, 1020 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, 1021 const Instruction *I = nullptr) const; 1022 1023 /// \return The expected cost of vector Insert and Extract. 1024 /// Use -1 to indicate that there is no information on the index value. 1025 int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; 1026 1027 /// \return The cost of Load and Store instructions. 1028 int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, 1029 unsigned AddressSpace, 1030 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, 1031 const Instruction *I = nullptr) const; 1032 1033 /// \return The cost of masked Load and Store instructions. 1034 int getMaskedMemoryOpCost( 1035 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, 1036 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; 1037 1038 /// \return The cost of Gather or Scatter operation 1039 /// \p Opcode - is a type of memory access Load or Store 1040 /// \p DataTy - a vector type of the data to be loaded or stored 1041 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory 1042 /// \p VariableMask - true when the memory access is predicated with a mask 1043 /// that is not a compile-time constant 1044 /// \p Alignment - alignment of single element 1045 /// \p I - the optional original context instruction, if one exists, e.g. 
the 1046 /// load/store to transform or the call to the gather/scatter intrinsic 1047 int getGatherScatterOpCost( 1048 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, 1049 Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, 1050 const Instruction *I = nullptr) const; 1051 1052 /// \return The cost of the interleaved memory operation. 1053 /// \p Opcode is the memory operation code 1054 /// \p VecTy is the vector type of the interleaved access. 1055 /// \p Factor is the interleave factor 1056 /// \p Indices is the indices for interleaved load members (as interleaved 1057 /// load allows gaps) 1058 /// \p Alignment is the alignment of the memory operation 1059 /// \p AddressSpace is address space of the pointer. 1060 /// \p UseMaskForCond indicates if the memory access is predicated. 1061 /// \p UseMaskForGaps indicates if gaps should be masked. 1062 int getInterleavedMemoryOpCost( 1063 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, 1064 Align Alignment, unsigned AddressSpace, 1065 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, 1066 bool UseMaskForCond = false, bool UseMaskForGaps = false) const; 1067 1068 /// Calculate the cost of performing a vector reduction. 1069 /// 1070 /// This is the cost of reducing the vector value of type \p Ty to a scalar 1071 /// value using the operation denoted by \p Opcode. The form of the reduction 1072 /// can either be a pairwise reduction or a reduction that splits the vector 1073 /// at every reduction level. 1074 /// 1075 /// Pairwise: 1076 /// (v0, v1, v2, v3) 1077 /// ((v0+v1), (v2+v3), undef, undef) 1078 /// Split: 1079 /// (v0, v1, v2, v3) 1080 /// ((v0+v2), (v1+v3), undef, undef) 1081 int getArithmeticReductionCost( 1082 unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, 1083 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; 1084 1085 int getMinMaxReductionCost( 1086 VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, 1087 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; 1088 1089 /// \returns The cost of Intrinsic instructions. Analyses the real arguments. 1090 /// Three cases are handled: 1. scalar instruction 2. vector instruction 1091 /// 3. scalar instruction which is to be vectorized. 1092 int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, 1093 TTI::TargetCostKind CostKind) const; 1094 1095 /// \returns The cost of Call instructions. 1096 int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys, 1097 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; 1098 1099 /// \returns The number of pieces into which the provided type must be 1100 /// split during legalization. Zero is returned when the answer is unknown. 1101 unsigned getNumberOfParts(Type *Tp) const; 1102 1103 /// \returns The cost of the address computation. For most targets this can be 1104 /// merged into the instruction indexing mode. Some targets might want to 1105 /// distinguish between address computation for memory operations on vector 1106 /// types and scalar types. Such targets should override this function. 1107 /// The 'SE' parameter holds pointer for the scalar evolution object which 1108 /// is used in order to get the Ptr step value in case of constant stride. 1109 /// The 'Ptr' parameter holds SCEV of the access pointer. 
1110 int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr, 1111 const SCEV *Ptr = nullptr) const; 1112 1113 /// \returns The cost, if any, of keeping values of the given types alive 1114 /// over a callsite. 1115 /// 1116 /// Some types may require the use of register classes that do not have 1117 /// any callee-saved registers, so would require a spill and fill. 1118 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; 1119 1120 /// \returns True if the intrinsic is a supported memory intrinsic. Info 1121 /// will contain additional information - whether the intrinsic may write 1122 /// or read to memory, volatility and the pointer. Info is undefined 1123 /// if false is returned. 1124 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; 1125 1126 /// \returns The maximum element size, in bytes, for an element 1127 /// unordered-atomic memory intrinsic. 1128 unsigned getAtomicMemIntrinsicMaxElementSize() const; 1129 1130 /// \returns A value which is the result of the given memory intrinsic. New 1131 /// instructions may be created to extract the result from the given intrinsic 1132 /// memory operation. Returns nullptr if the target cannot create a result 1133 /// from the given intrinsic. 1134 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 1135 Type *ExpectedType) const; 1136 1137 /// \returns The type to use in a loop expansion of a memcpy call. 1138 Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, 1139 unsigned SrcAddrSpace, unsigned DestAddrSpace, 1140 unsigned SrcAlign, unsigned DestAlign) const; 1141 1142 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. 1143 /// \param RemainingBytes The number of bytes to copy. 1144 /// 1145 /// Calculates the operand types to use when copying \p RemainingBytes of 1146 /// memory, where source and destination alignments are \p SrcAlign and 1147 /// \p DestAlign respectively. 1148 void getMemcpyLoopResidualLoweringType( 1149 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, 1150 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, 1151 unsigned SrcAlign, unsigned DestAlign) const; 1152 1153 /// \returns True if the two functions have compatible attributes for inlining 1154 /// purposes. 1155 bool areInlineCompatible(const Function *Caller, 1156 const Function *Callee) const; 1157 1158 /// \returns True if the caller and callee agree on how \p Args will be passed 1159 /// to the callee. 1160 /// \param[out] Args The list of compatible arguments. The implementation may 1161 /// filter out any incompatible args from this list. 1162 bool areFunctionArgsABICompatible(const Function *Caller, 1163 const Function *Callee, 1164 SmallPtrSetImpl<Argument *> &Args) const; 1165 1166 /// The type of load/store indexing. 1167 enum MemIndexedMode { 1168 MIM_Unindexed, ///< No indexing. 1169 MIM_PreInc, ///< Pre-incrementing. 1170 MIM_PreDec, ///< Pre-decrementing. 1171 MIM_PostInc, ///< Post-incrementing. 1172 MIM_PostDec ///< Post-decrementing. 1173 }; 1174 1175 /// \returns True if the specified indexed load for the given type is legal. 1176 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const; 1177 1178 /// \returns True if the specified indexed store for the given type is legal. 1179 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const; 1180 1181 /// \returns The bitwidth of the largest vector type that should be used to 1182 /// load/store in the given address space. 
1183 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; 1184 1185 /// \returns True if the load instruction is legal to vectorize. 1186 bool isLegalToVectorizeLoad(LoadInst *LI) const; 1187 1188 /// \returns True if the store instruction is legal to vectorize. 1189 bool isLegalToVectorizeStore(StoreInst *SI) const; 1190 1191 /// \returns True if it is legal to vectorize the given load chain. 1192 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, 1193 unsigned AddrSpace) const; 1194 1195 /// \returns True if it is legal to vectorize the given store chain. 1196 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, 1197 unsigned AddrSpace) const; 1198 1199 /// \returns The new vector factor value if the target doesn't support \p 1200 /// SizeInBytes loads or has a better vector factor. 1201 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, 1202 unsigned ChainSizeInBytes, 1203 VectorType *VecTy) const; 1204 1205 /// \returns The new vector factor value if the target doesn't support \p 1206 /// SizeInBytes stores or has a better vector factor. 1207 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, 1208 unsigned ChainSizeInBytes, 1209 VectorType *VecTy) const; 1210 1211 /// Flags describing the kind of vector reduction. 1212 struct ReductionFlags { 1213 ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {} 1214 bool IsMaxOp; ///< If the op a min/max kind, true if it's a max operation. 1215 bool IsSigned; ///< Whether the operation is a signed int reduction. 1216 bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present. 1217 }; 1218 1219 /// \returns True if the target wants to handle the given reduction idiom in 1220 /// the intrinsics form instead of the shuffle form. 1221 bool useReductionIntrinsic(unsigned Opcode, Type *Ty, 1222 ReductionFlags Flags) const; 1223 1224 /// \returns True if the target wants to expand the given reduction intrinsic 1225 /// into a shuffle sequence. 1226 bool shouldExpandReduction(const IntrinsicInst *II) const; 1227 1228 /// \returns the size cost of rematerializing a GlobalValue address relative 1229 /// to a stack reload. 1230 unsigned getGISelRematGlobalCost() const; 1231 1232 /// \name Vector Predication Information 1233 /// @{ 1234 /// Whether the target supports the %evl parameter of VP intrinsic efficiently 1235 /// in hardware. (see LLVM Language Reference - "Vector Predication 1236 /// Intrinsics") Use of %evl is discouraged when that is not the case. 1237 bool hasActiveVectorLength() const; 1238 1239 /// @} 1240 1241 /// @} 1242 1243 private: 1244 /// Estimate the latency of specified instruction. 1245 /// Returns 1 as the default value. 1246 int getInstructionLatency(const Instruction *I) const; 1247 1248 /// Returns the expected throughput cost of the instruction. 1249 /// Returns -1 if the cost is unknown. 1250 int getInstructionThroughput(const Instruction *I) const; 1251 1252 /// The abstract base class used to type erase specific TTI 1253 /// implementations. 1254 class Concept; 1255 1256 /// The template model for the base class which wraps a concrete 1257 /// implementation in a type erased interface. 
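  /// In outline (a simplified sketch of the type-erasure idiom, not the
  /// actual definition): Model<T> stores a T and forwards each virtual
  /// Concept method to the corresponding member function of T, so
  /// TargetTransformInfo can hold any target's implementation behind a
  /// single pointer.
  /// \code
  ///   template <typename T> class Model : public Concept {
  ///     T Impl;
  ///   public:
  ///     Model(T Impl) : Impl(std::move(Impl)) {}
  ///     const DataLayout &getDataLayout() const override {
  ///       return Impl.getDataLayout();
  ///     }
  ///     // ...one override per Concept method...
  ///   };
  /// \endcode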
  /// Flags describing the kind of vector reduction.
  struct ReductionFlags {
    ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
    bool IsMaxOp;  ///< If the op is a min/max kind, true if it's a max operation.
    bool IsSigned; ///< Whether the operation is a signed int reduction.
    bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
  };

  /// \returns True if the target wants to handle the given reduction idiom in
  /// the intrinsics form instead of the shuffle form.
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const;

  /// \returns True if the target wants to expand the given reduction intrinsic
  /// into a shuffle sequence.
  bool shouldExpandReduction(const IntrinsicInst *II) const;

  /// \returns the size cost of rematerializing a GlobalValue address relative
  /// to a stack reload.
  unsigned getGISelRematGlobalCost() const;

  /// \name Vector Predication Information
  /// @{
  /// Whether the target supports the %evl parameter of VP intrinsics
  /// efficiently in hardware (see LLVM Language Reference - "Vector
  /// Predication Intrinsics"). Use of %evl is discouraged when that is not
  /// the case.
  bool hasActiveVectorLength() const;

  /// @}

  /// @}

private:
  /// Estimate the latency of the specified instruction.
  /// Returns 1 as the default value.
  int getInstructionLatency(const Instruction *I) const;

  /// Returns the expected throughput cost of the instruction.
  /// Returns -1 if the cost is unknown.
  int getInstructionThroughput(const Instruction *I) const;

  /// The abstract base class used to type erase specific TTI
  /// implementations.
  class Concept;

  /// The template model for the base class which wraps a concrete
  /// implementation in a type erased interface.
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
};
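
// The Concept/Model pair declared above implements the usual "concept/model"
// type-erasure idiom: Concept is an abstract interface mirroring the public
// TTI API, and Model<T> forwards every virtual call to a concrete target
// implementation T captured by value. A rough, simplified sketch of the same
// idiom (all names below are illustrative only, not part of this header):
//
//   struct ConceptSketch {
//     virtual ~ConceptSketch() = default;
//     virtual int cost() = 0;
//   };
//   template <typename U> struct ModelSketch final : ConceptSketch {
//     U Impl;
//     ModelSketch(U Impl) : Impl(std::move(Impl)) {}
//     int cost() override { return Impl.cost(); }
//   };
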
class TargetTransformInfo::Concept {
public:
  virtual ~Concept() = 0;
  virtual const DataLayout &getDataLayout() const = 0;
  virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
                         ArrayRef<const Value *> Operands,
                         TTI::TargetCostKind CostKind) = 0;
  virtual unsigned getInliningThresholdMultiplier() = 0;
  virtual int getInlinerVectorBonusPercent() = 0;
  virtual int getMemcpyCost(const Instruction *I) = 0;
  virtual unsigned
  getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
                                   ProfileSummaryInfo *PSI,
                                   BlockFrequencyInfo *BFI) = 0;
  virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands,
                          TargetCostKind CostKind) = 0;
  virtual bool hasBranchDivergence() = 0;
  virtual bool useGPUDivergenceAnalysis() = 0;
  virtual bool isSourceOfDivergence(const Value *V) = 0;
  virtual bool isAlwaysUniform(const Value *V) = 0;
  virtual unsigned getFlatAddressSpace() = 0;
  virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                          Intrinsic::ID IID) const = 0;
  virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
  virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                  Value *OldV,
                                                  Value *NewV) const = 0;
  virtual bool isLoweredToCall(const Function *F) = 0;
  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                       UnrollingPreferences &UP) = 0;
  virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                     PeelingPreferences &PP) = 0;
  virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                        AssumptionCache &AC,
                                        TargetLibraryInfo *LibInfo,
                                        HardwareLoopInfo &HWLoopInfo) = 0;
  virtual bool
  preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                              AssumptionCache &AC, TargetLibraryInfo *TLI,
                              DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
  virtual bool emitGetActiveLaneMask() = 0;
  virtual bool isLegalAddImmediate(int64_t Imm) = 0;
  virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
  virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale, unsigned AddrSpace,
                                     Instruction *I) = 0;
  virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                             TargetTransformInfo::LSRCost &C2) = 0;
  virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
  virtual bool canMacroFuseCmp() = 0;
  virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                          LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
                          TargetLibraryInfo *LibInfo) = 0;
  virtual bool shouldFavorPostInc() const = 0;
  virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
  virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
  virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
  virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
  virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
  virtual bool prefersVectorizedAddressing() = 0;
  virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale, unsigned AddrSpace) = 0;
  virtual bool LSRWithInstrQueries() = 0;
  virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
  virtual bool isProfitableToHoist(Instruction *I) = 0;
  virtual bool useAA() = 0;
  virtual bool isTypeLegal(Type *Ty) = 0;
  virtual bool shouldBuildLookupTables() = 0;
  virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
  virtual bool useColdCCForColdCall(Function &F) = 0;
  virtual unsigned getScalarizationOverhead(VectorType *Ty,
                                            const APInt &DemandedElts,
                                            bool Insert, bool Extract) = 0;
  virtual unsigned
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   unsigned VF) = 0;
  virtual bool supportsEfficientVectorElementLoadStore() = 0;
  virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
  virtual MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
  virtual bool enableInterleavedAccessVectorization() = 0;
  virtual bool enableMaskedInterleavedAccessVectorization() = 0;
  virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
  virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                              unsigned BitWidth,
                                              unsigned AddressSpace,
                                              unsigned Alignment,
                                              bool *Fast) = 0;
  virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
  virtual bool haveFastSqrt(Type *Ty) = 0;
  virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
  virtual int getFPOpCost(Type *Ty) = 0;
  virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty) = 0;
  virtual int getIntImmCost(const APInt &Imm, Type *Ty,
                            TargetCostKind CostKind) = 0;
  virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
                                Type *Ty, TargetCostKind CostKind) = 0;
  virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty,
                                  TargetCostKind CostKind) = 0;
  virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
  virtual unsigned getRegisterClassForType(bool Vector,
                                           Type *Ty = nullptr) const = 0;
  virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
  virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
  virtual unsigned getMinVectorRegisterBitWidth() = 0;
  virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
  virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
  virtual bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  virtual unsigned getCacheLineSize() const = 0;
  virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
  virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;

  /// \return How much before a load we should place the prefetch
  /// instruction. This is currently measured in number of
  /// instructions.
  virtual unsigned getPrefetchDistance() const = 0;

  /// \return Some HW prefetchers can handle accesses up to a certain
  /// constant stride. This is the minimum stride in bytes where it
  /// makes sense to start adding SW prefetches. The default is 1,
  /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
  /// even below the HW prefetcher limit, and the arguments provided are
  /// meant to serve as a basis for deciding this for a particular loop.
  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const = 0;

  /// \return The maximum number of iterations to prefetch ahead. If
  /// the required number of iterations is more than this number, no
  /// prefetching is performed.
  virtual unsigned getMaxPrefetchIterationsAhead() const = 0;

  /// \return True if prefetching should also be done for writes.
  virtual bool enableWritePrefetching() const = 0;

  virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
  virtual unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      OperandValueKind Opd1Info, OperandValueKind Opd2Info,
      OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
      ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
  virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
                             VectorType *SubTp) = 0;
  virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                               TTI::TargetCostKind CostKind,
                               const Instruction *I) = 0;
  virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                       VectorType *VecTy, unsigned Index) = 0;
  virtual int getCFInstrCost(unsigned Opcode,
                             TTI::TargetCostKind CostKind) = 0;
  virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                 TTI::TargetCostKind CostKind,
                                 const Instruction *I) = 0;
  virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                 unsigned Index) = 0;
  virtual int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                              unsigned AddressSpace,
                              TTI::TargetCostKind CostKind,
                              const Instruction *I) = 0;
  virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                    Align Alignment, unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind) = 0;
  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                     const Value *Ptr, bool VariableMask,
                                     Align Alignment,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr) = 0;
  virtual int getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
  virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                         bool IsPairwiseForm,
                                         TTI::TargetCostKind CostKind) = 0;
  virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                     bool IsPairwiseForm, bool IsUnsigned,
                                     TTI::TargetCostKind CostKind) = 0;
  virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                    TTI::TargetCostKind CostKind) = 0;
  virtual int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
                               TTI::TargetCostKind CostKind) = 0;
  virtual unsigned getNumberOfParts(Type *Tp) = 0;
  virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                        const SCEV *Ptr) = 0;
  virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
  virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                                  MemIntrinsicInfo &Info) = 0;
  virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
  virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                                   Type *ExpectedType) = 0;
  virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                          unsigned SrcAddrSpace,
                                          unsigned DestAddrSpace,
                                          unsigned SrcAlign,
                                          unsigned DestAlign) const = 0;
  virtual void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const = 0;
  virtual bool areInlineCompatible(const Function *Caller,
                                   const Function *Callee) const = 0;
  virtual bool
  areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
                               SmallPtrSetImpl<Argument *> &Args) const = 0;
  virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
  virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
  virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
  virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                           Align Alignment,
                                           unsigned AddrSpace) const = 0;
  virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                            Align Alignment,
                                            unsigned AddrSpace) const = 0;
  virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                                       unsigned ChainSizeInBytes,
                                       VectorType *VecTy) const = 0;
  virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                        unsigned ChainSizeInBytes,
                                        VectorType *VecTy) const = 0;
  virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                     ReductionFlags) const = 0;
  virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
  virtual unsigned getGISelRematGlobalCost() const = 0;
  virtual bool hasActiveVectorLength() const = 0;
  virtual int getInstructionLatency(const Instruction *I) = 0;
};

template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override {}

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands,
                 enum TargetTransformInfo::TargetCostKind CostKind) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands);
  }
  unsigned getInliningThresholdMultiplier() override {
    return Impl.getInliningThresholdMultiplier();
  }
  int getInlinerVectorBonusPercent() override {
    return Impl.getInlinerVectorBonusPercent();
  }
  int getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
                  TargetCostKind CostKind) override {
    return Impl.getUserCost(U, Operands, CostKind);
  }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  bool useGPUDivergenceAnalysis() override {
    return Impl.useGPUDivergenceAnalysis();
  }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }

  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }

  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override {
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  }

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }

  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP) override {
    return Impl.getUnrollingPreferences(L, SE, UP);
  }
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) override {
    return Impl.getPeelingPreferences(L, SE, PP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) override {
    return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
  }
  bool emitGetActiveLaneMask() override {
    return Impl.emitGetActiveLaneMask();
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                      AddrSpace, I);
  }
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool isProfitableLSRChainElement(Instruction *I) override {
    return Impl.isProfitableLSRChainElement(I);
  }
  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  bool shouldFavorPostInc() const override { return Impl.shouldFavorPostInc(); }
  bool shouldFavorBackedgeIndex(const Loop *L) const override {
    return Impl.shouldFavorBackedgeIndex(L);
  }
  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType) override {
    return Impl.isLegalMaskedCompressStore(DataType);
  }
  bool isLegalMaskedExpandLoad(Type *DataType) override {
    return Impl.isLegalMaskedExpandLoad(DataType);
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                     AddrSpace);
  }
  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }

  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
                                    bool Insert, bool Extract) override {
    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
  }
  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) override {
    return Impl.getOperandsScalarizationOverhead(Args, VF);
  }

  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, unsigned Alignment,
                                      bool *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }

  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(const APInt &Imm, Type *Ty,
                    TargetCostKind CostKind) override {
    return Impl.getIntImmCost(Imm, Ty, CostKind);
  }
  int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
                        TargetCostKind CostKind) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind);
  }
  int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                          Type *Ty, TargetCostKind CostKind) override {
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char *getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  unsigned getRegisterBitWidth(bool Vector) const override {
    return Impl.getRegisterBitWidth(Vector);
  }
  unsigned getMinVectorRegisterBitWidth() override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
    return Impl.shouldMaximizeVectorBandwidth(OptSize);
  }
  unsigned getMinimumVF(unsigned ElemWidth) const override {
    return Impl.getMinimumVF(ElemWidth);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  Optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }

  /// Return the preferred prefetch distance in terms of instructions.
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }

  /// Return the minimum stride necessary to trigger software
  /// prefetching.
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }

  /// Return the maximum prefetch distance in terms of loop
  /// iterations.
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }

  /// \return True if prefetching should also be done for writes.
  bool enableWritePrefetching() const override {
    return Impl.enableWritePrefetching();
  }

  unsigned getMaxInterleaveFactor(unsigned VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::TargetCostKind CostKind,
                                  OperandValueKind Opd1Info,
                                  OperandValueKind Opd2Info,
                                  OperandValueProperties Opd1PropInfo,
                                  OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args,
                                  const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo, Args, CxtI);
  }
  int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
                     VectorType *SubTp) override {
    return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
  }
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       TTI::TargetCostKind CostKind,
                       const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, CostKind, I);
  }
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override {
    return Impl.getCFInstrCost(Opcode, CostKind);
  }
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
  }
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    return Impl.getVectorInstrCost(Opcode, Val, Index);
  }
  int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                      unsigned AddressSpace, TTI::TargetCostKind CostKind,
                      const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                CostKind, I);
  }
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                            unsigned AddressSpace,
                            TTI::TargetCostKind CostKind) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                      CostKind);
  }
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                             bool VariableMask, Align Alignment,
                             TTI::TargetCostKind CostKind,
                             const Instruction *I = nullptr) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
  }
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, Align Alignment,
                                 unsigned AddressSpace,
                                 TTI::TargetCostKind CostKind,
                                 bool UseMaskForCond,
                                 bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
  }
  int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                 bool IsPairwiseForm,
                                 TTI::TargetCostKind CostKind) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
                                           CostKind);
  }
  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                             bool IsPairwiseForm, bool IsUnsigned,
                             TTI::TargetCostKind CostKind) override {
    return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
                                       CostKind);
  }
  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                            TTI::TargetCostKind CostKind) override {
    return Impl.getIntrinsicInstrCost(ICA, CostKind);
  }
  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
                       TTI::TargetCostKind CostKind) override {
    return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign,
                                  unsigned DestAlign) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign);
  }
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  bool areFunctionArgsABICompatible(
      const Function *Caller, const Function *Callee,
      SmallPtrSetImpl<Argument *> &Args) const override {
    return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }

  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }

  bool hasActiveVectorLength() const override {
    return Impl.hasActiveVectorLength();
  }

  int getInstructionLatency(const Instruction *I) override {
    return Impl.getInstructionLatency(I);
  }
};

template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}

/// Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  typedef TargetTransformInfo Result;

  /// Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  Result run(const Function &F, FunctionAnalysisManager &);
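
  // For illustration only: under the new pass manager, a hypothetical function
  // pass (MyPass is an assumption of this sketch) would typically obtain the
  // result of this analysis as follows:
  //
  //   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
  //     TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  //     // ... query TTI for cost and legality information ...
  //     return PreservedAnalyses::all();
  //   }
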
private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  static AnalysisKey Key;

  /// The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};

/// Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
class TargetTransformInfoWrapperPass : public ImmutablePass {
  TargetIRAnalysis TIRA;
  Optional<TargetTransformInfo> TTI;

  virtual void anchor();

public:
  static char ID;

  /// We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  TargetTransformInfo &getTTI(const Function &F);
};

/// Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

} // namespace llvm

#endif