1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass exposes codegen information to IR-level passes. Every
10 /// transformation that uses codegen information is broken into three parts:
11 /// 1. The IR-level analysis pass.
12 /// 2. The IR-level transformation interface which provides the needed
13 /// information.
14 /// 3. Codegen-level implementation which uses target-specific hooks.
15 ///
16 /// This file defines #2, which is the interface that IR-level transformations
17 /// use for querying the codegen.
18 ///
19 //===----------------------------------------------------------------------===//
20
21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
24 #include "llvm/IR/InstrTypes.h"
25 #include "llvm/IR/Operator.h"
26 #include "llvm/IR/PassManager.h"
27 #include "llvm/Pass.h"
28 #include "llvm/Support/AtomicOrdering.h"
29 #include "llvm/Support/DataTypes.h"
30 #include "llvm/Support/InstructionCost.h"
31 #include <functional>
32
33 namespace llvm {
34
35 namespace Intrinsic {
36 typedef unsigned ID;
37 }
38
39 class AssumptionCache;
40 class BlockFrequencyInfo;
41 class DominatorTree;
42 class BranchInst;
43 class CallBase;
44 class ExtractElementInst;
45 class Function;
46 class GlobalValue;
47 class InstCombiner;
48 class IntrinsicInst;
49 class LoadInst;
50 class LoopAccessInfo;
51 class Loop;
52 class LoopInfo;
53 class ProfileSummaryInfo;
54 class SCEV;
55 class ScalarEvolution;
56 class StoreInst;
57 class SwitchInst;
58 class TargetLibraryInfo;
59 class Type;
60 class User;
61 class Value;
62 struct KnownBits;
63 template <typename T> class Optional;
64
65 /// Information about a load/store intrinsic defined by the target.
66 struct MemIntrinsicInfo {
67 /// This is the pointer that the intrinsic is loading from or storing to.
68 /// If this is non-null, then analysis/optimization passes can assume that
69 /// this intrinsic is functionally equivalent to a load/store from this
70 /// pointer.
71 Value *PtrVal = nullptr;
72
73 // Ordering for atomic operations.
74 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
75
76 // Same Id is set by the target for corresponding load/store intrinsics.
77 unsigned short MatchingId = 0;
78
79 bool ReadMem = false;
80 bool WriteMem = false;
81 bool IsVolatile = false;
82
  bool isUnordered() const {
84 return (Ordering == AtomicOrdering::NotAtomic ||
85 Ordering == AtomicOrdering::Unordered) &&
86 !IsVolatile;
87 }
88 };
89
90 /// Attributes of a target dependent hardware loop.
91 struct HardwareLoopInfo {
92 HardwareLoopInfo() = delete;
  HardwareLoopInfo(Loop *L) : L(L) {}
94 Loop *L = nullptr;
95 BasicBlock *ExitBlock = nullptr;
96 BranchInst *ExitBranch = nullptr;
97 const SCEV *TripCount = nullptr;
98 IntegerType *CountType = nullptr;
99 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
100 // value in every iteration.
101 bool IsNestingLegal = false; // Can a hardware loop be a parent to
102 // another hardware loop?
103 bool CounterInReg = false; // Should loop counter be updated in
104 // the loop via a phi?
105 bool PerformEntryTest = false; // Generate the intrinsic which also performs
106 // icmp ne zero on the loop counter value and
107 // produces an i1 to guard the loop entry.
108 bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
109 DominatorTree &DT, bool ForceNestedLoop = false,
110 bool ForceHardwareLoopPHI = false);
111 bool canAnalyze(LoopInfo &LI);
112 };
113
114 class IntrinsicCostAttributes {
115 const IntrinsicInst *II = nullptr;
116 Type *RetTy = nullptr;
117 Intrinsic::ID IID;
118 SmallVector<Type *, 4> ParamTys;
119 SmallVector<const Value *, 4> Arguments;
120 FastMathFlags FMF;
121 ElementCount VF = ElementCount::getFixed(1);
122 // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
123 // arguments and the return value will be computed based on types.
124 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
125
126 public:
127 IntrinsicCostAttributes(const IntrinsicInst &I);
128
129 IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);
130
131 IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
132 ElementCount Factor);
133
134 IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
135 ElementCount Factor, unsigned ScalarCost);
136
137 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
138 ArrayRef<Type *> Tys, FastMathFlags Flags);
139
140 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
141 ArrayRef<Type *> Tys, FastMathFlags Flags,
142 unsigned ScalarCost);
143
144 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
145 ArrayRef<Type *> Tys, FastMathFlags Flags,
146 unsigned ScalarCost,
147 const IntrinsicInst *I);
148
149 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
150 ArrayRef<Type *> Tys);
151
152 IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
153 ArrayRef<const Value *> Args);
154
  Intrinsic::ID getID() const { return IID; }
  const IntrinsicInst *getInst() const { return II; }
  Type *getReturnType() const { return RetTy; }
  ElementCount getVectorFactor() const { return VF; }
  FastMathFlags getFlags() const { return FMF; }
  unsigned getScalarizationCost() const { return ScalarizationCost; }
  const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
163
  bool isTypeBasedOnly() const {
165 return Arguments.empty();
166 }
167
  bool skipScalarizationCost() const {
169 return ScalarizationCost != std::numeric_limits<unsigned>::max();
170 }
171 };
172
173 class TargetTransformInfo;
174 typedef TargetTransformInfo TTI;
175
176 /// This pass provides access to the codegen interfaces that are needed
177 /// for IR-level transformations.
178 class TargetTransformInfo {
179 public:
180 /// Construct a TTI object using a type implementing the \c Concept
181 /// API below.
182 ///
183 /// This is used by targets to construct a TTI wrapping their target-specific
184 /// implementation that encodes appropriate costs for their target.
185 template <typename T> TargetTransformInfo(T Impl);
186
187 /// Construct a baseline TTI object using a minimal implementation of
188 /// the \c Concept API below.
189 ///
190 /// The TTI implementation will reflect the information in the DataLayout
191 /// provided if non-null.
192 explicit TargetTransformInfo(const DataLayout &DL);
193
194 // Provide move semantics.
195 TargetTransformInfo(TargetTransformInfo &&Arg);
196 TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
197
198 // We need to define the destructor out-of-line to define our sub-classes
199 // out-of-line.
200 ~TargetTransformInfo();
201
202 /// Handle the invalidation of this information.
203 ///
204 /// When used as a result of \c TargetIRAnalysis this method will be called
205 /// when the function this was computed for changes. When it returns false,
206 /// the information is preserved across those changes.
  bool invalidate(Function &, const PreservedAnalyses &,
                  FunctionAnalysisManager::Invalidator &) {
209 // FIXME: We should probably in some way ensure that the subtarget
210 // information for a function hasn't changed.
211 return false;
212 }
213
214 /// \name Generic Target Information
215 /// @{
216
217 /// The kind of cost model.
218 ///
219 /// There are several different cost models that can be customized by the
220 /// target. The normalization of each cost model may be target specific.
221 enum TargetCostKind {
222 TCK_RecipThroughput, ///< Reciprocal throughput.
223 TCK_Latency, ///< The latency of instruction.
224 TCK_CodeSize, ///< Instruction code size.
225 TCK_SizeAndLatency ///< The weighted sum of size and latency.
226 };
227
228 /// Query the cost of a specified instruction.
229 ///
230 /// Clients should use this interface to query the cost of an existing
231 /// instruction. The instruction must have a valid parent (basic block).
232 ///
233 /// Note, this method does not cache the cost calculation and it
234 /// can be expensive in some cases.
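  ///
  /// A rough usage sketch (illustrative only; \c TTI is an assumed
  /// TargetTransformInfo reference and \c L an assumed Loop pointer):
  /// \code
  ///   InstructionCost LoopCost = 0;
  ///   for (BasicBlock *BB : L->blocks())
  ///     for (Instruction &I : *BB)
  ///       LoopCost += TTI.getInstructionCost(
  ///           &I, TargetTransformInfo::TCK_SizeAndLatency);
  ///   if (!LoopCost.isValid())
  ///     return; // Some instruction could not be costed.
  /// \endcode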
  InstructionCost getInstructionCost(const Instruction *I,
                                     enum TargetCostKind kind) const {
237 InstructionCost Cost;
238 switch (kind) {
239 case TCK_RecipThroughput:
240 Cost = getInstructionThroughput(I);
241 break;
242 case TCK_Latency:
243 Cost = getInstructionLatency(I);
244 break;
245 case TCK_CodeSize:
246 case TCK_SizeAndLatency:
247 Cost = getUserCost(I, kind);
248 break;
249 }
250 if (Cost == -1)
251 Cost.setInvalid();
252 return Cost;
253 }
254
255 /// Underlying constants for 'cost' values in this interface.
256 ///
257 /// Many APIs in this interface return a cost. This enum defines the
258 /// fundamental values that should be used to interpret (and produce) those
259 /// costs. The costs are returned as an int rather than a member of this
260 /// enumeration because it is expected that the cost of one IR instruction
261 /// may have a multiplicative factor to it or otherwise won't fit directly
262 /// into the enum. Moreover, it is common to sum or average costs which works
263 /// better as simple integral values. Thus this enum only provides constants.
264 /// Also note that the returned costs are signed integers to make it natural
265 /// to add, subtract, and test with zero (a common boundary condition). It is
266 /// not expected that 2^32 is a realistic cost to be modeling at any point.
267 ///
268 /// Note that these costs should usually reflect the intersection of code-size
269 /// cost and execution cost. A free instruction is typically one that folds
270 /// into another instruction. For example, reg-to-reg moves can often be
271 /// skipped by renaming the registers in the CPU, but they still are encoded
272 /// and thus wouldn't be considered 'free' here.
273 enum TargetCostConstants {
274 TCC_Free = 0, ///< Expected to fold away in lowering.
275 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
276 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
277 };
278
279 /// Estimate the cost of a GEP operation when lowered.
280 int getGEPCost(Type *PointeeType, const Value *Ptr,
281 ArrayRef<const Value *> Operands,
282 TargetCostKind CostKind = TCK_SizeAndLatency) const;
283
284 /// \returns A value by which our inlining threshold should be multiplied.
285 /// This is primarily used to bump up the inlining threshold wholesale on
286 /// targets where calls are unusually expensive.
287 ///
288 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
289 /// individual classes of instructions would be better.
290 unsigned getInliningThresholdMultiplier() const;
291
292 /// \returns A value to be added to the inlining threshold.
293 unsigned adjustInliningThreshold(const CallBase *CB) const;
294
295 /// \returns Vector bonus in percent.
296 ///
297 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
298 /// and apply this bonus based on the percentage of vector instructions. A
299 /// bonus is applied if the vector instructions exceed 50% and half that
  /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
301 /// arbitrary and evolved over time by accident as much as because they are
302 /// principled bonuses.
303 /// FIXME: It would be nice to base the bonus values on something more
  /// scientific. A target may have no bonus on vector instructions.
305 int getInlinerVectorBonusPercent() const;
306
307 /// \return the expected cost of a memcpy, which could e.g. depend on the
308 /// source/destination type and alignment and the number of bytes copied.
309 int getMemcpyCost(const Instruction *I) const;
310
311 /// \return The estimated number of case clusters when lowering \p 'SI'.
312 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
313 /// table.
314 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
315 unsigned &JTSize,
316 ProfileSummaryInfo *PSI,
317 BlockFrequencyInfo *BFI) const;
318
319 /// Estimate the cost of a given IR user when lowered.
320 ///
321 /// This can estimate the cost of either a ConstantExpr or Instruction when
322 /// lowered.
323 ///
  /// \p Operands is a list of operands which can be a result of transformations
  /// of the current operands. The number of operands on the list must equal the
  /// number of current operands the IR user has, and their order must match the
  /// order of the current operands of the IR user.
329 ///
330 /// The returned cost is defined in terms of \c TargetCostConstants, see its
331 /// comments for a detailed explanation of the cost values.
332 int getUserCost(const User *U, ArrayRef<const Value *> Operands,
333 TargetCostKind CostKind) const;
334
335 /// This is a helper function which calls the two-argument getUserCost
336 /// with \p Operands which are the current operands U has.
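  ///
  /// Illustrative sketch only, assuming a TargetTransformInfo reference \c TTI
  /// and an instruction \c I are in scope:
  /// \code
  ///   if (TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize) ==
  ///       TargetTransformInfo::TCC_Free)
  ///     ; // I is expected to fold away when lowered.
  /// \endcode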
  int getUserCost(const User *U, TargetCostKind CostKind) const {
338 SmallVector<const Value *, 4> Operands(U->operand_values());
339 return getUserCost(U, Operands, CostKind);
340 }
341
342 /// Return true if branch divergence exists.
343 ///
344 /// Branch divergence has a significantly negative impact on GPU performance
345 /// when threads in the same wavefront take different paths due to conditional
346 /// branches.
347 bool hasBranchDivergence() const;
348
349 /// Return true if the target prefers to use GPU divergence analysis to
350 /// replace the legacy version.
351 bool useGPUDivergenceAnalysis() const;
352
353 /// Returns whether V is a source of divergence.
354 ///
355 /// This function provides the target-dependent information for
356 /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
357 /// first builds the dependency graph, and then runs the reachability
358 /// algorithm starting with the sources of divergence.
359 bool isSourceOfDivergence(const Value *V) const;
360
  /// Returns true for the target-specific set of operations which produce
  /// uniform results even when taking non-uniform arguments.
364 bool isAlwaysUniform(const Value *V) const;
365
366 /// Returns the address space ID for a target's 'flat' address space. Note
367 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
368 /// refers to as the generic address space. The flat address space is a
  /// generic address space that can be used to access multiple segments of
  /// memory with different address spaces. Access of a memory location through
  /// a pointer with this address space is expected to be legal but slower
372 /// compared to the same memory location accessed through a pointer with a
373 /// different address space.
  ///
375 /// This is for targets with different pointer representations which can
376 /// be converted with the addrspacecast instruction. If a pointer is converted
377 /// to this address space, optimizations should attempt to replace the access
378 /// with the source address space.
379 ///
380 /// \returns ~0u if the target does not have such a flat address space to
381 /// optimize away.
382 unsigned getFlatAddressSpace() const;
383
384 /// Return any intrinsic address operand indexes which may be rewritten if
385 /// they use a flat address space pointer.
386 ///
387 /// \returns true if the intrinsic was handled.
388 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
389 Intrinsic::ID IID) const;
390
391 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
392
393 unsigned getAssumedAddrSpace(const Value *V) const;
394
395 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
396 /// NewV, which has a different address space. This should happen for every
397 /// operand index that collectFlatAddressOperands returned for the intrinsic.
398 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
399 /// new value (which may be the original \p II with modified operands).
400 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
401 Value *NewV) const;
402
403 /// Test whether calls to a function lower to actual program function
404 /// calls.
405 ///
406 /// The idea is to test whether the program is likely to require a 'call'
407 /// instruction or equivalent in order to call the given function.
408 ///
  /// FIXME: It's not clear that this is a good or useful query API. Clients
410 /// should probably move to simpler cost metrics using the above.
411 /// Alternatively, we could split the cost interface into distinct code-size
412 /// and execution-speed costs. This would allow modelling the core of this
413 /// query more accurately as a call is a single small instruction, but
414 /// incurs significant execution cost.
415 bool isLoweredToCall(const Function *F) const;
416
417 struct LSRCost {
418 /// TODO: Some of these could be merged. Also, a lexical ordering
419 /// isn't always optimal.
420 unsigned Insns;
421 unsigned NumRegs;
422 unsigned AddRecCost;
423 unsigned NumIVMuls;
424 unsigned NumBaseAdds;
425 unsigned ImmCost;
426 unsigned SetupCost;
427 unsigned ScaleCost;
428 };
429
430 /// Parameters that control the generic loop unrolling transformation.
431 struct UnrollingPreferences {
432 /// The cost threshold for the unrolled loop. Should be relative to the
433 /// getUserCost values returned by this API, and the expectation is that
434 /// the unrolled loop's instructions when run through that interface should
435 /// not exceed this cost. However, this is only an estimate. Also, specific
436 /// loops may be unrolled even with a cost above this threshold if deemed
437 /// profitable. Set this to UINT_MAX to disable the loop body cost
438 /// restriction.
439 unsigned Threshold;
440 /// If complete unrolling will reduce the cost of the loop, we will boost
441 /// the Threshold by a certain percent to allow more aggressive complete
442 /// unrolling. This value provides the maximum boost percentage that we
443 /// can apply to Threshold (The value should be no less than 100).
444 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
445 /// MaxPercentThresholdBoost / 100)
446 /// E.g. if complete unrolling reduces the loop execution time by 50%
447 /// then we boost the threshold by the factor of 2x. If unrolling is not
448 /// expected to reduce the running time, then we do not increase the
449 /// threshold.
450 unsigned MaxPercentThresholdBoost;
451 /// The cost threshold for the unrolled loop when optimizing for size (set
452 /// to UINT_MAX to disable).
453 unsigned OptSizeThreshold;
454 /// The cost threshold for the unrolled loop, like Threshold, but used
455 /// for partial/runtime unrolling (set to UINT_MAX to disable).
456 unsigned PartialThreshold;
457 /// The cost threshold for the unrolled loop when optimizing for size, like
458 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
459 /// UINT_MAX to disable).
460 unsigned PartialOptSizeThreshold;
461 /// A forced unrolling factor (the number of concatenated bodies of the
462 /// original loop in the unrolled loop body). When set to 0, the unrolling
463 /// transformation will select an unrolling factor based on the current cost
464 /// threshold and other factors.
465 unsigned Count;
466 /// Default unroll count for loops with run-time trip count.
467 unsigned DefaultUnrollRuntimeCount;
468 // Set the maximum unrolling factor. The unrolling factor may be selected
469 // using the appropriate cost threshold, but may not exceed this number
470 // (set to UINT_MAX to disable). This does not apply in cases where the
471 // loop is being fully unrolled.
472 unsigned MaxCount;
473 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
474 /// applies even if full unrolling is selected. This allows a target to fall
475 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
476 unsigned FullUnrollMaxCount;
477 // Represents number of instructions optimized when "back edge"
478 // becomes "fall through" in unrolled loop.
479 // For now we count a conditional branch on a backedge and a comparison
480 // feeding it.
481 unsigned BEInsns;
482 /// Allow partial unrolling (unrolling of loops to expand the size of the
483 /// loop body, not only to eliminate small constant-trip-count loops).
484 bool Partial;
485 /// Allow runtime unrolling (unrolling of loops to expand the size of the
486 /// loop body even when the number of loop iterations is not known at
487 /// compile time).
488 bool Runtime;
489 /// Allow generation of a loop remainder (extra iterations after unroll).
490 bool AllowRemainder;
491 /// Allow emitting expensive instructions (such as divisions) when computing
492 /// the trip count of a loop for runtime unrolling.
493 bool AllowExpensiveTripCount;
494 /// Apply loop unroll on any kind of loop
495 /// (mainly to loops that fail runtime unrolling).
496 bool Force;
497 /// Allow using trip count upper bound to unroll loops.
498 bool UpperBound;
499 /// Allow unrolling of all the iterations of the runtime loop remainder.
500 bool UnrollRemainder;
501 /// Allow unroll and jam. Used to enable unroll and jam for the target.
502 bool UnrollAndJam;
503 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
504 /// value above is used during unroll and jam for the outer loop size.
505 /// This value is used in the same manner to limit the size of the inner
506 /// loop.
507 unsigned UnrollAndJamInnerLoopThreshold;
508 /// Don't allow loop unrolling to simulate more than this number of
509 /// iterations when checking full unroll profitability
510 unsigned MaxIterationsCountToAnalyze;
511 };
512
513 /// Get target-customized preferences for the generic loop unrolling
514 /// transformation. The caller will initialize UP with the current
515 /// target-independent defaults.
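  ///
  /// A minimal sketch of how a caller might consume the result (the
  /// surrounding variables are assumptions, not part of this interface):
  /// \code
  ///   TargetTransformInfo::UnrollingPreferences UP;
  ///   // ... initialize UP with target-independent defaults ...
  ///   TTI.getUnrollingPreferences(L, SE, UP);
  ///   if (UP.Partial && LoopSize > UP.PartialThreshold)
  ///     ; // Partial unrolling is capped by the target's threshold.
  /// \endcode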
516 void getUnrollingPreferences(Loop *L, ScalarEvolution &,
517 UnrollingPreferences &UP) const;
518
519 /// Query the target whether it would be profitable to convert the given loop
520 /// into a hardware loop.
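  ///
  /// Illustrative sketch only; the analyses (\c SE, \c LI, \c DT, \c AC,
  /// \c LibInfo) are assumed to be available in the caller:
  /// \code
  ///   HardwareLoopInfo HWLoopInfo(L);
  ///   if (HWLoopInfo.canAnalyze(LI) &&
  ///       TTI.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo) &&
  ///       HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT))
  ///     ; // Consider converting this loop into a hardware loop.
  /// \endcode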
521 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
522 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
523 HardwareLoopInfo &HWLoopInfo) const;
524
  /// Query the target whether it would be preferred to create a predicated
526 /// vector loop, which can avoid the need to emit a scalar epilogue loop.
527 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
528 AssumptionCache &AC, TargetLibraryInfo *TLI,
529 DominatorTree *DT,
530 const LoopAccessInfo *LAI) const;
531
532 /// Query the target whether lowering of the llvm.get.active.lane.mask
533 /// intrinsic is supported.
534 bool emitGetActiveLaneMask() const;
535
536 // Parameters that control the loop peeling transformation
537 struct PeelingPreferences {
    /// A forced peeling factor (the number of bodies of the original loop
    /// that should be peeled off before the loop body). When set to 0, a
    /// peeling factor is selected based on profile information and other
    /// factors.
541 unsigned PeelCount;
542 /// Allow peeling off loop iterations.
543 bool AllowPeeling;
544 /// Allow peeling off loop iterations for loop nests.
545 bool AllowLoopNestsPeeling;
    /// Allow peeling based on profile. Used to enable peeling off all
    /// iterations based on the provided profile.
548 /// If the value is true the peeling cost model can decide to peel only
549 /// some iterations and in this case it will set this to false.
550 bool PeelProfiledIterations;
551 };
552
553 /// Get target-customized preferences for the generic loop peeling
554 /// transformation. The caller will initialize \p PP with the current
555 /// target-independent defaults with information from \p L and \p SE.
556 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
557 PeelingPreferences &PP) const;
558
559 /// Targets can implement their own combinations for target-specific
560 /// intrinsics. This function will be called from the InstCombine pass every
561 /// time a target-specific intrinsic is encountered.
562 ///
  /// \returns None to not do anything target specific or a value that will be
  /// returned from the InstCombiner. It is also possible to stop further
  /// processing of the intrinsic by returning nullptr.
566 Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
567 IntrinsicInst &II) const;
568 /// Can be used to implement target-specific instruction combining.
569 /// \see instCombineIntrinsic
570 Optional<Value *>
571 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
572 APInt DemandedMask, KnownBits &Known,
573 bool &KnownBitsComputed) const;
574 /// Can be used to implement target-specific instruction combining.
575 /// \see instCombineIntrinsic
576 Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
577 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
578 APInt &UndefElts2, APInt &UndefElts3,
579 std::function<void(Instruction *, unsigned, APInt, APInt &)>
580 SimplifyAndSetOp) const;
581 /// @}
582
583 /// \name Scalar Target Information
584 /// @{
585
586 /// Flags indicating the kind of support for population count.
587 ///
588 /// Compared to the SW implementation, HW support is supposed to
589 /// significantly boost the performance when the population is dense, and it
  /// may or may not degrade performance if the population is sparse. HW
  /// support is considered "Fast" if it can outperform, or is on a par with,
  /// the SW implementation when the population is sparse; otherwise, it is
  /// considered "Slow".
594 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
595
  /// Return true if the specified immediate is a legal add immediate, that
597 /// is the target has add instructions which can add a register with the
598 /// immediate without having to materialize the immediate into a register.
599 bool isLegalAddImmediate(int64_t Imm) const;
600
  /// Return true if the specified immediate is a legal icmp immediate,
602 /// that is the target has icmp instructions which can compare a register
603 /// against the immediate without having to materialize the immediate into a
604 /// register.
605 bool isLegalICmpImmediate(int64_t Imm) const;
606
607 /// Return true if the addressing mode represented by AM is legal for
608 /// this target, for a load/store of the specified type.
609 /// The type may be VoidTy, in which case only return true if the addressing
610 /// mode is legal for a load/store of any legal type.
611 /// If target returns true in LSRWithInstrQueries(), I may be valid.
612 /// TODO: Handle pre/postinc as well.
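  ///
  /// For illustration, a query for a reg+reg*scale mode might look like this
  /// (\c Int32Ty is an assumed i32 type, \c TTI an assumed reference):
  /// \code
  ///   // Can a load of i32 use a [BaseReg + 4 * IndexReg] address?
  ///   bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  ///                                          /*BaseOffset=*/0,
  ///                                          /*HasBaseReg=*/true,
  ///                                          /*Scale=*/4);
  /// \endcode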
613 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
614 bool HasBaseReg, int64_t Scale,
615 unsigned AddrSpace = 0,
616 Instruction *I = nullptr) const;
617
  /// Return true if LSR cost of C1 is lower than C2.
619 bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
620 TargetTransformInfo::LSRCost &C2) const;
621
  /// Return true if the major cost of LSR is the number of registers. Targets
  /// which implement their own isLSRCostLess and do not treat the number of
  /// registers as the major cost should return false; otherwise return true.
625 bool isNumRegsMajorCostOfLSR() const;
626
627 /// \returns true if LSR should not optimize a chain that includes \p I.
628 bool isProfitableLSRChainElement(Instruction *I) const;
629
630 /// Return true if the target can fuse a compare and branch.
631 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
632 /// calculation for the instructions in a loop.
633 bool canMacroFuseCmp() const;
634
635 /// Return true if the target can save a compare for loop count, for example
636 /// hardware loop saves a compare.
637 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
638 DominatorTree *DT, AssumptionCache *AC,
639 TargetLibraryInfo *LibInfo) const;
640
  /// \return True if LSR should make efforts to create/preserve post-inc
642 /// addressing mode expressions.
643 bool shouldFavorPostInc() const;
644
645 /// Return true if LSR should make efforts to generate indexed addressing
646 /// modes that operate across loop iterations.
647 bool shouldFavorBackedgeIndex(const Loop *L) const;
648
649 /// Return true if the target supports masked store.
650 bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
651 /// Return true if the target supports masked load.
652 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
653
654 /// Return true if the target supports nontemporal store.
655 bool isLegalNTStore(Type *DataType, Align Alignment) const;
656 /// Return true if the target supports nontemporal load.
657 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
658
659 /// Return true if the target supports masked scatter.
660 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
661 /// Return true if the target supports masked gather.
662 bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
663
664 /// Return true if the target supports masked compress store.
665 bool isLegalMaskedCompressStore(Type *DataType) const;
666 /// Return true if the target supports masked expand load.
667 bool isLegalMaskedExpandLoad(Type *DataType) const;
668
669 /// Return true if the target has a unified operation to calculate division
670 /// and remainder. If so, the additional implicit multiplication and
671 /// subtraction required to calculate a remainder from division are free. This
672 /// can enable more aggressive transformations for division and remainder than
673 /// would typically be allowed using throughput or size cost models.
674 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
675
676 /// Return true if the given instruction (assumed to be a memory access
677 /// instruction) has a volatile variant. If that's the case then we can avoid
678 /// addrspacecast to generic AS for volatile loads/stores. Default
679 /// implementation returns false, which prevents address space inference for
680 /// volatile loads/stores.
681 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
682
683 /// Return true if target doesn't mind addresses in vectors.
684 bool prefersVectorizedAddressing() const;
685
686 /// Return the cost of the scaling factor used in the addressing
687 /// mode represented by AM for this target, for a load/store
688 /// of the specified type.
689 /// If the AM is supported, the return value must be >= 0.
690 /// If the AM is not supported, it returns a negative value.
691 /// TODO: Handle pre/postinc as well.
692 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
693 bool HasBaseReg, int64_t Scale,
694 unsigned AddrSpace = 0) const;
695
696 /// Return true if the loop strength reduce pass should make
697 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
698 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
699 /// immediate offset and no index register.
700 bool LSRWithInstrQueries() const;
701
702 /// Return true if it's free to truncate a value of type Ty1 to type
703 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
704 /// by referencing its sub-register AX.
705 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
706
707 /// Return true if it is profitable to hoist instruction in the
708 /// then/else to before if.
709 bool isProfitableToHoist(Instruction *I) const;
710
711 bool useAA() const;
712
713 /// Return true if this type is legal.
714 bool isTypeLegal(Type *Ty) const;
715
716 /// Returns the estimated number of registers required to represent \p Ty.
717 unsigned getRegUsageForType(Type *Ty) const;
718
719 /// Return true if switches should be turned into lookup tables for the
720 /// target.
721 bool shouldBuildLookupTables() const;
722
723 /// Return true if switches should be turned into lookup tables
724 /// containing this constant value for the target.
725 bool shouldBuildLookupTablesForConstant(Constant *C) const;
726
  /// Return true if the input function, which is cold at all call sites,
  /// should use the coldcc calling convention.
729 bool useColdCCForColdCall(Function &F) const;
730
731 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
732 /// are set if the demanded result elements need to be inserted and/or
733 /// extracted from vectors.
734 unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
735 bool Insert, bool Extract) const;
736
  /// Estimate the overhead of scalarizing an instruction's unique
738 /// non-constant operands. The types of the arguments are ordinarily
739 /// scalar, in which case the costs are multiplied with VF.
740 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
741 unsigned VF) const;
742
743 /// If target has efficient vector element load/store instructions, it can
744 /// return true here so that insertion/extraction costs are not added to
745 /// the scalarization cost of a load/store.
746 bool supportsEfficientVectorElementLoadStore() const;
747
748 /// Don't restrict interleaved unrolling to small loops.
749 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
750
  /// Returns options for expansion of memcmp. IsZeroCmp is
  /// true if this is the expansion of memcmp(p1, p2, s) == 0.
753 struct MemCmpExpansionOptions {
754 // Return true if memcmp expansion is enabled.
755 operator bool() const { return MaxNumLoads > 0; }
756
757 // Maximum number of load operations.
758 unsigned MaxNumLoads = 0;
759
760 // The list of available load sizes (in bytes), sorted in decreasing order.
761 SmallVector<unsigned, 8> LoadSizes;
762
763 // For memcmp expansion when the memcmp result is only compared equal or
764 // not-equal to 0, allow up to this number of load pairs per block. As an
765 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
766 // a0 = load2bytes &a[0]
767 // b0 = load2bytes &b[0]
768 // a2 = load1byte &a[2]
769 // b2 = load1byte &b[2]
770 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
771 unsigned NumLoadsPerBlock = 1;
772
773 // Set to true to allow overlapping loads. For example, 7-byte compares can
774 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
775 // requires all loads in LoadSizes to be doable in an unaligned way.
776 bool AllowOverlappingLoads = false;
777 };
778 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
779 bool IsZeroCmp) const;
780
781 /// Enable matching of interleaved access groups.
782 bool enableInterleavedAccessVectorization() const;
783
784 /// Enable matching of interleaved access groups that contain predicated
785 /// accesses or gaps and therefore vectorized using masked
786 /// vector loads/stores.
787 bool enableMaskedInterleavedAccessVectorization() const;
788
789 /// Indicate that it is potentially unsafe to automatically vectorize
  /// floating-point operations because vector and scalar floating-point
  /// semantics may differ. For example, ARM NEON v7 SIMD math
792 /// does not support IEEE-754 denormal numbers, while depending on the
793 /// platform, scalar floating-point math does.
794 /// This applies to floating-point math operations and calls, not memory
795 /// operations, shuffles, or casts.
796 bool isFPVectorizationPotentiallyUnsafe() const;
797
798 /// Determine if the target supports unaligned memory accesses.
799 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
800 unsigned AddressSpace = 0,
801 unsigned Alignment = 1,
802 bool *Fast = nullptr) const;
803
804 /// Return hardware support for population count.
805 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
806
807 /// Return true if the hardware has a fast square-root instruction.
808 bool haveFastSqrt(Type *Ty) const;
809
810 /// Return true if it is faster to check if a floating-point value is NaN
811 /// (or not-NaN) versus a comparison against a constant FP zero value.
812 /// Targets should override this if materializing a 0.0 for comparison is
813 /// generally as cheap as checking for ordered/unordered.
814 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
815
816 /// Return the expected cost of supporting the floating point operation
817 /// of the specified type.
818 int getFPOpCost(Type *Ty) const;
819
820 /// Return the expected cost of materializing for the given integer
821 /// immediate of the specified type.
822 int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const;
823
824 /// Return the expected cost of materialization for the given integer
825 /// immediate of the specified type for a given instruction. The cost can be
826 /// zero if the immediate can be folded into the specified instruction.
827 int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
828 TargetCostKind CostKind,
829 Instruction *Inst = nullptr) const;
830 int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
831 Type *Ty, TargetCostKind CostKind) const;
832
833 /// Return the expected cost for the given integer when optimising
834 /// for size. This is different than the other integer immediate cost
835 /// functions in that it is subtarget agnostic. This is useful when you e.g.
  /// target one ISA such as AArch32 but smaller encodings could be possible
  /// with another such as Thumb. This return value is used as a penalty when
  /// the total cost for a constant is calculated (the bigger the cost, the
839 /// more beneficial constant hoisting is).
840 int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
841 Type *Ty) const;
842 /// @}
843
844 /// \name Vector Target Information
845 /// @{
846
847 /// The various kinds of shuffle patterns for vector queries.
848 enum ShuffleKind {
849 SK_Broadcast, ///< Broadcast element 0 to all other elements.
850 SK_Reverse, ///< Reverse the order of the vector.
851 SK_Select, ///< Selects elements from the corresponding lane of
852 ///< either source operand. This is equivalent to a
853 ///< vector select with a constant condition operand.
854 SK_Transpose, ///< Transpose two vectors.
855 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
856 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
857 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
858 ///< with any shuffle mask.
859 SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
860 ///< shuffle mask.
861 };
862
863 /// Kind of the reduction data.
864 enum ReductionKind {
    RK_None,           ///< Not a reduction.
    RK_Arithmetic,     ///< Binary reduction data.
    RK_MinMax,         ///< Min/max reduction data.
    RK_UnsignedMinMax, ///< Unsigned min/max reduction data.
869 };
870
871 /// Contains opcode + LHS/RHS parts of the reduction operations.
872 struct ReductionData {
873 ReductionData() = delete;
    ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
875 : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
876 assert(Kind != RK_None && "expected binary or min/max reduction only.");
877 }
878 unsigned Opcode = 0;
879 Value *LHS = nullptr;
880 Value *RHS = nullptr;
881 ReductionKind Kind = RK_None;
    bool hasSameData(ReductionData &RD) const {
883 return Kind == RD.Kind && Opcode == RD.Opcode;
884 }
885 };
886
887 static ReductionKind matchPairwiseReduction(
888 const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
889
890 static ReductionKind matchVectorSplittingReduction(
891 const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
892
893 static ReductionKind matchVectorReduction(const ExtractElementInst *ReduxRoot,
894 unsigned &Opcode, VectorType *&Ty,
895 bool &IsPairwise);
896
897 /// Additional information about an operand's possible values.
898 enum OperandValueKind {
899 OK_AnyValue, // Operand can have any value.
900 OK_UniformValue, // Operand is uniform (splat of a value).
901 OK_UniformConstantValue, // Operand is uniform constant.
902 OK_NonUniformConstantValue // Operand is a non uniform constant value.
903 };
904
905 /// Additional properties of an operand's values.
906 enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
907
908 /// \return the number of registers in the target-provided register class.
909 unsigned getNumberOfRegisters(unsigned ClassID) const;
910
911 /// \return the target-provided register class ID for the provided type,
912 /// accounting for type promotion and other type-legalization techniques that
913 /// the target might apply. However, it specifically does not account for the
914 /// scalarization or splitting of vector types. Should a vector type require
915 /// scalarization or splitting into multiple underlying vector registers, that
916 /// type should be mapped to a register class containing no registers.
917 /// Specifically, this is designed to provide a simple, high-level view of the
918 /// register allocation later performed by the backend. These register classes
919 /// don't necessarily map onto the register classes used by the backend.
920 /// FIXME: It's not currently possible to determine how many registers
921 /// are used by the provided type.
922 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
923
924 /// \return the target-provided register class name
925 const char *getRegisterClassName(unsigned ClassID) const;
926
927 /// \return The width of the largest scalar or vector register type.
928 unsigned getRegisterBitWidth(bool Vector) const;
929
930 /// \return The width of the smallest vector register type.
931 unsigned getMinVectorRegisterBitWidth() const;
932
933 /// \return The maximum value of vscale if the target specifies an
934 /// architectural maximum vector length, and None otherwise.
935 Optional<unsigned> getMaxVScale() const;
936
937 /// \return True if the vectorization factor should be chosen to
938 /// make the vector of the smallest element type match the size of a
939 /// vector register. For wider element types, this could result in
940 /// creating vectors that span multiple vector registers.
941 /// If false, the vectorization factor will be chosen based on the
942 /// size of the widest element type.
943 bool shouldMaximizeVectorBandwidth(bool OptSize) const;
944
945 /// \return The minimum vectorization factor for types of given element
946 /// bit width, or 0 if there is no minimum VF. The returned value only
947 /// applies when shouldMaximizeVectorBandwidth returns true.
948 unsigned getMinimumVF(unsigned ElemWidth) const;
949
950 /// \return The maximum vectorization factor for types of given element
951 /// bit width and opcode, or 0 if there is no maximum VF.
952 /// Currently only used by the SLP vectorizer.
953 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
954
955 /// \return True if it should be considered for address type promotion.
956 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
957 /// profitable without finding other extensions fed by the same input.
958 bool shouldConsiderAddressTypePromotion(
959 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
960
961 /// \return The size of a cache line in bytes.
962 unsigned getCacheLineSize() const;
963
964 /// The possible cache levels
965 enum class CacheLevel {
966 L1D, // The L1 data cache
967 L2D, // The L2 data cache
968
969 // We currently do not model L3 caches, as their sizes differ widely between
970 // microarchitectures. Also, we currently do not have a use for L3 cache
971 // size modeling yet.
972 };
973
974 /// \return The size of the cache level in bytes, if available.
975 Optional<unsigned> getCacheSize(CacheLevel Level) const;
976
977 /// \return The associativity of the cache level, if available.
978 Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
979
980 /// \return How much before a load we should place the prefetch
981 /// instruction. This is currently measured in number of
982 /// instructions.
983 unsigned getPrefetchDistance() const;
984
985 /// Some HW prefetchers can handle accesses up to a certain constant stride.
986 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
987 /// and the arguments provided are meant to serve as a basis for deciding this
988 /// for a particular loop.
989 ///
990 /// \param NumMemAccesses Number of memory accesses in the loop.
991 /// \param NumStridedMemAccesses Number of the memory accesses that
992 /// ScalarEvolution could find a known stride
993 /// for.
994 /// \param NumPrefetches Number of software prefetches that will be
995 /// emitted as determined by the addresses
996 /// involved and the cache line size.
997 /// \param HasCall True if the loop contains a call.
998 ///
999 /// \return This is the minimum stride in bytes where it makes sense to start
1000 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1001 /// stride.
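  ///
  /// Illustrative sketch only; the loop statistics below are assumed to have
  /// been gathered by the caller:
  /// \code
  ///   if (Stride >= TTI.getMinPrefetchStride(NumMemAccesses,
  ///                                          NumStridedMemAccesses,
  ///                                          NumPrefetches, HasCall))
  ///     ; // Worth emitting a software prefetch for this access.
  /// \endcode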
1002 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1003 unsigned NumStridedMemAccesses,
1004 unsigned NumPrefetches, bool HasCall) const;
1005
1006 /// \return The maximum number of iterations to prefetch ahead. If
1007 /// the required number of iterations is more than this number, no
1008 /// prefetching is performed.
1009 unsigned getMaxPrefetchIterationsAhead() const;
1010
1011 /// \return True if prefetching should also be done for writes.
1012 bool enableWritePrefetching() const;
1013
1014 /// \return The maximum interleave factor that any transform should try to
1015 /// perform for this target. This number depends on the level of parallelism
1016 /// and the number of execution units in the CPU.
1017 unsigned getMaxInterleaveFactor(unsigned VF) const;
1018
1019 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1020 static OperandValueKind getOperandInfo(const Value *V,
1021 OperandValueProperties &OpProps);
1022
1023 /// This is an approximation of reciprocal throughput of a math/logic op.
1024 /// A higher cost indicates less expected throughput.
1025 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1026 /// clock cycles per instruction when the instructions are not part of a
1027 /// limiting dependency chain."
1028 /// Therefore, costs should be scaled to account for multiple execution units
1029 /// on the target that can process this type of instruction. For example, if
1030 /// there are 5 scalar integer units and 2 vector integer units that can
1031 /// calculate an 'add' in a single cycle, this model should indicate that the
1032 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1033 /// add instruction.
1034 /// \p Args is an optional argument which holds the instruction operands
1035 /// values so the TTI can analyze those values searching for special
1036 /// cases or optimizations based on those values.
1037 /// \p CxtI is the optional original context instruction, if one exists, to
1038 /// provide even more information.
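  ///
  /// A sketch of a typical query (illustrative only; \c VecTy is an assumed
  /// vector type):
  /// \code
  ///   // Throughput cost of a vector shift whose shift amount is a uniform
  ///   // constant power of two.
  ///   int Cost = TTI.getArithmeticInstrCost(
  ///       Instruction::Shl, VecTy, TargetTransformInfo::TCK_RecipThroughput,
  ///       TargetTransformInfo::OK_AnyValue,
  ///       TargetTransformInfo::OK_UniformConstantValue,
  ///       TargetTransformInfo::OP_None, TargetTransformInfo::OP_PowerOf2);
  /// \endcode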
1039 int getArithmeticInstrCost(
1040 unsigned Opcode, Type *Ty,
1041 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1042 OperandValueKind Opd1Info = OK_AnyValue,
1043 OperandValueKind Opd2Info = OK_AnyValue,
1044 OperandValueProperties Opd1PropInfo = OP_None,
1045 OperandValueProperties Opd2PropInfo = OP_None,
1046 ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
1047 const Instruction *CxtI = nullptr) const;
1048
1049 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1050 /// The index and subtype parameters are used by the subvector insertion and
1051 /// extraction shuffle kinds to show the insert/extract point and the type of
1052 /// the subvector being inserted/extracted.
1053 /// NOTE: For subvector extractions Tp represents the source type.
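  ///
  /// Illustrative sketch only (\c V8Ty and \c V4Ty are assumed 8- and
  /// 4-element vector types):
  /// \code
  ///   // Cost of extracting the upper half of an 8-element vector.
  ///   int Cost = TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
  ///                                 V8Ty, /*Index=*/4, V4Ty);
  /// \endcode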
1054 int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0,
1055 VectorType *SubTp = nullptr) const;
1056
1057 /// Represents a hint about the context in which a cast is used.
1058 ///
1059 /// For zext/sext, the context of the cast is the operand, which must be a
  /// load of some kind. For trunc, the context of the cast is the single
1061 /// user of the instruction, which must be a store of some kind.
1062 ///
1063 /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1064 /// type of cast it's dealing with, as not every cast is equal. For instance,
1065 /// the zext of a load may be free, but the zext of an interleaving load can
  /// be (very) expensive!
1067 ///
1068 /// See \c getCastContextHint to compute a CastContextHint from a cast
1069 /// Instruction*. Callers can use it if they don't need to override the
1070 /// context and just want it to be calculated from the instruction.
1071 ///
1072 /// FIXME: This handles the types of load/store that the vectorizer can
1073 /// produce, which are the cases where the context instruction is most
1074 /// likely to be incorrect. There are other situations where that can happen
1075 /// too, which might be handled here but in the long run a more general
1076 /// solution of costing multiple instructions at the same times may be better.
1077 enum class CastContextHint : uint8_t {
1078 None, ///< The cast is not used with a load/store of any kind.
1079 Normal, ///< The cast is used with a normal load/store.
1080 Masked, ///< The cast is used with a masked load/store.
1081 GatherScatter, ///< The cast is used with a gather/scatter.
1082 Interleave, ///< The cast is used with an interleaved load/store.
1083 Reversed, ///< The cast is used with a reversed load/store.
1084 };
1085
1086 /// Calculates a CastContextHint from \p I.
1087 /// This should be used by callers of getCastInstrCost if they wish to
1088 /// determine the context from some instruction.
1089 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1090 /// or if it's another type of cast.
1091 static CastContextHint getCastContextHint(const Instruction *I);
1092
1093 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1094 /// zext, etc. If there is an existing instruction that holds Opcode, it
1095 /// may be passed in the 'I' parameter.
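  ///
  /// Illustrative sketch only, assuming \c CastI points at an existing cast
  /// instruction:
  /// \code
  ///   TargetTransformInfo::CastContextHint CCH =
  ///       TargetTransformInfo::getCastContextHint(CastI);
  ///   int Cost = TTI.getCastInstrCost(CastI->getOpcode(), CastI->getType(),
  ///                                   CastI->getOperand(0)->getType(), CCH,
  ///                                   TargetTransformInfo::TCK_RecipThroughput,
  ///                                   CastI);
  /// \endcode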
1096 int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1097 TTI::CastContextHint CCH,
1098 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1099 const Instruction *I = nullptr) const;
1100
1101 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1102 /// -1 to indicate that there is no information about the index value.
1103 int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
1104 unsigned Index = -1) const;
1105
1106 /// \return The expected cost of control-flow related instructions such as
1107 /// Phi, Ret, Br.
1108 int getCFInstrCost(unsigned Opcode,
1109 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1110
1111 /// \returns The expected cost of compare and select instructions. If there
1112 /// is an existing instruction that holds Opcode, it may be passed in the
1113 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1114 /// is using a compare with the specified predicate as condition. When vector
1115 /// types are passed, \p VecPred must be used for all lanes.
1116 int getCmpSelInstrCost(
1117 unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
1118 CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
1119 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1120 const Instruction *I = nullptr) const;
1121
1122 /// \return The expected cost of vector Insert and Extract.
1123 /// Use -1 to indicate that there is no information on the index value.
1124 int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
1125
1126 /// \return The cost of Load and Store instructions.
1127 int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1128 unsigned AddressSpace,
1129 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1130 const Instruction *I = nullptr) const;
1131
1132 /// \return The cost of masked Load and Store instructions.
1133 int getMaskedMemoryOpCost(
1134 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1135 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1136
1137 /// \return The cost of Gather or Scatter operation
1138 /// \p Opcode - is a type of memory access Load or Store
1139 /// \p DataTy - a vector type of the data to be loaded or stored
1140 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1141 /// \p VariableMask - true when the memory access is predicated with a mask
1142 /// that is not a compile-time constant
1143 /// \p Alignment - alignment of single element
1144 /// \p I - the optional original context instruction, if one exists, e.g. the
1145 /// load/store to transform or the call to the gather/scatter intrinsic
1146 int getGatherScatterOpCost(
1147 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1148 Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1149 const Instruction *I = nullptr) const;
1150
1151 /// \return The cost of the interleaved memory operation.
1152 /// \p Opcode is the memory operation code
1153 /// \p VecTy is the vector type of the interleaved access.
1154 /// \p Factor is the interleave factor
1155 /// \p Indices is the indices for interleaved load members (as interleaved
1156 /// load allows gaps)
1157 /// \p Alignment is the alignment of the memory operation
1158 /// \p AddressSpace is address space of the pointer.
1159 /// \p UseMaskForCond indicates if the memory access is predicated.
1160 /// \p UseMaskForGaps indicates if gaps should be masked.
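  ///
  /// Illustrative sketch only (\c WideVecTy is an assumed vector type wide
  /// enough to cover the whole interleave group):
  /// \code
  ///   // Cost of a stride-2 interleaved load that uses both members.
  ///   int Cost = TTI.getInterleavedMemoryOpCost(
  ///       Instruction::Load, WideVecTy, /*Factor=*/2, /*Indices=*/{0, 1},
  ///       Align(4), /*AddressSpace=*/0);
  /// \endcode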
1161 int getInterleavedMemoryOpCost(
1162 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1163 Align Alignment, unsigned AddressSpace,
1164 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1165 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1166
1167 /// Calculate the cost of performing a vector reduction.
1168 ///
1169 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1170 /// value using the operation denoted by \p Opcode. The form of the reduction
1171 /// can either be a pairwise reduction or a reduction that splits the vector
1172 /// at every reduction level.
1173 ///
1174 /// Pairwise:
1175 /// (v0, v1, v2, v3)
1176 /// ((v0+v1), (v2+v3), undef, undef)
1177 /// Split:
1178 /// (v0, v1, v2, v3)
1179 /// ((v0+v2), (v1+v3), undef, undef)
1180 int getArithmeticReductionCost(
1181 unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
1182 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1183
1184 int getMinMaxReductionCost(
1185 VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
1186 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1187
1188 /// Calculate the cost of an extended reduction pattern, similar to
1189 /// getArithmeticReductionCost of an Add reduction with an extension and
  /// optional multiply. This is the cost of:
  ///   ResTy vecreduce.add(ext(Ty A)), or if IsMLA flag is set then:
  ///   ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). The reduction happens
1193 /// on a VectorType with ResTy elements and Ty lanes.
1194 InstructionCost getExtendedAddReductionCost(
1195 bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1196 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1197
1198 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1199 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1200 /// 3. scalar instruction which is to be vectorized.
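  ///
  /// Illustrative sketch only, assuming \c Call is an existing call
  /// instruction:
  /// \code
  ///   if (auto *II = dyn_cast<IntrinsicInst>(&Call)) {
  ///     IntrinsicCostAttributes Attrs(*II);
  ///     int Cost = TTI.getIntrinsicInstrCost(
  ///         Attrs, TargetTransformInfo::TCK_SizeAndLatency);
  ///   }
  /// \endcode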
1201 int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1202 TTI::TargetCostKind CostKind) const;
1203
1204 /// \returns The cost of Call instructions.
1205 int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1206 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1207
1208 /// \returns The number of pieces into which the provided type must be
1209 /// split during legalization. Zero is returned when the answer is unknown.
1210 unsigned getNumberOfParts(Type *Tp) const;
1211
1212 /// \returns The cost of the address computation. For most targets this can be
1213 /// merged into the instruction indexing mode. Some targets might want to
1214 /// distinguish between address computation for memory operations on vector
1215 /// types and scalar types. Such targets should override this function.
1216 /// The 'SE' parameter holds a pointer to the scalar evolution object, which
1217 /// is used to obtain the step value of 'Ptr' in the case of a constant stride.
1218 /// The 'Ptr' parameter holds the SCEV of the access pointer.
1219 int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
1220 const SCEV *Ptr = nullptr) const;
1221
1222 /// \returns The cost, if any, of keeping values of the given types alive
1223 /// over a callsite.
1224 ///
1225 /// Some types may require the use of register classes that do not have
1226 /// any callee-saved registers, so would require a spill and fill.
1227 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1228
1229 /// \returns True if the intrinsic is a supported memory intrinsic. Info
1230 /// will contain additional information - whether the intrinsic may read
1231 /// or write memory, its volatility, and the pointer it accesses. Info is
1232 /// undefined if false is returned.
1233 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
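// Illustrative sketch (assumed usage): a pass such as EarlyCSE can treat a
// target load/store intrinsic like an ordinary memory access when the target
// describes it, given a TargetTransformInfo &TTI and an IntrinsicInst *Inst:
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(Inst, Info) && Info.ReadMem &&
//       Info.isUnordered())
//     /* ... reuse the value loaded from Info.PtrVal ... */;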
1234
1235 /// \returns The maximum element size, in bytes, for an element
1236 /// unordered-atomic memory intrinsic.
1237 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1238
1239 /// \returns A value which is the result of the given memory intrinsic. New
1240 /// instructions may be created to extract the result from the given intrinsic
1241 /// memory operation. Returns nullptr if the target cannot create a result
1242 /// from the given intrinsic.
1243 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1244 Type *ExpectedType) const;
1245
1246 /// \returns The type to use in a loop expansion of a memcpy call.
1247 Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1248 unsigned SrcAddrSpace, unsigned DestAddrSpace,
1249 unsigned SrcAlign, unsigned DestAlign) const;
1250
1251 /// \param[out] OpsOut The operand types used to copy \p RemainingBytes of memory.
1252 /// \param RemainingBytes The number of bytes to copy.
1253 ///
1254 /// Calculates the operand types to use when copying \p RemainingBytes of
1255 /// memory, where source and destination alignments are \p SrcAlign and
1256 /// \p DestAlign respectively.
1257 void getMemcpyLoopResidualLoweringType(
1258 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1259 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1260 unsigned SrcAlign, unsigned DestAlign) const;
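// Illustrative sketch (assumed usage): lowering a memcpy into a loop plus a
// residual copy of the trailing bytes (6 here, chosen only for illustration),
// given a TargetTransformInfo &TTI, an LLVMContext &Ctx, and the memcpy length
// Value *Len:
//   Type *LoopOpTy = TTI.getMemcpyLoopLoweringType(Ctx, Len,
//                                                  /*SrcAddrSpace=*/0,
//                                                  /*DestAddrSpace=*/0,
//                                                  /*SrcAlign=*/4,
//                                                  /*DestAlign=*/4);
//   SmallVector<Type *, 4> ResidualTys;
//   TTI.getMemcpyLoopResidualLoweringType(ResidualTys, Ctx,
//                                         /*RemainingBytes=*/6,
//                                         /*SrcAddrSpace=*/0,
//                                         /*DestAddrSpace=*/0,
//                                         /*SrcAlign=*/4, /*DestAlign=*/4);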
1261
1262 /// \returns True if the two functions have compatible attributes for inlining
1263 /// purposes.
1264 bool areInlineCompatible(const Function *Caller,
1265 const Function *Callee) const;
1266
1267 /// \returns True if the caller and callee agree on how \p Args will be passed
1268 /// to the callee.
1269 /// \param[out] Args The list of compatible arguments. The implementation may
1270 /// filter out any incompatible args from this list.
1271 bool areFunctionArgsABICompatible(const Function *Caller,
1272 const Function *Callee,
1273 SmallPtrSetImpl<Argument *> &Args) const;
1274
1275 /// The type of load/store indexing.
1276 enum MemIndexedMode {
1277 MIM_Unindexed, ///< No indexing.
1278 MIM_PreInc, ///< Pre-incrementing.
1279 MIM_PreDec, ///< Pre-decrementing.
1280 MIM_PostInc, ///< Post-incrementing.
1281 MIM_PostDec ///< Post-decrementing.
1282 };
1283
1284 /// \returns True if the specified indexed load for the given type is legal.
1285 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1286
1287 /// \returns True if the specified indexed store for the given type is legal.
1288 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
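// Illustrative sketch (assumed usage): checking whether a post-incrementing
// i32 load is legal on the target, given a TargetTransformInfo &TTI and an
// LLVMContext &Ctx:
//   if (TTI.isIndexedLoadLegal(TargetTransformInfo::MIM_PostInc,
//                              Type::getInt32Ty(Ctx)))
//     /* ... form a post-increment addressing pattern ... */;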
1289
1290 /// \returns The bitwidth of the largest vector type that should be used to
1291 /// load/store in the given address space.
1292 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1293
1294 /// \returns True if the load instruction is legal to vectorize.
1295 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1296
1297 /// \returns True if the store instruction is legal to vectorize.
1298 bool isLegalToVectorizeStore(StoreInst *SI) const;
1299
1300 /// \returns True if it is legal to vectorize the given load chain.
1301 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1302 unsigned AddrSpace) const;
1303
1304 /// \returns True if it is legal to vectorize the given store chain.
1305 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1306 unsigned AddrSpace) const;
1307
1308 /// \returns The new vector factor value if the target doesn't support \p
1309 /// LoadSize loads or has a better vector factor.
1310 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1311 unsigned ChainSizeInBytes,
1312 VectorType *VecTy) const;
1313
1314 /// \returns The new vector factor value if the target doesn't support \p
1315 /// StoreSize stores or has a better vector factor.
1316 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1317 unsigned ChainSizeInBytes,
1318 VectorType *VecTy) const;
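// Illustrative sketch (assumed usage): a load/store vectorizer might validate
// a candidate chain and then let the target shrink the vector factor, given a
// TargetTransformInfo &TTI and a VectorType *VecTy already in scope:
//   if (TTI.isLegalToVectorizeLoadChain(/*ChainSizeInBytes=*/16, Align(4),
//                                       /*AddrSpace=*/0)) {
//     unsigned NewVF = TTI.getLoadVectorFactor(/*VF=*/4, /*LoadSize=*/32,
//                                              /*ChainSizeInBytes=*/16, VecTy);
//     // The target may return a smaller factor than requested.
//   }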
1319
1320 /// Flags describing the kind of vector reduction.
1321 struct ReductionFlags {
1322 ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
1323 bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
1324 bool IsSigned; ///< Whether the operation is a signed int reduction.
1325 bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
1326 };
1327
1328 /// \returns True if the target wants to handle the given reduction idiom in
1329 /// the intrinsics form instead of the shuffle form.
1330 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1331 ReductionFlags Flags) const;
1332
1333 /// \returns True if the target prefers reductions to be performed in the loop.
1334 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1335 ReductionFlags Flags) const;
1336
1337 /// \returns True if the target prefers the reduction select to be kept in
1338 /// the loop when tail folding, i.e.
1339 /// loop:
1340 /// p = phi (0, s)
1341 /// a = add (p, x)
1342 /// s = select (mask, a, p)
1343 /// vecreduce.add(s)
1344 ///
1345 /// This is in contrast to the normal scheme of p = phi (0, a), which allows
1346 /// the select to be pulled out of the loop. If the select(.., add, ..) can be
1347 /// predicated by the target, this can lead to cleaner code generation.
1348 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1349 ReductionFlags Flags) const;
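// Illustrative sketch (assumed usage): deciding whether to keep an integer
// add reduction in the loop, given a TargetTransformInfo &TTI and an
// LLVMContext &Ctx:
//   TargetTransformInfo::ReductionFlags Flags;
//   Flags.IsSigned = true;
//   bool InLoop = TTI.preferInLoopReduction(Instruction::Add,
//                                           Type::getInt32Ty(Ctx), Flags);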
1350
1351 /// \returns True if the target wants to expand the given reduction intrinsic
1352 /// into a shuffle sequence.
1353 bool shouldExpandReduction(const IntrinsicInst *II) const;
1354
1355 /// \returns the size cost of rematerializing a GlobalValue address relative
1356 /// to a stack reload.
1357 unsigned getGISelRematGlobalCost() const;
1358
1359 /// \returns True if the target supports scalable vectors.
1360 bool supportsScalableVectors() const;
1361
1362 /// \name Vector Predication Information
1363 /// @{
1364 /// Whether the target can efficiently support the %evl parameter of VP
1365 /// intrinsics in hardware (see LLVM Language Reference - "Vector Predication
1366 /// Intrinsics"). Use of %evl is discouraged when that is not the case.
1367 bool hasActiveVectorLength() const;
1368
1369 /// @}
1370
1371 /// @}
1372
1373 private:
1374 /// Estimate the latency of the specified instruction.
1375 /// Returns 1 as the default value.
1376 int getInstructionLatency(const Instruction *I) const;
1377
1378 /// Returns the expected throughput cost of the instruction.
1379 /// Returns -1 if the cost is unknown.
1380 int getInstructionThroughput(const Instruction *I) const;
1381
1382 /// The abstract base class used to type erase specific TTI
1383 /// implementations.
1384 class Concept;
1385
1386 /// The template model for the base class which wraps a concrete
1387 /// implementation in a type erased interface.
1388 template <typename T> class Model;
1389
1390 std::unique_ptr<Concept> TTIImpl;
1391 };
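// The Concept / Model pair declared above and defined below implement the
// usual type-erasure idiom: TargetTransformInfo owns a Concept pointer, and
// Model<T> forwards every virtual call to a concrete target implementation T.
// A minimal sketch of the idiom (hypothetical names, not the real interface):
//   struct ConceptSketch {
//     virtual ~ConceptSketch() = default;
//     virtual int getCost() = 0;
//   };
//   template <typename T> struct ModelSketch final : ConceptSketch {
//     T Impl;
//     ModelSketch(T I) : Impl(std::move(I)) {}
//     int getCost() override { return Impl.getCost(); }
//   };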
1392
1393 class TargetTransformInfo::Concept {
1394 public:
1395 virtual ~Concept() = 0;
1396 virtual const DataLayout &getDataLayout() const = 0;
1397 virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
1398 ArrayRef<const Value *> Operands,
1399 TTI::TargetCostKind CostKind) = 0;
1400 virtual unsigned getInliningThresholdMultiplier() = 0;
1401 virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1402 virtual int getInlinerVectorBonusPercent() = 0;
1403 virtual int getMemcpyCost(const Instruction *I) = 0;
1404 virtual unsigned
1405 getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1406 ProfileSummaryInfo *PSI,
1407 BlockFrequencyInfo *BFI) = 0;
1408 virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands,
1409 TargetCostKind CostKind) = 0;
1410 virtual bool hasBranchDivergence() = 0;
1411 virtual bool useGPUDivergenceAnalysis() = 0;
1412 virtual bool isSourceOfDivergence(const Value *V) = 0;
1413 virtual bool isAlwaysUniform(const Value *V) = 0;
1414 virtual unsigned getFlatAddressSpace() = 0;
1415 virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1416 Intrinsic::ID IID) const = 0;
1417 virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1418 virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1419 virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
1420 Value *OldV,
1421 Value *NewV) const = 0;
1422 virtual bool isLoweredToCall(const Function *F) = 0;
1423 virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1424 UnrollingPreferences &UP) = 0;
1425 virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1426 PeelingPreferences &PP) = 0;
1427 virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1428 AssumptionCache &AC,
1429 TargetLibraryInfo *LibInfo,
1430 HardwareLoopInfo &HWLoopInfo) = 0;
1431 virtual bool
1432 preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1433 AssumptionCache &AC, TargetLibraryInfo *TLI,
1434 DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
1435 virtual bool emitGetActiveLaneMask() = 0;
1436 virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1437 IntrinsicInst &II) = 0;
1438 virtual Optional<Value *>
1439 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1440 APInt DemandedMask, KnownBits &Known,
1441 bool &KnownBitsComputed) = 0;
1442 virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1443 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1444 APInt &UndefElts2, APInt &UndefElts3,
1445 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1446 SimplifyAndSetOp) = 0;
1447 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1448 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1449 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1450 int64_t BaseOffset, bool HasBaseReg,
1451 int64_t Scale, unsigned AddrSpace,
1452 Instruction *I) = 0;
1453 virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1454 TargetTransformInfo::LSRCost &C2) = 0;
1455 virtual bool isNumRegsMajorCostOfLSR() = 0;
1456 virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1457 virtual bool canMacroFuseCmp() = 0;
1458 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1459 LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1460 TargetLibraryInfo *LibInfo) = 0;
1461 virtual bool shouldFavorPostInc() const = 0;
1462 virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
1463 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1464 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1465 virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1466 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1467 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1468 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1469 virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1470 virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1471 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1472 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1473 virtual bool prefersVectorizedAddressing() = 0;
1474 virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1475 int64_t BaseOffset, bool HasBaseReg,
1476 int64_t Scale, unsigned AddrSpace) = 0;
1477 virtual bool LSRWithInstrQueries() = 0;
1478 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1479 virtual bool isProfitableToHoist(Instruction *I) = 0;
1480 virtual bool useAA() = 0;
1481 virtual bool isTypeLegal(Type *Ty) = 0;
1482 virtual unsigned getRegUsageForType(Type *Ty) = 0;
1483 virtual bool shouldBuildLookupTables() = 0;
1484 virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1485 virtual bool useColdCCForColdCall(Function &F) = 0;
1486 virtual unsigned getScalarizationOverhead(VectorType *Ty,
1487 const APInt &DemandedElts,
1488 bool Insert, bool Extract) = 0;
1489 virtual unsigned
1490 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1491 unsigned VF) = 0;
1492 virtual bool supportsEfficientVectorElementLoadStore() = 0;
1493 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1494 virtual MemCmpExpansionOptions
1495 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1496 virtual bool enableInterleavedAccessVectorization() = 0;
1497 virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1498 virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1499 virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1500 unsigned BitWidth,
1501 unsigned AddressSpace,
1502 unsigned Alignment,
1503 bool *Fast) = 0;
1504 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1505 virtual bool haveFastSqrt(Type *Ty) = 0;
1506 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1507 virtual int getFPOpCost(Type *Ty) = 0;
1508 virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1509 const APInt &Imm, Type *Ty) = 0;
1510 virtual int getIntImmCost(const APInt &Imm, Type *Ty,
1511 TargetCostKind CostKind) = 0;
1512 virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
1513 Type *Ty, TargetCostKind CostKind,
1514 Instruction *Inst = nullptr) = 0;
1515 virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1516 const APInt &Imm, Type *Ty,
1517 TargetCostKind CostKind) = 0;
1518 virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1519 virtual unsigned getRegisterClassForType(bool Vector,
1520 Type *Ty = nullptr) const = 0;
1521 virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1522 virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
1523 virtual unsigned getMinVectorRegisterBitWidth() = 0;
1524 virtual Optional<unsigned> getMaxVScale() const = 0;
1525 virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
1526 virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
1527 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1528 virtual bool shouldConsiderAddressTypePromotion(
1529 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1530 virtual unsigned getCacheLineSize() const = 0;
1531 virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1532 virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
1533
1534 /// \return How far ahead of a load we should place the prefetch
1535 /// instruction. This is currently measured in number of
1536 /// instructions.
1537 virtual unsigned getPrefetchDistance() const = 0;
1538
1539 /// \return Some HW prefetchers can handle accesses up to a certain
1540 /// constant stride. This is the minimum stride in bytes where it
1541 /// makes sense to start adding SW prefetches. The default is 1,
1542 /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1543 /// even below the HW prefetcher limit, and the arguments provided are
1544 /// meant to serve as a basis for deciding this for a particular loop.
1545 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1546 unsigned NumStridedMemAccesses,
1547 unsigned NumPrefetches,
1548 bool HasCall) const = 0;
1549
1550 /// \return The maximum number of iterations to prefetch ahead. If
1551 /// the required number of iterations is more than this number, no
1552 /// prefetching is performed.
1553 virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1554
1555 /// \return True if prefetching should also be done for writes.
1556 virtual bool enableWritePrefetching() const = 0;
1557
1558 virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1559 virtual unsigned getArithmeticInstrCost(
1560 unsigned Opcode, Type *Ty,
1561 TTI::TargetCostKind CostKind,
1562 OperandValueKind Opd1Info,
1563 OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
1564 OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
1565 const Instruction *CxtI = nullptr) = 0;
1566 virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
1567 VectorType *SubTp) = 0;
1568 virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1569 CastContextHint CCH,
1570 TTI::TargetCostKind CostKind,
1571 const Instruction *I) = 0;
1572 virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1573 VectorType *VecTy, unsigned Index) = 0;
1574 virtual int getCFInstrCost(unsigned Opcode,
1575 TTI::TargetCostKind CostKind) = 0;
1576 virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1577 CmpInst::Predicate VecPred,
1578 TTI::TargetCostKind CostKind,
1579 const Instruction *I) = 0;
1580 virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1581 unsigned Index) = 0;
1582 virtual int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1583 unsigned AddressSpace,
1584 TTI::TargetCostKind CostKind,
1585 const Instruction *I) = 0;
1586 virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1587 unsigned AddressSpace,
1588 TTI::TargetCostKind CostKind) = 0;
1589 virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1590 const Value *Ptr, bool VariableMask,
1591 Align Alignment,
1592 TTI::TargetCostKind CostKind,
1593 const Instruction *I = nullptr) = 0;
1594
1595 virtual int getInterleavedMemoryOpCost(
1596 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1597 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1598 bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1599 virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1600 bool IsPairwiseForm,
1601 TTI::TargetCostKind CostKind) = 0;
1602 virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
1603 bool IsPairwiseForm, bool IsUnsigned,
1604 TTI::TargetCostKind CostKind) = 0;
1605 virtual InstructionCost getExtendedAddReductionCost(
1606 bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1607 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
1608 virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1609 TTI::TargetCostKind CostKind) = 0;
1610 virtual int getCallInstrCost(Function *F, Type *RetTy,
1611 ArrayRef<Type *> Tys,
1612 TTI::TargetCostKind CostKind) = 0;
1613 virtual unsigned getNumberOfParts(Type *Tp) = 0;
1614 virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1615 const SCEV *Ptr) = 0;
1616 virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1617 virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1618 MemIntrinsicInfo &Info) = 0;
1619 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1620 virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1621 Type *ExpectedType) = 0;
1622 virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1623 unsigned SrcAddrSpace,
1624 unsigned DestAddrSpace,
1625 unsigned SrcAlign,
1626 unsigned DestAlign) const = 0;
1627 virtual void getMemcpyLoopResidualLoweringType(
1628 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1629 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1630 unsigned SrcAlign, unsigned DestAlign) const = 0;
1631 virtual bool areInlineCompatible(const Function *Caller,
1632 const Function *Callee) const = 0;
1633 virtual bool
1634 areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1635 SmallPtrSetImpl<Argument *> &Args) const = 0;
1636 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1637 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1638 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1639 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1640 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1641 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1642 Align Alignment,
1643 unsigned AddrSpace) const = 0;
1644 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1645 Align Alignment,
1646 unsigned AddrSpace) const = 0;
1647 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1648 unsigned ChainSizeInBytes,
1649 VectorType *VecTy) const = 0;
1650 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1651 unsigned ChainSizeInBytes,
1652 VectorType *VecTy) const = 0;
1653 virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1654 ReductionFlags) const = 0;
1655 virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1656 ReductionFlags) const = 0;
1657 virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1658 ReductionFlags) const = 0;
1659 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1660 virtual unsigned getGISelRematGlobalCost() const = 0;
1661 virtual bool supportsScalableVectors() const = 0;
1662 virtual bool hasActiveVectorLength() const = 0;
1663 virtual int getInstructionLatency(const Instruction *I) = 0;
1664 };
1665
1666 template <typename T>
1667 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1668 T Impl;
1669
1670 public:
Model(T Impl)1671 Model(T Impl) : Impl(std::move(Impl)) {}
~Model()1672 ~Model() override {}
1673
getDataLayout()1674 const DataLayout &getDataLayout() const override {
1675 return Impl.getDataLayout();
1676 }
1677
getGEPCost(Type * PointeeType,const Value * Ptr,ArrayRef<const Value * > Operands,enum TargetTransformInfo::TargetCostKind CostKind)1678 int getGEPCost(Type *PointeeType, const Value *Ptr,
1679 ArrayRef<const Value *> Operands,
1680 enum TargetTransformInfo::TargetCostKind CostKind) override {
1681 return Impl.getGEPCost(PointeeType, Ptr, Operands);
1682 }
getInliningThresholdMultiplier()1683 unsigned getInliningThresholdMultiplier() override {
1684 return Impl.getInliningThresholdMultiplier();
1685 }
adjustInliningThreshold(const CallBase * CB)1686 unsigned adjustInliningThreshold(const CallBase *CB) override {
1687 return Impl.adjustInliningThreshold(CB);
1688 }
getInlinerVectorBonusPercent()1689 int getInlinerVectorBonusPercent() override {
1690 return Impl.getInlinerVectorBonusPercent();
1691 }
getMemcpyCost(const Instruction * I)1692 int getMemcpyCost(const Instruction *I) override {
1693 return Impl.getMemcpyCost(I);
1694 }
getUserCost(const User * U,ArrayRef<const Value * > Operands,TargetCostKind CostKind)1695 int getUserCost(const User *U, ArrayRef<const Value *> Operands,
1696 TargetCostKind CostKind) override {
1697 return Impl.getUserCost(U, Operands, CostKind);
1698 }
hasBranchDivergence()1699 bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
useGPUDivergenceAnalysis()1700 bool useGPUDivergenceAnalysis() override {
1701 return Impl.useGPUDivergenceAnalysis();
1702 }
isSourceOfDivergence(const Value * V)1703 bool isSourceOfDivergence(const Value *V) override {
1704 return Impl.isSourceOfDivergence(V);
1705 }
1706
isAlwaysUniform(const Value * V)1707 bool isAlwaysUniform(const Value *V) override {
1708 return Impl.isAlwaysUniform(V);
1709 }
1710
getFlatAddressSpace()1711 unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
1712
collectFlatAddressOperands(SmallVectorImpl<int> & OpIndexes,Intrinsic::ID IID)1713 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1714 Intrinsic::ID IID) const override {
1715 return Impl.collectFlatAddressOperands(OpIndexes, IID);
1716 }
1717
isNoopAddrSpaceCast(unsigned FromAS,unsigned ToAS)1718 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
1719 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
1720 }
1721
getAssumedAddrSpace(const Value * V)1722 unsigned getAssumedAddrSpace(const Value *V) const override {
1723 return Impl.getAssumedAddrSpace(V);
1724 }
1725
rewriteIntrinsicWithAddressSpace(IntrinsicInst * II,Value * OldV,Value * NewV)1726 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
1727 Value *NewV) const override {
1728 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1729 }
1730
isLoweredToCall(const Function * F)1731 bool isLoweredToCall(const Function *F) override {
1732 return Impl.isLoweredToCall(F);
1733 }
getUnrollingPreferences(Loop * L,ScalarEvolution & SE,UnrollingPreferences & UP)1734 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1735 UnrollingPreferences &UP) override {
1736 return Impl.getUnrollingPreferences(L, SE, UP);
1737 }
getPeelingPreferences(Loop * L,ScalarEvolution & SE,PeelingPreferences & PP)1738 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1739 PeelingPreferences &PP) override {
1740 return Impl.getPeelingPreferences(L, SE, PP);
1741 }
isHardwareLoopProfitable(Loop * L,ScalarEvolution & SE,AssumptionCache & AC,TargetLibraryInfo * LibInfo,HardwareLoopInfo & HWLoopInfo)1742 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1743 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
1744 HardwareLoopInfo &HWLoopInfo) override {
1745 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1746 }
preferPredicateOverEpilogue(Loop * L,LoopInfo * LI,ScalarEvolution & SE,AssumptionCache & AC,TargetLibraryInfo * TLI,DominatorTree * DT,const LoopAccessInfo * LAI)1747 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1748 AssumptionCache &AC, TargetLibraryInfo *TLI,
1749 DominatorTree *DT,
1750 const LoopAccessInfo *LAI) override {
1751 return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
1752 }
emitGetActiveLaneMask()1753 bool emitGetActiveLaneMask() override {
1754 return Impl.emitGetActiveLaneMask();
1755 }
instCombineIntrinsic(InstCombiner & IC,IntrinsicInst & II)1756 Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1757 IntrinsicInst &II) override {
1758 return Impl.instCombineIntrinsic(IC, II);
1759 }
1760 Optional<Value *>
simplifyDemandedUseBitsIntrinsic(InstCombiner & IC,IntrinsicInst & II,APInt DemandedMask,KnownBits & Known,bool & KnownBitsComputed)1761 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1762 APInt DemandedMask, KnownBits &Known,
1763 bool &KnownBitsComputed) override {
1764 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
1765 KnownBitsComputed);
1766 }
simplifyDemandedVectorEltsIntrinsic(InstCombiner & IC,IntrinsicInst & II,APInt DemandedElts,APInt & UndefElts,APInt & UndefElts2,APInt & UndefElts3,std::function<void (Instruction *,unsigned,APInt,APInt &)> SimplifyAndSetOp)1767 Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1768 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1769 APInt &UndefElts2, APInt &UndefElts3,
1770 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1771 SimplifyAndSetOp) override {
1772 return Impl.simplifyDemandedVectorEltsIntrinsic(
1773 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
1774 SimplifyAndSetOp);
1775 }
isLegalAddImmediate(int64_t Imm)1776 bool isLegalAddImmediate(int64_t Imm) override {
1777 return Impl.isLegalAddImmediate(Imm);
1778 }
isLegalICmpImmediate(int64_t Imm)1779 bool isLegalICmpImmediate(int64_t Imm) override {
1780 return Impl.isLegalICmpImmediate(Imm);
1781 }
isLegalAddressingMode(Type * Ty,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg,int64_t Scale,unsigned AddrSpace,Instruction * I)1782 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1783 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
1784 Instruction *I) override {
1785 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1786 AddrSpace, I);
1787 }
isLSRCostLess(TargetTransformInfo::LSRCost & C1,TargetTransformInfo::LSRCost & C2)1788 bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1789 TargetTransformInfo::LSRCost &C2) override {
1790 return Impl.isLSRCostLess(C1, C2);
1791 }
isNumRegsMajorCostOfLSR()1792 bool isNumRegsMajorCostOfLSR() override {
1793 return Impl.isNumRegsMajorCostOfLSR();
1794 }
isProfitableLSRChainElement(Instruction * I)1795 bool isProfitableLSRChainElement(Instruction *I) override {
1796 return Impl.isProfitableLSRChainElement(I);
1797 }
canMacroFuseCmp()1798 bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
canSaveCmp(Loop * L,BranchInst ** BI,ScalarEvolution * SE,LoopInfo * LI,DominatorTree * DT,AssumptionCache * AC,TargetLibraryInfo * LibInfo)1799 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
1800 DominatorTree *DT, AssumptionCache *AC,
1801 TargetLibraryInfo *LibInfo) override {
1802 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
1803 }
shouldFavorPostInc()1804 bool shouldFavorPostInc() const override { return Impl.shouldFavorPostInc(); }
shouldFavorBackedgeIndex(const Loop * L)1805 bool shouldFavorBackedgeIndex(const Loop *L) const override {
1806 return Impl.shouldFavorBackedgeIndex(L);
1807 }
isLegalMaskedStore(Type * DataType,Align Alignment)1808 bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
1809 return Impl.isLegalMaskedStore(DataType, Alignment);
1810 }
isLegalMaskedLoad(Type * DataType,Align Alignment)1811 bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
1812 return Impl.isLegalMaskedLoad(DataType, Alignment);
1813 }
isLegalNTStore(Type * DataType,Align Alignment)1814 bool isLegalNTStore(Type *DataType, Align Alignment) override {
1815 return Impl.isLegalNTStore(DataType, Alignment);
1816 }
isLegalNTLoad(Type * DataType,Align Alignment)1817 bool isLegalNTLoad(Type *DataType, Align Alignment) override {
1818 return Impl.isLegalNTLoad(DataType, Alignment);
1819 }
isLegalMaskedScatter(Type * DataType,Align Alignment)1820 bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
1821 return Impl.isLegalMaskedScatter(DataType, Alignment);
1822 }
isLegalMaskedGather(Type * DataType,Align Alignment)1823 bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
1824 return Impl.isLegalMaskedGather(DataType, Alignment);
1825 }
isLegalMaskedCompressStore(Type * DataType)1826 bool isLegalMaskedCompressStore(Type *DataType) override {
1827 return Impl.isLegalMaskedCompressStore(DataType);
1828 }
isLegalMaskedExpandLoad(Type * DataType)1829 bool isLegalMaskedExpandLoad(Type *DataType) override {
1830 return Impl.isLegalMaskedExpandLoad(DataType);
1831 }
hasDivRemOp(Type * DataType,bool IsSigned)1832 bool hasDivRemOp(Type *DataType, bool IsSigned) override {
1833 return Impl.hasDivRemOp(DataType, IsSigned);
1834 }
hasVolatileVariant(Instruction * I,unsigned AddrSpace)1835 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
1836 return Impl.hasVolatileVariant(I, AddrSpace);
1837 }
prefersVectorizedAddressing()1838 bool prefersVectorizedAddressing() override {
1839 return Impl.prefersVectorizedAddressing();
1840 }
getScalingFactorCost(Type * Ty,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg,int64_t Scale,unsigned AddrSpace)1841 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1842 bool HasBaseReg, int64_t Scale,
1843 unsigned AddrSpace) override {
1844 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1845 AddrSpace);
1846 }
LSRWithInstrQueries()1847 bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
isTruncateFree(Type * Ty1,Type * Ty2)1848 bool isTruncateFree(Type *Ty1, Type *Ty2) override {
1849 return Impl.isTruncateFree(Ty1, Ty2);
1850 }
isProfitableToHoist(Instruction * I)1851 bool isProfitableToHoist(Instruction *I) override {
1852 return Impl.isProfitableToHoist(I);
1853 }
useAA()1854 bool useAA() override { return Impl.useAA(); }
isTypeLegal(Type * Ty)1855 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
getRegUsageForType(Type * Ty)1856 unsigned getRegUsageForType(Type *Ty) override {
1857 return Impl.getRegUsageForType(Ty);
1858 }
shouldBuildLookupTables()1859 bool shouldBuildLookupTables() override {
1860 return Impl.shouldBuildLookupTables();
1861 }
shouldBuildLookupTablesForConstant(Constant * C)1862 bool shouldBuildLookupTablesForConstant(Constant *C) override {
1863 return Impl.shouldBuildLookupTablesForConstant(C);
1864 }
useColdCCForColdCall(Function & F)1865 bool useColdCCForColdCall(Function &F) override {
1866 return Impl.useColdCCForColdCall(F);
1867 }
1868
getScalarizationOverhead(VectorType * Ty,const APInt & DemandedElts,bool Insert,bool Extract)1869 unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
1870 bool Insert, bool Extract) override {
1871 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
1872 }
getOperandsScalarizationOverhead(ArrayRef<const Value * > Args,unsigned VF)1873 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1874 unsigned VF) override {
1875 return Impl.getOperandsScalarizationOverhead(Args, VF);
1876 }
1877
supportsEfficientVectorElementLoadStore()1878 bool supportsEfficientVectorElementLoadStore() override {
1879 return Impl.supportsEfficientVectorElementLoadStore();
1880 }
1881
enableAggressiveInterleaving(bool LoopHasReductions)1882 bool enableAggressiveInterleaving(bool LoopHasReductions) override {
1883 return Impl.enableAggressiveInterleaving(LoopHasReductions);
1884 }
enableMemCmpExpansion(bool OptSize,bool IsZeroCmp)1885 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
1886 bool IsZeroCmp) const override {
1887 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
1888 }
enableInterleavedAccessVectorization()1889 bool enableInterleavedAccessVectorization() override {
1890 return Impl.enableInterleavedAccessVectorization();
1891 }
enableMaskedInterleavedAccessVectorization()1892 bool enableMaskedInterleavedAccessVectorization() override {
1893 return Impl.enableMaskedInterleavedAccessVectorization();
1894 }
isFPVectorizationPotentiallyUnsafe()1895 bool isFPVectorizationPotentiallyUnsafe() override {
1896 return Impl.isFPVectorizationPotentiallyUnsafe();
1897 }
allowsMisalignedMemoryAccesses(LLVMContext & Context,unsigned BitWidth,unsigned AddressSpace,unsigned Alignment,bool * Fast)1898 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
1899 unsigned AddressSpace, unsigned Alignment,
1900 bool *Fast) override {
1901 return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
1902 Alignment, Fast);
1903 }
getPopcntSupport(unsigned IntTyWidthInBit)1904 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
1905 return Impl.getPopcntSupport(IntTyWidthInBit);
1906 }
haveFastSqrt(Type * Ty)1907 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
1908
isFCmpOrdCheaperThanFCmpZero(Type * Ty)1909 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
1910 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
1911 }
1912
getFPOpCost(Type * Ty)1913 int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }
1914
getIntImmCodeSizeCost(unsigned Opc,unsigned Idx,const APInt & Imm,Type * Ty)1915 int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1916 Type *Ty) override {
1917 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
1918 }
getIntImmCost(const APInt & Imm,Type * Ty,TargetCostKind CostKind)1919 int getIntImmCost(const APInt &Imm, Type *Ty,
1920 TargetCostKind CostKind) override {
1921 return Impl.getIntImmCost(Imm, Ty, CostKind);
1922 }
1923 int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
1924 TargetCostKind CostKind,
1925 Instruction *Inst = nullptr) override {
1926 return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
1927 }
getIntImmCostIntrin(Intrinsic::ID IID,unsigned Idx,const APInt & Imm,Type * Ty,TargetCostKind CostKind)1928 int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1929 Type *Ty, TargetCostKind CostKind) override {
1930 return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
1931 }
getNumberOfRegisters(unsigned ClassID)1932 unsigned getNumberOfRegisters(unsigned ClassID) const override {
1933 return Impl.getNumberOfRegisters(ClassID);
1934 }
1935 unsigned getRegisterClassForType(bool Vector,
1936 Type *Ty = nullptr) const override {
1937 return Impl.getRegisterClassForType(Vector, Ty);
1938 }
getRegisterClassName(unsigned ClassID)1939 const char *getRegisterClassName(unsigned ClassID) const override {
1940 return Impl.getRegisterClassName(ClassID);
1941 }
getRegisterBitWidth(bool Vector)1942 unsigned getRegisterBitWidth(bool Vector) const override {
1943 return Impl.getRegisterBitWidth(Vector);
1944 }
getMinVectorRegisterBitWidth()1945 unsigned getMinVectorRegisterBitWidth() override {
1946 return Impl.getMinVectorRegisterBitWidth();
1947 }
getMaxVScale()1948 Optional<unsigned> getMaxVScale() const override {
1949 return Impl.getMaxVScale();
1950 }
shouldMaximizeVectorBandwidth(bool OptSize)1951 bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
1952 return Impl.shouldMaximizeVectorBandwidth(OptSize);
1953 }
getMinimumVF(unsigned ElemWidth)1954 unsigned getMinimumVF(unsigned ElemWidth) const override {
1955 return Impl.getMinimumVF(ElemWidth);
1956 }
getMaximumVF(unsigned ElemWidth,unsigned Opcode)1957 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
1958 return Impl.getMaximumVF(ElemWidth, Opcode);
1959 }
shouldConsiderAddressTypePromotion(const Instruction & I,bool & AllowPromotionWithoutCommonHeader)1960 bool shouldConsiderAddressTypePromotion(
1961 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
1962 return Impl.shouldConsiderAddressTypePromotion(
1963 I, AllowPromotionWithoutCommonHeader);
1964 }
getCacheLineSize()1965 unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
getCacheSize(CacheLevel Level)1966 Optional<unsigned> getCacheSize(CacheLevel Level) const override {
1967 return Impl.getCacheSize(Level);
1968 }
getCacheAssociativity(CacheLevel Level)1969 Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
1970 return Impl.getCacheAssociativity(Level);
1971 }
1972
1973 /// Return the preferred prefetch distance in terms of instructions.
1974 ///
getPrefetchDistance()1975 unsigned getPrefetchDistance() const override {
1976 return Impl.getPrefetchDistance();
1977 }
1978
1979 /// Return the minimum stride necessary to trigger software
1980 /// prefetching.
1981 ///
getMinPrefetchStride(unsigned NumMemAccesses,unsigned NumStridedMemAccesses,unsigned NumPrefetches,bool HasCall)1982 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1983 unsigned NumStridedMemAccesses,
1984 unsigned NumPrefetches,
1985 bool HasCall) const override {
1986 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
1987 NumPrefetches, HasCall);
1988 }
1989
1990 /// Return the maximum prefetch distance in terms of loop
1991 /// iterations.
1992 ///
getMaxPrefetchIterationsAhead()1993 unsigned getMaxPrefetchIterationsAhead() const override {
1994 return Impl.getMaxPrefetchIterationsAhead();
1995 }
1996
1997 /// \return True if prefetching should also be done for writes.
enableWritePrefetching()1998 bool enableWritePrefetching() const override {
1999 return Impl.enableWritePrefetching();
2000 }
2001
getMaxInterleaveFactor(unsigned VF)2002 unsigned getMaxInterleaveFactor(unsigned VF) override {
2003 return Impl.getMaxInterleaveFactor(VF);
2004 }
getEstimatedNumberOfCaseClusters(const SwitchInst & SI,unsigned & JTSize,ProfileSummaryInfo * PSI,BlockFrequencyInfo * BFI)2005 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2006 unsigned &JTSize,
2007 ProfileSummaryInfo *PSI,
2008 BlockFrequencyInfo *BFI) override {
2009 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2010 }
2011 unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
2012 TTI::TargetCostKind CostKind,
2013 OperandValueKind Opd1Info,
2014 OperandValueKind Opd2Info,
2015 OperandValueProperties Opd1PropInfo,
2016 OperandValueProperties Opd2PropInfo,
2017 ArrayRef<const Value *> Args,
2018 const Instruction *CxtI = nullptr) override {
2019 return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2020 Opd1PropInfo, Opd2PropInfo, Args, CxtI);
2021 }
getShuffleCost(ShuffleKind Kind,VectorType * Tp,int Index,VectorType * SubTp)2022 int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
2023 VectorType *SubTp) override {
2024 return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
2025 }
getCastInstrCost(unsigned Opcode,Type * Dst,Type * Src,CastContextHint CCH,TTI::TargetCostKind CostKind,const Instruction * I)2026 int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2027 CastContextHint CCH, TTI::TargetCostKind CostKind,
2028 const Instruction *I) override {
2029 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2030 }
getExtractWithExtendCost(unsigned Opcode,Type * Dst,VectorType * VecTy,unsigned Index)2031 int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
2032 unsigned Index) override {
2033 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2034 }
getCFInstrCost(unsigned Opcode,TTI::TargetCostKind CostKind)2035 int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override {
2036 return Impl.getCFInstrCost(Opcode, CostKind);
2037 }
getCmpSelInstrCost(unsigned Opcode,Type * ValTy,Type * CondTy,CmpInst::Predicate VecPred,TTI::TargetCostKind CostKind,const Instruction * I)2038 int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2039 CmpInst::Predicate VecPred,
2040 TTI::TargetCostKind CostKind,
2041 const Instruction *I) override {
2042 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2043 }
getVectorInstrCost(unsigned Opcode,Type * Val,unsigned Index)2044 int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
2045 return Impl.getVectorInstrCost(Opcode, Val, Index);
2046 }
getMemoryOpCost(unsigned Opcode,Type * Src,Align Alignment,unsigned AddressSpace,TTI::TargetCostKind CostKind,const Instruction * I)2047 int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2048 unsigned AddressSpace, TTI::TargetCostKind CostKind,
2049 const Instruction *I) override {
2050 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2051 CostKind, I);
2052 }
getMaskedMemoryOpCost(unsigned Opcode,Type * Src,Align Alignment,unsigned AddressSpace,TTI::TargetCostKind CostKind)2053 int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2054 unsigned AddressSpace,
2055 TTI::TargetCostKind CostKind) override {
2056 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2057 CostKind);
2058 }
2059 int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2060 bool VariableMask, Align Alignment,
2061 TTI::TargetCostKind CostKind,
2062 const Instruction *I = nullptr) override {
2063 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2064 Alignment, CostKind, I);
2065 }
getInterleavedMemoryOpCost(unsigned Opcode,Type * VecTy,unsigned Factor,ArrayRef<unsigned> Indices,Align Alignment,unsigned AddressSpace,TTI::TargetCostKind CostKind,bool UseMaskForCond,bool UseMaskForGaps)2066 int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
2067 ArrayRef<unsigned> Indices, Align Alignment,
2068 unsigned AddressSpace,
2069 TTI::TargetCostKind CostKind,
2070 bool UseMaskForCond,
2071 bool UseMaskForGaps) override {
2072 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2073 Alignment, AddressSpace, CostKind,
2074 UseMaskForCond, UseMaskForGaps);
2075 }
getArithmeticReductionCost(unsigned Opcode,VectorType * Ty,bool IsPairwiseForm,TTI::TargetCostKind CostKind)2076 int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2077 bool IsPairwiseForm,
2078 TTI::TargetCostKind CostKind) override {
2079 return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
2080 CostKind);
2081 }
getMinMaxReductionCost(VectorType * Ty,VectorType * CondTy,bool IsPairwiseForm,bool IsUnsigned,TTI::TargetCostKind CostKind)2082 int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
2083 bool IsPairwiseForm, bool IsUnsigned,
2084 TTI::TargetCostKind CostKind) override {
2085 return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
2086 CostKind);
2087 }
2088 InstructionCost getExtendedAddReductionCost(
2089 bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2090 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
2091 return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
2092 CostKind);
2093 }
getIntrinsicInstrCost(const IntrinsicCostAttributes & ICA,TTI::TargetCostKind CostKind)2094 int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2095 TTI::TargetCostKind CostKind) override {
2096 return Impl.getIntrinsicInstrCost(ICA, CostKind);
2097 }
getCallInstrCost(Function * F,Type * RetTy,ArrayRef<Type * > Tys,TTI::TargetCostKind CostKind)2098 int getCallInstrCost(Function *F, Type *RetTy,
2099 ArrayRef<Type *> Tys,
2100 TTI::TargetCostKind CostKind) override {
2101 return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2102 }
getNumberOfParts(Type * Tp)2103 unsigned getNumberOfParts(Type *Tp) override {
2104 return Impl.getNumberOfParts(Tp);
2105 }
getAddressComputationCost(Type * Ty,ScalarEvolution * SE,const SCEV * Ptr)2106 int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2107 const SCEV *Ptr) override {
2108 return Impl.getAddressComputationCost(Ty, SE, Ptr);
2109 }
getCostOfKeepingLiveOverCall(ArrayRef<Type * > Tys)2110 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2111 return Impl.getCostOfKeepingLiveOverCall(Tys);
2112 }
getTgtMemIntrinsic(IntrinsicInst * Inst,MemIntrinsicInfo & Info)2113 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2114 MemIntrinsicInfo &Info) override {
2115 return Impl.getTgtMemIntrinsic(Inst, Info);
2116 }
getAtomicMemIntrinsicMaxElementSize()2117 unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2118 return Impl.getAtomicMemIntrinsicMaxElementSize();
2119 }
getOrCreateResultFromMemIntrinsic(IntrinsicInst * Inst,Type * ExpectedType)2120 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2121 Type *ExpectedType) override {
2122 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2123 }
getMemcpyLoopLoweringType(LLVMContext & Context,Value * Length,unsigned SrcAddrSpace,unsigned DestAddrSpace,unsigned SrcAlign,unsigned DestAlign)2124 Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
2125 unsigned SrcAddrSpace, unsigned DestAddrSpace,
2126 unsigned SrcAlign,
2127 unsigned DestAlign) const override {
2128 return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2129 DestAddrSpace, SrcAlign, DestAlign);
2130 }
getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type * > & OpsOut,LLVMContext & Context,unsigned RemainingBytes,unsigned SrcAddrSpace,unsigned DestAddrSpace,unsigned SrcAlign,unsigned DestAlign)2131 void getMemcpyLoopResidualLoweringType(
2132 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2133 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2134 unsigned SrcAlign, unsigned DestAlign) const override {
2135 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2136 SrcAddrSpace, DestAddrSpace,
2137 SrcAlign, DestAlign);
2138 }
areInlineCompatible(const Function * Caller,const Function * Callee)2139 bool areInlineCompatible(const Function *Caller,
2140 const Function *Callee) const override {
2141 return Impl.areInlineCompatible(Caller, Callee);
2142 }
areFunctionArgsABICompatible(const Function * Caller,const Function * Callee,SmallPtrSetImpl<Argument * > & Args)2143 bool areFunctionArgsABICompatible(
2144 const Function *Caller, const Function *Callee,
2145 SmallPtrSetImpl<Argument *> &Args) const override {
2146 return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
2147 }
isIndexedLoadLegal(MemIndexedMode Mode,Type * Ty)2148 bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2149 return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2150 }
isIndexedStoreLegal(MemIndexedMode Mode,Type * Ty)2151 bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2152 return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2153 }
getLoadStoreVecRegBitWidth(unsigned AddrSpace)2154 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2155 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2156 }
isLegalToVectorizeLoad(LoadInst * LI)2157 bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2158 return Impl.isLegalToVectorizeLoad(LI);
2159 }
isLegalToVectorizeStore(StoreInst * SI)2160 bool isLegalToVectorizeStore(StoreInst *SI) const override {
2161 return Impl.isLegalToVectorizeStore(SI);
2162 }
isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,Align Alignment,unsigned AddrSpace)2163 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2164 unsigned AddrSpace) const override {
2165 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2166 AddrSpace);
2167 }
isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,Align Alignment,unsigned AddrSpace)2168 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2169 unsigned AddrSpace) const override {
2170 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2171 AddrSpace);
2172 }
getLoadVectorFactor(unsigned VF,unsigned LoadSize,unsigned ChainSizeInBytes,VectorType * VecTy)2173 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2174 unsigned ChainSizeInBytes,
2175 VectorType *VecTy) const override {
2176 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2177 }
getStoreVectorFactor(unsigned VF,unsigned StoreSize,unsigned ChainSizeInBytes,VectorType * VecTy)2178 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2179 unsigned ChainSizeInBytes,
2180 VectorType *VecTy) const override {
2181 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2182 }
useReductionIntrinsic(unsigned Opcode,Type * Ty,ReductionFlags Flags)2183 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
2184 ReductionFlags Flags) const override {
2185 return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
2186 }
preferInLoopReduction(unsigned Opcode,Type * Ty,ReductionFlags Flags)2187 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2188 ReductionFlags Flags) const override {
2189 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2190 }
preferPredicatedReductionSelect(unsigned Opcode,Type * Ty,ReductionFlags Flags)2191 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2192 ReductionFlags Flags) const override {
2193 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2194 }
shouldExpandReduction(const IntrinsicInst * II)2195 bool shouldExpandReduction(const IntrinsicInst *II) const override {
2196 return Impl.shouldExpandReduction(II);
2197 }
2198
getGISelRematGlobalCost()2199 unsigned getGISelRematGlobalCost() const override {
2200 return Impl.getGISelRematGlobalCost();
2201 }
2202
supportsScalableVectors()2203 bool supportsScalableVectors() const override {
2204 return Impl.supportsScalableVectors();
2205 }
2206
hasActiveVectorLength()2207 bool hasActiveVectorLength() const override {
2208 return Impl.hasActiveVectorLength();
2209 }
2210
getInstructionLatency(const Instruction * I)2211 int getInstructionLatency(const Instruction *I) override {
2212 return Impl.getInstructionLatency(I);
2213 }
2214 };
2215
2216 template <typename T>
2217 TargetTransformInfo::TargetTransformInfo(T Impl)
2218 : TTIImpl(new Model<T>(Impl)) {}
2219
2220 /// Analysis pass providing the \c TargetTransformInfo.
2221 ///
2222 /// The core idea of the TargetIRAnalysis is to expose an interface through
2223 /// which LLVM targets can analyze and provide information about the middle
2224 /// end's target-independent IR. This supports use cases such as target-aware
2225 /// cost modeling of IR constructs.
2226 ///
2227 /// This is a function analysis because much of the cost modeling for targets
2228 /// is done in a subtarget specific way and LLVM supports compiling different
2229 /// functions targeting different subtargets in order to support runtime
2230 /// dispatch according to the observed subtarget.
2231 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2232 public:
2233 typedef TargetTransformInfo Result;
2234
2235 /// Default construct a target IR analysis.
2236 ///
2237 /// This will use the module's datalayout to construct a baseline
2238 /// conservative TTI result.
2239 TargetIRAnalysis();
2240
2241 /// Construct an IR analysis pass around a target-provided callback.
2242 ///
2243 /// The callback will be called with a particular function for which the TTI
2244 /// is needed and must return a TTI object for that function.
2245 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2246
2247 // Value semantics. We spell out the constructors for MSVC.
2248 TargetIRAnalysis(const TargetIRAnalysis &Arg)
2249 : TTICallback(Arg.TTICallback) {}
2250 TargetIRAnalysis(TargetIRAnalysis &&Arg)
2251 : TTICallback(std::move(Arg.TTICallback)) {}
2252 TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2253 TTICallback = RHS.TTICallback;
2254 return *this;
2255 }
2256 TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2257 TTICallback = std::move(RHS.TTICallback);
2258 return *this;
2259 }
2260
2261 Result run(const Function &F, FunctionAnalysisManager &);
2262
2263 private:
2264 friend AnalysisInfoMixin<TargetIRAnalysis>;
2265 static AnalysisKey Key;
2266
2267 /// The callback used to produce a result.
2268 ///
2269 /// We use a completely opaque callback so that targets can provide whatever
2270 /// mechanism they desire for constructing the TTI for a given function.
2271 ///
2272 /// FIXME: Should we really use std::function? It's relatively inefficient.
2273 /// It might be possible to arrange for even stateful callbacks to outlive
2274 /// the analysis and thus use a function_ref which would be lighter weight.
2275 /// This may also be less error prone as the callback is likely to reference
2276 /// the external TargetMachine, and that reference needs to never dangle.
2277 std::function<Result(const Function &)> TTICallback;
2278
2279 /// Helper function used as the callback in the default constructor.
2280 static Result getDefaultTTI(const Function &F);
2281 };
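// Illustrative sketch (assumed usage with the new pass manager): a function
// pass obtains the TargetTransformInfo result of the analysis above through
// its FunctionAnalysisManager. `MyPass` is a hypothetical pass name used only
// for illustration:
//   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
//     TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
//     if (TTI.enableInterleavedAccessVectorization())
//       /* ... */;
//     return PreservedAnalyses::all();
//   }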
2282
2283 /// Wrapper pass for TargetTransformInfo.
2284 ///
2285 /// This pass can be constructed from a TTI object which it stores internally
2286 /// and is queried by passes.
2287 class TargetTransformInfoWrapperPass : public ImmutablePass {
2288 TargetIRAnalysis TIRA;
2289 Optional<TargetTransformInfo> TTI;
2290
2291 virtual void anchor();
2292
2293 public:
2294 static char ID;
2295
2296 /// We must provide a default constructor for the pass but it should
2297 /// never be used.
2298 ///
2299 /// Use the constructor below or call one of the creation routines.
2300 TargetTransformInfoWrapperPass();
2301
2302 explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2303
2304 TargetTransformInfo &getTTI(const Function &F);
2305 };
2306
2307 /// Create an analysis pass wrapper around a TTI object.
2308 ///
2309 /// This analysis pass just holds the TTI instance and makes it available to
2310 /// clients.
2311 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
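// Illustrative sketch (assumed usage with the legacy pass manager): a pass
// declares a dependency on the wrapper pass and queries the per-function TTI.
// `MyLegacyPass` is a hypothetical pass name used only for illustration:
//   void MyLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<TargetTransformInfoWrapperPass>();
//   }
//   bool MyLegacyPass::runOnFunction(Function &F) {
//     TargetTransformInfo &TTI =
//         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
//     bool Modified = false;
//     if (TTI.prefersVectorizedAddressing())
//       /* ... transform ... */;
//     return Modified;
//   }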
2312
2313 } // namespace llvm
2314
2315 #endif
2316