1 //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This pass exposes codegen information to IR-level passes. Every
10 /// transformation that uses codegen information is broken into three parts:
11 /// 1. The IR-level analysis pass.
12 /// 2. The IR-level transformation interface which provides the needed
13 ///    information.
14 /// 3. Codegen-level implementation which uses target-specific hooks.
15 ///
16 /// This file defines #2, which is the interface that IR-level transformations
17 /// use for querying the codegen.
18 ///
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22 #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23 
24 #include "llvm/ADT/SmallBitVector.h"
25 #include "llvm/IR/FMF.h"
26 #include "llvm/IR/InstrTypes.h"
27 #include "llvm/IR/PassManager.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/AtomicOrdering.h"
30 #include "llvm/Support/BranchProbability.h"
31 #include "llvm/Support/InstructionCost.h"
32 #include <functional>
33 #include <utility>
34 
35 namespace llvm {
36 
37 namespace Intrinsic {
38 typedef unsigned ID;
39 }
40 
41 class AssumptionCache;
42 class BlockFrequencyInfo;
43 class DominatorTree;
44 class BranchInst;
45 class CallBase;
46 class Function;
47 class GlobalValue;
48 class InstCombiner;
49 class OptimizationRemarkEmitter;
50 class IntrinsicInst;
51 class LoadInst;
52 class Loop;
53 class LoopInfo;
54 class LoopVectorizationLegality;
55 class ProfileSummaryInfo;
56 class RecurrenceDescriptor;
57 class SCEV;
58 class ScalarEvolution;
59 class StoreInst;
60 class SwitchInst;
61 class TargetLibraryInfo;
62 class Type;
63 class User;
64 class Value;
65 class VPIntrinsic;
66 struct KnownBits;
67 template <typename T> class Optional;
68 
69 /// Information about a load/store intrinsic defined by the target.
70 struct MemIntrinsicInfo {
71   /// This is the pointer that the intrinsic is loading from or storing to.
72   /// If this is non-null, then analysis/optimization passes can assume that
73   /// this intrinsic is functionally equivalent to a load/store from this
74   /// pointer.
75   Value *PtrVal = nullptr;
76 
77   // Ordering for atomic operations.
78   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
79 
80   // Same Id is set by the target for corresponding load/store intrinsics.
81   unsigned short MatchingId = 0;
82 
83   bool ReadMem = false;
84   bool WriteMem = false;
85   bool IsVolatile = false;
86 
87   bool isUnordered() const {
88     return (Ordering == AtomicOrdering::NotAtomic ||
89             Ordering == AtomicOrdering::Unordered) &&
90            !IsVolatile;
91   }
92 };
93 
94 /// Attributes of a target dependent hardware loop.
95 struct HardwareLoopInfo {
96   HardwareLoopInfo() = delete;
97   HardwareLoopInfo(Loop *L) : L(L) {}
98   Loop *L = nullptr;
99   BasicBlock *ExitBlock = nullptr;
100   BranchInst *ExitBranch = nullptr;
101   const SCEV *ExitCount = nullptr;
102   IntegerType *CountType = nullptr;
103   Value *LoopDecrement = nullptr; // Decrement the loop counter by this
104                                   // value in every iteration.
105   bool IsNestingLegal = false;    // Can a hardware loop be a parent to
106                                   // another hardware loop?
107   bool CounterInReg = false;      // Should loop counter be updated in
108                                   // the loop via a phi?
109   bool PerformEntryTest = false;  // Generate the intrinsic which also performs
110                                   // icmp ne zero on the loop counter value and
111                                   // produces an i1 to guard the loop entry.
112   bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
113                                DominatorTree &DT, bool ForceNestedLoop = false,
114                                bool ForceHardwareLoopPHI = false);
115   bool canAnalyze(LoopInfo &LI);
116 };
117 
118 class IntrinsicCostAttributes {
119   const IntrinsicInst *II = nullptr;
120   Type *RetTy = nullptr;
121   Intrinsic::ID IID;
122   SmallVector<Type *, 4> ParamTys;
123   SmallVector<const Value *, 4> Arguments;
124   FastMathFlags FMF;
125   // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
126   // arguments and the return value will be computed based on types.
127   InstructionCost ScalarizationCost = InstructionCost::getInvalid();
128 
129 public:
130   IntrinsicCostAttributes(
131       Intrinsic::ID Id, const CallBase &CI,
132       InstructionCost ScalarCost = InstructionCost::getInvalid(),
133       bool TypeBasedOnly = false);
134 
135   IntrinsicCostAttributes(
136       Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
137       FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
138       InstructionCost ScalarCost = InstructionCost::getInvalid());
139 
140   IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
141                           ArrayRef<const Value *> Args);
142 
143   IntrinsicCostAttributes(
144       Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
145       ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
146       const IntrinsicInst *I = nullptr,
147       InstructionCost ScalarCost = InstructionCost::getInvalid());
148 
149   Intrinsic::ID getID() const { return IID; }
150   const IntrinsicInst *getInst() const { return II; }
151   Type *getReturnType() const { return RetTy; }
152   FastMathFlags getFlags() const { return FMF; }
153   InstructionCost getScalarizationCost() const { return ScalarizationCost; }
154   const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
155   const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
156 
157   bool isTypeBasedOnly() const {
158     return Arguments.empty();
159   }
160 
161   bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
162 };
163 
164 enum class PredicationStyle { None, Data, DataAndControlFlow };
165 
166 class TargetTransformInfo;
167 typedef TargetTransformInfo TTI;
168 
169 /// This pass provides access to the codegen interfaces that are needed
170 /// for IR-level transformations.
171 class TargetTransformInfo {
172 public:
173   /// Construct a TTI object using a type implementing the \c Concept
174   /// API below.
175   ///
176   /// This is used by targets to construct a TTI wrapping their target-specific
177   /// implementation that encodes appropriate costs for their target.
178   template <typename T> TargetTransformInfo(T Impl);
179 
180   /// Construct a baseline TTI object using a minimal implementation of
181   /// the \c Concept API below.
182   ///
183   /// The TTI implementation will reflect the information in the DataLayout
184   /// provided if non-null.
185   explicit TargetTransformInfo(const DataLayout &DL);
186 
187   // Provide move semantics.
188   TargetTransformInfo(TargetTransformInfo &&Arg);
189   TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
190 
191   // We need to define the destructor out-of-line to define our sub-classes
192   // out-of-line.
193   ~TargetTransformInfo();
194 
195   /// Handle the invalidation of this information.
196   ///
197   /// When used as a result of \c TargetIRAnalysis this method will be called
198   /// when the function this was computed for changes. When it returns false,
199   /// the information is preserved across those changes.
200   bool invalidate(Function &, const PreservedAnalyses &,
201                   FunctionAnalysisManager::Invalidator &) {
202     // FIXME: We should probably in some way ensure that the subtarget
203     // information for a function hasn't changed.
204     return false;
205   }
206 
207   /// \name Generic Target Information
208   /// @{
209 
210   /// The kind of cost model.
211   ///
212   /// There are several different cost models that can be customized by the
213   /// target. The normalization of each cost model may be target specific.
214   enum TargetCostKind {
215     TCK_RecipThroughput, ///< Reciprocal throughput.
216     TCK_Latency,         ///< The latency of instruction.
217     TCK_CodeSize,        ///< Instruction code size.
218     TCK_SizeAndLatency   ///< The weighted sum of size and latency.
219   };
220 
221   /// Query the cost of a specified instruction.
222   ///
223   /// Clients should use this interface to query the cost of an existing
224   /// instruction. The instruction must have a valid parent (basic block).
225   ///
226   /// Note, this method does not cache the cost calculation and it
227   /// can be expensive in some cases.
228   InstructionCost getInstructionCost(const Instruction *I,
229                                      enum TargetCostKind kind) const {
230     InstructionCost Cost;
231     switch (kind) {
232     case TCK_RecipThroughput:
233       Cost = getInstructionThroughput(I);
234       break;
235     case TCK_Latency:
236       Cost = getInstructionLatency(I);
237       break;
238     case TCK_CodeSize:
239     case TCK_SizeAndLatency:
240       Cost = getUserCost(I, kind);
241       break;
242     }
243     return Cost;
244   }
245 
246   /// Underlying constants for 'cost' values in this interface.
247   ///
248   /// Many APIs in this interface return a cost. This enum defines the
249   /// fundamental values that should be used to interpret (and produce) those
250   /// costs. The costs are returned as an int rather than a member of this
251   /// enumeration because it is expected that the cost of one IR instruction
252   /// may have a multiplicative factor to it or otherwise won't fit directly
253   /// into the enum. Moreover, it is common to sum or average costs which works
254   /// better as simple integral values. Thus this enum only provides constants.
255   /// Also note that the returned costs are signed integers to make it natural
256   /// to add, subtract, and test with zero (a common boundary condition). It is
257   /// not expected that 2^32 is a realistic cost to be modeling at any point.
258   ///
259   /// Note that these costs should usually reflect the intersection of code-size
260   /// cost and execution cost. A free instruction is typically one that folds
261   /// into another instruction. For example, reg-to-reg moves can often be
262   /// skipped by renaming the registers in the CPU, but they still are encoded
263   /// and thus wouldn't be considered 'free' here.
264   enum TargetCostConstants {
265     TCC_Free = 0,     ///< Expected to fold away in lowering.
266     TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
267     TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
268   };
269 
270   /// Estimate the cost of a GEP operation when lowered.
271   InstructionCost
272   getGEPCost(Type *PointeeType, const Value *Ptr,
273              ArrayRef<const Value *> Operands,
274              TargetCostKind CostKind = TCK_SizeAndLatency) const;
275 
276   /// \returns A value by which our inlining threshold should be multiplied.
277   /// This is primarily used to bump up the inlining threshold wholesale on
278   /// targets where calls are unusually expensive.
279   ///
280   /// TODO: This is a rather blunt instrument.  Perhaps altering the costs of
281   /// individual classes of instructions would be better.
282   unsigned getInliningThresholdMultiplier() const;
283 
284   /// \returns A value to be added to the inlining threshold.
285   unsigned adjustInliningThreshold(const CallBase *CB) const;
286 
287   /// \returns Vector bonus in percent.
288   ///
289   /// Vector bonuses: We want to more aggressively inline vector-dense kernels
290   /// and apply this bonus based on the percentage of vector instructions. A
291   /// bonus is applied if the vector instructions exceed 50% and half that
292   /// amount is applied if it exceeds 10%. Note that these bonuses are some what
293   /// arbitrary and evolved over time by accident as much as because they are
294   /// principled bonuses.
295   /// FIXME: It would be nice to base the bonus values on something more
296   /// scientific. A target may has no bonus on vector instructions.
297   int getInlinerVectorBonusPercent() const;
298 
299   /// \return the expected cost of a memcpy, which could e.g. depend on the
300   /// source/destination type and alignment and the number of bytes copied.
301   InstructionCost getMemcpyCost(const Instruction *I) const;
302 
303   /// \return The estimated number of case clusters when lowering \p 'SI'.
304   /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
305   /// table.
306   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
307                                             unsigned &JTSize,
308                                             ProfileSummaryInfo *PSI,
309                                             BlockFrequencyInfo *BFI) const;
310 
311   /// Estimate the cost of a given IR user when lowered.
312   ///
313   /// This can estimate the cost of either a ConstantExpr or Instruction when
314   /// lowered.
315   ///
316   /// \p Operands is a list of operands which can be a result of transformations
317   /// of the current operands. The number of the operands on the list must equal
318   /// to the number of the current operands the IR user has. Their order on the
319   /// list must be the same as the order of the current operands the IR user
320   /// has.
321   ///
322   /// The returned cost is defined in terms of \c TargetCostConstants, see its
323   /// comments for a detailed explanation of the cost values.
324   InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
325                               TargetCostKind CostKind) const;
326 
327   /// This is a helper function which calls the two-argument getUserCost
328   /// with \p Operands which are the current operands U has.
329   InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const {
330     SmallVector<const Value *, 4> Operands(U->operand_values());
331     return getUserCost(U, Operands, CostKind);
332   }
333 
334   /// If a branch or a select condition is skewed in one direction by more than
335   /// this factor, it is very likely to be predicted correctly.
336   BranchProbability getPredictableBranchThreshold() const;
337 
338   /// Return true if branch divergence exists.
339   ///
340   /// Branch divergence has a significantly negative impact on GPU performance
341   /// when threads in the same wavefront take different paths due to conditional
342   /// branches.
343   bool hasBranchDivergence() const;
344 
345   /// Return true if the target prefers to use GPU divergence analysis to
346   /// replace the legacy version.
347   bool useGPUDivergenceAnalysis() const;
348 
349   /// Returns whether V is a source of divergence.
350   ///
351   /// This function provides the target-dependent information for
352   /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
353   /// first builds the dependency graph, and then runs the reachability
354   /// algorithm starting with the sources of divergence.
355   bool isSourceOfDivergence(const Value *V) const;
356 
357   // Returns true for the target specific
358   // set of operations which produce uniform result
359   // even taking non-uniform arguments
360   bool isAlwaysUniform(const Value *V) const;
361 
362   /// Returns the address space ID for a target's 'flat' address space. Note
363   /// this is not necessarily the same as addrspace(0), which LLVM sometimes
364   /// refers to as the generic address space. The flat address space is a
365   /// generic address space that can be used access multiple segments of memory
366   /// with different address spaces. Access of a memory location through a
367   /// pointer with this address space is expected to be legal but slower
368   /// compared to the same memory location accessed through a pointer with a
369   /// different address space.
370   //
371   /// This is for targets with different pointer representations which can
372   /// be converted with the addrspacecast instruction. If a pointer is converted
373   /// to this address space, optimizations should attempt to replace the access
374   /// with the source address space.
375   ///
376   /// \returns ~0u if the target does not have such a flat address space to
377   /// optimize away.
378   unsigned getFlatAddressSpace() const;
379 
380   /// Return any intrinsic address operand indexes which may be rewritten if
381   /// they use a flat address space pointer.
382   ///
383   /// \returns true if the intrinsic was handled.
384   bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
385                                   Intrinsic::ID IID) const;
386 
387   bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
388 
389   /// Return true if globals in this address space can have initializers other
390   /// than `undef`.
391   bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
392 
393   unsigned getAssumedAddrSpace(const Value *V) const;
394 
395   std::pair<const Value *, unsigned>
396   getPredicatedAddrSpace(const Value *V) const;
397 
398   /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
399   /// NewV, which has a different address space. This should happen for every
400   /// operand index that collectFlatAddressOperands returned for the intrinsic.
401   /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
402   /// new value (which may be the original \p II with modified operands).
403   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
404                                           Value *NewV) const;
405 
406   /// Test whether calls to a function lower to actual program function
407   /// calls.
408   ///
409   /// The idea is to test whether the program is likely to require a 'call'
410   /// instruction or equivalent in order to call the given function.
411   ///
412   /// FIXME: It's not clear that this is a good or useful query API. Client's
413   /// should probably move to simpler cost metrics using the above.
414   /// Alternatively, we could split the cost interface into distinct code-size
415   /// and execution-speed costs. This would allow modelling the core of this
416   /// query more accurately as a call is a single small instruction, but
417   /// incurs significant execution cost.
418   bool isLoweredToCall(const Function *F) const;
419 
420   struct LSRCost {
421     /// TODO: Some of these could be merged. Also, a lexical ordering
422     /// isn't always optimal.
423     unsigned Insns;
424     unsigned NumRegs;
425     unsigned AddRecCost;
426     unsigned NumIVMuls;
427     unsigned NumBaseAdds;
428     unsigned ImmCost;
429     unsigned SetupCost;
430     unsigned ScaleCost;
431   };
432 
433   /// Parameters that control the generic loop unrolling transformation.
434   struct UnrollingPreferences {
435     /// The cost threshold for the unrolled loop. Should be relative to the
436     /// getUserCost values returned by this API, and the expectation is that
437     /// the unrolled loop's instructions when run through that interface should
438     /// not exceed this cost. However, this is only an estimate. Also, specific
439     /// loops may be unrolled even with a cost above this threshold if deemed
440     /// profitable. Set this to UINT_MAX to disable the loop body cost
441     /// restriction.
442     unsigned Threshold;
443     /// If complete unrolling will reduce the cost of the loop, we will boost
444     /// the Threshold by a certain percent to allow more aggressive complete
445     /// unrolling. This value provides the maximum boost percentage that we
446     /// can apply to Threshold (The value should be no less than 100).
447     /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
448     ///                                    MaxPercentThresholdBoost / 100)
449     /// E.g. if complete unrolling reduces the loop execution time by 50%
450     /// then we boost the threshold by the factor of 2x. If unrolling is not
451     /// expected to reduce the running time, then we do not increase the
452     /// threshold.
453     unsigned MaxPercentThresholdBoost;
454     /// The cost threshold for the unrolled loop when optimizing for size (set
455     /// to UINT_MAX to disable).
456     unsigned OptSizeThreshold;
457     /// The cost threshold for the unrolled loop, like Threshold, but used
458     /// for partial/runtime unrolling (set to UINT_MAX to disable).
459     unsigned PartialThreshold;
460     /// The cost threshold for the unrolled loop when optimizing for size, like
461     /// OptSizeThreshold, but used for partial/runtime unrolling (set to
462     /// UINT_MAX to disable).
463     unsigned PartialOptSizeThreshold;
464     /// A forced unrolling factor (the number of concatenated bodies of the
465     /// original loop in the unrolled loop body). When set to 0, the unrolling
466     /// transformation will select an unrolling factor based on the current cost
467     /// threshold and other factors.
468     unsigned Count;
469     /// Default unroll count for loops with run-time trip count.
470     unsigned DefaultUnrollRuntimeCount;
471     // Set the maximum unrolling factor. The unrolling factor may be selected
472     // using the appropriate cost threshold, but may not exceed this number
473     // (set to UINT_MAX to disable). This does not apply in cases where the
474     // loop is being fully unrolled.
475     unsigned MaxCount;
476     /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
477     /// applies even if full unrolling is selected. This allows a target to fall
478     /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
479     unsigned FullUnrollMaxCount;
480     // Represents number of instructions optimized when "back edge"
481     // becomes "fall through" in unrolled loop.
482     // For now we count a conditional branch on a backedge and a comparison
483     // feeding it.
484     unsigned BEInsns;
485     /// Allow partial unrolling (unrolling of loops to expand the size of the
486     /// loop body, not only to eliminate small constant-trip-count loops).
487     bool Partial;
488     /// Allow runtime unrolling (unrolling of loops to expand the size of the
489     /// loop body even when the number of loop iterations is not known at
490     /// compile time).
491     bool Runtime;
492     /// Allow generation of a loop remainder (extra iterations after unroll).
493     bool AllowRemainder;
494     /// Allow emitting expensive instructions (such as divisions) when computing
495     /// the trip count of a loop for runtime unrolling.
496     bool AllowExpensiveTripCount;
497     /// Apply loop unroll on any kind of loop
498     /// (mainly to loops that fail runtime unrolling).
499     bool Force;
500     /// Allow using trip count upper bound to unroll loops.
501     bool UpperBound;
502     /// Allow unrolling of all the iterations of the runtime loop remainder.
503     bool UnrollRemainder;
504     /// Allow unroll and jam. Used to enable unroll and jam for the target.
505     bool UnrollAndJam;
506     /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
507     /// value above is used during unroll and jam for the outer loop size.
508     /// This value is used in the same manner to limit the size of the inner
509     /// loop.
510     unsigned UnrollAndJamInnerLoopThreshold;
511     /// Don't allow loop unrolling to simulate more than this number of
512     /// iterations when checking full unroll profitability
513     unsigned MaxIterationsCountToAnalyze;
514   };
515 
516   /// Get target-customized preferences for the generic loop unrolling
517   /// transformation. The caller will initialize UP with the current
518   /// target-independent defaults.
519   void getUnrollingPreferences(Loop *L, ScalarEvolution &,
520                                UnrollingPreferences &UP,
521                                OptimizationRemarkEmitter *ORE) const;
522 
523   /// Query the target whether it would be profitable to convert the given loop
524   /// into a hardware loop.
525   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
526                                 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
527                                 HardwareLoopInfo &HWLoopInfo) const;
528 
529   /// Query the target whether it would be prefered to create a predicated
530   /// vector loop, which can avoid the need to emit a scalar epilogue loop.
531   bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
532                                    AssumptionCache &AC, TargetLibraryInfo *TLI,
533                                    DominatorTree *DT,
534                                    LoopVectorizationLegality *LVL) const;
535 
536   /// Query the target whether lowering of the llvm.get.active.lane.mask
537   /// intrinsic is supported and how the mask should be used. A return value
538   /// of PredicationStyle::Data indicates the mask is used as data only,
539   /// whereas PredicationStyle::DataAndControlFlow indicates we should also use
540   /// the mask for control flow in the loop. If unsupported the return value is
541   /// PredicationStyle::None.
542   PredicationStyle emitGetActiveLaneMask() const;
543 
544   // Parameters that control the loop peeling transformation
545   struct PeelingPreferences {
546     /// A forced peeling factor (the number of bodied of the original loop
547     /// that should be peeled off before the loop body). When set to 0, the
548     /// a peeling factor based on profile information and other factors.
549     unsigned PeelCount;
550     /// Allow peeling off loop iterations.
551     bool AllowPeeling;
552     /// Allow peeling off loop iterations for loop nests.
553     bool AllowLoopNestsPeeling;
554     /// Allow peeling basing on profile. Uses to enable peeling off all
555     /// iterations basing on provided profile.
556     /// If the value is true the peeling cost model can decide to peel only
557     /// some iterations and in this case it will set this to false.
558     bool PeelProfiledIterations;
559   };
560 
561   /// Get target-customized preferences for the generic loop peeling
562   /// transformation. The caller will initialize \p PP with the current
563   /// target-independent defaults with information from \p L and \p SE.
564   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
565                              PeelingPreferences &PP) const;
566 
567   /// Targets can implement their own combinations for target-specific
568   /// intrinsics. This function will be called from the InstCombine pass every
569   /// time a target-specific intrinsic is encountered.
570   ///
571   /// \returns None to not do anything target specific or a value that will be
572   /// returned from the InstCombiner. It is possible to return null and stop
573   /// further processing of the intrinsic by returning nullptr.
574   Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
575                                                IntrinsicInst &II) const;
576   /// Can be used to implement target-specific instruction combining.
577   /// \see instCombineIntrinsic
578   Optional<Value *>
579   simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
580                                    APInt DemandedMask, KnownBits &Known,
581                                    bool &KnownBitsComputed) const;
582   /// Can be used to implement target-specific instruction combining.
583   /// \see instCombineIntrinsic
584   Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
585       InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
586       APInt &UndefElts2, APInt &UndefElts3,
587       std::function<void(Instruction *, unsigned, APInt, APInt &)>
588           SimplifyAndSetOp) const;
589   /// @}
590 
591   /// \name Scalar Target Information
592   /// @{
593 
594   /// Flags indicating the kind of support for population count.
595   ///
596   /// Compared to the SW implementation, HW support is supposed to
597   /// significantly boost the performance when the population is dense, and it
598   /// may or may not degrade performance if the population is sparse. A HW
599   /// support is considered as "Fast" if it can outperform, or is on a par
600   /// with, SW implementation when the population is sparse; otherwise, it is
601   /// considered as "Slow".
602   enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
603 
604   /// Return true if the specified immediate is legal add immediate, that
605   /// is the target has add instructions which can add a register with the
606   /// immediate without having to materialize the immediate into a register.
607   bool isLegalAddImmediate(int64_t Imm) const;
608 
609   /// Return true if the specified immediate is legal icmp immediate,
610   /// that is the target has icmp instructions which can compare a register
611   /// against the immediate without having to materialize the immediate into a
612   /// register.
613   bool isLegalICmpImmediate(int64_t Imm) const;
614 
615   /// Return true if the addressing mode represented by AM is legal for
616   /// this target, for a load/store of the specified type.
617   /// The type may be VoidTy, in which case only return true if the addressing
618   /// mode is legal for a load/store of any legal type.
619   /// If target returns true in LSRWithInstrQueries(), I may be valid.
620   /// TODO: Handle pre/postinc as well.
621   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
622                              bool HasBaseReg, int64_t Scale,
623                              unsigned AddrSpace = 0,
624                              Instruction *I = nullptr) const;
625 
626   /// Return true if LSR cost of C1 is lower than C1.
627   bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
628                      const TargetTransformInfo::LSRCost &C2) const;
629 
630   /// Return true if LSR major cost is number of registers. Targets which
631   /// implement their own isLSRCostLess and unset number of registers as major
632   /// cost should return false, otherwise return true.
633   bool isNumRegsMajorCostOfLSR() const;
634 
635   /// \returns true if LSR should not optimize a chain that includes \p I.
636   bool isProfitableLSRChainElement(Instruction *I) const;
637 
638   /// Return true if the target can fuse a compare and branch.
639   /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
640   /// calculation for the instructions in a loop.
641   bool canMacroFuseCmp() const;
642 
643   /// Return true if the target can save a compare for loop count, for example
644   /// hardware loop saves a compare.
645   bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
646                   DominatorTree *DT, AssumptionCache *AC,
647                   TargetLibraryInfo *LibInfo) const;
648 
649   enum AddressingModeKind {
650     AMK_PreIndexed,
651     AMK_PostIndexed,
652     AMK_None
653   };
654 
655   /// Return the preferred addressing mode LSR should make efforts to generate.
656   AddressingModeKind getPreferredAddressingMode(const Loop *L,
657                                                 ScalarEvolution *SE) const;
658 
659   /// Return true if the target supports masked store.
660   bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
661   /// Return true if the target supports masked load.
662   bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
663 
664   /// Return true if the target supports nontemporal store.
665   bool isLegalNTStore(Type *DataType, Align Alignment) const;
666   /// Return true if the target supports nontemporal load.
667   bool isLegalNTLoad(Type *DataType, Align Alignment) const;
668 
669   /// \Returns true if the target supports broadcasting a load to a vector of
670   /// type <NumElements x ElementTy>.
671   bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
672 
673   /// Return true if the target supports masked scatter.
674   bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
675   /// Return true if the target supports masked gather.
676   bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
677   /// Return true if the target forces scalarizing of llvm.masked.gather
678   /// intrinsics.
679   bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
680   /// Return true if the target forces scalarizing of llvm.masked.scatter
681   /// intrinsics.
682   bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
683 
684   /// Return true if the target supports masked compress store.
685   bool isLegalMaskedCompressStore(Type *DataType) const;
686   /// Return true if the target supports masked expand load.
687   bool isLegalMaskedExpandLoad(Type *DataType) const;
688 
689   /// Return true if this is an alternating opcode pattern that can be lowered
690   /// to a single instruction on the target. In X86 this is for the addsub
691   /// instruction which corrsponds to a Shuffle + Fadd + FSub pattern in IR.
692   /// This function expectes two opcodes: \p Opcode1 and \p Opcode2 being
693   /// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
694   /// when \p Opcode0 is selected and `1` when Opcode1 is selected.
695   /// \p VecTy is the vector type of the instruction to be generated.
696   bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
697                        const SmallBitVector &OpcodeMask) const;
698 
699   /// Return true if we should be enabling ordered reductions for the target.
700   bool enableOrderedReductions() const;
701 
702   /// Return true if the target has a unified operation to calculate division
703   /// and remainder. If so, the additional implicit multiplication and
704   /// subtraction required to calculate a remainder from division are free. This
705   /// can enable more aggressive transformations for division and remainder than
706   /// would typically be allowed using throughput or size cost models.
707   bool hasDivRemOp(Type *DataType, bool IsSigned) const;
708 
709   /// Return true if the given instruction (assumed to be a memory access
710   /// instruction) has a volatile variant. If that's the case then we can avoid
711   /// addrspacecast to generic AS for volatile loads/stores. Default
712   /// implementation returns false, which prevents address space inference for
713   /// volatile loads/stores.
714   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
715 
716   /// Return true if target doesn't mind addresses in vectors.
717   bool prefersVectorizedAddressing() const;
718 
719   /// Return the cost of the scaling factor used in the addressing
720   /// mode represented by AM for this target, for a load/store
721   /// of the specified type.
722   /// If the AM is supported, the return value must be >= 0.
723   /// If the AM is not supported, it returns a negative value.
724   /// TODO: Handle pre/postinc as well.
725   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
726                                        int64_t BaseOffset, bool HasBaseReg,
727                                        int64_t Scale,
728                                        unsigned AddrSpace = 0) const;
729 
730   /// Return true if the loop strength reduce pass should make
731   /// Instruction* based TTI queries to isLegalAddressingMode(). This is
732   /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
733   /// immediate offset and no index register.
734   bool LSRWithInstrQueries() const;
735 
736   /// Return true if it's free to truncate a value of type Ty1 to type
737   /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
738   /// by referencing its sub-register AX.
739   bool isTruncateFree(Type *Ty1, Type *Ty2) const;
740 
741   /// Return true if it is profitable to hoist instruction in the
742   /// then/else to before if.
743   bool isProfitableToHoist(Instruction *I) const;
744 
745   bool useAA() const;
746 
747   /// Return true if this type is legal.
748   bool isTypeLegal(Type *Ty) const;
749 
750   /// Returns the estimated number of registers required to represent \p Ty.
751   unsigned getRegUsageForType(Type *Ty) const;
752 
753   /// Return true if switches should be turned into lookup tables for the
754   /// target.
755   bool shouldBuildLookupTables() const;
756 
757   /// Return true if switches should be turned into lookup tables
758   /// containing this constant value for the target.
759   bool shouldBuildLookupTablesForConstant(Constant *C) const;
760 
761   /// Return true if lookup tables should be turned into relative lookup tables.
762   bool shouldBuildRelLookupTables() const;
763 
764   /// Return true if the input function which is cold at all call sites,
765   ///  should use coldcc calling convention.
766   bool useColdCCForColdCall(Function &F) const;
767 
768   /// Estimate the overhead of scalarizing an instruction. Insert and Extract
769   /// are set if the demanded result elements need to be inserted and/or
770   /// extracted from vectors.
771   InstructionCost getScalarizationOverhead(VectorType *Ty,
772                                            const APInt &DemandedElts,
773                                            bool Insert, bool Extract) const;
774 
775   /// Estimate the overhead of scalarizing an instructions unique
776   /// non-constant operands. The (potentially vector) types to use for each of
777   /// argument are passes via Tys.
778   InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
779                                                    ArrayRef<Type *> Tys) const;
780 
781   /// If target has efficient vector element load/store instructions, it can
782   /// return true here so that insertion/extraction costs are not added to
783   /// the scalarization cost of a load/store.
784   bool supportsEfficientVectorElementLoadStore() const;
785 
786   /// If the target supports tail calls.
787   bool supportsTailCalls() const;
788 
789   /// Don't restrict interleaved unrolling to small loops.
790   bool enableAggressiveInterleaving(bool LoopHasReductions) const;
791 
792   /// Returns options for expansion of memcmp. IsZeroCmp is
793   // true if this is the expansion of memcmp(p1, p2, s) == 0.
794   struct MemCmpExpansionOptions {
795     // Return true if memcmp expansion is enabled.
796     operator bool() const { return MaxNumLoads > 0; }
797 
798     // Maximum number of load operations.
799     unsigned MaxNumLoads = 0;
800 
801     // The list of available load sizes (in bytes), sorted in decreasing order.
802     SmallVector<unsigned, 8> LoadSizes;
803 
804     // For memcmp expansion when the memcmp result is only compared equal or
805     // not-equal to 0, allow up to this number of load pairs per block. As an
806     // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
807     //   a0 = load2bytes &a[0]
808     //   b0 = load2bytes &b[0]
809     //   a2 = load1byte  &a[2]
810     //   b2 = load1byte  &b[2]
811     //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0
812     unsigned NumLoadsPerBlock = 1;
813 
814     // Set to true to allow overlapping loads. For example, 7-byte compares can
815     // be done with two 4-byte compares instead of 4+2+1-byte compares. This
816     // requires all loads in LoadSizes to be doable in an unaligned way.
817     bool AllowOverlappingLoads = false;
818   };
819   MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
820                                                bool IsZeroCmp) const;
821 
822   /// Enable matching of interleaved access groups.
823   bool enableInterleavedAccessVectorization() const;
824 
825   /// Enable matching of interleaved access groups that contain predicated
826   /// accesses or gaps and therefore vectorized using masked
827   /// vector loads/stores.
828   bool enableMaskedInterleavedAccessVectorization() const;
829 
830   /// Indicate that it is potentially unsafe to automatically vectorize
831   /// floating-point operations because the semantics of vector and scalar
832   /// floating-point semantics may differ. For example, ARM NEON v7 SIMD math
833   /// does not support IEEE-754 denormal numbers, while depending on the
834   /// platform, scalar floating-point math does.
835   /// This applies to floating-point math operations and calls, not memory
836   /// operations, shuffles, or casts.
837   bool isFPVectorizationPotentiallyUnsafe() const;
838 
839   /// Determine if the target supports unaligned memory accesses.
840   bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
841                                       unsigned AddressSpace = 0,
842                                       Align Alignment = Align(1),
843                                       bool *Fast = nullptr) const;
844 
845   /// Return hardware support for population count.
846   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
847 
848   /// Return true if the hardware has a fast square-root instruction.
849   bool haveFastSqrt(Type *Ty) const;
850 
851   /// Return true if it is faster to check if a floating-point value is NaN
852   /// (or not-NaN) versus a comparison against a constant FP zero value.
853   /// Targets should override this if materializing a 0.0 for comparison is
854   /// generally as cheap as checking for ordered/unordered.
855   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
856 
857   /// Return the expected cost of supporting the floating point operation
858   /// of the specified type.
859   InstructionCost getFPOpCost(Type *Ty) const;
860 
861   /// Return the expected cost of materializing for the given integer
862   /// immediate of the specified type.
863   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
864                                 TargetCostKind CostKind) const;
865 
866   /// Return the expected cost of materialization for the given integer
867   /// immediate of the specified type for a given instruction. The cost can be
868   /// zero if the immediate can be folded into the specified instruction.
869   InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
870                                     const APInt &Imm, Type *Ty,
871                                     TargetCostKind CostKind,
872                                     Instruction *Inst = nullptr) const;
873   InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
874                                       const APInt &Imm, Type *Ty,
875                                       TargetCostKind CostKind) const;
876 
877   /// Return the expected cost for the given integer when optimising
878   /// for size. This is different than the other integer immediate cost
879   /// functions in that it is subtarget agnostic. This is useful when you e.g.
880   /// target one ISA such as Aarch32 but smaller encodings could be possible
881   /// with another such as Thumb. This return value is used as a penalty when
882   /// the total costs for a constant is calculated (the bigger the cost, the
883   /// more beneficial constant hoisting is).
884   InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
885                                         const APInt &Imm, Type *Ty) const;
886   /// @}
887 
888   /// \name Vector Target Information
889   /// @{
890 
891   /// The various kinds of shuffle patterns for vector queries.
892   enum ShuffleKind {
893     SK_Broadcast,        ///< Broadcast element 0 to all other elements.
894     SK_Reverse,          ///< Reverse the order of the vector.
895     SK_Select,           ///< Selects elements from the corresponding lane of
896                          ///< either source operand. This is equivalent to a
897                          ///< vector select with a constant condition operand.
898     SK_Transpose,        ///< Transpose two vectors.
899     SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
900     SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
901     SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
902                          ///< with any shuffle mask.
903     SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
904                          ///< shuffle mask.
905     SK_Splice            ///< Concatenates elements from the first input vector
906                          ///< with elements of the second input vector. Returning
907                          ///< a vector of the same type as the input vectors.
908   };
909 
910   /// Additional information about an operand's possible values.
911   enum OperandValueKind {
912     OK_AnyValue,               // Operand can have any value.
913     OK_UniformValue,           // Operand is uniform (splat of a value).
914     OK_UniformConstantValue,   // Operand is uniform constant.
915     OK_NonUniformConstantValue // Operand is a non uniform constant value.
916   };
917 
918   /// Additional properties of an operand's values.
919   enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
920 
921   /// \return the number of registers in the target-provided register class.
922   unsigned getNumberOfRegisters(unsigned ClassID) const;
923 
924   /// \return the target-provided register class ID for the provided type,
925   /// accounting for type promotion and other type-legalization techniques that
926   /// the target might apply. However, it specifically does not account for the
927   /// scalarization or splitting of vector types. Should a vector type require
928   /// scalarization or splitting into multiple underlying vector registers, that
929   /// type should be mapped to a register class containing no registers.
930   /// Specifically, this is designed to provide a simple, high-level view of the
931   /// register allocation later performed by the backend. These register classes
932   /// don't necessarily map onto the register classes used by the backend.
933   /// FIXME: It's not currently possible to determine how many registers
934   /// are used by the provided type.
935   unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
936 
937   /// \return the target-provided register class name
938   const char *getRegisterClassName(unsigned ClassID) const;
939 
940   enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector };
941 
942   /// \return The width of the largest scalar or vector register type.
943   TypeSize getRegisterBitWidth(RegisterKind K) const;
944 
945   /// \return The width of the smallest vector register type.
946   unsigned getMinVectorRegisterBitWidth() const;
947 
948   /// \return The maximum value of vscale if the target specifies an
949   ///  architectural maximum vector length, and None otherwise.
950   Optional<unsigned> getMaxVScale() const;
951 
952   /// \return the value of vscale to tune the cost model for.
953   Optional<unsigned> getVScaleForTuning() const;
954 
955   /// \return True if the vectorization factor should be chosen to
956   /// make the vector of the smallest element type match the size of a
957   /// vector register. For wider element types, this could result in
958   /// creating vectors that span multiple vector registers.
959   /// If false, the vectorization factor will be chosen based on the
960   /// size of the widest element type.
961   /// \p K Register Kind for vectorization.
962   bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
963 
964   /// \return The minimum vectorization factor for types of given element
965   /// bit width, or 0 if there is no minimum VF. The returned value only
966   /// applies when shouldMaximizeVectorBandwidth returns true.
967   /// If IsScalable is true, the returned ElementCount must be a scalable VF.
968   ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
969 
970   /// \return The maximum vectorization factor for types of given element
971   /// bit width and opcode, or 0 if there is no maximum VF.
972   /// Currently only used by the SLP vectorizer.
973   unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
974 
975   /// \return The minimum vectorization factor for the store instruction. Given
976   /// the initial estimation of the minimum vector factor and store value type,
977   /// it tries to find possible lowest VF, which still might be profitable for
978   /// the vectorization.
979   /// \param VF Initial estimation of the minimum vector factor.
980   /// \param ScalarMemTy Scalar memory type of the store operation.
981   /// \param ScalarValTy Scalar type of the stored value.
982   /// Currently only used by the SLP vectorizer.
983   unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
984                              Type *ScalarValTy) const;
985 
986   /// \return True if it should be considered for address type promotion.
987   /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
988   /// profitable without finding other extensions fed by the same input.
989   bool shouldConsiderAddressTypePromotion(
990       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
991 
992   /// \return The size of a cache line in bytes.
993   unsigned getCacheLineSize() const;
994 
995   /// The possible cache levels
996   enum class CacheLevel {
997     L1D, // The L1 data cache
998     L2D, // The L2 data cache
999 
1000     // We currently do not model L3 caches, as their sizes differ widely between
1001     // microarchitectures. Also, we currently do not have a use for L3 cache
1002     // size modeling yet.
1003   };
1004 
1005   /// \return The size of the cache level in bytes, if available.
1006   Optional<unsigned> getCacheSize(CacheLevel Level) const;
1007 
1008   /// \return The associativity of the cache level, if available.
1009   Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1010 
1011   /// \return How much before a load we should place the prefetch
1012   /// instruction.  This is currently measured in number of
1013   /// instructions.
1014   unsigned getPrefetchDistance() const;
1015 
1016   /// Some HW prefetchers can handle accesses up to a certain constant stride.
1017   /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1018   /// and the arguments provided are meant to serve as a basis for deciding this
1019   /// for a particular loop.
1020   ///
1021   /// \param NumMemAccesses        Number of memory accesses in the loop.
1022   /// \param NumStridedMemAccesses Number of the memory accesses that
1023   ///                              ScalarEvolution could find a known stride
1024   ///                              for.
1025   /// \param NumPrefetches         Number of software prefetches that will be
1026   ///                              emitted as determined by the addresses
1027   ///                              involved and the cache line size.
1028   /// \param HasCall               True if the loop contains a call.
1029   ///
1030   /// \return This is the minimum stride in bytes where it makes sense to start
1031   ///         adding SW prefetches. The default is 1, i.e. prefetch with any
1032   ///         stride.
1033   unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1034                                 unsigned NumStridedMemAccesses,
1035                                 unsigned NumPrefetches, bool HasCall) const;
1036 
1037   /// \return The maximum number of iterations to prefetch ahead.  If
1038   /// the required number of iterations is more than this number, no
1039   /// prefetching is performed.
1040   unsigned getMaxPrefetchIterationsAhead() const;
1041 
1042   /// \return True if prefetching should also be done for writes.
1043   bool enableWritePrefetching() const;
1044 
1045   /// \return The maximum interleave factor that any transform should try to
1046   /// perform for this target. This number depends on the level of parallelism
1047   /// and the number of execution units in the CPU.
1048   unsigned getMaxInterleaveFactor(unsigned VF) const;
1049 
1050   /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1051   static OperandValueKind getOperandInfo(const Value *V,
1052                                          OperandValueProperties &OpProps);
1053 
1054   /// This is an approximation of reciprocal throughput of a math/logic op.
1055   /// A higher cost indicates less expected throughput.
1056   /// From Agner Fog's guides, reciprocal throughput is "the average number of
1057   /// clock cycles per instruction when the instructions are not part of a
1058   /// limiting dependency chain."
1059   /// Therefore, costs should be scaled to account for multiple execution units
1060   /// on the target that can process this type of instruction. For example, if
1061   /// there are 5 scalar integer units and 2 vector integer units that can
1062   /// calculate an 'add' in a single cycle, this model should indicate that the
1063   /// cost of the vector add instruction is 2.5 times the cost of the scalar
1064   /// add instruction.
1065   /// \p Args is an optional argument which holds the instruction operands
1066   /// values so the TTI can analyze those values searching for special
1067   /// cases or optimizations based on those values.
1068   /// \p CxtI is the optional original context instruction, if one exists, to
1069   /// provide even more information.
1070   InstructionCost getArithmeticInstrCost(
1071       unsigned Opcode, Type *Ty,
1072       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1073       OperandValueKind Opd1Info = OK_AnyValue,
1074       OperandValueKind Opd2Info = OK_AnyValue,
1075       OperandValueProperties Opd1PropInfo = OP_None,
1076       OperandValueProperties Opd2PropInfo = OP_None,
1077       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
1078       const Instruction *CxtI = nullptr) const;
1079 
1080   /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1081   /// The exact mask may be passed as Mask, or else the array will be empty.
1082   /// The index and subtype parameters are used by the subvector insertion and
1083   /// extraction shuffle kinds to show the insert/extract point and the type of
1084   /// the subvector being inserted/extracted. The operands of the shuffle can be
1085   /// passed through \p Args, which helps improve the cost estimation in some
1086   /// cases, like in broadcast loads.
1087   /// NOTE: For subvector extractions Tp represents the source type.
1088   InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1089                                  ArrayRef<int> Mask = None, int Index = 0,
1090                                  VectorType *SubTp = nullptr,
1091                                  ArrayRef<const Value *> Args = None) const;
1092 
1093   /// Represents a hint about the context in which a cast is used.
1094   ///
1095   /// For zext/sext, the context of the cast is the operand, which must be a
1096   /// load of some kind. For trunc, the context is of the cast is the single
1097   /// user of the instruction, which must be a store of some kind.
1098   ///
1099   /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1100   /// type of cast it's dealing with, as not every cast is equal. For instance,
1101   /// the zext of a load may be free, but the zext of an interleaving load can
1102   //// be (very) expensive!
1103   ///
1104   /// See \c getCastContextHint to compute a CastContextHint from a cast
1105   /// Instruction*. Callers can use it if they don't need to override the
1106   /// context and just want it to be calculated from the instruction.
1107   ///
1108   /// FIXME: This handles the types of load/store that the vectorizer can
1109   /// produce, which are the cases where the context instruction is most
1110   /// likely to be incorrect. There are other situations where that can happen
1111   /// too, which might be handled here but in the long run a more general
1112   /// solution of costing multiple instructions at the same times may be better.
1113   enum class CastContextHint : uint8_t {
1114     None,          ///< The cast is not used with a load/store of any kind.
1115     Normal,        ///< The cast is used with a normal load/store.
1116     Masked,        ///< The cast is used with a masked load/store.
1117     GatherScatter, ///< The cast is used with a gather/scatter.
1118     Interleave,    ///< The cast is used with an interleaved load/store.
1119     Reversed,      ///< The cast is used with a reversed load/store.
1120   };
1121 
1122   /// Calculates a CastContextHint from \p I.
1123   /// This should be used by callers of getCastInstrCost if they wish to
1124   /// determine the context from some instruction.
1125   /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1126   /// or if it's another type of cast.
1127   static CastContextHint getCastContextHint(const Instruction *I);
1128 
1129   /// \return The expected cost of cast instructions, such as bitcast, trunc,
1130   /// zext, etc. If there is an existing instruction that holds Opcode, it
1131   /// may be passed in the 'I' parameter.
1132   InstructionCost
1133   getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1134                    TTI::CastContextHint CCH,
1135                    TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1136                    const Instruction *I = nullptr) const;
1137 
1138   /// \return The expected cost of a sign- or zero-extended vector extract. Use
1139   /// -1 to indicate that there is no information about the index value.
1140   InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1141                                            VectorType *VecTy,
1142                                            unsigned Index = -1) const;
1143 
1144   /// \return The expected cost of control-flow related instructions such as
1145   /// Phi, Ret, Br, Switch.
1146   InstructionCost
1147   getCFInstrCost(unsigned Opcode,
1148                  TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
1149                  const Instruction *I = nullptr) const;
1150 
1151   /// \returns The expected cost of compare and select instructions. If there
1152   /// is an existing instruction that holds Opcode, it may be passed in the
1153   /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1154   /// is using a compare with the specified predicate as condition. When vector
1155   /// types are passed, \p VecPred must be used for all lanes.
1156   InstructionCost
1157   getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1158                      CmpInst::Predicate VecPred,
1159                      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1160                      const Instruction *I = nullptr) const;
1161 
1162   /// \return The expected cost of vector Insert and Extract.
1163   /// Use -1 to indicate that there is no information on the index value.
1164   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1165                                      unsigned Index = -1) const;
1166 
1167   /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1168   /// \p ReplicationFactor times.
1169   ///
1170   /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1171   ///   <0,0,0,1,1,1,2,2,2,3,3,3>
1172   InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1173                                             int VF,
1174                                             const APInt &DemandedDstElts,
1175                                             TTI::TargetCostKind CostKind);
1176 
1177   /// \return The cost of Load and Store instructions.
1178   InstructionCost
1179   getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1180                   unsigned AddressSpace,
1181                   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1182                   const Instruction *I = nullptr) const;
1183 
1184   /// \return The cost of VP Load and Store instructions.
1185   InstructionCost
1186   getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1187                     unsigned AddressSpace,
1188                     TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1189                     const Instruction *I = nullptr) const;
1190 
1191   /// \return The cost of masked Load and Store instructions.
1192   InstructionCost getMaskedMemoryOpCost(
1193       unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1194       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1195 
1196   /// \return The cost of Gather or Scatter operation
1197   /// \p Opcode - is a type of memory access Load or Store
1198   /// \p DataTy - a vector type of the data to be loaded or stored
1199   /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1200   /// \p VariableMask - true when the memory access is predicated with a mask
1201   ///                   that is not a compile-time constant
1202   /// \p Alignment - alignment of single element
1203   /// \p I - the optional original context instruction, if one exists, e.g. the
1204   ///        load/store to transform or the call to the gather/scatter intrinsic
1205   InstructionCost getGatherScatterOpCost(
1206       unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1207       Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1208       const Instruction *I = nullptr) const;
1209 
1210   /// \return The cost of the interleaved memory operation.
1211   /// \p Opcode is the memory operation code
1212   /// \p VecTy is the vector type of the interleaved access.
1213   /// \p Factor is the interleave factor
1214   /// \p Indices is the indices for interleaved load members (as interleaved
1215   ///    load allows gaps)
1216   /// \p Alignment is the alignment of the memory operation
1217   /// \p AddressSpace is address space of the pointer.
1218   /// \p UseMaskForCond indicates if the memory access is predicated.
1219   /// \p UseMaskForGaps indicates if gaps should be masked.
1220   InstructionCost getInterleavedMemoryOpCost(
1221       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1222       Align Alignment, unsigned AddressSpace,
1223       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1224       bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
1225 
1226   /// A helper function to determine the type of reduction algorithm used
1227   /// for a given \p Opcode and set of FastMathFlags \p FMF.
1228   static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) {
1229     return FMF != None && !(*FMF).allowReassoc();
1230   }
1231 
1232   /// Calculate the cost of vector reduction intrinsics.
1233   ///
1234   /// This is the cost of reducing the vector value of type \p Ty to a scalar
1235   /// value using the operation denoted by \p Opcode. The FastMathFlags
1236   /// parameter \p FMF indicates what type of reduction we are performing:
1237   ///   1. Tree-wise. This is the typical 'fast' reduction performed that
1238   ///   involves successively splitting a vector into half and doing the
1239   ///   operation on the pair of halves until you have a scalar value. For
1240   ///   example:
1241   ///     (v0, v1, v2, v3)
1242   ///     ((v0+v2), (v1+v3), undef, undef)
1243   ///     ((v0+v2+v1+v3), undef, undef, undef)
1244   ///   This is the default behaviour for integer operations, whereas for
1245   ///   floating point we only do this if \p FMF indicates that
1246   ///   reassociation is allowed.
1247   ///   2. Ordered. For a vector with N elements this involves performing N
1248   ///   operations in lane order, starting with an initial scalar value, i.e.
1249   ///     result = InitVal + v0
1250   ///     result = result + v1
1251   ///     result = result + v2
1252   ///     result = result + v3
1253   ///   This is only the case for FP operations and when reassociation is not
1254   ///   allowed.
1255   ///
1256   InstructionCost getArithmeticReductionCost(
1257       unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
1258       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1259 
1260   InstructionCost getMinMaxReductionCost(
1261       VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1262       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1263 
1264   /// Calculate the cost of an extended reduction pattern, similar to
1265   /// getArithmeticReductionCost of an Add reduction with an extension and
1266   /// optional multiply. This is the cost of as:
1267   /// ResTy vecreduce.add(ext(Ty A)), or if IsMLA flag is set then:
1268   /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B)). The reduction happens
1269   /// on a VectorType with ResTy elements and Ty lanes.
1270   InstructionCost getExtendedAddReductionCost(
1271       bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1272       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1273 
1274   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1275   /// Three cases are handled: 1. scalar instruction 2. vector instruction
1276   /// 3. scalar instruction which is to be vectorized.
1277   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1278                                         TTI::TargetCostKind CostKind) const;
1279 
1280   /// \returns The cost of Call instructions.
1281   InstructionCost getCallInstrCost(
1282       Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1283       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1284 
1285   /// \returns The number of pieces into which the provided type must be
1286   /// split during legalization. Zero is returned when the answer is unknown.
1287   unsigned getNumberOfParts(Type *Tp) const;
1288 
1289   /// \returns The cost of the address computation. For most targets this can be
1290   /// merged into the instruction indexing mode. Some targets might want to
1291   /// distinguish between address computation for memory operations on vector
1292   /// types and scalar types. Such targets should override this function.
1293   /// The 'SE' parameter holds pointer for the scalar evolution object which
1294   /// is used in order to get the Ptr step value in case of constant stride.
1295   /// The 'Ptr' parameter holds SCEV of the access pointer.
1296   InstructionCost getAddressComputationCost(Type *Ty,
1297                                             ScalarEvolution *SE = nullptr,
1298                                             const SCEV *Ptr = nullptr) const;
1299 
1300   /// \returns The cost, if any, of keeping values of the given types alive
1301   /// over a callsite.
1302   ///
1303   /// Some types may require the use of register classes that do not have
1304   /// any callee-saved registers, so would require a spill and fill.
1305   InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1306 
1307   /// \returns True if the intrinsic is a supported memory intrinsic.  Info
1308   /// will contain additional information - whether the intrinsic may write
1309   /// or read to memory, volatility and the pointer.  Info is undefined
1310   /// if false is returned.
1311   bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1312 
1313   /// \returns The maximum element size, in bytes, for an element
1314   /// unordered-atomic memory intrinsic.
1315   unsigned getAtomicMemIntrinsicMaxElementSize() const;
1316 
1317   /// \returns A value which is the result of the given memory intrinsic.  New
1318   /// instructions may be created to extract the result from the given intrinsic
1319   /// memory operation.  Returns nullptr if the target cannot create a result
1320   /// from the given intrinsic.
1321   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1322                                            Type *ExpectedType) const;
1323 
1324   /// \returns The type to use in a loop expansion of a memcpy call.
1325   Type *
1326   getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1327                             unsigned SrcAddrSpace, unsigned DestAddrSpace,
1328                             unsigned SrcAlign, unsigned DestAlign,
1329                             Optional<uint32_t> AtomicElementSize = None) const;
1330 
1331   /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1332   /// \param RemainingBytes The number of bytes to copy.
1333   ///
1334   /// Calculates the operand types to use when copying \p RemainingBytes of
1335   /// memory, where source and destination alignments are \p SrcAlign and
1336   /// \p DestAlign respectively.
1337   void getMemcpyLoopResidualLoweringType(
1338       SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1339       unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1340       unsigned SrcAlign, unsigned DestAlign,
1341       Optional<uint32_t> AtomicCpySize = None) const;
1342 
1343   /// \returns True if the two functions have compatible attributes for inlining
1344   /// purposes.
1345   bool areInlineCompatible(const Function *Caller,
1346                            const Function *Callee) const;
1347 
1348   /// \returns True if the caller and callee agree on how \p Types will be
1349   /// passed to or returned from the callee.
1350   /// to the callee.
1351   /// \param Types List of types to check.
1352   bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1353                              const ArrayRef<Type *> &Types) const;
1354 
1355   /// The type of load/store indexing.
1356   enum MemIndexedMode {
1357     MIM_Unindexed, ///< No indexing.
1358     MIM_PreInc,    ///< Pre-incrementing.
1359     MIM_PreDec,    ///< Pre-decrementing.
1360     MIM_PostInc,   ///< Post-incrementing.
1361     MIM_PostDec    ///< Post-decrementing.
1362   };
1363 
1364   /// \returns True if the specified indexed load for the given type is legal.
1365   bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1366 
1367   /// \returns True if the specified indexed store for the given type is legal.
1368   bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1369 
1370   /// \returns The bitwidth of the largest vector type that should be used to
1371   /// load/store in the given address space.
1372   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1373 
1374   /// \returns True if the load instruction is legal to vectorize.
1375   bool isLegalToVectorizeLoad(LoadInst *LI) const;
1376 
1377   /// \returns True if the store instruction is legal to vectorize.
1378   bool isLegalToVectorizeStore(StoreInst *SI) const;
1379 
1380   /// \returns True if it is legal to vectorize the given load chain.
1381   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1382                                    unsigned AddrSpace) const;
1383 
1384   /// \returns True if it is legal to vectorize the given store chain.
1385   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1386                                     unsigned AddrSpace) const;
1387 
1388   /// \returns True if it is legal to vectorize the given reduction kind.
1389   bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1390                                    ElementCount VF) const;
1391 
1392   /// \returns True if the given type is supported for scalable vectors
1393   bool isElementTypeLegalForScalableVector(Type *Ty) const;
1394 
1395   /// \returns The new vector factor value if the target doesn't support \p
1396   /// SizeInBytes loads or has a better vector factor.
1397   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1398                                unsigned ChainSizeInBytes,
1399                                VectorType *VecTy) const;
1400 
1401   /// \returns The new vector factor value if the target doesn't support \p
1402   /// SizeInBytes stores or has a better vector factor.
1403   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1404                                 unsigned ChainSizeInBytes,
1405                                 VectorType *VecTy) const;
1406 
1407   /// Flags describing the kind of vector reduction.
1408   struct ReductionFlags {
1409     ReductionFlags() = default;
1410     bool IsMaxOp =
1411         false; ///< If the op a min/max kind, true if it's a max operation.
1412     bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1413     bool NoNaN =
1414         false; ///< If op is an fp min/max, whether NaNs may be present.
1415   };
1416 
1417   /// \returns True if the target prefers reductions in loop.
1418   bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1419                              ReductionFlags Flags) const;
1420 
1421   /// \returns True if the target prefers reductions select kept in the loop
1422   /// when tail folding. i.e.
1423   /// loop:
1424   ///   p = phi (0, s)
1425   ///   a = add (p, x)
1426   ///   s = select (mask, a, p)
1427   /// vecreduce.add(s)
1428   ///
1429   /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1430   /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1431   /// by the target, this can lead to cleaner code generation.
1432   bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1433                                        ReductionFlags Flags) const;
1434 
1435   /// \returns True if the target wants to expand the given reduction intrinsic
1436   /// into a shuffle sequence.
1437   bool shouldExpandReduction(const IntrinsicInst *II) const;
1438 
1439   /// \returns the size cost of rematerializing a GlobalValue address relative
1440   /// to a stack reload.
1441   unsigned getGISelRematGlobalCost() const;
1442 
1443   /// \returns True if the target supports scalable vectors.
1444   bool supportsScalableVectors() const;
1445 
1446   /// \return true when scalable vectorization is preferred.
1447   bool enableScalableVectorization() const;
1448 
1449   /// \name Vector Predication Information
1450   /// @{
1451   /// Whether the target supports the %evl parameter of VP intrinsic efficiently
1452   /// in hardware, for the given opcode and type/alignment. (see LLVM Language
1453   /// Reference - "Vector Predication Intrinsics").
1454   /// Use of %evl is discouraged when that is not the case.
1455   bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1456                              Align Alignment) const;
1457 
1458   struct VPLegalization {
1459     enum VPTransform {
1460       // keep the predicating parameter
1461       Legal = 0,
1462       // where legal, discard the predicate parameter
1463       Discard = 1,
1464       // transform into something else that is also predicating
1465       Convert = 2
1466     };
1467 
1468     // How to transform the EVL parameter.
1469     // Legal:   keep the EVL parameter as it is.
1470     // Discard: Ignore the EVL parameter where it is safe to do so.
1471     // Convert: Fold the EVL into the mask parameter.
1472     VPTransform EVLParamStrategy;
1473 
1474     // How to transform the operator.
1475     // Legal:   The target supports this operator.
1476     // Convert: Convert this to a non-VP operation.
1477     // The 'Discard' strategy is invalid.
1478     VPTransform OpStrategy;
1479 
1480     bool shouldDoNothing() const {
1481       return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1482     }
1483     VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1484         : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1485   };
1486 
1487   /// \returns How the target needs this vector-predicated operation to be
1488   /// transformed.
1489   VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1490   /// @}
1491 
1492   /// @}
1493 
1494 private:
1495   /// Estimate the latency of specified instruction.
1496   /// Returns 1 as the default value.
1497   InstructionCost getInstructionLatency(const Instruction *I) const;
1498 
1499   /// Returns the expected throughput cost of the instruction.
1500   /// Returns -1 if the cost is unknown.
1501   InstructionCost getInstructionThroughput(const Instruction *I) const;
1502 
1503   /// The abstract base class used to type erase specific TTI
1504   /// implementations.
1505   class Concept;
1506 
1507   /// The template model for the base class which wraps a concrete
1508   /// implementation in a type erased interface.
1509   template <typename T> class Model;
1510 
1511   std::unique_ptr<Concept> TTIImpl;
1512 };
1513 
1514 class TargetTransformInfo::Concept {
1515 public:
1516   virtual ~Concept() = 0;
1517   virtual const DataLayout &getDataLayout() const = 0;
1518   virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1519                                      ArrayRef<const Value *> Operands,
1520                                      TTI::TargetCostKind CostKind) = 0;
1521   virtual unsigned getInliningThresholdMultiplier() = 0;
1522   virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1523   virtual int getInlinerVectorBonusPercent() = 0;
1524   virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
1525   virtual unsigned
1526   getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
1527                                    ProfileSummaryInfo *PSI,
1528                                    BlockFrequencyInfo *BFI) = 0;
1529   virtual InstructionCost getUserCost(const User *U,
1530                                       ArrayRef<const Value *> Operands,
1531                                       TargetCostKind CostKind) = 0;
1532   virtual BranchProbability getPredictableBranchThreshold() = 0;
1533   virtual bool hasBranchDivergence() = 0;
1534   virtual bool useGPUDivergenceAnalysis() = 0;
1535   virtual bool isSourceOfDivergence(const Value *V) = 0;
1536   virtual bool isAlwaysUniform(const Value *V) = 0;
1537   virtual unsigned getFlatAddressSpace() = 0;
1538   virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1539                                           Intrinsic::ID IID) const = 0;
1540   virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1541   virtual bool
1542   canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
1543   virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1544   virtual std::pair<const Value *, unsigned>
1545   getPredicatedAddrSpace(const Value *V) const = 0;
1546   virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
1547                                                   Value *OldV,
1548                                                   Value *NewV) const = 0;
1549   virtual bool isLoweredToCall(const Function *F) = 0;
1550   virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1551                                        UnrollingPreferences &UP,
1552                                        OptimizationRemarkEmitter *ORE) = 0;
1553   virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1554                                      PeelingPreferences &PP) = 0;
1555   virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1556                                         AssumptionCache &AC,
1557                                         TargetLibraryInfo *LibInfo,
1558                                         HardwareLoopInfo &HWLoopInfo) = 0;
1559   virtual bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
1560                                            ScalarEvolution &SE,
1561                                            AssumptionCache &AC,
1562                                            TargetLibraryInfo *TLI,
1563                                            DominatorTree *DT,
1564                                            LoopVectorizationLegality *LVL) = 0;
1565   virtual PredicationStyle emitGetActiveLaneMask() = 0;
1566   virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1567                                                        IntrinsicInst &II) = 0;
1568   virtual Optional<Value *>
1569   simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1570                                    APInt DemandedMask, KnownBits &Known,
1571                                    bool &KnownBitsComputed) = 0;
1572   virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1573       InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1574       APInt &UndefElts2, APInt &UndefElts3,
1575       std::function<void(Instruction *, unsigned, APInt, APInt &)>
1576           SimplifyAndSetOp) = 0;
1577   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1578   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1579   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1580                                      int64_t BaseOffset, bool HasBaseReg,
1581                                      int64_t Scale, unsigned AddrSpace,
1582                                      Instruction *I) = 0;
1583   virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
1584                              const TargetTransformInfo::LSRCost &C2) = 0;
1585   virtual bool isNumRegsMajorCostOfLSR() = 0;
1586   virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
1587   virtual bool canMacroFuseCmp() = 0;
1588   virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1589                           LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
1590                           TargetLibraryInfo *LibInfo) = 0;
1591   virtual AddressingModeKind
1592     getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0;
1593   virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1594   virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1595   virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1596   virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1597   virtual bool isLegalBroadcastLoad(Type *ElementTy,
1598                                     ElementCount NumElements) const = 0;
1599   virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1600   virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1601   virtual bool forceScalarizeMaskedGather(VectorType *DataType,
1602                                           Align Alignment) = 0;
1603   virtual bool forceScalarizeMaskedScatter(VectorType *DataType,
1604                                            Align Alignment) = 0;
1605   virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1606   virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1607   virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1608                                unsigned Opcode1,
1609                                const SmallBitVector &OpcodeMask) const = 0;
1610   virtual bool enableOrderedReductions() = 0;
1611   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1612   virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1613   virtual bool prefersVectorizedAddressing() = 0;
1614   virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1615                                                int64_t BaseOffset,
1616                                                bool HasBaseReg, int64_t Scale,
1617                                                unsigned AddrSpace) = 0;
1618   virtual bool LSRWithInstrQueries() = 0;
1619   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1620   virtual bool isProfitableToHoist(Instruction *I) = 0;
1621   virtual bool useAA() = 0;
1622   virtual bool isTypeLegal(Type *Ty) = 0;
1623   virtual unsigned getRegUsageForType(Type *Ty) = 0;
1624   virtual bool shouldBuildLookupTables() = 0;
1625   virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1626   virtual bool shouldBuildRelLookupTables() = 0;
1627   virtual bool useColdCCForColdCall(Function &F) = 0;
1628   virtual InstructionCost getScalarizationOverhead(VectorType *Ty,
1629                                                    const APInt &DemandedElts,
1630                                                    bool Insert,
1631                                                    bool Extract) = 0;
1632   virtual InstructionCost
1633   getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1634                                    ArrayRef<Type *> Tys) = 0;
1635   virtual bool supportsEfficientVectorElementLoadStore() = 0;
1636   virtual bool supportsTailCalls() = 0;
1637   virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1638   virtual MemCmpExpansionOptions
1639   enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1640   virtual bool enableInterleavedAccessVectorization() = 0;
1641   virtual bool enableMaskedInterleavedAccessVectorization() = 0;
1642   virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1643   virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1644                                               unsigned BitWidth,
1645                                               unsigned AddressSpace,
1646                                               Align Alignment,
1647                                               bool *Fast) = 0;
1648   virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1649   virtual bool haveFastSqrt(Type *Ty) = 0;
1650   virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1651   virtual InstructionCost getFPOpCost(Type *Ty) = 0;
1652   virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1653                                                 const APInt &Imm, Type *Ty) = 0;
1654   virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1655                                         TargetCostKind CostKind) = 0;
1656   virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1657                                             const APInt &Imm, Type *Ty,
1658                                             TargetCostKind CostKind,
1659                                             Instruction *Inst = nullptr) = 0;
1660   virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
1661                                               const APInt &Imm, Type *Ty,
1662                                               TargetCostKind CostKind) = 0;
1663   virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1664   virtual unsigned getRegisterClassForType(bool Vector,
1665                                            Type *Ty = nullptr) const = 0;
1666   virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1667   virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
1668   virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1669   virtual Optional<unsigned> getMaxVScale() const = 0;
1670   virtual Optional<unsigned> getVScaleForTuning() const = 0;
1671   virtual bool
1672   shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
1673   virtual ElementCount getMinimumVF(unsigned ElemWidth,
1674                                     bool IsScalable) const = 0;
1675   virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1676   virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1677                                      Type *ScalarValTy) const = 0;
1678   virtual bool shouldConsiderAddressTypePromotion(
1679       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1680   virtual unsigned getCacheLineSize() const = 0;
1681   virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1682   virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
1683 
1684   /// \return How much before a load we should place the prefetch
1685   /// instruction.  This is currently measured in number of
1686   /// instructions.
1687   virtual unsigned getPrefetchDistance() const = 0;
1688 
1689   /// \return Some HW prefetchers can handle accesses up to a certain
1690   /// constant stride.  This is the minimum stride in bytes where it
1691   /// makes sense to start adding SW prefetches.  The default is 1,
1692   /// i.e. prefetch with any stride.  Sometimes prefetching is beneficial
1693   /// even below the HW prefetcher limit, and the arguments provided are
1694   /// meant to serve as a basis for deciding this for a particular loop.
1695   virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1696                                         unsigned NumStridedMemAccesses,
1697                                         unsigned NumPrefetches,
1698                                         bool HasCall) const = 0;
1699 
1700   /// \return The maximum number of iterations to prefetch ahead.  If
1701   /// the required number of iterations is more than this number, no
1702   /// prefetching is performed.
1703   virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1704 
1705   /// \return True if prefetching should also be done for writes.
1706   virtual bool enableWritePrefetching() const = 0;
1707 
1708   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1709   virtual InstructionCost getArithmeticInstrCost(
1710       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1711       OperandValueKind Opd1Info, OperandValueKind Opd2Info,
1712       OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
1713       ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1714   virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
1715                                          ArrayRef<int> Mask, int Index,
1716                                          VectorType *SubTp,
1717                                          ArrayRef<const Value *> Args) = 0;
1718   virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1719                                            Type *Src, CastContextHint CCH,
1720                                            TTI::TargetCostKind CostKind,
1721                                            const Instruction *I) = 0;
1722   virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1723                                                    VectorType *VecTy,
1724                                                    unsigned Index) = 0;
1725   virtual InstructionCost getCFInstrCost(unsigned Opcode,
1726                                          TTI::TargetCostKind CostKind,
1727                                          const Instruction *I = nullptr) = 0;
1728   virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1729                                              Type *CondTy,
1730                                              CmpInst::Predicate VecPred,
1731                                              TTI::TargetCostKind CostKind,
1732                                              const Instruction *I) = 0;
1733   virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1734                                              unsigned Index) = 0;
1735 
1736   virtual InstructionCost
1737   getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
1738                             const APInt &DemandedDstElts,
1739                             TTI::TargetCostKind CostKind) = 0;
1740 
1741   virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
1742                                           Align Alignment,
1743                                           unsigned AddressSpace,
1744                                           TTI::TargetCostKind CostKind,
1745                                           const Instruction *I) = 0;
1746   virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
1747                                             Align Alignment,
1748                                             unsigned AddressSpace,
1749                                             TTI::TargetCostKind CostKind,
1750                                             const Instruction *I) = 0;
1751   virtual InstructionCost
1752   getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1753                         unsigned AddressSpace,
1754                         TTI::TargetCostKind CostKind) = 0;
1755   virtual InstructionCost
1756   getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
1757                          bool VariableMask, Align Alignment,
1758                          TTI::TargetCostKind CostKind,
1759                          const Instruction *I = nullptr) = 0;
1760 
1761   virtual InstructionCost getInterleavedMemoryOpCost(
1762       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1763       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1764       bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1765   virtual InstructionCost
1766   getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1767                              Optional<FastMathFlags> FMF,
1768                              TTI::TargetCostKind CostKind) = 0;
1769   virtual InstructionCost
1770   getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1771                          TTI::TargetCostKind CostKind) = 0;
1772   virtual InstructionCost getExtendedAddReductionCost(
1773       bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1774       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
1775   virtual InstructionCost
1776   getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1777                         TTI::TargetCostKind CostKind) = 0;
1778   virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
1779                                            ArrayRef<Type *> Tys,
1780                                            TTI::TargetCostKind CostKind) = 0;
1781   virtual unsigned getNumberOfParts(Type *Tp) = 0;
1782   virtual InstructionCost
1783   getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0;
1784   virtual InstructionCost
1785   getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1786   virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1787                                   MemIntrinsicInfo &Info) = 0;
1788   virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1789   virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1790                                                    Type *ExpectedType) = 0;
1791   virtual Type *
1792   getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1793                             unsigned SrcAddrSpace, unsigned DestAddrSpace,
1794                             unsigned SrcAlign, unsigned DestAlign,
1795                             Optional<uint32_t> AtomicElementSize) const = 0;
1796 
1797   virtual void getMemcpyLoopResidualLoweringType(
1798       SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1799       unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1800       unsigned SrcAlign, unsigned DestAlign,
1801       Optional<uint32_t> AtomicCpySize) const = 0;
1802   virtual bool areInlineCompatible(const Function *Caller,
1803                                    const Function *Callee) const = 0;
1804   virtual bool areTypesABICompatible(const Function *Caller,
1805                                      const Function *Callee,
1806                                      const ArrayRef<Type *> &Types) const = 0;
1807   virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1808   virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1809   virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1810   virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1811   virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1812   virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1813                                            Align Alignment,
1814                                            unsigned AddrSpace) const = 0;
1815   virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1816                                             Align Alignment,
1817                                             unsigned AddrSpace) const = 0;
1818   virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1819                                            ElementCount VF) const = 0;
1820   virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
1821   virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1822                                        unsigned ChainSizeInBytes,
1823                                        VectorType *VecTy) const = 0;
1824   virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1825                                         unsigned ChainSizeInBytes,
1826                                         VectorType *VecTy) const = 0;
1827   virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1828                                      ReductionFlags) const = 0;
1829   virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1830                                                ReductionFlags) const = 0;
1831   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1832   virtual unsigned getGISelRematGlobalCost() const = 0;
1833   virtual bool enableScalableVectorization() const = 0;
1834   virtual bool supportsScalableVectors() const = 0;
1835   virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1836                                      Align Alignment) const = 0;
1837   virtual InstructionCost getInstructionLatency(const Instruction *I) = 0;
1838   virtual VPLegalization
1839   getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
1840 };
1841 
1842 template <typename T>
1843 class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
1844   T Impl;
1845 
1846 public:
1847   Model(T Impl) : Impl(std::move(Impl)) {}
1848   ~Model() override = default;
1849 
1850   const DataLayout &getDataLayout() const override {
1851     return Impl.getDataLayout();
1852   }
1853 
1854   InstructionCost
1855   getGEPCost(Type *PointeeType, const Value *Ptr,
1856              ArrayRef<const Value *> Operands,
1857              TargetTransformInfo::TargetCostKind CostKind) override {
1858     return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
1859   }
1860   unsigned getInliningThresholdMultiplier() override {
1861     return Impl.getInliningThresholdMultiplier();
1862   }
1863   unsigned adjustInliningThreshold(const CallBase *CB) override {
1864     return Impl.adjustInliningThreshold(CB);
1865   }
1866   int getInlinerVectorBonusPercent() override {
1867     return Impl.getInlinerVectorBonusPercent();
1868   }
1869   InstructionCost getMemcpyCost(const Instruction *I) override {
1870     return Impl.getMemcpyCost(I);
1871   }
1872   InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands,
1873                               TargetCostKind CostKind) override {
1874     return Impl.getUserCost(U, Operands, CostKind);
1875   }
1876   BranchProbability getPredictableBranchThreshold() override {
1877     return Impl.getPredictableBranchThreshold();
1878   }
1879   bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
1880   bool useGPUDivergenceAnalysis() override {
1881     return Impl.useGPUDivergenceAnalysis();
1882   }
1883   bool isSourceOfDivergence(const Value *V) override {
1884     return Impl.isSourceOfDivergence(V);
1885   }
1886 
1887   bool isAlwaysUniform(const Value *V) override {
1888     return Impl.isAlwaysUniform(V);
1889   }
1890 
1891   unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
1892 
1893   bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
1894                                   Intrinsic::ID IID) const override {
1895     return Impl.collectFlatAddressOperands(OpIndexes, IID);
1896   }
1897 
1898   bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
1899     return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
1900   }
1901 
1902   bool
1903   canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
1904     return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
1905   }
1906 
1907   unsigned getAssumedAddrSpace(const Value *V) const override {
1908     return Impl.getAssumedAddrSpace(V);
1909   }
1910 
1911   std::pair<const Value *, unsigned>
1912   getPredicatedAddrSpace(const Value *V) const override {
1913     return Impl.getPredicatedAddrSpace(V);
1914   }
1915 
1916   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
1917                                           Value *NewV) const override {
1918     return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
1919   }
1920 
1921   bool isLoweredToCall(const Function *F) override {
1922     return Impl.isLoweredToCall(F);
1923   }
1924   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
1925                                UnrollingPreferences &UP,
1926                                OptimizationRemarkEmitter *ORE) override {
1927     return Impl.getUnrollingPreferences(L, SE, UP, ORE);
1928   }
1929   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
1930                              PeelingPreferences &PP) override {
1931     return Impl.getPeelingPreferences(L, SE, PP);
1932   }
1933   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
1934                                 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
1935                                 HardwareLoopInfo &HWLoopInfo) override {
1936     return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
1937   }
1938   bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
1939                                    AssumptionCache &AC, TargetLibraryInfo *TLI,
1940                                    DominatorTree *DT,
1941                                    LoopVectorizationLegality *LVL) override {
1942     return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL);
1943   }
1944   PredicationStyle emitGetActiveLaneMask() override {
1945     return Impl.emitGetActiveLaneMask();
1946   }
1947   Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
1948                                                IntrinsicInst &II) override {
1949     return Impl.instCombineIntrinsic(IC, II);
1950   }
1951   Optional<Value *>
1952   simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
1953                                    APInt DemandedMask, KnownBits &Known,
1954                                    bool &KnownBitsComputed) override {
1955     return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
1956                                                  KnownBitsComputed);
1957   }
1958   Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1959       InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
1960       APInt &UndefElts2, APInt &UndefElts3,
1961       std::function<void(Instruction *, unsigned, APInt, APInt &)>
1962           SimplifyAndSetOp) override {
1963     return Impl.simplifyDemandedVectorEltsIntrinsic(
1964         IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
1965         SimplifyAndSetOp);
1966   }
1967   bool isLegalAddImmediate(int64_t Imm) override {
1968     return Impl.isLegalAddImmediate(Imm);
1969   }
1970   bool isLegalICmpImmediate(int64_t Imm) override {
1971     return Impl.isLegalICmpImmediate(Imm);
1972   }
1973   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
1974                              bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
1975                              Instruction *I) override {
1976     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
1977                                       AddrSpace, I);
1978   }
1979   bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
1980                      const TargetTransformInfo::LSRCost &C2) override {
1981     return Impl.isLSRCostLess(C1, C2);
1982   }
1983   bool isNumRegsMajorCostOfLSR() override {
1984     return Impl.isNumRegsMajorCostOfLSR();
1985   }
1986   bool isProfitableLSRChainElement(Instruction *I) override {
1987     return Impl.isProfitableLSRChainElement(I);
1988   }
1989   bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
1990   bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
1991                   DominatorTree *DT, AssumptionCache *AC,
1992                   TargetLibraryInfo *LibInfo) override {
1993     return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
1994   }
1995   AddressingModeKind
1996     getPreferredAddressingMode(const Loop *L,
1997                                ScalarEvolution *SE) const override {
1998     return Impl.getPreferredAddressingMode(L, SE);
1999   }
2000   bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2001     return Impl.isLegalMaskedStore(DataType, Alignment);
2002   }
2003   bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2004     return Impl.isLegalMaskedLoad(DataType, Alignment);
2005   }
2006   bool isLegalNTStore(Type *DataType, Align Alignment) override {
2007     return Impl.isLegalNTStore(DataType, Alignment);
2008   }
2009   bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2010     return Impl.isLegalNTLoad(DataType, Alignment);
2011   }
2012   bool isLegalBroadcastLoad(Type *ElementTy,
2013                             ElementCount NumElements) const override {
2014     return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2015   }
2016   bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2017     return Impl.isLegalMaskedScatter(DataType, Alignment);
2018   }
2019   bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2020     return Impl.isLegalMaskedGather(DataType, Alignment);
2021   }
2022   bool forceScalarizeMaskedGather(VectorType *DataType,
2023                                   Align Alignment) override {
2024     return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2025   }
2026   bool forceScalarizeMaskedScatter(VectorType *DataType,
2027                                    Align Alignment) override {
2028     return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2029   }
2030   bool isLegalMaskedCompressStore(Type *DataType) override {
2031     return Impl.isLegalMaskedCompressStore(DataType);
2032   }
2033   bool isLegalMaskedExpandLoad(Type *DataType) override {
2034     return Impl.isLegalMaskedExpandLoad(DataType);
2035   }
2036   bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2037                        const SmallBitVector &OpcodeMask) const override {
2038     return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2039   }
2040   bool enableOrderedReductions() override {
2041     return Impl.enableOrderedReductions();
2042   }
2043   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2044     return Impl.hasDivRemOp(DataType, IsSigned);
2045   }
2046   bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2047     return Impl.hasVolatileVariant(I, AddrSpace);
2048   }
2049   bool prefersVectorizedAddressing() override {
2050     return Impl.prefersVectorizedAddressing();
2051   }
2052   InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2053                                        int64_t BaseOffset, bool HasBaseReg,
2054                                        int64_t Scale,
2055                                        unsigned AddrSpace) override {
2056     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2057                                      AddrSpace);
2058   }
2059   bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2060   bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2061     return Impl.isTruncateFree(Ty1, Ty2);
2062   }
2063   bool isProfitableToHoist(Instruction *I) override {
2064     return Impl.isProfitableToHoist(I);
2065   }
2066   bool useAA() override { return Impl.useAA(); }
2067   bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2068   unsigned getRegUsageForType(Type *Ty) override {
2069     return Impl.getRegUsageForType(Ty);
2070   }
2071   bool shouldBuildLookupTables() override {
2072     return Impl.shouldBuildLookupTables();
2073   }
2074   bool shouldBuildLookupTablesForConstant(Constant *C) override {
2075     return Impl.shouldBuildLookupTablesForConstant(C);
2076   }
2077   bool shouldBuildRelLookupTables() override {
2078     return Impl.shouldBuildRelLookupTables();
2079   }
2080   bool useColdCCForColdCall(Function &F) override {
2081     return Impl.useColdCCForColdCall(F);
2082   }
2083 
2084   InstructionCost getScalarizationOverhead(VectorType *Ty,
2085                                            const APInt &DemandedElts,
2086                                            bool Insert, bool Extract) override {
2087     return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
2088   }
2089   InstructionCost
2090   getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2091                                    ArrayRef<Type *> Tys) override {
2092     return Impl.getOperandsScalarizationOverhead(Args, Tys);
2093   }
2094 
2095   bool supportsEfficientVectorElementLoadStore() override {
2096     return Impl.supportsEfficientVectorElementLoadStore();
2097   }
2098 
2099   bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2100 
2101   bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2102     return Impl.enableAggressiveInterleaving(LoopHasReductions);
2103   }
2104   MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2105                                                bool IsZeroCmp) const override {
2106     return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2107   }
2108   bool enableInterleavedAccessVectorization() override {
2109     return Impl.enableInterleavedAccessVectorization();
2110   }
2111   bool enableMaskedInterleavedAccessVectorization() override {
2112     return Impl.enableMaskedInterleavedAccessVectorization();
2113   }
2114   bool isFPVectorizationPotentiallyUnsafe() override {
2115     return Impl.isFPVectorizationPotentiallyUnsafe();
2116   }
2117   bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2118                                       unsigned AddressSpace, Align Alignment,
2119                                       bool *Fast) override {
2120     return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2121                                                Alignment, Fast);
2122   }
2123   PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2124     return Impl.getPopcntSupport(IntTyWidthInBit);
2125   }
2126   bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2127 
2128   bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2129     return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2130   }
2131 
2132   InstructionCost getFPOpCost(Type *Ty) override {
2133     return Impl.getFPOpCost(Ty);
2134   }
2135 
2136   InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2137                                         const APInt &Imm, Type *Ty) override {
2138     return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2139   }
2140   InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2141                                 TargetCostKind CostKind) override {
2142     return Impl.getIntImmCost(Imm, Ty, CostKind);
2143   }
2144   InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2145                                     const APInt &Imm, Type *Ty,
2146                                     TargetCostKind CostKind,
2147                                     Instruction *Inst = nullptr) override {
2148     return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2149   }
2150   InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2151                                       const APInt &Imm, Type *Ty,
2152                                       TargetCostKind CostKind) override {
2153     return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2154   }
2155   unsigned getNumberOfRegisters(unsigned ClassID) const override {
2156     return Impl.getNumberOfRegisters(ClassID);
2157   }
2158   unsigned getRegisterClassForType(bool Vector,
2159                                    Type *Ty = nullptr) const override {
2160     return Impl.getRegisterClassForType(Vector, Ty);
2161   }
2162   const char *getRegisterClassName(unsigned ClassID) const override {
2163     return Impl.getRegisterClassName(ClassID);
2164   }
2165   TypeSize getRegisterBitWidth(RegisterKind K) const override {
2166     return Impl.getRegisterBitWidth(K);
2167   }
2168   unsigned getMinVectorRegisterBitWidth() const override {
2169     return Impl.getMinVectorRegisterBitWidth();
2170   }
2171   Optional<unsigned> getMaxVScale() const override {
2172     return Impl.getMaxVScale();
2173   }
2174   Optional<unsigned> getVScaleForTuning() const override {
2175     return Impl.getVScaleForTuning();
2176   }
2177   bool shouldMaximizeVectorBandwidth(
2178       TargetTransformInfo::RegisterKind K) const override {
2179     return Impl.shouldMaximizeVectorBandwidth(K);
2180   }
2181   ElementCount getMinimumVF(unsigned ElemWidth,
2182                             bool IsScalable) const override {
2183     return Impl.getMinimumVF(ElemWidth, IsScalable);
2184   }
2185   unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2186     return Impl.getMaximumVF(ElemWidth, Opcode);
2187   }
2188   unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2189                              Type *ScalarValTy) const override {
2190     return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2191   }
2192   bool shouldConsiderAddressTypePromotion(
2193       const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2194     return Impl.shouldConsiderAddressTypePromotion(
2195         I, AllowPromotionWithoutCommonHeader);
2196   }
2197   unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2198   Optional<unsigned> getCacheSize(CacheLevel Level) const override {
2199     return Impl.getCacheSize(Level);
2200   }
2201   Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
2202     return Impl.getCacheAssociativity(Level);
2203   }
2204 
2205   /// Return the preferred prefetch distance in terms of instructions.
2206   ///
2207   unsigned getPrefetchDistance() const override {
2208     return Impl.getPrefetchDistance();
2209   }
2210 
2211   /// Return the minimum stride necessary to trigger software
2212   /// prefetching.
2213   ///
2214   unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2215                                 unsigned NumStridedMemAccesses,
2216                                 unsigned NumPrefetches,
2217                                 bool HasCall) const override {
2218     return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2219                                      NumPrefetches, HasCall);
2220   }
2221 
2222   /// Return the maximum prefetch distance in terms of loop
2223   /// iterations.
2224   ///
2225   unsigned getMaxPrefetchIterationsAhead() const override {
2226     return Impl.getMaxPrefetchIterationsAhead();
2227   }
2228 
2229   /// \return True if prefetching should also be done for writes.
2230   bool enableWritePrefetching() const override {
2231     return Impl.enableWritePrefetching();
2232   }
2233 
2234   unsigned getMaxInterleaveFactor(unsigned VF) override {
2235     return Impl.getMaxInterleaveFactor(VF);
2236   }
2237   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2238                                             unsigned &JTSize,
2239                                             ProfileSummaryInfo *PSI,
2240                                             BlockFrequencyInfo *BFI) override {
2241     return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2242   }
2243   InstructionCost getArithmeticInstrCost(
2244       unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2245       OperandValueKind Opd1Info, OperandValueKind Opd2Info,
2246       OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
2247       ArrayRef<const Value *> Args,
2248       const Instruction *CxtI = nullptr) override {
2249     return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2250                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
2251   }
2252   InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2253                                  ArrayRef<int> Mask, int Index,
2254                                  VectorType *SubTp,
2255                                  ArrayRef<const Value *> Args) override {
2256     return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp, Args);
2257   }
2258   InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2259                                    CastContextHint CCH,
2260                                    TTI::TargetCostKind CostKind,
2261                                    const Instruction *I) override {
2262     return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2263   }
2264   InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2265                                            VectorType *VecTy,
2266                                            unsigned Index) override {
2267     return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2268   }
2269   InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2270                                  const Instruction *I = nullptr) override {
2271     return Impl.getCFInstrCost(Opcode, CostKind, I);
2272   }
2273   InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2274                                      CmpInst::Predicate VecPred,
2275                                      TTI::TargetCostKind CostKind,
2276                                      const Instruction *I) override {
2277     return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2278   }
2279   InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2280                                      unsigned Index) override {
2281     return Impl.getVectorInstrCost(Opcode, Val, Index);
2282   }
2283   InstructionCost
2284   getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2285                             const APInt &DemandedDstElts,
2286                             TTI::TargetCostKind CostKind) override {
2287     return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2288                                           DemandedDstElts, CostKind);
2289   }
2290   InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2291                                   unsigned AddressSpace,
2292                                   TTI::TargetCostKind CostKind,
2293                                   const Instruction *I) override {
2294     return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2295                                 CostKind, I);
2296   }
2297   InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2298                                     unsigned AddressSpace,
2299                                     TTI::TargetCostKind CostKind,
2300                                     const Instruction *I) override {
2301     return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2302                                   CostKind, I);
2303   }
2304   InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2305                                         Align Alignment, unsigned AddressSpace,
2306                                         TTI::TargetCostKind CostKind) override {
2307     return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2308                                       CostKind);
2309   }
2310   InstructionCost
2311   getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2312                          bool VariableMask, Align Alignment,
2313                          TTI::TargetCostKind CostKind,
2314                          const Instruction *I = nullptr) override {
2315     return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2316                                        Alignment, CostKind, I);
2317   }
2318   InstructionCost getInterleavedMemoryOpCost(
2319       unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2320       Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2321       bool UseMaskForCond, bool UseMaskForGaps) override {
2322     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2323                                            Alignment, AddressSpace, CostKind,
2324                                            UseMaskForCond, UseMaskForGaps);
2325   }
2326   InstructionCost
2327   getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2328                              Optional<FastMathFlags> FMF,
2329                              TTI::TargetCostKind CostKind) override {
2330     return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2331   }
2332   InstructionCost
2333   getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
2334                          TTI::TargetCostKind CostKind) override {
2335     return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
2336   }
2337   InstructionCost getExtendedAddReductionCost(
2338       bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
2339       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
2340     return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty,
2341                                             CostKind);
2342   }
2343   InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2344                                         TTI::TargetCostKind CostKind) override {
2345     return Impl.getIntrinsicInstrCost(ICA, CostKind);
2346   }
2347   InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2348                                    ArrayRef<Type *> Tys,
2349                                    TTI::TargetCostKind CostKind) override {
2350     return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2351   }
2352   unsigned getNumberOfParts(Type *Tp) override {
2353     return Impl.getNumberOfParts(Tp);
2354   }
2355   InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2356                                             const SCEV *Ptr) override {
2357     return Impl.getAddressComputationCost(Ty, SE, Ptr);
2358   }
2359   InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2360     return Impl.getCostOfKeepingLiveOverCall(Tys);
2361   }
2362   bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2363                           MemIntrinsicInfo &Info) override {
2364     return Impl.getTgtMemIntrinsic(Inst, Info);
2365   }
2366   unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2367     return Impl.getAtomicMemIntrinsicMaxElementSize();
2368   }
2369   Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2370                                            Type *ExpectedType) override {
2371     return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2372   }
2373   Type *getMemcpyLoopLoweringType(
2374       LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2375       unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2376       Optional<uint32_t> AtomicElementSize) const override {
2377     return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2378                                           DestAddrSpace, SrcAlign, DestAlign,
2379                                           AtomicElementSize);
2380   }
2381   void getMemcpyLoopResidualLoweringType(
2382       SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2383       unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2384       unsigned SrcAlign, unsigned DestAlign,
2385       Optional<uint32_t> AtomicCpySize) const override {
2386     Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2387                                            SrcAddrSpace, DestAddrSpace,
2388                                            SrcAlign, DestAlign, AtomicCpySize);
2389   }
2390   bool areInlineCompatible(const Function *Caller,
2391                            const Function *Callee) const override {
2392     return Impl.areInlineCompatible(Caller, Callee);
2393   }
2394   bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2395                              const ArrayRef<Type *> &Types) const override {
2396     return Impl.areTypesABICompatible(Caller, Callee, Types);
2397   }
2398   bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2399     return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2400   }
2401   bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2402     return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2403   }
2404   unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2405     return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2406   }
2407   bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2408     return Impl.isLegalToVectorizeLoad(LI);
2409   }
2410   bool isLegalToVectorizeStore(StoreInst *SI) const override {
2411     return Impl.isLegalToVectorizeStore(SI);
2412   }
2413   bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2414                                    unsigned AddrSpace) const override {
2415     return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2416                                             AddrSpace);
2417   }
2418   bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2419                                     unsigned AddrSpace) const override {
2420     return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2421                                              AddrSpace);
2422   }
2423   bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2424                                    ElementCount VF) const override {
2425     return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2426   }
2427   bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2428     return Impl.isElementTypeLegalForScalableVector(Ty);
2429   }
2430   unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2431                                unsigned ChainSizeInBytes,
2432                                VectorType *VecTy) const override {
2433     return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2434   }
2435   unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2436                                 unsigned ChainSizeInBytes,
2437                                 VectorType *VecTy) const override {
2438     return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2439   }
2440   bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2441                              ReductionFlags Flags) const override {
2442     return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2443   }
2444   bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2445                                        ReductionFlags Flags) const override {
2446     return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2447   }
2448   bool shouldExpandReduction(const IntrinsicInst *II) const override {
2449     return Impl.shouldExpandReduction(II);
2450   }
2451 
2452   unsigned getGISelRematGlobalCost() const override {
2453     return Impl.getGISelRematGlobalCost();
2454   }
2455 
2456   bool supportsScalableVectors() const override {
2457     return Impl.supportsScalableVectors();
2458   }
2459 
2460   bool enableScalableVectorization() const override {
2461     return Impl.enableScalableVectorization();
2462   }
2463 
2464   bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2465                              Align Alignment) const override {
2466     return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2467   }
2468 
2469   InstructionCost getInstructionLatency(const Instruction *I) override {
2470     return Impl.getInstructionLatency(I);
2471   }
2472 
2473   VPLegalization
2474   getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2475     return Impl.getVPLegalizationStrategy(PI);
2476   }
2477 };
2478 
2479 template <typename T>
2480 TargetTransformInfo::TargetTransformInfo(T Impl)
2481     : TTIImpl(new Model<T>(Impl)) {}
2482 
2483 /// Analysis pass providing the \c TargetTransformInfo.
2484 ///
2485 /// The core idea of the TargetIRAnalysis is to expose an interface through
2486 /// which LLVM targets can analyze and provide information about the middle
2487 /// end's target-independent IR. This supports use cases such as target-aware
2488 /// cost modeling of IR constructs.
2489 ///
2490 /// This is a function analysis because much of the cost modeling for targets
2491 /// is done in a subtarget specific way and LLVM supports compiling different
2492 /// functions targeting different subtargets in order to support runtime
2493 /// dispatch according to the observed subtarget.
2494 class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2495 public:
2496   typedef TargetTransformInfo Result;
2497 
2498   /// Default construct a target IR analysis.
2499   ///
2500   /// This will use the module's datalayout to construct a baseline
2501   /// conservative TTI result.
2502   TargetIRAnalysis();
2503 
2504   /// Construct an IR analysis pass around a target-provide callback.
2505   ///
2506   /// The callback will be called with a particular function for which the TTI
2507   /// is needed and must return a TTI object for that function.
2508   TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2509 
2510   // Value semantics. We spell out the constructors for MSVC.
2511   TargetIRAnalysis(const TargetIRAnalysis &Arg)
2512       : TTICallback(Arg.TTICallback) {}
2513   TargetIRAnalysis(TargetIRAnalysis &&Arg)
2514       : TTICallback(std::move(Arg.TTICallback)) {}
2515   TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2516     TTICallback = RHS.TTICallback;
2517     return *this;
2518   }
2519   TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2520     TTICallback = std::move(RHS.TTICallback);
2521     return *this;
2522   }
2523 
2524   Result run(const Function &F, FunctionAnalysisManager &);
2525 
2526 private:
2527   friend AnalysisInfoMixin<TargetIRAnalysis>;
2528   static AnalysisKey Key;
2529 
2530   /// The callback used to produce a result.
2531   ///
2532   /// We use a completely opaque callback so that targets can provide whatever
2533   /// mechanism they desire for constructing the TTI for a given function.
2534   ///
2535   /// FIXME: Should we really use std::function? It's relatively inefficient.
2536   /// It might be possible to arrange for even stateful callbacks to outlive
2537   /// the analysis and thus use a function_ref which would be lighter weight.
2538   /// This may also be less error prone as the callback is likely to reference
2539   /// the external TargetMachine, and that reference needs to never dangle.
2540   std::function<Result(const Function &)> TTICallback;
2541 
2542   /// Helper function used as the callback in the default constructor.
2543   static Result getDefaultTTI(const Function &F);
2544 };
2545 
2546 /// Wrapper pass for TargetTransformInfo.
2547 ///
2548 /// This pass can be constructed from a TTI object which it stores internally
2549 /// and is queried by passes.
2550 class TargetTransformInfoWrapperPass : public ImmutablePass {
2551   TargetIRAnalysis TIRA;
2552   Optional<TargetTransformInfo> TTI;
2553 
2554   virtual void anchor();
2555 
2556 public:
2557   static char ID;
2558 
2559   /// We must provide a default constructor for the pass but it should
2560   /// never be used.
2561   ///
2562   /// Use the constructor below or call one of the creation routines.
2563   TargetTransformInfoWrapperPass();
2564 
2565   explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2566 
2567   TargetTransformInfo &getTTI(const Function &F);
2568 };
2569 
2570 /// Create an analysis pass wrapper around a TTI object.
2571 ///
2572 /// This analysis pass just holds the TTI instance and makes it available to
2573 /// clients.
2574 ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2575 
2576 } // namespace llvm
2577 
2578 #endif
2579