//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
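    // Note: the default model only distinguishes three coarse cost buckets,
    // TCC_Free (0), TCC_Basic (1) and TCC_Expensive (4); for example, an
    // integer add is "basic" while an sdiv is "expensive".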
    switch (Opcode) {
    default:
      // By default, just classify everything as 'basic'.
      return TTI::TCC_Basic;

    case Instruction::GetElementPtr:
      llvm_unreachable("Use getGEPCost for GEP operations!");

    case Instruction::BitCast:
      assert(OpTy && "Cast instructions must provide the operand type");
      if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return TTI::TCC_Free;

      // Otherwise, the default basic cost is used.
      return TTI::TCC_Basic;

    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      return TTI::TCC_Expensive;

    case Instruction::IntToPtr: {
      // An inttoptr cast is free so long as the input is a legal integer type
      // which doesn't contain values outside the range of a pointer.
      unsigned OpSize = OpTy->getScalarSizeInBits();
      if (DL.isLegalInteger(OpSize) &&
          OpSize <= DL.getPointerTypeSizeInBits(Ty))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::PtrToInt: {
      // A ptrtoint cast is free so long as the result is a legal integer type
      // that is large enough to store the pointer.
      unsigned DestSize = Ty->getScalarSizeInBits();
      if (DL.isLegalInteger(DestSize) &&
          DestSize >= DL.getPointerTypeSizeInBits(OpTy))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::Trunc:
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
        return TTI::TCC_Free;

      return TTI::TCC_Basic;
    }
  }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
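    // For example, "getelementptr i32, i32* %p, i64 4" is modelled as free,
    // while "getelementptr i32, i32* %p, i64 %i" costs TTI::TCC_Basic because
    // %i is not a constant.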
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) {
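    // Conservative default: assume no jump table is built (JTSize stays 0)
    // and that every case forms its own cluster.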
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  int getExtCost(const Instruction *I, const Value *Src) {
    return TTI::TCC_Basic;
  }

  unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
    assert(FTy && "FunctionType must be provided to this routine.");

    // The target-independent implementation just approximates the cost of the
    // call by assuming that, on average, each argument takes one instruction
    // to prepare.
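    // For example, a call with three explicit arguments is modelled as
    // 4 * TCC_Basic: one unit per argument plus one for the call itself.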

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = FTy->getNumParams();

    return TTI::TCC_Basic * (NumArgs + 1);
  }

  unsigned getInliningThresholdMultiplier() { return 1; }

  int getInlinerVectorBonusPercent() { return 150; }

  unsigned getMemcpyCost(const Instruction *I) {
    return TTI::TCC_Expensive;
  }

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
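    // ~0u (which -1 converts to) tells callers that the target does not
    // expose a flat address space; targets that have one override this hook.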
    return -1;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                        Value *OldV, Value *NewV) const {
    return false;
  }

  bool isLoweredToCall(const Function *F) {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return false;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const {
    return false;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) {}

  bool isLegalAddImmediate(int64_t Imm) { return false; }

  bool isLegalICmpImmediate(int64_t Imm) { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
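    // Compare the cost components lexicographically: fewer registers wins
    // first, then AddRec cost, IV multiplications, base adds, scale cost,
    // immediate cost and finally setup cost.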
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool canMacroFuseCmp() { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) {
    return false;
  }

  bool shouldFavorPostInc() const { return false; }

  bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }

  bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
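    // For example, a 16-byte <4 x float> store with 16-byte alignment
    // qualifies, while a 12-byte store does not (12 is not a power of 2).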
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) { return false; }

  bool isLegalMaskedExpandLoad(Type *DataType) { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }

  bool prefersVectorizedAddressing() { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                              Scale, AddrSpace))
      return 0;
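    // A negative return value signals that the addressing mode is not
    // supported at all.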
    return -1;
  }

  bool LSRWithInstrQueries() { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }

  bool isProfitableToHoist(Instruction *I) { return true; }

  bool useAA() { return false; }

  bool isTypeLegal(Type *Ty) { return false; }

  bool shouldBuildLookupTables() { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }

  bool useColdCCForColdCall(Function &F) { return false; }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) { return 0; }

  bool supportsEfficientVectorElementLoadStore() { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableInterleavedAccessVectorization() { return false; }

  bool enableMaskedInterleavedAccessVectorization() { return false; }

  bool isFPVectorizationPotentiallyUnsafe() { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace,
                                      unsigned Alignment,
                                      bool *Fast) { return false; }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }

  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }

  unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                             Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                               const APInt &Imm, Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char* getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
      default:
        return "Generic::Unknown Register Class";
      case 0: return "Generic::ScalarRC";
      case 1: return "Generic::VectorRC";
    }
  }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  unsigned getMinVectorRegisterBitWidth() { return 128; }

  bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }

  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader) {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }

  llvm::Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned> getCacheAssociativity(
    TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride() const { return 1; }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::OperandValueKind Opd1Info,
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args,
                                  const Instruction *CxtI = nullptr) {
    return 1;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
                          Type *SubTp) {
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            const Instruction *I) { return 1; }

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) {
    return 1;
  }

  unsigned getCFInstrCost(unsigned Opcode) { return 1; }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              const Instruction *I) {
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    return 1;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                           unsigned AddressSpace, const Instruction *I) {
    return 1;
  }

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) {
    return 1;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                                  bool VariableMask,
                                  unsigned Alignment) {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                      unsigned Factor,
                                      ArrayRef<unsigned> Indices,
                                      unsigned Alignment, unsigned AddressSpace,
                                      bool UseMaskForCond = false,
                                      bool UseMaskForGaps = false) {
    return 1;
  }

  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
                                 unsigned ScalarizationCostPassed) {
    return 1;
  }
  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
            ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
    return 1;
  }

  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) { return 0; }

  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                     const SCEV *) {
    return 0;
  }

  unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }

  unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: you must ensure that, for every element
    // unordered-atomic memory intrinsic, all power-of-2 element sizes up to
    // and including the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found in
    // RuntimeLibcalls.h.
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
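    // Conservative default: callers and callees are only inline-compatible
    // when their "target-cpu" and "target-features" attribute strings match
    // exactly.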
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }

  unsigned getGISelRematGlobalCost() const {
    return 1;
  }

protected:
  // Obtain the minimum number of bits required to hold the value, excluding
  // the sign bit. For vectors, this returns the minimum required size of a
  // single element.
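  // For example, for the constant vector <2 x i32> <i32 3, i32 -4> this
  // returns 2 (both elements fit in two value bits) and sets isSigned to
  // true.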
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // For a vector, pick the maximum of the minimum required sizes of the
      // individual elements.
      auto *VT = cast<VectorType>(Val->getType());

      // Assume unsigned elements until a signed element is found.
      isSigned = false;

      // The maximum required size is the element width, i.e. the total vector
      // width divided by the number of elements.
      unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
              dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element's minimum required size.
          unsigned ElementMinRequiredSize =
            IntElement->getValue().getMinSignedBits() - 1;
          // If any element is signed, the whole vector is considered signed.
          isSigned |= signedElement;
          // Track the maximum required bit width across all elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not a constant int element; fall back to the maximum size.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) {
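    // A pointer access is treated as strided when its SCEV is an add
    // recurrence, e.g. {%base,+,4}; the recurrence step is the stride.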
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getCallCost;

  unsigned getCallCost(const Function *F, int NumArgs, const User *U) {
    assert(F && "A concrete function must be provided to this routine.");

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = F->arg_size();

    if (Intrinsic::ID IID = F->getIntrinsicID()) {
      FunctionType *FTy = F->getFunctionType();
      SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
      return static_cast<T *>(this)
          ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U);
    }

    if (!static_cast<T *>(this)->isLoweredToCall(F))
      return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                             // directly.

    return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U);
  }

  unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
                       const User *U) {
    // Simply delegate to generic handling of the call.
    // FIXME: We should use instsimplify or something else to catch calls which
    // will constant fold with these arguments.
    return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U);
  }

  using BaseT::getGEPCost;

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    // TODO: will remove this when pointers have an opaque type.
    assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
               PointeeType &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;
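    // Fold constant indices into BaseOffset, use Scale for at most one
    // variable index, and then ask the target whether the resulting
    // (BaseGV, BaseOffset, Scale) combination forms a legal addressing mode;
    // if it does, the GEP is considered free.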

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has only a single operand,
    // the base pointer, in which case TargetType remains a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For struct types, the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys, const User *U) {
    switch (IID) {
    default:
      // Intrinsics rarely (if ever) have normal argument setup constraints.
      // Model them as having a basic instruction cost.
      return TTI::TCC_Basic;

    // TODO: other libc intrinsics.
    case Intrinsic::memcpy:
      return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));

    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return TTI::TCC_Free;
    }
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments, const User *U) {
    // Delegate to the generic intrinsic handling code. This mostly provides an
    // opportunity for targets to (for example) special case the cost of
    // certain intrinsics based on constants used as arguments.
    SmallVector<Type *, 8> ParamTys;
    ParamTys.reserve(Arguments.size());
    for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
      ParamTys.push_back(Arguments[Idx]->getType());
    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
  }

  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
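    // Rough per-user cost model: PHIs, extractvalues and static allocas are
    // free; GEPs, calls and extensions are delegated to their dedicated
    // hooks; everything else falls back to getOperationCost.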
    if (isa<PHINode>(U))
      return TTI::TCC_Free; // Model all PHI nodes as free.

    if (isa<ExtractValueInst>(U))
      return TTI::TCC_Free; // Model all ExtractValue nodes as free.

    // Static alloca doesn't generate target instructions.
    if (auto *A = dyn_cast<AllocaInst>(U))
      if (A->isStaticAlloca())
        return TTI::TCC_Free;

    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
      return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                GEP->getPointerOperand(),
                                                Operands.drop_front());
    }

    if (auto CS = ImmutableCallSite(U)) {
      const Function *F = CS.getCalledFunction();
      if (!F) {
        // Just use the called value type.
        Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
        return static_cast<T *>(this)
            ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U);
      }

      SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
      return static_cast<T *>(this)->getCallCost(F, Arguments, U);
    }

    if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U))
      // The old behaviour of generally treating extensions of icmp as free has
      // been removed. A target that needs it should override getUserCost().
      return static_cast<T *>(this)->getExtCost(cast<Instruction>(U),
                                                Operands.back());

    return static_cast<T *>(this)->getOperationCost(
        Operator::getOpcode(U), U->getType(),
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
  }

  int getInstructionLatency(const Instruction *I) {
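    // Crude default latency model: free users cost 0, loads 4, real calls 40
    // and floating-point producers 3; everything else costs 1. These are
    // rough estimates rather than cycle-accurate numbers.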
    SmallVector<const Value *, 4> Operands(I->value_op_begin(),
                                           I->value_op_end());
    if (getUserCost(I, Operands) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide the latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};
} // namespace llvm

#endif