//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
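///
/// The defaults here are conservative, mostly-neutral answers; concrete
/// target implementations are expected to override only the hooks they care
/// about (typically by building on the CRTP helper defined later in this
/// file).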
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands,
                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
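    // For example (illustrative IR): a GEP whose index operands are all
    // constants, such as "getelementptr i32, i32* %p, i64 4", is treated as
    // free, while a variable index such as "i64 %i" costs TCC_Basic.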
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) const {
    (void)PSI;
    (void)BFI;
    JTSize = 0;
    return SI.getNumCases();
  }

  unsigned getInliningThresholdMultiplier() const { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }

  int getInlinerVectorBonusPercent() const { return 150; }

  unsigned getMemcpyCost(const Instruction *I) const {
    return TTI::TCC_Expensive;
  }

  bool hasBranchDivergence() const { return false; }

  bool useGPUDivergenceAnalysis() const { return false; }

  bool isSourceOfDivergence(const Value *V) const { return false; }

  bool isAlwaysUniform(const Value *V) const { return false; }

  unsigned getFlatAddressSpace() const { return -1; }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }

  unsigned getAssumedAddrSpace(const Value *V) const { return -1; }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLoweredToCall(const Function *F) const {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) const {
    return false;
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) const {
    return false;
  }

  bool emitGetActiveLaneMask() const {
    return false;
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const {
    return None;
  }

  Optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) const {
    return None;
  }

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) const {
    return None;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) const {}

  void getPeelingPreferences(Loop *, ScalarEvolution &,
                             TTI::PeelingPreferences &) const {}

  bool isLegalAddImmediate(int64_t Imm) const { return false; }

  bool isLegalICmpImmediate(int64_t Imm) const { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I = nullptr) const {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

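  // Compare two LSR cost records lexicographically: register pressure
  // dominates, and the remaining fields act as tie-breakers in the order
  // listed below.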
  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool isNumRegsMajorCostOfLSR() const { return true; }

  bool isProfitableLSRChainElement(Instruction *I) const { return false; }

  bool canMacroFuseCmp() const { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) const {
    return false;
  }

  bool shouldFavorPostInc() const { return false; }

  bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }

  bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalNTStore(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, Align Alignment) const {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
    return false;
  }

  bool isLegalMaskedCompressStore(Type *DataType) const { return false; }

  bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
    return false;
  }

  bool prefersVectorizedAddressing() const { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) const {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                              AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() const { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }

  bool isProfitableToHoist(Instruction *I) const { return true; }

  bool useAA() const { return false; }

  bool isTypeLegal(Type *Ty) const { return false; }

  unsigned getRegUsageForType(Type *Ty) const { return 1; }

  bool shouldBuildLookupTables() const { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }

  bool useColdCCForColdCall(Function &F) const { return false; }

  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
                                    bool Insert, bool Extract) const {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) const {
    return 0;
  }

  bool supportsEfficientVectorElementLoadStore() const { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) const {
    return false;
  }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableInterleavedAccessVectorization() const { return false; }

  bool enableMaskedInterleavedAccessVectorization() const { return false; }

  bool isFPVectorizationPotentiallyUnsafe() const { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, unsigned Alignment,
                                      bool *Fast) const {
    return false;
  }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) const { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }

  unsigned getFPOpCost(Type *Ty) const {
    return TargetTransformInfo::TCC_Basic;
  }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) const {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty,
                         TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Basic;
  }

  unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                             Type *Ty, TTI::TargetCostKind CostKind,
                             Instruction *Inst = nullptr) const {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                               const APInt &Imm, Type *Ty,
                               TTI::TargetCostKind CostKind) const {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }

  unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
    return Vector ? 1 : 0;
  }

  const char *getRegisterClassName(unsigned ClassID) const {
    switch (ClassID) {
    default:
      return "Generic::Unknown Register Class";
    case 0:
      return "Generic::ScalarRC";
    case 1:
      return "Generic::VectorRC";
    }
  }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  unsigned getMinVectorRegisterBitWidth() const { return 128; }

  Optional<unsigned> getMaxVScale() const { return None; }

  bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }

  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() const { return 0; }

  llvm::Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }
    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() const { return 0; }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches, bool HasCall) const {
    return 1;
  }
  unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
  bool enableWritePrefetching() const { return false; }

  unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }

  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::TargetCostKind CostKind,
                                  TTI::OperandValueKind Opd1Info,
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args,
                                  const Instruction *CxtI = nullptr) const {
    // FIXME: A number of transformation tests seem to require these values
    // which seems a little odd given how arbitrary they are.
    switch (Opcode) {
    default:
      break;
    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      // FIXME: Unlikely to be true for CodeSize.
      return TTI::TCC_Expensive;
    }
    return 1;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index,
                          VectorType *SubTp) const {
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            TTI::CastContextHint CCH,
                            TTI::TargetCostKind CostKind,
                            const Instruction *I) const {
    switch (Opcode) {
    default:
      break;
    case Instruction::IntToPtr: {
      unsigned SrcSize = Src->getScalarSizeInBits();
      if (DL.isLegalInteger(SrcSize) &&
          SrcSize <= DL.getPointerTypeSizeInBits(Dst))
        return 0;
      break;
    }
    case Instruction::PtrToInt: {
      unsigned DstSize = Dst->getScalarSizeInBits();
      if (DL.isLegalInteger(DstSize) &&
          DstSize >= DL.getPointerTypeSizeInBits(Src))
        return 0;
      break;
    }
    case Instruction::BitCast:
      if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return 0;
      break;
    case Instruction::Trunc: {
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      TypeSize DstSize = DL.getTypeSizeInBits(Dst);
      if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedSize()))
        return 0;
      break;
    }
    }
    return 1;
  }

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) const {
    return 1;
  }

  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) const {
    // A phi is considered free, unless we are costing reciprocal throughput,
    // in which case it will require a register.
    if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
      return 0;
    return 1;
  }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              CmpInst::Predicate VecPred,
                              TTI::TargetCostKind CostKind,
                              const Instruction *I) const {
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                              unsigned Index) const {
    return 1;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                           unsigned AddressSpace, TTI::TargetCostKind CostKind,
                           const Instruction *I) const {
    return 1;
  }

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                 unsigned AddressSpace,
                                 TTI::TargetCostKind CostKind) const {
    return 1;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                  const Value *Ptr, bool VariableMask,
                                  Align Alignment, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr) const {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) const {
    return 1;
  }

  unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind) const {
    switch (ICA.getID()) {
    default:
      break;
    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::experimental_noalias_scope_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return 0;
    }
    return 1;
  }

  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
                            TTI::TargetCostKind CostKind) const {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) const { return 0; }

  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                     const SCEV *) const {
    return 0;
  }

  unsigned getArithmeticReductionCost(unsigned, VectorType *, bool,
                                      TTI::TargetCostKind) const {
    return 1;
  }

  unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool,
                                  TTI::TargetCostKind) const {
    return 1;
  }

  InstructionCost getExtendedAddReductionCost(
      bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const {
    return 1;
  }

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
    return 0;
  }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
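    // For example, a target whose largest element-unordered-atomic lib calls
    // handle 16-byte elements would return 16 here.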
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) const {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAddrSpace, unsigned DestAddrSpace,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }

  unsigned getGISelRematGlobalCost() const { return 1; }

  bool supportsScalableVectors() const { return false; }

  bool hasActiveVectorLength() const { return false; }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
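  // For example, a ConstantInt with value 7 yields 3 with isSigned == false,
  // while a ConstantInt with value -4 yields 2 with isSigned == true (the
  // sign bit itself is not counted).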
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // For a vector we need to pick the max of the min required sizes
      // of the elements.
      auto *VT = cast<FixedVectorType>(Val->getType());

      // Assume unsigned elements
      isSigned = false;

      // The max required size is the size of the vector element type
      unsigned MaxRequiredSize =
          VT->getElementType()->getPrimitiveSizeInBits().getFixedSize();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // If one element is signed then the whole vector is considered signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an integer constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) const {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) const {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) const {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
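///
/// A minimal usage sketch, assuming a hypothetical target implementation
/// (the class and override below are illustrative, not part of LLVM):
/// \code
///   class MyTargetTTIImpl final
///       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
///     using BaseT = TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>;
///
///   public:
///     explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}
///
///     // Override only the hooks the target cares about; the base class
///     // dispatches back into overrides via static_cast<T *>(this).
///     bool isLegalAddImmediate(int64_t Imm) const {
///       return Imm >= -2048 && Imm < 2048;
///     }
///   };
/// \endcode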
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getGEPCost;

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands,
                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    // TODO: Remove this when pointers have an opaque type.
    assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
               PointeeType &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has only a single operand,
    // the base pointer, in which case TargetType remains a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures, the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        // If this operand is a scalable type, bail out early.
        // TODO: handle scalable vectors
        if (isa<ScalableVectorType>(TargetType))
          return TTI::TCC_Basic;
        int64_t ElementSize =
            DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
                  TTI::TargetCostKind CostKind) {
    auto *TargetTTI = static_cast<T *>(this);
    // Handle non-intrinsic calls, invokes, and callbr.
    // FIXME: Unlikely to be true for anything but CodeSize.
    auto *CB = dyn_cast<CallBase>(U);
    if (CB && !isa<IntrinsicInst>(U)) {
      if (const Function *F = CB->getCalledFunction()) {
        if (!TargetTTI->isLoweredToCall(F))
          return TTI::TCC_Basic; // Give a basic cost if it will be lowered

        return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
      }
      // For indirect or other calls, scale cost by number of arguments.
      return TTI::TCC_Basic * (CB->arg_size() + 1);
    }

    Type *Ty = U->getType();
    Type *OpTy =
      U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr;
    unsigned Opcode = Operator::getOpcode(U);
    auto *I = dyn_cast<Instruction>(U);
    switch (Opcode) {
    default:
      break;
    case Instruction::Call: {
      assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
      auto *Intrinsic = cast<IntrinsicInst>(U);
      IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
      return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
    }
    case Instruction::Br:
    case Instruction::Ret:
    case Instruction::PHI:
      return TargetTTI->getCFInstrCost(Opcode, CostKind);
    case Instruction::ExtractValue:
    case Instruction::Freeze:
      return TTI::TCC_Free;
    case Instruction::Alloca:
      if (cast<AllocaInst>(U)->isStaticAlloca())
        return TTI::TCC_Free;
      break;
    case Instruction::GetElementPtr: {
      const GEPOperator *GEP = cast<GEPOperator>(U);
      return TargetTTI->getGEPCost(GEP->getSourceElementType(),
                                   GEP->getPointerOperand(),
                                   Operands.drop_front());
    }
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::FNeg: {
      TTI::OperandValueProperties Op1VP = TTI::OP_None;
      TTI::OperandValueProperties Op2VP = TTI::OP_None;
      TTI::OperandValueKind Op1VK =
        TTI::getOperandInfo(U->getOperand(0), Op1VP);
      TTI::OperandValueKind Op2VK = Opcode != Instruction::FNeg ?
        TTI::getOperandInfo(U->getOperand(1), Op2VP) : TTI::OK_AnyValue;
      SmallVector<const Value *, 2> Operands(U->operand_values());
      return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind,
                                               Op1VK, Op2VK,
                                               Op1VP, Op2VP, Operands, I);
    }
    case Instruction::IntToPtr:
    case Instruction::PtrToInt:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::FPExt:
    case Instruction::SExt:
    case Instruction::ZExt:
    case Instruction::AddrSpaceCast:
      return TargetTTI->getCastInstrCost(
          Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
    case Instruction::Store: {
      auto *SI = cast<StoreInst>(U);
      Type *ValTy = U->getOperand(0)->getType();
      return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
                                        SI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Load: {
      auto *LI = cast<LoadInst>(U);
      return TargetTTI->getMemoryOpCost(Opcode, U->getType(), LI->getAlign(),
                                        LI->getPointerAddressSpace(),
                                        CostKind, I);
    }
    case Instruction::Select: {
      Type *CondTy = U->getOperand(0)->getType();
      return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
                                           CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::ICmp:
    case Instruction::FCmp: {
      Type *ValTy = U->getOperand(0)->getType();
      // TODO: Also handle ICmp/FCmp constant expressions.
      return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
                                           I ? cast<CmpInst>(I)->getPredicate()
                                             : CmpInst::BAD_ICMP_PREDICATE,
                                           CostKind, I);
    }
    case Instruction::InsertElement: {
      auto *IE = dyn_cast<InsertElementInst>(U);
      if (!IE)
        return TTI::TCC_Basic; // FIXME
      auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
      unsigned Idx = CI ? CI->getZExtValue() : -1;
      return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx);
    }
    case Instruction::ShuffleVector: {
      auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
      if (!Shuffle)
        return TTI::TCC_Basic; // FIXME
      auto *VecTy = cast<VectorType>(U->getType());
      auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());

      // TODO: Identify and add costs for insert subvector, etc.
      int SubIndex;
      if (Shuffle->isExtractSubvectorMask(SubIndex))
        return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
                                         SubIndex, VecTy);
      else if (Shuffle->changesLength())
        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
      else if (Shuffle->isIdentity())
        return 0;
      else if (Shuffle->isReverse())
        return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, 0, nullptr);
      else if (Shuffle->isSelect())
        return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, 0, nullptr);
      else if (Shuffle->isTranspose())
        return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, 0, nullptr);
      else if (Shuffle->isZeroEltSplat())
        return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, 0, nullptr);
      else if (Shuffle->isSingleSource())
        return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, 0,
                                         nullptr);

      return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, 0,
                                       nullptr);
    }
    case Instruction::ExtractElement: {
      unsigned Idx = -1;
      auto *EEI = dyn_cast<ExtractElementInst>(U);
      if (!EEI)
        return TTI::TCC_Basic; // FIXME

      auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
      if (CI)
        Idx = CI->getZExtValue();

      // Try to match a reduction (a series of shufflevector and vector ops
      // followed by an extractelement).
      unsigned RdxOpcode;
      VectorType *RdxType;
      bool IsPairwise;
      switch (TTI::matchVectorReduction(EEI, RdxOpcode, RdxType, IsPairwise)) {
      case TTI::RK_Arithmetic:
        return TargetTTI->getArithmeticReductionCost(RdxOpcode, RdxType,
                                                     IsPairwise, CostKind);
      case TTI::RK_MinMax:
        return TargetTTI->getMinMaxReductionCost(
            RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
            IsPairwise, /*IsUnsigned=*/false, CostKind);
      case TTI::RK_UnsignedMinMax:
        return TargetTTI->getMinMaxReductionCost(
            RdxType, cast<VectorType>(CmpInst::makeCmpResultType(RdxType)),
            IsPairwise, /*IsUnsigned=*/true, CostKind);
      case TTI::RK_None:
        break;
      }
      return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(),
                                           Idx);
    }
    }
    // By default, just classify everything as 'basic'.
    return TTI::TCC_Basic;
  }

  int getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->operand_values());
    if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide the latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};
} // namespace llvm

#endif