//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

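// Controls whether vectorised, masked loops are turned into MVE
// tail-predicated loops (using VCTP to predicate the final iterations). The
// "NoReductions" variants leave loops containing reductions alone, and the
// "Force" variants override the usual profitability checks.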
namespace TailPredication {
  enum Mode {
    Disabled = 0,
    EnabledNoReductions,
    Enabled,
    ForceEnabledNoReductions,
    ForceEnabled
  };
}

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether these are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode into a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

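  /// Determine whether Callee may be inlined into Caller: subtarget features
  /// outside InlineFeaturesAllowed must match exactly, while for the allowed
  /// features the callee's set may be a subset of the caller's.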
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool enableInterleavedAccessVectorization() { return true; }

  bool shouldFavorBackedgeIndex(const Loop *L) const;
  bool shouldFavorPostInc() const;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) const;

  /// \name Scalar TTI Implementations
  /// @{

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty);

  using BaseT::getIntImmCost;
  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);

  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                        Type *Ty, TTI::TargetCostKind CostKind,
                        Instruction *Inst = nullptr);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

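  // Vector register class (ClassID == 1): NEON provides 16 128-bit Q
  // registers, MVE provides 8. Scalar: Thumb1 can allocate only the 8 low
  // GPRs, otherwise r0-r12 (13 registers) are available.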
  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  unsigned getRegisterBitWidth(bool Vector) const {
    if (Vector) {
      if (ST->hasNEON())
        return 128;
      if (ST->hasMVEIntegerOps())
        return 128;
      return 0;
    }

    return 32;
  }

  unsigned getMaxInterleaveFactor(unsigned VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I);

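  // Masked loads/stores and gathers/scatters are only available with MVE;
  // stores are legal exactly when the corresponding loads are, and likewise
  // scatters mirror gathers.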
  bool isLegalMaskedLoad(Type *DataTy, Align Alignment);

  bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment);

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
    return isLegalMaskedGather(Ty, Alignment);
  }

  int getMemcpyCost(const Instruction *I);

  int getNumMemOps(const IntrinsicInst *I) const;

  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
                     VectorType *SubTp);

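  // MVE has horizontal reduction instructions (e.g. VADDV, VMLAV), so
  // reduction intrinsics are never expanded here, and in-loop reductions can
  // be preferred when vectorising.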
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       TTI::ReductionFlags Flags) const;

  bool shouldExpandReduction(const IntrinsicInst *II) const { return false; }

  int getCFInstrCost(unsigned Opcode,
                     TTI::TargetCostKind CostKind);

  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                       const Instruction *I = nullptr);

  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         CmpInst::Predicate VecPred,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr);

  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

  int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                const SCEV *Ptr);

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace,
                      TTI::TargetCostKind CostKind,
                      const Instruction *I = nullptr);

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                 unsigned AddressSpace,
                                 TTI::TargetCostKind CostKind);

  int getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                  const Value *Ptr, bool VariableMask,
                                  Align Alignment, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);

  int getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                 bool IsPairwiseForm,
                                 TTI::TargetCostKind CostKind);
  InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
                                              Type *ResTy, VectorType *ValTy,
                                              TTI::TargetCostKind CostKind);

  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                            TTI::TargetCostKind CostKind);

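  // Hardware (low-overhead) loop and tail-predication queries. A loop that
  // contains anything that may be lowered to a runtime call is not profitable
  // as a hardware loop, since a call clobbers LR, which DLS/WLS/LE use for
  // the loop counter.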
  bool maybeLoweredToCall(Instruction &I);
  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
                                   ScalarEvolution &SE,
                                   AssumptionCache &AC,
                                   TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  bool emitGetActiveLaneMask() const;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);
  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsRelocation();

    return true;
  }
  /// @}
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H