//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file defines a TargetTransformInfo::Concept conforming object specific
/// to the ARM target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H

#include "ARM.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/SubtargetFeature.h"

namespace llvm {

class APInt;
class ARMTargetLowering;
class Instruction;
class Loop;
class SCEV;
class ScalarEvolution;
class Type;
class Value;

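/// Controls how aggressively MVE tail-predicated loops are formed. These
/// values typically mirror the -tail-predication command-line option:
/// disabled, enabled (with or without reduction loops), and force-enabled
/// variants that skip the usual safety checks.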
namespace TailPredication {
  enum Mode {
    Disabled = 0,
    EnabledNoReductions,
    Enabled,
    ForceEnabledNoReductions,
    ForceEnabled
  };
}

class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
  using BaseT = BasicTTIImplBase<ARMTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const ARMSubtarget *ST;
  const ARMTargetLowering *TLI;

  // Currently the following features are excluded from InlineFeaturesAllowed:
  // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureFP64, FeatureD32.
  // Depending on whether they are set or unset, different
  // instructions/registers are available. For example, inlining a callee with
  // -thumb-mode into a caller with +thumb-mode may cause the assembler to
  // fail if the callee uses ARM-only instructions, e.g. in inline asm.
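  //
  // A sketch of how this set is typically consumed by areInlineCompatible
  // (the actual logic lives in ARMTargetTransformInfo.cpp, so treat this as
  // illustrative only): features outside InlineFeaturesAllowed must match
  // exactly between caller and callee, while features inside it only need the
  // callee's bits to be a subset of the caller's:
  //   bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
  //                     (CalleeBits & ~InlineFeaturesAllowed);
  //   bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
  //                      (CalleeBits & InlineFeaturesAllowed);
  //   return MatchExact && MatchSubset;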
  const FeatureBitset InlineFeaturesAllowed = {
      ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2,
      ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8,
      ARM::FeatureFullFP16, ARM::FeatureFP16FML, ARM::FeatureHWDivThumb,
      ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex,
      ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc,
      ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt,
      ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS,
      ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing,
      ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32,
      ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR,
      ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits,
      ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg,
      ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx,
      ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs,
      ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign,
      ARM::FeatureHasSlowFPVMLx, ARM::FeatureHasSlowFPVFMx,
      ARM::FeatureVMLxForwarding, ARM::FeaturePref32BitThumb,
      ARM::FeatureAvoidPartialCPSR, ARM::FeatureCheapPredicableCPSR,
      ARM::FeatureAvoidMOVsShOp, ARM::FeatureHasRetAddrStack,
      ARM::FeatureHasNoBranchPredictor, ARM::FeatureDSP, ARM::FeatureMP,
      ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass,
      ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign,
      ARM::FeatureLongCalls, ARM::FeatureExecuteOnly, ARM::FeatureReserveR9,
      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
  };

  const ARMSubtarget *getST() const { return ST; }
  const ARMTargetLowering *getTLI() const { return TLI; }

public:
  explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}

  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const;

  bool enableInterleavedAccessVectorization() { return true; }

  bool shouldFavorBackedgeIndex(const Loop *L) const;
  bool shouldFavorPostInc() const;

  /// Floating-point computation using ARMv8 AArch32 Advanced
  /// SIMD instructions remains unchanged from ARMv7. Only AArch64 SIMD
  /// and Arm MVE are IEEE-754 compliant.
  bool isFPVectorizationPotentiallyUnsafe() {
    return !ST->isTargetDarwin() && !ST->hasMVEFloatOps();
  }

  /// \name Scalar TTI Implementations
  /// @{

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty);

  using BaseT::getIntImmCost;
  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);

  int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
                        Type *Ty, TTI::TargetCostKind CostKind);

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(unsigned ClassID) const {
    bool Vector = (ClassID == 1);
    if (Vector) {
      if (ST->hasNEON())
        return 16;
      if (ST->hasMVEIntegerOps())
        return 8;
      return 0;
    }

    if (ST->isThumb1Only())
      return 8;
    return 13;
  }

  unsigned getRegisterBitWidth(bool Vector) const {
    if (Vector) {
      if (ST->hasNEON())
        return 128;
      if (ST->hasMVEIntegerOps())
        return 128;
      return 0;
    }

    return 32;
  }

  unsigned getMaxInterleaveFactor(unsigned VF) {
    return ST->getMaxInterleaveFactor();
  }

  bool isProfitableLSRChainElement(Instruction *I);

  bool isLegalMaskedLoad(Type *DataTy, Align Alignment);

  bool isLegalMaskedStore(Type *DataTy, Align Alignment) {
    return isLegalMaskedLoad(DataTy, Alignment);
  }

  bool isLegalMaskedGather(Type *Ty, Align Alignment);

  bool isLegalMaskedScatter(Type *Ty, Align Alignment) {
    return isLegalMaskedGather(Ty, Alignment);
  }

  int getMemcpyCost(const Instruction *I);

  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
                     VectorType *SubTp);

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const;

  bool shouldExpandReduction(const IntrinsicInst *II) const {
    switch (II->getIntrinsicID()) {
    case Intrinsic::experimental_vector_reduce_v2_fadd:
    case Intrinsic::experimental_vector_reduce_v2_fmul:
      // We don't have legalization support for ordered FP reductions.
      if (!II->getFastMathFlags().allowReassoc())
        return true;
      // Can't legalize reductions with soft floats.
      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs();

    case Intrinsic::experimental_vector_reduce_fmin:
    case Intrinsic::experimental_vector_reduce_fmax:
      // Can't legalize reductions with soft floats, and NoNan will create
      // fminimum which we do not know how to lower.
      return TLI->useSoftFloat() || !TLI->getSubtarget()->hasFPRegs() ||
             !II->getFastMathFlags().noNaNs();

    default:
      // Don't expand anything else, let legalization deal with it.
      return false;
    }
  }

  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       TTI::TargetCostKind CostKind,
                       const Instruction *I = nullptr);

  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         TTI::TargetCostKind CostKind,
                         const Instruction *I = nullptr);

  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);

  int getAddressComputationCost(Type *Val, ScalarEvolution *SE,
                                const SCEV *Ptr);

  int getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);

  int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                      unsigned AddressSpace,
                      TTI::TargetCostKind CostKind,
                      const Instruction *I = nullptr);

  int getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
      bool UseMaskForCond = false, bool UseMaskForGaps = false);

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                  const Value *Ptr, bool VariableMask,
                                  Align Alignment, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr);

  bool isLoweredToCall(const Function *F);
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo);
  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
                                   ScalarEvolution &SE,
                                   AssumptionCache &AC,
                                   TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI);
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP);

  bool emitGetActiveLaneMask() const;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP);

  bool shouldBuildLookupTablesForConstant(Constant *C) const {
    // In the ROPI and RWPI relocation models we can't have pointers to global
    // variables or functions in constant data, so don't convert switches to
    // lookup tables if any of the values would need relocation.
    if (ST->isROPI() || ST->isRWPI())
      return !C->needsRelocation();

    return true;
  }
  /// @}
};
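
// A minimal usage sketch (assuming the usual wiring in ARMTargetMachine.cpp;
// treat the exact signature as illustrative): the target machine hands out a
// TargetTransformInfo wrapping this implementation per function:
//   TargetTransformInfo
//   ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) {
//     return TargetTransformInfo(ARMTTIImpl(this, F));
//   }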

} // end namespace llvm

#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H