1 //===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the ARM specific subclass of TargetSubtargetInfo.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
14 #define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
15 
16 #include "ARMBaseInstrInfo.h"
17 #include "ARMBaseRegisterInfo.h"
18 #include "ARMConstantPoolValue.h"
19 #include "ARMFrameLowering.h"
20 #include "ARMISelLowering.h"
21 #include "ARMMachineFunctionInfo.h"
22 #include "ARMSelectionDAGInfo.h"
23 #include "llvm/Analysis/TargetTransformInfo.h"
24 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
25 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
26 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/RegisterBankInfo.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/MC/MCInstrItineraries.h"
31 #include "llvm/MC/MCSchedule.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Target/TargetOptions.h"
34 #include "llvm/TargetParser/Triple.h"
35 #include <bitset>
36 #include <memory>
37 #include <string>
38 
39 #define GET_SUBTARGETINFO_HEADER
40 #include "ARMGenSubtargetInfo.inc"
41 
42 namespace llvm {
43 
44 class ARMBaseTargetMachine;
45 class GlobalValue;
46 class StringRef;
47 
48 class ARMSubtarget : public ARMGenSubtargetInfo {
49 protected:
  /// Processor families that can be stored in the ARMProcFamily member.
  /// Others is the first enumerator and is the value ARMProcFamily is
  /// initialised with.
  enum ARMProcFamilyEnum {
    Others,

    CortexA12,
    CortexA15,
    CortexA17,
    CortexA32,
    CortexA35,
    CortexA5,
    CortexA53,
    CortexA55,
    CortexA57,
    CortexA7,
    CortexA72,
    CortexA73,
    CortexA75,
    CortexA76,
    CortexA77,
    CortexA78,
    CortexA78C,
    CortexA710,
    CortexA8,
    CortexA9,
    CortexM3,
    CortexM7,
    CortexM52,
    CortexR4,
    CortexR4F,
    CortexR5,
    CortexR52,
    CortexR7,
    CortexX1,
    CortexX1C,
    Exynos,
    Krait,
    Kryo,
    NeoverseN1,
    NeoverseN2,
    NeoverseV1,
    Swift
  };
  /// Processor classes that can be stored in the ARMProcClass member and
  /// that are tested by isAClass(), isMClass() and isRClass(). None is the
  /// value ARMProcClass is initialised with.
  enum ARMProcClassEnum {
    None,

    AClass,
    MClass,
    RClass
  };
  /// Architecture versions that can be stored in the ARMArch member.
  /// The enumerators are not listed in version order (for example ARMv89a
  /// precedes ARMv8a), so their numeric values do not form a version
  /// ordering.
  enum ARMArchEnum {
    ARMv4,
    ARMv4t,
    ARMv5,
    ARMv5t,
    ARMv5te,
    ARMv5tej,
    ARMv6,
    ARMv6k,
    ARMv6kz,
    ARMv6m,
    ARMv6sm,
    ARMv6t2,
    ARMv7a,
    ARMv7em,
    ARMv7m,
    ARMv7r,
    ARMv7ve,
    ARMv81a,
    ARMv82a,
    ARMv83a,
    ARMv84a,
    ARMv85a,
    ARMv86a,
    ARMv87a,
    ARMv88a,
    ARMv89a,
    ARMv8a,
    ARMv8mBaseline,
    ARMv8mMainline,
    ARMv8r,
    ARMv81mMainline,
    ARMv9a,
    ARMv91a,
    ARMv92a,
    ARMv93a,
    ARMv94a,
    ARMv95a,
  };
137 
138 public:
  /// What kind of timing do load multiple/store multiple instructions have.
  /// The selected value is stored in the LdStMultipleTiming member (which is
  /// initialised to SingleIssue) and returned by getLdStMultipleTiming().
  enum ARMLdStMultipleTiming {
    /// Can load/store 2 registers/cycle.
    DoubleIssue,
    /// Can load/store 2 registers/cycle, but needs an extra cycle if the access
    /// is not 64-bit aligned.
    DoubleIssueCheckUnalignedAccess,
    /// Can load/store 1 register/cycle.
    SingleIssue,
    /// Can load/store 1 register/cycle, but needs an extra cycle for address
    /// computation and potentially also for register writeback.
    SingleIssuePlusExtras,
  };
152 
153 protected:
// Bool members corresponding to the SubtargetFeatures defined in tablegen.
// Each GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) entry supplied by
// the included file expands here to `bool ATTRIBUTE = DEFAULT;`; the GETTER
// argument is not used by this expansion.
// NOTE(review): the macro is defined a second time further down without an
// #undef in this header, so the .inc file presumably undefines it - confirm.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool ATTRIBUTE = DEFAULT;
#include "ARMGenSubtargetInfo.inc"
158 
  /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
  ARMProcFamilyEnum ARMProcFamily = Others;

  /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass.
  ARMProcClassEnum ARMProcClass = None;

  /// ARMArch - ARM architecture. Initialised to ARMv4t.
  ARMArchEnum ARMArch = ARMv4t;

  /// UseMulOps - True if non-microcoded fused integer multiply-add and
  /// multiply-subtract instructions should be used.
  bool UseMulOps = false;

  /// SupportsTailCall - True if the OS supports tail call. The dynamic linker
  /// must be able to synthesize call stubs for interworking between ARM and
  /// Thumb.
  bool SupportsTailCall = false;

  /// RestrictIT - If true, the subtarget disallows generation of complex IT
  /// blocks.
  bool RestrictIT = false;

  /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS).
  bool UseSjLjEH = false;

  /// stackAlignment - The minimum alignment known to hold of the stack frame on
  /// entry to the function and which must be maintained by every function.
  /// Initialised to 4 bytes.
  Align stackAlignment = Align(4);

  /// CPUString - String name of used CPU.
  std::string CPUString;

  /// Value returned by getMaxInterleaveFactor(). Initialised to 1.
  unsigned MaxInterleaveFactor = 1;

  /// Clearance before partial register updates (in number of instructions)
  unsigned PartialUpdateClearance = 0;

  /// What kind of timing do load multiple/store multiple have (double issue,
  /// single issue etc).
  ARMLdStMultipleTiming LdStMultipleTiming = SingleIssue;

  /// The adjustment that we need to apply to get the operand latency from the
  /// operand cycle returned by the itinerary data for pre-ISel operands.
  int PreISelOperandLatencyAdjustment = 2;

  /// What alignment is preferred for loop bodies and functions, in log2(bytes).
  unsigned PrefLoopLogAlignment = 0;

  /// The cost factor for MVE instructions, representing the multiple beats an
  /// instruction can take. The default is 2, (set in initSubtargetFeatures so
  /// that we can use subtarget features less than 2).
  unsigned MVEVectorCostFactor = 0;

  /// OptMinSize - True if we're optimising for minimum code size, equal to
  /// the function attribute.
  bool OptMinSize = false;

  /// IsLittle - The target is Little Endian.
  /// This member has no in-class initialiser.
  /// NOTE(review): presumably set from the constructor's IsLittle argument;
  /// the constructor body is not in this header.
  bool IsLittle;

  /// TargetTriple - What processor and OS we're targeting.
  Triple TargetTriple;

  /// SchedModel - Processor specific instruction costs.
  MCSchedModel SchedModel;

  /// Selected instruction itineraries (one entry per itinerary class.)
  InstrItineraryData InstrItins;

  /// Options passed via command line that could influence the target
  const TargetOptions &Options;

  /// Reference to the ARM target machine.
  /// NOTE(review): presumably bound to the constructor's TM argument; the
  /// constructor body is not in this header.
  const ARMBaseTargetMachine &TM;
232 
233 public:
  /// This constructor initializes the data members to match that
  /// of the specified triple.
  ///
  /// \p MinSize defaults to false.
  /// NOTE(review): the constructor body is not in this header; \p CPU and
  /// \p FS are presumably the CPU name and feature string, and \p IsLittle /
  /// \p MinSize presumably seed the IsLittle / OptMinSize members - confirm
  /// against the definition.
  ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
               const ARMBaseTargetMachine &TM, bool IsLittle,
               bool MinSize = false);
240 
241   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
242   /// that still makes it profitable to inline the call.
getMaxInlineSizeThreshold()243   unsigned getMaxInlineSizeThreshold() const {
244     return 64;
245   }
246 
247   /// getMaxMemcpyTPInlineSizeThreshold - Returns the maximum size
248   /// that still makes it profitable to inline a llvm.memcpy as a Tail
249   /// Predicated loop.
250   /// This threshold should only be used for constant size inputs.
getMaxMemcpyTPInlineSizeThreshold()251   unsigned getMaxMemcpyTPInlineSizeThreshold() const { return 128; }
252 
  /// ParseSubtargetFeatures - Parses features string setting specified
  /// subtarget options.  Definition of function is auto generated by tblgen.
  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);

  /// initializeSubtargetDependencies - Initializes using a CPU and feature
  /// string so that we can use initializer lists for subtarget
  /// initialization. Returns a reference to an ARMSubtarget.
  /// NOTE(review): presumably returns *this; the definition is not in this
  /// header.
  ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
260 
getSelectionDAGInfo()261   const ARMSelectionDAGInfo *getSelectionDAGInfo() const override {
262     return &TSInfo;
263   }
264 
getInstrInfo()265   const ARMBaseInstrInfo *getInstrInfo() const override {
266     return InstrInfo.get();
267   }
268 
getTargetLowering()269   const ARMTargetLowering *getTargetLowering() const override {
270     return &TLInfo;
271   }
272 
getFrameLowering()273   const ARMFrameLowering *getFrameLowering() const override {
274     return FrameLowering.get();
275   }
276 
getRegisterInfo()277   const ARMBaseRegisterInfo *getRegisterInfo() const override {
278     return &InstrInfo->getRegisterInfo();
279   }
280 
  /// GlobalISel accessors. They are only declared in this header.
  /// NOTE(review): presumably they expose the private CallLoweringInfo,
  /// InstSelector, Legalizer and RegBankInfo members respectively - confirm
  /// against the out-of-line definitions.
  const CallLowering *getCallLowering() const override;
  InstructionSelector *getInstructionSelector() const override;
  const LegalizerInfo *getLegalizerInfo() const override;
  const RegisterBankInfo *getRegBankInfo() const override;
285 
286 private:
  // Object whose address getSelectionDAGInfo() returns.
  ARMSelectionDAGInfo TSInfo;
  // Either Thumb1FrameLowering or ARMFrameLowering.
  std::unique_ptr<ARMFrameLowering> FrameLowering;
  // Either Thumb1InstrInfo or Thumb2InstrInfo.
  std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
  // Object whose address getTargetLowering() returns.
  ARMTargetLowering   TLInfo;

  /// GlobalISel related APIs.
  std::unique_ptr<CallLowering> CallLoweringInfo;
  std::unique_ptr<InstructionSelector> InstSelector;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;

  // Private initialisation helpers; declared here, defined out of line.
  void initializeEnvironment();
  void initSubtargetFeatures(StringRef CPU, StringRef FS);
  ARMFrameLowering *initializeFrameLowering(StringRef CPU, StringRef FS);

  // Eight-bit set, value-initialised so that every bit starts cleared.
  // NOTE(review): presumably one bit per coprocessor to record which ones
  // are assigned to CDE - nothing in this header reads or writes it.
  std::bitset<8> CoprocCDE = {};
305 public:
// Getters for SubtargetFeatures defined in tablegen.
// Each GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) entry supplied by
// the included file expands here to a const member function named GETTER
// that returns the ATTRIBUTE member; DEFAULT is not used by this expansion.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool GETTER() const { return ATTRIBUTE; }
#include "ARMGenSubtargetInfo.inc"
310 
311   /// @{
312   /// These functions are obsolete, please consider adding subtarget features
313   /// or properties instead of calling them.
isCortexA5()314   bool isCortexA5() const { return ARMProcFamily == CortexA5; }
isCortexA7()315   bool isCortexA7() const { return ARMProcFamily == CortexA7; }
isCortexA8()316   bool isCortexA8() const { return ARMProcFamily == CortexA8; }
isCortexA9()317   bool isCortexA9() const { return ARMProcFamily == CortexA9; }
isCortexA15()318   bool isCortexA15() const { return ARMProcFamily == CortexA15; }
isSwift()319   bool isSwift()    const { return ARMProcFamily == Swift; }
isCortexM3()320   bool isCortexM3() const { return ARMProcFamily == CortexM3; }
isCortexM7()321   bool isCortexM7() const { return ARMProcFamily == CortexM7; }
isLikeA9()322   bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); }
isCortexR5()323   bool isCortexR5() const { return ARMProcFamily == CortexR5; }
isKrait()324   bool isKrait() const { return ARMProcFamily == Krait; }
325   /// @}
326 
hasARMOps()327   bool hasARMOps() const { return !NoARM; }
328 
useNEONForSinglePrecisionFP()329   bool useNEONForSinglePrecisionFP() const {
330     return hasNEON() && hasNEONForFP();
331   }
332 
hasVFP2Base()333   bool hasVFP2Base() const { return hasVFPv2SP(); }
hasVFP3Base()334   bool hasVFP3Base() const { return hasVFPv3D16SP(); }
hasVFP4Base()335   bool hasVFP4Base() const { return hasVFPv4D16SP(); }
hasFPARMv8Base()336   bool hasFPARMv8Base() const { return hasFPARMv8D16SP(); }
337 
hasAnyDataBarrier()338   bool hasAnyDataBarrier() const {
339     return HasDataBarrier || (hasV6Ops() && !isThumb());
340   }
341 
useMulOps()342   bool useMulOps() const { return UseMulOps; }
useFPVMLx()343   bool useFPVMLx() const { return !SlowFPVMLx; }
useFPVFMx()344   bool useFPVFMx() const {
345     return !isTargetDarwin() && hasVFP4Base() && !SlowFPVFMx;
346   }
useFPVFMx16()347   bool useFPVFMx16() const { return useFPVFMx() && hasFullFP16(); }
useFPVFMx64()348   bool useFPVFMx64() const { return useFPVFMx() && hasFP64(); }
useSjLjEH()349   bool useSjLjEH() const { return UseSjLjEH; }
hasBaseDSP()350   bool hasBaseDSP() const {
351     if (isThumb())
352       return hasThumb2() && hasDSP();
353     else
354       return hasV5TEOps();
355   }
356 
357   /// Return true if the CPU supports any kind of instruction fusion.
hasFusion()358   bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
359 
getTargetTriple()360   const Triple &getTargetTriple() const { return TargetTriple; }
361 
isTargetDarwin()362   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
isTargetIOS()363   bool isTargetIOS() const { return TargetTriple.isiOS(); }
isTargetWatchOS()364   bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); }
isTargetWatchABI()365   bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); }
isTargetDriverKit()366   bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); }
isTargetLinux()367   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
isTargetNaCl()368   bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
isTargetNetBSD()369   bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); }
isTargetWindows()370   bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
371 
isTargetCOFF()372   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
isTargetELF()373   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
isTargetMachO()374   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
375 
376   // ARM EABI is the bare-metal EABI described in ARM ABI documents and
377   // can be accessed via -target arm-none-eabi. This is NOT GNUEABI.
378   // FIXME: Add a flag for bare-metal for that target and set Triple::EABI
379   // even for GNUEABI, so we can make a distinction here and still conform to
380   // the EABI on GNU (and Android) mode. This requires change in Clang, too.
381   // FIXME: The Darwin exception is temporary, while we move users to
382   // "*-*-*-macho" triples as quickly as possible.
isTargetAEABI()383   bool isTargetAEABI() const {
384     return (TargetTriple.getEnvironment() == Triple::EABI ||
385             TargetTriple.getEnvironment() == Triple::EABIHF) &&
386            !isTargetDarwin() && !isTargetWindows();
387   }
isTargetGNUAEABI()388   bool isTargetGNUAEABI() const {
389     return (TargetTriple.getEnvironment() == Triple::GNUEABI ||
390             TargetTriple.getEnvironment() == Triple::GNUEABIHF) &&
391            !isTargetDarwin() && !isTargetWindows();
392   }
isTargetMuslAEABI()393   bool isTargetMuslAEABI() const {
394     return (TargetTriple.getEnvironment() == Triple::MuslEABI ||
395             TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
396             TargetTriple.getEnvironment() == Triple::OpenHOS) &&
397            !isTargetDarwin() && !isTargetWindows();
398   }
399 
400   // ARM Targets that support EHABI exception handling standard
401   // Darwin uses SjLj. Other targets might need more checks.
isTargetEHABICompatible()402   bool isTargetEHABICompatible() const {
403     return TargetTriple.isTargetEHABICompatible();
404   }
405 
406   bool isTargetHardFloat() const;
407 
isReadTPSoft()408   bool isReadTPSoft() const {
409     return !(isReadTPTPIDRURW() || isReadTPTPIDRURO() || isReadTPTPIDRPRW());
410   }
411 
isTargetAndroid()412   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
413 
  /// Override of the base-class XRay query; defined out of line.
  bool isXRaySupported() const override;

  /// ABI predicates; declared here, defined out of line.
  bool isAPCS_ABI() const;
  bool isAAPCS_ABI() const;
  bool isAAPCS16_ABI() const;

  /// ROPI / RWPI predicates; declared here, defined out of line.
  /// NOTE(review): presumably they reflect the relocation model in use -
  /// confirm against the definitions.
  bool isROPI() const;
  bool isRWPI() const;
422 
useMachineScheduler()423   bool useMachineScheduler() const { return UseMISched; }
useMachinePipeliner()424   bool useMachinePipeliner() const { return UseMIPipeliner; }
hasMinSize()425   bool hasMinSize() const { return OptMinSize; }
isThumb1Only()426   bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
isThumb2()427   bool isThumb2() const { return isThumb() && hasThumb2(); }
isMClass()428   bool isMClass() const { return ARMProcClass == MClass; }
isRClass()429   bool isRClass() const { return ARMProcClass == RClass; }
isAClass()430   bool isAClass() const { return ARMProcClass == AClass; }
431 
isR9Reserved()432   bool isR9Reserved() const {
433     return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
434   }
435 
getFramePointerReg()436   MCPhysReg getFramePointerReg() const {
437     if (isTargetDarwin() ||
438         (!isTargetWindows() && isThumb() && !createAAPCSFrameChain()))
439       return ARM::R7;
440     return ARM::R11;
441   }
442 
443   /// Returns true if the frame setup is split into two separate pushes (first
444   /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent
445   /// to lr. This is always required on Thumb1-only targets, as the push and
446   /// pop instructions can't access the high registers.
splitFramePushPop(const MachineFunction & MF)447   bool splitFramePushPop(const MachineFunction &MF) const {
448     if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress())
449       return true;
450     return (getFramePointerReg() == ARM::R7 &&
451             MF.getTarget().Options.DisableFramePointerElim(MF)) ||
452            isThumb1Only();
453   }
454 
  /// Per-function query taking the MachineFunction; declared here, defined
  /// out of line.
  bool splitFramePointerPush(const MachineFunction &MF) const;

  /// Declared here, defined out of line.
  bool useStride4VFPs() const;

  /// Declared here, defined out of line.
  bool useMovt() const;
460 
supportsTailCall()461   bool supportsTailCall() const { return SupportsTailCall; }
462 
allowsUnalignedMem()463   bool allowsUnalignedMem() const { return !StrictAlign; }
464 
restrictIT()465   bool restrictIT() const { return RestrictIT; }
466 
getCPUString()467   const std::string & getCPUString() const { return CPUString; }
468 
isLittle()469   bool isLittle() const { return IsLittle; }
470 
  /// Declared here, defined out of line.
  /// NOTE(review): presumably derived from the scheduling model - confirm
  /// against the definition.
  unsigned getMispredictionPenalty() const;

  /// Returns true if machine scheduler should be enabled.
  bool enableMachineScheduler() const override;

  /// Returns true if machine pipeliner should be enabled.
  bool enableMachinePipeliner() const override;
  /// Override declared here, defined out of line.
  bool useDFAforSMS() const override;

  /// True for some subtargets at > -O0.
  bool enablePostRAScheduler() const override;

  /// True for some subtargets at > -O0.
  bool enablePostRAMachineScheduler() const override;

  /// Check whether this subtarget wants to use subregister liveness.
  bool enableSubRegLiveness() const override;
488 
489   /// Enable use of alias analysis during code generation (during MI
490   /// scheduling, DAGCombine, etc.).
useAA()491   bool useAA() const override { return true; }
492 
493   /// getInstrItins - Return the instruction itineraries based on subtarget
494   /// selection.
getInstrItineraryData()495   const InstrItineraryData *getInstrItineraryData() const override {
496     return &InstrItins;
497   }
498 
499   /// getStackAlignment - Returns the minimum alignment known to hold of the
500   /// stack frame on entry to the function and which must be maintained by every
501   /// function for this subtarget.
getStackAlignment()502   Align getStackAlignment() const { return stackAlignment; }
503 
504   // Returns the required alignment for LDRD/STRD instructions
getDualLoadStoreAlignment()505   Align getDualLoadStoreAlignment() const {
506     return Align(hasV7Ops() || allowsUnalignedMem() ? 4 : 8);
507   }
508 
getMaxInterleaveFactor()509   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
510 
getPartialUpdateClearance()511   unsigned getPartialUpdateClearance() const { return PartialUpdateClearance; }
512 
getLdStMultipleTiming()513   ARMLdStMultipleTiming getLdStMultipleTiming() const {
514     return LdStMultipleTiming;
515   }
516 
getPreISelOperandLatencyAdjustment()517   int getPreISelOperandLatencyAdjustment() const {
518     return PreISelOperandLatencyAdjustment;
519   }
520 
  /// True if the GV will be accessed via an indirect symbol.
  bool isGVIndirectSymbol(const GlobalValue *GV) const;

  /// Boolean query about how \p GV is accessed.
  /// NOTE(review): going by the name, true presumably means the GV is
  /// accessed through the GOT; the definition is not in this header.
  bool isGVInGOT(const GlobalValue *GV) const;

  /// True if fast-isel is used.
  bool useFastISel() const;
529 
530   /// Returns the correct return opcode for the current feature set.
531   /// Use BX if available to allow mixing thumb/arm code, but fall back
532   /// to plain mov pc,lr on ARMv4.
getReturnOpcode()533   unsigned getReturnOpcode() const {
534     if (isThumb())
535       return ARM::tBX_RET;
536     if (hasV4TOps())
537       return ARM::BX_RET;
538     return ARM::MOVPCLR;
539   }
540 
541   /// Allow movt+movw for PIC global address calculation.
542   /// ELF does not have GOT relocations for movt+movw.
543   /// ROPI does not use GOT.
allowPositionIndependentMovt()544   bool allowPositionIndependentMovt() const {
545     return isROPI() || !isTargetELF();
546   }
547 
getPrefLoopLogAlignment()548   unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }
549 
550   unsigned
getMVEVectorCostFactor(TargetTransformInfo::TargetCostKind CostKind)551   getMVEVectorCostFactor(TargetTransformInfo::TargetCostKind CostKind) const {
552     if (CostKind == TargetTransformInfo::TCK_CodeSize)
553       return 1;
554     return MVEVectorCostFactor;
555   }
556 
  /// Register-allocation hooks; declared here, defined out of line.
  /// ignoreCSRForAllocationOrder overrides a base-class hook and is queried
  /// with a function and a physical register number.
  bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
                                   unsigned PhysReg) const override;
  unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
560 };
561 
562 } // end namespace llvm
563 
564 #endif  // LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H
565