1 //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file declares the AArch64 specific subclass of TargetSubtarget.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
14 #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
15 
16 #include "AArch64FrameLowering.h"
17 #include "AArch64ISelLowering.h"
18 #include "AArch64InstrInfo.h"
19 #include "AArch64PointerAuth.h"
20 #include "AArch64RegisterInfo.h"
21 #include "AArch64SelectionDAGInfo.h"
22 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
23 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
24 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
25 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
26 #include "llvm/CodeGen/RegisterBankInfo.h"
27 #include "llvm/CodeGen/TargetSubtargetInfo.h"
28 #include "llvm/IR/DataLayout.h"
29 
30 #define GET_SUBTARGETINFO_HEADER
31 #include "AArch64GenSubtargetInfo.inc"
32 
33 namespace llvm {
34 class GlobalValue;
35 class StringRef;
36 class Triple;
37 
38 class AArch64Subtarget final : public AArch64GenSubtargetInfo {
39 public:
40   enum ARMProcFamilyEnum : uint8_t {
41     Others,
42     A64FX,
43     Ampere1,
44     Ampere1A,
45     Ampere1B,
46     AppleA7,
47     AppleA10,
48     AppleA11,
49     AppleA12,
50     AppleA13,
51     AppleA14,
52     AppleA15,
53     AppleA16,
54     AppleA17,
55     Carmel,
56     CortexA35,
57     CortexA53,
58     CortexA55,
59     CortexA510,
60     CortexA520,
61     CortexA57,
62     CortexA65,
63     CortexA72,
64     CortexA73,
65     CortexA75,
66     CortexA76,
67     CortexA77,
68     CortexA78,
69     CortexA78C,
70     CortexA710,
71     CortexA715,
72     CortexA720,
73     CortexR82,
74     CortexX1,
75     CortexX1C,
76     CortexX2,
77     CortexX3,
78     CortexX4,
79     ExynosM3,
80     Falkor,
81     Kryo,
82     NeoverseE1,
83     NeoverseN1,
84     NeoverseN2,
85     Neoverse512TVB,
86     NeoverseV1,
87     NeoverseV2,
88     Saphira,
89     ThunderX2T99,
90     ThunderX,
91     ThunderXT81,
92     ThunderXT83,
93     ThunderXT88,
94     ThunderX3T110,
95     TSV110
96   };
97 
98 protected:
99   /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
100   ARMProcFamilyEnum ARMProcFamily = Others;
101 
102   // Enable 64-bit vectorization in SLP.
103   unsigned MinVectorRegisterBitWidth = 64;
104 
105 // Bool members corresponding to the SubtargetFeatures defined in tablegen
106 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
107   bool ATTRIBUTE = DEFAULT;
108 #include "AArch64GenSubtargetInfo.inc"
109 
110   uint8_t MaxInterleaveFactor = 2;
111   uint8_t VectorInsertExtractBaseCost = 2;
112   uint16_t CacheLineSize = 0;
113   uint16_t PrefetchDistance = 0;
114   uint16_t MinPrefetchStride = 1;
115   unsigned MaxPrefetchIterationsAhead = UINT_MAX;
116   Align PrefFunctionAlignment;
117   Align PrefLoopAlignment;
118   unsigned MaxBytesForLoopAlignment = 0;
119   unsigned MinimumJumpTableEntries = 4;
120   unsigned MaxJumpTableSize = 0;
121 
122   // ReserveXRegister[i] - X#i is not available as a general purpose register.
123   BitVector ReserveXRegister;
124 
125   // ReserveXRegisterForRA[i] - X#i is not available for register allocator.
126   BitVector ReserveXRegisterForRA;
127 
  // CustomCallSavedXRegs[i] - X#i is call saved.
129   BitVector CustomCallSavedXRegs;
130 
131   bool IsLittle;
132 
133   bool StreamingSVEMode;
134   bool StreamingCompatibleSVEMode;
135   unsigned MinSVEVectorSizeInBits;
136   unsigned MaxSVEVectorSizeInBits;
137   unsigned VScaleForTuning = 2;
138   TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;
139 
140   /// TargetTriple - What processor and OS we're targeting.
141   Triple TargetTriple;
142 
143   AArch64FrameLowering FrameLowering;
144   AArch64InstrInfo InstrInfo;
145   AArch64SelectionDAGInfo TSInfo;
146   AArch64TargetLowering TLInfo;
147 
148   /// GlobalISel related APIs.
149   std::unique_ptr<CallLowering> CallLoweringInfo;
150   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
151   std::unique_ptr<InstructionSelector> InstSelector;
152   std::unique_ptr<LegalizerInfo> Legalizer;
153   std::unique_ptr<RegisterBankInfo> RegBankInfo;
154 
155 private:
156   /// initializeSubtargetDependencies - Initializes using CPUString and the
157   /// passed in feature string so that we can use initializer lists for
158   /// subtarget initialization.
159   AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
160                                                     StringRef CPUString,
161                                                     StringRef TuneCPUString,
162                                                     bool HasMinSize);
163 
164   /// Initialize properties based on the selected processor family.
165   void initializeProperties(bool HasMinSize);
166 
167 public:
168   /// This constructor initializes the data members to match that
169   /// of the specified triple.
170   AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
171                    StringRef FS, const TargetMachine &TM, bool LittleEndian,
172                    unsigned MinSVEVectorSizeInBitsOverride = 0,
173                    unsigned MaxSVEVectorSizeInBitsOverride = 0,
174                    bool StreamingSVEMode = false,
175                    bool StreamingCompatibleSVEMode = false,
176                    bool HasMinSize = false);
177 
178 // Getters for SubtargetFeatures defined in tablegen
179 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
180   bool GETTER() const { return ATTRIBUTE; }
181 #include "AArch64GenSubtargetInfo.inc"
182 
getSelectionDAGInfo()183   const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
184     return &TSInfo;
185   }
getFrameLowering()186   const AArch64FrameLowering *getFrameLowering() const override {
187     return &FrameLowering;
188   }
getTargetLowering()189   const AArch64TargetLowering *getTargetLowering() const override {
190     return &TLInfo;
191   }
getInstrInfo()192   const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
getRegisterInfo()193   const AArch64RegisterInfo *getRegisterInfo() const override {
194     return &getInstrInfo()->getRegisterInfo();
195   }
196   const CallLowering *getCallLowering() const override;
197   const InlineAsmLowering *getInlineAsmLowering() const override;
198   InstructionSelector *getInstructionSelector() const override;
199   const LegalizerInfo *getLegalizerInfo() const override;
200   const RegisterBankInfo *getRegBankInfo() const override;
getTargetTriple()201   const Triple &getTargetTriple() const { return TargetTriple; }
enableMachineScheduler()202   bool enableMachineScheduler() const override { return true; }
enablePostRAScheduler()203   bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
204 
205   /// Returns ARM processor family.
206   /// Avoid this function! CPU specifics should be kept local to this class
207   /// and preferably modeled with SubtargetFeatures or properties in
208   /// initializeProperties().
getProcFamily()209   ARMProcFamilyEnum getProcFamily() const {
210     return ARMProcFamily;
211   }
212 
isXRaySupported()213   bool isXRaySupported() const override { return true; }
214 
215   /// Returns true if the function has a streaming body.
isStreaming()216   bool isStreaming() const { return StreamingSVEMode; }
217 
218   /// Returns true if the function has a streaming-compatible body.
219   bool isStreamingCompatible() const;
220 
221   /// Returns true if the target has NEON and the function at runtime is known
222   /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
223   /// mode, which disables NEON instructions).
224   bool isNeonAvailable() const;
225 
  /// Returns true if the target has SVE and can use the full range of SVE
  /// instructions, for example because the function is known not to be in
  /// streaming-SVE mode or because the target has FEAT_FA64 enabled.
229   bool isSVEAvailable() const;
230 
getMinVectorRegisterBitWidth()231   unsigned getMinVectorRegisterBitWidth() const {
232     // Don't assume any minimum vector size when PSTATE.SM may not be 0, because
233     // we don't yet support streaming-compatible codegen support that we trust
234     // is safe for functions that may be executed in streaming-SVE mode.
235     // By returning '0' here, we disable vectorization.
236     if (!isSVEAvailable() && !isNeonAvailable())
237       return 0;
238     return MinVectorRegisterBitWidth;
239   }
240 
isXRegisterReserved(size_t i)241   bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
isXRegisterReservedForRA(size_t i)242   bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; }
getNumXRegisterReserved()243   unsigned getNumXRegisterReserved() const {
244     BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
245     AllReservedX |= ReserveXRegister;
246     AllReservedX |= ReserveXRegisterForRA;
247     return AllReservedX.count();
248   }
isXRegCustomCalleeSaved(size_t i)249   bool isXRegCustomCalleeSaved(size_t i) const {
250     return CustomCallSavedXRegs[i];
251   }
hasCustomCallingConv()252   bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
253 
254   /// Return true if the CPU supports any kind of instruction fusion.
hasFusion()255   bool hasFusion() const {
256     return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
257            hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
258            hasFuseAdrpAdd() || hasFuseLiterals();
259   }
260 
getMaxInterleaveFactor()261   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
262   unsigned getVectorInsertExtractBaseCost() const;
getCacheLineSize()263   unsigned getCacheLineSize() const override { return CacheLineSize; }
getPrefetchDistance()264   unsigned getPrefetchDistance() const override { return PrefetchDistance; }
getMinPrefetchStride(unsigned NumMemAccesses,unsigned NumStridedMemAccesses,unsigned NumPrefetches,bool HasCall)265   unsigned getMinPrefetchStride(unsigned NumMemAccesses,
266                                 unsigned NumStridedMemAccesses,
267                                 unsigned NumPrefetches,
268                                 bool HasCall) const override {
269     return MinPrefetchStride;
270   }
getMaxPrefetchIterationsAhead()271   unsigned getMaxPrefetchIterationsAhead() const override {
272     return MaxPrefetchIterationsAhead;
273   }
getPrefFunctionAlignment()274   Align getPrefFunctionAlignment() const {
275     return PrefFunctionAlignment;
276   }
getPrefLoopAlignment()277   Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
278 
getMaxBytesForLoopAlignment()279   unsigned getMaxBytesForLoopAlignment() const {
280     return MaxBytesForLoopAlignment;
281   }
282 
getMaximumJumpTableSize()283   unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
getMinimumJumpTableEntries()284   unsigned getMinimumJumpTableEntries() const {
285     return MinimumJumpTableEntries;
286   }
287 
288   /// CPU has TBI (top byte of addresses is ignored during HW address
289   /// translation) and OS enables it.
290   bool supportsAddressTopByteIgnored() const;
291 
isLittleEndian()292   bool isLittleEndian() const { return IsLittle; }
293 
isTargetDarwin()294   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
isTargetIOS()295   bool isTargetIOS() const { return TargetTriple.isiOS(); }
isTargetLinux()296   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
isTargetWindows()297   bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
isTargetAndroid()298   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
isTargetFuchsia()299   bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
isWindowsArm64EC()300   bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }
301 
isTargetCOFF()302   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
isTargetELF()303   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
isTargetMachO()304   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
305 
isTargetILP32()306   bool isTargetILP32() const {
307     return TargetTriple.isArch32Bit() ||
308            TargetTriple.getEnvironment() == Triple::GNUILP32;
309   }
310 
311   bool useAA() const override;
312 
addrSinkUsingGEPs()313   bool addrSinkUsingGEPs() const override {
314     // Keeping GEPs inbounds is important for exploiting AArch64
315     // addressing-modes in ILP32 mode.
316     return useAA() || isTargetILP32();
317   }
318 
useSmallAddressing()319   bool useSmallAddressing() const {
320     switch (TLInfo.getTargetMachine().getCodeModel()) {
321       case CodeModel::Kernel:
322         // Kernel is currently allowed only for Fuchsia targets,
323         // where it is the same as Small for almost all purposes.
324       case CodeModel::Small:
325         return true;
326       default:
327         return false;
328     }
329   }
330 
331   /// ParseSubtargetFeatures - Parses features string setting specified
332   /// subtarget options.  Definition of function is auto generated by tblgen.
333   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
334 
335   /// ClassifyGlobalReference - Find the target operand flags that describe
336   /// how a global value should be referenced for the current subtarget.
337   unsigned ClassifyGlobalReference(const GlobalValue *GV,
338                                    const TargetMachine &TM) const;
339 
340   unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
341                                            const TargetMachine &TM) const;
342 
343   /// This function is design to compatible with the function def in other
344   /// targets and escape build error about the virtual function def in base
345   /// class TargetSubtargetInfo. Updeate me if AArch64 target need to use it.
346   unsigned char
classifyGlobalFunctionReference(const GlobalValue * GV)347   classifyGlobalFunctionReference(const GlobalValue *GV) const override {
348     return 0;
349   }
350 
351   void overrideSchedPolicy(MachineSchedPolicy &Policy,
352                            unsigned NumRegionInstrs) const override;
353 
354   bool enableEarlyIfConversion() const override;
355 
356   std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
357 
isCallingConvWin64(CallingConv::ID CC)358   bool isCallingConvWin64(CallingConv::ID CC) const {
359     switch (CC) {
360     case CallingConv::C:
361     case CallingConv::Fast:
362     case CallingConv::Swift:
363       return isTargetWindows();
364     case CallingConv::Win64:
365       return true;
366     default:
367       return false;
368     }
369   }
370 
371   /// Return whether FrameLowering should always set the "extended frame
372   /// present" bit in FP, or set it based on a symbol in the runtime.
swiftAsyncContextIsDynamicallySet()373   bool swiftAsyncContextIsDynamicallySet() const {
374     // Older OS versions (particularly system unwinders) are confused by the
375     // Swift extended frame, so when building code that might be run on them we
376     // must dynamically query the concurrency library to determine whether
377     // extended frames should be flagged as present.
378     const Triple &TT = getTargetTriple();
379 
380     unsigned Major = TT.getOSVersion().getMajor();
381     switch(TT.getOS()) {
382     default:
383       return false;
384     case Triple::IOS:
385     case Triple::TvOS:
386       return Major < 15;
387     case Triple::WatchOS:
388       return Major < 8;
389     case Triple::MacOSX:
390     case Triple::Darwin:
391       return Major < 12;
392     }
393   }
394 
395   void mirFileLoaded(MachineFunction &MF) const override;
396 
hasSVEorSME()397   bool hasSVEorSME() const { return hasSVE() || hasSME(); }
hasSVE2orSME()398   bool hasSVE2orSME() const { return hasSVE2() || hasSME(); }
399 
400   // Return the known range for the bit length of SVE data registers. A value
401   // of 0 means nothing is known about that particular limit beyong what's
402   // implied by the architecture.
getMaxSVEVectorSizeInBits()403   unsigned getMaxSVEVectorSizeInBits() const {
404     assert(hasSVEorSME() &&
405            "Tried to get SVE vector length without SVE support!");
406     return MaxSVEVectorSizeInBits;
407   }
408 
getMinSVEVectorSizeInBits()409   unsigned getMinSVEVectorSizeInBits() const {
410     assert(hasSVEorSME() &&
411            "Tried to get SVE vector length without SVE support!");
412     return MinSVEVectorSizeInBits;
413   }
414 
useSVEForFixedLengthVectors()415   bool useSVEForFixedLengthVectors() const {
416     if (!isNeonAvailable())
417       return hasSVEorSME();
418 
419     // Prefer NEON unless larger SVE registers are available.
420     return hasSVEorSME() && getMinSVEVectorSizeInBits() >= 256;
421   }
422 
useSVEForFixedLengthVectors(EVT VT)423   bool useSVEForFixedLengthVectors(EVT VT) const {
424     if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector())
425       return false;
426     return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock ||
427            !isNeonAvailable();
428   }
429 
getVScaleForTuning()430   unsigned getVScaleForTuning() const { return VScaleForTuning; }
431 
getSVETailFoldingDefaultOpts()432   TailFoldingOpts getSVETailFoldingDefaultOpts() const {
433     return DefaultSVETFOpts;
434   }
435 
getChkStkName()436   const char* getChkStkName() const {
437     if (isWindowsArm64EC())
438       return "#__chkstk_arm64ec";
439     return "__chkstk";
440   }
441 
getSecurityCheckCookieName()442   const char* getSecurityCheckCookieName() const {
443     if (isWindowsArm64EC())
444       return "#__security_check_cookie_arm64ec";
445     return "__security_check_cookie";
446   }
447 
448   /// Choose a method of checking LR before performing a tail call.
449   AArch64PAuth::AuthCheckMethod getAuthenticatedLRCheckMethod() const;
450 
getAddressCheckPSV()451   const PseudoSourceValue *getAddressCheckPSV() const {
452     return AddressCheckPSV.get();
453   }
454 
455 private:
456   /// Pseudo value representing memory load performed to check an address.
457   ///
458   /// This load operation is solely used for its side-effects: if the address
459   /// is not mapped (or not readable), it triggers CPU exception, otherwise
460   /// execution proceeds and the value is not used.
461   class AddressCheckPseudoSourceValue : public PseudoSourceValue {
462   public:
AddressCheckPseudoSourceValue(const TargetMachine & TM)463     AddressCheckPseudoSourceValue(const TargetMachine &TM)
464         : PseudoSourceValue(TargetCustom, TM) {}
465 
isConstant(const MachineFrameInfo *)466     bool isConstant(const MachineFrameInfo *) const override { return false; }
isAliased(const MachineFrameInfo *)467     bool isAliased(const MachineFrameInfo *) const override { return true; }
mayAlias(const MachineFrameInfo *)468     bool mayAlias(const MachineFrameInfo *) const override { return true; }
printCustom(raw_ostream & OS)469     void printCustom(raw_ostream &OS) const override { OS << "AddressCheck"; }
470   };
471 
472   std::unique_ptr<AddressCheckPseudoSourceValue> AddressCheckPSV;
473 };
474 } // End llvm namespace
475 
476 #endif
477