1 //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file declares the AArch64-specific subclass of TargetSubtargetInfo.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
14 #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
15 
16 #include "AArch64FrameLowering.h"
17 #include "AArch64ISelLowering.h"
18 #include "AArch64InstrInfo.h"
19 #include "AArch64RegisterInfo.h"
20 #include "AArch64SelectionDAGInfo.h"
21 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
22 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
24 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
25 #include "llvm/CodeGen/RegisterBankInfo.h"
26 #include "llvm/CodeGen/TargetSubtargetInfo.h"
27 #include "llvm/IR/DataLayout.h"
28 #include <string>
29 
30 #define GET_SUBTARGETINFO_HEADER
31 #include "AArch64GenSubtargetInfo.inc"
32 
33 namespace llvm {
34 class GlobalValue;
35 class StringRef;
36 class Triple;
37 
38 class AArch64Subtarget final : public AArch64GenSubtargetInfo {
39 public:
40   enum ARMProcFamilyEnum : uint8_t {
41     Others,
42     A64FX,
43     Ampere1,
44     Ampere1A,
45     AppleA7,
46     AppleA10,
47     AppleA11,
48     AppleA12,
49     AppleA13,
50     AppleA14,
51     AppleA15,
52     AppleA16,
53     Carmel,
54     CortexA35,
55     CortexA53,
56     CortexA55,
57     CortexA510,
58     CortexA57,
59     CortexA65,
60     CortexA72,
61     CortexA73,
62     CortexA75,
63     CortexA76,
64     CortexA77,
65     CortexA78,
66     CortexA78C,
67     CortexA710,
68     CortexA715,
69     CortexR82,
70     CortexX1,
71     CortexX1C,
72     CortexX2,
73     CortexX3,
74     ExynosM3,
75     Falkor,
76     Kryo,
77     NeoverseE1,
78     NeoverseN1,
79     NeoverseN2,
80     Neoverse512TVB,
81     NeoverseV1,
82     NeoverseV2,
83     Saphira,
84     ThunderX2T99,
85     ThunderX,
86     ThunderXT81,
87     ThunderXT83,
88     ThunderXT88,
89     ThunderX3T110,
90     TSV110
91   };
92 
93 protected:
94   /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
95   ARMProcFamilyEnum ARMProcFamily = Others;
96 
97   // Enable 64-bit vectorization in SLP.
98   unsigned MinVectorRegisterBitWidth = 64;
99 
100 // Bool members corresponding to the SubtargetFeatures defined in tablegen
101 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
102   bool ATTRIBUTE = DEFAULT;
103 #include "AArch64GenSubtargetInfo.inc"
104 
105   uint8_t MaxInterleaveFactor = 2;
106   uint8_t VectorInsertExtractBaseCost = 3;
107   uint16_t CacheLineSize = 0;
108   uint16_t PrefetchDistance = 0;
109   uint16_t MinPrefetchStride = 1;
110   unsigned MaxPrefetchIterationsAhead = UINT_MAX;
111   Align PrefFunctionAlignment;
112   Align PrefLoopAlignment;
113   unsigned MaxBytesForLoopAlignment = 0;
114   unsigned MaxJumpTableSize = 0;
115 
116   // ReserveXRegister[i] - X#i is not available as a general purpose register.
117   BitVector ReserveXRegister;
118 
119   // ReserveXRegisterForRA[i] - X#i is not available for register allocator.
120   BitVector ReserveXRegisterForRA;
121 
122   // CustomCallUsedXRegister[i] - X#i call saved.
123   BitVector CustomCallSavedXRegs;
124 
125   bool IsLittle;
126 
127   bool StreamingSVEMode;
128   bool StreamingCompatibleSVEMode;
129   unsigned MinSVEVectorSizeInBits;
130   unsigned MaxSVEVectorSizeInBits;
131   unsigned VScaleForTuning = 2;
132   TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;
133 
134   /// TargetTriple - What processor and OS we're targeting.
135   Triple TargetTriple;
136 
137   AArch64FrameLowering FrameLowering;
138   AArch64InstrInfo InstrInfo;
139   AArch64SelectionDAGInfo TSInfo;
140   AArch64TargetLowering TLInfo;
141 
142   /// GlobalISel related APIs.
143   std::unique_ptr<CallLowering> CallLoweringInfo;
144   std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
145   std::unique_ptr<InstructionSelector> InstSelector;
146   std::unique_ptr<LegalizerInfo> Legalizer;
147   std::unique_ptr<RegisterBankInfo> RegBankInfo;
148 
149 private:
150   /// initializeSubtargetDependencies - Initializes using CPUString and the
151   /// passed in feature string so that we can use initializer lists for
152   /// subtarget initialization.
153   AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
154                                                     StringRef CPUString,
155                                                     StringRef TuneCPUString);
156 
157   /// Initialize properties based on the selected processor family.
158   void initializeProperties();
159 
160 public:
161   /// This constructor initializes the data members to match that
162   /// of the specified triple.
163   AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
164                    StringRef FS, const TargetMachine &TM, bool LittleEndian,
165                    unsigned MinSVEVectorSizeInBitsOverride = 0,
166                    unsigned MaxSVEVectorSizeInBitsOverride = 0,
167                    bool StreamingSVEMode = false,
168                    bool StreamingCompatibleSVEMode = false);
169 
170 // Getters for SubtargetFeatures defined in tablegen
171 #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
172   bool GETTER() const { return ATTRIBUTE; }
173 #include "AArch64GenSubtargetInfo.inc"
174 
175   const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
176     return &TSInfo;
177   }
178   const AArch64FrameLowering *getFrameLowering() const override {
179     return &FrameLowering;
180   }
181   const AArch64TargetLowering *getTargetLowering() const override {
182     return &TLInfo;
183   }
184   const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
185   const AArch64RegisterInfo *getRegisterInfo() const override {
186     return &getInstrInfo()->getRegisterInfo();
187   }
188   const CallLowering *getCallLowering() const override;
189   const InlineAsmLowering *getInlineAsmLowering() const override;
190   InstructionSelector *getInstructionSelector() const override;
191   const LegalizerInfo *getLegalizerInfo() const override;
192   const RegisterBankInfo *getRegBankInfo() const override;
193   const Triple &getTargetTriple() const { return TargetTriple; }
194   bool enableMachineScheduler() const override { return true; }
195   bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
196 
197   /// Returns ARM processor family.
198   /// Avoid this function! CPU specifics should be kept local to this class
199   /// and preferably modeled with SubtargetFeatures or properties in
200   /// initializeProperties().
201   ARMProcFamilyEnum getProcFamily() const {
202     return ARMProcFamily;
203   }
204 
205   bool isXRaySupported() const override { return true; }
206 
207   /// Returns true if the function has the streaming attribute.
208   bool isStreaming() const { return StreamingSVEMode; }
209 
210   /// Returns true if the function has the streaming-compatible attribute.
211   bool isStreamingCompatible() const { return StreamingCompatibleSVEMode; }
212 
213   /// Returns true if the target has NEON and the function at runtime is known
214   /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
215   /// mode, which disables NEON instructions).
216   bool isNeonAvailable() const;
217 
218   unsigned getMinVectorRegisterBitWidth() const {
219     // Don't assume any minimum vector size when PSTATE.SM may not be 0.
220     if (StreamingSVEMode || StreamingCompatibleSVEMode)
221       return 0;
222     return MinVectorRegisterBitWidth;
223   }
224 
225   bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
226   bool isXRegisterReservedForRA(size_t i) const { return ReserveXRegisterForRA[i]; }
227   unsigned getNumXRegisterReserved() const {
228     BitVector AllReservedX(AArch64::GPR64commonRegClass.getNumRegs());
229     AllReservedX |= ReserveXRegister;
230     AllReservedX |= ReserveXRegisterForRA;
231     return AllReservedX.count();
232   }
233   bool isXRegCustomCalleeSaved(size_t i) const {
234     return CustomCallSavedXRegs[i];
235   }
236   bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
237 
238   /// Return true if the CPU supports any kind of instruction fusion.
239   bool hasFusion() const {
240     return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
241            hasFuseAES() || hasFuseArithmeticLogic() || hasFuseCCSelect() ||
242            hasFuseAdrpAdd() || hasFuseLiterals();
243   }
244 
245   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
246   unsigned getVectorInsertExtractBaseCost() const;
247   unsigned getCacheLineSize() const override { return CacheLineSize; }
248   unsigned getPrefetchDistance() const override { return PrefetchDistance; }
249   unsigned getMinPrefetchStride(unsigned NumMemAccesses,
250                                 unsigned NumStridedMemAccesses,
251                                 unsigned NumPrefetches,
252                                 bool HasCall) const override {
253     return MinPrefetchStride;
254   }
255   unsigned getMaxPrefetchIterationsAhead() const override {
256     return MaxPrefetchIterationsAhead;
257   }
258   Align getPrefFunctionAlignment() const {
259     return PrefFunctionAlignment;
260   }
261   Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
262 
263   unsigned getMaxBytesForLoopAlignment() const {
264     return MaxBytesForLoopAlignment;
265   }
266 
267   unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
268 
269   /// CPU has TBI (top byte of addresses is ignored during HW address
270   /// translation) and OS enables it.
271   bool supportsAddressTopByteIgnored() const;
272 
273   bool isLittleEndian() const { return IsLittle; }
274 
275   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
276   bool isTargetIOS() const { return TargetTriple.isiOS(); }
277   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
278   bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
279   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
280   bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
281   bool isWindowsArm64EC() const { return TargetTriple.isWindowsArm64EC(); }
282 
283   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
284   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
285   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
286 
287   bool isTargetILP32() const {
288     return TargetTriple.isArch32Bit() ||
289            TargetTriple.getEnvironment() == Triple::GNUILP32;
290   }
291 
292   bool useAA() const override;
293 
294   bool addrSinkUsingGEPs() const override {
295     // Keeping GEPs inbounds is important for exploiting AArch64
296     // addressing-modes in ILP32 mode.
297     return useAA() || isTargetILP32();
298   }
299 
300   bool useSmallAddressing() const {
301     switch (TLInfo.getTargetMachine().getCodeModel()) {
302       case CodeModel::Kernel:
303         // Kernel is currently allowed only for Fuchsia targets,
304         // where it is the same as Small for almost all purposes.
305       case CodeModel::Small:
306         return true;
307       default:
308         return false;
309     }
310   }
311 
312   /// ParseSubtargetFeatures - Parses features string setting specified
313   /// subtarget options.  Definition of function is auto generated by tblgen.
314   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
315 
316   /// ClassifyGlobalReference - Find the target operand flags that describe
317   /// how a global value should be referenced for the current subtarget.
318   unsigned ClassifyGlobalReference(const GlobalValue *GV,
319                                    const TargetMachine &TM) const;
320 
321   unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
322                                            const TargetMachine &TM) const;
323 
324   /// This function is design to compatible with the function def in other
325   /// targets and escape build error about the virtual function def in base
326   /// class TargetSubtargetInfo. Updeate me if AArch64 target need to use it.
327   unsigned char
328   classifyGlobalFunctionReference(const GlobalValue *GV) const override {
329     return 0;
330   }
331 
332   void overrideSchedPolicy(MachineSchedPolicy &Policy,
333                            unsigned NumRegionInstrs) const override;
334 
335   bool enableEarlyIfConversion() const override;
336 
337   std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
338 
339   bool isCallingConvWin64(CallingConv::ID CC) const {
340     switch (CC) {
341     case CallingConv::C:
342     case CallingConv::Fast:
343     case CallingConv::Swift:
344       return isTargetWindows();
345     case CallingConv::Win64:
346       return true;
347     default:
348       return false;
349     }
350   }
351 
352   /// Return whether FrameLowering should always set the "extended frame
353   /// present" bit in FP, or set it based on a symbol in the runtime.
354   bool swiftAsyncContextIsDynamicallySet() const {
355     // Older OS versions (particularly system unwinders) are confused by the
356     // Swift extended frame, so when building code that might be run on them we
357     // must dynamically query the concurrency library to determine whether
358     // extended frames should be flagged as present.
359     const Triple &TT = getTargetTriple();
360 
361     unsigned Major = TT.getOSVersion().getMajor();
362     switch(TT.getOS()) {
363     default:
364       return false;
365     case Triple::IOS:
366     case Triple::TvOS:
367       return Major < 15;
368     case Triple::WatchOS:
369       return Major < 8;
370     case Triple::MacOSX:
371     case Triple::Darwin:
372       return Major < 12;
373     }
374   }
375 
376   void mirFileLoaded(MachineFunction &MF) const override;
377 
378   bool hasSVEorSME() const { return hasSVE() || hasSME(); }
379 
380   // Return the known range for the bit length of SVE data registers. A value
381   // of 0 means nothing is known about that particular limit beyong what's
382   // implied by the architecture.
383   unsigned getMaxSVEVectorSizeInBits() const {
384     assert(hasSVEorSME() &&
385            "Tried to get SVE vector length without SVE support!");
386     return MaxSVEVectorSizeInBits;
387   }
388 
389   unsigned getMinSVEVectorSizeInBits() const {
390     assert(hasSVEorSME() &&
391            "Tried to get SVE vector length without SVE support!");
392     return MinSVEVectorSizeInBits;
393   }
394 
395   bool useSVEForFixedLengthVectors() const {
396     if (!isNeonAvailable())
397       return hasSVE();
398 
399     // Prefer NEON unless larger SVE registers are available.
400     return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
401   }
402 
403   bool useSVEForFixedLengthVectors(EVT VT) const {
404     if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector())
405       return false;
406     return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock ||
407            !isNeonAvailable();
408   }
409 
410   unsigned getVScaleForTuning() const { return VScaleForTuning; }
411 
412   TailFoldingOpts getSVETailFoldingDefaultOpts() const {
413     return DefaultSVETFOpts;
414   }
415 
416   const char* getChkStkName() const {
417     if (isWindowsArm64EC())
418       return "__chkstk_arm64ec";
419     return "__chkstk";
420   }
421 
422   const char* getSecurityCheckCookieName() const {
423     if (isWindowsArm64EC())
424       return "__security_check_cookie_arm64ec";
425     return "__security_check_cookie";
426   }
427 };
428 } // End llvm namespace
429 
430 #endif
431