1 //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file declares the AArch64 specific subclass of TargetSubtargetInfo.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
14 #define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
15 
16 #include "AArch64FrameLowering.h"
17 #include "AArch64ISelLowering.h"
18 #include "AArch64InstrInfo.h"
19 #include "AArch64RegisterInfo.h"
20 #include "AArch64SelectionDAGInfo.h"
21 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
24 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/IR/DataLayout.h"
27 #include <string>
28 
29 #define GET_SUBTARGETINFO_HEADER
30 #include "AArch64GenSubtargetInfo.inc"
31 
32 namespace llvm {
33 class GlobalValue;
34 class StringRef;
35 class Triple;
36 
37 class AArch64Subtarget final : public AArch64GenSubtargetInfo {
38 public:
39   enum ARMProcFamilyEnum : uint8_t {
40     Others,
41     AppleA7,
42     AppleA10,
43     AppleA11,
44     AppleA12,
45     AppleA13,
46     CortexA35,
47     CortexA53,
48     CortexA55,
49     CortexA57,
50     CortexA65,
51     CortexA72,
52     CortexA73,
53     CortexA75,
54     CortexA76,
55     ExynosM3,
56     Falkor,
57     Kryo,
58     NeoverseE1,
59     NeoverseN1,
60     Saphira,
61     ThunderX2T99,
62     ThunderX,
63     ThunderXT81,
64     ThunderXT83,
65     ThunderXT88,
66     TSV110
67   };
68 
69 protected:
70   /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
71   ARMProcFamilyEnum ARMProcFamily = Others;
72 
73   bool HasV8_1aOps = false;
74   bool HasV8_2aOps = false;
75   bool HasV8_3aOps = false;
76   bool HasV8_4aOps = false;
77   bool HasV8_5aOps = false;
78 
79   bool HasFPARMv8 = false;
80   bool HasNEON = false;
81   bool HasCrypto = false;
82   bool HasDotProd = false;
83   bool HasCRC = false;
84   bool HasLSE = false;
85   bool HasRAS = false;
86   bool HasRDM = false;
87   bool HasPerfMon = false;
88   bool HasFullFP16 = false;
89   bool HasFP16FML = false;
90   bool HasSPE = false;
91 
92   // ARMv8.1 extensions
93   bool HasVH = false;
94   bool HasPAN = false;
95   bool HasLOR = false;
96 
97   // ARMv8.2 extensions
98   bool HasPsUAO = false;
99   bool HasPAN_RWV = false;
100   bool HasCCPP = false;
101 
102   // Armv8.2 Crypto extensions
103   bool HasSM4 = false;
104   bool HasSHA3 = false;
105   bool HasSHA2 = false;
106   bool HasAES = false;
107 
108   // ARMv8.3 extensions
109   bool HasPA = false;
110   bool HasJS = false;
111   bool HasCCIDX = false;
112   bool HasComplxNum = false;
113 
114   // ARMv8.4 extensions
115   bool HasNV = false;
116   bool HasRASv8_4 = false;
117   bool HasMPAM = false;
118   bool HasDIT = false;
119   bool HasTRACEV8_4 = false;
120   bool HasAM = false;
121   bool HasSEL2 = false;
122   bool HasPMU = false;
123   bool HasTLB_RMI = false;
124   bool HasFMI = false;
125   bool HasRCPC_IMMO = false;
126 
127   bool HasLSLFast = false;
128   bool HasSVE = false;
129   bool HasSVE2 = false;
130   bool HasRCPC = false;
131   bool HasAggressiveFMA = false;
132 
133   // Armv8.5-A Extensions
134   bool HasAlternativeNZCV = false;
135   bool HasFRInt3264 = false;
136   bool HasSpecRestrict = false;
137   bool HasSSBS = false;
138   bool HasSB = false;
139   bool HasPredRes = false;
140   bool HasCCDP = false;
141   bool HasBTI = false;
142   bool HasRandGen = false;
143   bool HasMTE = false;
144   bool HasTME = false;
145 
146   // Arm SVE2 extensions
147   bool HasSVE2AES = false;
148   bool HasSVE2SM4 = false;
149   bool HasSVE2SHA3 = false;
150   bool HasSVE2BitPerm = false;
151 
152   // Future architecture extensions.
153   bool HasETE = false;
154   bool HasTRBE = false;
155 
156   // HasZeroCycleRegMove - Has zero-cycle register mov instructions.
157   bool HasZeroCycleRegMove = false;
158 
159   // HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
160   bool HasZeroCycleZeroing = false;
161   bool HasZeroCycleZeroingGP = false;
162   bool HasZeroCycleZeroingFP = false;
163   bool HasZeroCycleZeroingFPWorkaround = false;
164 
165   // StrictAlign - Disallow unaligned memory accesses.
166   bool StrictAlign = false;
167 
168   // NegativeImmediates - transform instructions with negative immediates
169   bool NegativeImmediates = true;
170 
171   // Enable 64-bit vectorization in SLP.
172   unsigned MinVectorRegisterBitWidth = 64;
173 
174   bool UseAA = false;
175   bool PredictableSelectIsExpensive = false;
176   bool BalanceFPOps = false;
177   bool CustomAsCheapAsMove = false;
178   bool ExynosAsCheapAsMove = false;
179   bool UsePostRAScheduler = false;
180   bool Misaligned128StoreIsSlow = false;
181   bool Paired128IsSlow = false;
182   bool STRQroIsSlow = false;
183   bool UseAlternateSExtLoadCVTF32Pattern = false;
184   bool HasArithmeticBccFusion = false;
185   bool HasArithmeticCbzFusion = false;
186   bool HasFuseAddress = false;
187   bool HasFuseAES = false;
188   bool HasFuseArithmeticLogic = false;
189   bool HasFuseCCSelect = false;
190   bool HasFuseCryptoEOR = false;
191   bool HasFuseLiterals = false;
192   bool DisableLatencySchedHeuristic = false;
193   bool UseRSqrt = false;
194   bool Force32BitJumpTables = false;
195   bool UseEL1ForTP = false;
196   bool UseEL2ForTP = false;
197   bool UseEL3ForTP = false;
198   bool AllowTaggedGlobals = false;
199   uint8_t MaxInterleaveFactor = 2;
200   uint8_t VectorInsertExtractBaseCost = 3;
201   uint16_t CacheLineSize = 0;
202   uint16_t PrefetchDistance = 0;
203   uint16_t MinPrefetchStride = 1;
204   unsigned MaxPrefetchIterationsAhead = UINT_MAX;
205   unsigned PrefFunctionLogAlignment = 0;
206   unsigned PrefLoopLogAlignment = 0;
207   unsigned MaxJumpTableSize = 0;
208   unsigned WideningBaseCost = 0;
209 
210   // ReserveXRegister[i] - X#i is not available as a general purpose register.
211   BitVector ReserveXRegister;
212 
213   // CustomCallUsedXRegister[i] - X#i call saved.
214   BitVector CustomCallSavedXRegs;
215 
216   bool IsLittle;
217 
218   /// TargetTriple - What processor and OS we're targeting.
219   Triple TargetTriple;
220 
221   AArch64FrameLowering FrameLowering;
222   AArch64InstrInfo InstrInfo;
223   AArch64SelectionDAGInfo TSInfo;
224   AArch64TargetLowering TLInfo;
225 
226   /// GlobalISel related APIs.
227   std::unique_ptr<CallLowering> CallLoweringInfo;
228   std::unique_ptr<InstructionSelector> InstSelector;
229   std::unique_ptr<LegalizerInfo> Legalizer;
230   std::unique_ptr<RegisterBankInfo> RegBankInfo;
231 
232 private:
233   /// initializeSubtargetDependencies - Initializes using CPUString and the
234   /// passed in feature string so that we can use initializer lists for
235   /// subtarget initialization.
236   AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
237                                                     StringRef CPUString);
238 
239   /// Initialize properties based on the selected processor family.
240   void initializeProperties();
241 
242 public:
243   /// This constructor initializes the data members to match that
244   /// of the specified triple.
245   AArch64Subtarget(const Triple &TT, const std::string &CPU,
246                    const std::string &FS, const TargetMachine &TM,
247                    bool LittleEndian);
248 
getSelectionDAGInfo()249   const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
250     return &TSInfo;
251   }
getFrameLowering()252   const AArch64FrameLowering *getFrameLowering() const override {
253     return &FrameLowering;
254   }
getTargetLowering()255   const AArch64TargetLowering *getTargetLowering() const override {
256     return &TLInfo;
257   }
getInstrInfo()258   const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
getRegisterInfo()259   const AArch64RegisterInfo *getRegisterInfo() const override {
260     return &getInstrInfo()->getRegisterInfo();
261   }
262   const CallLowering *getCallLowering() const override;
263   InstructionSelector *getInstructionSelector() const override;
264   const LegalizerInfo *getLegalizerInfo() const override;
265   const RegisterBankInfo *getRegBankInfo() const override;
getTargetTriple()266   const Triple &getTargetTriple() const { return TargetTriple; }
enableMachineScheduler()267   bool enableMachineScheduler() const override { return true; }
enablePostRAScheduler()268   bool enablePostRAScheduler() const override {
269     return UsePostRAScheduler;
270   }
271 
272   /// Returns ARM processor family.
273   /// Avoid this function! CPU specifics should be kept local to this class
274   /// and preferably modeled with SubtargetFeatures or properties in
275   /// initializeProperties().
getProcFamily()276   ARMProcFamilyEnum getProcFamily() const {
277     return ARMProcFamily;
278   }
279 
hasV8_1aOps()280   bool hasV8_1aOps() const { return HasV8_1aOps; }
hasV8_2aOps()281   bool hasV8_2aOps() const { return HasV8_2aOps; }
hasV8_3aOps()282   bool hasV8_3aOps() const { return HasV8_3aOps; }
hasV8_4aOps()283   bool hasV8_4aOps() const { return HasV8_4aOps; }
hasV8_5aOps()284   bool hasV8_5aOps() const { return HasV8_5aOps; }
285 
hasZeroCycleRegMove()286   bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
287 
hasZeroCycleZeroingGP()288   bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; }
289 
hasZeroCycleZeroingFP()290   bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; }
291 
hasZeroCycleZeroingFPWorkaround()292   bool hasZeroCycleZeroingFPWorkaround() const {
293     return HasZeroCycleZeroingFPWorkaround;
294   }
295 
requiresStrictAlign()296   bool requiresStrictAlign() const { return StrictAlign; }
297 
isXRaySupported()298   bool isXRaySupported() const override { return true; }
299 
getMinVectorRegisterBitWidth()300   unsigned getMinVectorRegisterBitWidth() const {
301     return MinVectorRegisterBitWidth;
302   }
303 
isXRegisterReserved(size_t i)304   bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
getNumXRegisterReserved()305   unsigned getNumXRegisterReserved() const { return ReserveXRegister.count(); }
isXRegCustomCalleeSaved(size_t i)306   bool isXRegCustomCalleeSaved(size_t i) const {
307     return CustomCallSavedXRegs[i];
308   }
hasCustomCallingConv()309   bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
hasFPARMv8()310   bool hasFPARMv8() const { return HasFPARMv8; }
hasNEON()311   bool hasNEON() const { return HasNEON; }
hasCrypto()312   bool hasCrypto() const { return HasCrypto; }
hasDotProd()313   bool hasDotProd() const { return HasDotProd; }
hasCRC()314   bool hasCRC() const { return HasCRC; }
hasLSE()315   bool hasLSE() const { return HasLSE; }
hasRAS()316   bool hasRAS() const { return HasRAS; }
hasRDM()317   bool hasRDM() const { return HasRDM; }
hasSM4()318   bool hasSM4() const { return HasSM4; }
hasSHA3()319   bool hasSHA3() const { return HasSHA3; }
hasSHA2()320   bool hasSHA2() const { return HasSHA2; }
hasAES()321   bool hasAES() const { return HasAES; }
balanceFPOps()322   bool balanceFPOps() const { return BalanceFPOps; }
predictableSelectIsExpensive()323   bool predictableSelectIsExpensive() const {
324     return PredictableSelectIsExpensive;
325   }
hasCustomCheapAsMoveHandling()326   bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
hasExynosCheapAsMoveHandling()327   bool hasExynosCheapAsMoveHandling() const { return ExynosAsCheapAsMove; }
isMisaligned128StoreSlow()328   bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
isPaired128Slow()329   bool isPaired128Slow() const { return Paired128IsSlow; }
isSTRQroSlow()330   bool isSTRQroSlow() const { return STRQroIsSlow; }
useAlternateSExtLoadCVTF32Pattern()331   bool useAlternateSExtLoadCVTF32Pattern() const {
332     return UseAlternateSExtLoadCVTF32Pattern;
333   }
hasArithmeticBccFusion()334   bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
hasArithmeticCbzFusion()335   bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
hasFuseAddress()336   bool hasFuseAddress() const { return HasFuseAddress; }
hasFuseAES()337   bool hasFuseAES() const { return HasFuseAES; }
hasFuseArithmeticLogic()338   bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; }
hasFuseCCSelect()339   bool hasFuseCCSelect() const { return HasFuseCCSelect; }
hasFuseCryptoEOR()340   bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; }
hasFuseLiterals()341   bool hasFuseLiterals() const { return HasFuseLiterals; }
342 
343   /// Return true if the CPU supports any kind of instruction fusion.
hasFusion()344   bool hasFusion() const {
345     return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
346            hasFuseAES() || hasFuseArithmeticLogic() ||
347            hasFuseCCSelect() || hasFuseLiterals();
348   }
349 
useEL1ForTP()350   bool useEL1ForTP() const { return UseEL1ForTP; }
useEL2ForTP()351   bool useEL2ForTP() const { return UseEL2ForTP; }
useEL3ForTP()352   bool useEL3ForTP() const { return UseEL3ForTP; }
353 
useRSqrt()354   bool useRSqrt() const { return UseRSqrt; }
force32BitJumpTables()355   bool force32BitJumpTables() const { return Force32BitJumpTables; }
getMaxInterleaveFactor()356   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
getVectorInsertExtractBaseCost()357   unsigned getVectorInsertExtractBaseCost() const {
358     return VectorInsertExtractBaseCost;
359   }
getCacheLineSize()360   unsigned getCacheLineSize() const override { return CacheLineSize; }
getPrefetchDistance()361   unsigned getPrefetchDistance() const override { return PrefetchDistance; }
getMinPrefetchStride()362   unsigned getMinPrefetchStride() const override { return MinPrefetchStride; }
getMaxPrefetchIterationsAhead()363   unsigned getMaxPrefetchIterationsAhead() const override {
364     return MaxPrefetchIterationsAhead;
365   }
getPrefFunctionLogAlignment()366   unsigned getPrefFunctionLogAlignment() const {
367     return PrefFunctionLogAlignment;
368   }
getPrefLoopLogAlignment()369   unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }
370 
getMaximumJumpTableSize()371   unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
372 
getWideningBaseCost()373   unsigned getWideningBaseCost() const { return WideningBaseCost; }
374 
375   /// CPU has TBI (top byte of addresses is ignored during HW address
376   /// translation) and OS enables it.
377   bool supportsAddressTopByteIgnored() const;
378 
hasPerfMon()379   bool hasPerfMon() const { return HasPerfMon; }
hasFullFP16()380   bool hasFullFP16() const { return HasFullFP16; }
hasFP16FML()381   bool hasFP16FML() const { return HasFP16FML; }
hasSPE()382   bool hasSPE() const { return HasSPE; }
hasLSLFast()383   bool hasLSLFast() const { return HasLSLFast; }
hasSVE()384   bool hasSVE() const { return HasSVE; }
hasSVE2()385   bool hasSVE2() const { return HasSVE2; }
hasRCPC()386   bool hasRCPC() const { return HasRCPC; }
hasAggressiveFMA()387   bool hasAggressiveFMA() const { return HasAggressiveFMA; }
hasAlternativeNZCV()388   bool hasAlternativeNZCV() const { return HasAlternativeNZCV; }
hasFRInt3264()389   bool hasFRInt3264() const { return HasFRInt3264; }
hasSpecRestrict()390   bool hasSpecRestrict() const { return HasSpecRestrict; }
hasSSBS()391   bool hasSSBS() const { return HasSSBS; }
hasSB()392   bool hasSB() const { return HasSB; }
hasPredRes()393   bool hasPredRes() const { return HasPredRes; }
hasCCDP()394   bool hasCCDP() const { return HasCCDP; }
hasBTI()395   bool hasBTI() const { return HasBTI; }
hasRandGen()396   bool hasRandGen() const { return HasRandGen; }
hasMTE()397   bool hasMTE() const { return HasMTE; }
hasTME()398   bool hasTME() const { return HasTME; }
399   // Arm SVE2 extensions
hasSVE2AES()400   bool hasSVE2AES() const { return HasSVE2AES; }
hasSVE2SM4()401   bool hasSVE2SM4() const { return HasSVE2SM4; }
hasSVE2SHA3()402   bool hasSVE2SHA3() const { return HasSVE2SHA3; }
hasSVE2BitPerm()403   bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
404 
isLittleEndian()405   bool isLittleEndian() const { return IsLittle; }
406 
isTargetDarwin()407   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
isTargetIOS()408   bool isTargetIOS() const { return TargetTriple.isiOS(); }
isTargetLinux()409   bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
isTargetWindows()410   bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
isTargetAndroid()411   bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
isTargetFuchsia()412   bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
413 
isTargetCOFF()414   bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
isTargetELF()415   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
isTargetMachO()416   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
417 
isTargetILP32()418   bool isTargetILP32() const { return TargetTriple.isArch32Bit(); }
419 
useAA()420   bool useAA() const override { return UseAA; }
421 
hasVH()422   bool hasVH() const { return HasVH; }
hasPAN()423   bool hasPAN() const { return HasPAN; }
hasLOR()424   bool hasLOR() const { return HasLOR; }
425 
hasPsUAO()426   bool hasPsUAO() const { return HasPsUAO; }
hasPAN_RWV()427   bool hasPAN_RWV() const { return HasPAN_RWV; }
hasCCPP()428   bool hasCCPP() const { return HasCCPP; }
429 
hasPA()430   bool hasPA() const { return HasPA; }
hasJS()431   bool hasJS() const { return HasJS; }
hasCCIDX()432   bool hasCCIDX() const { return HasCCIDX; }
hasComplxNum()433   bool hasComplxNum() const { return HasComplxNum; }
434 
hasNV()435   bool hasNV() const { return HasNV; }
hasRASv8_4()436   bool hasRASv8_4() const { return HasRASv8_4; }
hasMPAM()437   bool hasMPAM() const { return HasMPAM; }
hasDIT()438   bool hasDIT() const { return HasDIT; }
hasTRACEV8_4()439   bool hasTRACEV8_4() const { return HasTRACEV8_4; }
hasAM()440   bool hasAM() const { return HasAM; }
hasSEL2()441   bool hasSEL2() const { return HasSEL2; }
hasPMU()442   bool hasPMU() const { return HasPMU; }
hasTLB_RMI()443   bool hasTLB_RMI() const { return HasTLB_RMI; }
hasFMI()444   bool hasFMI() const { return HasFMI; }
hasRCPC_IMMO()445   bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
446 
addrSinkUsingGEPs()447   bool addrSinkUsingGEPs() const override {
448     // Keeping GEPs inbounds is important for exploiting AArch64
449     // addressing-modes in ILP32 mode.
450     return useAA() || isTargetILP32();
451   }
452 
useSmallAddressing()453   bool useSmallAddressing() const {
454     switch (TLInfo.getTargetMachine().getCodeModel()) {
455       case CodeModel::Kernel:
456         // Kernel is currently allowed only for Fuchsia targets,
457         // where it is the same as Small for almost all purposes.
458       case CodeModel::Small:
459         return true;
460       default:
461         return false;
462     }
463   }
464 
465   /// ParseSubtargetFeatures - Parses features string setting specified
466   /// subtarget options.  Definition of function is auto generated by tblgen.
467   void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
468 
469   /// ClassifyGlobalReference - Find the target operand flags that describe
470   /// how a global value should be referenced for the current subtarget.
471   unsigned ClassifyGlobalReference(const GlobalValue *GV,
472                                    const TargetMachine &TM) const;
473 
474   unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
475                                            const TargetMachine &TM) const;
476 
477   void overrideSchedPolicy(MachineSchedPolicy &Policy,
478                            unsigned NumRegionInstrs) const override;
479 
480   bool enableEarlyIfConversion() const override;
481 
enableAdvancedRASplitCost()482   bool enableAdvancedRASplitCost() const override { return true; }
483 
484   std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
485 
isCallingConvWin64(CallingConv::ID CC)486   bool isCallingConvWin64(CallingConv::ID CC) const {
487     switch (CC) {
488     case CallingConv::C:
489     case CallingConv::Fast:
490     case CallingConv::Swift:
491       return isTargetWindows();
492     case CallingConv::Win64:
493       return true;
494     default:
495       return false;
496     }
497   }
498 
499   void mirFileLoaded(MachineFunction &MF) const override;
500 };
501 } // End llvm namespace
502 
503 #endif
504