//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/AArch64TargetParser.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);
// If the OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that the top "
                         "byte of an address is ignored"), cl::init(false),
                         cl::Hidden);

static cl::opt<bool>
    UseNonLazyBind("aarch64-enable-nonlazybind",
                   cl::desc("Call nonlazybind functions via direct GOT load"),
                   cl::init(false), cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));

static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
    "aarch64-insert-extract-base-cost",
    cl::desc("Base cost of vector insert/extract element"), cl::Hidden);

// Reserve a list of X# registers, so they are unavailable to the register
// allocator but can still be used where the ABI requires them, such as for
// passing arguments to a function call.
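// For example, "-reserve-regs-for-regalloc=X4,X5" makes X4 and X5 unavailable
// to the register allocator.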
static cl::list<std::string>
ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
                  "registers, so they can't be used by register allocator. "
                  "Should only be used for testing register allocator."),
                  cl::CommaSeparated, cl::Hidden);

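// When set, force the generated code to stay compatible with SVE streaming
// mode; see forceStreamingCompatibleSVE() below, which additionally requires
// SVE or SME to be available.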
static cl::opt<bool>
    ForceStreamingCompatibleSVE("force-streaming-compatible-sve",
                                cl::init(false), cl::Hidden);

unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
  if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
    return OverrideVectorInsertExtractBaseCost;
  return VectorInsertExtractBaseCost;
}

AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
    StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
  // Determine default and user-specified characteristics

  if (CPUString.empty())
    CPUString = "generic";

  if (TuneCPUString.empty())
    TuneCPUString = CPUString;

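  // ParseSubtargetFeatures is generated by TableGen (via the
  // GET_SUBTARGETINFO_* includes above) and applies the CPU and feature
  // strings to this subtarget.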
  ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
  initializeProperties();

  return *this;
}

void AArch64Subtarget::initializeProperties() {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
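  // Note that the *LogAlignment properties are log2 values, e.g.
  // PrefFunctionLogAlignment = 4 requests 16-byte function alignment.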
  switch (ARMProcFamily) {
  case Others:
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
  case CortexA53:
  case CortexA55:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA65:
    PrefFunctionLogAlignment = 3;
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78C:
  case CortexR82:
  case CortexX1:
  case CortexX1C:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    break;
  case CortexA510:
    PrefFunctionLogAlignment = 4;
    VScaleForTuning = 1;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA710:
  case CortexA715:
  case CortexX2:
  case CortexX3:
    PrefFunctionLogAlignment = 4;
    VScaleForTuning = 1;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    VScaleForTuning = 4;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
  case AppleA15:
  case AppleA16:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    switch (ARMProcFamily) {
    case AppleA14:
    case AppleA15:
    case AppleA16:
      MaxInterleaveFactor = 4;
      break;
    default:
      break;
    }
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionLogAlignment = 5;
    PrefLoopLogAlignment = 4;
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionLogAlignment = 3;
    break;
  case NeoverseN1:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    break;
  case NeoverseN2:
  case NeoverseV2:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  case NeoverseV1:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 2;
    break;
  case Neoverse512TVB:
    PrefFunctionLogAlignment = 4;
    VScaleForTuning = 1;
    MaxInterleaveFactor = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 2;
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Ampere1:
  case Ampere1A:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 6;
    PrefLoopLogAlignment = 6;
    MaxInterleaveFactor = 4;
    break;
  }
}

AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                   StringRef TuneCPU, StringRef FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride,
                                   bool StreamingSVEModeDisabled)
    : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian),
      StreamingSVEModeDisabled(StreamingSVEModeDisabled),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
      TLInfo(TM, *this) {
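  // On some targets (e.g. Darwin and Windows) X18 is the platform register
  // and must be kept out of register allocation.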
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);

  auto TRI = getRegisterInfo();
  StringSet<> ReservedRegNames;
  ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
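  // X0..X28 can be matched against TRI->getName directly; X29 and X30 are
  // handled separately below since TRI names them FP and LR.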
  for (unsigned i = 0; i < 29; ++i) {
    if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
      ReserveXRegisterForRA.set(i);
  }
  // X30 is named LR, so we can't use TRI->getName to check X30.
  if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
    ReserveXRegisterForRA.set(30);
  // X29 is named FP, so we can't use TRI->getName to check X29.
  if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
    ReserveXRegisterForRA.set(29);
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
    if (GV->hasDLLImportStorageClass()) {
      if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy())
        return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORTAUX;
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a pc relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}

unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
      !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return AArch64II::MO_GOT;

  if (getTargetTriple().isOSWindows()) {
    if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy() &&
        GV->hasDLLImportStorageClass()) {
      // On Arm64EC, if we're calling a function directly, use MO_DLLIMPORT,
      // not MO_DLLIMPORTAUX.
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }

    // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
    return ClassifyGlobalReference(GV, TM);
  }

  return AArch64II::MO_NO_FLAG;
}

void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling. 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help almost no benchmark on out-of-order architectures; on the other
  // hand, it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

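  // The Darwin-based targets handled below are known to ignore the top byte
  // of userspace addresses.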
  if (TargetTriple.isDriverKit())
    return true;
  if (TargetTriple.isiOS()) {
    return TargetTriple.getiOSVersion() >= VersionTuple(8);
  }

  return false;
}

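// A custom PBQP constraint is only needed when balancing FP ops, which uses
// the Cortex-A57 chaining constraint from AArch64PBQPRegAlloc.h.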
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions; specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}

bool AArch64Subtarget::useAA() const { return UseAA; }

bool AArch64Subtarget::forceStreamingCompatibleSVE() const {
  if (ForceStreamingCompatibleSVE) {
    assert(hasSVEorSME() && "Expected SVE to be available");
    return hasSVEorSME();
  }
  return false;
}