109467b48Spatrick //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // This file implements the AArch64 specific subclass of TargetSubtarget.
1009467b48Spatrick //
1109467b48Spatrick //===----------------------------------------------------------------------===//
1209467b48Spatrick
1309467b48Spatrick #include "AArch64Subtarget.h"
1409467b48Spatrick
1509467b48Spatrick #include "AArch64.h"
1609467b48Spatrick #include "AArch64InstrInfo.h"
1709467b48Spatrick #include "AArch64PBQPRegAlloc.h"
1809467b48Spatrick #include "AArch64TargetMachine.h"
19097a140dSpatrick #include "GISel/AArch64CallLowering.h"
20097a140dSpatrick #include "GISel/AArch64LegalizerInfo.h"
21097a140dSpatrick #include "GISel/AArch64RegisterBankInfo.h"
2209467b48Spatrick #include "MCTargetDesc/AArch64AddressingModes.h"
2309467b48Spatrick #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
24*d415bd75Srobert #include "llvm/CodeGen/MachineFrameInfo.h"
2509467b48Spatrick #include "llvm/CodeGen/MachineScheduler.h"
2609467b48Spatrick #include "llvm/IR/GlobalValue.h"
27*d415bd75Srobert #include "llvm/Support/AArch64TargetParser.h"
2809467b48Spatrick #include "llvm/Support/TargetParser.h"
2909467b48Spatrick
3009467b48Spatrick using namespace llvm;
3109467b48Spatrick
3209467b48Spatrick #define DEBUG_TYPE "aarch64-subtarget"
3309467b48Spatrick
3409467b48Spatrick #define GET_SUBTARGETINFO_CTOR
3509467b48Spatrick #define GET_SUBTARGETINFO_TARGET_DESC
3609467b48Spatrick #include "AArch64GenSubtargetInfo.inc"
3709467b48Spatrick
// Toggle for the EarlyIfConverter pass; enabled by default.
static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);
4109467b48Spatrick
// If the OS supports TBI (top-byte ignore), use this flag to enable it.
// Off by default; see supportsAddressTopByteIgnored() for the OS checks.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);
4609467b48Spatrick
// When set, calls to functions marked nonlazybind go through the GOT rather
// than being direct calls; consulted in classifyGlobalFunctionReference().
static cl::opt<bool>
UseNonLazyBind("aarch64-enable-nonlazybind",
               cl::desc("Call nonlazybind functions via direct GOT load"),
               cl::init(false), cl::Hidden);
5109467b48Spatrick
// Backs AArch64Subtarget::useAA(); on by default.
static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));
54097a140dSpatrick
// Command-line override for the CPU-specific VectorInsertExtractBaseCost;
// only honored when explicitly given (see getVectorInsertExtractBaseCost()).
static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
    "aarch64-insert-extract-base-cost",
    cl::desc("Base cost of vector insert/extract element"), cl::Hidden);
58*d415bd75Srobert
// Reserve a list of X# registers, so they are unavailable for register
// allocator, but can still be used as ABI requests, such as passing arguments
// to function call. Names are matched against TRI->getName() in the
// constructor (with special-casing for FP/X29 and LR/X30).
static cl::list<std::string>
ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
                  "registers, so they can't be used by register allocator. "
                  "Should only be used for testing register allocator."),
                  cl::CommaSeparated, cl::Hidden);
67*d415bd75Srobert
68*d415bd75Srobert static cl::opt<bool>
69*d415bd75Srobert ForceStreamingCompatibleSVE("force-streaming-compatible-sve",
70*d415bd75Srobert cl::init(false), cl::Hidden);
71*d415bd75Srobert
getVectorInsertExtractBaseCost() const72*d415bd75Srobert unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
73*d415bd75Srobert if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
74*d415bd75Srobert return OverrideVectorInsertExtractBaseCost;
75*d415bd75Srobert return VectorInsertExtractBaseCost;
76*d415bd75Srobert }
77*d415bd75Srobert
initializeSubtargetDependencies(StringRef FS,StringRef CPUString,StringRef TuneCPUString)78*d415bd75Srobert AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
79*d415bd75Srobert StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
8009467b48Spatrick // Determine default and user-specified characteristics
8109467b48Spatrick
8209467b48Spatrick if (CPUString.empty())
8309467b48Spatrick CPUString = "generic";
8409467b48Spatrick
85*d415bd75Srobert if (TuneCPUString.empty())
86*d415bd75Srobert TuneCPUString = CPUString;
87*d415bd75Srobert
88*d415bd75Srobert ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
8909467b48Spatrick initializeProperties();
9009467b48Spatrick
9109467b48Spatrick return *this;
9209467b48Spatrick }
9309467b48Spatrick
/// Set per-CPU tuning knobs (cache-line size, preferred alignments, prefetch
/// parameters, interleave factors, SVE vscale hints) based on ARMProcFamily.
/// Fields not assigned for a given family keep their defaults.
void AArch64Subtarget::initializeProperties() {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Others:
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
  case CortexA53:
  case CortexA55:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA65:
    PrefFunctionLogAlignment = 3;
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78C:
  case CortexR82:
  case CortexX1:
  case CortexX1C:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    break;
  case CortexA510:
    PrefFunctionLogAlignment = 4;
    VScaleForTuning = 1;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA710:
  case CortexA715:
  case CortexX2:
  case CortexX3:
    PrefFunctionLogAlignment = 4;
    VScaleForTuning = 1;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    VScaleForTuning = 4;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
  case AppleA15:
  case AppleA16:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    // A14 and newer additionally get a higher interleave factor.
    switch (ARMProcFamily) {
    case AppleA14:
    case AppleA15:
    case AppleA16:
      MaxInterleaveFactor = 4;
      break;
    default:
      break;
    }
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionLogAlignment = 5;
    PrefLoopLogAlignment = 4;
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionLogAlignment = 3;
    break;
  case NeoverseN1:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    break;
  case NeoverseN2:
  case NeoverseV2:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  case NeoverseV1:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 2;
    break;
  case Neoverse512TVB:
    PrefFunctionLogAlignment = 4;
    VScaleForTuning = 1;
    MaxInterleaveFactor = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 2;
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Ampere1:
  case Ampere1A:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 6;
    PrefLoopLogAlignment = 6;
    MaxInterleaveFactor = 4;
    break;
  }
}
28709467b48Spatrick
/// Construct the subtarget: parse CPU/feature strings, build the target
/// lowering and GlobalISel objects, and apply register reservations.
AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                   StringRef TuneCPU, StringRef FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride,
                                   bool StreamingSVEModeDisabled)
    : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian),
      StreamingSVEModeDisabled(StreamingSVEModeDisabled),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
      // NOTE: initializeSubtargetDependencies() must run before InstrInfo and
      // TLInfo are constructed, since they consume the parsed feature state.
      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
      TLInfo(TM, *this) {
  // X18 is the platform register on some OSes; reserve it there by default.
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  // Wire up the GlobalISel support objects for this subtarget.
  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);

  // Apply -reserve-regs-for-regalloc: match each requested name against the
  // register names of X0..X28, then special-case the aliased FP/LR registers.
  auto TRI = getRegisterInfo();
  StringSet<> ReservedRegNames;
  ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
  for (unsigned i = 0; i < 29; ++i) {
    if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
      ReserveXRegisterForRA.set(i);
  }
  // X30 is named LR, so we can't use TRI->getName to check X30.
  if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
    ReserveXRegisterForRA.set(30);
  // X29 is named FP, so we can't use TRI->getName to check X29.
  if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
    ReserveXRegisterForRA.set(29);
}
33509467b48Spatrick
// GlobalISel call-lowering hook; the object is created in the constructor.
const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}
33909467b48Spatrick
// GlobalISel inline-asm lowering hook; created in the constructor.
const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}
343097a140dSpatrick
// GlobalISel instruction selector; created in the constructor.
InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}
34709467b48Spatrick
// GlobalISel legalizer rules; created in the constructor.
const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}
35109467b48Spatrick
// GlobalISel register-bank info; created in the constructor.
const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}
35509467b48Spatrick
/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
/// The checks are ordered: MachO-large, non-DSO-local (DLL import / COFF stub
/// / plain GOT), weak externals under small/tiny code models, tagged globals,
/// and finally direct access (MO_NO_FLAG).
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
    if (GV->hasDLLImportStorageClass()) {
      // Arm64EC uses a distinct flag for imported *functions* referenced as
      // data (see classifyGlobalFunctionReference for the direct-call case).
      if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy())
        return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORTAUX;
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a pc relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}
39309467b48Spatrick
/// Like ClassifyGlobalReference, but for a global used as the callee of a
/// direct call; returns the AArch64II::MO_* flags for the call operand.
unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else..
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
      !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return AArch64II::MO_GOT;

  if (getTargetTriple().isOSWindows()) {
    if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy() &&
        GV->hasDLLImportStorageClass()) {
      // On Arm64EC, if we're calling a function directly, use MO_DLLIMPORT,
      // not MO_DLLIMPORTAUX.
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }

    // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
    return ClassifyGlobalReference(GV, TM);
  }

  return AArch64II::MO_NO_FLAG;
}
42209467b48Spatrick
/// Adjust the MachineScheduler policy for AArch64: schedule bi-directionally
/// and optionally disable the latency heuristic.
void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling. 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or Disabling the latency heuristic is a close call: It seems to
  // help nearly no benchmark on out-of-order architectures, on the other hand
  // it regresses register pressure on a few benchmarking.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}
43409467b48Spatrick
// Early if-conversion is gated solely by the -aarch64-early-ifcvt flag.
bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}
43809467b48Spatrick
supportsAddressTopByteIgnored() const43909467b48Spatrick bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
44009467b48Spatrick if (!UseAddressTopByteIgnored)
44109467b48Spatrick return false;
44209467b48Spatrick
443*d415bd75Srobert if (TargetTriple.isDriverKit())
444*d415bd75Srobert return true;
44509467b48Spatrick if (TargetTriple.isiOS()) {
446*d415bd75Srobert return TargetTriple.getiOSVersion() >= VersionTuple(8);
44709467b48Spatrick }
44809467b48Spatrick
44909467b48Spatrick return false;
45009467b48Spatrick }
45109467b48Spatrick
45209467b48Spatrick std::unique_ptr<PBQPRAConstraint>
getCustomPBQPConstraints() const45309467b48Spatrick AArch64Subtarget::getCustomPBQPConstraints() const {
45409467b48Spatrick return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
45509467b48Spatrick }
45609467b48Spatrick
/// Hook run after a .mir file has been parsed for this function.
void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions, specify explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}
466097a140dSpatrick
// Whether codegen may use alias analysis; controlled by -aarch64-use-aa.
bool AArch64Subtarget::useAA() const { return UseAA; }
468*d415bd75Srobert
forceStreamingCompatibleSVE() const469*d415bd75Srobert bool AArch64Subtarget::forceStreamingCompatibleSVE() const {
470*d415bd75Srobert if (ForceStreamingCompatibleSVE) {
471*d415bd75Srobert assert(hasSVEorSME() && "Expected SVE to be available");
472*d415bd75Srobert return hasSVEorSME();
473*d415bd75Srobert }
474*d415bd75Srobert return false;
475*d415bd75Srobert }
476