//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64CallLowering.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "GISel/AArch64RegisterBankInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/AArch64TargetParser.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);
// If the OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that the top "
                         "byte of an address is ignored"), cl::init(false),
                         cl::Hidden);

static cl::opt<bool>
    UseNonLazyBind("aarch64-enable-nonlazybind",
                   cl::desc("Call nonlazybind functions via direct GOT load"),
                   cl::init(false), cl::Hidden);

static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                           cl::desc("Enable the use of AA during codegen."));

static cl::opt<unsigned> OverrideVectorInsertExtractBaseCost(
    "aarch64-insert-extract-base-cost",
    cl::desc("Base cost of vector insert/extract element"), cl::Hidden);

// Reserve a list of X# registers, so they are unavailable to the register
// allocator but can still be used where the ABI requires them, such as for
// passing arguments to a function call.
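// For example, "-reserve-regs-for-regalloc=X4,X5" makes X4 and X5 unavailable
// to the register allocator.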
static cl::list<std::string>
ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
                  "registers, so they can't be used by register allocator. "
                  "Should only be used for testing register allocator."),
                  cl::CommaSeparated, cl::Hidden);

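// When set, force the generated code to stay compatible with SVE streaming
// mode; see forceStreamingCompatibleSVE() below, which additionally requires
// SVE or SME to be available.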
static cl::opt<bool>
    ForceStreamingCompatibleSVE("force-streaming-compatible-sve",
                                cl::init(false), cl::Hidden);

unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
  if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
    return OverrideVectorInsertExtractBaseCost;
  return VectorInsertExtractBaseCost;
}

AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
    StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
  // Determine default and user-specified characteristics

  if (CPUString.empty())
    CPUString = "generic";

  if (TuneCPUString.empty())
    TuneCPUString = CPUString;

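  // ParseSubtargetFeatures is generated by TableGen (via the
  // GET_SUBTARGETINFO_* includes above) and applies the CPU and feature
  // strings to this subtarget.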
  ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
  initializeProperties();

  return *this;
}

void AArch64Subtarget::initializeProperties() {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
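  // Note that the *LogAlignment properties are log2 values, e.g.
  // PrefFunctionLogAlignment = 4 requests 16-byte function alignment.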
  switch (ARMProcFamily) {
  case Others:
    break;
  case Carmel:
    CacheLineSize = 64;
    break;
  case CortexA35:
  case CortexA53:
  case CortexA55:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA65:
    PrefFunctionLogAlignment = 3;
    break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA76:
  case CortexA77:
  case CortexA78:
  case CortexA78C:
  case CortexR82:
  case CortexX1:
  case CortexX1C:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    break;
  case CortexA510:
    PrefFunctionLogAlignment = 4;
    VScaleForTuning = 1;
    PrefLoopLogAlignment = 4;
    MaxBytesForLoopAlignment = 8;
    break;
  case CortexA710:
  case CortexA715:
  case CortexX2:
  case CortexX3:
    PrefFunctionLogAlignment = 4;
    VScaleForTuning = 1;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    break;
  case A64FX:
    CacheLineSize = 256;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    VScaleForTuning = 4;
    break;
  case AppleA7:
  case AppleA10:
  case AppleA11:
  case AppleA12:
  case AppleA13:
  case AppleA14:
  case AppleA15:
  case AppleA16:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    switch (ARMProcFamily) {
    case AppleA14:
    case AppleA15:
    case AppleA16:
      MaxInterleaveFactor = 4;
      break;
    default:
      break;
    }
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionLogAlignment = 5;
    PrefLoopLogAlignment = 4;
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case NeoverseE1:
    PrefFunctionLogAlignment = 3;
    break;
  case NeoverseN1:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    break;
  case NeoverseN2:
  case NeoverseV2:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 1;
    break;
  case NeoverseV1:
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 5;
    MaxBytesForLoopAlignment = 16;
    VScaleForTuning = 2;
    break;
  case Neoverse512TVB:
    PrefFunctionLogAlignment = 4;
    VScaleForTuning = 1;
    MaxInterleaveFactor = 4;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionLogAlignment = 3;
    PrefLoopLogAlignment = 2;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case TSV110:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 2;
    break;
  case ThunderX3T110:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 4;
    PrefLoopLogAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Ampere1:
  case Ampere1A:
    CacheLineSize = 64;
    PrefFunctionLogAlignment = 6;
    PrefLoopLogAlignment = 6;
    MaxInterleaveFactor = 4;
    break;
  }
}

AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                   StringRef TuneCPU, StringRef FS,
                                   const TargetMachine &TM, bool LittleEndian,
                                   unsigned MinSVEVectorSizeInBitsOverride,
                                   unsigned MaxSVEVectorSizeInBitsOverride,
                                   bool StreamingSVEModeDisabled)
    : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian),
      StreamingSVEModeDisabled(StreamingSVEModeDisabled),
      MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
      MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
      InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
      TLInfo(TM, *this) {
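  // On some targets (e.g. Darwin and Windows) X18 is the platform register
  // and must be kept out of register allocation.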
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  InlineAsmLoweringInfo.reset(new InlineAsmLowering(getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);

  auto TRI = getRegisterInfo();
  StringSet<> ReservedRegNames;
  ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
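  // X0..X28 can be matched against TRI->getName directly; X29 and X30 are
  // handled separately below since TRI names them FP and LR.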
  for (unsigned i = 0; i < 29; ++i) {
    if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
      ReserveXRegisterForRA.set(i);
  }
  // X30 is named LR, so we can't use TRI->getName to check X30.
  if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
    ReserveXRegisterForRA.set(30);
  // X29 is named FP, so we can't use TRI->getName to check X29.
  if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
    ReserveXRegisterForRA.set(29);
}

const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InlineAsmLowering *AArch64Subtarget::getInlineAsmLowering() const {
  return InlineAsmLoweringInfo.get();
}

InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
    if (GV->hasDLLImportStorageClass()) {
      if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy())
        return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORTAUX;
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a pc relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
  // that their nominal addresses are tagged and outside of the code model. In
  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
  // tag if necessary based on MO_TAGGED.
  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
    return AArch64II::MO_NC | AArch64II::MO_TAGGED;

  return AArch64II::MO_NO_FLAG;
}

unsigned AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
      !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return AArch64II::MO_GOT;

  if (getTargetTriple().isOSWindows()) {
    if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy() &&
        GV->hasDLLImportStorageClass()) {
      // On Arm64EC, if we're calling a function directly, use MO_DLLIMPORT,
      // not MO_DLLIMPORTAUX.
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    }

    // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
    return ClassifyGlobalReference(GV, TM);
  }

  return AArch64II::MO_NO_FLAG;
}

void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling. 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help almost no benchmark on out-of-order architectures; on the other
  // hand, it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

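  // The Darwin-based targets handled below are known to ignore the top byte
  // of userspace addresses.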
  if (TargetTriple.isDriverKit())
    return true;
  if (TargetTriple.isiOS()) {
    return TargetTriple.getiOSVersion() >= VersionTuple(8);
  }

  return false;
}

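// A custom PBQP constraint is only needed when balancing FP ops, which uses
// the Cortex-A57 chaining constraint from AArch64PBQPRegAlloc.h.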
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions; specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}

bool AArch64Subtarget::useAA() const { return UseAA; }

bool AArch64Subtarget::forceStreamingCompatibleSVE() const {
  if (ForceStreamingCompatibleSVE) {
    assert(hasSVEorSME() && "Expected SVE to be available");
    return hasSVEorSME();
  }
  return false;
}