10b57cec5SDimitry Andric //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass identifies floating point stores that should not be combined into
100b57cec5SDimitry Andric // store pairs. Later we may do the same for floating point loads.
110b57cec5SDimitry Andric // ===---------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "AArch64InstrInfo.h"
145f757f3fSDimitry Andric #include "AArch64Subtarget.h"
150b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
170b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineTraceMetrics.h"
190b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSchedule.h"
210b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
220b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
230b57cec5SDimitry Andric
240b57cec5SDimitry Andric using namespace llvm;
250b57cec5SDimitry Andric
260b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-stp-suppress"
270b57cec5SDimitry Andric
280b57cec5SDimitry Andric #define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression"
290b57cec5SDimitry Andric
300b57cec5SDimitry Andric namespace {
310b57cec5SDimitry Andric class AArch64StorePairSuppress : public MachineFunctionPass {
320b57cec5SDimitry Andric const AArch64InstrInfo *TII;
330b57cec5SDimitry Andric const TargetRegisterInfo *TRI;
340b57cec5SDimitry Andric const MachineRegisterInfo *MRI;
350b57cec5SDimitry Andric TargetSchedModel SchedModel;
360b57cec5SDimitry Andric MachineTraceMetrics *Traces;
370b57cec5SDimitry Andric MachineTraceMetrics::Ensemble *MinInstr;
380b57cec5SDimitry Andric
390b57cec5SDimitry Andric public:
400b57cec5SDimitry Andric static char ID;
AArch64StorePairSuppress()410b57cec5SDimitry Andric AArch64StorePairSuppress() : MachineFunctionPass(ID) {
420b57cec5SDimitry Andric initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry());
430b57cec5SDimitry Andric }
440b57cec5SDimitry Andric
getPassName() const450b57cec5SDimitry Andric StringRef getPassName() const override { return STPSUPPRESS_PASS_NAME; }
460b57cec5SDimitry Andric
470b57cec5SDimitry Andric bool runOnMachineFunction(MachineFunction &F) override;
480b57cec5SDimitry Andric
490b57cec5SDimitry Andric private:
500b57cec5SDimitry Andric bool shouldAddSTPToBlock(const MachineBasicBlock *BB);
510b57cec5SDimitry Andric
520b57cec5SDimitry Andric bool isNarrowFPStore(const MachineInstr &MI);
530b57cec5SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const540b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
550b57cec5SDimitry Andric AU.setPreservesCFG();
560b57cec5SDimitry Andric AU.addRequired<MachineTraceMetrics>();
570b57cec5SDimitry Andric AU.addPreserved<MachineTraceMetrics>();
580b57cec5SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
590b57cec5SDimitry Andric }
600b57cec5SDimitry Andric };
610b57cec5SDimitry Andric char AArch64StorePairSuppress::ID = 0;
620b57cec5SDimitry Andric } // anonymous
630b57cec5SDimitry Andric
640b57cec5SDimitry Andric INITIALIZE_PASS(AArch64StorePairSuppress, "aarch64-stp-suppress",
650b57cec5SDimitry Andric STPSUPPRESS_PASS_NAME, false, false)
660b57cec5SDimitry Andric
createAArch64StorePairSuppressPass()670b57cec5SDimitry Andric FunctionPass *llvm::createAArch64StorePairSuppressPass() {
680b57cec5SDimitry Andric return new AArch64StorePairSuppress();
690b57cec5SDimitry Andric }
700b57cec5SDimitry Andric
710b57cec5SDimitry Andric /// Return true if an STP can be added to this block without increasing the
720b57cec5SDimitry Andric /// critical resource height. STP is good to form in Ld/St limited blocks and
730b57cec5SDimitry Andric /// bad to form in float-point limited blocks. This is true independent of the
740b57cec5SDimitry Andric /// critical path. If the critical path is longer than the resource height, the
750b57cec5SDimitry Andric /// extra vector ops can limit physreg renaming. Otherwise, it could simply
760b57cec5SDimitry Andric /// oversaturate the vector units.
shouldAddSTPToBlock(const MachineBasicBlock * BB)770b57cec5SDimitry Andric bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
780b57cec5SDimitry Andric if (!MinInstr)
7906c3fb27SDimitry Andric MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
800b57cec5SDimitry Andric
810b57cec5SDimitry Andric MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
820b57cec5SDimitry Andric unsigned ResLength = BBTrace.getResourceLength();
830b57cec5SDimitry Andric
840b57cec5SDimitry Andric // Get the machine model's scheduling class for STPQi.
850b57cec5SDimitry Andric // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
860b57cec5SDimitry Andric unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
870b57cec5SDimitry Andric const MCSchedClassDesc *SCDesc =
880b57cec5SDimitry Andric SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
890b57cec5SDimitry Andric
900b57cec5SDimitry Andric // If a subtarget does not define resources for STPQi, bail here.
910b57cec5SDimitry Andric if (SCDesc->isValid() && !SCDesc->isVariant()) {
92bdd1243dSDimitry Andric unsigned ResLenWithSTP = BBTrace.getResourceLength(std::nullopt, SCDesc);
930b57cec5SDimitry Andric if (ResLenWithSTP > ResLength) {
940b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Suppress STP in BB: " << BB->getNumber()
950b57cec5SDimitry Andric << " resources " << ResLength << " -> " << ResLenWithSTP
960b57cec5SDimitry Andric << "\n");
970b57cec5SDimitry Andric return false;
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric }
1000b57cec5SDimitry Andric return true;
1010b57cec5SDimitry Andric }
1020b57cec5SDimitry Andric
1030b57cec5SDimitry Andric /// Return true if this is a floating-point store smaller than the V reg. On
1040b57cec5SDimitry Andric /// cyclone, these require a vector shuffle before storing a pair.
1050b57cec5SDimitry Andric /// Ideally we would call getMatchingPairOpcode() and have the machine model
1060b57cec5SDimitry Andric /// tell us if it's profitable with no cpu knowledge here.
1070b57cec5SDimitry Andric ///
1080b57cec5SDimitry Andric /// FIXME: We plan to develop a decent Target abstraction for simple loads and
1090b57cec5SDimitry Andric /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer.
isNarrowFPStore(const MachineInstr & MI)1100b57cec5SDimitry Andric bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
1110b57cec5SDimitry Andric switch (MI.getOpcode()) {
1120b57cec5SDimitry Andric default:
1130b57cec5SDimitry Andric return false;
1140b57cec5SDimitry Andric case AArch64::STRSui:
1150b57cec5SDimitry Andric case AArch64::STRDui:
1160b57cec5SDimitry Andric case AArch64::STURSi:
1170b57cec5SDimitry Andric case AArch64::STURDi:
1180b57cec5SDimitry Andric return true;
1190b57cec5SDimitry Andric }
1200b57cec5SDimitry Andric }
1210b57cec5SDimitry Andric
runOnMachineFunction(MachineFunction & MF)1220b57cec5SDimitry Andric bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
123349cc55cSDimitry Andric if (skipFunction(MF.getFunction()) || MF.getFunction().hasOptSize())
1240b57cec5SDimitry Andric return false;
1250b57cec5SDimitry Andric
1265f757f3fSDimitry Andric const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
1275f757f3fSDimitry Andric if (!ST.enableStorePairSuppress())
1285f757f3fSDimitry Andric return false;
1295f757f3fSDimitry Andric
1300b57cec5SDimitry Andric TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
1310b57cec5SDimitry Andric TRI = ST.getRegisterInfo();
1320b57cec5SDimitry Andric MRI = &MF.getRegInfo();
1330b57cec5SDimitry Andric SchedModel.init(&ST);
1340b57cec5SDimitry Andric Traces = &getAnalysis<MachineTraceMetrics>();
1350b57cec5SDimitry Andric MinInstr = nullptr;
1360b57cec5SDimitry Andric
1370b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n');
1380b57cec5SDimitry Andric
1390b57cec5SDimitry Andric if (!SchedModel.hasInstrSchedModel()) {
1400b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " Skipping pass: no machine model present.\n");
1410b57cec5SDimitry Andric return false;
1420b57cec5SDimitry Andric }
1430b57cec5SDimitry Andric
1440b57cec5SDimitry Andric // Check for a sequence of stores to the same base address. We don't need to
1450b57cec5SDimitry Andric // precisely determine whether a store pair can be formed. But we do want to
1460b57cec5SDimitry Andric // filter out most situations where we can't form store pairs to avoid
1470b57cec5SDimitry Andric // computing trace metrics in those cases.
1480b57cec5SDimitry Andric for (auto &MBB : MF) {
1490b57cec5SDimitry Andric bool SuppressSTP = false;
1500b57cec5SDimitry Andric unsigned PrevBaseReg = 0;
1510b57cec5SDimitry Andric for (auto &MI : MBB) {
1520b57cec5SDimitry Andric if (!isNarrowFPStore(MI))
1530b57cec5SDimitry Andric continue;
1540b57cec5SDimitry Andric const MachineOperand *BaseOp;
1550b57cec5SDimitry Andric int64_t Offset;
1565ffd83dbSDimitry Andric bool OffsetIsScalable;
1575ffd83dbSDimitry Andric if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable,
1585ffd83dbSDimitry Andric TRI) &&
1590b57cec5SDimitry Andric BaseOp->isReg()) {
1608bcb0991SDimitry Andric Register BaseReg = BaseOp->getReg();
1610b57cec5SDimitry Andric if (PrevBaseReg == BaseReg) {
1620b57cec5SDimitry Andric // If this block can take STPs, skip ahead to the next block.
1630b57cec5SDimitry Andric if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent()))
1640b57cec5SDimitry Andric break;
1650b57cec5SDimitry Andric // Otherwise, continue unpairing the stores in this block.
1660b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Unpairing store " << MI << "\n");
1670b57cec5SDimitry Andric SuppressSTP = true;
1680b57cec5SDimitry Andric TII->suppressLdStPair(MI);
1690b57cec5SDimitry Andric }
1700b57cec5SDimitry Andric PrevBaseReg = BaseReg;
1710b57cec5SDimitry Andric } else
1720b57cec5SDimitry Andric PrevBaseReg = 0;
1730b57cec5SDimitry Andric }
1740b57cec5SDimitry Andric }
1750b57cec5SDimitry Andric // This pass just sets some internal MachineMemOperand flags. It can't really
1760b57cec5SDimitry Andric // invalidate anything.
1770b57cec5SDimitry Andric return false;
1780b57cec5SDimitry Andric }
179