10b57cec5SDimitry Andric //===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass identifies floating point stores that should not be combined into
100b57cec5SDimitry Andric // store pairs. Later we may do the same for floating point loads.
110b57cec5SDimitry Andric // ===---------------------------------------------------------------------===//
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric #include "AArch64InstrInfo.h"
145f757f3fSDimitry Andric #include "AArch64Subtarget.h"
150b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
170b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
180b57cec5SDimitry Andric #include "llvm/CodeGen/MachineTraceMetrics.h"
190b57cec5SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSchedule.h"
210b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
220b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
230b57cec5SDimitry Andric 
240b57cec5SDimitry Andric using namespace llvm;
250b57cec5SDimitry Andric 
260b57cec5SDimitry Andric #define DEBUG_TYPE "aarch64-stp-suppress"
270b57cec5SDimitry Andric 
280b57cec5SDimitry Andric #define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression"
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric namespace {
310b57cec5SDimitry Andric class AArch64StorePairSuppress : public MachineFunctionPass {
320b57cec5SDimitry Andric   const AArch64InstrInfo *TII;
330b57cec5SDimitry Andric   const TargetRegisterInfo *TRI;
340b57cec5SDimitry Andric   const MachineRegisterInfo *MRI;
350b57cec5SDimitry Andric   TargetSchedModel SchedModel;
360b57cec5SDimitry Andric   MachineTraceMetrics *Traces;
370b57cec5SDimitry Andric   MachineTraceMetrics::Ensemble *MinInstr;
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric public:
400b57cec5SDimitry Andric   static char ID;
AArch64StorePairSuppress()410b57cec5SDimitry Andric   AArch64StorePairSuppress() : MachineFunctionPass(ID) {
420b57cec5SDimitry Andric     initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry());
430b57cec5SDimitry Andric   }
440b57cec5SDimitry Andric 
getPassName() const450b57cec5SDimitry Andric   StringRef getPassName() const override { return STPSUPPRESS_PASS_NAME; }
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric   bool runOnMachineFunction(MachineFunction &F) override;
480b57cec5SDimitry Andric 
490b57cec5SDimitry Andric private:
500b57cec5SDimitry Andric   bool shouldAddSTPToBlock(const MachineBasicBlock *BB);
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric   bool isNarrowFPStore(const MachineInstr &MI);
530b57cec5SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const540b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
550b57cec5SDimitry Andric     AU.setPreservesCFG();
560b57cec5SDimitry Andric     AU.addRequired<MachineTraceMetrics>();
570b57cec5SDimitry Andric     AU.addPreserved<MachineTraceMetrics>();
580b57cec5SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
590b57cec5SDimitry Andric   }
600b57cec5SDimitry Andric };
610b57cec5SDimitry Andric char AArch64StorePairSuppress::ID = 0;
620b57cec5SDimitry Andric } // anonymous
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric INITIALIZE_PASS(AArch64StorePairSuppress, "aarch64-stp-suppress",
650b57cec5SDimitry Andric                 STPSUPPRESS_PASS_NAME, false, false)
660b57cec5SDimitry Andric 
createAArch64StorePairSuppressPass()670b57cec5SDimitry Andric FunctionPass *llvm::createAArch64StorePairSuppressPass() {
680b57cec5SDimitry Andric   return new AArch64StorePairSuppress();
690b57cec5SDimitry Andric }
700b57cec5SDimitry Andric 
710b57cec5SDimitry Andric /// Return true if an STP can be added to this block without increasing the
720b57cec5SDimitry Andric /// critical resource height. STP is good to form in Ld/St limited blocks and
730b57cec5SDimitry Andric /// bad to form in float-point limited blocks. This is true independent of the
740b57cec5SDimitry Andric /// critical path. If the critical path is longer than the resource height, the
750b57cec5SDimitry Andric /// extra vector ops can limit physreg renaming. Otherwise, it could simply
760b57cec5SDimitry Andric /// oversaturate the vector units.
shouldAddSTPToBlock(const MachineBasicBlock * BB)770b57cec5SDimitry Andric bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
780b57cec5SDimitry Andric   if (!MinInstr)
7906c3fb27SDimitry Andric     MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric   MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
820b57cec5SDimitry Andric   unsigned ResLength = BBTrace.getResourceLength();
830b57cec5SDimitry Andric 
840b57cec5SDimitry Andric   // Get the machine model's scheduling class for STPQi.
850b57cec5SDimitry Andric   // Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
860b57cec5SDimitry Andric   unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
870b57cec5SDimitry Andric   const MCSchedClassDesc *SCDesc =
880b57cec5SDimitry Andric       SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
890b57cec5SDimitry Andric 
900b57cec5SDimitry Andric   // If a subtarget does not define resources for STPQi, bail here.
910b57cec5SDimitry Andric   if (SCDesc->isValid() && !SCDesc->isVariant()) {
92bdd1243dSDimitry Andric     unsigned ResLenWithSTP = BBTrace.getResourceLength(std::nullopt, SCDesc);
930b57cec5SDimitry Andric     if (ResLenWithSTP > ResLength) {
940b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "  Suppress STP in BB: " << BB->getNumber()
950b57cec5SDimitry Andric                         << " resources " << ResLength << " -> " << ResLenWithSTP
960b57cec5SDimitry Andric                         << "\n");
970b57cec5SDimitry Andric       return false;
980b57cec5SDimitry Andric     }
990b57cec5SDimitry Andric   }
1000b57cec5SDimitry Andric   return true;
1010b57cec5SDimitry Andric }
1020b57cec5SDimitry Andric 
1030b57cec5SDimitry Andric /// Return true if this is a floating-point store smaller than the V reg. On
1040b57cec5SDimitry Andric /// cyclone, these require a vector shuffle before storing a pair.
1050b57cec5SDimitry Andric /// Ideally we would call getMatchingPairOpcode() and have the machine model
1060b57cec5SDimitry Andric /// tell us if it's profitable with no cpu knowledge here.
1070b57cec5SDimitry Andric ///
1080b57cec5SDimitry Andric /// FIXME: We plan to develop a decent Target abstraction for simple loads and
1090b57cec5SDimitry Andric /// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer.
isNarrowFPStore(const MachineInstr & MI)1100b57cec5SDimitry Andric bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
1110b57cec5SDimitry Andric   switch (MI.getOpcode()) {
1120b57cec5SDimitry Andric   default:
1130b57cec5SDimitry Andric     return false;
1140b57cec5SDimitry Andric   case AArch64::STRSui:
1150b57cec5SDimitry Andric   case AArch64::STRDui:
1160b57cec5SDimitry Andric   case AArch64::STURSi:
1170b57cec5SDimitry Andric   case AArch64::STURDi:
1180b57cec5SDimitry Andric     return true;
1190b57cec5SDimitry Andric   }
1200b57cec5SDimitry Andric }
1210b57cec5SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)1220b57cec5SDimitry Andric bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
123349cc55cSDimitry Andric   if (skipFunction(MF.getFunction()) || MF.getFunction().hasOptSize())
1240b57cec5SDimitry Andric     return false;
1250b57cec5SDimitry Andric 
1265f757f3fSDimitry Andric   const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
1275f757f3fSDimitry Andric   if (!ST.enableStorePairSuppress())
1285f757f3fSDimitry Andric     return false;
1295f757f3fSDimitry Andric 
1300b57cec5SDimitry Andric   TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
1310b57cec5SDimitry Andric   TRI = ST.getRegisterInfo();
1320b57cec5SDimitry Andric   MRI = &MF.getRegInfo();
1330b57cec5SDimitry Andric   SchedModel.init(&ST);
1340b57cec5SDimitry Andric   Traces = &getAnalysis<MachineTraceMetrics>();
1350b57cec5SDimitry Andric   MinInstr = nullptr;
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << MF.getName() << '\n');
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric   if (!SchedModel.hasInstrSchedModel()) {
1400b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Skipping pass: no machine model present.\n");
1410b57cec5SDimitry Andric     return false;
1420b57cec5SDimitry Andric   }
1430b57cec5SDimitry Andric 
1440b57cec5SDimitry Andric   // Check for a sequence of stores to the same base address. We don't need to
1450b57cec5SDimitry Andric   // precisely determine whether a store pair can be formed. But we do want to
1460b57cec5SDimitry Andric   // filter out most situations where we can't form store pairs to avoid
1470b57cec5SDimitry Andric   // computing trace metrics in those cases.
1480b57cec5SDimitry Andric   for (auto &MBB : MF) {
1490b57cec5SDimitry Andric     bool SuppressSTP = false;
1500b57cec5SDimitry Andric     unsigned PrevBaseReg = 0;
1510b57cec5SDimitry Andric     for (auto &MI : MBB) {
1520b57cec5SDimitry Andric       if (!isNarrowFPStore(MI))
1530b57cec5SDimitry Andric         continue;
1540b57cec5SDimitry Andric       const MachineOperand *BaseOp;
1550b57cec5SDimitry Andric       int64_t Offset;
1565ffd83dbSDimitry Andric       bool OffsetIsScalable;
1575ffd83dbSDimitry Andric       if (TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable,
1585ffd83dbSDimitry Andric                                        TRI) &&
1590b57cec5SDimitry Andric           BaseOp->isReg()) {
1608bcb0991SDimitry Andric         Register BaseReg = BaseOp->getReg();
1610b57cec5SDimitry Andric         if (PrevBaseReg == BaseReg) {
1620b57cec5SDimitry Andric           // If this block can take STPs, skip ahead to the next block.
1630b57cec5SDimitry Andric           if (!SuppressSTP && shouldAddSTPToBlock(MI.getParent()))
1640b57cec5SDimitry Andric             break;
1650b57cec5SDimitry Andric           // Otherwise, continue unpairing the stores in this block.
1660b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "Unpairing store " << MI << "\n");
1670b57cec5SDimitry Andric           SuppressSTP = true;
1680b57cec5SDimitry Andric           TII->suppressLdStPair(MI);
1690b57cec5SDimitry Andric         }
1700b57cec5SDimitry Andric         PrevBaseReg = BaseReg;
1710b57cec5SDimitry Andric       } else
1720b57cec5SDimitry Andric         PrevBaseReg = 0;
1730b57cec5SDimitry Andric     }
1740b57cec5SDimitry Andric   }
1750b57cec5SDimitry Andric   // This pass just sets some internal MachineMemOperand flags. It can't really
1760b57cec5SDimitry Andric   // invalidate anything.
1770b57cec5SDimitry Andric   return false;
1780b57cec5SDimitry Andric }
179