1e8d8bef9SDimitry Andric //=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
9e8d8bef9SDimitry Andric // This pass does post-instruction-selection optimizations in the GlobalISel
10e8d8bef9SDimitry Andric // pipeline, before the rest of codegen runs.
11e8d8bef9SDimitry Andric //
12e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
13e8d8bef9SDimitry Andric 
14e8d8bef9SDimitry Andric #include "AArch64.h"
15e8d8bef9SDimitry Andric #include "AArch64TargetMachine.h"
16e8d8bef9SDimitry Andric #include "MCTargetDesc/AArch64MCTargetDesc.h"
17bdd1243dSDimitry Andric #include "llvm/ADT/STLExtras.h"
18fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h"
19e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
20e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
21bdd1243dSDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
22e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
23e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
24e8d8bef9SDimitry Andric #include "llvm/Support/Debug.h"
25bdd1243dSDimitry Andric #include "llvm/Support/ErrorHandling.h"
26e8d8bef9SDimitry Andric 
27e8d8bef9SDimitry Andric #define DEBUG_TYPE "aarch64-post-select-optimize"
28e8d8bef9SDimitry Andric 
29e8d8bef9SDimitry Andric using namespace llvm;
30e8d8bef9SDimitry Andric 
31e8d8bef9SDimitry Andric namespace {
32e8d8bef9SDimitry Andric class AArch64PostSelectOptimize : public MachineFunctionPass {
33e8d8bef9SDimitry Andric public:
34e8d8bef9SDimitry Andric   static char ID;
35e8d8bef9SDimitry Andric 
36e8d8bef9SDimitry Andric   AArch64PostSelectOptimize();
37e8d8bef9SDimitry Andric 
getPassName() const38e8d8bef9SDimitry Andric   StringRef getPassName() const override {
39e8d8bef9SDimitry Andric     return "AArch64 Post Select Optimizer";
40e8d8bef9SDimitry Andric   }
41e8d8bef9SDimitry Andric 
42e8d8bef9SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
43e8d8bef9SDimitry Andric 
44e8d8bef9SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
45e8d8bef9SDimitry Andric 
46e8d8bef9SDimitry Andric private:
47e8d8bef9SDimitry Andric   bool optimizeNZCVDefs(MachineBasicBlock &MBB);
48bdd1243dSDimitry Andric   bool doPeepholeOpts(MachineBasicBlock &MBB);
49bdd1243dSDimitry Andric   /// Look for cross regclass copies that can be trivially eliminated.
50bdd1243dSDimitry Andric   bool foldSimpleCrossClassCopies(MachineInstr &MI);
51e8d8bef9SDimitry Andric };
52e8d8bef9SDimitry Andric } // end anonymous namespace
53e8d8bef9SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const54e8d8bef9SDimitry Andric void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
55e8d8bef9SDimitry Andric   AU.addRequired<TargetPassConfig>();
56e8d8bef9SDimitry Andric   AU.setPreservesCFG();
57e8d8bef9SDimitry Andric   getSelectionDAGFallbackAnalysisUsage(AU);
58e8d8bef9SDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
59e8d8bef9SDimitry Andric }
60e8d8bef9SDimitry Andric 
AArch64PostSelectOptimize()61e8d8bef9SDimitry Andric AArch64PostSelectOptimize::AArch64PostSelectOptimize()
62e8d8bef9SDimitry Andric     : MachineFunctionPass(ID) {
63e8d8bef9SDimitry Andric   initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
64e8d8bef9SDimitry Andric }
65e8d8bef9SDimitry Andric 
getNonFlagSettingVariant(unsigned Opc)66e8d8bef9SDimitry Andric unsigned getNonFlagSettingVariant(unsigned Opc) {
67e8d8bef9SDimitry Andric   switch (Opc) {
68e8d8bef9SDimitry Andric   default:
69e8d8bef9SDimitry Andric     return 0;
70e8d8bef9SDimitry Andric   case AArch64::SUBSXrr:
71e8d8bef9SDimitry Andric     return AArch64::SUBXrr;
72e8d8bef9SDimitry Andric   case AArch64::SUBSWrr:
73e8d8bef9SDimitry Andric     return AArch64::SUBWrr;
74e8d8bef9SDimitry Andric   case AArch64::SUBSXrs:
75e8d8bef9SDimitry Andric     return AArch64::SUBXrs;
76*06c3fb27SDimitry Andric   case AArch64::SUBSWrs:
77*06c3fb27SDimitry Andric     return AArch64::SUBWrs;
78e8d8bef9SDimitry Andric   case AArch64::SUBSXri:
79e8d8bef9SDimitry Andric     return AArch64::SUBXri;
80e8d8bef9SDimitry Andric   case AArch64::SUBSWri:
81e8d8bef9SDimitry Andric     return AArch64::SUBWri;
82*06c3fb27SDimitry Andric   case AArch64::ADDSXrr:
83*06c3fb27SDimitry Andric     return AArch64::ADDXrr;
84*06c3fb27SDimitry Andric   case AArch64::ADDSWrr:
85*06c3fb27SDimitry Andric     return AArch64::ADDWrr;
86*06c3fb27SDimitry Andric   case AArch64::ADDSXrs:
87*06c3fb27SDimitry Andric     return AArch64::ADDXrs;
88*06c3fb27SDimitry Andric   case AArch64::ADDSWrs:
89*06c3fb27SDimitry Andric     return AArch64::ADDWrs;
90*06c3fb27SDimitry Andric   case AArch64::ADDSXri:
91*06c3fb27SDimitry Andric     return AArch64::ADDXri;
92*06c3fb27SDimitry Andric   case AArch64::ADDSWri:
93*06c3fb27SDimitry Andric     return AArch64::ADDWri;
94*06c3fb27SDimitry Andric   case AArch64::SBCSXr:
95*06c3fb27SDimitry Andric     return AArch64::SBCXr;
96*06c3fb27SDimitry Andric   case AArch64::SBCSWr:
97*06c3fb27SDimitry Andric     return AArch64::SBCWr;
98*06c3fb27SDimitry Andric   case AArch64::ADCSXr:
99*06c3fb27SDimitry Andric     return AArch64::ADCXr;
100*06c3fb27SDimitry Andric   case AArch64::ADCSWr:
101*06c3fb27SDimitry Andric     return AArch64::ADCWr;
102e8d8bef9SDimitry Andric   }
103e8d8bef9SDimitry Andric }
104e8d8bef9SDimitry Andric 
doPeepholeOpts(MachineBasicBlock & MBB)105bdd1243dSDimitry Andric bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) {
106bdd1243dSDimitry Andric   bool Changed = false;
107bdd1243dSDimitry Andric   for (auto &MI : make_early_inc_range(make_range(MBB.begin(), MBB.end()))) {
108bdd1243dSDimitry Andric     Changed |= foldSimpleCrossClassCopies(MI);
109bdd1243dSDimitry Andric   }
110bdd1243dSDimitry Andric   return Changed;
111bdd1243dSDimitry Andric }
112bdd1243dSDimitry Andric 
foldSimpleCrossClassCopies(MachineInstr & MI)113bdd1243dSDimitry Andric bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
114bdd1243dSDimitry Andric   auto *MF = MI.getMF();
115bdd1243dSDimitry Andric   auto &MRI = MF->getRegInfo();
116bdd1243dSDimitry Andric 
117bdd1243dSDimitry Andric   if (!MI.isCopy())
118bdd1243dSDimitry Andric     return false;
119bdd1243dSDimitry Andric 
120bdd1243dSDimitry Andric   if (MI.getOperand(1).getSubReg())
121bdd1243dSDimitry Andric     return false; // Don't deal with subreg copies
122bdd1243dSDimitry Andric 
123bdd1243dSDimitry Andric   Register Src = MI.getOperand(1).getReg();
124bdd1243dSDimitry Andric   Register Dst = MI.getOperand(0).getReg();
125bdd1243dSDimitry Andric 
126bdd1243dSDimitry Andric   if (Src.isPhysical() || Dst.isPhysical())
127bdd1243dSDimitry Andric     return false;
128bdd1243dSDimitry Andric 
129bdd1243dSDimitry Andric   const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
130bdd1243dSDimitry Andric   const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
131bdd1243dSDimitry Andric 
132bdd1243dSDimitry Andric   if (SrcRC == DstRC)
133bdd1243dSDimitry Andric     return false;
134bdd1243dSDimitry Andric 
135bdd1243dSDimitry Andric 
136bdd1243dSDimitry Andric   if (SrcRC->hasSubClass(DstRC)) {
137bdd1243dSDimitry Andric     // This is the case where the source class is a superclass of the dest, so
138bdd1243dSDimitry Andric     // if the copy is the only user of the source, we can just constrain the
139bdd1243dSDimitry Andric     // source reg to the dest class.
140bdd1243dSDimitry Andric 
141bdd1243dSDimitry Andric     if (!MRI.hasOneNonDBGUse(Src))
142bdd1243dSDimitry Andric       return false; // Only constrain single uses of the source.
143bdd1243dSDimitry Andric 
144bdd1243dSDimitry Andric     // Constrain to dst reg class as long as it's not a weird class that only
145bdd1243dSDimitry Andric     // has a few registers.
146bdd1243dSDimitry Andric     if (!MRI.constrainRegClass(Src, DstRC, /* MinNumRegs */ 25))
147bdd1243dSDimitry Andric       return false;
148bdd1243dSDimitry Andric   } else if (DstRC->hasSubClass(SrcRC)) {
149bdd1243dSDimitry Andric     // This is the inverse case, where the destination class is a superclass of
150bdd1243dSDimitry Andric     // the source. Here, if the copy is the only user, we can just constrain
151bdd1243dSDimitry Andric     // the user of the copy to use the smaller class of the source.
152bdd1243dSDimitry Andric   } else {
153bdd1243dSDimitry Andric     return false;
154bdd1243dSDimitry Andric   }
155bdd1243dSDimitry Andric 
156bdd1243dSDimitry Andric   MRI.replaceRegWith(Dst, Src);
157bdd1243dSDimitry Andric   MI.eraseFromParent();
158bdd1243dSDimitry Andric   return true;
159bdd1243dSDimitry Andric }
160bdd1243dSDimitry Andric 
optimizeNZCVDefs(MachineBasicBlock & MBB)161e8d8bef9SDimitry Andric bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
162*06c3fb27SDimitry Andric   // If we find a dead NZCV implicit-def, we
163*06c3fb27SDimitry Andric   // - try to convert the operation to a non-flag-setting equivalent
164*06c3fb27SDimitry Andric   // - or mark the def as dead to aid later peephole optimizations.
165*06c3fb27SDimitry Andric 
166*06c3fb27SDimitry Andric   // Use cases:
167*06c3fb27SDimitry Andric   // 1)
168e8d8bef9SDimitry Andric   // Consider the following code:
169e8d8bef9SDimitry Andric   //  FCMPSrr %0, %1, implicit-def $nzcv
170e8d8bef9SDimitry Andric   //  %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
171e8d8bef9SDimitry Andric   //  %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
172e8d8bef9SDimitry Andric   //  FCMPSrr %0, %1, implicit-def $nzcv
173e8d8bef9SDimitry Andric   //  %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
174e8d8bef9SDimitry Andric   // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
175e8d8bef9SDimitry Andric   // when we have a single IR fcmp being used by two selects. During selection,
176e8d8bef9SDimitry Andric   // to ensure that there can be no clobbering of nzcv between the fcmp and the
177e8d8bef9SDimitry Andric   // csel, we have to generate an fcmp immediately before each csel is
178e8d8bef9SDimitry Andric   // selected.
179e8d8bef9SDimitry Andric   // However, often we can essentially CSE these together later in MachineCSE.
180e8d8bef9SDimitry Andric   // This doesn't work though if there are unrelated flag-setting instructions
181e8d8bef9SDimitry Andric   // in between the two FCMPs. In this case, the SUBS defines NZCV
182e8d8bef9SDimitry Andric   // but it doesn't have any users, being overwritten by the second FCMP.
183e8d8bef9SDimitry Andric   //
184*06c3fb27SDimitry Andric   // 2)
185*06c3fb27SDimitry Andric   // The instruction selector always emits the flag-setting variant of ADC/SBC
186*06c3fb27SDimitry Andric   // while selecting G_UADDE/G_SADDE/G_USUBE/G_SSUBE. If the carry-out of these
187*06c3fb27SDimitry Andric   // instructions is never used, we can switch to the non-flag-setting variant.
188*06c3fb27SDimitry Andric 
189e8d8bef9SDimitry Andric   bool Changed = false;
190fe6060f1SDimitry Andric   auto &MF = *MBB.getParent();
191fe6060f1SDimitry Andric   auto &Subtarget = MF.getSubtarget();
192fe6060f1SDimitry Andric   const auto &TII = Subtarget.getInstrInfo();
193fe6060f1SDimitry Andric   auto TRI = Subtarget.getRegisterInfo();
194fe6060f1SDimitry Andric   auto RBI = Subtarget.getRegBankInfo();
195fe6060f1SDimitry Andric   auto &MRI = MF.getRegInfo();
196e8d8bef9SDimitry Andric 
197e8d8bef9SDimitry Andric   LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
198e8d8bef9SDimitry Andric   LRU.addLiveOuts(MBB);
199*06c3fb27SDimitry Andric 
200e8d8bef9SDimitry Andric   for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
201*06c3fb27SDimitry Andric     bool NZCVDead = LRU.available(AArch64::NZCV);
202*06c3fb27SDimitry Andric     if (NZCVDead && II.definesRegister(AArch64::NZCV)) {
203*06c3fb27SDimitry Andric       // The instruction defines NZCV, but NZCV is dead.
204e8d8bef9SDimitry Andric       unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
205e8d8bef9SDimitry Andric       int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
206e8d8bef9SDimitry Andric       if (DeadNZCVIdx != -1) {
207*06c3fb27SDimitry Andric         if (NewOpc) {
208*06c3fb27SDimitry Andric           // If there is an equivalent non-flag-setting op, we convert.
209e8d8bef9SDimitry Andric           LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
210*06c3fb27SDimitry Andric                                "op: "
211e8d8bef9SDimitry Andric                             << II);
212e8d8bef9SDimitry Andric           II.setDesc(TII->get(NewOpc));
21381ad6265SDimitry Andric           II.removeOperand(DeadNZCVIdx);
214fe6060f1SDimitry Andric           // Changing the opcode can result in differing regclass requirements,
215fe6060f1SDimitry Andric           // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
216fe6060f1SDimitry Andric           // Constrain the regclasses, possibly introducing a copy.
217fe6060f1SDimitry Andric           constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
218fe6060f1SDimitry Andric                                    II.getOperand(0), 0);
219e8d8bef9SDimitry Andric           Changed |= true;
220e8d8bef9SDimitry Andric         } else {
221e8d8bef9SDimitry Andric           // Otherwise, we just set the nzcv imp-def operand to be dead, so the
222e8d8bef9SDimitry Andric           // peephole optimizations can optimize them further.
223e8d8bef9SDimitry Andric           II.getOperand(DeadNZCVIdx).setIsDead();
224e8d8bef9SDimitry Andric         }
225e8d8bef9SDimitry Andric       }
226e8d8bef9SDimitry Andric     }
227*06c3fb27SDimitry Andric     LRU.stepBackward(II);
228e8d8bef9SDimitry Andric   }
229e8d8bef9SDimitry Andric   return Changed;
230e8d8bef9SDimitry Andric }
231e8d8bef9SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)232e8d8bef9SDimitry Andric bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
233e8d8bef9SDimitry Andric   if (MF.getProperties().hasProperty(
234e8d8bef9SDimitry Andric           MachineFunctionProperties::Property::FailedISel))
235e8d8bef9SDimitry Andric     return false;
236e8d8bef9SDimitry Andric   assert(MF.getProperties().hasProperty(
237e8d8bef9SDimitry Andric              MachineFunctionProperties::Property::Selected) &&
238e8d8bef9SDimitry Andric          "Expected a selected MF");
239e8d8bef9SDimitry Andric 
240e8d8bef9SDimitry Andric   bool Changed = false;
241bdd1243dSDimitry Andric   for (auto &BB : MF) {
242e8d8bef9SDimitry Andric     Changed |= optimizeNZCVDefs(BB);
243bdd1243dSDimitry Andric     Changed |= doPeepholeOpts(BB);
244bdd1243dSDimitry Andric   }
245fe6060f1SDimitry Andric   return Changed;
246e8d8bef9SDimitry Andric }
247e8d8bef9SDimitry Andric 
248e8d8bef9SDimitry Andric char AArch64PostSelectOptimize::ID = 0;
249e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
250e8d8bef9SDimitry Andric                       "Optimize AArch64 selected instructions",
251e8d8bef9SDimitry Andric                       false, false)
252e8d8bef9SDimitry Andric INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
253e8d8bef9SDimitry Andric                     "Optimize AArch64 selected instructions", false,
254e8d8bef9SDimitry Andric                     false)
255e8d8bef9SDimitry Andric 
256e8d8bef9SDimitry Andric namespace llvm {
createAArch64PostSelectOptimize()257e8d8bef9SDimitry Andric FunctionPass *createAArch64PostSelectOptimize() {
258e8d8bef9SDimitry Andric   return new AArch64PostSelectOptimize();
259e8d8bef9SDimitry Andric }
260e8d8bef9SDimitry Andric } // end namespace llvm
261