1e8d8bef9SDimitry Andric //=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
9e8d8bef9SDimitry Andric // This pass does post-instruction-selection optimizations in the GlobalISel
10e8d8bef9SDimitry Andric // pipeline, before the rest of codegen runs.
11e8d8bef9SDimitry Andric //
12e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
13e8d8bef9SDimitry Andric 
14e8d8bef9SDimitry Andric #include "AArch64.h"
15e8d8bef9SDimitry Andric #include "AArch64TargetMachine.h"
16e8d8bef9SDimitry Andric #include "MCTargetDesc/AArch64MCTargetDesc.h"
17fe6060f1SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h"
18e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
19e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
20e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
21e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
22e8d8bef9SDimitry Andric #include "llvm/Support/Debug.h"
23e8d8bef9SDimitry Andric 
24e8d8bef9SDimitry Andric #define DEBUG_TYPE "aarch64-post-select-optimize"
25e8d8bef9SDimitry Andric 
26e8d8bef9SDimitry Andric using namespace llvm;
27e8d8bef9SDimitry Andric 
28e8d8bef9SDimitry Andric namespace {
29e8d8bef9SDimitry Andric class AArch64PostSelectOptimize : public MachineFunctionPass {
30e8d8bef9SDimitry Andric public:
31e8d8bef9SDimitry Andric   static char ID;
32e8d8bef9SDimitry Andric 
33e8d8bef9SDimitry Andric   AArch64PostSelectOptimize();
34e8d8bef9SDimitry Andric 
35e8d8bef9SDimitry Andric   StringRef getPassName() const override {
36e8d8bef9SDimitry Andric     return "AArch64 Post Select Optimizer";
37e8d8bef9SDimitry Andric   }
38e8d8bef9SDimitry Andric 
39e8d8bef9SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
40e8d8bef9SDimitry Andric 
41e8d8bef9SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
42e8d8bef9SDimitry Andric 
43e8d8bef9SDimitry Andric private:
44e8d8bef9SDimitry Andric   bool optimizeNZCVDefs(MachineBasicBlock &MBB);
45e8d8bef9SDimitry Andric };
46e8d8bef9SDimitry Andric } // end anonymous namespace
47e8d8bef9SDimitry Andric 
48e8d8bef9SDimitry Andric void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
49e8d8bef9SDimitry Andric   AU.addRequired<TargetPassConfig>();
50e8d8bef9SDimitry Andric   AU.setPreservesCFG();
51e8d8bef9SDimitry Andric   getSelectionDAGFallbackAnalysisUsage(AU);
52e8d8bef9SDimitry Andric   MachineFunctionPass::getAnalysisUsage(AU);
53e8d8bef9SDimitry Andric }
54e8d8bef9SDimitry Andric 
55e8d8bef9SDimitry Andric AArch64PostSelectOptimize::AArch64PostSelectOptimize()
56e8d8bef9SDimitry Andric     : MachineFunctionPass(ID) {
57e8d8bef9SDimitry Andric   initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
58e8d8bef9SDimitry Andric }
59e8d8bef9SDimitry Andric 
60e8d8bef9SDimitry Andric unsigned getNonFlagSettingVariant(unsigned Opc) {
61e8d8bef9SDimitry Andric   switch (Opc) {
62e8d8bef9SDimitry Andric   default:
63e8d8bef9SDimitry Andric     return 0;
64e8d8bef9SDimitry Andric   case AArch64::SUBSXrr:
65e8d8bef9SDimitry Andric     return AArch64::SUBXrr;
66e8d8bef9SDimitry Andric   case AArch64::SUBSWrr:
67e8d8bef9SDimitry Andric     return AArch64::SUBWrr;
68e8d8bef9SDimitry Andric   case AArch64::SUBSXrs:
69e8d8bef9SDimitry Andric     return AArch64::SUBXrs;
70e8d8bef9SDimitry Andric   case AArch64::SUBSXri:
71e8d8bef9SDimitry Andric     return AArch64::SUBXri;
72e8d8bef9SDimitry Andric   case AArch64::SUBSWri:
73e8d8bef9SDimitry Andric     return AArch64::SUBWri;
74e8d8bef9SDimitry Andric   }
75e8d8bef9SDimitry Andric }
76e8d8bef9SDimitry Andric 
77e8d8bef9SDimitry Andric bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
78e8d8bef9SDimitry Andric   // Consider the following code:
79e8d8bef9SDimitry Andric   //  FCMPSrr %0, %1, implicit-def $nzcv
80e8d8bef9SDimitry Andric   //  %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
81e8d8bef9SDimitry Andric   //  %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
82e8d8bef9SDimitry Andric   //  FCMPSrr %0, %1, implicit-def $nzcv
83e8d8bef9SDimitry Andric   //  %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
84e8d8bef9SDimitry Andric   // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
85e8d8bef9SDimitry Andric   // when we have a single IR fcmp being used by two selects. During selection,
86e8d8bef9SDimitry Andric   // to ensure that there can be no clobbering of nzcv between the fcmp and the
87e8d8bef9SDimitry Andric   // csel, we have to generate an fcmp immediately before each csel is
88e8d8bef9SDimitry Andric   // selected.
89e8d8bef9SDimitry Andric   // However, often we can essentially CSE these together later in MachineCSE.
90e8d8bef9SDimitry Andric   // This doesn't work though if there are unrelated flag-setting instructions
91e8d8bef9SDimitry Andric   // in between the two FCMPs. In this case, the SUBS defines NZCV
92e8d8bef9SDimitry Andric   // but it doesn't have any users, being overwritten by the second FCMP.
93e8d8bef9SDimitry Andric   //
94e8d8bef9SDimitry Andric   // Our solution here is to try to convert flag setting operations between
95e8d8bef9SDimitry Andric   // a interval of identical FCMPs, so that CSE will be able to eliminate one.
96e8d8bef9SDimitry Andric   bool Changed = false;
97fe6060f1SDimitry Andric   auto &MF = *MBB.getParent();
98fe6060f1SDimitry Andric   auto &Subtarget = MF.getSubtarget();
99fe6060f1SDimitry Andric   const auto &TII = Subtarget.getInstrInfo();
100fe6060f1SDimitry Andric   auto TRI = Subtarget.getRegisterInfo();
101fe6060f1SDimitry Andric   auto RBI = Subtarget.getRegBankInfo();
102fe6060f1SDimitry Andric   auto &MRI = MF.getRegInfo();
103e8d8bef9SDimitry Andric 
104e8d8bef9SDimitry Andric   // The first step is to find the first and last FCMPs. If we have found
105e8d8bef9SDimitry Andric   // at least two, then set the limit of the bottom-up walk to the first FCMP
106e8d8bef9SDimitry Andric   // found since we're only interested in dealing with instructions between
107e8d8bef9SDimitry Andric   // them.
108e8d8bef9SDimitry Andric   MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
109e8d8bef9SDimitry Andric   for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
110e8d8bef9SDimitry Andric     if (MI.getOpcode() == AArch64::FCMPSrr ||
111e8d8bef9SDimitry Andric         MI.getOpcode() == AArch64::FCMPDrr) {
112e8d8bef9SDimitry Andric       if (!FirstCmp)
113e8d8bef9SDimitry Andric         FirstCmp = &MI;
114e8d8bef9SDimitry Andric       else
115e8d8bef9SDimitry Andric         LastCmp = &MI;
116e8d8bef9SDimitry Andric     }
117e8d8bef9SDimitry Andric   }
118e8d8bef9SDimitry Andric 
119e8d8bef9SDimitry Andric   // In addition to converting flag-setting ops in fcmp ranges into non-flag
120e8d8bef9SDimitry Andric   // setting ops, across the whole basic block we also detect when nzcv
121e8d8bef9SDimitry Andric   // implicit-defs are dead, and mark them as dead. Peephole optimizations need
122e8d8bef9SDimitry Andric   // this information later.
123e8d8bef9SDimitry Andric 
124e8d8bef9SDimitry Andric   LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
125e8d8bef9SDimitry Andric   LRU.addLiveOuts(MBB);
126e8d8bef9SDimitry Andric   bool NZCVDead = LRU.available(AArch64::NZCV);
127e8d8bef9SDimitry Andric   bool InsideCmpRange = false;
128e8d8bef9SDimitry Andric   for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
129e8d8bef9SDimitry Andric     LRU.stepBackward(II);
130e8d8bef9SDimitry Andric 
131e8d8bef9SDimitry Andric     if (LastCmp) { // There's a range present in this block.
132e8d8bef9SDimitry Andric       // If we're inside an fcmp range, look for begin instruction.
133e8d8bef9SDimitry Andric       if (InsideCmpRange && &II == FirstCmp)
134e8d8bef9SDimitry Andric         InsideCmpRange = false;
135e8d8bef9SDimitry Andric       else if (&II == LastCmp)
136e8d8bef9SDimitry Andric         InsideCmpRange = true;
137e8d8bef9SDimitry Andric     }
138e8d8bef9SDimitry Andric 
139e8d8bef9SDimitry Andric     // Did this instruction define NZCV?
140e8d8bef9SDimitry Andric     bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
141e8d8bef9SDimitry Andric     if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
142e8d8bef9SDimitry Andric       // If we have a def and NZCV is dead, then we may convert this op.
143e8d8bef9SDimitry Andric       unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
144e8d8bef9SDimitry Andric       int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
145e8d8bef9SDimitry Andric       if (DeadNZCVIdx != -1) {
146e8d8bef9SDimitry Andric         // If we're inside an fcmp range, then convert flag setting ops.
147e8d8bef9SDimitry Andric         if (InsideCmpRange && NewOpc) {
148e8d8bef9SDimitry Andric           LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
149e8d8bef9SDimitry Andric                                "op in fcmp range: "
150e8d8bef9SDimitry Andric                             << II);
151e8d8bef9SDimitry Andric           II.setDesc(TII->get(NewOpc));
152*81ad6265SDimitry Andric           II.removeOperand(DeadNZCVIdx);
153fe6060f1SDimitry Andric           // Changing the opcode can result in differing regclass requirements,
154fe6060f1SDimitry Andric           // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
155fe6060f1SDimitry Andric           // Constrain the regclasses, possibly introducing a copy.
156fe6060f1SDimitry Andric           constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
157fe6060f1SDimitry Andric                                    II.getOperand(0), 0);
158e8d8bef9SDimitry Andric           Changed |= true;
159e8d8bef9SDimitry Andric         } else {
160e8d8bef9SDimitry Andric           // Otherwise, we just set the nzcv imp-def operand to be dead, so the
161e8d8bef9SDimitry Andric           // peephole optimizations can optimize them further.
162e8d8bef9SDimitry Andric           II.getOperand(DeadNZCVIdx).setIsDead();
163e8d8bef9SDimitry Andric         }
164e8d8bef9SDimitry Andric       }
165e8d8bef9SDimitry Andric     }
166e8d8bef9SDimitry Andric 
167e8d8bef9SDimitry Andric     NZCVDead = NZCVDeadAtCurrInstr;
168e8d8bef9SDimitry Andric   }
169e8d8bef9SDimitry Andric   return Changed;
170e8d8bef9SDimitry Andric }
171e8d8bef9SDimitry Andric 
172e8d8bef9SDimitry Andric bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
173e8d8bef9SDimitry Andric   if (MF.getProperties().hasProperty(
174e8d8bef9SDimitry Andric           MachineFunctionProperties::Property::FailedISel))
175e8d8bef9SDimitry Andric     return false;
176e8d8bef9SDimitry Andric   assert(MF.getProperties().hasProperty(
177e8d8bef9SDimitry Andric              MachineFunctionProperties::Property::Selected) &&
178e8d8bef9SDimitry Andric          "Expected a selected MF");
179e8d8bef9SDimitry Andric 
180e8d8bef9SDimitry Andric   bool Changed = false;
181e8d8bef9SDimitry Andric   for (auto &BB : MF)
182e8d8bef9SDimitry Andric     Changed |= optimizeNZCVDefs(BB);
183fe6060f1SDimitry Andric   return Changed;
184e8d8bef9SDimitry Andric }
185e8d8bef9SDimitry Andric 
186e8d8bef9SDimitry Andric char AArch64PostSelectOptimize::ID = 0;
187e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
188e8d8bef9SDimitry Andric                       "Optimize AArch64 selected instructions",
189e8d8bef9SDimitry Andric                       false, false)
190e8d8bef9SDimitry Andric INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
191e8d8bef9SDimitry Andric                     "Optimize AArch64 selected instructions", false,
192e8d8bef9SDimitry Andric                     false)
193e8d8bef9SDimitry Andric 
194e8d8bef9SDimitry Andric namespace llvm {
195e8d8bef9SDimitry Andric FunctionPass *createAArch64PostSelectOptimize() {
196e8d8bef9SDimitry Andric   return new AArch64PostSelectOptimize();
197e8d8bef9SDimitry Andric }
198e8d8bef9SDimitry Andric } // end namespace llvm
199