1 //=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does post-instruction-selection optimizations in the GlobalISel
10 // pipeline, before the rest of codegen runs.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64.h"
15 #include "AArch64TargetMachine.h"
16 #include "MCTargetDesc/AArch64MCTargetDesc.h"
17 #include "llvm/CodeGen/MachineBasicBlock.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineOperand.h"
20 #include "llvm/CodeGen/TargetPassConfig.h"
21 #include "llvm/Support/Debug.h"
22 
23 #define DEBUG_TYPE "aarch64-post-select-optimize"
24 
25 using namespace llvm;
26 
namespace {
/// Machine function pass that runs immediately after GlobalISel instruction
/// selection to clean up selected AArch64 code — currently it rewrites
/// redundant flag-setting (NZCV-defining) instructions per basic block.
class AArch64PostSelectOptimize : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostSelectOptimize();

  StringRef getPassName() const override {
    return "AArch64 Post Select Optimizer";
  }

  /// Entry point: runs optimizeNZCVDefs over every block of \p MF.
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  /// Convert flag-setting ops between identical FCMPs into their
  /// non-flag-setting variants, and mark dead NZCV defs as dead.
  /// Returns true if any instruction was modified.
  bool optimizeNZCVDefs(MachineBasicBlock &MBB);
};
} // end anonymous namespace
46 
47 void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
48   AU.addRequired<TargetPassConfig>();
49   AU.setPreservesCFG();
50   getSelectionDAGFallbackAnalysisUsage(AU);
51   MachineFunctionPass::getAnalysisUsage(AU);
52 }
53 
// Constructor registers this pass with the global PassRegistry so the pass
// manager can look it up by ID.
AArch64PostSelectOptimize::AArch64PostSelectOptimize()
    : MachineFunctionPass(ID) {
  initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
}
58 
59 unsigned getNonFlagSettingVariant(unsigned Opc) {
60   switch (Opc) {
61   default:
62     return 0;
63   case AArch64::SUBSXrr:
64     return AArch64::SUBXrr;
65   case AArch64::SUBSWrr:
66     return AArch64::SUBWrr;
67   case AArch64::SUBSXrs:
68     return AArch64::SUBXrs;
69   case AArch64::SUBSXri:
70     return AArch64::SUBXri;
71   case AArch64::SUBSWri:
72     return AArch64::SUBWri;
73   }
74 }
75 
bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
  // Consider the following code:
  //  FCMPSrr %0, %1, implicit-def $nzcv
  //  %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
  //  %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
  //  FCMPSrr %0, %1, implicit-def $nzcv
  //  %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
  // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
  // when we have a single IR fcmp being used by two selects. During selection,
  // to ensure that there can be no clobbering of nzcv between the fcmp and the
  // csel, we have to generate an fcmp immediately before each csel is
  // selected.
  // However, often we can essentially CSE these together later in MachineCSE.
  // This doesn't work though if there are unrelated flag-setting instructions
  // in between the two FCMPs. In this case, the SUBS defines NZCV
  // but it doesn't have any users, being overwritten by the second FCMP.
  //
  // Our solution here is to try to convert flag setting operations between
  // an interval of identical FCMPs, so that CSE will be able to eliminate one.
  bool Changed = false;
  const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo();

  // The first step is to find the first and last FCMPs. If we have found
  // at least two, then set the limit of the bottom-up walk to the first FCMP
  // found since we're only interested in dealing with instructions between
  // them.
  // Note: FirstCmp is the earliest FCMP in program order; LastCmp is only
  // non-null when at least two FCMPs exist in the block.
  MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
  for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
    if (MI.getOpcode() == AArch64::FCMPSrr ||
        MI.getOpcode() == AArch64::FCMPDrr) {
      if (!FirstCmp)
        FirstCmp = &MI;
      else
        LastCmp = &MI;
    }
  }

  // In addition to converting flag-setting ops in fcmp ranges into non-flag
  // setting ops, across the whole basic block we also detect when nzcv
  // implicit-defs are dead, and mark them as dead. Peephole optimizations need
  // this information later.

  // Walk the block bottom-up, tracking register liveness so we can tell when
  // an NZCV def has no reader before the next def (i.e. it is dead).
  LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
  LRU.addLiveOuts(MBB);
  // NZCVDead tracks whether NZCV is dead immediately *after* the instruction
  // being visited (initialized from the block's live-outs).
  bool NZCVDead = LRU.available(AArch64::NZCV);
  bool InsideCmpRange = false;
  for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
    // After stepBackward, LRU reflects liveness immediately *before* II.
    LRU.stepBackward(II);

    if (LastCmp) { // There's a range present in this block.
      // If we're inside an fcmp range, look for begin instruction.
      // (Bottom-up walk: we enter the range at LastCmp and leave it once we
      // reach FirstCmp.)
      if (InsideCmpRange && &II == FirstCmp)
        InsideCmpRange = false;
      else if (&II == LastCmp)
        InsideCmpRange = true;
    }

    // Did this instruction define NZCV?
    bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
    // Require NZCV to be dead both after II (no later reader of this def)
    // and before II for the def to be rewritable/markable as dead.
    if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
      // If we have a def and NZCV is dead, then we may convert this op.
      unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
      int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
      if (DeadNZCVIdx != -1) {
        // If we're inside an fcmp range, then convert flag setting ops.
        if (InsideCmpRange && NewOpc) {
          LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
                               "op in fcmp range: "
                            << II);
          // Swap to the non-flag-setting opcode and drop the now-meaningless
          // NZCV implicit-def operand.
          II.setDesc(TII->get(NewOpc));
          II.RemoveOperand(DeadNZCVIdx);
          Changed |= true;
        } else {
          // Otherwise, we just set the nzcv imp-def operand to be dead, so the
          // peephole optimizations can optimize them further.
          II.getOperand(DeadNZCVIdx).setIsDead();
        }
      }
    }

    // Carry the "dead before II" state forward: for the next (earlier)
    // instruction visited, this is the deadness immediately after it.
    NZCVDead = NZCVDeadAtCurrInstr;
  }
  return Changed;
}
160 
161 bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
162   if (MF.getProperties().hasProperty(
163           MachineFunctionProperties::Property::FailedISel))
164     return false;
165   assert(MF.getProperties().hasProperty(
166              MachineFunctionProperties::Property::Selected) &&
167          "Expected a selected MF");
168 
169   bool Changed = false;
170   for (auto &BB : MF)
171     Changed |= optimizeNZCVDefs(BB);
172   return true;
173 }
174 
// Pass identification: the address of ID uniquely identifies this pass.
char AArch64PostSelectOptimize::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
                      "Optimize AArch64 selected instructions",
                      false, false)
INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
                    "Optimize AArch64 selected instructions", false,
                    false)

namespace llvm {
// Factory used by the AArch64 target machine to add this pass to the
// GlobalISel pipeline. The returned pass is owned by the pass manager.
FunctionPass *createAArch64PostSelectOptimize() {
  return new AArch64PostSelectOptimize();
}
} // end namespace llvm
188