1 //=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does post-instruction-selection optimizations in the GlobalISel
10 // pipeline, before the rest of codegen runs.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AArch64.h"
15 #include "AArch64TargetMachine.h"
16 #include "MCTargetDesc/AArch64MCTargetDesc.h"
17 #include "llvm/CodeGen/MachineBasicBlock.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineOperand.h"
20 #include "llvm/CodeGen/TargetPassConfig.h"
21 #include "llvm/Support/Debug.h"
22
23 #define DEBUG_TYPE "aarch64-post-select-optimize"
24
25 using namespace llvm;
26
27 namespace {
28 class AArch64PostSelectOptimize : public MachineFunctionPass {
29 public:
30 static char ID;
31
32 AArch64PostSelectOptimize();
33
getPassName() const34 StringRef getPassName() const override {
35 return "AArch64 Post Select Optimizer";
36 }
37
38 bool runOnMachineFunction(MachineFunction &MF) override;
39
40 void getAnalysisUsage(AnalysisUsage &AU) const override;
41
42 private:
43 bool optimizeNZCVDefs(MachineBasicBlock &MBB);
44 };
45 } // end anonymous namespace
46
getAnalysisUsage(AnalysisUsage & AU) const47 void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
48 AU.addRequired<TargetPassConfig>();
49 AU.setPreservesCFG();
50 getSelectionDAGFallbackAnalysisUsage(AU);
51 MachineFunctionPass::getAnalysisUsage(AU);
52 }
53
AArch64PostSelectOptimize()54 AArch64PostSelectOptimize::AArch64PostSelectOptimize()
55 : MachineFunctionPass(ID) {
56 initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
57 }
58
getNonFlagSettingVariant(unsigned Opc)59 unsigned getNonFlagSettingVariant(unsigned Opc) {
60 switch (Opc) {
61 default:
62 return 0;
63 case AArch64::SUBSXrr:
64 return AArch64::SUBXrr;
65 case AArch64::SUBSWrr:
66 return AArch64::SUBWrr;
67 case AArch64::SUBSXrs:
68 return AArch64::SUBXrs;
69 case AArch64::SUBSXri:
70 return AArch64::SUBXri;
71 case AArch64::SUBSWri:
72 return AArch64::SUBWri;
73 }
74 }
75
optimizeNZCVDefs(MachineBasicBlock & MBB)76 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
77 // Consider the following code:
78 // FCMPSrr %0, %1, implicit-def $nzcv
79 // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
80 // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
81 // FCMPSrr %0, %1, implicit-def $nzcv
82 // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
83 // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
84 // when we have a single IR fcmp being used by two selects. During selection,
85 // to ensure that there can be no clobbering of nzcv between the fcmp and the
86 // csel, we have to generate an fcmp immediately before each csel is
87 // selected.
88 // However, often we can essentially CSE these together later in MachineCSE.
89 // This doesn't work though if there are unrelated flag-setting instructions
90 // in between the two FCMPs. In this case, the SUBS defines NZCV
91 // but it doesn't have any users, being overwritten by the second FCMP.
92 //
93 // Our solution here is to try to convert flag setting operations between
94 // a interval of identical FCMPs, so that CSE will be able to eliminate one.
95 bool Changed = false;
96 const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo();
97
98 // The first step is to find the first and last FCMPs. If we have found
99 // at least two, then set the limit of the bottom-up walk to the first FCMP
100 // found since we're only interested in dealing with instructions between
101 // them.
102 MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
103 for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
104 if (MI.getOpcode() == AArch64::FCMPSrr ||
105 MI.getOpcode() == AArch64::FCMPDrr) {
106 if (!FirstCmp)
107 FirstCmp = &MI;
108 else
109 LastCmp = &MI;
110 }
111 }
112
113 // In addition to converting flag-setting ops in fcmp ranges into non-flag
114 // setting ops, across the whole basic block we also detect when nzcv
115 // implicit-defs are dead, and mark them as dead. Peephole optimizations need
116 // this information later.
117
118 LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
119 LRU.addLiveOuts(MBB);
120 bool NZCVDead = LRU.available(AArch64::NZCV);
121 bool InsideCmpRange = false;
122 for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
123 LRU.stepBackward(II);
124
125 if (LastCmp) { // There's a range present in this block.
126 // If we're inside an fcmp range, look for begin instruction.
127 if (InsideCmpRange && &II == FirstCmp)
128 InsideCmpRange = false;
129 else if (&II == LastCmp)
130 InsideCmpRange = true;
131 }
132
133 // Did this instruction define NZCV?
134 bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
135 if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
136 // If we have a def and NZCV is dead, then we may convert this op.
137 unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
138 int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
139 if (DeadNZCVIdx != -1) {
140 // If we're inside an fcmp range, then convert flag setting ops.
141 if (InsideCmpRange && NewOpc) {
142 LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
143 "op in fcmp range: "
144 << II);
145 II.setDesc(TII->get(NewOpc));
146 II.RemoveOperand(DeadNZCVIdx);
147 Changed |= true;
148 } else {
149 // Otherwise, we just set the nzcv imp-def operand to be dead, so the
150 // peephole optimizations can optimize them further.
151 II.getOperand(DeadNZCVIdx).setIsDead();
152 }
153 }
154 }
155
156 NZCVDead = NZCVDeadAtCurrInstr;
157 }
158 return Changed;
159 }
160
runOnMachineFunction(MachineFunction & MF)161 bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
162 if (MF.getProperties().hasProperty(
163 MachineFunctionProperties::Property::FailedISel))
164 return false;
165 assert(MF.getProperties().hasProperty(
166 MachineFunctionProperties::Property::Selected) &&
167 "Expected a selected MF");
168
169 bool Changed = false;
170 for (auto &BB : MF)
171 Changed |= optimizeNZCVDefs(BB);
172 return true;
173 }
174
175 char AArch64PostSelectOptimize::ID = 0;
176 INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
177 "Optimize AArch64 selected instructions",
178 false, false)
179 INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
180 "Optimize AArch64 selected instructions", false,
181 false)
182
183 namespace llvm {
createAArch64PostSelectOptimize()184 FunctionPass *createAArch64PostSelectOptimize() {
185 return new AArch64PostSelectOptimize();
186 }
187 } // end namespace llvm
188