1 //=== AArch64PostSelectOptimize.cpp ---------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does post-instruction-selection optimizations in the GlobalISel
10 // pipeline, before the rest of codegen runs.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AArch64.h"
15 #include "AArch64TargetMachine.h"
16 #include "MCTargetDesc/AArch64MCTargetDesc.h"
17 #include "llvm/CodeGen/GlobalISel/Utils.h"
18 #include "llvm/CodeGen/MachineBasicBlock.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineOperand.h"
21 #include "llvm/CodeGen/TargetPassConfig.h"
22 #include "llvm/Support/Debug.h"
23
24 #define DEBUG_TYPE "aarch64-post-select-optimize"
25
26 using namespace llvm;
27
28 namespace {
29 class AArch64PostSelectOptimize : public MachineFunctionPass {
30 public:
31 static char ID;
32
33 AArch64PostSelectOptimize();
34
getPassName() const35 StringRef getPassName() const override {
36 return "AArch64 Post Select Optimizer";
37 }
38
39 bool runOnMachineFunction(MachineFunction &MF) override;
40
41 void getAnalysisUsage(AnalysisUsage &AU) const override;
42
43 private:
44 bool optimizeNZCVDefs(MachineBasicBlock &MBB);
45 };
46 } // end anonymous namespace
47
getAnalysisUsage(AnalysisUsage & AU) const48 void AArch64PostSelectOptimize::getAnalysisUsage(AnalysisUsage &AU) const {
49 AU.addRequired<TargetPassConfig>();
50 AU.setPreservesCFG();
51 getSelectionDAGFallbackAnalysisUsage(AU);
52 MachineFunctionPass::getAnalysisUsage(AU);
53 }
54
AArch64PostSelectOptimize()55 AArch64PostSelectOptimize::AArch64PostSelectOptimize()
56 : MachineFunctionPass(ID) {
57 initializeAArch64PostSelectOptimizePass(*PassRegistry::getPassRegistry());
58 }
59
getNonFlagSettingVariant(unsigned Opc)60 unsigned getNonFlagSettingVariant(unsigned Opc) {
61 switch (Opc) {
62 default:
63 return 0;
64 case AArch64::SUBSXrr:
65 return AArch64::SUBXrr;
66 case AArch64::SUBSWrr:
67 return AArch64::SUBWrr;
68 case AArch64::SUBSXrs:
69 return AArch64::SUBXrs;
70 case AArch64::SUBSXri:
71 return AArch64::SUBXri;
72 case AArch64::SUBSWri:
73 return AArch64::SUBWri;
74 }
75 }
76
optimizeNZCVDefs(MachineBasicBlock & MBB)77 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
78 // Consider the following code:
79 // FCMPSrr %0, %1, implicit-def $nzcv
80 // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
81 // %sub:gpr32 = SUBSWrr %_, %_, implicit-def $nzcv
82 // FCMPSrr %0, %1, implicit-def $nzcv
83 // %sel2:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
84 // This kind of code where we have 2 FCMPs each feeding a CSEL can happen
85 // when we have a single IR fcmp being used by two selects. During selection,
86 // to ensure that there can be no clobbering of nzcv between the fcmp and the
87 // csel, we have to generate an fcmp immediately before each csel is
88 // selected.
89 // However, often we can essentially CSE these together later in MachineCSE.
90 // This doesn't work though if there are unrelated flag-setting instructions
91 // in between the two FCMPs. In this case, the SUBS defines NZCV
92 // but it doesn't have any users, being overwritten by the second FCMP.
93 //
94 // Our solution here is to try to convert flag setting operations between
95 // a interval of identical FCMPs, so that CSE will be able to eliminate one.
96 bool Changed = false;
97 auto &MF = *MBB.getParent();
98 auto &Subtarget = MF.getSubtarget();
99 const auto &TII = Subtarget.getInstrInfo();
100 auto TRI = Subtarget.getRegisterInfo();
101 auto RBI = Subtarget.getRegBankInfo();
102 auto &MRI = MF.getRegInfo();
103
104 // The first step is to find the first and last FCMPs. If we have found
105 // at least two, then set the limit of the bottom-up walk to the first FCMP
106 // found since we're only interested in dealing with instructions between
107 // them.
108 MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
109 for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
110 if (MI.getOpcode() == AArch64::FCMPSrr ||
111 MI.getOpcode() == AArch64::FCMPDrr) {
112 if (!FirstCmp)
113 FirstCmp = &MI;
114 else
115 LastCmp = &MI;
116 }
117 }
118
119 // In addition to converting flag-setting ops in fcmp ranges into non-flag
120 // setting ops, across the whole basic block we also detect when nzcv
121 // implicit-defs are dead, and mark them as dead. Peephole optimizations need
122 // this information later.
123
124 LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
125 LRU.addLiveOuts(MBB);
126 bool NZCVDead = LRU.available(AArch64::NZCV);
127 bool InsideCmpRange = false;
128 for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
129 LRU.stepBackward(II);
130
131 if (LastCmp) { // There's a range present in this block.
132 // If we're inside an fcmp range, look for begin instruction.
133 if (InsideCmpRange && &II == FirstCmp)
134 InsideCmpRange = false;
135 else if (&II == LastCmp)
136 InsideCmpRange = true;
137 }
138
139 // Did this instruction define NZCV?
140 bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
141 if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
142 // If we have a def and NZCV is dead, then we may convert this op.
143 unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
144 int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
145 if (DeadNZCVIdx != -1) {
146 // If we're inside an fcmp range, then convert flag setting ops.
147 if (InsideCmpRange && NewOpc) {
148 LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
149 "op in fcmp range: "
150 << II);
151 II.setDesc(TII->get(NewOpc));
152 II.RemoveOperand(DeadNZCVIdx);
153 // Changing the opcode can result in differing regclass requirements,
154 // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp.
155 // Constrain the regclasses, possibly introducing a copy.
156 constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(),
157 II.getOperand(0), 0);
158 Changed |= true;
159 } else {
160 // Otherwise, we just set the nzcv imp-def operand to be dead, so the
161 // peephole optimizations can optimize them further.
162 II.getOperand(DeadNZCVIdx).setIsDead();
163 }
164 }
165 }
166
167 NZCVDead = NZCVDeadAtCurrInstr;
168 }
169 return Changed;
170 }
171
runOnMachineFunction(MachineFunction & MF)172 bool AArch64PostSelectOptimize::runOnMachineFunction(MachineFunction &MF) {
173 if (MF.getProperties().hasProperty(
174 MachineFunctionProperties::Property::FailedISel))
175 return false;
176 assert(MF.getProperties().hasProperty(
177 MachineFunctionProperties::Property::Selected) &&
178 "Expected a selected MF");
179
180 bool Changed = false;
181 for (auto &BB : MF)
182 Changed |= optimizeNZCVDefs(BB);
183 return Changed;
184 }
185
186 char AArch64PostSelectOptimize::ID = 0;
187 INITIALIZE_PASS_BEGIN(AArch64PostSelectOptimize, DEBUG_TYPE,
188 "Optimize AArch64 selected instructions",
189 false, false)
190 INITIALIZE_PASS_END(AArch64PostSelectOptimize, DEBUG_TYPE,
191 "Optimize AArch64 selected instructions", false,
192 false)
193
194 namespace llvm {
createAArch64PostSelectOptimize()195 FunctionPass *createAArch64PostSelectOptimize() {
196 return new AArch64PostSelectOptimize();
197 }
198 } // end namespace llvm
199