1 //===--------- PPCPreEmitPeephole.cpp - Late peephole optimizations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // A pre-emit peephole for catching opportunities introduced by late passes such
10 // as MachineBlockPlacement.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PPC.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCSubtarget.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/Statistic.h"
19 #include "llvm/CodeGen/LivePhysRegs.h"
20 #include "llvm/CodeGen/MachineBasicBlock.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/ADT/Statistic.h"
26 #include "llvm/Support/Debug.h"
27 
using namespace llvm;

// Name used by the LLVM_DEBUG and STATISTIC macros in this file; it is also
// passed to INITIALIZE_PASS as the pass argument further down.
#define DEBUG_TYPE "ppc-pre-emit-peephole"

// Counters maintained by the pass. Each one is incremented at the point in
// PPCPreEmitPeephole where the corresponding transformation is applied.
STATISTIC(NumRRConvertedInPreEmit,
          "Number of r+r instructions converted to r+i in pre-emit peephole");
STATISTIC(NumRemovedInPreEmit,
          "Number of instructions deleted in pre-emit peephole");
STATISTIC(NumberOfSelfCopies,
          "Number of self copy instructions eliminated");
STATISTIC(NumFrameOffFoldInPreEmit,
          "Number of folding frame offset by using r+r in pre-emit peephole");

// Hidden command-line switch, enabled by default. When it is false,
// runOnMachineFunction only strips UNENCODED_NOP instructions and performs
// none of the peephole optimizations.
static cl::opt<bool>
RunPreEmitPeephole("ppc-late-peephole", cl::Hidden, cl::init(true),
                   cl::desc("Run pre-emit peephole optimizations."));
44 
45 namespace {
46   class PPCPreEmitPeephole : public MachineFunctionPass {
47   public:
48     static char ID;
49     PPCPreEmitPeephole() : MachineFunctionPass(ID) {
50       initializePPCPreEmitPeepholePass(*PassRegistry::getPassRegistry());
51     }
52 
53     void getAnalysisUsage(AnalysisUsage &AU) const override {
54       MachineFunctionPass::getAnalysisUsage(AU);
55     }
56 
57     MachineFunctionProperties getRequiredProperties() const override {
58       return MachineFunctionProperties().set(
59           MachineFunctionProperties::Property::NoVRegs);
60     }
61 
62     // This function removes any redundant load immediates. It has two level
63     // loops - The outer loop finds the load immediates BBI that could be used
64     // to replace following redundancy. The inner loop scans instructions that
65     // after BBI to find redundancy and update kill/dead flags accordingly. If
66     // AfterBBI is the same as BBI, it is redundant, otherwise any instructions
67     // that modify the def register of BBI would break the scanning.
68     // DeadOrKillToUnset is a pointer to the previous operand that had the
69     // kill/dead flag set. It keeps track of the def register of BBI, the use
70     // registers of AfterBBIs and the def registers of AfterBBIs.
71     bool removeRedundantLIs(MachineBasicBlock &MBB,
72                             const TargetRegisterInfo *TRI) {
73       LLVM_DEBUG(dbgs() << "Remove redundant load immediates from MBB:\n";
74                  MBB.dump(); dbgs() << "\n");
75 
76       DenseSet<MachineInstr *> InstrsToErase;
77       for (auto BBI = MBB.instr_begin(); BBI != MBB.instr_end(); ++BBI) {
78         // Skip load immediate that is marked to be erased later because it
79         // cannot be used to replace any other instructions.
80         if (InstrsToErase.find(&*BBI) != InstrsToErase.end())
81           continue;
82         // Skip non-load immediate.
83         unsigned Opc = BBI->getOpcode();
84         if (Opc != PPC::LI && Opc != PPC::LI8 && Opc != PPC::LIS &&
85             Opc != PPC::LIS8)
86           continue;
87         // Skip load immediate, where the operand is a relocation (e.g., $r3 =
88         // LI target-flags(ppc-lo) %const.0).
89         if (!BBI->getOperand(1).isImm())
90           continue;
91         assert(BBI->getOperand(0).isReg() &&
92                "Expected a register for the first operand");
93 
94         LLVM_DEBUG(dbgs() << "Scanning after load immediate: "; BBI->dump(););
95 
96         Register Reg = BBI->getOperand(0).getReg();
97         int64_t Imm = BBI->getOperand(1).getImm();
98         MachineOperand *DeadOrKillToUnset = nullptr;
99         if (BBI->getOperand(0).isDead()) {
100           DeadOrKillToUnset = &BBI->getOperand(0);
101           LLVM_DEBUG(dbgs() << " Kill flag of " << *DeadOrKillToUnset
102                             << " from load immediate " << *BBI
103                             << " is a unsetting candidate\n");
104         }
105         // This loop scans instructions after BBI to see if there is any
106         // redundant load immediate.
107         for (auto AfterBBI = std::next(BBI); AfterBBI != MBB.instr_end();
108              ++AfterBBI) {
109           // Track the operand that kill Reg. We would unset the kill flag of
110           // the operand if there is a following redundant load immediate.
111           int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
112           if (KillIdx != -1) {
113             assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
114             DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
115             LLVM_DEBUG(dbgs()
116                        << " Kill flag of " << *DeadOrKillToUnset << " from "
117                        << *AfterBBI << " is a unsetting candidate\n");
118           }
119 
120           if (!AfterBBI->modifiesRegister(Reg, TRI))
121             continue;
122           // Finish scanning because Reg is overwritten by a non-load
123           // instruction.
124           if (AfterBBI->getOpcode() != Opc)
125             break;
126           assert(AfterBBI->getOperand(0).isReg() &&
127                  "Expected a register for the first operand");
128           // Finish scanning because Reg is overwritten by a relocation or a
129           // different value.
130           if (!AfterBBI->getOperand(1).isImm() ||
131               AfterBBI->getOperand(1).getImm() != Imm)
132             break;
133 
134           // It loads same immediate value to the same Reg, which is redundant.
135           // We would unset kill flag in previous Reg usage to extend live range
136           // of Reg first, then remove the redundancy.
137           if (DeadOrKillToUnset) {
138             LLVM_DEBUG(dbgs()
139                        << " Unset dead/kill flag of " << *DeadOrKillToUnset
140                        << " from " << *DeadOrKillToUnset->getParent());
141             if (DeadOrKillToUnset->isDef())
142               DeadOrKillToUnset->setIsDead(false);
143             else
144               DeadOrKillToUnset->setIsKill(false);
145           }
146           DeadOrKillToUnset =
147               AfterBBI->findRegisterDefOperand(Reg, true, true, TRI);
148           if (DeadOrKillToUnset)
149             LLVM_DEBUG(dbgs()
150                        << " Dead flag of " << *DeadOrKillToUnset << " from "
151                        << *AfterBBI << " is a unsetting candidate\n");
152           InstrsToErase.insert(&*AfterBBI);
153           LLVM_DEBUG(dbgs() << " Remove redundant load immediate: ";
154                      AfterBBI->dump());
155         }
156       }
157 
158       for (MachineInstr *MI : InstrsToErase) {
159         MI->eraseFromParent();
160       }
161       NumRemovedInPreEmit += InstrsToErase.size();
162       return !InstrsToErase.empty();
163     }
164 
165     bool runOnMachineFunction(MachineFunction &MF) override {
166       if (skipFunction(MF.getFunction()) || !RunPreEmitPeephole) {
167         // Remove UNENCODED_NOP even when this pass is disabled.
168         // This needs to be done unconditionally so we don't emit zeros
169         // in the instruction stream.
170         SmallVector<MachineInstr *, 4> InstrsToErase;
171         for (MachineBasicBlock &MBB : MF)
172           for (MachineInstr &MI : MBB)
173             if (MI.getOpcode() == PPC::UNENCODED_NOP)
174               InstrsToErase.push_back(&MI);
175         for (MachineInstr *MI : InstrsToErase)
176           MI->eraseFromParent();
177         return false;
178       }
179       bool Changed = false;
180       const PPCInstrInfo *TII = MF.getSubtarget<PPCSubtarget>().getInstrInfo();
181       const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
182       SmallVector<MachineInstr *, 4> InstrsToErase;
183       for (MachineBasicBlock &MBB : MF) {
184         Changed |= removeRedundantLIs(MBB, TRI);
185         for (MachineInstr &MI : MBB) {
186           unsigned Opc = MI.getOpcode();
187           if (Opc == PPC::UNENCODED_NOP) {
188             InstrsToErase.push_back(&MI);
189             continue;
190           }
191           // Detect self copies - these can result from running AADB.
192           if (PPCInstrInfo::isSameClassPhysRegCopy(Opc)) {
193             const MCInstrDesc &MCID = TII->get(Opc);
194             if (MCID.getNumOperands() == 3 &&
195                 MI.getOperand(0).getReg() == MI.getOperand(1).getReg() &&
196                 MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
197               NumberOfSelfCopies++;
198               LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
199               LLVM_DEBUG(MI.dump());
200               InstrsToErase.push_back(&MI);
201               continue;
202             }
203             else if (MCID.getNumOperands() == 2 &&
204                      MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
205               NumberOfSelfCopies++;
206               LLVM_DEBUG(dbgs() << "Deleting self-copy instruction: ");
207               LLVM_DEBUG(MI.dump());
208               InstrsToErase.push_back(&MI);
209               continue;
210             }
211           }
212           MachineInstr *DefMIToErase = nullptr;
213           if (TII->convertToImmediateForm(MI, &DefMIToErase)) {
214             Changed = true;
215             NumRRConvertedInPreEmit++;
216             LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
217             LLVM_DEBUG(MI.dump());
218             if (DefMIToErase) {
219               InstrsToErase.push_back(DefMIToErase);
220             }
221           }
222           if (TII->foldFrameOffset(MI)) {
223             Changed = true;
224             NumFrameOffFoldInPreEmit++;
225             LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
226             LLVM_DEBUG(MI.dump());
227           }
228         }
229 
230         // Eliminate conditional branch based on a constant CR bit by
231         // CRSET or CRUNSET. We eliminate the conditional branch or
232         // convert it into an unconditional branch. Also, if the CR bit
233         // is not used by other instructions, we eliminate CRSET as well.
234         auto I = MBB.getFirstInstrTerminator();
235         if (I == MBB.instr_end())
236           continue;
237         MachineInstr *Br = &*I;
238         if (Br->getOpcode() != PPC::BC && Br->getOpcode() != PPC::BCn)
239           continue;
240         MachineInstr *CRSetMI = nullptr;
241         Register CRBit = Br->getOperand(0).getReg();
242         unsigned CRReg = getCRFromCRBit(CRBit);
243         bool SeenUse = false;
244         MachineBasicBlock::reverse_iterator It = Br, Er = MBB.rend();
245         for (It++; It != Er; It++) {
246           if (It->modifiesRegister(CRBit, TRI)) {
247             if ((It->getOpcode() == PPC::CRUNSET ||
248                  It->getOpcode() == PPC::CRSET) &&
249                 It->getOperand(0).getReg() == CRBit)
250               CRSetMI = &*It;
251             break;
252           }
253           if (It->readsRegister(CRBit, TRI))
254             SeenUse = true;
255         }
256         if (!CRSetMI) continue;
257 
258         unsigned CRSetOp = CRSetMI->getOpcode();
259         if ((Br->getOpcode() == PPC::BCn && CRSetOp == PPC::CRSET) ||
260             (Br->getOpcode() == PPC::BC  && CRSetOp == PPC::CRUNSET)) {
261           // Remove this branch since it cannot be taken.
262           InstrsToErase.push_back(Br);
263           MBB.removeSuccessor(Br->getOperand(1).getMBB());
264         }
265         else {
266           // This conditional branch is always taken. So, remove all branches
267           // and insert an unconditional branch to the destination of this.
268           MachineBasicBlock::iterator It = Br, Er = MBB.end();
269           for (; It != Er; It++) {
270             if (It->isDebugInstr()) continue;
271             assert(It->isTerminator() && "Non-terminator after a terminator");
272             InstrsToErase.push_back(&*It);
273           }
274           if (!MBB.isLayoutSuccessor(Br->getOperand(1).getMBB())) {
275             ArrayRef<MachineOperand> NoCond;
276             TII->insertBranch(MBB, Br->getOperand(1).getMBB(), nullptr,
277                               NoCond, Br->getDebugLoc());
278           }
279           for (auto &Succ : MBB.successors())
280             if (Succ != Br->getOperand(1).getMBB()) {
281               MBB.removeSuccessor(Succ);
282               break;
283             }
284         }
285 
286         // If the CRBit is not used by another instruction, we can eliminate
287         // CRSET/CRUNSET instruction.
288         if (!SeenUse) {
289           // We need to check use of the CRBit in successors.
290           for (auto &SuccMBB : MBB.successors())
291             if (SuccMBB->isLiveIn(CRBit) || SuccMBB->isLiveIn(CRReg)) {
292               SeenUse = true;
293               break;
294             }
295           if (!SeenUse)
296             InstrsToErase.push_back(CRSetMI);
297         }
298       }
299       for (MachineInstr *MI : InstrsToErase) {
300         LLVM_DEBUG(dbgs() << "PPC pre-emit peephole: erasing instruction: ");
301         LLVM_DEBUG(MI->dump());
302         MI->eraseFromParent();
303         NumRemovedInPreEmit++;
304       }
305       return Changed;
306     }
307   };
308 }
309 
// Register the pass with DEBUG_TYPE as its argument string and
// "PowerPC Pre-Emit Peephole" as its description. The two trailing `false`
// values are presumably the macro's CFG-only and is-analysis flags; confirm
// against the INITIALIZE_PASS definition.
INITIALIZE_PASS(PPCPreEmitPeephole, DEBUG_TYPE, "PowerPC Pre-Emit Peephole",
                false, false)
// Definition of the static ID member declared in the class; the constructor
// passes it to MachineFunctionPass.
char PPCPreEmitPeephole::ID = 0;
313 
314 FunctionPass *llvm::createPPCPreEmitPeepholePass() {
315   return new PPCPreEmitPeephole();
316 }
317