1 //===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass generates machine instructions for the CTR loops related pseudos:
10 // 1: MTCTRloop/DecreaseCTRloop
11 // 2: MTCTR8loop/DecreaseCTR8loop
12 //
13 // If a CTR loop can be generated:
14 // 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"
15 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and
16 //    its user branch instruction can be deleted.
17 //
18 // If a CTR loop can not be generated due to clobber of CTR:
19 // 1: MTCTRloop/MTCTR8loop can be deleted.
20 // 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and
21 //    a "cmplwi/cmpldi".
22 //
23 // This pass runs just before register allocation, because we don't want
24 // register allocator to allocate register for DecreaseCTRloop if a CTR can be
25 // generated or if a CTR loop can not be generated, we don't have any condition
26 // register for the new added "cmplwi/cmpldi".
27 //
28 //===----------------------------------------------------------------------===//
29 
30 #include "PPC.h"
31 #include "PPCInstrInfo.h"
32 #include "PPCSubtarget.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/MachineBasicBlock.h"
35 #include "llvm/CodeGen/MachineFunction.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstr.h"
38 #include "llvm/CodeGen/MachineLoopInfo.h"
39 #include "llvm/CodeGen/MachineOperand.h"
40 #include "llvm/CodeGen/MachineRegisterInfo.h"
41 #include "llvm/CodeGen/Register.h"
42 #include "llvm/InitializePasses.h"
43 #include "llvm/Pass.h"
44 #include "llvm/PassRegistry.h"
45 #include "llvm/Support/CodeGen.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include <cassert>
49 
50 using namespace llvm;
51 
52 #define DEBUG_TYPE "ppc-ctrloops"
53 
54 STATISTIC(NumCTRLoops, "Number of CTR loops generated");
55 STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");
56 
57 namespace {
58 class PPCCTRLoops : public MachineFunctionPass {
59 public:
60   static char ID;
61 
62   PPCCTRLoops() : MachineFunctionPass(ID) {
63     initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
64   }
65 
66   void getAnalysisUsage(AnalysisUsage &AU) const override {
67     AU.addRequired<MachineLoopInfo>();
68     MachineFunctionPass::getAnalysisUsage(AU);
69   }
70 
71   bool runOnMachineFunction(MachineFunction &MF) override;
72 
73 private:
74   const PPCInstrInfo *TII = nullptr;
75   MachineRegisterInfo *MRI = nullptr;
76 
77   bool processLoop(MachineLoop *ML);
78   bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
79   void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
80                          MachineInstr *Dec);
81   void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
82 };
83 } // namespace
84 
85 char PPCCTRLoops::ID = 0;
86 
87 INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
88                       false, false)
89 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
90 INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
91                     false, false)
92 
93 FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }
94 
95 bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
96   bool Changed = false;
97 
98   auto &MLI = getAnalysis<MachineLoopInfo>();
99   TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
100   MRI = &MF.getRegInfo();
101 
102   for (auto *ML : MLI) {
103     if (ML->isOutermost())
104       Changed |= processLoop(ML);
105   }
106 
107 #ifndef NDEBUG
108   for (const MachineBasicBlock &BB : MF) {
109     for (const MachineInstr &I : BB)
110       assert((I.getOpcode() != PPC::DecreaseCTRloop &&
111               I.getOpcode() != PPC::DecreaseCTR8loop) &&
112              "CTR loop pseudo is not expanded!");
113   }
114 #endif
115 
116   return Changed;
117 }
118 
119 bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
120   if (!CheckReads) {
121     // If we are only checking for defs, that is we are going to find
122     // definitions before MTCTRloop, for this case:
123     // CTR defination inside the callee of a call instruction will not impact
124     // the defination of MTCTRloop, so we can use definesRegister() for the
125     // check, no need to check the regmask.
126     return MI->definesRegister(PPC::CTR) || MI->definesRegister(PPC::CTR8);
127   }
128 
129   if (MI->modifiesRegister(PPC::CTR) || MI->modifiesRegister(PPC::CTR8))
130     return true;
131 
132   if (MI->getDesc().isCall())
133     return true;
134 
135   // We define the CTR in the loop preheader, so if there is any CTR reader in
136   // the loop, we also can not use CTR loop form.
137   if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
138     return true;
139 
140   return false;
141 }
142 
143 bool PPCCTRLoops::processLoop(MachineLoop *ML) {
144   bool Changed = false;
145 
146   // Align with HardwareLoop pass, process inner loops first.
147   for (MachineLoop *I : *ML)
148     Changed |= processLoop(I);
149 
150   // If any inner loop is changed, outter loop must be without hardware loop
151   // intrinsics.
152   if (Changed)
153     return true;
154 
155   auto IsLoopStart = [](MachineInstr &MI) {
156     return MI.getOpcode() == PPC::MTCTRloop ||
157            MI.getOpcode() == PPC::MTCTR8loop;
158   };
159 
160   auto SearchForStart =
161       [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
162     for (auto &MI : *MBB) {
163       if (IsLoopStart(MI))
164         return &MI;
165     }
166     return nullptr;
167   };
168 
169   MachineInstr *Start = nullptr;
170   MachineInstr *Dec = nullptr;
171   bool InvalidCTRLoop = false;
172 
173   MachineBasicBlock *Preheader = ML->getLoopPreheader();
174   // If there is no preheader for this loop, there must be no MTCTRloop
175   // either.
176   if (!Preheader)
177     return false;
178 
179   Start = SearchForStart(Preheader);
180   // This is not a CTR loop candidate.
181   if (!Start)
182     return false;
183 
184   // If CTR is live to the preheader, we can not redefine the CTR register.
185   if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
186     InvalidCTRLoop = true;
187 
188   // Make sure there is also no CTR clobber in the block preheader between the
189   // begin and MTCTR.
190   for (MachineBasicBlock::reverse_instr_iterator I =
191            std::next(Start->getReverseIterator());
192        I != Preheader->instr_rend(); ++I)
193     // Only check the definitions of CTR. If there is non-dead definition for
194     // the CTR, we conservatively don't generate a CTR loop.
195     if (isCTRClobber(&*I, /* CheckReads */ false)) {
196       InvalidCTRLoop = true;
197       break;
198     }
199 
200   // Make sure there is also no CTR clobber/user in the block preheader between
201   // MTCTR and the end.
202   for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
203        I != Preheader->instr_end(); ++I)
204     if (isCTRClobber(&*I, /* CheckReads */ true)) {
205       InvalidCTRLoop = true;
206       break;
207     }
208 
209   // Find the CTR loop components and decide whether or not to fall back to a
210   // normal loop.
211   for (auto *MBB : reverse(ML->getBlocks())) {
212     for (auto &MI : *MBB) {
213       if (MI.getOpcode() == PPC::DecreaseCTRloop ||
214           MI.getOpcode() == PPC::DecreaseCTR8loop)
215         Dec = &MI;
216       else if (!InvalidCTRLoop)
217         // If any instruction clobber CTR, then we can not generate a CTR loop.
218         InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
219     }
220     if (Dec && InvalidCTRLoop)
221       break;
222   }
223 
224   assert(Dec && "CTR loop is not complete!");
225 
226   if (InvalidCTRLoop) {
227     expandNormalLoops(ML, Start, Dec);
228     ++NumNormalLoops;
229   }
230   else {
231     expandCTRLoops(ML, Start, Dec);
232     ++NumCTRLoops;
233   }
234   return true;
235 }
236 
237 void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
238                                     MachineInstr *Dec) {
239   bool Is64Bit =
240       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
241 
242   MachineBasicBlock *Preheader = Start->getParent();
243   MachineBasicBlock *Exiting = Dec->getParent();
244   assert((Preheader && Exiting) &&
245          "Preheader and exiting should exist for CTR loop!");
246 
247   assert(Dec->getOperand(1).getImm() == 1 &&
248          "Loop decrement stride must be 1");
249 
250   unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
251   unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;
252 
253   Register PHIDef =
254       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
255                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
256 
257   Start->getParent()->getParent()->getProperties().reset(
258       MachineFunctionProperties::Property::NoPHIs);
259 
260   // Generate "PHI" in the header block.
261   auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
262                         DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
263   PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);
264 
265   Register ADDIDef =
266       MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
267                                          : &PPC::GPRC_and_GPRC_NOR0RegClass);
268   // Generate "addi -1" in the exiting block.
269   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
270       .addReg(PHIDef)
271       .addImm(-1);
272 
273   // Add other inputs for the PHI node.
274   if (ML->isLoopLatch(Exiting)) {
275     // There must be only two predecessors for the loop header, one is the
276     // Preheader and the other one is loop latch Exiting. In hardware loop
277     // insertion pass, the block containing DecreaseCTRloop must dominate all
278     // loop latches. So there must be only one latch.
279     assert(ML->getHeader()->pred_size() == 2 &&
280            "Loop header predecessor is not right!");
281     PHIMIB.addReg(ADDIDef).addMBB(Exiting);
282   } else {
283     // If the block containing DecreaseCTRloop is not a loop latch, we can use
284     // ADDIDef as the value for all other blocks for the PHI. In hardware loop
285     // insertion pass, the block containing DecreaseCTRloop must dominate all
286     // loop latches.
287     for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
288       if (ML->contains(P)) {
289         assert(ML->isLoopLatch(P) &&
290                "Loop's header in-loop predecessor is not loop latch!");
291         PHIMIB.addReg(ADDIDef).addMBB(P);
292       } else
293         assert(P == Preheader &&
294                "CTR loop should not be generated for irreducible loop!");
295     }
296   }
297 
298   // Generate the compare in the exiting block.
299   Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
300   auto CMPMIB =
301       BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
302           .addReg(ADDIDef)
303           .addImm(0);
304 
305   BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
306           Dec->getOperand(0).getReg())
307       .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);
308 
309   // Remove the pseudo instructions.
310   Start->eraseFromParent();
311   Dec->eraseFromParent();
312 }
313 
314 void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
315                                  MachineInstr *Dec) {
316   bool Is64Bit =
317       Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();
318 
319   MachineBasicBlock *Preheader = Start->getParent();
320   MachineBasicBlock *Exiting = Dec->getParent();
321 
322   (void)Preheader;
323   assert((Preheader && Exiting) &&
324          "Preheader and exiting should exist for CTR loop!");
325 
326   assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");
327 
328   unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
329   unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
330   auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
331   assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
332          "There should be only one user for loop decrement pseudo!");
333 
334   unsigned Opcode = 0;
335   switch (BrInstr->getOpcode()) {
336   case PPC::BC:
337     Opcode = BDNZOpcode;
338     (void) ML;
339     assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
340            "Invalid ctr loop!");
341     break;
342   case PPC::BCn:
343     Opcode = BDZOpcode;
344     assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
345            "Invalid ctr loop!");
346     break;
347   default:
348     llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
349   }
350 
351   // Generate "bdnz/bdz" in the exiting block just before the terminator.
352   BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
353       .addMBB(BrInstr->getOperand(1).getMBB());
354 
355   // Remove the pseudo instructions.
356   BrInstr->eraseFromParent();
357   Dec->eraseFromParent();
358 }
359