1 //===---- PPCReduceCRLogicals.cpp - Reduce CR Bit Logical operations ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===---------------------------------------------------------------------===//
8 //
9 // This pass aims to reduce the number of logical operations on bits in the CR
10 // register. These instructions have a fairly high latency and only a single
11 // pipeline at their disposal in modern PPC cores. Furthermore, they have a
12 // tendency to occur in fairly small blocks where there's little opportunity
13 // to hide the latency between the CR logical operation and its user.
14 //
15 //===---------------------------------------------------------------------===//
16 
17 #include "PPC.h"
18 #include "PPCInstrInfo.h"
19 #include "PPCTargetMachine.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFunctionPass.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/Config/llvm-config.h"
27 #include "llvm/InitializePasses.h"
28 #include "llvm/Support/Debug.h"
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "ppc-reduce-cr-ops"
33 
34 STATISTIC(NumContainedSingleUseBinOps,
35           "Number of single-use binary CR logical ops contained in a block");
36 STATISTIC(NumToSplitBlocks,
37           "Number of binary CR logical ops that can be used to split blocks");
38 STATISTIC(TotalCRLogicals, "Number of CR logical ops.");
39 STATISTIC(TotalNullaryCRLogicals,
40           "Number of nullary CR logical ops (CRSET/CRUNSET).");
41 STATISTIC(TotalUnaryCRLogicals, "Number of unary CR logical ops.");
42 STATISTIC(TotalBinaryCRLogicals, "Number of CR logical ops.");
43 STATISTIC(NumBlocksSplitOnBinaryCROp,
44           "Number of blocks split on CR binary logical ops.");
45 STATISTIC(NumNotSplitIdenticalOperands,
46           "Number of blocks not split due to operands being identical.");
47 STATISTIC(NumNotSplitChainCopies,
48           "Number of blocks not split due to operands being chained copies.");
49 STATISTIC(NumNotSplitWrongOpcode,
50           "Number of blocks not split due to the wrong opcode.");
51 
52 /// Given a basic block \p Successor that potentially contains PHIs, this
53 /// function will look for any incoming values in the PHIs that are supposed to
54 /// be coming from \p OrigMBB but whose definition is actually in \p NewMBB.
55 /// Any such PHIs will be updated to reflect reality.
updatePHIs(MachineBasicBlock * Successor,MachineBasicBlock * OrigMBB,MachineBasicBlock * NewMBB,MachineRegisterInfo * MRI)56 static void updatePHIs(MachineBasicBlock *Successor, MachineBasicBlock *OrigMBB,
57                        MachineBasicBlock *NewMBB, MachineRegisterInfo *MRI) {
58   for (auto &MI : Successor->instrs()) {
59     if (!MI.isPHI())
60       continue;
61     // This is a really ugly-looking loop, but it was pillaged directly from
62     // MachineBasicBlock::transferSuccessorsAndUpdatePHIs().
63     for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
64       MachineOperand &MO = MI.getOperand(i);
65       if (MO.getMBB() == OrigMBB) {
66         // Check if the instruction is actually defined in NewMBB.
67         if (MI.getOperand(i - 1).isReg()) {
68           MachineInstr *DefMI = MRI->getVRegDef(MI.getOperand(i - 1).getReg());
69           if (DefMI->getParent() == NewMBB ||
70               !OrigMBB->isSuccessor(Successor)) {
71             MO.setMBB(NewMBB);
72             break;
73           }
74         }
75       }
76     }
77   }
78 }
79 
80 /// Given a basic block \p Successor that potentially contains PHIs, this
81 /// function will look for PHIs that have an incoming value from \p OrigMBB
82 /// and will add the same incoming value from \p NewMBB.
83 /// NOTE: This should only be used if \p NewMBB is an immediate dominator of
84 /// \p OrigMBB.
addIncomingValuesToPHIs(MachineBasicBlock * Successor,MachineBasicBlock * OrigMBB,MachineBasicBlock * NewMBB,MachineRegisterInfo * MRI)85 static void addIncomingValuesToPHIs(MachineBasicBlock *Successor,
86                                     MachineBasicBlock *OrigMBB,
87                                     MachineBasicBlock *NewMBB,
88                                     MachineRegisterInfo *MRI) {
89   assert(OrigMBB->isSuccessor(NewMBB) &&
90          "NewMBB must be a successor of OrigMBB");
91   for (auto &MI : Successor->instrs()) {
92     if (!MI.isPHI())
93       continue;
94     // This is a really ugly-looking loop, but it was pillaged directly from
95     // MachineBasicBlock::transferSuccessorsAndUpdatePHIs().
96     for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
97       MachineOperand &MO = MI.getOperand(i);
98       if (MO.getMBB() == OrigMBB) {
99         MachineInstrBuilder MIB(*MI.getParent()->getParent(), &MI);
100         MIB.addReg(MI.getOperand(i - 1).getReg()).addMBB(NewMBB);
101         break;
102       }
103     }
104   }
105 }
106 
107 struct BlockSplitInfo {
108   MachineInstr *OrigBranch;
109   MachineInstr *SplitBefore;
110   MachineInstr *SplitCond;
111   bool InvertNewBranch;
112   bool InvertOrigBranch;
113   bool BranchToFallThrough;
114   const MachineBranchProbabilityInfo *MBPI;
115   MachineInstr *MIToDelete;
116   MachineInstr *NewCond;
allInstrsInSameMBBBlockSplitInfo117   bool allInstrsInSameMBB() {
118     if (!OrigBranch || !SplitBefore || !SplitCond)
119       return false;
120     MachineBasicBlock *MBB = OrigBranch->getParent();
121     if (SplitBefore->getParent() != MBB || SplitCond->getParent() != MBB)
122       return false;
123     if (MIToDelete && MIToDelete->getParent() != MBB)
124       return false;
125     if (NewCond && NewCond->getParent() != MBB)
126       return false;
127     return true;
128   }
129 };
130 
131 /// Splits a MachineBasicBlock to branch before \p SplitBefore. The original
132 /// branch is \p OrigBranch. The target of the new branch can either be the same
133 /// as the target of the original branch or the fallthrough successor of the
134 /// original block as determined by \p BranchToFallThrough. The branch
135 /// conditions will be inverted according to \p InvertNewBranch and
136 /// \p InvertOrigBranch. If an instruction that previously fed the branch is to
137 /// be deleted, it is provided in \p MIToDelete and \p NewCond will be used as
138 /// the branch condition. The branch probabilities will be set if the
139 /// MachineBranchProbabilityInfo isn't null.
splitMBB(BlockSplitInfo & BSI)140 static bool splitMBB(BlockSplitInfo &BSI) {
141   assert(BSI.allInstrsInSameMBB() &&
142          "All instructions must be in the same block.");
143 
144   MachineBasicBlock *ThisMBB = BSI.OrigBranch->getParent();
145   MachineFunction *MF = ThisMBB->getParent();
146   MachineRegisterInfo *MRI = &MF->getRegInfo();
147   assert(MRI->isSSA() && "Can only do this while the function is in SSA form.");
148   if (ThisMBB->succ_size() != 2) {
149     LLVM_DEBUG(
150         dbgs() << "Don't know how to handle blocks that don't have exactly"
151                << " two successors.\n");
152     return false;
153   }
154 
155   const PPCInstrInfo *TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
156   unsigned OrigBROpcode = BSI.OrigBranch->getOpcode();
157   unsigned InvertedOpcode =
158       OrigBROpcode == PPC::BC
159           ? PPC::BCn
160           : OrigBROpcode == PPC::BCn
161                 ? PPC::BC
162                 : OrigBROpcode == PPC::BCLR ? PPC::BCLRn : PPC::BCLR;
163   unsigned NewBROpcode = BSI.InvertNewBranch ? InvertedOpcode : OrigBROpcode;
164   MachineBasicBlock *OrigTarget = BSI.OrigBranch->getOperand(1).getMBB();
165   MachineBasicBlock *OrigFallThrough = OrigTarget == *ThisMBB->succ_begin()
166                                            ? *ThisMBB->succ_rbegin()
167                                            : *ThisMBB->succ_begin();
168   MachineBasicBlock *NewBRTarget =
169       BSI.BranchToFallThrough ? OrigFallThrough : OrigTarget;
170 
171   // It's impossible to know the precise branch probability after the split.
172   // But it still needs to be reasonable, the whole probability to original
173   // targets should not be changed.
174   // After split NewBRTarget will get two incoming edges. Assume P0 is the
175   // original branch probability to NewBRTarget, P1 and P2 are new branch
176   // probabilies to NewBRTarget after split. If the two edge frequencies are
177   // same, then
178   //      F * P1 = F * P0 / 2            ==>  P1 = P0 / 2
179   //      F * (1 - P1) * P2 = F * P1     ==>  P2 = P1 / (1 - P1)
180   BranchProbability ProbToNewTarget, ProbFallThrough;     // Prob for new Br.
181   BranchProbability ProbOrigTarget, ProbOrigFallThrough;  // Prob for orig Br.
182   ProbToNewTarget = ProbFallThrough = BranchProbability::getUnknown();
183   ProbOrigTarget = ProbOrigFallThrough = BranchProbability::getUnknown();
184   if (BSI.MBPI) {
185     if (BSI.BranchToFallThrough) {
186       ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigFallThrough) / 2;
187       ProbFallThrough = ProbToNewTarget.getCompl();
188       ProbOrigFallThrough = ProbToNewTarget / ProbToNewTarget.getCompl();
189       ProbOrigTarget = ProbOrigFallThrough.getCompl();
190     } else {
191       ProbToNewTarget = BSI.MBPI->getEdgeProbability(ThisMBB, OrigTarget) / 2;
192       ProbFallThrough = ProbToNewTarget.getCompl();
193       ProbOrigTarget = ProbToNewTarget / ProbToNewTarget.getCompl();
194       ProbOrigFallThrough = ProbOrigTarget.getCompl();
195     }
196   }
197 
198   // Create a new basic block.
199   MachineBasicBlock::iterator InsertPoint = BSI.SplitBefore;
200   const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
201   MachineFunction::iterator It = ThisMBB->getIterator();
202   MachineBasicBlock *NewMBB = MF->CreateMachineBasicBlock(LLVM_BB);
203   MF->insert(++It, NewMBB);
204 
205   // Move everything after SplitBefore into the new block.
206   NewMBB->splice(NewMBB->end(), ThisMBB, InsertPoint, ThisMBB->end());
207   NewMBB->transferSuccessors(ThisMBB);
208   if (!ProbOrigTarget.isUnknown()) {
209     auto MBBI = find(NewMBB->successors(), OrigTarget);
210     NewMBB->setSuccProbability(MBBI, ProbOrigTarget);
211     MBBI = find(NewMBB->successors(), OrigFallThrough);
212     NewMBB->setSuccProbability(MBBI, ProbOrigFallThrough);
213   }
214 
215   // Add the two successors to ThisMBB.
216   ThisMBB->addSuccessor(NewBRTarget, ProbToNewTarget);
217   ThisMBB->addSuccessor(NewMBB, ProbFallThrough);
218 
219   // Add the branches to ThisMBB.
220   BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
221           TII->get(NewBROpcode))
222       .addReg(BSI.SplitCond->getOperand(0).getReg())
223       .addMBB(NewBRTarget);
224   BuildMI(*ThisMBB, ThisMBB->end(), BSI.SplitBefore->getDebugLoc(),
225           TII->get(PPC::B))
226       .addMBB(NewMBB);
227   if (BSI.MIToDelete)
228     BSI.MIToDelete->eraseFromParent();
229 
230   // Change the condition on the original branch and invert it if requested.
231   auto FirstTerminator = NewMBB->getFirstTerminator();
232   if (BSI.NewCond) {
233     assert(FirstTerminator->getOperand(0).isReg() &&
234            "Can't update condition of unconditional branch.");
235     FirstTerminator->getOperand(0).setReg(BSI.NewCond->getOperand(0).getReg());
236   }
237   if (BSI.InvertOrigBranch)
238     FirstTerminator->setDesc(TII->get(InvertedOpcode));
239 
240   // If any of the PHIs in the successors of NewMBB reference values that
241   // now come from NewMBB, they need to be updated.
242   for (auto *Succ : NewMBB->successors()) {
243     updatePHIs(Succ, ThisMBB, NewMBB, MRI);
244   }
245   addIncomingValuesToPHIs(NewBRTarget, ThisMBB, NewMBB, MRI);
246 
247   LLVM_DEBUG(dbgs() << "After splitting, ThisMBB:\n"; ThisMBB->dump());
248   LLVM_DEBUG(dbgs() << "NewMBB:\n"; NewMBB->dump());
249   LLVM_DEBUG(dbgs() << "New branch-to block:\n"; NewBRTarget->dump());
250   return true;
251 }
252 
isBinary(MachineInstr & MI)253 static bool isBinary(MachineInstr &MI) {
254   return MI.getNumOperands() == 3;
255 }
256 
isNullary(MachineInstr & MI)257 static bool isNullary(MachineInstr &MI) {
258   return MI.getNumOperands() == 1;
259 }
260 
261 /// Given a CR logical operation \p CROp, branch opcode \p BROp as well as
262 /// a flag to indicate if the first operand of \p CROp is used as the
263 /// SplitBefore operand, determines whether either of the branches are to be
264 /// inverted as well as whether the new target should be the original
265 /// fall-through block.
266 static void
computeBranchTargetAndInversion(unsigned CROp,unsigned BROp,bool UsingDef1,bool & InvertNewBranch,bool & InvertOrigBranch,bool & TargetIsFallThrough)267 computeBranchTargetAndInversion(unsigned CROp, unsigned BROp, bool UsingDef1,
268                                 bool &InvertNewBranch, bool &InvertOrigBranch,
269                                 bool &TargetIsFallThrough) {
270   // The conditions under which each of the output operands should be [un]set
271   // can certainly be written much more concisely with just 3 if statements or
272   // ternary expressions. However, this provides a much clearer overview to the
273   // reader as to what is set for each <CROp, BROp, OpUsed> combination.
274   if (BROp == PPC::BC || BROp == PPC::BCLR) {
275     // Regular branches.
276     switch (CROp) {
277     default:
278       llvm_unreachable("Don't know how to handle this CR logical.");
279     case PPC::CROR:
280       InvertNewBranch = false;
281       InvertOrigBranch = false;
282       TargetIsFallThrough = false;
283       return;
284     case PPC::CRAND:
285       InvertNewBranch = true;
286       InvertOrigBranch = false;
287       TargetIsFallThrough = true;
288       return;
289     case PPC::CRNAND:
290       InvertNewBranch = true;
291       InvertOrigBranch = true;
292       TargetIsFallThrough = false;
293       return;
294     case PPC::CRNOR:
295       InvertNewBranch = false;
296       InvertOrigBranch = true;
297       TargetIsFallThrough = true;
298       return;
299     case PPC::CRORC:
300       InvertNewBranch = UsingDef1;
301       InvertOrigBranch = !UsingDef1;
302       TargetIsFallThrough = false;
303       return;
304     case PPC::CRANDC:
305       InvertNewBranch = !UsingDef1;
306       InvertOrigBranch = !UsingDef1;
307       TargetIsFallThrough = true;
308       return;
309     }
310   } else if (BROp == PPC::BCn || BROp == PPC::BCLRn) {
311     // Negated branches.
312     switch (CROp) {
313     default:
314       llvm_unreachable("Don't know how to handle this CR logical.");
315     case PPC::CROR:
316       InvertNewBranch = true;
317       InvertOrigBranch = false;
318       TargetIsFallThrough = true;
319       return;
320     case PPC::CRAND:
321       InvertNewBranch = false;
322       InvertOrigBranch = false;
323       TargetIsFallThrough = false;
324       return;
325     case PPC::CRNAND:
326       InvertNewBranch = false;
327       InvertOrigBranch = true;
328       TargetIsFallThrough = true;
329       return;
330     case PPC::CRNOR:
331       InvertNewBranch = true;
332       InvertOrigBranch = true;
333       TargetIsFallThrough = false;
334       return;
335     case PPC::CRORC:
336       InvertNewBranch = !UsingDef1;
337       InvertOrigBranch = !UsingDef1;
338       TargetIsFallThrough = true;
339       return;
340     case PPC::CRANDC:
341       InvertNewBranch = UsingDef1;
342       InvertOrigBranch = !UsingDef1;
343       TargetIsFallThrough = false;
344       return;
345     }
346   } else
347     llvm_unreachable("Don't know how to handle this branch.");
348 }
349 
350 namespace {
351 
352 class PPCReduceCRLogicals : public MachineFunctionPass {
353 
354 public:
355   static char ID;
356   struct CRLogicalOpInfo {
357     MachineInstr *MI;
358     // FIXME: If chains of copies are to be handled, this should be a vector.
359     std::pair<MachineInstr*, MachineInstr*> CopyDefs;
360     std::pair<MachineInstr*, MachineInstr*> TrueDefs;
361     unsigned IsBinary : 1;
362     unsigned IsNullary : 1;
363     unsigned ContainedInBlock : 1;
364     unsigned FeedsISEL : 1;
365     unsigned FeedsBR : 1;
366     unsigned FeedsLogical : 1;
367     unsigned SingleUse : 1;
368     unsigned DefsSingleUse : 1;
369     unsigned SubregDef1;
370     unsigned SubregDef2;
CRLogicalOpInfo__anon12a9d3c00111::PPCReduceCRLogicals::CRLogicalOpInfo371     CRLogicalOpInfo() : MI(nullptr), IsBinary(0), IsNullary(0),
372                         ContainedInBlock(0), FeedsISEL(0), FeedsBR(0),
373                         FeedsLogical(0), SingleUse(0), DefsSingleUse(1),
374                         SubregDef1(0), SubregDef2(0) { }
375     void dump();
376   };
377 
378 private:
379   const PPCInstrInfo *TII = nullptr;
380   MachineFunction *MF = nullptr;
381   MachineRegisterInfo *MRI = nullptr;
382   const MachineBranchProbabilityInfo *MBPI = nullptr;
383 
384   // A vector to contain all the CR logical operations
385   SmallVector<CRLogicalOpInfo, 16> AllCRLogicalOps;
386   void initialize(MachineFunction &MFParm);
387   void collectCRLogicals();
388   bool handleCROp(unsigned Idx);
389   bool splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI);
isCRLogical(MachineInstr & MI)390   static bool isCRLogical(MachineInstr &MI) {
391     unsigned Opc = MI.getOpcode();
392     return Opc == PPC::CRAND || Opc == PPC::CRNAND || Opc == PPC::CROR ||
393            Opc == PPC::CRXOR || Opc == PPC::CRNOR || Opc == PPC::CRNOT ||
394            Opc == PPC::CREQV || Opc == PPC::CRANDC || Opc == PPC::CRORC ||
395            Opc == PPC::CRSET || Opc == PPC::CRUNSET || Opc == PPC::CR6SET ||
396            Opc == PPC::CR6UNSET;
397   }
simplifyCode()398   bool simplifyCode() {
399     bool Changed = false;
400     // Not using a range-based for loop here as the vector may grow while being
401     // operated on.
402     for (unsigned i = 0; i < AllCRLogicalOps.size(); i++)
403       Changed |= handleCROp(i);
404     return Changed;
405   }
406 
407 public:
PPCReduceCRLogicals()408   PPCReduceCRLogicals() : MachineFunctionPass(ID) {
409     initializePPCReduceCRLogicalsPass(*PassRegistry::getPassRegistry());
410   }
411 
412   MachineInstr *lookThroughCRCopy(unsigned Reg, unsigned &Subreg,
413                                   MachineInstr *&CpDef);
runOnMachineFunction(MachineFunction & MF)414   bool runOnMachineFunction(MachineFunction &MF) override {
415     if (skipFunction(MF.getFunction()))
416       return false;
417 
418     // If the subtarget doesn't use CR bits, there's nothing to do.
419     const PPCSubtarget &STI = MF.getSubtarget<PPCSubtarget>();
420     if (!STI.useCRBits())
421       return false;
422 
423     initialize(MF);
424     collectCRLogicals();
425     return simplifyCode();
426   }
427   CRLogicalOpInfo createCRLogicalOpInfo(MachineInstr &MI);
getAnalysisUsage(AnalysisUsage & AU) const428   void getAnalysisUsage(AnalysisUsage &AU) const override {
429     AU.addRequired<MachineBranchProbabilityInfo>();
430     AU.addRequired<MachineDominatorTree>();
431     MachineFunctionPass::getAnalysisUsage(AU);
432   }
433 };
434 
435 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dump()436 LLVM_DUMP_METHOD void PPCReduceCRLogicals::CRLogicalOpInfo::dump() {
437   dbgs() << "CRLogicalOpMI: ";
438   MI->dump();
439   dbgs() << "IsBinary: " << IsBinary << ", FeedsISEL: " << FeedsISEL;
440   dbgs() << ", FeedsBR: " << FeedsBR << ", FeedsLogical: ";
441   dbgs() << FeedsLogical << ", SingleUse: " << SingleUse;
442   dbgs() << ", DefsSingleUse: " << DefsSingleUse;
443   dbgs() << ", SubregDef1: " << SubregDef1 << ", SubregDef2: ";
444   dbgs() << SubregDef2 << ", ContainedInBlock: " << ContainedInBlock;
445   if (!IsNullary) {
446     dbgs() << "\nDefs:\n";
447     TrueDefs.first->dump();
448   }
449   if (IsBinary)
450     TrueDefs.second->dump();
451   dbgs() << "\n";
452   if (CopyDefs.first) {
453     dbgs() << "CopyDef1: ";
454     CopyDefs.first->dump();
455   }
456   if (CopyDefs.second) {
457     dbgs() << "CopyDef2: ";
458     CopyDefs.second->dump();
459   }
460 }
461 #endif
462 
463 PPCReduceCRLogicals::CRLogicalOpInfo
createCRLogicalOpInfo(MachineInstr & MIParam)464 PPCReduceCRLogicals::createCRLogicalOpInfo(MachineInstr &MIParam) {
465   CRLogicalOpInfo Ret;
466   Ret.MI = &MIParam;
467   // Get the defs
468   if (isNullary(MIParam)) {
469     Ret.IsNullary = 1;
470     Ret.TrueDefs = std::make_pair(nullptr, nullptr);
471     Ret.CopyDefs = std::make_pair(nullptr, nullptr);
472   } else {
473     MachineInstr *Def1 = lookThroughCRCopy(MIParam.getOperand(1).getReg(),
474                                            Ret.SubregDef1, Ret.CopyDefs.first);
475     assert(Def1 && "Must be able to find a definition of operand 1.");
476     Ret.DefsSingleUse &=
477       MRI->hasOneNonDBGUse(Def1->getOperand(0).getReg());
478     Ret.DefsSingleUse &=
479       MRI->hasOneNonDBGUse(Ret.CopyDefs.first->getOperand(0).getReg());
480     if (isBinary(MIParam)) {
481       Ret.IsBinary = 1;
482       MachineInstr *Def2 = lookThroughCRCopy(MIParam.getOperand(2).getReg(),
483                                              Ret.SubregDef2,
484                                              Ret.CopyDefs.second);
485       assert(Def2 && "Must be able to find a definition of operand 2.");
486       Ret.DefsSingleUse &=
487         MRI->hasOneNonDBGUse(Def2->getOperand(0).getReg());
488       Ret.DefsSingleUse &=
489         MRI->hasOneNonDBGUse(Ret.CopyDefs.second->getOperand(0).getReg());
490       Ret.TrueDefs = std::make_pair(Def1, Def2);
491     } else {
492       Ret.TrueDefs = std::make_pair(Def1, nullptr);
493       Ret.CopyDefs.second = nullptr;
494     }
495   }
496 
497   Ret.ContainedInBlock = 1;
498   // Get the uses
499   for (MachineInstr &UseMI :
500        MRI->use_nodbg_instructions(MIParam.getOperand(0).getReg())) {
501     unsigned Opc = UseMI.getOpcode();
502     if (Opc == PPC::ISEL || Opc == PPC::ISEL8)
503       Ret.FeedsISEL = 1;
504     if (Opc == PPC::BC || Opc == PPC::BCn || Opc == PPC::BCLR ||
505         Opc == PPC::BCLRn)
506       Ret.FeedsBR = 1;
507     Ret.FeedsLogical = isCRLogical(UseMI);
508     if (UseMI.getParent() != MIParam.getParent())
509       Ret.ContainedInBlock = 0;
510   }
511   Ret.SingleUse = MRI->hasOneNonDBGUse(MIParam.getOperand(0).getReg()) ? 1 : 0;
512 
513   // We now know whether all the uses of the CR logical are in the same block.
514   if (!Ret.IsNullary) {
515     Ret.ContainedInBlock &=
516       (MIParam.getParent() == Ret.TrueDefs.first->getParent());
517     if (Ret.IsBinary)
518       Ret.ContainedInBlock &=
519         (MIParam.getParent() == Ret.TrueDefs.second->getParent());
520   }
521   LLVM_DEBUG(Ret.dump());
522   if (Ret.IsBinary && Ret.ContainedInBlock && Ret.SingleUse) {
523     NumContainedSingleUseBinOps++;
524     if (Ret.FeedsBR && Ret.DefsSingleUse)
525       NumToSplitBlocks++;
526   }
527   return Ret;
528 }
529 
530 /// Looks through a COPY instruction to the actual definition of the CR-bit
531 /// register and returns the instruction that defines it.
532 /// FIXME: This currently handles what is by-far the most common case:
533 /// an instruction that defines a CR field followed by a single copy of a bit
534 /// from that field into a virtual register. If chains of copies need to be
535 /// handled, this should have a loop until a non-copy instruction is found.
lookThroughCRCopy(unsigned Reg,unsigned & Subreg,MachineInstr * & CpDef)536 MachineInstr *PPCReduceCRLogicals::lookThroughCRCopy(unsigned Reg,
537                                                      unsigned &Subreg,
538                                                      MachineInstr *&CpDef) {
539   Subreg = -1;
540   if (!Register::isVirtualRegister(Reg))
541     return nullptr;
542   MachineInstr *Copy = MRI->getVRegDef(Reg);
543   CpDef = Copy;
544   if (!Copy->isCopy())
545     return Copy;
546   Register CopySrc = Copy->getOperand(1).getReg();
547   Subreg = Copy->getOperand(1).getSubReg();
548   if (!CopySrc.isVirtual()) {
549     const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
550     // Set the Subreg
551     if (CopySrc == PPC::CR0EQ || CopySrc == PPC::CR6EQ)
552       Subreg = PPC::sub_eq;
553     if (CopySrc == PPC::CR0LT || CopySrc == PPC::CR6LT)
554       Subreg = PPC::sub_lt;
555     if (CopySrc == PPC::CR0GT || CopySrc == PPC::CR6GT)
556       Subreg = PPC::sub_gt;
557     if (CopySrc == PPC::CR0UN || CopySrc == PPC::CR6UN)
558       Subreg = PPC::sub_un;
559     // Loop backwards and return the first MI that modifies the physical CR Reg.
560     MachineBasicBlock::iterator Me = Copy, B = Copy->getParent()->begin();
561     while (Me != B)
562       if ((--Me)->modifiesRegister(CopySrc, TRI))
563         return &*Me;
564     return nullptr;
565   }
566   return MRI->getVRegDef(CopySrc);
567 }
568 
initialize(MachineFunction & MFParam)569 void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) {
570   MF = &MFParam;
571   MRI = &MF->getRegInfo();
572   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
573   MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
574 
575   AllCRLogicalOps.clear();
576 }
577 
578 /// Contains all the implemented transformations on CR logical operations.
579 /// For example, a binary CR logical can be used to split a block on its inputs,
580 /// a unary CR logical might be used to change the condition code on a
581 /// comparison feeding it. A nullary CR logical might simply be removable
582 /// if the user of the bit it [un]sets can be transformed.
handleCROp(unsigned Idx)583 bool PPCReduceCRLogicals::handleCROp(unsigned Idx) {
584   // We can definitely split a block on the inputs to a binary CR operation
585   // whose defs and (single) use are within the same block.
586   bool Changed = false;
587   CRLogicalOpInfo CRI = AllCRLogicalOps[Idx];
588   if (CRI.IsBinary && CRI.ContainedInBlock && CRI.SingleUse && CRI.FeedsBR &&
589       CRI.DefsSingleUse) {
590     Changed = splitBlockOnBinaryCROp(CRI);
591     if (Changed)
592       NumBlocksSplitOnBinaryCROp++;
593   }
594   return Changed;
595 }
596 
597 /// Splits a block that contains a CR-logical operation that feeds a branch
598 /// and whose operands are produced within the block.
599 /// Example:
600 ///    %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
601 ///    %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
602 ///    %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
603 ///    %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
604 ///    %vr9<def> = CROR %vr6<kill>, %vr8<kill>; CRBITRC:%vr9,%vr6,%vr8
605 ///    BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
606 /// Becomes:
607 ///    %vr5<def> = CMPDI %vr2, 0; CRRC:%vr5 G8RC:%vr2
608 ///    %vr6<def> = COPY %vr5:sub_eq; CRBITRC:%vr6 CRRC:%vr5
609 ///    BC %vr6<kill>, <BB#2>; CRBITRC:%vr6
610 ///
611 ///    %vr7<def> = CMPDI %vr3, 0; CRRC:%vr7 G8RC:%vr3
612 ///    %vr8<def> = COPY %vr7:sub_eq; CRBITRC:%vr8 CRRC:%vr7
613 ///    BC %vr9<kill>, <BB#2>; CRBITRC:%vr9
splitBlockOnBinaryCROp(CRLogicalOpInfo & CRI)614 bool PPCReduceCRLogicals::splitBlockOnBinaryCROp(CRLogicalOpInfo &CRI) {
615   if (CRI.CopyDefs.first == CRI.CopyDefs.second) {
616     LLVM_DEBUG(dbgs() << "Unable to split as the two operands are the same\n");
617     NumNotSplitIdenticalOperands++;
618     return false;
619   }
620   if (CRI.TrueDefs.first->isCopy() || CRI.TrueDefs.second->isCopy() ||
621       CRI.TrueDefs.first->isPHI() || CRI.TrueDefs.second->isPHI()) {
622     LLVM_DEBUG(
623         dbgs() << "Unable to split because one of the operands is a PHI or "
624                   "chain of copies.\n");
625     NumNotSplitChainCopies++;
626     return false;
627   }
628   // Note: keep in sync with computeBranchTargetAndInversion().
629   if (CRI.MI->getOpcode() != PPC::CROR &&
630       CRI.MI->getOpcode() != PPC::CRAND &&
631       CRI.MI->getOpcode() != PPC::CRNOR &&
632       CRI.MI->getOpcode() != PPC::CRNAND &&
633       CRI.MI->getOpcode() != PPC::CRORC &&
634       CRI.MI->getOpcode() != PPC::CRANDC) {
635     LLVM_DEBUG(dbgs() << "Unable to split blocks on this opcode.\n");
636     NumNotSplitWrongOpcode++;
637     return false;
638   }
639   LLVM_DEBUG(dbgs() << "Splitting the following CR op:\n"; CRI.dump());
640   MachineBasicBlock::iterator Def1It = CRI.TrueDefs.first;
641   MachineBasicBlock::iterator Def2It = CRI.TrueDefs.second;
642 
643   bool UsingDef1 = false;
644   MachineInstr *SplitBefore = &*Def2It;
645   for (auto E = CRI.MI->getParent()->end(); Def2It != E; ++Def2It) {
646     if (Def1It == Def2It) { // Def2 comes before Def1.
647       SplitBefore = &*Def1It;
648       UsingDef1 = true;
649       break;
650     }
651   }
652 
653   LLVM_DEBUG(dbgs() << "We will split the following block:\n";);
654   LLVM_DEBUG(CRI.MI->getParent()->dump());
655   LLVM_DEBUG(dbgs() << "Before instruction:\n"; SplitBefore->dump());
656 
657   // Get the branch instruction.
658   MachineInstr *Branch =
659     MRI->use_nodbg_begin(CRI.MI->getOperand(0).getReg())->getParent();
660 
661   // We want the new block to have no code in it other than the definition
662   // of the input to the CR logical and the CR logical itself. So we move
663   // those to the bottom of the block (just before the branch). Then we
664   // will split before the CR logical.
665   MachineBasicBlock *MBB = SplitBefore->getParent();
666   auto FirstTerminator = MBB->getFirstTerminator();
667   MachineBasicBlock::iterator FirstInstrToMove =
668     UsingDef1 ? CRI.TrueDefs.first : CRI.TrueDefs.second;
669   MachineBasicBlock::iterator SecondInstrToMove =
670     UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second;
671 
672   // The instructions that need to be moved are not guaranteed to be
673   // contiguous. Move them individually.
674   // FIXME: If one of the operands is a chain of (single use) copies, they
675   // can all be moved and we can still split.
676   MBB->splice(FirstTerminator, MBB, FirstInstrToMove);
677   if (FirstInstrToMove != SecondInstrToMove)
678     MBB->splice(FirstTerminator, MBB, SecondInstrToMove);
679   MBB->splice(FirstTerminator, MBB, CRI.MI);
680 
681   unsigned Opc = CRI.MI->getOpcode();
682   bool InvertOrigBranch, InvertNewBranch, TargetIsFallThrough;
683   computeBranchTargetAndInversion(Opc, Branch->getOpcode(), UsingDef1,
684                                   InvertNewBranch, InvertOrigBranch,
685                                   TargetIsFallThrough);
686   MachineInstr *SplitCond =
687     UsingDef1 ? CRI.CopyDefs.second : CRI.CopyDefs.first;
688   LLVM_DEBUG(dbgs() << "We will " << (InvertNewBranch ? "invert" : "copy"));
689   LLVM_DEBUG(dbgs() << " the original branch and the target is the "
690                     << (TargetIsFallThrough ? "fallthrough block\n"
691                                             : "orig. target block\n"));
692   LLVM_DEBUG(dbgs() << "Original branch instruction: "; Branch->dump());
693   BlockSplitInfo BSI { Branch, SplitBefore, SplitCond, InvertNewBranch,
694     InvertOrigBranch, TargetIsFallThrough, MBPI, CRI.MI,
695     UsingDef1 ? CRI.CopyDefs.first : CRI.CopyDefs.second };
696   bool Changed = splitMBB(BSI);
697   // If we've split on a CR logical that is fed by a CR logical,
698   // recompute the source CR logical as it may be usable for splitting.
699   if (Changed) {
700     bool Input1CRlogical =
701       CRI.TrueDefs.first && isCRLogical(*CRI.TrueDefs.first);
702     bool Input2CRlogical =
703       CRI.TrueDefs.second && isCRLogical(*CRI.TrueDefs.second);
704     if (Input1CRlogical)
705       AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.first));
706     if (Input2CRlogical)
707       AllCRLogicalOps.push_back(createCRLogicalOpInfo(*CRI.TrueDefs.second));
708   }
709   return Changed;
710 }
711 
collectCRLogicals()712 void PPCReduceCRLogicals::collectCRLogicals() {
713   for (MachineBasicBlock &MBB : *MF) {
714     for (MachineInstr &MI : MBB) {
715       if (isCRLogical(MI)) {
716         AllCRLogicalOps.push_back(createCRLogicalOpInfo(MI));
717         TotalCRLogicals++;
718         if (AllCRLogicalOps.back().IsNullary)
719           TotalNullaryCRLogicals++;
720         else if (AllCRLogicalOps.back().IsBinary)
721           TotalBinaryCRLogicals++;
722         else
723           TotalUnaryCRLogicals++;
724       }
725     }
726   }
727 }
728 
729 } // end anonymous namespace
730 
731 INITIALIZE_PASS_BEGIN(PPCReduceCRLogicals, DEBUG_TYPE,
732                       "PowerPC Reduce CR logical Operation", false, false)
733 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
734 INITIALIZE_PASS_END(PPCReduceCRLogicals, DEBUG_TYPE,
735                     "PowerPC Reduce CR logical Operation", false, false)
736 
737 char PPCReduceCRLogicals::ID = 0;
738 FunctionPass*
createPPCReduceCRLogicalsPass()739 llvm::createPPCReduceCRLogicalsPass() { return new PPCReduceCRLogicals(); }
740