1 //===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs global common subexpression elimination on machine
10 // instructions using a scoped hash table based value numbering scheme. It
11 // must be run while the machine function is still in SSA form.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/ScopedHashTable.h"
17 #include "llvm/ADT/SmallPtrSet.h"
18 #include "llvm/ADT/SmallSet.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/Statistic.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/CFG.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
25 #include "llvm/CodeGen/MachineDominators.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/CodeGen/Passes.h"
32 #include "llvm/CodeGen/TargetInstrInfo.h"
33 #include "llvm/CodeGen/TargetOpcodes.h"
34 #include "llvm/CodeGen/TargetRegisterInfo.h"
35 #include "llvm/CodeGen/TargetSubtargetInfo.h"
36 #include "llvm/InitializePasses.h"
37 #include "llvm/MC/MCRegister.h"
38 #include "llvm/MC/MCRegisterInfo.h"
39 #include "llvm/Pass.h"
40 #include "llvm/Support/Allocator.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/RecyclingAllocator.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include <cassert>
45 #include <iterator>
46 #include <utility>
47 #include <vector>
48 
49 using namespace llvm;
50 
51 #define DEBUG_TYPE "machine-cse"
52 
53 STATISTIC(NumCoalesces, "Number of copies coalesced");
54 STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
55 STATISTIC(NumPREs,      "Number of partial redundant expression"
56                         " transformed to fully redundant");
57 STATISTIC(NumPhysCSEs,
58           "Number of physreg referencing common subexpr eliminated");
59 STATISTIC(NumCrossBBCSEs,
60           "Number of cross-MBB physreg referencing CS eliminated");
61 STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");
62 
63 // Threshold to avoid excessive cost to compute isProfitableToCSE.
64 static cl::opt<int>
65     CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
66                     cl::desc("Threshold for the size of CSUses"));
67 
68 namespace {
69 
70   class MachineCSE : public MachineFunctionPass {
71     const TargetInstrInfo *TII = nullptr;
72     const TargetRegisterInfo *TRI = nullptr;
73     AliasAnalysis *AA = nullptr;
74     MachineDominatorTree *DT = nullptr;
75     MachineRegisterInfo *MRI = nullptr;
76     MachineBlockFrequencyInfo *MBFI = nullptr;
77 
78   public:
79     static char ID; // Pass identification
80 
81     MachineCSE() : MachineFunctionPass(ID) {
82       initializeMachineCSEPass(*PassRegistry::getPassRegistry());
83     }
84 
85     bool runOnMachineFunction(MachineFunction &MF) override;
86 
87     void getAnalysisUsage(AnalysisUsage &AU) const override {
88       AU.setPreservesCFG();
89       MachineFunctionPass::getAnalysisUsage(AU);
90       AU.addRequired<AAResultsWrapperPass>();
91       AU.addPreservedID(MachineLoopInfoID);
92       AU.addRequired<MachineDominatorTree>();
93       AU.addPreserved<MachineDominatorTree>();
94       AU.addRequired<MachineBlockFrequencyInfo>();
95       AU.addPreserved<MachineBlockFrequencyInfo>();
96     }
97 
98     MachineFunctionProperties getRequiredProperties() const override {
99       return MachineFunctionProperties()
100         .set(MachineFunctionProperties::Property::IsSSA);
101     }
102 
103     void releaseMemory() override {
104       ScopeMap.clear();
105       PREMap.clear();
106       Exps.clear();
107     }
108 
109   private:
110     using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
111                             ScopedHashTableVal<MachineInstr *, unsigned>>;
112     using ScopedHTType =
113         ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
114                         AllocatorTy>;
115     using ScopeType = ScopedHTType::ScopeTy;
116     using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
117 
118     unsigned LookAheadLimit = 0;
119     DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
120     DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
121         PREMap;
122     ScopedHTType VNT;
123     SmallVector<MachineInstr *, 64> Exps;
124     unsigned CurrVN = 0;
125 
126     bool PerformTrivialCopyPropagation(MachineInstr *MI,
127                                        MachineBasicBlock *MBB);
128     bool isPhysDefTriviallyDead(MCRegister Reg,
129                                 MachineBasicBlock::const_iterator I,
130                                 MachineBasicBlock::const_iterator E) const;
131     bool hasLivePhysRegDefUses(const MachineInstr *MI,
132                                const MachineBasicBlock *MBB,
133                                SmallSet<MCRegister, 8> &PhysRefs,
134                                PhysDefVector &PhysDefs, bool &PhysUseDef) const;
135     bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
136                           SmallSet<MCRegister, 8> &PhysRefs,
137                           PhysDefVector &PhysDefs, bool &NonLocal) const;
138     bool isCSECandidate(MachineInstr *MI);
139     bool isProfitableToCSE(Register CSReg, Register Reg,
140                            MachineBasicBlock *CSBB, MachineInstr *MI);
141     void EnterScope(MachineBasicBlock *MBB);
142     void ExitScope(MachineBasicBlock *MBB);
143     bool ProcessBlockCSE(MachineBasicBlock *MBB);
144     void ExitScopeIfDone(MachineDomTreeNode *Node,
145                          DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
146     bool PerformCSE(MachineDomTreeNode *Node);
147 
148     bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
149     bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
150     bool PerformSimplePRE(MachineDominatorTree *DT);
151     /// Heuristics to see if it's profitable to move common computations of MBB
152     /// and MBB1 to CandidateBB.
153     bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
154                                  MachineBasicBlock *MBB,
155                                  MachineBasicBlock *MBB1);
156   };
157 
158 } // end anonymous namespace
159 
160 char MachineCSE::ID = 0;
161 
162 char &llvm::MachineCSEID = MachineCSE::ID;
163 
164 INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
165                       "Machine Common Subexpression Elimination", false, false)
166 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
167 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
168 INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
169                     "Machine Common Subexpression Elimination", false, false)
170 
171 /// The source register of a COPY machine instruction can be propagated to all
172 /// its users, and this propagation could increase the probability of finding
173 /// common subexpressions. If the COPY has only one user, the COPY itself can
174 /// be removed.
175 bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
176                                                MachineBasicBlock *MBB) {
177   bool Changed = false;
178   for (MachineOperand &MO : MI->all_uses()) {
179     Register Reg = MO.getReg();
180     if (!Reg.isVirtual())
181       continue;
182     bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
183     MachineInstr *DefMI = MRI->getVRegDef(Reg);
184     if (!DefMI->isCopy())
185       continue;
186     Register SrcReg = DefMI->getOperand(1).getReg();
187     if (!SrcReg.isVirtual())
188       continue;
189     if (DefMI->getOperand(0).getSubReg())
190       continue;
191     // FIXME: We should trivially coalesce subregister copies to expose CSE
192     // opportunities on instructions with truncated operands (see
193     // cse-add-with-overflow.ll). This can be done here as follows:
194     // if (SrcSubReg)
195     //  RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
196     //                                     SrcSubReg);
197     // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
198     //
199     // The 2-addr pass has been updated to handle coalesced subregs. However,
200     // some machine-specific code still can't handle it.
201     // To handle it properly we also need a way find a constrained subregister
202     // class given a super-reg class and subreg index.
203     if (DefMI->getOperand(1).getSubReg())
204       continue;
205     if (!MRI->constrainRegAttrs(SrcReg, Reg))
206       continue;
207     LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
208     LLVM_DEBUG(dbgs() << "***     to: " << *MI);
209 
210     // Propagate SrcReg of copies to MI.
211     MO.setReg(SrcReg);
212     MRI->clearKillFlags(SrcReg);
213     // Coalesce single use copies.
214     if (OnlyOneUse) {
215       // If (and only if) we've eliminated all uses of the copy, also
216       // copy-propagate to any debug-users of MI, or they'll be left using
217       // an undefined value.
218       DefMI->changeDebugValuesDefReg(SrcReg);
219 
220       DefMI->eraseFromParent();
221       ++NumCoalesces;
222     }
223     Changed = true;
224   }
225 
226   return Changed;
227 }
228 
229 bool MachineCSE::isPhysDefTriviallyDead(
230     MCRegister Reg, MachineBasicBlock::const_iterator I,
231     MachineBasicBlock::const_iterator E) const {
232   unsigned LookAheadLeft = LookAheadLimit;
233   while (LookAheadLeft) {
234     // Skip over dbg_value's.
235     I = skipDebugInstructionsForward(I, E);
236 
237     if (I == E)
238       // Reached end of block, we don't know if register is dead or not.
239       return false;
240 
241     bool SeenDef = false;
242     for (const MachineOperand &MO : I->operands()) {
243       if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
244         SeenDef = true;
245       if (!MO.isReg() || !MO.getReg())
246         continue;
247       if (!TRI->regsOverlap(MO.getReg(), Reg))
248         continue;
249       if (MO.isUse())
250         // Found a use!
251         return false;
252       SeenDef = true;
253     }
254     if (SeenDef)
255       // See a def of Reg (or an alias) before encountering any use, it's
256       // trivially dead.
257       return true;
258 
259     --LookAheadLeft;
260     ++I;
261   }
262   return false;
263 }
264 
265 static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
266                                             const MachineOperand &MO,
267                                             const MachineFunction &MF,
268                                             const TargetRegisterInfo &TRI,
269                                             const TargetInstrInfo &TII) {
270   // MachineRegisterInfo::isConstantPhysReg directly called by
271   // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
272   // reserved registers to be frozen. That doesn't cause a problem  post-ISel as
273   // most (if not all) targets freeze reserved registers right after ISel.
274   //
275   // It does cause issues mid-GlobalISel, however, hence the additional
276   // reservedRegsFrozen check.
277   const MachineRegisterInfo &MRI = MF.getRegInfo();
278   return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
279          (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
280 }
281 
282 /// hasLivePhysRegDefUses - Return true if the specified instruction read/write
283 /// physical registers (except for dead defs of physical registers). It also
284 /// returns the physical register def by reference if it's the only one and the
285 /// instruction does not uses a physical register.
286 bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
287                                        const MachineBasicBlock *MBB,
288                                        SmallSet<MCRegister, 8> &PhysRefs,
289                                        PhysDefVector &PhysDefs,
290                                        bool &PhysUseDef) const {
291   // First, add all uses to PhysRefs.
292   for (const MachineOperand &MO : MI->all_uses()) {
293     Register Reg = MO.getReg();
294     if (!Reg)
295       continue;
296     if (Reg.isVirtual())
297       continue;
298     // Reading either caller preserved or constant physregs is ok.
299     if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
300                                          *TII))
301       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
302         PhysRefs.insert(*AI);
303   }
304 
305   // Next, collect all defs into PhysDefs.  If any is already in PhysRefs
306   // (which currently contains only uses), set the PhysUseDef flag.
307   PhysUseDef = false;
308   MachineBasicBlock::const_iterator I = MI; I = std::next(I);
309   for (const auto &MOP : llvm::enumerate(MI->operands())) {
310     const MachineOperand &MO = MOP.value();
311     if (!MO.isReg() || !MO.isDef())
312       continue;
313     Register Reg = MO.getReg();
314     if (!Reg)
315       continue;
316     if (Reg.isVirtual())
317       continue;
318     // Check against PhysRefs even if the def is "dead".
319     if (PhysRefs.count(Reg.asMCReg()))
320       PhysUseDef = true;
321     // If the def is dead, it's ok. But the def may not marked "dead". That's
322     // common since this pass is run before livevariables. We can scan
323     // forward a few instructions and check if it is obviously dead.
324     if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
325       PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
326   }
327 
328   // Finally, add all defs to PhysRefs as well.
329   for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
330     for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
331          ++AI)
332       PhysRefs.insert(*AI);
333 
334   return !PhysRefs.empty();
335 }
336 
337 bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
338                                   SmallSet<MCRegister, 8> &PhysRefs,
339                                   PhysDefVector &PhysDefs,
340                                   bool &NonLocal) const {
341   // For now conservatively returns false if the common subexpression is
342   // not in the same basic block as the given instruction. The only exception
343   // is if the common subexpression is in the sole predecessor block.
344   const MachineBasicBlock *MBB = MI->getParent();
345   const MachineBasicBlock *CSMBB = CSMI->getParent();
346 
347   bool CrossMBB = false;
348   if (CSMBB != MBB) {
349     if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
350       return false;
351 
352     for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
353       if (MRI->isAllocatable(PhysDefs[i].second) ||
354           MRI->isReserved(PhysDefs[i].second))
355         // Avoid extending live range of physical registers if they are
356         //allocatable or reserved.
357         return false;
358     }
359     CrossMBB = true;
360   }
361   MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
362   MachineBasicBlock::const_iterator E = MI;
363   MachineBasicBlock::const_iterator EE = CSMBB->end();
364   unsigned LookAheadLeft = LookAheadLimit;
365   while (LookAheadLeft) {
366     // Skip over dbg_value's.
367     while (I != E && I != EE && I->isDebugInstr())
368       ++I;
369 
370     if (I == EE) {
371       assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
372       (void)CrossMBB;
373       CrossMBB = false;
374       NonLocal = true;
375       I = MBB->begin();
376       EE = MBB->end();
377       continue;
378     }
379 
380     if (I == E)
381       return true;
382 
383     for (const MachineOperand &MO : I->operands()) {
384       // RegMasks go on instructions like calls that clobber lots of physregs.
385       // Don't attempt to CSE across such an instruction.
386       if (MO.isRegMask())
387         return false;
388       if (!MO.isReg() || !MO.isDef())
389         continue;
390       Register MOReg = MO.getReg();
391       if (MOReg.isVirtual())
392         continue;
393       if (PhysRefs.count(MOReg.asMCReg()))
394         return false;
395     }
396 
397     --LookAheadLeft;
398     ++I;
399   }
400 
401   return false;
402 }
403 
404 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
405   if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
406       MI->isInlineAsm() || MI->isDebugInstr())
407     return false;
408 
409   // Ignore copies.
410   if (MI->isCopyLike())
411     return false;
412 
413   // Ignore stuff that we obviously can't move.
414   if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
415       MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
416     return false;
417 
418   if (MI->mayLoad()) {
419     // Okay, this instruction does a load. As a refinement, we allow the target
420     // to decide whether the loaded value is actually a constant. If so, we can
421     // actually use it as a load.
422     if (!MI->isDereferenceableInvariantLoad())
423       // FIXME: we should be able to hoist loads with no other side effects if
424       // there are no other instructions which can change memory in this loop.
425       // This is a trivial form of alias analysis.
426       return false;
427   }
428 
429   // Ignore stack guard loads, otherwise the register that holds CSEed value may
430   // be spilled and get loaded back with corrupted data.
431   if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
432     return false;
433 
434   return true;
435 }
436 
437 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
438 /// common expression that defines Reg. CSBB is basic block where CSReg is
439 /// defined.
440 bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
441                                    MachineBasicBlock *CSBB, MachineInstr *MI) {
442   // FIXME: Heuristics that works around the lack the live range splitting.
443 
444   // If CSReg is used at all uses of Reg, CSE should not increase register
445   // pressure of CSReg.
446   bool MayIncreasePressure = true;
447   if (CSReg.isVirtual() && Reg.isVirtual()) {
448     MayIncreasePressure = false;
449     SmallPtrSet<MachineInstr*, 8> CSUses;
450     int NumOfUses = 0;
451     for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
452       CSUses.insert(&MI);
453       // Too costly to compute if NumOfUses is very large. Conservatively assume
454       // MayIncreasePressure to avoid spending too much time here.
455       if (++NumOfUses > CSUsesThreshold) {
456         MayIncreasePressure = true;
457         break;
458       }
459     }
460     if (!MayIncreasePressure)
461       for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
462         if (!CSUses.count(&MI)) {
463           MayIncreasePressure = true;
464           break;
465         }
466       }
467   }
468   if (!MayIncreasePressure) return true;
469 
470   // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
471   // an immediate predecessor. We don't want to increase register pressure and
472   // end up causing other computation to be spilled.
473   if (TII->isAsCheapAsAMove(*MI)) {
474     MachineBasicBlock *BB = MI->getParent();
475     if (CSBB != BB && !CSBB->isSuccessor(BB))
476       return false;
477   }
478 
479   // Heuristics #2: If the expression doesn't not use a vr and the only use
480   // of the redundant computation are copies, do not cse.
481   bool HasVRegUse = false;
482   for (const MachineOperand &MO : MI->all_uses()) {
483     if (MO.getReg().isVirtual()) {
484       HasVRegUse = true;
485       break;
486     }
487   }
488   if (!HasVRegUse) {
489     bool HasNonCopyUse = false;
490     for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
491       // Ignore copies.
492       if (!MI.isCopyLike()) {
493         HasNonCopyUse = true;
494         break;
495       }
496     }
497     if (!HasNonCopyUse)
498       return false;
499   }
500 
501   // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
502   // it unless the defined value is already used in the BB of the new use.
503   bool HasPHI = false;
504   for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
505     HasPHI |= UseMI.isPHI();
506     if (UseMI.getParent() == MI->getParent())
507       return true;
508   }
509 
510   return !HasPHI;
511 }
512 
513 void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
514   LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
515   ScopeType *Scope = new ScopeType(VNT);
516   ScopeMap[MBB] = Scope;
517 }
518 
519 void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
520   LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
521   DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
522   assert(SI != ScopeMap.end());
523   delete SI->second;
524   ScopeMap.erase(SI);
525 }
526 
527 bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
528   bool Changed = false;
529 
530   SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
531   SmallVector<unsigned, 2> ImplicitDefsToUpdate;
532   SmallVector<unsigned, 2> ImplicitDefs;
533   for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
534     if (!isCSECandidate(&MI))
535       continue;
536 
537     bool FoundCSE = VNT.count(&MI);
538     if (!FoundCSE) {
539       // Using trivial copy propagation to find more CSE opportunities.
540       if (PerformTrivialCopyPropagation(&MI, MBB)) {
541         Changed = true;
542 
543         // After coalescing MI itself may become a copy.
544         if (MI.isCopyLike())
545           continue;
546 
547         // Try again to see if CSE is possible.
548         FoundCSE = VNT.count(&MI);
549       }
550     }
551 
552     // Commute commutable instructions.
553     bool Commuted = false;
554     if (!FoundCSE && MI.isCommutable()) {
555       if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
556         Commuted = true;
557         FoundCSE = VNT.count(NewMI);
558         if (NewMI != &MI) {
559           // New instruction. It doesn't need to be kept.
560           NewMI->eraseFromParent();
561           Changed = true;
562         } else if (!FoundCSE)
563           // MI was changed but it didn't help, commute it back!
564           (void)TII->commuteInstruction(MI);
565       }
566     }
567 
568     // If the instruction defines physical registers and the values *may* be
569     // used, then it's not safe to replace it with a common subexpression.
570     // It's also not safe if the instruction uses physical registers.
571     bool CrossMBBPhysDef = false;
572     SmallSet<MCRegister, 8> PhysRefs;
573     PhysDefVector PhysDefs;
574     bool PhysUseDef = false;
575     if (FoundCSE &&
576         hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
577       FoundCSE = false;
578 
579       // ... Unless the CS is local or is in the sole predecessor block
580       // and it also defines the physical register which is not clobbered
581       // in between and the physical register uses were not clobbered.
582       // This can never be the case if the instruction both uses and
583       // defines the same physical register, which was detected above.
584       if (!PhysUseDef) {
585         unsigned CSVN = VNT.lookup(&MI);
586         MachineInstr *CSMI = Exps[CSVN];
587         if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
588           FoundCSE = true;
589       }
590     }
591 
592     if (!FoundCSE) {
593       VNT.insert(&MI, CurrVN++);
594       Exps.push_back(&MI);
595       continue;
596     }
597 
598     // Found a common subexpression, eliminate it.
599     unsigned CSVN = VNT.lookup(&MI);
600     MachineInstr *CSMI = Exps[CSVN];
601     LLVM_DEBUG(dbgs() << "Examining: " << MI);
602     LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
603 
604     // Prevent CSE-ing non-local convergent instructions.
605     // LLVM's current definition of `isConvergent` does not necessarily prove
606     // that non-local CSE is illegal. The following check extends the definition
607     // of `isConvergent` to assume a convergent instruction is dependent not
608     // only on additional conditions, but also on fewer conditions. LLVM does
609     // not have a MachineInstr attribute which expresses this extended
610     // definition, so it's necessary to use `isConvergent` to prevent illegally
611     // CSE-ing the subset of `isConvergent` instructions which do fall into this
612     // extended definition.
613     if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
614       LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
615                            "different BBs, avoid CSE!\n");
616       VNT.insert(&MI, CurrVN++);
617       Exps.push_back(&MI);
618       continue;
619     }
620 
621     // Check if it's profitable to perform this CSE.
622     bool DoCSE = true;
623     unsigned NumDefs = MI.getNumDefs();
624 
625     for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
626       MachineOperand &MO = MI.getOperand(i);
627       if (!MO.isReg() || !MO.isDef())
628         continue;
629       Register OldReg = MO.getReg();
630       Register NewReg = CSMI->getOperand(i).getReg();
631 
632       // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
633       // we should make sure it is not dead at CSMI.
634       if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
635         ImplicitDefsToUpdate.push_back(i);
636 
637       // Keep track of implicit defs of CSMI and MI, to clear possibly
638       // made-redundant kill flags.
639       if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
640         ImplicitDefs.push_back(OldReg);
641 
642       if (OldReg == NewReg) {
643         --NumDefs;
644         continue;
645       }
646 
647       assert(OldReg.isVirtual() && NewReg.isVirtual() &&
648              "Do not CSE physical register defs!");
649 
650       if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
651         LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
652         DoCSE = false;
653         break;
654       }
655 
656       // Don't perform CSE if the result of the new instruction cannot exist
657       // within the constraints (register class, bank, or low-level type) of
658       // the old instruction.
659       if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
660         LLVM_DEBUG(
661             dbgs() << "*** Not the same register constraints, avoid CSE!\n");
662         DoCSE = false;
663         break;
664       }
665 
666       CSEPairs.push_back(std::make_pair(OldReg, NewReg));
667       --NumDefs;
668     }
669 
670     // Actually perform the elimination.
671     if (DoCSE) {
672       for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
673         unsigned OldReg = CSEPair.first;
674         unsigned NewReg = CSEPair.second;
675         // OldReg may have been unused but is used now, clear the Dead flag
676         MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
677         assert(Def != nullptr && "CSEd register has no unique definition?");
678         Def->clearRegisterDeads(NewReg);
679         // Replace with NewReg and clear kill flags which may be wrong now.
680         MRI->replaceRegWith(OldReg, NewReg);
681         MRI->clearKillFlags(NewReg);
682       }
683 
684       // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
685       // we should make sure it is not dead at CSMI.
686       for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
687         CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
688       for (const auto &PhysDef : PhysDefs)
689         if (!MI.getOperand(PhysDef.first).isDead())
690           CSMI->getOperand(PhysDef.first).setIsDead(false);
691 
692       // Go through implicit defs of CSMI and MI, and clear the kill flags on
693       // their uses in all the instructions between CSMI and MI.
694       // We might have made some of the kill flags redundant, consider:
695       //   subs  ... implicit-def %nzcv    <- CSMI
696       //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
697       //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
698       //   csinc ... implicit killed %nzcv
699       // Since we eliminated MI, and reused a register imp-def'd by CSMI
700       // (here %nzcv), that register, if it was killed before MI, should have
701       // that kill flag removed, because it's lifetime was extended.
702       if (CSMI->getParent() == MI.getParent()) {
703         for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
704           for (auto ImplicitDef : ImplicitDefs)
705             if (MachineOperand *MO = II->findRegisterUseOperand(
706                     ImplicitDef, /*isKill=*/true, TRI))
707               MO->setIsKill(false);
708       } else {
709         // If the instructions aren't in the same BB, bail out and clear the
710         // kill flag on all uses of the imp-def'd register.
711         for (auto ImplicitDef : ImplicitDefs)
712           MRI->clearKillFlags(ImplicitDef);
713       }
714 
715       if (CrossMBBPhysDef) {
716         // Add physical register defs now coming in from a predecessor to MBB
717         // livein list.
718         while (!PhysDefs.empty()) {
719           auto LiveIn = PhysDefs.pop_back_val();
720           if (!MBB->isLiveIn(LiveIn.second))
721             MBB->addLiveIn(LiveIn.second);
722         }
723         ++NumCrossBBCSEs;
724       }
725 
726       MI.eraseFromParent();
727       ++NumCSEs;
728       if (!PhysRefs.empty())
729         ++NumPhysCSEs;
730       if (Commuted)
731         ++NumCommutes;
732       Changed = true;
733     } else {
734       VNT.insert(&MI, CurrVN++);
735       Exps.push_back(&MI);
736     }
737     CSEPairs.clear();
738     ImplicitDefsToUpdate.clear();
739     ImplicitDefs.clear();
740   }
741 
742   return Changed;
743 }
744 
745 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
746 /// dominator tree node if its a leaf or all of its children are done. Walk
747 /// up the dominator tree to destroy ancestors which are now done.
748 void
749 MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
750                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
751   if (OpenChildren[Node])
752     return;
753 
754   // Pop scope.
755   ExitScope(Node->getBlock());
756 
757   // Now traverse upwards to pop ancestors whose offsprings are all done.
758   while (MachineDomTreeNode *Parent = Node->getIDom()) {
759     unsigned Left = --OpenChildren[Parent];
760     if (Left != 0)
761       break;
762     ExitScope(Parent->getBlock());
763     Node = Parent;
764   }
765 }
766 
767 bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
768   SmallVector<MachineDomTreeNode*, 32> Scopes;
769   SmallVector<MachineDomTreeNode*, 8> WorkList;
770   DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
771 
772   CurrVN = 0;
773 
774   // Perform a DFS walk to determine the order of visit.
775   WorkList.push_back(Node);
776   do {
777     Node = WorkList.pop_back_val();
778     Scopes.push_back(Node);
779     OpenChildren[Node] = Node->getNumChildren();
780     append_range(WorkList, Node->children());
781   } while (!WorkList.empty());
782 
783   // Now perform CSE.
784   bool Changed = false;
785   for (MachineDomTreeNode *Node : Scopes) {
786     MachineBasicBlock *MBB = Node->getBlock();
787     EnterScope(MBB);
788     Changed |= ProcessBlockCSE(MBB);
789     // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
790     ExitScopeIfDone(Node, OpenChildren);
791   }
792 
793   return Changed;
794 }
795 
796 // We use stronger checks for PRE candidate rather than for CSE ones to embrace
797 // checks inside ProcessBlockCSE(), not only inside isCSECandidate(). This helps
798 // to exclude instrs created by PRE that won't be CSEed later.
799 bool MachineCSE::isPRECandidate(MachineInstr *MI,
800                                 SmallSet<MCRegister, 8> &PhysRefs) {
801   if (!isCSECandidate(MI) ||
802       MI->isNotDuplicable() ||
803       MI->mayLoad() ||
804       TII->isAsCheapAsAMove(*MI) ||
805       MI->getNumDefs() != 1 ||
806       MI->getNumExplicitDefs() != 1)
807     return false;
808 
809   for (const MachineOperand &MO : MI->operands()) {
810     if (MO.isReg() && !MO.getReg().isVirtual()) {
811       if (MO.isDef())
812         return false;
813       else
814         PhysRefs.insert(MO.getReg());
815     }
816   }
817 
818   return true;
819 }
820 
821 bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
822                                  MachineBasicBlock *MBB) {
823   bool Changed = false;
824   for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
825     SmallSet<MCRegister, 8> PhysRefs;
826     if (!isPRECandidate(&MI, PhysRefs))
827       continue;
828 
829     if (!PREMap.count(&MI)) {
830       PREMap[&MI] = MBB;
831       continue;
832     }
833 
834     auto MBB1 = PREMap[&MI];
835     assert(
836         !DT->properlyDominates(MBB, MBB1) &&
837         "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
838     auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
839     if (!CMBB->isLegalToHoistInto())
840       continue;
841 
842     if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
843       continue;
844 
845     // Two instrs are partial redundant if their basic blocks are reachable
846     // from one to another but one doesn't dominate another.
847     if (CMBB != MBB1) {
848       auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
849       if (BB != nullptr && BB1 != nullptr &&
850           (isPotentiallyReachable(BB1, BB) ||
851            isPotentiallyReachable(BB, BB1))) {
852         // The following check extends the definition of `isConvergent` to
853         // assume a convergent instruction is dependent not only on additional
854         // conditions, but also on fewer conditions. LLVM does not have a
855         // MachineInstr attribute which expresses this extended definition, so
856         // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
857         // subset of `isConvergent` instructions which do fall into this
858         // extended definition.
859         if (MI.isConvergent() && CMBB != MBB)
860           continue;
861 
862         // If this instruction uses physical registers then we can only do PRE
863         // if it's using the value that is live at the place we're hoisting to.
864         bool NonLocal;
865         PhysDefVector PhysDefs;
866         if (!PhysRefs.empty() &&
867             !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
868                               PhysDefs, NonLocal))
869           continue;
870 
871         assert(MI.getOperand(0).isDef() &&
872                "First operand of instr with one explicit def must be this def");
873         Register VReg = MI.getOperand(0).getReg();
874         Register NewReg = MRI->cloneVirtualRegister(VReg);
875         if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
876           continue;
877         MachineInstr &NewMI =
878             TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
879 
880         // When hoisting, make sure we don't carry the debug location of
881         // the original instruction, as that's not correct and can cause
882         // unexpected jumps when debugging optimized code.
883         auto EmptyDL = DebugLoc();
884         NewMI.setDebugLoc(EmptyDL);
885 
886         NewMI.getOperand(0).setReg(NewReg);
887 
888         PREMap[&MI] = CMBB;
889         ++NumPREs;
890         Changed = true;
891       }
892     }
893   }
894   return Changed;
895 }
896 
897 // This simple PRE (partial redundancy elimination) pass doesn't actually
898 // eliminate partial redundancy but transforms it to full redundancy,
899 // anticipating that the next CSE step will eliminate this created redundancy.
900 // If CSE doesn't eliminate this, than created instruction will remain dead
901 // and eliminated later by Remove Dead Machine Instructions pass.
902 bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
903   SmallVector<MachineDomTreeNode *, 32> BBs;
904 
905   PREMap.clear();
906   bool Changed = false;
907   BBs.push_back(DT->getRootNode());
908   do {
909     auto Node = BBs.pop_back_val();
910     append_range(BBs, Node->children());
911 
912     MachineBasicBlock *MBB = Node->getBlock();
913     Changed |= ProcessBlockPRE(DT, MBB);
914 
915   } while (!BBs.empty());
916 
917   return Changed;
918 }
919 
920 bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
921                                          MachineBasicBlock *MBB,
922                                          MachineBasicBlock *MBB1) {
923   if (CandidateBB->getParent()->getFunction().hasMinSize())
924     return true;
925   assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
926   assert(DT->dominates(CandidateBB, MBB1) &&
927          "CandidateBB should dominate MBB1");
928   return MBFI->getBlockFreq(CandidateBB) <=
929          MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
930 }
931 
932 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
933   if (skipFunction(MF.getFunction()))
934     return false;
935 
936   TII = MF.getSubtarget().getInstrInfo();
937   TRI = MF.getSubtarget().getRegisterInfo();
938   MRI = &MF.getRegInfo();
939   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
940   DT = &getAnalysis<MachineDominatorTree>();
941   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
942   LookAheadLimit = TII->getMachineCSELookAheadLimit();
943   bool ChangedPRE, ChangedCSE;
944   ChangedPRE = PerformSimplePRE(DT);
945   ChangedCSE = PerformCSE(DT->getRootNode());
946   return ChangedPRE || ChangedCSE;
947 }
948