//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass moves instructions into successor blocks when possible, so that
// they aren't executed on paths where their results aren't needed.
//
// This pass is not intended to be a replacement or a complete alternative
// for an LLVM-IR-level sinking pass. It is only designed to sink simple
// constructs that are not exposed before lowering and instruction selection.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "machine-sink"

static cl::opt<bool>
SplitEdges("machine-sink-split",
           cl::desc("Split critical edges during machine sinking"),
           cl::init(true), cl::Hidden);

static cl::opt<bool>
UseBlockFreqInfo("machine-sink-bfi",
           cl::desc("Use block frequency info to find successors to sink"),
           cl::init(true), cl::Hidden);

static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
    "machine-sink-split-probability-threshold",
    cl::desc(
        "Percentage threshold for splitting single-instruction critical edge. "
        "If the branch probability is higher than this threshold, we allow "
        "speculative execution of up to 1 instruction to avoid branching to "
        "the split critical edge"),
    cl::init(40), cl::Hidden);

static cl::opt<unsigned> SinkLoadInstsPerBlockThreshold(
    "machine-sink-load-instrs-threshold",
    cl::desc("Do not try to find alias store for a load if there is an "
             "in-path block whose instruction count is higher than this "
             "threshold."),
    cl::init(2000), cl::Hidden);

static cl::opt<unsigned> SinkLoadBlocksThreshold(
    "machine-sink-load-blocks-threshold",
    cl::desc("Do not try to find alias store for a load if the number of "
             "blocks on the straight-line path is higher than this "
             "threshold."),
    cl::init(20), cl::Hidden);

static cl::opt<bool>
    SinkInstsIntoCycle("sink-insts-to-avoid-spills",
                       cl::desc("Sink instructions into cycles to avoid "
                                "register spills"),
                       cl::init(false), cl::Hidden);

static cl::opt<unsigned> SinkIntoCycleLimit(
    "machine-sink-cycle-limit",
    cl::desc(
        "The maximum number of instructions considered for cycle sinking."),
    cl::init(50), cl::Hidden);

STATISTIC(NumSunk,      "Number of machine instructions sunk");
STATISTIC(NumCycleSunk,  "Number of machine instructions sunk into a cycle");
STATISTIC(NumSplit,     "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");

namespace {

  class MachineSinking : public MachineFunctionPass {
    const TargetInstrInfo *TII = nullptr;
    const TargetRegisterInfo *TRI = nullptr;
    MachineRegisterInfo *MRI = nullptr;      // Machine register information
    MachineDominatorTree *DT = nullptr;      // Machine dominator tree
    MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
    MachineCycleInfo *CI = nullptr;
    MachineBlockFrequencyInfo *MBFI = nullptr;
    const MachineBranchProbabilityInfo *MBPI = nullptr;
    AliasAnalysis *AA = nullptr;
    RegisterClassInfo RegClassInfo;

    // Remember which edges have been considered for breaking.
    SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
    CEBCandidates;
    // Remember which edges we are about to split.
    // This is different from CEBCandidates since those edges
    // will be split.
    SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;

    DenseSet<Register> RegsToClearKillFlags;

    using AllSuccsCache =
        std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;

    /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is
    /// post-dominated by another DBG_VALUE of the same variable location.
    /// This is necessary to detect sequences such as:
    ///     %0 = someinst
    ///     DBG_VALUE %0, !123, !DIExpression()
    ///     %1 = anotherinst
    ///     DBG_VALUE %1, !123, !DIExpression()
    /// Where if %0 were to sink, the DBG_VALUE should not sink with it, as
    /// that would re-order assignments.
    using SeenDbgUser = PointerIntPair<MachineInstr *, 1>;

    /// Record of DBG_VALUE uses of vregs in a block, so that we can identify
    /// debug instructions to sink.
    SmallDenseMap<unsigned, TinyPtrVector<SeenDbgUser>> SeenDbgUsers;

    /// Record of debug variables that have had their locations set in the
    /// current block.
    DenseSet<DebugVariable> SeenDbgVars;

    std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>, bool>
        HasStoreCache;
    std::map<std::pair<MachineBasicBlock *, MachineBasicBlock *>,
             std::vector<MachineInstr *>>
        StoreInstrCache;

    /// Cached BB's register pressure.
    std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure;

  public:
    static char ID; // Pass identification

    MachineSinking() : MachineFunctionPass(ID) {
      initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      MachineFunctionPass::getAnalysisUsage(AU);
      AU.addRequired<AAResultsWrapperPass>();
      AU.addRequired<MachineDominatorTree>();
      AU.addRequired<MachinePostDominatorTree>();
      AU.addRequired<MachineCycleInfoWrapperPass>();
      AU.addRequired<MachineBranchProbabilityInfo>();
      AU.addPreserved<MachineCycleInfoWrapperPass>();
      AU.addPreserved<MachineLoopInfo>();
      if (UseBlockFreqInfo)
        AU.addRequired<MachineBlockFrequencyInfo>();
    }

    void releaseMemory() override {
      CEBCandidates.clear();
    }

  private:
    bool ProcessBlock(MachineBasicBlock &MBB);
    void ProcessDbgInst(MachineInstr &MI);
    bool isWorthBreakingCriticalEdge(MachineInstr &MI,
                                     MachineBasicBlock *From,
                                     MachineBasicBlock *To);

    bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
                         MachineInstr &MI);

    /// Postpone the splitting of the given critical
    /// edge (\p From, \p To).
    ///
    /// We do not split the edges on the fly. Indeed, this invalidates
    /// the dominance information and thus triggers a lot of updates
    /// of that information underneath.
    /// Instead, we postpone all the splits after each iteration of
    /// the main loop. That way, the information is at least valid
    /// for the lifetime of an iteration.
    ///
    /// \return True if the edge is marked as toSplit, false otherwise.
    /// False can be returned if, for instance, this is not profitable.
    bool PostponeSplitCriticalEdge(MachineInstr &MI,
                                   MachineBasicBlock *From,
                                   MachineBasicBlock *To,
                                   bool BreakPHIEdge);
    bool SinkInstruction(MachineInstr &MI, bool &SawStore,
                         AllSuccsCache &AllSuccessors);

    /// If we sink a COPY inst, some debug users of its destination may no
    /// longer be dominated by the COPY, and will eventually be dropped.
    /// This is easily rectified by forwarding the non-dominated debug uses
    /// to the copy source.
    void SalvageUnsunkDebugUsersOfCopy(MachineInstr &,
                                       MachineBasicBlock *TargetBlock);
    bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB,
                                 MachineBasicBlock *DefMBB, bool &BreakPHIEdge,
                                 bool &LocalUse) const;
    MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
               bool &BreakPHIEdge, AllSuccsCache &AllSuccessors);

    void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB,
                                 SmallVectorImpl<MachineInstr *> &Candidates);
    bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I);

    bool isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                              MachineBasicBlock *MBB,
                              MachineBasicBlock *SuccToSinkTo,
                              AllSuccsCache &AllSuccessors);

    bool PerformTrivialForwardCoalescing(MachineInstr &MI,
                                         MachineBasicBlock *MBB);

    SmallVector<MachineBasicBlock *, 4> &
    GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                           AllSuccsCache &AllSuccessors) const;

    std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB);
  };

} // end anonymous namespace

char MachineSinking::ID = 0;

char &llvm::MachineSinkingID = MachineSinking::ID;

INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
                      "Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
                    "Machine code sinking", false, false)

/// Return true if a target defined block prologue instruction interferes
/// with a sink candidate.
static bool blockPrologueInterferes(const MachineBasicBlock *BB,
                                    MachineBasicBlock::const_iterator End,
                                    const MachineInstr &MI,
                                    const TargetRegisterInfo *TRI,
                                    const TargetInstrInfo *TII,
                                    const MachineRegisterInfo *MRI) {
  for (MachineBasicBlock::const_iterator PI = BB->getFirstNonPHI(); PI != End;
       ++PI) {
    // Only check target defined prologue instructions
    if (!TII->isBasicBlockPrologue(*PI))
      continue;
    for (auto &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (!Reg)
        continue;
      if (MO.isUse()) {
        if (Reg.isPhysical() && MRI && MRI->isConstantPhysReg(Reg))
          continue;
        if (PI->modifiesRegister(Reg, TRI))
          return true;
      } else {
        if (PI->readsRegister(Reg, TRI))
          return true;
        // Check for interference with non-dead defs
        auto *DefOp = PI->findRegisterDefOperand(Reg, /*isDead=*/false,
                                                 /*Overlap=*/true, TRI);
        if (DefOp && !DefOp->isDead())
          return true;
      }
    }
  }

  return false;
}
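/// If MI is a COPY between virtual registers of the same class whose source
/// has a single non-debug use, forward the source into the destination's
/// uses and erase the COPY.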
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
                                                     MachineBasicBlock *MBB) {
  if (!MI.isCopy())
    return false;

  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();
  if (!SrcReg.isVirtual() || !DstReg.isVirtual() ||
      !MRI->hasOneNonDBGUse(SrcReg))
    return false;

  const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
  const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
  if (SRC != DRC)
    return false;

  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
  if (DefMI->isCopyLike())
    return false;
  LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
  LLVM_DEBUG(dbgs() << "*** to: " << MI);
  MRI->replaceRegWith(DstReg, SrcReg);
  MI.eraseFromParent();

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  MRI->clearKillFlags(SrcReg);

  ++NumCoalesces;
  return true;
}

/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move def
/// after uses.
bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
                                             MachineBasicBlock *MBB,
                                             MachineBasicBlock *DefMBB,
                                             bool &BreakPHIEdge,
                                             bool &LocalUse) const {
  assert(Reg.isVirtual() && "Only makes sense for vregs");

  // Ignore debug uses because debug info doesn't affect the code.
  if (MRI->use_nodbg_empty(Reg))
    return true;

  // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
  // into and they are all PHI nodes. In this case, machine-sink must break
  // the critical edge first. e.g.
  //
  // %bb.1:
  //   Predecessors according to CFG: %bb.0
  //     ...
  //     %def = DEC64_32r %x, implicit-def dead %eflags
  //     ...
  //     JE_4 <%bb.37>, implicit %eflags
  //   Successors according to CFG: %bb.37 %bb.2
  //
  // %bb.2:
  //     %p = PHI %y, %bb.0, %def, %bb.1
  if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
        MachineInstr *UseInst = MO.getParent();
        unsigned OpNo = MO.getOperandNo();
        MachineBasicBlock *UseBlock = UseInst->getParent();
        return UseBlock == MBB && UseInst->isPHI() &&
               UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
      })) {
    BreakPHIEdge = true;
    return true;
  }

  for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
    // Determine the block of the use.
    MachineInstr *UseInst = MO.getParent();
    unsigned OpNo = MO.getOperandNo();
    MachineBasicBlock *UseBlock = UseInst->getParent();
    if (UseInst->isPHI()) {
      // PHI nodes use the operand in the predecessor block, not the block with
      // the PHI.
      UseBlock = UseInst->getOperand(OpNo + 1).getMBB();
    } else if (UseBlock == DefMBB) {
      LocalUse = true;
      return false;
    }

    // Check that it dominates.
    if (!DT->dominates(MBB, UseBlock))
      return false;
  }

  return true;
}

/// Return true if this machine instruction loads from global offset table or
/// constant pool.
static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert(MI.mayLoad() && "Expected MI that loads!");

  // If we lost memory operands, conservatively assume that the instruction
  // reads from everything.
  if (MI.memoperands_empty())
    return true;

  for (MachineMemOperand *MemOp : MI.memoperands())
    if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
      if (PSV->isGOT() || PSV->isConstantPool())
        return true;

  return false;
}

void MachineSinking::FindCycleSinkCandidates(
    MachineCycle *Cycle, MachineBasicBlock *BB,
    SmallVectorImpl<MachineInstr *> &Candidates) {
  for (auto &MI : *BB) {
    LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI);
    if (!TII->shouldSink(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this "
                           "target\n");
      continue;
    }
    if (!isCycleInvariant(Cycle, MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction is not cycle invariant\n");
      continue;
    }
    bool DontMoveAcrossStore = true;
    if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Instruction not safe to move.\n");
      continue;
    }
    if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) {
      LLVM_DEBUG(dbgs() << "CycleSink: Not sinking load that is not from GOT "
                           "or constant pool\n");
      continue;
    }
    if (MI.isConvergent())
      continue;

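    // Only consider instructions whose first operand is a register def with
    // exactly one definition; SinkIntoCycle later walks the uses of that
    // register.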
    const MachineOperand &MO = MI.getOperand(0);
    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
      continue;
    if (!MRI->hasOneDef(MO.getReg()))
      continue;

    LLVM_DEBUG(dbgs() << "CycleSink: Instruction added as candidate.\n");
    Candidates.push_back(&MI);
  }
}

bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n");

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  DT = &getAnalysis<MachineDominatorTree>();
  PDT = &getAnalysis<MachinePostDominatorTree>();
  CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
  MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  RegClassInfo.runOnMachineFunction(MF);

  bool EverMadeChange = false;

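  // Iterate to a fixed point: sinking an instruction or splitting an edge can
  // expose further sinking opportunities in the next iteration.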
  while (true) {
    bool MadeChange = false;

    // Process all basic blocks.
    CEBCandidates.clear();
    ToSplit.clear();
    for (auto &MBB : MF)
      MadeChange |= ProcessBlock(MBB);

    // If we have anything we marked as toSplit, split it now.
    for (const auto &Pair : ToSplit) {
      auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this);
      if (NewSucc != nullptr) {
        LLVM_DEBUG(dbgs() << " *** Splitting critical edge: "
                          << printMBBReference(*Pair.first) << " -- "
                          << printMBBReference(*NewSucc) << " -- "
                          << printMBBReference(*Pair.second) << '\n');
        if (MBFI)
          MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI);

        MadeChange = true;
        ++NumSplit;
      } else
        LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n");
    }
    // If this iteration over the code changed anything, keep iterating.
    if (!MadeChange) break;
    EverMadeChange = true;
  }

  if (SinkInstsIntoCycle) {
    SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_begin(),
                                          CI->toplevel_end());
    for (auto *Cycle : Cycles) {
      MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
      if (!Preheader) {
        LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n");
        continue;
      }
      SmallVector<MachineInstr *, 8> Candidates;
      FindCycleSinkCandidates(Cycle, Preheader, Candidates);

      // Walk the candidates in reverse order so that we first process the use
      // of a def-use chain, if there is any.
      // TODO: Sort the candidates using a cost-model.
      unsigned i = 0;
      for (MachineInstr *I : llvm::reverse(Candidates)) {
        if (i++ == SinkIntoCycleLimit) {
          LLVM_DEBUG(dbgs() << "CycleSink:   Limit reached of instructions to "
                               "be analysed.\n");
          break;
        }

        if (!SinkIntoCycle(Cycle, *I))
          break;
        EverMadeChange = true;
        ++NumCycleSunk;
      }
    }
  }

  HasStoreCache.clear();
  StoreInstrCache.clear();

  // Now clear any kill flags for recorded registers.
  for (auto I : RegsToClearKillFlags)
    MRI->clearKillFlags(I);
  RegsToClearKillFlags.clear();

  return EverMadeChange;
}

bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
  // Can't sink anything out of a block that has fewer than two successors.
  if (MBB.succ_size() <= 1 || MBB.empty()) return false;

  // Don't bother sinking code out of unreachable blocks. In addition to being
  // unprofitable, it can also lead to infinite looping, because in an
  // unreachable cycle there may be nowhere to stop.
  if (!DT->isReachableFromEntry(&MBB)) return false;

  bool MadeChange = false;

  // Cache all successors, sorted by frequency info and cycle depth.
  AllSuccsCache AllSuccessors;

  // Walk the basic block bottom-up.  Remember if we saw a store.
  MachineBasicBlock::iterator I = MBB.end();
  --I;
  bool ProcessedBegin, SawStore = false;
  do {
    MachineInstr &MI = *I;  // The instruction to sink.

    // Predecrement I (if it's not begin) so that it isn't invalidated by
    // sinking.
    ProcessedBegin = I == MBB.begin();
    if (!ProcessedBegin)
      --I;

    if (MI.isDebugOrPseudoInstr()) {
      if (MI.isDebugValue())
        ProcessDbgInst(MI);
      continue;
    }

    bool Joined = PerformTrivialForwardCoalescing(MI, &MBB);
    if (Joined) {
      MadeChange = true;
      continue;
    }

    if (SinkInstruction(MI, SawStore, AllSuccessors)) {
      ++NumSunk;
      MadeChange = true;
    }

    // If we just processed the first instruction in the block, we're done.
  } while (!ProcessedBegin);

  SeenDbgUsers.clear();
  SeenDbgVars.clear();
  // Invalidate the cached register pressure; sinking out of this block may
  // have changed the pressure of other blocks.
  CachedRegisterPressure.clear();

  return MadeChange;
}

void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
  // When we see DBG_VALUEs for registers, record any vreg it reads, so that
  // we know what to sink if the vreg def sinks.
  assert(MI.isDebugValue() && "Expected DBG_VALUE for processing");

  DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
                    MI.getDebugLoc()->getInlinedAt());
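  // The block is walked bottom-up, so if this variable has been seen already,
  // a later DBG_VALUE of it post-dominates this one (see SeenDbgUser).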
  bool SeenBefore = SeenDbgVars.contains(Var);

  for (MachineOperand &MO : MI.debug_operands()) {
    if (MO.isReg() && MO.getReg().isVirtual())
      SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore));
  }

  // Record the variable for any DBG_VALUE, to avoid re-ordering any of them.
  SeenDbgVars.insert(Var);
}

bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
                                                 MachineBasicBlock *From,
                                                 MachineBasicBlock *To) {
  // FIXME: Need much better heuristics.

  // If the pass has already considered breaking this edge (during this pass
  // through the function), then let's go ahead and break it. This means
  // sinking multiple "cheap" instructions into the same block.
  if (!CEBCandidates.insert(std::make_pair(From, To)).second)
    return true;

  if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
    return true;

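  // A sufficiently cold edge (probability at or below the threshold) is worth
  // breaking even for a cheap instruction; for hotter edges we would rather
  // speculate the instruction than branch to a split edge (see
  // SplitEdgeProbabilityThreshold).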
  if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
      BranchProbability(SplitEdgeProbabilityThreshold, 100))
    return true;

  // MI is cheap, we probably don't want to break the critical edge for it.
  // However, if this would allow some definitions of its source operands
  // to be sunk then it's probably worth it.
  for (const MachineOperand &MO : MI.all_uses()) {
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    // We don't move live definitions of physical registers,
    // so sinking their uses won't enable any opportunities.
    if (Reg.isPhysical())
      continue;

    // If this instruction is the only user of a virtual register,
    // check if breaking the edge will enable sinking
    // both this instruction and the defining instruction.
    if (MRI->hasOneNonDBGUse(Reg)) {
      // If the definition resides in the same MBB,
      // claim it's likely we can sink these together.
      // If the definition resides elsewhere, we aren't
      // blocking it from being sunk, so don't break the edge.
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (DefMI->getParent() == MI.getParent())
        return true;
    }
  }

  return false;
}

bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
                                               MachineBasicBlock *FromBB,
                                               MachineBasicBlock *ToBB,
                                               bool BreakPHIEdge) {
  if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
    return false;

  // Avoid breaking a back edge. From == To means a backedge of a single-BB
  // cycle.
  if (!SplitEdges || FromBB == ToBB)
    return false;

  MachineCycle *FromCycle = CI->getCycle(FromBB);
  MachineCycle *ToCycle = CI->getCycle(ToBB);

  // Check for backedges of more "complex" cycles.
  if (FromCycle == ToCycle && FromCycle &&
      (!FromCycle->isReducible() || FromCycle->getHeader() == ToBB))
    return false;

  // It's not always legal to break critical edges and sink the computation
  // to the edge.
  //
  // %bb.1:
  // v1024
  // Beq %bb.3
  // <fallthrough>
  // %bb.2:
  // ... no uses of v1024
  // <fallthrough>
  // %bb.3:
  // ...
  //       = v1024
  //
  // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted:
  //
  // %bb.1:
  // ...
  // Bne %bb.2
  // %bb.4:
  // v1024 =
  // B %bb.3
  // %bb.2:
  // ... no uses of v1024
  // <fallthrough>
  // %bb.3:
  // ...
  //       = v1024
  //
  // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3
  // flow. We need to ensure that the new basic block where the computation is
  // sunk dominates all the uses.
  // It's only legal to break a critical edge and sink the computation to the
  // new block if all the predecessors of "To", except for "From", are
  // not dominated by "From". Given the SSA property, this means these
  // predecessors are dominated by "To".
  //
  // There is no need to do this check if all the uses are PHI nodes. PHI
  // sources are only defined on the specific predecessor edges.
  if (!BreakPHIEdge) {
    for (MachineBasicBlock *Pred : ToBB->predecessors())
      if (Pred != FromBB && !DT->dominates(ToBB, Pred))
        return false;
  }

  ToSplit.insert(std::make_pair(FromBB, ToBB));

  return true;
}

std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) {
  // To save compile time, an MBB's register pressure is cached and not
  // recomputed within one ProcessBlock iteration, even though the pressure
  // changes after any instruction is sunk into the MBB.
  // FIXME: we need an accurate and cheap register pressure estimation model
  // here.
  auto RP = CachedRegisterPressure.find(&MBB);
  if (RP != CachedRegisterPressure.end())
    return RP->second;

  RegionPressure Pressure;
  RegPressureTracker RPTracker(Pressure);

  // Initialize the register pressure tracker.
  RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(),
                 /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);

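  // Walk the block bottom-up, receding the tracker over each instruction, so
  // the recorded maximum set pressure covers the whole block.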
  for (MachineBasicBlock::iterator MII = MBB.instr_end(),
                                   MIE = MBB.instr_begin();
       MII != MIE; --MII) {
    MachineInstr &MI = *std::prev(MII);
    if (MI.isDebugInstr() || MI.isPseudoProbe())
      continue;
    RegisterOperands RegOpers;
    RegOpers.collect(MI, *TRI, *MRI, false, false);
    RPTracker.recedeSkipDebugValues();
    assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");
    RPTracker.recede(RegOpers);
  }

  RPTracker.closeRegion();
  auto It = CachedRegisterPressure.insert(
      std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure));
  return It.first->second;
}

/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          MachineBasicBlock *SuccToSinkTo,
                                          AllSuccsCache &AllSuccessors) {
  assert(SuccToSinkTo && "Invalid SinkTo Candidate BB");

  if (MBB == SuccToSinkTo)
    return false;

  // It is profitable if SuccToSinkTo does not post dominate current block.
  if (!PDT->dominates(SuccToSinkTo, MBB))
    return true;

  // It is profitable to sink an instruction from a deeper cycle to a shallower
  // cycle, even if the latter post-dominates the former (PR21115).
  if (CI->getCycleDepth(MBB) > CI->getCycleDepth(SuccToSinkTo))
    return true;

  // Check whether the only uses in the post-dominated block are PHI
  // instructions.
  bool NonPHIUse = false;
  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
    MachineBasicBlock *UseBlock = UseInst.getParent();
    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
      NonPHIUse = true;
  }
  if (!NonPHIUse)
    return true;

  // Even if SuccToSinkTo post-dominates MBB, sinking may still be profitable
  // if MI can be sunk further into another block in a later round.
  bool BreakPHIEdge = false;
  // FIXME - If finding successor is compile time expensive then cache results.
  if (MachineBasicBlock *MBB2 =
          FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
    return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);

  MachineCycle *MCycle = CI->getCycle(MBB);

  // If the instruction is not inside a cycle, it is not profitable to sink MI
  // to a post-dominating block SuccToSinkTo.
  if (!MCycle)
    return false;

  auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) {
    unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
    const int *PS = TRI->getRegClassPressureSets(RC);
    // Get register pressure for block SuccToSinkTo.
    std::vector<unsigned> BBRegisterPressure =
        getBBRegisterPressure(*SuccToSinkTo);
    for (; *PS != -1; PS++)
      // Check if any register pressure set exceeds its limit in block
      // SuccToSinkTo after sinking.
      if (Weight + BBRegisterPressure[*PS] >=
          TRI->getRegPressureSetLimit(*MBB->getParent(), *PS))
        return true;
    return false;
  };

  // If this instruction is inside a cycle and sinking it can shorten the live
  // ranges of more registers, it is still profitable.
  for (const MachineOperand &MO : MI.operands()) {
    // Ignore non-register operands.
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0)
      continue;

    if (Reg.isPhysical()) {
      // Don't handle non-constant and non-ignorable physical register uses.
      if (MO.isUse() && !MRI->isConstantPhysReg(Reg) &&
          !TII->isIgnorableUse(MO))
        return false;
      continue;
    }

    // Users for the defs are all dominated by SuccToSinkTo.
    if (MO.isDef()) {
      // This def register's live range is shortened after sinking.
      bool LocalUse = false;
      if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge,
                                   LocalUse))
        return false;
    } else {
      MachineInstr *DefMI = MRI->getVRegDef(Reg);
      if (!DefMI)
        continue;
      MachineCycle *Cycle = CI->getCycle(DefMI->getParent());
      // If DefMI is defined outside of the cycle, there is no live range
      // impact for this operand. "Defined outside of the cycle" means either:
      // 1. the definition is outside of the cycle, or
      // 2. the definition is in the cycle, but it is a PHI in the cycle
      //    header.
      if (Cycle != MCycle || (DefMI->isPHI() && Cycle && Cycle->isReducible() &&
                              Cycle->getHeader() == DefMI->getParent()))
        continue;
      // The DefMI is defined inside the cycle.
      // If sinking this operand makes some register pressure set exceed its
      // limit, it is not profitable.
      if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) {
        LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable.");
        return false;
      }
    }
  }

  // MI is inside a cycle, and either all its operands are live across the
  // whole cycle or sinking no operand makes a register pressure set exceed
  // its limit; it is profitable to sink MI.
  return true;
}

/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
/// computing it if it was not already cached.
SmallVector<MachineBasicBlock *, 4> &
MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
                                       AllSuccsCache &AllSuccessors) const {
  // Do we have the sorted successors in the cache?
  auto Succs = AllSuccessors.find(MBB);
  if (Succs != AllSuccessors.end())
    return Succs->second;

  SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors());

  // Handle cases where sinking can happen but where the sink point isn't a
  // successor. For example:
  //
  //   x = computation
  //   if () {} else {}
  //   use x
  //
  for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) {
    // DomTree children of MBB that have MBB as immediate dominator are added.
    if (DTChild->getIDom()->getBlock() == MI.getParent() &&
        // Skip MBBs already added to the AllSuccs vector above.
        !MBB->isSuccessor(DTChild->getBlock()))
      AllSuccs.push_back(DTChild->getBlock());
  }

  // Sort Successors according to their cycle depth or block frequency info.
  llvm::stable_sort(
      AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
        uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
        uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
        bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
        return HasBlockFreq ? LHSFreq < RHSFreq
                            : CI->getCycleDepth(L) < CI->getCycleDepth(R);
      });

  auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs));

  return it.first->second;
}

/// FindSuccToSinkTo - Find a successor to sink this instruction to.
MachineBasicBlock *
MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
                                 bool &BreakPHIEdge,
                                 AllSuccsCache &AllSuccessors) {
  assert(MBB && "Invalid MachineBasicBlock!");

  // Loop over all the operands of the specified instruction.  If there is
  // anything we can't handle, bail out.

  // SuccToSinkTo - This is the successor to sink this instruction to, once we
  // decide.
  MachineBasicBlock *SuccToSinkTo = nullptr;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg()) continue;  // Ignore non-register operands.

    Register Reg = MO.getReg();
    if (Reg == 0) continue;

    if (Reg.isPhysical()) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
          return nullptr;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return nullptr;
      }
    } else {
      // Virtual register uses are always safe to sink.
      if (MO.isUse()) continue;

      // If it's not safe to move defs of the register class, then abort.
      if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
        return nullptr;

      // Virtual register defs can only be sunk if all their uses are in blocks
      // dominated by one of the successors.
      if (SuccToSinkTo) {
        // If a previous operand picked a block to sink to, then this operand
        // must be sinkable to the same block.
        bool LocalUse = false;
        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
                                     BreakPHIEdge, LocalUse))
          return nullptr;

        continue;
      }

      // Otherwise, we should look at all the successors and decide which one
      // we should sink to. If we have reliable block frequency information
      // (frequency != 0) available, give successors with smaller frequencies
      // higher priority, otherwise prioritize smaller cycle depths.
      for (MachineBasicBlock *SuccBlock :
           GetAllSortedSuccessors(MI, MBB, AllSuccessors)) {
        bool LocalUse = false;
        if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
                                    BreakPHIEdge, LocalUse)) {
          SuccToSinkTo = SuccBlock;
          break;
        }
        if (LocalUse)
          // Def is used locally, it's never safe to move this def.
          return nullptr;
      }

      // If we couldn't find a block to sink to, ignore this instruction.
      if (!SuccToSinkTo)
        return nullptr;
      if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors))
        return nullptr;
    }
  }

  // It is not possible to sink an instruction into its own block.  This can
  // happen with cycles.
  if (MBB == SuccToSinkTo)
    return nullptr;

  if (!SuccToSinkTo)
    return nullptr;

  // It's not safe to sink instructions to an EH landing pad. Control flow
  // into a landing pad is implicitly defined.
  if (SuccToSinkTo->isEHPad())
    return nullptr;

  // It ought to be okay to sink instructions into an INLINEASM_BR target, but
  // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in
  // the source block (which this code does not yet do). So for now, forbid
  // doing so.
  if (SuccToSinkTo->isInlineAsmBrIndirectTarget())
    return nullptr;

  MachineBasicBlock::const_iterator InsertPos =
      SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
  if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI))
    return nullptr;

  return SuccToSinkTo;
}

/// Return true if MI is likely to be usable as a memory operation by the
/// implicit null check optimization.
///
/// This is a "best effort" heuristic, and should not be relied upon for
/// correctness.  This returning true does not guarantee that the implicit null
/// check optimization is legal over MI, and this returning false does not
/// guarantee MI cannot possibly be used to do a null check.
static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
                                             const TargetInstrInfo *TII,
                                             const TargetRegisterInfo *TRI) {
  using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate;

  auto *MBB = MI.getParent();
  if (MBB->pred_size() != 1)
    return false;

  auto *PredMBB = *MBB->pred_begin();
  auto *PredBB = PredMBB->getBasicBlock();

  // Frontends that don't use implicit null checks have no reason to emit
  // branches with make.implicit metadata, and this function should always
  // return false for them.
  if (!PredBB ||
      !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
    return false;

  const MachineOperand *BaseOp;
  int64_t Offset;
  bool OffsetIsScalable;
  if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
    return false;

  if (!BaseOp->isReg())
    return false;

  if (!(MI.mayLoad() && !MI.isPredicable()))
    return false;

  MachineBranchPredicate MBP;
  if (TII->analyzeBranchPredicate(*PredMBB, MBP, false))
    return false;

  return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 &&
         (MBP.Predicate == MachineBranchPredicate::PRED_NE ||
          MBP.Predicate == MachineBranchPredicate::PRED_EQ) &&
         MBP.LHS.getReg() == BaseOp->getReg();
}

/// If the sunk instruction is a copy, try to forward the copy instead of
/// leaving an 'undef' DBG_VALUE in the original location. Don't do this if
/// there's any subregister weirdness involved. Returns true if copy
/// propagation occurred.
static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI,
                                 Register Reg) {
  const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo();
  const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo();

  // Copy DBG_VALUE operand and set the original to undef. We then check to
  // see whether this is something that can be copy-forwarded. If it isn't,
  // continue around the loop.

  const MachineOperand *SrcMO = nullptr, *DstMO = nullptr;
  auto CopyOperands = TII.isCopyInstr(SinkInst);
  if (!CopyOperands)
    return false;
  SrcMO = CopyOperands->Source;
  DstMO = CopyOperands->Destination;

  // Check validity of forwarding this copy.
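  // If no virtual registers remain, register allocation has already run.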
  bool PostRA = MRI.getNumVirtRegs() == 0;

  // Trying to forward between physical and virtual registers is too hard.
  if (Reg.isVirtual() != SrcMO->getReg().isVirtual())
    return false;

  // Only try virtual register copy-forwarding before regalloc, and physical
  // register copy-forwarding after regalloc.
  bool arePhysRegs = !Reg.isVirtual();
  if (arePhysRegs != PostRA)
    return false;

  // Pre-regalloc, only forward if all subregisters agree (or there are no
  // subregs at all). More analysis might recover some forwardable copies.
  if (!PostRA)
    for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg))
      if (DbgMO.getSubReg() != SrcMO->getSubReg() ||
          DbgMO.getSubReg() != DstMO->getSubReg())
        return false;

  // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register
  // of this copy. Only forward the copy if the DBG_VALUE operand exactly
  // matches the copy destination.
  if (PostRA && Reg != DstMO->getReg())
    return false;

  for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) {
    DbgMO.setReg(SrcMO->getReg());
    DbgMO.setSubReg(SrcMO->getSubReg());
  }
  return true;
}

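/// A debug instruction paired with the subset of its used registers that
/// belong to the instruction being sunk.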
using MIRegs = std::pair<MachineInstr *, SmallVector<unsigned, 2>>;
/// Sink an instruction and its associated debug instructions.
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
                        MachineBasicBlock::iterator InsertPos,
                        ArrayRef<MIRegs> DbgValuesToSink) {
  // If we cannot find a location to use (merge with), then we erase the debug
  // location to prevent debug-info driven tools from potentially reporting
  // wrong location information.
  if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
    MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
                                                 InsertPos->getDebugLoc()));
  else
    MI.setDebugLoc(DebugLoc());

  // Move the instruction.
  MachineBasicBlock *ParentBlock = MI.getParent();
  SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
                      ++MachineBasicBlock::iterator(MI));

  // Sink a copy of debug users to the insert position. Mark the original
  // DBG_VALUE location as 'undef', indicating that any earlier variable
  // location should be terminated as we've optimised away the value at this
  // point.
  for (const auto &DbgValueToSink : DbgValuesToSink) {
    MachineInstr *DbgMI = DbgValueToSink.first;
    MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
    SuccToSinkTo.insert(InsertPos, NewDbgMI);

    bool PropagatedAllSunkOps = true;
    for (unsigned Reg : DbgValueToSink.second) {
      if (DbgMI->hasDebugOperandForReg(Reg)) {
        if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) {
          PropagatedAllSunkOps = false;
          break;
        }
      }
    }
    if (!PropagatedAllSunkOps)
      DbgMI->setDebugValueUndef();
  }
}

/// hasStoreBetween - Check whether there is a store between the straight-line
/// blocks From and To.
bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
                                     MachineBasicBlock *To, MachineInstr &MI) {
  // Make sure From and To are in a straight line, which means From dominates
  // To and To post-dominates From.
  if (!DT->dominates(From, To) || !PDT->dominates(To, From))
    return true;

  auto BlockPair = std::make_pair(From, To);

  // Has this block pair been queried before, with a definite cached result?
  if (HasStoreCache.find(BlockPair) != HasStoreCache.end())
    return HasStoreCache[BlockPair];

  if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end())
    return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) {
      return I->mayAlias(AA, MI, false);
    });

  bool SawStore = false;
  bool HasAliasedStore = false;
  DenseSet<MachineBasicBlock *> HandledBlocks;
  DenseSet<MachineBasicBlock *> HandledDomBlocks;
  // Go through all reachable blocks from From.
  for (MachineBasicBlock *BB : depth_first(From)) {
    // We insert the instruction at the start of block To, so no need to worry
    // about stores inside To.
    // Stores in block From were already considered on entry to
    // SinkInstruction.
    if (BB == To || BB == From)
      continue;

    // We already handled this BB in a previous iteration.
    if (HandledBlocks.count(BB))
      continue;

    HandledBlocks.insert(BB);
    // If To post-dominates BB, then BB lies on a path from From.
    if (PDT->dominates(To, BB)) {
      if (!HandledDomBlocks.count(BB))
        HandledDomBlocks.insert(BB);

      // If this BB is too big, or there are too many blocks on the straight
      // line between From and To, stop searching to save compile time.
      if (BB->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold) ||
          HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
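        // Conservatively record a store for every already-visited pair on
        // this straight-line path, so later queries bail out immediately.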
        for (auto *DomBB : HandledDomBlocks) {
          if (DomBB != BB && DT->dominates(DomBB, BB))
            HasStoreCache[std::make_pair(DomBB, To)] = true;
          else if (DomBB != BB && DT->dominates(BB, DomBB))
            HasStoreCache[std::make_pair(From, DomBB)] = true;
        }
        HasStoreCache[BlockPair] = true;
        return true;
      }

      for (MachineInstr &I : *BB) {
        // Treat as alias conservatively for a call or an ordered memory
        // operation.
        if (I.isCall() || I.hasOrderedMemoryRef()) {
          for (auto *DomBB : HandledDomBlocks) {
            if (DomBB != BB && DT->dominates(DomBB, BB))
              HasStoreCache[std::make_pair(DomBB, To)] = true;
            else if (DomBB != BB && DT->dominates(BB, DomBB))
              HasStoreCache[std::make_pair(From, DomBB)] = true;
          }
          HasStoreCache[BlockPair] = true;
          return true;
        }

        if (I.mayStore()) {
          SawStore = true;
          // We still have a chance to sink MI if none of the stores in
          // between alias MI.
          // Cache all store instructions, so that we don't need to walk all
          // blocks reachable from From again for the next load instruction.
          if (I.mayAlias(AA, MI, false))
            HasAliasedStore = true;
          StoreInstrCache[BlockPair].push_back(&I);
        }
      }
    }
  }
  // If there is no store at all, cache the result.
  if (!SawStore)
    HasStoreCache[BlockPair] = false;
  return HasAliasedStore;
}

/// Sink instructions into cycles if profitable. This especially tries to
/// prevent register spills caused by register pressure if there is little to
/// no overhead moving instructions into cycles.
bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
  LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I);
  MachineBasicBlock *Preheader = Cycle->getCyclePreheader();
  assert(Preheader && "Cycle sink needs a preheader block");
  MachineBasicBlock *SinkBlock = nullptr;
  bool CanSink = true;
  const MachineOperand &MO = I.getOperand(0);

  for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
    LLVM_DEBUG(dbgs() << "CycleSink:   Analysing use: " << MI);
    if (!Cycle->contains(MI.getParent())) {
      LLVM_DEBUG(dbgs() << "CycleSink:   Use not in cycle, can't sink.\n");
      CanSink = false;
      break;
    }

    // FIXME: Come up with a proper cost model that estimates whether sinking
    // the instruction (and thus possibly executing it on every cycle
    // iteration) is more expensive than keeping the value in a register.
    // For now, assume that copies are cheap and thus almost always worth it.
    if (!MI.isCopy()) {
      LLVM_DEBUG(dbgs() << "CycleSink:   Use is not a copy\n");
      CanSink = false;
      break;
    }
    if (!SinkBlock) {
      SinkBlock = MI.getParent();
      LLVM_DEBUG(dbgs() << "CycleSink:   Setting sink block to: "
                        << printMBBReference(*SinkBlock) << "\n");
      continue;
    }
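    // With multiple uses, sink to the nearest common dominator of all the use
    // blocks, so the single remaining def still dominates every use.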
    SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent());
    if (!SinkBlock) {
      LLVM_DEBUG(dbgs() << "CycleSink:   Can't find nearest dominator\n");
      CanSink = false;
      break;
    }
    LLVM_DEBUG(dbgs() << "CycleSink:   Setting nearest common dom block: " <<
               printMBBReference(*SinkBlock) << "\n");
  }

  if (!CanSink) {
    LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n");
    return false;
  }
  if (!SinkBlock) {
    LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n");
    return false;
  }
  if (SinkBlock == Preheader) {
    LLVM_DEBUG(
        dbgs() << "CycleSink: Not sinking, sink block is the preheader\n");
    return false;
  }
  if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) {
    LLVM_DEBUG(
        dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n");
  SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader,
                    I);

  // Conservatively clear any kill flags on uses of the sunk instruction.
  for (MachineOperand &MO : I.operands()) {
    if (MO.isReg() && MO.readsReg())
      RegsToClearKillFlags.insert(MO.getReg());
  }

  // The instruction is moved from its basic block, so do not retain the
  // debug information.
  assert(!I.isDebugInstr() && "Should not sink debug inst");
  I.setDebugLoc(DebugLoc());
  return true;
}

/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
                                     AllSuccsCache &AllSuccessors) {
  // Don't sink instructions that the target prefers not to sink.
  if (!TII->shouldSink(MI))
    return false;

  // Check if it's safe to move the instruction.
  if (!MI.isSafeToMove(AA, SawStore))
    return false;

  // Convergent operations may not be made control-dependent on additional
  // values.
  if (MI.isConvergent())
    return false;

  // Don't break implicit null checks.  This is a performance heuristic, and not
  // required for correctness.
  if (SinkingPreventsImplicitNullCheck(MI, TII, TRI))
    return false;

  // FIXME: This should include support for sinking instructions within the
  // block they are currently in to shorten the live ranges.  We often get
  // instructions sunk into the top of a large block, but it would be better to
  // also sink them down before their first use in the block.  This xform has to
  // be careful not to *increase* register pressure though, e.g. sinking
  // "x = y + z" down if it kills y and z would increase the live ranges of y
  // and z and only shrink the live range of x.

  bool BreakPHIEdge = false;
  MachineBasicBlock *ParentBlock = MI.getParent();
  MachineBasicBlock *SuccToSinkTo =
      FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors);

  // Bail out if there is no successor block into which it would be legal and
  // profitable to sink the instruction.
  if (!SuccToSinkTo)
    return false;

  // If the instruction to move defines a dead physical register which is live
  // when leaving the basic block, don't move it because it could turn into a
  // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
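  // For example (illustrative, x86-flavoured): an instruction such as
  //   %2 = SUB32rr %0, %1, implicit-def dead %eflags
  // must not be sunk into a successor where the flags are live-in, because
  // its dead flags definition would clobber a flags value that other code
  // still reads.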
  for (const MachineOperand &MO : MI.all_defs()) {
    Register Reg = MO.getReg();
    if (Reg == 0 || !Reg.isPhysical())
      continue;
    if (SuccToSinkTo->isLiveIn(Reg))
      return false;
  }

  LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo);

  // If the block has multiple predecessors, this is a critical edge.
  // Decide if we can sink along it or need to break the edge.
  if (SuccToSinkTo->pred_size() > 1) {
    // We cannot sink a load across a critical edge - there may be stores in
    // other code paths.
    bool TryBreak = false;
    bool Store =
        MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true;
    if (!MI.isSafeToMove(AA, Store)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
      TryBreak = true;
    }

    // We don't want to sink across a critical edge if we don't dominate the
    // successor. We could be introducing calculations on code paths that did
    // not previously execute them.
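    // For example (illustrative): suppose %bb.0 branches to %bb.2 and %bb.3,
    // and an unrelated block %bb.1 also branches to %bb.2. Then %bb.0 does
    // not dominate %bb.2, and sinking MI straight into %bb.2 would make it
    // execute on the path through %bb.1 as well. Splitting the critical edge
    // %bb.0 -> %bb.2 instead creates a block that lies only on that edge,
    // into which MI can be sunk safely.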
    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
      TryBreak = true;
    }

    // Don't sink instructions into a cycle.
    if (!TryBreak && CI->getCycle(SuccToSinkTo) &&
        (!CI->getCycle(SuccToSinkTo)->isReducible() ||
         CI->getCycle(SuccToSinkTo)->getHeader() == SuccToSinkTo)) {
      LLVM_DEBUG(dbgs() << " *** NOTE: cycle header found\n");
      TryBreak = true;
    }

    // Otherwise we are OK with sinking along a critical edge.
    if (!TryBreak)
      LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n");
    else {
      // Mark this edge as to be split.
      // If the edge can actually be split, the next iteration of the main loop
      // will sink MI in the newly created block.
      bool Status =
        PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
      if (!Status)
        LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                             "break critical edge\n");
      // The instruction will not be sunk this time.
      return false;
    }
  }

  if (BreakPHIEdge) {
    // BreakPHIEdge is true if all the uses are in the successor MBB being
    // sunk into and they are all PHI nodes. In this case, machine-sink must
    // break the critical edge first.
    bool Status = PostponeSplitCriticalEdge(MI, ParentBlock,
                                            SuccToSinkTo, BreakPHIEdge);
    if (!Status)
      LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
                           "break critical edge\n");
    // The instruction will not be sunk this time.
    return false;
  }

  // Determine where to insert the instruction. Skip PHI nodes and labels.
  MachineBasicBlock::iterator InsertPos =
      SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
  if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI)) {
    LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
    return false;
  }

  // Collect debug users of any vreg that this instruction defines.
  SmallVector<MIRegs, 4> DbgUsersToSink;
  for (auto &MO : MI.all_defs()) {
    if (!MO.getReg().isVirtual())
      continue;
    if (!SeenDbgUsers.count(MO.getReg()))
      continue;

    // Sink any users that don't pass any other DBG_VALUEs for this variable.
    auto &Users = SeenDbgUsers[MO.getReg()];
    for (auto &User : Users) {
      MachineInstr *DbgMI = User.getPointer();
      if (User.getInt()) {
        // This DBG_VALUE would re-order assignments. If we can't copy-propagate
        // it, it can't be recovered. Set it undef.
        if (!attemptDebugCopyProp(MI, *DbgMI, MO.getReg()))
          DbgMI->setDebugValueUndef();
      } else {
        DbgUsersToSink.push_back(
            {DbgMI, SmallVector<unsigned, 2>(1, MO.getReg())});
      }
    }
  }
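
  // For example (illustrative): given
  //
  //   %1 = COPY %0
  //   DBG_VALUE %1, !"x", ...
  //   DBG_VALUE %2, !"x", ...
  //
  // sinking the first DBG_VALUE together with the COPY would move the
  // variable's first assignment past its second one, so that DBG_VALUE is
  // copy-propagated to %0 (or set undef) rather than sunk.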

  // After sinking, some debug users may not be dominated any more. If possible,
  // copy-propagate their operands. As it's expensive, don't do this if there's
  // no debuginfo in the program.
  if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy())
    SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo);

  performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink);

  // Conservatively, clear any kill flags, since it's possible that they are no
  // longer correct.
  // Note that we have to clear the kill flags for any register this instruction
  // uses as we may sink over another instruction which currently kills the
  // used registers.
  for (MachineOperand &MO : MI.all_uses())
    RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.

  return true;
}

void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
    MachineInstr &MI, MachineBasicBlock *TargetBlock) {
  assert(MI.isCopy());
  assert(MI.getOperand(1).isReg());

  // Enumerate all users of vreg operands that are def'd. Skip those that will
  // be sunk. For the rest, if they are not dominated by the block we will sink
  // MI into, propagate the copy source to them.
  SmallVector<MachineInstr *, 4> DbgDefUsers;
  SmallVector<Register, 4> DbgUseRegs;
  const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
  for (auto &MO : MI.all_defs()) {
    if (!MO.getReg().isVirtual())
      continue;
    DbgUseRegs.push_back(MO.getReg());
    for (auto &User : MRI.use_instructions(MO.getReg())) {
      if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent()))
        continue;

      // If the user is in the same block, it will either be sunk as well or
      // become a use-before-def.
      if (User.getParent() == MI.getParent())
        continue;

      assert(User.hasDebugOperandForReg(MO.getReg()) &&
             "DBG_VALUE user of vreg, but has no operand for it?");
      DbgDefUsers.push_back(&User);
    }
  }

  // Point the users of this copy that are no longer dominated, at the source
  // of the copy.
  for (auto *User : DbgDefUsers) {
    for (auto &Reg : DbgUseRegs) {
      for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) {
        DbgOp.setReg(MI.getOperand(1).getReg());
        DbgOp.setSubReg(MI.getOperand(1).getSubReg());
      }
    }
  }
}
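
// For example (illustrative): if "%1 = COPY %0" is sunk out of its block, a
// DBG_VALUE of %1 in a block not dominated by the sink target can no longer
// refer to %1, so it is rewritten to refer to the copy source instead:
//
//   DBG_VALUE %1, !"x", ...  -->  DBG_VALUE %0, !"x", ...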

//===----------------------------------------------------------------------===//
// This pass is not intended to be a replacement or a complete alternative
// for the pre-ra machine sink pass. It is only designed to sink COPY
// instructions which should be handled after RA.
//
// This pass sinks COPY instructions into a successor block, if the COPY is not
// used in the current block and the COPY is live-in to a single successor
// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
// copy on paths where its result isn't needed. This also exposes additional
// opportunities for dead copy elimination and shrink wrapping.
//
// These copies were either not handled by or are inserted after the
// MachineSink pass. As an example of the former case, the MachineSink pass
// cannot sink COPY instructions with allocatable source registers; for AArch64
// this type of copy instruction is frequently used to move function parameters
// (PhyReg) into virtual registers in the entry block.
//
// For the machine IR below, this pass will sink %w19 in the entry into its
// successor (%bb.1) because %w19 is only live-in in %bb.1.
// %bb.0:
//   %wzr = SUBSWri %w1, 1
//   %w19 = COPY %w0
//   Bcc 11, %bb.2
// %bb.1:
//   Live Ins: %w19
//   BL @fun
//   %w0 = ADDWrr %w0, %w19
//   RET %w0
// %bb.2:
//   %w0 = COPY %wzr
//   RET %w0
// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will be
// able to see %bb.0 as a candidate.
//===----------------------------------------------------------------------===//
namespace {

class PostRAMachineSinking : public MachineFunctionPass {
public:
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
  PostRAMachineSinking() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return "PostRA Machine Sink"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  /// Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;

  /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
  /// entry in this map for each unit it touches. The DBG_VALUE's entry
  /// consists of a pointer to the instruction itself, and a vector of registers
  /// referred to by the instruction that overlap the key register unit.
  DenseMap<unsigned, SmallVector<MIRegs, 2>> SeenDbgInstrs;

  /// Sink COPY instructions that are not used in their own block closer to
  /// their uses in successor blocks.
  bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
                     const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
};
} // namespace

char PostRAMachineSinking::ID = 0;
char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;

INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
                "PostRA Machine Sink", false, false)

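/// Return true if any register unit of \p Reg is live-in to \p MBB.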
static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
                                  const TargetRegisterInfo *TRI) {
  LiveRegUnits LiveInRegUnits(*TRI);
  LiveInRegUnits.addLiveIns(MBB);
  return !LiveInRegUnits.available(Reg);
}

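/// Find the single block in \p SinkableBBs to which \p Reg is live-in.
/// Return null if there is no such block, if \p Reg is live-in to more than
/// one sinkable successor, or if a register aliasing \p Reg is live-in to a
/// non-sinkable successor of \p CurBB.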
static MachineBasicBlock *
getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
                      const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
                      unsigned Reg, const TargetRegisterInfo *TRI) {
  // Try to find a single sinkable successor in which Reg is live-in.
  MachineBasicBlock *BB = nullptr;
  for (auto *SI : SinkableBBs) {
    if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
      // If BB is set here, Reg is live-in to at least two sinkable successors,
      // so quit.
      if (BB)
        return nullptr;
      BB = SI;
    }
  }
  // Reg is not live-in to any sinkable successors.
  if (!BB)
    return nullptr;

  // Check if any register aliased with Reg is live-in in other successors.
  for (auto *SI : CurBB.successors()) {
    if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI))
      return nullptr;
  }
  return BB;
}

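/// Variant of the above for a COPY defining several registers: all registers
/// in \p DefedRegsInCopy must agree on the same single live-in successor;
/// otherwise null is returned.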
static MachineBasicBlock *
getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
                      const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
                      ArrayRef<unsigned> DefedRegsInCopy,
                      const TargetRegisterInfo *TRI) {
  MachineBasicBlock *SingleBB = nullptr;
  for (auto DefReg : DefedRegsInCopy) {
    MachineBasicBlock *BB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI);
    if (!BB || (SingleBB && SingleBB != BB))
      return nullptr;
    SingleBB = BB;
  }
  return SingleBB;
}

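/// If a source register of \p MI (use-operand indices in \p UsedOpsInCopy)
/// is killed by an instruction between \p MI and the end of \p CurBB, clear
/// that kill flag and mark the COPY's own use as the kill instead, since
/// after sinking the COPY becomes the last use of the register on this path.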
static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
                           SmallVectorImpl<unsigned> &UsedOpsInCopy,
                           LiveRegUnits &UsedRegUnits,
                           const TargetRegisterInfo *TRI) {
  for (auto U : UsedOpsInCopy) {
    MachineOperand &MO = MI->getOperand(U);
    Register SrcReg = MO.getReg();
    if (!UsedRegUnits.available(SrcReg)) {
      MachineBasicBlock::iterator NI = std::next(MI->getIterator());
      for (MachineInstr &UI : make_range(NI, CurBB.end())) {
        if (UI.killsRegister(SrcReg, TRI)) {
          UI.clearRegisterKills(SrcReg, TRI);
          MO.setIsKill(true);
          break;
        }
      }
    }
  }
}

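/// Update the live-in list of \p SuccBB after sinking \p MI into it: the
/// registers defined by the COPY (\p DefedRegsInCopy) are now defined inside
/// the block and are removed from the live-in set, while the COPY's source
/// registers (at the operand indices in \p UsedOpsInCopy) become live-in.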
static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
                         SmallVectorImpl<unsigned> &UsedOpsInCopy,
                         SmallVectorImpl<unsigned> &DefedRegsInCopy) {
  MachineFunction &MF = *SuccBB->getParent();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  for (unsigned DefReg : DefedRegsInCopy)
    for (MCPhysReg S : TRI->subregs_inclusive(DefReg))
      SuccBB->removeLiveIn(S);
  for (auto U : UsedOpsInCopy) {
    Register SrcReg = MI->getOperand(U).getReg();
    LaneBitmask Mask;
    for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) {
      Mask |= (*S).second;
    }
    SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll());
  }
  SuccBB->sortUniqueLiveIns();
}

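/// Return true if \p MI cannot be sunk past the instructions that follow it
/// in its block, i.e. if it defines a register that is modified or used below
/// it, or uses a register that is modified below it, as recorded in
/// \p ModifiedRegUnits and \p UsedRegUnits. If no dependency is found, the
/// indices of \p MI's use operands are collected in \p UsedOpsInCopy and its
/// defined registers in \p DefedRegsInCopy.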
static bool hasRegisterDependency(MachineInstr *MI,
                                  SmallVectorImpl<unsigned> &UsedOpsInCopy,
                                  SmallVectorImpl<unsigned> &DefedRegsInCopy,
                                  LiveRegUnits &ModifiedRegUnits,
                                  LiveRegUnits &UsedRegUnits) {
  bool HasRegDependency = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (MO.isDef()) {
      if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) {
        HasRegDependency = true;
        break;
      }
      DefedRegsInCopy.push_back(Reg);

      // FIXME: Instead of isUse(), readsReg() would be a better fix here;
      // for example, we could then ignore modifications of a register marked
      // undef. However, it's not perfectly clear whether skipping the
      // internal read is safe on all targets.
    } else if (MO.isUse()) {
      if (!ModifiedRegUnits.available(Reg)) {
        HasRegDependency = true;
        break;
      }
      UsedOpsInCopy.push_back(i);
    }
  }
  return HasRegDependency;
}
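
// For example (illustrative AArch64-flavoured MIR): scanning the block
// bottom-up, the ADDWrr below is visited first and records %w19 in
// UsedRegUnits, so when the COPY is reached its def of %w19 is not available
// and a dependency is reported:
//
//   %w19 = COPY %w0          ; cannot sink: %w19 is read later in the block
//   %w1 = ADDWrr %w19, %w2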

bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
                                         MachineFunction &MF,
                                         const TargetRegisterInfo *TRI,
                                         const TargetInstrInfo *TII) {
  SmallPtrSet<MachineBasicBlock *, 2> SinkableBBs;
  // FIXME: For now, we sink only to a successor which has a single predecessor
  // so that we can directly sink COPY instructions to the successor without
  // adding any new block or branch instruction.
  for (MachineBasicBlock *SI : CurBB.successors())
    if (!SI->livein_empty() && SI->pred_size() == 1)
      SinkableBBs.insert(SI);

  if (SinkableBBs.empty())
    return false;

  bool Changed = false;

  // Track which registers have been modified and used between the end of the
  // block and the current instruction.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  SeenDbgInstrs.clear();

  for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) {
    // Track the operand indices of the registers used by the COPY.
    SmallVector<unsigned, 2> UsedOpsInCopy;
    // Track the registers defined by the COPY.
    SmallVector<unsigned, 2> DefedRegsInCopy;

    // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
    // for DBG_VALUEs later, record them when they're encountered.
    if (MI.isDebugValue() && !MI.isDebugRef()) {
      SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
      bool IsValid = true;
      for (MachineOperand &MO : MI.debug_operands()) {
        if (MO.isReg() && MO.getReg().isPhysical()) {
          // Bail if we can already tell the sink would be rejected, rather
          // than needlessly accumulating lots of DBG_VALUEs.
          if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                                    ModifiedRegUnits, UsedRegUnits)) {
            IsValid = false;
            break;
          }

          // Record debug use of each reg unit.
          for (MCRegUnit Unit : TRI->regunits(MO.getReg()))
            MIUnits[Unit].push_back(MO.getReg());
        }
      }
      if (IsValid) {
        for (auto &RegOps : MIUnits)
          SeenDbgInstrs[RegOps.first].emplace_back(&MI,
                                                   std::move(RegOps.second));
      }
      continue;
    }

    if (MI.isDebugOrPseudoInstr())
      continue;

    // Do not move any instruction across a function call.
    if (MI.isCall())
      return false;

    if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }

    // Don't sink the COPY if it would violate a register dependency.
    if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                              ModifiedRegUnits, UsedRegUnits)) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
           "Unexpected SrcReg or DefReg");
    MachineBasicBlock *SuccBB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);
    // Don't sink if we cannot find a single sinkable successor in which the
    // COPY's definitions are live-in.
    if (!SuccBB) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
           "Unexpected predecessor");

    // Collect DBG_VALUEs that must sink with this copy. We've previously
    // recorded which reg units DBG_VALUEs read; if this instruction writes
    // any of those units, the corresponding DBG_VALUEs must sink as well.
    MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
    for (auto &MO : MI.all_defs()) {
      for (MCRegUnit Unit : TRI->regunits(MO.getReg())) {
        for (const auto &MIRegs : SeenDbgInstrs.lookup(Unit)) {
          auto &Regs = DbgValsToSinkMap[MIRegs.first];
          for (unsigned Reg : MIRegs.second)
            Regs.push_back(Reg);
        }
      }
    }
    auto DbgValsToSink = DbgValsToSinkMap.takeVector();

    LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccBB);

    MachineBasicBlock::iterator InsertPos =
        SuccBB->SkipPHIsAndLabels(SuccBB->begin());
    if (blockPrologueInterferes(SuccBB, InsertPos, MI, TRI, TII, nullptr)) {
      LLVM_DEBUG(dbgs() << " *** Not sinking: prologue interference\n");
      continue;
    }

    // Clear the kill flag if SrcReg is killed between MI and the end of the
    // block.
    clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
    updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);

    Changed = true;
    ++NumPostRACopySink;
  }
  return Changed;
}

bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  bool Changed = false;
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  for (auto &BB : MF)
    Changed |= tryToSinkCopy(BB, MF, TRI, TII);

  return Changed;
}
