1 //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the generic RegisterCoalescer interface which
10 // is used as the common interface used by all clients and
11 // implementations of register coalescing.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "RegisterCoalescer.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/BitVector.h"
18 #include "llvm/ADT/DenseSet.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/Analysis/AliasAnalysis.h"
24 #include "llvm/CodeGen/LiveInterval.h"
25 #include "llvm/CodeGen/LiveIntervals.h"
26 #include "llvm/CodeGen/LiveRangeEdit.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineFunction.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineLoopInfo.h"
33 #include "llvm/CodeGen/MachineOperand.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/Passes.h"
36 #include "llvm/CodeGen/RegisterClassInfo.h"
37 #include "llvm/CodeGen/SlotIndexes.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
39 #include "llvm/CodeGen/TargetOpcodes.h"
40 #include "llvm/CodeGen/TargetRegisterInfo.h"
41 #include "llvm/CodeGen/TargetSubtargetInfo.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/InitializePasses.h"
44 #include "llvm/MC/LaneBitmask.h"
45 #include "llvm/MC/MCInstrDesc.h"
46 #include "llvm/MC/MCRegisterInfo.h"
47 #include "llvm/Pass.h"
48 #include "llvm/Support/CommandLine.h"
49 #include "llvm/Support/Compiler.h"
50 #include "llvm/Support/Debug.h"
51 #include "llvm/Support/ErrorHandling.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include <algorithm>
54 #include <cassert>
55 #include <iterator>
56 #include <limits>
57 #include <tuple>
58 #include <utility>
59 #include <vector>
60 
61 using namespace llvm;
62 
63 #define DEBUG_TYPE "regalloc"
64 
65 STATISTIC(numJoins    , "Number of interval joins performed");
66 STATISTIC(numCrossRCs , "Number of cross class joins performed");
67 STATISTIC(numCommutes , "Number of instruction commuting performed");
68 STATISTIC(numExtends  , "Number of copies extended");
69 STATISTIC(NumReMats   , "Number of instructions re-materialized");
70 STATISTIC(NumInflated , "Number of register classes inflated");
71 STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
72 STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
73 STATISTIC(NumShrinkToUses,  "Number of shrinkToUses called");
74 
75 static cl::opt<bool> EnableJoining("join-liveintervals",
76                                    cl::desc("Coalesce copies (default=true)"),
77                                    cl::init(true), cl::Hidden);
78 
79 static cl::opt<bool> UseTerminalRule("terminal-rule",
80                                      cl::desc("Apply the terminal rule"),
81                                      cl::init(false), cl::Hidden);
82 
83 /// Temporary flag to test critical edge unsplitting.
84 static cl::opt<bool>
85 EnableJoinSplits("join-splitedges",
86   cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
87 
88 /// Temporary flag to test global copy optimization.
89 static cl::opt<cl::boolOrDefault>
90 EnableGlobalCopies("join-globalcopies",
91   cl::desc("Coalesce copies that span blocks (default=subtarget)"),
92   cl::init(cl::BOU_UNSET), cl::Hidden);
93 
94 static cl::opt<bool>
95 VerifyCoalescing("verify-coalescing",
96          cl::desc("Verify machine instrs before and after register coalescing"),
97          cl::Hidden);
98 
99 static cl::opt<unsigned> LateRematUpdateThreshold(
100     "late-remat-update-threshold", cl::Hidden,
101     cl::desc("During rematerialization for a copy, if the def instruction has "
102              "many other copy uses to be rematerialized, delay the multiple "
103              "separate live interval update work and do them all at once after "
104              "all those rematerialization are done. It will save a lot of "
105              "repeated work. "),
106     cl::init(100));
107 
108 static cl::opt<unsigned> LargeIntervalSizeThreshold(
109     "large-interval-size-threshold", cl::Hidden,
110     cl::desc("If the valnos size of an interval is larger than the threshold, "
111              "it is regarded as a large interval. "),
112     cl::init(100));
113 
114 static cl::opt<unsigned> LargeIntervalFreqThreshold(
115     "large-interval-freq-threshold", cl::Hidden,
116     cl::desc("For a large interval, if it is coalesed with other live "
117              "intervals many times more than the threshold, stop its "
118              "coalescing to control the compile time. "),
119     cl::init(256));
120 
121 namespace {
122 
123   class JoinVals;
124 
125   class RegisterCoalescer : public MachineFunctionPass,
126                             private LiveRangeEdit::Delegate {
127     MachineFunction* MF = nullptr;
128     MachineRegisterInfo* MRI = nullptr;
129     const TargetRegisterInfo* TRI = nullptr;
130     const TargetInstrInfo* TII = nullptr;
131     LiveIntervals *LIS = nullptr;
132     const MachineLoopInfo* Loops = nullptr;
133     AliasAnalysis *AA = nullptr;
134     RegisterClassInfo RegClassInfo;
135 
136     /// Position and VReg of a PHI instruction during coalescing.
137     struct PHIValPos {
138       SlotIndex SI;    ///< Slot where this PHI occurs.
139       Register Reg;    ///< VReg the PHI occurs in.
140       unsigned SubReg; ///< Qualifying subregister for Reg.
141     };
142 
143     /// Map from debug instruction number to PHI position during coalescing.
144     DenseMap<unsigned, PHIValPos> PHIValToPos;
145     /// Index of, for each VReg, which debug instruction numbers and
146     /// corresponding PHIs are sensitive to coalescing. Each VReg may have
147     /// multiple PHI defs, at different positions.
148     DenseMap<Register, SmallVector<unsigned, 2>> RegToPHIIdx;
149 
150     /// Debug variable location tracking -- for each VReg, maintain an
151     /// ordered-by-slot-index set of DBG_VALUEs, to help quick
152     /// identification of whether coalescing may change location validity.
153     using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
154     DenseMap<Register, std::vector<DbgValueLoc>> DbgVRegToValues;
155 
156     /// A LaneMask to remember on which subregister live ranges we need to call
157     /// shrinkToUses() later.
158     LaneBitmask ShrinkMask;
159 
160     /// True if the main range of the currently coalesced intervals should be
161     /// checked for smaller live intervals.
162     bool ShrinkMainRange = false;
163 
164     /// True if the coalescer should aggressively coalesce global copies
165     /// in favor of keeping local copies.
166     bool JoinGlobalCopies = false;
167 
168     /// True if the coalescer should aggressively coalesce fall-thru
169     /// blocks exclusively containing copies.
170     bool JoinSplitEdges = false;
171 
172     /// Copy instructions yet to be coalesced.
173     SmallVector<MachineInstr*, 8> WorkList;
174     SmallVector<MachineInstr*, 8> LocalWorkList;
175 
176     /// Set of instruction pointers that have been erased, and
177     /// that may be present in WorkList.
178     SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
179 
180     /// Dead instructions that are about to be deleted.
181     SmallVector<MachineInstr*, 8> DeadDefs;
182 
183     /// Virtual registers to be considered for register class inflation.
184     SmallVector<Register, 8> InflateRegs;
185 
186     /// The collection of live intervals which should have been updated
187     /// immediately after rematerialiation but delayed until
188     /// lateLiveIntervalUpdate is called.
189     DenseSet<Register> ToBeUpdated;
190 
191     /// Record how many times the large live interval with many valnos
192     /// has been tried to join with other live interval.
193     DenseMap<Register, unsigned long> LargeLIVisitCounter;
194 
195     /// Recursively eliminate dead defs in DeadDefs.
196     void eliminateDeadDefs(LiveRangeEdit *Edit = nullptr);
197 
198     /// LiveRangeEdit callback for eliminateDeadDefs().
199     void LRE_WillEraseInstruction(MachineInstr *MI) override;
200 
201     /// Coalesce the LocalWorkList.
202     void coalesceLocals();
203 
204     /// Join compatible live intervals
205     void joinAllIntervals();
206 
207     /// Coalesce copies in the specified MBB, putting
208     /// copies that cannot yet be coalesced into WorkList.
209     void copyCoalesceInMBB(MachineBasicBlock *MBB);
210 
211     /// Tries to coalesce all copies in CurrList. Returns true if any progress
212     /// was made.
213     bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
214 
215     /// If one def has many copy like uses, and those copy uses are all
216     /// rematerialized, the live interval update needed for those
217     /// rematerializations will be delayed and done all at once instead
218     /// of being done multiple times. This is to save compile cost because
219     /// live interval update is costly.
220     void lateLiveIntervalUpdate();
221 
222     /// Check if the incoming value defined by a COPY at \p SLRQ in the subrange
223     /// has no value defined in the predecessors. If the incoming value is the
224     /// same as defined by the copy itself, the value is considered undefined.
225     bool copyValueUndefInPredecessors(LiveRange &S,
226                                       const MachineBasicBlock *MBB,
227                                       LiveQueryResult SLRQ);
228 
229     /// Set necessary undef flags on subregister uses after pruning out undef
230     /// lane segments from the subrange.
231     void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg,
232                                     LaneBitmask PrunedLanes);
233 
234     /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
235     /// src/dst of the copy instruction CopyMI.  This returns true if the copy
236     /// was successfully coalesced away. If it is not currently possible to
237     /// coalesce this interval, but it may be possible if other things get
238     /// coalesced, then it returns true by reference in 'Again'.
239     bool joinCopy(MachineInstr *CopyMI, bool &Again);
240 
241     /// Attempt to join these two intervals.  On failure, this
242     /// returns false.  The output "SrcInt" will not have been modified, so we
243     /// can use this information below to update aliases.
244     bool joinIntervals(CoalescerPair &CP);
245 
246     /// Attempt joining two virtual registers. Return true on success.
247     bool joinVirtRegs(CoalescerPair &CP);
248 
249     /// If a live interval has many valnos and is coalesced with other
250     /// live intervals many times, we regard such live interval as having
251     /// high compile time cost.
252     bool isHighCostLiveInterval(LiveInterval &LI);
253 
254     /// Attempt joining with a reserved physreg.
255     bool joinReservedPhysReg(CoalescerPair &CP);
256 
257     /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
258     /// Subranges in @p LI which only partially interfere with the desired
259     /// LaneMask are split as necessary. @p LaneMask are the lanes that
260     /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
261     /// lanemasks already adjusted to the coalesced register.
262     void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
263                            LaneBitmask LaneMask, CoalescerPair &CP,
264                            unsigned DstIdx);
265 
266     /// Join the liveranges of two subregisters. Joins @p RRange into
267     /// @p LRange, @p RRange may be invalid afterwards.
268     void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
269                           LaneBitmask LaneMask, const CoalescerPair &CP);
270 
271     /// We found a non-trivially-coalescable copy. If the source value number is
272     /// defined by a copy from the destination reg see if we can merge these two
273     /// destination reg valno# into a single value number, eliminating a copy.
274     /// This returns true if an interval was modified.
275     bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
276 
277     /// Return true if there are definitions of IntB
278     /// other than BValNo val# that can reach uses of AValno val# of IntA.
279     bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
280                               VNInfo *AValNo, VNInfo *BValNo);
281 
282     /// We found a non-trivially-coalescable copy.
283     /// If the source value number is defined by a commutable instruction and
284     /// its other operand is coalesced to the copy dest register, see if we
285     /// can transform the copy into a noop by commuting the definition.
286     /// This returns a pair of two flags:
287     /// - the first element is true if an interval was modified,
288     /// - the second element is true if the destination interval needs
289     ///   to be shrunk after deleting the copy.
290     std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP,
291                                                   MachineInstr *CopyMI);
292 
293     /// We found a copy which can be moved to its less frequent predecessor.
294     bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
295 
296     /// If the source of a copy is defined by a
297     /// trivial computation, replace the copy by rematerialize the definition.
298     bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
299                                  bool &IsDefCopy);
300 
301     /// Return true if a copy involving a physreg should be joined.
302     bool canJoinPhys(const CoalescerPair &CP);
303 
304     /// Replace all defs and uses of SrcReg to DstReg and update the subregister
305     /// number if it is not zero. If DstReg is a physical register and the
306     /// existing subregister number of the def / use being updated is not zero,
307     /// make sure to set it to the correct physical subregister.
308     void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
309 
310     /// If the given machine operand reads only undefined lanes add an undef
311     /// flag.
312     /// This can happen when undef uses were previously concealed by a copy
313     /// which we coalesced. Example:
314     ///    %0:sub0<def,read-undef> = ...
315     ///    %1 = COPY %0           <-- Coalescing COPY reveals undef
316     ///       = use %1:sub1       <-- hidden undef use
317     void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
318                       MachineOperand &MO, unsigned SubRegIdx);
319 
320     /// Handle copies of undef values. If the undef value is an incoming
321     /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
322     /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
323     /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
324     MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);
325 
326     /// Check whether or not we should apply the terminal rule on the
327     /// destination (Dst) of \p Copy.
328     /// When the terminal rule applies, Copy is not profitable to
329     /// coalesce.
330     /// Dst is terminal if it has exactly one affinity (Dst, Src) and
331     /// at least one interference (Dst, Dst2). If Dst is terminal, the
332     /// terminal rule consists in checking that at least one of
333     /// interfering node, say Dst2, has an affinity of equal or greater
334     /// weight with Src.
335     /// In that case, Dst2 and Dst will not be able to be both coalesced
336     /// with Src. Since Dst2 exposes more coalescing opportunities than
337     /// Dst, we can drop \p Copy.
338     bool applyTerminalRule(const MachineInstr &Copy) const;
339 
340     /// Wrapper method for \see LiveIntervals::shrinkToUses.
341     /// This method does the proper fixing of the live-ranges when the afore
342     /// mentioned method returns true.
343     void shrinkToUses(LiveInterval *LI,
344                       SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
345       NumShrinkToUses++;
346       if (LIS->shrinkToUses(LI, Dead)) {
347         /// Check whether or not \p LI is composed by multiple connected
348         /// components and if that is the case, fix that.
349         SmallVector<LiveInterval*, 8> SplitLIs;
350         LIS->splitSeparateComponents(*LI, SplitLIs);
351       }
352     }
353 
354     /// Wrapper Method to do all the necessary work when an Instruction is
355     /// deleted.
356     /// Optimizations should use this to make sure that deleted instructions
357     /// are always accounted for.
358     void deleteInstr(MachineInstr* MI) {
359       ErasedInstrs.insert(MI);
360       LIS->RemoveMachineInstrFromMaps(*MI);
361       MI->eraseFromParent();
362     }
363 
364     /// Walk over function and initialize the DbgVRegToValues map.
365     void buildVRegToDbgValueMap(MachineFunction &MF);
366 
367     /// Test whether, after merging, any DBG_VALUEs would refer to a
368     /// different value number than before merging, and whether this can
369     /// be resolved. If not, mark the DBG_VALUE as being undef.
370     void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS,
371                                       JoinVals &LHSVals, LiveRange &RHS,
372                                       JoinVals &RHSVals);
373 
374     void checkMergingChangesDbgValuesImpl(Register Reg, LiveRange &OtherRange,
375                                           LiveRange &RegRange, JoinVals &Vals2);
376 
377   public:
378     static char ID; ///< Class identification, replacement for typeinfo
379 
380     RegisterCoalescer() : MachineFunctionPass(ID) {
381       initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
382     }
383 
384     void getAnalysisUsage(AnalysisUsage &AU) const override;
385 
386     MachineFunctionProperties getClearedProperties() const override {
387       return MachineFunctionProperties().set(
388           MachineFunctionProperties::Property::IsSSA);
389     }
390 
391     void releaseMemory() override;
392 
393     /// This is the pass entry point.
394     bool runOnMachineFunction(MachineFunction&) override;
395 
396     /// Implement the dump method.
397     void print(raw_ostream &O, const Module* = nullptr) const override;
398   };
399 
400 } // end anonymous namespace
401 
402 char RegisterCoalescer::ID = 0;
403 
404 char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
405 
406 INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer",
407                       "Register Coalescer", false, false)
408 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
409 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
410 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
411 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
412 INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer",
413                     "Register Coalescer", false, false)
414 
415 [[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri,
416                                       const MachineInstr *MI, Register &Src,
417                                       Register &Dst, unsigned &SrcSub,
418                                       unsigned &DstSub) {
419     if (MI->isCopy()) {
420       Dst = MI->getOperand(0).getReg();
421       DstSub = MI->getOperand(0).getSubReg();
422       Src = MI->getOperand(1).getReg();
423       SrcSub = MI->getOperand(1).getSubReg();
424     } else if (MI->isSubregToReg()) {
425       Dst = MI->getOperand(0).getReg();
426       DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
427                                         MI->getOperand(3).getImm());
428       Src = MI->getOperand(2).getReg();
429       SrcSub = MI->getOperand(2).getSubReg();
430     } else
431       return false;
432     return true;
433 }
434 
435 /// Return true if this block should be vacated by the coalescer to eliminate
436 /// branches. The important cases to handle in the coalescer are critical edges
437 /// split during phi elimination which contain only copies. Simple blocks that
438 /// contain non-branches should also be vacated, but this can be handled by an
439 /// earlier pass similar to early if-conversion.
440 static bool isSplitEdge(const MachineBasicBlock *MBB) {
441   if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
442     return false;
443 
444   for (const auto &MI : *MBB) {
445     if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
446       return false;
447   }
448   return true;
449 }
450 
451 bool CoalescerPair::setRegisters(const MachineInstr *MI) {
452   SrcReg = DstReg = Register();
453   SrcIdx = DstIdx = 0;
454   NewRC = nullptr;
455   Flipped = CrossClass = false;
456 
457   Register Src, Dst;
458   unsigned SrcSub = 0, DstSub = 0;
459   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
460     return false;
461   Partial = SrcSub || DstSub;
462 
463   // If one register is a physreg, it must be Dst.
464   if (Src.isPhysical()) {
465     if (Dst.isPhysical())
466       return false;
467     std::swap(Src, Dst);
468     std::swap(SrcSub, DstSub);
469     Flipped = true;
470   }
471 
472   const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
473 
474   if (Dst.isPhysical()) {
475     // Eliminate DstSub on a physreg.
476     if (DstSub) {
477       Dst = TRI.getSubReg(Dst, DstSub);
478       if (!Dst) return false;
479       DstSub = 0;
480     }
481 
482     // Eliminate SrcSub by picking a corresponding Dst superregister.
483     if (SrcSub) {
484       Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
485       if (!Dst) return false;
486     } else if (!MRI.getRegClass(Src)->contains(Dst)) {
487       return false;
488     }
489   } else {
490     // Both registers are virtual.
491     const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
492     const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
493 
494     // Both registers have subreg indices.
495     if (SrcSub && DstSub) {
496       // Copies between different sub-registers are never coalescable.
497       if (Src == Dst && SrcSub != DstSub)
498         return false;
499 
500       NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
501                                          SrcIdx, DstIdx);
502       if (!NewRC)
503         return false;
504     } else if (DstSub) {
505       // SrcReg will be merged with a sub-register of DstReg.
506       SrcIdx = DstSub;
507       NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
508     } else if (SrcSub) {
509       // DstReg will be merged with a sub-register of SrcReg.
510       DstIdx = SrcSub;
511       NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
512     } else {
513       // This is a straight copy without sub-registers.
514       NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
515     }
516 
517     // The combined constraint may be impossible to satisfy.
518     if (!NewRC)
519       return false;
520 
521     // Prefer SrcReg to be a sub-register of DstReg.
522     // FIXME: Coalescer should support subregs symmetrically.
523     if (DstIdx && !SrcIdx) {
524       std::swap(Src, Dst);
525       std::swap(SrcIdx, DstIdx);
526       Flipped = !Flipped;
527     }
528 
529     CrossClass = NewRC != DstRC || NewRC != SrcRC;
530   }
531   // Check our invariants
532   assert(Src.isVirtual() && "Src must be virtual");
533   assert(!(Dst.isPhysical() && DstSub) && "Cannot have a physical SubIdx");
534   SrcReg = Src;
535   DstReg = Dst;
536   return true;
537 }
538 
539 bool CoalescerPair::flip() {
540   if (DstReg.isPhysical())
541     return false;
542   std::swap(SrcReg, DstReg);
543   std::swap(SrcIdx, DstIdx);
544   Flipped = !Flipped;
545   return true;
546 }
547 
548 bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
549   if (!MI)
550     return false;
551   Register Src, Dst;
552   unsigned SrcSub = 0, DstSub = 0;
553   if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
554     return false;
555 
556   // Find the virtual register that is SrcReg.
557   if (Dst == SrcReg) {
558     std::swap(Src, Dst);
559     std::swap(SrcSub, DstSub);
560   } else if (Src != SrcReg) {
561     return false;
562   }
563 
564   // Now check that Dst matches DstReg.
565   if (DstReg.isPhysical()) {
566     if (!Dst.isPhysical())
567       return false;
568     assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
569     // DstSub could be set for a physreg from INSERT_SUBREG.
570     if (DstSub)
571       Dst = TRI.getSubReg(Dst, DstSub);
572     // Full copy of Src.
573     if (!SrcSub)
574       return DstReg == Dst;
575     // This is a partial register copy. Check that the parts match.
576     return Register(TRI.getSubReg(DstReg, SrcSub)) == Dst;
577   } else {
578     // DstReg is virtual.
579     if (DstReg != Dst)
580       return false;
581     // Registers match, do the subregisters line up?
582     return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
583            TRI.composeSubRegIndices(DstIdx, DstSub);
584   }
585 }
586 
587 void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
588   AU.setPreservesCFG();
589   AU.addRequired<AAResultsWrapperPass>();
590   AU.addRequired<LiveIntervals>();
591   AU.addPreserved<LiveIntervals>();
592   AU.addPreserved<SlotIndexes>();
593   AU.addRequired<MachineLoopInfo>();
594   AU.addPreserved<MachineLoopInfo>();
595   AU.addPreservedID(MachineDominatorsID);
596   MachineFunctionPass::getAnalysisUsage(AU);
597 }
598 
599 void RegisterCoalescer::eliminateDeadDefs(LiveRangeEdit *Edit) {
600   if (Edit) {
601     Edit->eliminateDeadDefs(DeadDefs);
602     return;
603   }
604   SmallVector<Register, 8> NewRegs;
605   LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
606                 nullptr, this).eliminateDeadDefs(DeadDefs);
607 }
608 
609 void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
610   // MI may be in WorkList. Make sure we don't visit it.
611   ErasedInstrs.insert(MI);
612 }
613 
614 bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
615                                              MachineInstr *CopyMI) {
616   assert(!CP.isPartial() && "This doesn't work for partial copies.");
617   assert(!CP.isPhys() && "This doesn't work for physreg copies.");
618 
619   LiveInterval &IntA =
620     LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
621   LiveInterval &IntB =
622     LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
623   SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
624 
625   // We have a non-trivially-coalescable copy with IntA being the source and
626   // IntB being the dest, thus this defines a value number in IntB.  If the
627   // source value number (in IntA) is defined by a copy from B, see if we can
628   // merge these two pieces of B into a single value number, eliminating a copy.
629   // For example:
630   //
631   //  A3 = B0
632   //    ...
633   //  B1 = A3      <- this copy
634   //
635   // In this case, B0 can be extended to where the B1 copy lives, allowing the
636   // B1 value number to be replaced with B0 (which simplifies the B
637   // liveinterval).
638 
639   // BValNo is a value number in B that is defined by a copy from A.  'B1' in
640   // the example above.
641   LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
642   if (BS == IntB.end()) return false;
643   VNInfo *BValNo = BS->valno;
644 
645   // Get the location that B is defined at.  Two options: either this value has
646   // an unknown definition point or it is defined at CopyIdx.  If unknown, we
647   // can't process it.
648   if (BValNo->def != CopyIdx) return false;
649 
650   // AValNo is the value number in A that defines the copy, A3 in the example.
651   SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
652   LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
653   // The live segment might not exist after fun with physreg coalescing.
654   if (AS == IntA.end()) return false;
655   VNInfo *AValNo = AS->valno;
656 
657   // If AValNo is defined as a copy from IntB, we can potentially process this.
658   // Get the instruction that defines this value number.
659   MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
660   // Don't allow any partial copies, even if isCoalescable() allows them.
661   if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
662     return false;
663 
664   // Get the Segment in IntB that this value number starts with.
665   LiveInterval::iterator ValS =
666     IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
667   if (ValS == IntB.end())
668     return false;
669 
670   // Make sure that the end of the live segment is inside the same block as
671   // CopyMI.
672   MachineInstr *ValSEndInst =
673     LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
674   if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
675     return false;
676 
677   // Okay, we now know that ValS ends in the same block that the CopyMI
678   // live-range starts.  If there are no intervening live segments between them
679   // in IntB, we can merge them.
680   if (ValS+1 != BS) return false;
681 
682   LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg(), TRI));
683 
684   SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
685   // We are about to delete CopyMI, so need to remove it as the 'instruction
686   // that defines this value #'. Update the valnum with the new defining
687   // instruction #.
688   BValNo->def = FillerStart;
689 
690   // Okay, we can merge them.  We need to insert a new liverange:
691   // [ValS.end, BS.begin) of either value number, then we merge the
692   // two value numbers.
693   IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
694 
695   // Okay, merge "B1" into the same value number as "B0".
696   if (BValNo != ValS->valno)
697     IntB.MergeValueNumberInto(BValNo, ValS->valno);
698 
699   // Do the same for the subregister segments.
700   for (LiveInterval::SubRange &S : IntB.subranges()) {
701     // Check for SubRange Segments of the form [1234r,1234d:0) which can be
702     // removed to prevent creating bogus SubRange Segments.
703     LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
704     if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
705       S.removeSegment(*SS, true);
706       continue;
707     }
708     // The subrange may have ended before FillerStart. If so, extend it.
709     if (!S.getVNInfoAt(FillerStart)) {
710       SlotIndex BBStart =
711           LIS->getMBBStartIdx(LIS->getMBBFromIndex(FillerStart));
712       S.extendInBlock(BBStart, FillerStart);
713     }
714     VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
715     S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
716     VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
717     if (SubBValNo != SubValSNo)
718       S.MergeValueNumberInto(SubBValNo, SubValSNo);
719   }
720 
721   LLVM_DEBUG(dbgs() << "   result = " << IntB << '\n');
722 
723   // If the source instruction was killing the source register before the
724   // merge, unset the isKill marker given the live range has been extended.
725   int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg(), true);
726   if (UIdx != -1) {
727     ValSEndInst->getOperand(UIdx).setIsKill(false);
728   }
729 
730   // Rewrite the copy.
731   CopyMI->substituteRegister(IntA.reg(), IntB.reg(), 0, *TRI);
732   // If the copy instruction was killing the destination register or any
733   // subrange before the merge trim the live range.
734   bool RecomputeLiveRange = AS->end == CopyIdx;
735   if (!RecomputeLiveRange) {
736     for (LiveInterval::SubRange &S : IntA.subranges()) {
737       LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
738       if (SS != S.end() && SS->end == CopyIdx) {
739         RecomputeLiveRange = true;
740         break;
741       }
742     }
743   }
744   if (RecomputeLiveRange)
745     shrinkToUses(&IntA);
746 
747   ++numExtends;
748   return true;
749 }
750 
751 bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
752                                              LiveInterval &IntB,
753                                              VNInfo *AValNo,
754                                              VNInfo *BValNo) {
755   // If AValNo has PHI kills, conservatively assume that IntB defs can reach
756   // the PHI values.
757   if (LIS->hasPHIKill(IntA, AValNo))
758     return true;
759 
760   for (LiveRange::Segment &ASeg : IntA.segments) {
761     if (ASeg.valno != AValNo) continue;
762     LiveInterval::iterator BI = llvm::upper_bound(IntB, ASeg.start);
763     if (BI != IntB.begin())
764       --BI;
765     for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
766       if (BI->valno == BValNo)
767         continue;
768       if (BI->start <= ASeg.start && BI->end > ASeg.start)
769         return true;
770       if (BI->start > ASeg.start && BI->start < ASeg.end)
771         return true;
772     }
773   }
774   return false;
775 }
776 
777 /// Copy segments with value number @p SrcValNo from liverange @p Src to live
778 /// range @Dst and use value number @p DstValNo there.
779 static std::pair<bool,bool>
780 addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src,
781                      const VNInfo *SrcValNo) {
782   bool Changed = false;
783   bool MergedWithDead = false;
784   for (const LiveRange::Segment &S : Src.segments) {
785     if (S.valno != SrcValNo)
786       continue;
787     // This is adding a segment from Src that ends in a copy that is about
788     // to be removed. This segment is going to be merged with a pre-existing
789     // segment in Dst. This works, except in cases when the corresponding
790     // segment in Dst is dead. For example: adding [192r,208r:1) from Src
791     // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst.
792     // Recognized such cases, so that the segments can be shrunk.
793     LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo);
794     LiveRange::Segment &Merged = *Dst.addSegment(Added);
795     if (Merged.end.isDead())
796       MergedWithDead = true;
797     Changed = true;
798   }
799   return std::make_pair(Changed, MergedWithDead);
800 }
801 
802 std::pair<bool,bool>
803 RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
804                                             MachineInstr *CopyMI) {
805   assert(!CP.isPhys());
806 
807   LiveInterval &IntA =
808       LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
809   LiveInterval &IntB =
810       LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
811 
812   // We found a non-trivially-coalescable copy with IntA being the source and
813   // IntB being the dest, thus this defines a value number in IntB.  If the
814   // source value number (in IntA) is defined by a commutable instruction and
815   // its other operand is coalesced to the copy dest register, see if we can
816   // transform the copy into a noop by commuting the definition. For example,
817   //
818   //  A3 = op A2 killed B0
819   //    ...
820   //  B1 = A3      <- this copy
821   //    ...
822   //     = op A3   <- more uses
823   //
824   // ==>
825   //
826   //  B2 = op B0 killed A2
827   //    ...
828   //  B1 = B2      <- now an identity copy
829   //    ...
830   //     = op B2   <- more uses
831 
832   // BValNo is a value number in B that is defined by a copy from A. 'B1' in
833   // the example above.
834   SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
835   VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
836   assert(BValNo != nullptr && BValNo->def == CopyIdx);
837 
838   // AValNo is the value number in A that defines the copy, A3 in the example.
839   VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
840   assert(AValNo && !AValNo->isUnused() && "COPY source not live");
841   if (AValNo->isPHIDef())
842     return { false, false };
843   MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
844   if (!DefMI)
845     return { false, false };
846   if (!DefMI->isCommutable())
847     return { false, false };
848   // If DefMI is a two-address instruction then commuting it will change the
849   // destination register.
850   int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg());
851   assert(DefIdx != -1);
852   unsigned UseOpIdx;
853   if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
854     return { false, false };
855 
856   // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
857   // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
858   // passed to the method. That _other_ operand is chosen by
859   // the findCommutedOpIndices() method.
860   //
861   // That is obviously an area for improvement in case of instructions having
862   // more than 2 operands. For example, if some instruction has 3 commutable
863   // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
864   // op#2<->op#3) of commute transformation should be considered/tried here.
865   unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
866   if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
867     return { false, false };
868 
869   MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
870   Register NewReg = NewDstMO.getReg();
871   if (NewReg != IntB.reg() || !IntB.Query(AValNo->def).isKill())
872     return { false, false };
873 
874   // Make sure there are no other definitions of IntB that would reach the
875   // uses which the new definition can reach.
876   if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
877     return { false, false };
878 
879   // If some of the uses of IntA.reg is already coalesced away, return false.
880   // It's not possible to determine whether it's safe to perform the coalescing.
881   for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg())) {
882     MachineInstr *UseMI = MO.getParent();
883     unsigned OpNo = &MO - &UseMI->getOperand(0);
884     SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
885     LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
886     if (US == IntA.end() || US->valno != AValNo)
887       continue;
888     // If this use is tied to a def, we can't rewrite the register.
889     if (UseMI->isRegTiedToDefOperand(OpNo))
890       return { false, false };
891   }
892 
893   LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
894                     << *DefMI);
895 
896   // At this point we have decided that it is legal to do this
897   // transformation.  Start by commuting the instruction.
898   MachineBasicBlock *MBB = DefMI->getParent();
899   MachineInstr *NewMI =
900       TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
901   if (!NewMI)
902     return { false, false };
903   if (IntA.reg().isVirtual() && IntB.reg().isVirtual() &&
904       !MRI->constrainRegClass(IntB.reg(), MRI->getRegClass(IntA.reg())))
905     return { false, false };
906   if (NewMI != DefMI) {
907     LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
908     MachineBasicBlock::iterator Pos = DefMI;
909     MBB->insert(Pos, NewMI);
910     MBB->erase(DefMI);
911   }
912 
913   // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
914   // A = or A, B
915   // ...
916   // B = A
917   // ...
918   // C = killed A
919   // ...
920   //   = B
921 
922   // Update uses of IntA of the specific Val# with IntB.
923   for (MachineOperand &UseMO :
924        llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) {
925     if (UseMO.isUndef())
926       continue;
927     MachineInstr *UseMI = UseMO.getParent();
928     if (UseMI->isDebugInstr()) {
929       // FIXME These don't have an instruction index.  Not clear we have enough
930       // info to decide whether to do this replacement or not.  For now do it.
931       UseMO.setReg(NewReg);
932       continue;
933     }
934     SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
935     LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
936     assert(US != IntA.end() && "Use must be live");
937     if (US->valno != AValNo)
938       continue;
939     // Kill flags are no longer accurate. They are recomputed after RA.
940     UseMO.setIsKill(false);
941     if (NewReg.isPhysical())
942       UseMO.substPhysReg(NewReg, *TRI);
943     else
944       UseMO.setReg(NewReg);
945     if (UseMI == CopyMI)
946       continue;
947     if (!UseMI->isCopy())
948       continue;
949     if (UseMI->getOperand(0).getReg() != IntB.reg() ||
950         UseMI->getOperand(0).getSubReg())
951       continue;
952 
953     // This copy will become a noop. If it's defining a new val#, merge it into
954     // BValNo.
955     SlotIndex DefIdx = UseIdx.getRegSlot();
956     VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
957     if (!DVNI)
958       continue;
959     LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
960     assert(DVNI->def == DefIdx);
961     BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
962     for (LiveInterval::SubRange &S : IntB.subranges()) {
963       VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
964       if (!SubDVNI)
965         continue;
966       VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
967       assert(SubBValNo->def == CopyIdx);
968       S.MergeValueNumberInto(SubDVNI, SubBValNo);
969     }
970 
971     deleteInstr(UseMI);
972   }
973 
974   // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
975   // is updated.
976   bool ShrinkB = false;
977   BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
978   if (IntA.hasSubRanges() || IntB.hasSubRanges()) {
979     if (!IntA.hasSubRanges()) {
980       LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg());
981       IntA.createSubRangeFrom(Allocator, Mask, IntA);
982     } else if (!IntB.hasSubRanges()) {
983       LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg());
984       IntB.createSubRangeFrom(Allocator, Mask, IntB);
985     }
986     SlotIndex AIdx = CopyIdx.getRegSlot(true);
987     LaneBitmask MaskA;
988     const SlotIndexes &Indexes = *LIS->getSlotIndexes();
989     for (LiveInterval::SubRange &SA : IntA.subranges()) {
990       VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
991       // Even if we are dealing with a full copy, some lanes can
992       // still be undefined.
993       // E.g.,
994       // undef A.subLow = ...
995       // B = COPY A <== A.subHigh is undefined here and does
996       //                not have a value number.
997       if (!ASubValNo)
998         continue;
999       MaskA |= SA.LaneMask;
1000 
1001       IntB.refineSubRanges(
1002           Allocator, SA.LaneMask,
1003           [&Allocator, &SA, CopyIdx, ASubValNo,
1004            &ShrinkB](LiveInterval::SubRange &SR) {
1005             VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator)
1006                                            : SR.getVNInfoAt(CopyIdx);
1007             assert(BSubValNo != nullptr);
1008             auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
1009             ShrinkB |= P.second;
1010             if (P.first)
1011               BSubValNo->def = ASubValNo->def;
1012           },
1013           Indexes, *TRI);
1014     }
1015     // Go over all subranges of IntB that have not been covered by IntA,
1016     // and delete the segments starting at CopyIdx. This can happen if
1017     // IntA has undef lanes that are defined in IntB.
1018     for (LiveInterval::SubRange &SB : IntB.subranges()) {
1019       if ((SB.LaneMask & MaskA).any())
1020         continue;
1021       if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx))
1022         if (S->start.getBaseIndex() == CopyIdx.getBaseIndex())
1023           SB.removeSegment(*S, true);
1024     }
1025   }
1026 
1027   BValNo->def = AValNo->def;
1028   auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
1029   ShrinkB |= P.second;
1030   LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
1031 
1032   LIS->removeVRegDefAt(IntA, AValNo->def);
1033 
1034   LLVM_DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
1035   ++numCommutes;
1036   return { true, ShrinkB };
1037 }
1038 
1039 /// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
1040 /// predecessor of BB2, and if B is not redefined on the way from A = B
1041 /// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
1042 /// execution goes through the path from BB0 to BB2. We may move B = A
1043 /// to the predecessor without such reversed copy.
1044 /// So we will transform the program from:
1045 ///   BB0:
1046 ///      A = B;    BB1:
1047 ///       ...         ...
1048 ///     /     \      /
1049 ///             BB2:
1050 ///               ...
1051 ///               B = A;
1052 ///
1053 /// to:
1054 ///
1055 ///   BB0:         BB1:
1056 ///      A = B;        ...
1057 ///       ...          B = A;
1058 ///     /     \       /
1059 ///             BB2:
1060 ///               ...
1061 ///
1062 /// A special case is when BB0 and BB2 are the same BB which is the only
1063 /// BB in a loop:
1064 ///   BB1:
1065 ///        ...
1066 ///   BB0/BB2:  ----
1067 ///        B = A;   |
1068 ///        ...      |
1069 ///        A = B;   |
1070 ///          |-------
1071 ///          |
1072 /// We may hoist B = A from BB0/BB2 to BB1.
1073 ///
1074 /// The major preconditions for correctness to remove such partial
1075 /// redundancy include:
1076 /// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
1077 ///    the PHI is defined by the reversed copy A = B in BB0.
1078 /// 2. No B is referenced from the start of BB2 to B = A.
1079 /// 3. No B is defined from A = B to the end of BB0.
1080 /// 4. BB1 has only one successor.
1081 ///
1082 /// 2 and 4 implicitly ensure B is not live at the end of BB1.
1083 /// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
1084 /// colder place, which not only prevent endless loop, but also make sure
1085 /// the movement of copy is beneficial.
1086 bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
1087                                                 MachineInstr &CopyMI) {
1088   assert(!CP.isPhys());
1089   if (!CopyMI.isFullCopy())
1090     return false;
1091 
1092   MachineBasicBlock &MBB = *CopyMI.getParent();
1093   // If this block is the target of an invoke/inlineasm_br, moving the copy into
1094   // the predecessor is tricker, and we don't handle it.
1095   if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget())
1096     return false;
1097 
1098   if (MBB.pred_size() != 2)
1099     return false;
1100 
1101   LiveInterval &IntA =
1102       LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
1103   LiveInterval &IntB =
1104       LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
1105 
1106   // A is defined by PHI at the entry of MBB.
1107   SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
1108   VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
1109   assert(AValNo && !AValNo->isUnused() && "COPY source not live");
1110   if (!AValNo->isPHIDef())
1111     return false;
1112 
1113   // No B is referenced before CopyMI in MBB.
1114   if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
1115     return false;
1116 
1117   // MBB has two predecessors: one contains A = B so no copy will be inserted
1118   // for it. The other one will have a copy moved from MBB.
1119   bool FoundReverseCopy = false;
1120   MachineBasicBlock *CopyLeftBB = nullptr;
1121   for (MachineBasicBlock *Pred : MBB.predecessors()) {
1122     VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
1123     MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
1124     if (!DefMI || !DefMI->isFullCopy()) {
1125       CopyLeftBB = Pred;
1126       continue;
1127     }
1128     // Check DefMI is a reverse copy and it is in BB Pred.
1129     if (DefMI->getOperand(0).getReg() != IntA.reg() ||
1130         DefMI->getOperand(1).getReg() != IntB.reg() ||
1131         DefMI->getParent() != Pred) {
1132       CopyLeftBB = Pred;
1133       continue;
1134     }
1135     // If there is any other def of B after DefMI and before the end of Pred,
1136     // we need to keep the copy of B = A at the end of Pred if we remove
1137     // B = A from MBB.
1138     bool ValB_Changed = false;
1139     for (auto *VNI : IntB.valnos) {
1140       if (VNI->isUnused())
1141         continue;
1142       if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
1143         ValB_Changed = true;
1144         break;
1145       }
1146     }
1147     if (ValB_Changed) {
1148       CopyLeftBB = Pred;
1149       continue;
1150     }
1151     FoundReverseCopy = true;
1152   }
1153 
1154   // If no reverse copy is found in predecessors, nothing to do.
1155   if (!FoundReverseCopy)
1156     return false;
1157 
1158   // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
1159   // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
1160   // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
1161   // update IntA/IntB.
1162   //
1163   // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
1164   // MBB is hotter than CopyLeftBB.
1165   if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
1166     return false;
1167 
1168   // Now (almost sure it's) ok to move copy.
1169   if (CopyLeftBB) {
1170     // Position in CopyLeftBB where we should insert new copy.
1171     auto InsPos = CopyLeftBB->getFirstTerminator();
1172 
1173     // Make sure that B isn't referenced in the terminators (if any) at the end
1174     // of the predecessor since we're about to insert a new definition of B
1175     // before them.
1176     if (InsPos != CopyLeftBB->end()) {
1177       SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
1178       if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
1179         return false;
1180     }
1181 
1182     LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
1183                       << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
1184 
1185     // Insert new copy to CopyLeftBB.
1186     MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
1187                                       TII->get(TargetOpcode::COPY), IntB.reg())
1188                                   .addReg(IntA.reg());
1189     SlotIndex NewCopyIdx =
1190         LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
1191     IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1192     for (LiveInterval::SubRange &SR : IntB.subranges())
1193       SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1194 
1195     // If the newly created Instruction has an address of an instruction that was
1196     // deleted before (object recycled by the allocator) it needs to be removed from
1197     // the deleted list.
1198     ErasedInstrs.erase(NewCopyMI);
1199   } else {
1200     LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
1201                       << printMBBReference(MBB) << '\t' << CopyMI);
1202   }
1203 
1204   const bool IsUndefCopy = CopyMI.getOperand(1).isUndef();
1205 
1206   // Remove CopyMI.
1207   // Note: This is fine to remove the copy before updating the live-ranges.
1208   // While updating the live-ranges, we only look at slot indices and
1209   // never go back to the instruction.
1210   // Mark instructions as deleted.
1211   deleteInstr(&CopyMI);
1212 
1213   // Update the liveness.
1214   SmallVector<SlotIndex, 8> EndPoints;
1215   VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
1216   LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
1217                   &EndPoints);
1218   BValNo->markUnused();
1219 
1220   if (IsUndefCopy) {
1221     // We're introducing an undef phi def, and need to set undef on any users of
1222     // the previously local def to avoid artifically extending the lifetime
1223     // through the block.
1224     for (MachineOperand &MO : MRI->use_nodbg_operands(IntB.reg())) {
1225       const MachineInstr &MI = *MO.getParent();
1226       SlotIndex UseIdx = LIS->getInstructionIndex(MI);
1227       if (!IntB.liveAt(UseIdx))
1228         MO.setIsUndef(true);
1229     }
1230   }
1231 
1232   // Extend IntB to the EndPoints of its original live interval.
1233   LIS->extendToIndices(IntB, EndPoints);
1234 
1235   // Now, do the same for its subranges.
1236   for (LiveInterval::SubRange &SR : IntB.subranges()) {
1237     EndPoints.clear();
1238     VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
1239     assert(BValNo && "All sublanes should be live");
1240     LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
1241     BValNo->markUnused();
1242     // We can have a situation where the result of the original copy is live,
1243     // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes
1244     // the copy appear as an endpoint from pruneValue(), but we don't want it
1245     // to because the copy has been removed.  We can go ahead and remove that
1246     // endpoint; there is no other situation here that there could be a use at
1247     // the same place as we know that the copy is a full copy.
1248     for (unsigned I = 0; I != EndPoints.size(); ) {
1249       if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) {
1250         EndPoints[I] = EndPoints.back();
1251         EndPoints.pop_back();
1252         continue;
1253       }
1254       ++I;
1255     }
1256     SmallVector<SlotIndex, 8> Undefs;
1257     IntB.computeSubRangeUndefs(Undefs, SR.LaneMask, *MRI,
1258                                *LIS->getSlotIndexes());
1259     LIS->extendToIndices(SR, EndPoints, Undefs);
1260   }
1261   // If any dead defs were extended, truncate them.
1262   shrinkToUses(&IntB);
1263 
1264   // Finally, update the live-range of IntA.
1265   shrinkToUses(&IntA);
1266   return true;
1267 }
1268 
1269 /// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
1270 /// defining a subregister.
1271 static bool definesFullReg(const MachineInstr &MI, Register Reg) {
1272   assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
1273 
1274   for (const MachineOperand &Op : MI.all_defs()) {
1275     if (Op.getReg() != Reg)
1276       continue;
1277     // Return true if we define the full register or don't care about the value
1278     // inside other subregisters.
1279     if (Op.getSubReg() == 0 || Op.isUndef())
1280       return true;
1281   }
1282   return false;
1283 }
1284 
1285 bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1286                                                 MachineInstr *CopyMI,
1287                                                 bool &IsDefCopy) {
1288   IsDefCopy = false;
1289   Register SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
1290   unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
1291   Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
1292   unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
1293   if (SrcReg.isPhysical())
1294     return false;
1295 
1296   LiveInterval &SrcInt = LIS->getInterval(SrcReg);
1297   SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1298   VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
1299   if (!ValNo)
1300     return false;
1301   if (ValNo->isPHIDef() || ValNo->isUnused())
1302     return false;
1303   MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
1304   if (!DefMI)
1305     return false;
1306   if (DefMI->isCopyLike()) {
1307     IsDefCopy = true;
1308     return false;
1309   }
1310   if (!TII->isAsCheapAsAMove(*DefMI))
1311     return false;
1312 
1313   SmallVector<Register, 8> NewRegs;
1314   LiveRangeEdit Edit(&SrcInt, NewRegs, *MF, *LIS, nullptr, this);
1315   if (!Edit.checkRematerializable(ValNo, DefMI))
1316     return false;
1317 
1318   if (!definesFullReg(*DefMI, SrcReg))
1319     return false;
1320   bool SawStore = false;
1321   if (!DefMI->isSafeToMove(AA, SawStore))
1322     return false;
1323   const MCInstrDesc &MCID = DefMI->getDesc();
1324   if (MCID.getNumDefs() != 1)
1325     return false;
1326   // Only support subregister destinations when the def is read-undef.
1327   MachineOperand &DstOperand = CopyMI->getOperand(0);
1328   Register CopyDstReg = DstOperand.getReg();
1329   if (DstOperand.getSubReg() && !DstOperand.isUndef())
1330     return false;
1331 
1332   // If both SrcIdx and DstIdx are set, correct rematerialization would widen
1333   // the register substantially (beyond both source and dest size). This is bad
1334   // for performance since it can cascade through a function, introducing many
1335   // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
1336   // around after a few subreg copies).
1337   if (SrcIdx && DstIdx)
1338     return false;
1339 
1340   [[maybe_unused]] const unsigned DefSubIdx = DefMI->getOperand(0).getSubReg();
1341   const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
1342   if (!DefMI->isImplicitDef()) {
1343     if (DstReg.isPhysical()) {
1344       Register NewDstReg = DstReg;
1345 
1346       unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
1347                                               DefMI->getOperand(0).getSubReg());
1348       if (NewDstIdx)
1349         NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
1350 
1351       // Finally, make sure that the physical subregister that will be
1352       // constructed later is permitted for the instruction.
1353       if (!DefRC->contains(NewDstReg))
1354         return false;
1355     } else {
1356       // Theoretically, some stack frame reference could exist. Just make sure
1357       // it hasn't actually happened.
1358       assert(DstReg.isVirtual() &&
1359              "Only expect to deal with virtual or physical registers");
1360     }
1361   }
1362 
1363   LiveRangeEdit::Remat RM(ValNo);
1364   RM.OrigMI = DefMI;
1365   if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx, true))
1366     return false;
1367 
1368   DebugLoc DL = CopyMI->getDebugLoc();
1369   MachineBasicBlock *MBB = CopyMI->getParent();
1370   MachineBasicBlock::iterator MII =
1371     std::next(MachineBasicBlock::iterator(CopyMI));
1372   Edit.rematerializeAt(*MBB, MII, DstReg, RM, *TRI, false, SrcIdx, CopyMI);
1373   MachineInstr &NewMI = *std::prev(MII);
1374   NewMI.setDebugLoc(DL);
1375 
1376   // In a situation like the following:
1377   //     %0:subreg = instr              ; DefMI, subreg = DstIdx
1378   //     %1        = copy %0:subreg ; CopyMI, SrcIdx = 0
1379   // instead of widening %1 to the register class of %0 simply do:
1380   //     %1 = instr
1381   const TargetRegisterClass *NewRC = CP.getNewRC();
1382   if (DstIdx != 0) {
1383     MachineOperand &DefMO = NewMI.getOperand(0);
1384     if (DefMO.getSubReg() == DstIdx) {
1385       assert(SrcIdx == 0 && CP.isFlipped()
1386              && "Shouldn't have SrcIdx+DstIdx at this point");
1387       const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
1388       const TargetRegisterClass *CommonRC =
1389         TRI->getCommonSubClass(DefRC, DstRC);
1390       if (CommonRC != nullptr) {
1391         NewRC = CommonRC;
1392 
1393         // Instruction might contain "undef %0:subreg" as use operand:
1394         //   %0:subreg = instr op_1, ..., op_N, undef %0:subreg, op_N+2, ...
1395         //
1396         // Need to check all operands.
1397         for (MachineOperand &MO : NewMI.operands()) {
1398           if (MO.isReg() && MO.getReg() == DstReg && MO.getSubReg() == DstIdx) {
1399             MO.setSubReg(0);
1400           }
1401         }
1402 
1403         DstIdx = 0;
1404         DefMO.setIsUndef(false); // Only subregs can have def+undef.
1405       }
1406     }
1407   }
1408 
1409   // CopyMI may have implicit operands, save them so that we can transfer them
1410   // over to the newly materialized instruction after CopyMI is removed.
1411   SmallVector<MachineOperand, 4> ImplicitOps;
1412   ImplicitOps.reserve(CopyMI->getNumOperands() -
1413                       CopyMI->getDesc().getNumOperands());
1414   for (unsigned I = CopyMI->getDesc().getNumOperands(),
1415                 E = CopyMI->getNumOperands();
1416        I != E; ++I) {
1417     MachineOperand &MO = CopyMI->getOperand(I);
1418     if (MO.isReg()) {
1419       assert(MO.isImplicit() && "No explicit operands after implicit operands.");
1420       assert((MO.getReg().isPhysical() ||
1421               (MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
1422              "unexpected implicit virtual register def");
1423       ImplicitOps.push_back(MO);
1424     }
1425   }
1426 
1427   CopyMI->eraseFromParent();
1428   ErasedInstrs.insert(CopyMI);
1429 
1430   // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
1431   // We need to remember these so we can add intervals once we insert
1432   // NewMI into SlotIndexes.
1433   //
1434   // We also expect to have tied implicit-defs of super registers originating
1435   // from SUBREG_TO_REG, such as:
1436   // $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi
1437   // undef %0.sub_32bit = MOV32r0 implicit-def dead $eflags, implicit-def %0
1438   //
1439   // The implicit-def of the super register may have been reduced to
1440   // subregisters depending on the uses.
1441 
1442   bool NewMIDefinesFullReg = false;
1443 
1444   SmallVector<MCRegister, 4> NewMIImplDefs;
1445   for (unsigned i = NewMI.getDesc().getNumOperands(),
1446                 e = NewMI.getNumOperands();
1447        i != e; ++i) {
1448     MachineOperand &MO = NewMI.getOperand(i);
1449     if (MO.isReg() && MO.isDef()) {
1450       assert(MO.isImplicit());
1451       if (MO.getReg().isPhysical()) {
1452         if (MO.getReg() == DstReg)
1453           NewMIDefinesFullReg = true;
1454 
1455         assert(MO.isImplicit() && MO.getReg().isPhysical() &&
1456                (MO.isDead() ||
1457                 (DefSubIdx &&
1458                  ((TRI->getSubReg(MO.getReg(), DefSubIdx) ==
1459                    MCRegister((unsigned)NewMI.getOperand(0).getReg())) ||
1460                   TRI->isSubRegisterEq(NewMI.getOperand(0).getReg(),
1461                                        MO.getReg())))));
1462         NewMIImplDefs.push_back(MO.getReg().asMCReg());
1463       } else {
1464         assert(MO.getReg() == NewMI.getOperand(0).getReg());
1465 
1466         // We're only expecting another def of the main output, so the range
1467         // should get updated with the regular output range.
1468         //
1469         // FIXME: The range updating below probably needs updating to look at
1470         // the super register if subranges are tracked.
1471         assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
1472                "subrange update for implicit-def of super register may not be "
1473                "properly handled");
1474       }
1475     }
1476   }
1477 
1478   if (DstReg.isVirtual()) {
1479     unsigned NewIdx = NewMI.getOperand(0).getSubReg();
1480 
1481     if (DefRC != nullptr) {
1482       if (NewIdx)
1483         NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
1484       else
1485         NewRC = TRI->getCommonSubClass(NewRC, DefRC);
1486       assert(NewRC && "subreg chosen for remat incompatible with instruction");
1487     }
1488     // Remap subranges to new lanemask and change register class.
1489     LiveInterval &DstInt = LIS->getInterval(DstReg);
1490     for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1491       SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
1492     }
1493     MRI->setRegClass(DstReg, NewRC);
1494 
1495     // Update machine operands and add flags.
1496     updateRegDefsUses(DstReg, DstReg, DstIdx);
1497     NewMI.getOperand(0).setSubReg(NewIdx);
1498     // updateRegDefUses can add an "undef" flag to the definition, since
1499     // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
1500     // sure that "undef" is not set.
1501     if (NewIdx == 0)
1502       NewMI.getOperand(0).setIsUndef(false);
1503     // Add dead subregister definitions if we are defining the whole register
1504     // but only part of it is live.
1505     // This could happen if the rematerialization instruction is rematerializing
1506     // more than actually is used in the register.
1507     // An example would be:
1508     // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
1509     // ; Copying only part of the register here, but the rest is undef.
1510     // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
1511     // ==>
1512     // ; Materialize all the constants but only using one
1513     // %2 = LOAD_CONSTANTS 5, 8
1514     //
1515     // at this point for the part that wasn't defined before we could have
1516     // subranges missing the definition.
1517     if (NewIdx == 0 && DstInt.hasSubRanges()) {
1518       SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1519       SlotIndex DefIndex =
1520           CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1521       LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
1522       VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator();
1523       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1524         if (!SR.liveAt(DefIndex))
1525           SR.createDeadDef(DefIndex, Alloc);
1526         MaxMask &= ~SR.LaneMask;
1527       }
1528       if (MaxMask.any()) {
1529         LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
1530         SR->createDeadDef(DefIndex, Alloc);
1531       }
1532     }
1533 
1534     // Make sure that the subrange for resultant undef is removed
1535     // For example:
1536     //   %1:sub1<def,read-undef> = LOAD CONSTANT 1
1537     //   %2 = COPY %1
1538     // ==>
1539     //   %2:sub1<def, read-undef> = LOAD CONSTANT 1
1540     //     ; Correct but need to remove the subrange for %2:sub0
1541     //     ; as it is now undef
1542     if (NewIdx != 0 && DstInt.hasSubRanges()) {
1543       // The affected subregister segments can be removed.
1544       SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1545       LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
1546       bool UpdatedSubRanges = false;
1547       SlotIndex DefIndex =
1548           CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1549       VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
1550       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1551         if ((SR.LaneMask & DstMask).none()) {
1552           LLVM_DEBUG(dbgs()
1553                      << "Removing undefined SubRange "
1554                      << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
1555 
1556           if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
1557             // VNI is in ValNo - remove any segments in this SubRange that have
1558             // this ValNo
1559             SR.removeValNo(RmValNo);
1560           }
1561 
1562           // We may not have a defined value at this point, but still need to
1563           // clear out any empty subranges tentatively created by
1564           // updateRegDefUses. The original subrange def may have only undefed
1565           // some lanes.
1566           UpdatedSubRanges = true;
1567         } else {
1568           // We know that this lane is defined by this instruction,
1569           // but at this point it may be empty because it is not used by
1570           // anything. This happens when updateRegDefUses adds the missing
1571           // lanes. Assign that lane a dead def so that the interferences
1572           // are properly modeled.
1573           if (SR.empty())
1574             SR.createDeadDef(DefIndex, Alloc);
1575         }
1576       }
1577       if (UpdatedSubRanges)
1578         DstInt.removeEmptySubRanges();
1579     }
1580   } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
1581     // The New instruction may be defining a sub-register of what's actually
1582     // been asked for. If so it must implicitly define the whole thing.
1583     assert(DstReg.isPhysical() &&
1584            "Only expect virtual or physical registers in remat");
1585     NewMI.getOperand(0).setIsDead(true);
1586 
1587     if (!NewMIDefinesFullReg) {
1588       NewMI.addOperand(MachineOperand::CreateReg(
1589           CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
1590     }
1591 
1592     // Record small dead def live-ranges for all the subregisters
1593     // of the destination register.
1594     // Otherwise, variables that live through may miss some
1595     // interferences, thus creating invalid allocation.
1596     // E.g., i386 code:
1597     // %1 = somedef ; %1 GR8
1598     // %2 = remat ; %2 GR32
1599     // CL = COPY %2.sub_8bit
1600     // = somedef %1 ; %1 GR8
1601     // =>
1602     // %1 = somedef ; %1 GR8
1603     // dead ECX = remat ; implicit-def CL
1604     // = somedef %1 ; %1 GR8
1605     // %1 will see the interferences with CL but not with CH since
1606     // no live-ranges would have been created for ECX.
1607     // Fix that!
1608     SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1609     for (MCRegUnit Unit : TRI->regunits(NewMI.getOperand(0).getReg()))
1610       if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
1611         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1612   }
1613 
1614   NewMI.setRegisterDefReadUndef(NewMI.getOperand(0).getReg());
1615 
1616   // Transfer over implicit operands to the rematerialized instruction.
1617   for (MachineOperand &MO : ImplicitOps)
1618     NewMI.addOperand(MO);
1619 
1620   SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1621   for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
1622     MCRegister Reg = NewMIImplDefs[i];
1623     for (MCRegUnit Unit : TRI->regunits(Reg))
1624       if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
1625         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1626   }
1627 
1628   LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
1629   ++NumReMats;
1630 
1631   // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
1632   // to describe DstReg instead.
1633   if (MRI->use_nodbg_empty(SrcReg)) {
1634     for (MachineOperand &UseMO :
1635          llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {
1636       MachineInstr *UseMI = UseMO.getParent();
1637       if (UseMI->isDebugInstr()) {
1638         if (DstReg.isPhysical())
1639           UseMO.substPhysReg(DstReg, *TRI);
1640         else
1641           UseMO.setReg(DstReg);
1642         // Move the debug value directly after the def of the rematerialized
1643         // value in DstReg.
1644         MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
1645         LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
1646       }
1647     }
1648   }
1649 
1650   if (ToBeUpdated.count(SrcReg))
1651     return true;
1652 
1653   unsigned NumCopyUses = 0;
1654   for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
1655     if (UseMO.getParent()->isCopyLike())
1656       NumCopyUses++;
1657   }
1658   if (NumCopyUses < LateRematUpdateThreshold) {
1659     // The source interval can become smaller because we removed a use.
1660     shrinkToUses(&SrcInt, &DeadDefs);
1661     if (!DeadDefs.empty())
1662       eliminateDeadDefs(&Edit);
1663   } else {
1664     ToBeUpdated.insert(SrcReg);
1665   }
1666   return true;
1667 }
1668 
1669 MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
1670   // ProcessImplicitDefs may leave some copies of <undef> values, it only
1671   // removes local variables. When we have a copy like:
1672   //
1673   //   %1 = COPY undef %2
1674   //
1675   // We delete the copy and remove the corresponding value number from %1.
1676   // Any uses of that value number are marked as <undef>.
1677 
1678   // Note that we do not query CoalescerPair here but redo isMoveInstr as the
1679   // CoalescerPair may have a new register class with adjusted subreg indices
1680   // at this point.
1681   Register SrcReg, DstReg;
1682   unsigned SrcSubIdx = 0, DstSubIdx = 0;
1683   if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
1684     return nullptr;
1685 
1686   SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
1687   const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
1688   // CopyMI is undef iff SrcReg is not live before the instruction.
1689   if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
1690     LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
1691     for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
1692       if ((SR.LaneMask & SrcMask).none())
1693         continue;
1694       if (SR.liveAt(Idx))
1695         return nullptr;
1696     }
1697   } else if (SrcLI.liveAt(Idx))
1698     return nullptr;
1699 
1700   // If the undef copy defines a live-out value (i.e. an input to a PHI def),
1701   // then replace it with an IMPLICIT_DEF.
1702   LiveInterval &DstLI = LIS->getInterval(DstReg);
1703   SlotIndex RegIndex = Idx.getRegSlot();
1704   LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
1705   assert(Seg != nullptr && "No segment for defining instruction");
1706   VNInfo *V = DstLI.getVNInfoAt(Seg->end);
1707 
1708   // The source interval may also have been on an undef use, in which case the
1709   // copy introduced a live value.
1710   if (((V && V->isPHIDef()) || (!V && !DstLI.liveAt(Idx)))) {
1711     for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
1712       MachineOperand &MO = CopyMI->getOperand(i-1);
1713       if (MO.isReg()) {
1714         if (MO.isUse())
1715           CopyMI->removeOperand(i - 1);
1716       } else {
1717         assert(MO.isImm() &&
1718                CopyMI->getOpcode() == TargetOpcode::SUBREG_TO_REG);
1719         CopyMI->removeOperand(i-1);
1720       }
1721     }
1722 
1723     CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
1724     LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
1725                "implicit def\n");
1726     return CopyMI;
1727   }
1728 
1729   // Remove any DstReg segments starting at the instruction.
1730   LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
1731 
1732   // Remove value or merge with previous one in case of a subregister def.
1733   if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
1734     VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
1735     DstLI.MergeValueNumberInto(VNI, PrevVNI);
1736 
1737     // The affected subregister segments can be removed.
1738     LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
1739     for (LiveInterval::SubRange &SR : DstLI.subranges()) {
1740       if ((SR.LaneMask & DstMask).none())
1741         continue;
1742 
1743       VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
1744       assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
1745       SR.removeValNo(SVNI);
1746     }
1747     DstLI.removeEmptySubRanges();
1748   } else
1749     LIS->removeVRegDefAt(DstLI, RegIndex);
1750 
1751   // Mark uses as undef.
1752   for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
1753     if (MO.isDef() /*|| MO.isUndef()*/)
1754       continue;
1755     const MachineInstr &MI = *MO.getParent();
1756     SlotIndex UseIdx = LIS->getInstructionIndex(MI);
1757     LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
1758     bool isLive;
1759     if (!UseMask.all() && DstLI.hasSubRanges()) {
1760       isLive = false;
1761       for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
1762         if ((SR.LaneMask & UseMask).none())
1763           continue;
1764         if (SR.liveAt(UseIdx)) {
1765           isLive = true;
1766           break;
1767         }
1768       }
1769     } else
1770       isLive = DstLI.liveAt(UseIdx);
1771     if (isLive)
1772       continue;
1773     MO.setIsUndef(true);
1774     LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
1775   }
1776 
1777   // A def of a subregister may be a use of the other subregisters, so
1778   // deleting a def of a subregister may also remove uses. Since CopyMI
1779   // is still part of the function (but about to be erased), mark all
1780   // defs of DstReg in it as <undef>, so that shrinkToUses would
1781   // ignore them.
1782   for (MachineOperand &MO : CopyMI->all_defs())
1783     if (MO.getReg() == DstReg)
1784       MO.setIsUndef(true);
1785   LIS->shrinkToUses(&DstLI);
1786 
1787   return CopyMI;
1788 }
1789 
1790 void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
1791                                      MachineOperand &MO, unsigned SubRegIdx) {
1792   LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1793   if (MO.isDef())
1794     Mask = ~Mask;
1795   bool IsUndef = true;
1796   for (const LiveInterval::SubRange &S : Int.subranges()) {
1797     if ((S.LaneMask & Mask).none())
1798       continue;
1799     if (S.liveAt(UseIdx)) {
1800       IsUndef = false;
1801       break;
1802     }
1803   }
1804   if (IsUndef) {
1805     MO.setIsUndef(true);
1806     // We found out some subregister use is actually reading an undefined
1807     // value. In some cases the whole vreg has become undefined at this
1808     // point so we have to potentially shrink the main range if the
1809     // use was ending a live segment there.
1810     LiveQueryResult Q = Int.Query(UseIdx);
1811     if (Q.valueOut() == nullptr)
1812       ShrinkMainRange = true;
1813   }
1814 }
1815 
1816 void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
1817                                           unsigned SubIdx) {
1818   bool DstIsPhys = DstReg.isPhysical();
1819   LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
1820 
1821   if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
1822     for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
1823       unsigned SubReg = MO.getSubReg();
1824       if (SubReg == 0 || MO.isUndef())
1825         continue;
1826       MachineInstr &MI = *MO.getParent();
1827       if (MI.isDebugInstr())
1828         continue;
1829       SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
1830       addUndefFlag(*DstInt, UseIdx, MO, SubReg);
1831     }
1832   }
1833 
1834   SmallPtrSet<MachineInstr*, 8> Visited;
1835   for (MachineRegisterInfo::reg_instr_iterator
1836        I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
1837        I != E; ) {
1838     MachineInstr *UseMI = &*(I++);
1839 
1840     // Each instruction can only be rewritten once because sub-register
1841     // composition is not always idempotent. When SrcReg != DstReg, rewriting
1842     // the UseMI operands removes them from the SrcReg use-def chain, but when
1843     // SrcReg is DstReg we could encounter UseMI twice if it has multiple
1844     // operands mentioning the virtual register.
1845     if (SrcReg == DstReg && !Visited.insert(UseMI).second)
1846       continue;
1847 
1848     SmallVector<unsigned,8> Ops;
1849     bool Reads, Writes;
1850     std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
1851 
1852     // If SrcReg wasn't read, it may still be the case that DstReg is live-in
1853     // because SrcReg is a sub-register.
1854     if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
1855       Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
1856 
1857     // Replace SrcReg with DstReg in all UseMI operands.
1858     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1859       MachineOperand &MO = UseMI->getOperand(Ops[i]);
1860 
1861       // Adjust <undef> flags in case of sub-register joins. We don't want to
1862       // turn a full def into a read-modify-write sub-register def and vice
1863       // versa.
1864       if (SubIdx && MO.isDef())
1865         MO.setIsUndef(!Reads);
1866 
1867       // A subreg use of a partially undef (super) register may be a complete
1868       // undef use now and then has to be marked that way.
1869       if (MO.isUse() && !DstIsPhys) {
1870         unsigned SubUseIdx = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
1871         if (SubUseIdx != 0 && MRI->shouldTrackSubRegLiveness(DstReg)) {
1872           if (!DstInt->hasSubRanges()) {
1873             BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
1874             LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
1875             LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
1876             LaneBitmask UnusedLanes = FullMask & ~UsedLanes;
1877             DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt);
1878             // The unused lanes are just empty live-ranges at this point.
1879             // It is the caller responsibility to set the proper
1880             // dead segments if there is an actual dead def of the
1881             // unused lanes. This may happen with rematerialization.
1882             DstInt->createSubRange(Allocator, UnusedLanes);
1883           }
1884           SlotIndex MIIdx = UseMI->isDebugInstr()
1885             ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
1886             : LIS->getInstructionIndex(*UseMI);
1887           SlotIndex UseIdx = MIIdx.getRegSlot(true);
1888           addUndefFlag(*DstInt, UseIdx, MO, SubUseIdx);
1889         }
1890       }
1891 
1892       if (DstIsPhys)
1893         MO.substPhysReg(DstReg, *TRI);
1894       else
1895         MO.substVirtReg(DstReg, SubIdx, *TRI);
1896     }
1897 
1898     LLVM_DEBUG({
1899       dbgs() << "\t\tupdated: ";
1900       if (!UseMI->isDebugInstr())
1901         dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
1902       dbgs() << *UseMI;
1903     });
1904   }
1905 }
1906 
1907 bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
1908   // Always join simple intervals that are defined by a single copy from a
1909   // reserved register. This doesn't increase register pressure, so it is
1910   // always beneficial.
1911   if (!MRI->isReserved(CP.getDstReg())) {
1912     LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
1913     return false;
1914   }
1915 
1916   LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
1917   if (JoinVInt.containsOneValue())
1918     return true;
1919 
1920   LLVM_DEBUG(
1921       dbgs() << "\tCannot join complex intervals into reserved register.\n");
1922   return false;
1923 }
1924 
1925 bool RegisterCoalescer::copyValueUndefInPredecessors(
1926     LiveRange &S, const MachineBasicBlock *MBB, LiveQueryResult SLRQ) {
1927   for (const MachineBasicBlock *Pred : MBB->predecessors()) {
1928     SlotIndex PredEnd = LIS->getMBBEndIdx(Pred);
1929     if (VNInfo *V = S.getVNInfoAt(PredEnd.getPrevSlot())) {
1930       // If this is a self loop, we may be reading the same value.
1931       if (V->id != SLRQ.valueOutOrDead()->id)
1932         return false;
1933     }
1934   }
1935 
1936   return true;
1937 }
1938 
1939 void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI,
1940                                                    Register Reg,
1941                                                    LaneBitmask PrunedLanes) {
1942   // If we had other instructions in the segment reading the undef sublane
1943   // value, we need to mark them with undef.
1944   for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
1945     unsigned SubRegIdx = MO.getSubReg();
1946     if (SubRegIdx == 0 || MO.isUndef())
1947       continue;
1948 
1949     LaneBitmask SubRegMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1950     SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
1951     for (LiveInterval::SubRange &S : LI.subranges()) {
1952       if (!S.liveAt(Pos) && (PrunedLanes & SubRegMask).any()) {
1953         MO.setIsUndef();
1954         break;
1955       }
1956     }
1957   }
1958 
1959   LI.removeEmptySubRanges();
1960 
1961   // A def of a subregister may be a use of other register lanes. Replacing
1962   // such a def with a def of a different register will eliminate the use,
1963   // and may cause the recorded live range to be larger than the actual
1964   // liveness in the program IR.
1965   LIS->shrinkToUses(&LI);
1966 }
1967 
1968 bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
1969   Again = false;
1970   LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
1971 
1972   CoalescerPair CP(*TRI);
1973   if (!CP.setRegisters(CopyMI)) {
1974     LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
1975     return false;
1976   }
1977 
1978   if (CP.getNewRC()) {
1979     auto SrcRC = MRI->getRegClass(CP.getSrcReg());
1980     auto DstRC = MRI->getRegClass(CP.getDstReg());
1981     unsigned SrcIdx = CP.getSrcIdx();
1982     unsigned DstIdx = CP.getDstIdx();
1983     if (CP.isFlipped()) {
1984       std::swap(SrcIdx, DstIdx);
1985       std::swap(SrcRC, DstRC);
1986     }
1987     if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
1988                              CP.getNewRC(), *LIS)) {
1989       LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
1990       return false;
1991     }
1992   }
1993 
1994   // Dead code elimination. This really should be handled by MachineDCE, but
1995   // sometimes dead copies slip through, and we can't generate invalid live
1996   // ranges.
1997   if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
1998     LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
1999     DeadDefs.push_back(CopyMI);
2000     eliminateDeadDefs();
2001     return true;
2002   }
2003 
2004   // Eliminate undefs.
2005   if (!CP.isPhys()) {
2006     // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
2007     if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
2008       if (UndefMI->isImplicitDef())
2009         return false;
2010       deleteInstr(CopyMI);
2011       return false;  // Not coalescable.
2012     }
2013   }
2014 
2015   // Coalesced copies are normally removed immediately, but transformations
2016   // like removeCopyByCommutingDef() can inadvertently create identity copies.
2017   // When that happens, just join the values and remove the copy.
2018   if (CP.getSrcReg() == CP.getDstReg()) {
2019     LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
2020     LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
2021     const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
2022     LiveQueryResult LRQ = LI.Query(CopyIdx);
2023     if (VNInfo *DefVNI = LRQ.valueDefined()) {
2024       VNInfo *ReadVNI = LRQ.valueIn();
2025       assert(ReadVNI && "No value before copy and no <undef> flag.");
2026       assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
2027 
2028       // Track incoming undef lanes we need to eliminate from the subrange.
2029       LaneBitmask PrunedLanes;
2030       MachineBasicBlock *MBB = CopyMI->getParent();
2031 
2032       // Process subregister liveranges.
2033       for (LiveInterval::SubRange &S : LI.subranges()) {
2034         LiveQueryResult SLRQ = S.Query(CopyIdx);
2035         if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
2036           if (VNInfo *SReadVNI = SLRQ.valueIn())
2037             SDefVNI = S.MergeValueNumberInto(SDefVNI, SReadVNI);
2038 
2039           // If this copy introduced an undef subrange from an incoming value,
2040           // we need to eliminate the undef live in values from the subrange.
2041           if (copyValueUndefInPredecessors(S, MBB, SLRQ)) {
2042             LLVM_DEBUG(dbgs() << "Incoming sublane value is undef at copy\n");
2043             PrunedLanes |= S.LaneMask;
2044             S.removeValNo(SDefVNI);
2045           }
2046         }
2047       }
2048 
2049       LI.MergeValueNumberInto(DefVNI, ReadVNI);
2050       if (PrunedLanes.any()) {
2051         LLVM_DEBUG(dbgs() << "Pruning undef incoming lanes: "
2052                           << PrunedLanes << '\n');
2053         setUndefOnPrunedSubRegUses(LI, CP.getSrcReg(), PrunedLanes);
2054       }
2055 
2056       LLVM_DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');
2057     }
2058     deleteInstr(CopyMI);
2059     return true;
2060   }
2061 
2062   // Enforce policies.
2063   if (CP.isPhys()) {
2064     LLVM_DEBUG(dbgs() << "\tConsidering merging "
2065                       << printReg(CP.getSrcReg(), TRI) << " with "
2066                       << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
2067     if (!canJoinPhys(CP)) {
2068       // Before giving up coalescing, if definition of source is defined by
2069       // trivial computation, try rematerializing it.
2070       bool IsDefCopy = false;
2071       if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
2072         return true;
2073       if (IsDefCopy)
2074         Again = true;  // May be possible to coalesce later.
2075       return false;
2076     }
2077   } else {
2078     // When possible, let DstReg be the larger interval.
2079     if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
2080                            LIS->getInterval(CP.getDstReg()).size())
2081       CP.flip();
2082 
2083     LLVM_DEBUG({
2084       dbgs() << "\tConsidering merging to "
2085              << TRI->getRegClassName(CP.getNewRC()) << " with ";
2086       if (CP.getDstIdx() && CP.getSrcIdx())
2087         dbgs() << printReg(CP.getDstReg()) << " in "
2088                << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
2089                << printReg(CP.getSrcReg()) << " in "
2090                << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
2091       else
2092         dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
2093                << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
2094     });
2095   }
2096 
2097   ShrinkMask = LaneBitmask::getNone();
2098   ShrinkMainRange = false;
2099 
2100   // Okay, attempt to join these two intervals.  On failure, this returns false.
2101   // Otherwise, if one of the intervals being joined is a physreg, this method
2102   // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
2103   // been modified, so we can use this information below to update aliases.
2104   if (!joinIntervals(CP)) {
2105     // Coalescing failed.
2106 
2107     // If definition of source is defined by trivial computation, try
2108     // rematerializing it.
2109     bool IsDefCopy = false;
2110     if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
2111       return true;
2112 
2113     // If we can eliminate the copy without merging the live segments, do so
2114     // now.
2115     if (!CP.isPartial() && !CP.isPhys()) {
2116       bool Changed = adjustCopiesBackFrom(CP, CopyMI);
2117       bool Shrink = false;
2118       if (!Changed)
2119         std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI);
2120       if (Changed) {
2121         deleteInstr(CopyMI);
2122         if (Shrink) {
2123           Register DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
2124           LiveInterval &DstLI = LIS->getInterval(DstReg);
2125           shrinkToUses(&DstLI);
2126           LLVM_DEBUG(dbgs() << "\t\tshrunk:   " << DstLI << '\n');
2127         }
2128         LLVM_DEBUG(dbgs() << "\tTrivial!\n");
2129         return true;
2130       }
2131     }
2132 
2133     // Try and see if we can partially eliminate the copy by moving the copy to
2134     // its predecessor.
2135     if (!CP.isPartial() && !CP.isPhys())
2136       if (removePartialRedundancy(CP, *CopyMI))
2137         return true;
2138 
2139     // Otherwise, we are unable to join the intervals.
2140     LLVM_DEBUG(dbgs() << "\tInterference!\n");
2141     Again = true;  // May be possible to coalesce later.
2142     return false;
2143   }
2144 
2145   // Coalescing to a virtual register that is of a sub-register class of the
2146   // other. Make sure the resulting register is set to the right register class.
2147   if (CP.isCrossClass()) {
2148     ++numCrossRCs;
2149     MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
2150   }
2151 
2152   // Removing sub-register copies can ease the register class constraints.
2153   // Make sure we attempt to inflate the register class of DstReg.
2154   if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
2155     InflateRegs.push_back(CP.getDstReg());
2156 
2157   // CopyMI has been erased by joinIntervals at this point. Remove it from
2158   // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
2159   // to the work list. This keeps ErasedInstrs from growing needlessly.
2160   ErasedInstrs.erase(CopyMI);
2161 
2162   // Rewrite all SrcReg operands to DstReg.
2163   // Also update DstReg operands to include DstIdx if it is set.
2164   if (CP.getDstIdx())
2165     updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
2166   updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
2167 
2168   // Shrink subregister ranges if necessary.
2169   if (ShrinkMask.any()) {
2170     LiveInterval &LI = LIS->getInterval(CP.getDstReg());
2171     for (LiveInterval::SubRange &S : LI.subranges()) {
2172       if ((S.LaneMask & ShrinkMask).none())
2173         continue;
2174       LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
2175                         << ")\n");
2176       LIS->shrinkToUses(S, LI.reg());
2177       ShrinkMainRange = true;
2178     }
2179     LI.removeEmptySubRanges();
2180   }
2181 
2182   // CP.getSrcReg()'s live interval has been merged into CP.getDstReg's live
2183   // interval. Since CP.getSrcReg() is in ToBeUpdated set and its live interval
2184   // is not up-to-date, need to update the merged live interval here.
2185   if (ToBeUpdated.count(CP.getSrcReg()))
2186     ShrinkMainRange = true;
2187 
2188   if (ShrinkMainRange) {
2189     LiveInterval &LI = LIS->getInterval(CP.getDstReg());
2190     shrinkToUses(&LI);
2191   }
2192 
2193   // SrcReg is guaranteed to be the register whose live interval that is
2194   // being merged.
2195   LIS->removeInterval(CP.getSrcReg());
2196 
2197   // Update regalloc hint.
2198   TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
2199 
2200   LLVM_DEBUG({
2201     dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
2202            << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
2203     dbgs() << "\tResult = ";
2204     if (CP.isPhys())
2205       dbgs() << printReg(CP.getDstReg(), TRI);
2206     else
2207       dbgs() << LIS->getInterval(CP.getDstReg());
2208     dbgs() << '\n';
2209   });
2210 
2211   ++numJoins;
2212   return true;
2213 }
2214 
2215 bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
2216   Register DstReg = CP.getDstReg();
2217   Register SrcReg = CP.getSrcReg();
2218   assert(CP.isPhys() && "Must be a physreg copy");
2219   assert(MRI->isReserved(DstReg) && "Not a reserved register");
2220   LiveInterval &RHS = LIS->getInterval(SrcReg);
2221   LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
2222 
2223   assert(RHS.containsOneValue() && "Invalid join with reserved register");
2224 
2225   // Optimization for reserved registers like ESP. We can only merge with a
2226   // reserved physreg if RHS has a single value that is a copy of DstReg.
2227   // The live range of the reserved register will look like a set of dead defs
2228   // - we don't properly track the live range of reserved registers.
2229 
2230   // Deny any overlapping intervals.  This depends on all the reserved
2231   // register live ranges to look like dead defs.
2232   if (!MRI->isConstantPhysReg(DstReg)) {
2233     for (MCRegUnit Unit : TRI->regunits(DstReg)) {
2234       // Abort if not all the regunits are reserved.
2235       for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) {
2236         if (!MRI->isReserved(*RI))
2237           return false;
2238       }
2239       if (RHS.overlaps(LIS->getRegUnit(Unit))) {
2240         LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(Unit, TRI)
2241                           << '\n');
2242         return false;
2243       }
2244     }
2245 
2246     // We must also check for overlaps with regmask clobbers.
2247     BitVector RegMaskUsable;
2248     if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
2249         !RegMaskUsable.test(DstReg)) {
2250       LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
2251       return false;
2252     }
2253   }
2254 
2255   // Skip any value computations, we are not adding new values to the
2256   // reserved register.  Also skip merging the live ranges, the reserved
2257   // register live range doesn't need to be accurate as long as all the
2258   // defs are there.
2259 
2260   // Delete the identity copy.
2261   MachineInstr *CopyMI;
2262   if (CP.isFlipped()) {
2263     // Physreg is copied into vreg
2264     //   %y = COPY %physreg_x
2265     //   ...  //< no other def of %physreg_x here
2266     //   use %y
2267     // =>
2268     //   ...
2269     //   use %physreg_x
2270     CopyMI = MRI->getVRegDef(SrcReg);
2271     deleteInstr(CopyMI);
2272   } else {
2273     // VReg is copied into physreg:
2274     //   %y = def
2275     //   ... //< no other def or use of %physreg_x here
2276     //   %physreg_x = COPY %y
2277     // =>
2278     //   %physreg_x = def
2279     //   ...
2280     if (!MRI->hasOneNonDBGUse(SrcReg)) {
2281       LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
2282       return false;
2283     }
2284 
2285     if (!LIS->intervalIsInOneMBB(RHS)) {
2286       LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
2287       return false;
2288     }
2289 
2290     MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
2291     CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
2292     SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
2293     SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
2294 
2295     if (!MRI->isConstantPhysReg(DstReg)) {
2296       // We checked above that there are no interfering defs of the physical
2297       // register. However, for this case, where we intend to move up the def of
2298       // the physical register, we also need to check for interfering uses.
2299       SlotIndexes *Indexes = LIS->getSlotIndexes();
2300       for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
2301            SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
2302         MachineInstr *MI = LIS->getInstructionFromIndex(SI);
2303         if (MI->readsRegister(DstReg, TRI)) {
2304           LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
2305           return false;
2306         }
2307       }
2308     }
2309 
2310     // We're going to remove the copy which defines a physical reserved
2311     // register, so remove its valno, etc.
2312     LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
2313                       << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
2314 
2315     LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
2316     deleteInstr(CopyMI);
2317 
2318     // Create a new dead def at the new def location.
2319     for (MCRegUnit Unit : TRI->regunits(DstReg)) {
2320       LiveRange &LR = LIS->getRegUnit(Unit);
2321       LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
2322     }
2323   }
2324 
2325   // We don't track kills for reserved registers.
2326   MRI->clearKillFlags(CP.getSrcReg());
2327 
2328   return true;
2329 }
2330 
2331 //===----------------------------------------------------------------------===//
2332 //                 Interference checking and interval joining
2333 //===----------------------------------------------------------------------===//
2334 //
2335 // In the easiest case, the two live ranges being joined are disjoint, and
2336 // there is no interference to consider. It is quite common, though, to have
2337 // overlapping live ranges, and we need to check if the interference can be
2338 // resolved.
2339 //
2340 // The live range of a single SSA value forms a sub-tree of the dominator tree.
2341 // This means that two SSA values overlap if and only if the def of one value
2342 // is contained in the live range of the other value. As a special case, the
2343 // overlapping values can be defined at the same index.
2344 //
2345 // The interference from an overlapping def can be resolved in these cases:
2346 //
2347 // 1. Coalescable copies. The value is defined by a copy that would become an
2348 //    identity copy after joining SrcReg and DstReg. The copy instruction will
2349 //    be removed, and the value will be merged with the source value.
2350 //
2351 //    There can be several copies back and forth, causing many values to be
2352 //    merged into one. We compute a list of ultimate values in the joined live
2353 //    range as well as a mappings from the old value numbers.
2354 //
2355 // 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
2356 //    predecessors have a live out value. It doesn't cause real interference,
2357 //    and can be merged into the value it overlaps. Like a coalescable copy, it
2358 //    can be erased after joining.
2359 //
2360 // 3. Copy of external value. The overlapping def may be a copy of a value that
2361 //    is already in the other register. This is like a coalescable copy, but
2362 //    the live range of the source register must be trimmed after erasing the
2363 //    copy instruction:
2364 //
2365 //      %src = COPY %ext
2366 //      %dst = COPY %ext  <-- Remove this COPY, trim the live range of %ext.
2367 //
2368 // 4. Clobbering undefined lanes. Vector registers are sometimes built by
2369 //    defining one lane at a time:
2370 //
2371 //      %dst:ssub0<def,read-undef> = FOO
2372 //      %src = BAR
2373 //      %dst:ssub1 = COPY %src
2374 //
2375 //    The live range of %src overlaps the %dst value defined by FOO, but
2376 //    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
2377 //    which was undef anyway.
2378 //
2379 //    The value mapping is more complicated in this case. The final live range
2380 //    will have different value numbers for both FOO and BAR, but there is no
2381 //    simple mapping from old to new values. It may even be necessary to add
2382 //    new PHI values.
2383 //
2384 // 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
2385 //    is live, but never read. This can happen because we don't compute
2386 //    individual live ranges per lane.
2387 //
2388 //      %dst = FOO
2389 //      %src = BAR
2390 //      %dst:ssub1 = COPY %src
2391 //
2392 //    This kind of interference is only resolved locally. If the clobbered
2393 //    lane value escapes the block, the join is aborted.
2394 
2395 namespace {
2396 
2397 /// Track information about values in a single virtual register about to be
2398 /// joined. Objects of this class are always created in pairs - one for each
2399 /// side of the CoalescerPair (or one for each lane of a side of the coalescer
2400 /// pair)
2401 class JoinVals {
2402   /// Live range we work on.
2403   LiveRange &LR;
2404 
2405   /// (Main) register we work on.
2406   const Register Reg;
2407 
2408   /// Reg (and therefore the values in this liverange) will end up as
2409   /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
2410   /// CP.SrcIdx.
2411   const unsigned SubIdx;
2412 
2413   /// The LaneMask that this liverange will occupy the coalesced register. May
2414   /// be smaller than the lanemask produced by SubIdx when merging subranges.
2415   const LaneBitmask LaneMask;
2416 
2417   /// This is true when joining sub register ranges, false when joining main
2418   /// ranges.
2419   const bool SubRangeJoin;
2420 
2421   /// Whether the current LiveInterval tracks subregister liveness.
2422   const bool TrackSubRegLiveness;
2423 
2424   /// Values that will be present in the final live range.
2425   SmallVectorImpl<VNInfo*> &NewVNInfo;
2426 
2427   const CoalescerPair &CP;
2428   LiveIntervals *LIS;
2429   SlotIndexes *Indexes;
2430   const TargetRegisterInfo *TRI;
2431 
2432   /// Value number assignments. Maps value numbers in LI to entries in
2433   /// NewVNInfo. This is suitable for passing to LiveInterval::join().
2434   SmallVector<int, 8> Assignments;
2435 
2436   public:
2437   /// Conflict resolution for overlapping values.
2438   enum ConflictResolution {
2439     /// No overlap, simply keep this value.
2440     CR_Keep,
2441 
2442     /// Merge this value into OtherVNI and erase the defining instruction.
2443     /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
2444     /// values.
2445     CR_Erase,
2446 
2447     /// Merge this value into OtherVNI but keep the defining instruction.
2448     /// This is for the special case where OtherVNI is defined by the same
2449     /// instruction.
2450     CR_Merge,
2451 
2452     /// Keep this value, and have it replace OtherVNI where possible. This
2453     /// complicates value mapping since OtherVNI maps to two different values
2454     /// before and after this def.
2455     /// Used when clobbering undefined or dead lanes.
2456     CR_Replace,
2457 
2458     /// Unresolved conflict. Visit later when all values have been mapped.
2459     CR_Unresolved,
2460 
2461     /// Unresolvable conflict. Abort the join.
2462     CR_Impossible
2463   };
2464 
2465   private:
2466   /// Per-value info for LI. The lane bit masks are all relative to the final
2467   /// joined register, so they can be compared directly between SrcReg and
2468   /// DstReg.
2469   struct Val {
2470     ConflictResolution Resolution = CR_Keep;
2471 
2472     /// Lanes written by this def, 0 for unanalyzed values.
2473     LaneBitmask WriteLanes;
2474 
2475     /// Lanes with defined values in this register. Other lanes are undef and
2476     /// safe to clobber.
2477     LaneBitmask ValidLanes;
2478 
2479     /// Value in LI being redefined by this def.
2480     VNInfo *RedefVNI = nullptr;
2481 
2482     /// Value in the other live range that overlaps this def, if any.
2483     VNInfo *OtherVNI = nullptr;
2484 
2485     /// Is this value an IMPLICIT_DEF that can be erased?
2486     ///
2487     /// IMPLICIT_DEF values should only exist at the end of a basic block that
2488     /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
2489     /// safely erased if they are overlapping a live value in the other live
2490     /// interval.
2491     ///
2492     /// Weird control flow graphs and incomplete PHI handling in
2493     /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
2494     /// longer live ranges. Such IMPLICIT_DEF values should be treated like
2495     /// normal values.
2496     bool ErasableImplicitDef = false;
2497 
2498     /// True when the live range of this value will be pruned because of an
2499     /// overlapping CR_Replace value in the other live range.
2500     bool Pruned = false;
2501 
2502     /// True once Pruned above has been computed.
2503     bool PrunedComputed = false;
2504 
2505     /// True if this value is determined to be identical to OtherVNI
2506     /// (in valuesIdentical). This is used with CR_Erase where the erased
2507     /// copy is redundant, i.e. the source value is already the same as
2508     /// the destination. In such cases the subranges need to be updated
2509     /// properly. See comment at pruneSubRegValues for more info.
2510     bool Identical = false;
2511 
2512     Val() = default;
2513 
2514     bool isAnalyzed() const { return WriteLanes.any(); }
2515 
2516     /// Mark this value as an IMPLICIT_DEF which must be kept as if it were an
2517     /// ordinary value.
2518     void mustKeepImplicitDef(const TargetRegisterInfo &TRI,
2519                              const MachineInstr &ImpDef) {
2520       assert(ImpDef.isImplicitDef());
2521       ErasableImplicitDef = false;
2522       ValidLanes = TRI.getSubRegIndexLaneMask(ImpDef.getOperand(0).getSubReg());
2523     }
2524   };
2525 
2526   /// One entry per value number in LI.
2527   SmallVector<Val, 8> Vals;
2528 
2529   /// Compute the bitmask of lanes actually written by DefMI.
2530   /// Set Redef if there are any partial register definitions that depend on the
2531   /// previous value of the register.
2532   LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
2533 
2534   /// Find the ultimate value that VNI was copied from.
2535   std::pair<const VNInfo *, Register> followCopyChain(const VNInfo *VNI) const;
2536 
2537   bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
2538 
2539   /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
2540   /// Return a conflict resolution when possible, but leave the hard cases as
2541   /// CR_Unresolved.
2542   /// Recursively calls computeAssignment() on this and Other, guaranteeing that
2543   /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
2544   /// The recursion always goes upwards in the dominator tree, making loops
2545   /// impossible.
2546   ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
2547 
2548   /// Compute the value assignment for ValNo in RI.
2549   /// This may be called recursively by analyzeValue(), but never for a ValNo on
2550   /// the stack.
2551   void computeAssignment(unsigned ValNo, JoinVals &Other);
2552 
2553   /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
2554   /// the extent of the tainted lanes in the block.
2555   ///
2556   /// Multiple values in Other.LR can be affected since partial redefinitions
2557   /// can preserve previously tainted lanes.
2558   ///
2559   ///   1 %dst = VLOAD           <-- Define all lanes in %dst
2560   ///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
2561   ///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
2562   ///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
2563   ///
2564   /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
2565   /// entry to TaintedVals.
2566   ///
2567   /// Returns false if the tainted lanes extend beyond the basic block.
2568   bool
2569   taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2570               SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
2571 
2572   /// Return true if MI uses any of the given Lanes from Reg.
2573   /// This does not include partial redefinitions of Reg.
2574   bool usesLanes(const MachineInstr &MI, Register, unsigned, LaneBitmask) const;
2575 
2576   /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
2577   /// be pruned:
2578   ///
2579   ///   %dst = COPY %src
2580   ///   %src = COPY %dst  <-- This value to be pruned.
2581   ///   %dst = COPY %src  <-- This value is a copy of a pruned value.
2582   bool isPrunedValue(unsigned ValNo, JoinVals &Other);
2583 
2584 public:
2585   JoinVals(LiveRange &LR, Register Reg, unsigned SubIdx, LaneBitmask LaneMask,
2586            SmallVectorImpl<VNInfo *> &newVNInfo, const CoalescerPair &cp,
2587            LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
2588            bool TrackSubRegLiveness)
2589       : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
2590         SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
2591         NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
2592         TRI(TRI), Assignments(LR.getNumValNums(), -1),
2593         Vals(LR.getNumValNums()) {}
2594 
2595   /// Analyze defs in LR and compute a value mapping in NewVNInfo.
2596   /// Returns false if any conflicts were impossible to resolve.
2597   bool mapValues(JoinVals &Other);
2598 
2599   /// Try to resolve conflicts that require all values to be mapped.
2600   /// Returns false if any conflicts were impossible to resolve.
2601   bool resolveConflicts(JoinVals &Other);
2602 
2603   /// Prune the live range of values in Other.LR where they would conflict with
2604   /// CR_Replace values in LR. Collect end points for restoring the live range
2605   /// after joining.
2606   void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
2607                    bool changeInstrs);
2608 
2609   /// Removes subranges starting at copies that get removed. This sometimes
2610   /// happens when undefined subranges are copied around. These ranges contain
2611   /// no useful information and can be removed.
2612   void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
2613 
2614   /// Pruning values in subranges can lead to removing segments in these
2615   /// subranges started by IMPLICIT_DEFs. The corresponding segments in
2616   /// the main range also need to be removed. This function will mark
2617   /// the corresponding values in the main range as pruned, so that
2618   /// eraseInstrs can do the final cleanup.
2619   /// The parameter @p LI must be the interval whose main range is the
2620   /// live range LR.
2621   void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
2622 
2623   /// Erase any machine instructions that have been coalesced away.
2624   /// Add erased instructions to ErasedInstrs.
2625   /// Add foreign virtual registers to ShrinkRegs if their live range ended at
2626   /// the erased instrs.
2627   void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
2628                    SmallVectorImpl<Register> &ShrinkRegs,
2629                    LiveInterval *LI = nullptr);
2630 
2631   /// Remove liverange defs at places where implicit defs will be removed.
2632   void removeImplicitDefs();
2633 
2634   /// Get the value assignments suitable for passing to LiveInterval::join.
2635   const int *getAssignments() const { return Assignments.data(); }
2636 
2637   /// Get the conflict resolution for a value number.
2638   ConflictResolution getResolution(unsigned Num) const {
2639     return Vals[Num].Resolution;
2640   }
2641 };
2642 
2643 } // end anonymous namespace
2644 
2645 LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
2646   const {
2647   LaneBitmask L;
2648   for (const MachineOperand &MO : DefMI->all_defs()) {
2649     if (MO.getReg() != Reg)
2650       continue;
2651     L |= TRI->getSubRegIndexLaneMask(
2652            TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
2653     if (MO.readsReg())
2654       Redef = true;
2655   }
2656   return L;
2657 }
2658 
2659 std::pair<const VNInfo *, Register>
2660 JoinVals::followCopyChain(const VNInfo *VNI) const {
2661   Register TrackReg = Reg;
2662 
2663   while (!VNI->isPHIDef()) {
2664     SlotIndex Def = VNI->def;
2665     MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
2666     assert(MI && "No defining instruction");
2667     if (!MI->isFullCopy())
2668       return std::make_pair(VNI, TrackReg);
2669     Register SrcReg = MI->getOperand(1).getReg();
2670     if (!SrcReg.isVirtual())
2671       return std::make_pair(VNI, TrackReg);
2672 
2673     const LiveInterval &LI = LIS->getInterval(SrcReg);
2674     const VNInfo *ValueIn;
2675     // No subrange involved.
2676     if (!SubRangeJoin || !LI.hasSubRanges()) {
2677       LiveQueryResult LRQ = LI.Query(Def);
2678       ValueIn = LRQ.valueIn();
2679     } else {
2680       // Query subranges. Ensure that all matching ones take us to the same def
2681       // (allowing some of them to be undef).
2682       ValueIn = nullptr;
2683       for (const LiveInterval::SubRange &S : LI.subranges()) {
2684         // Transform lanemask to a mask in the joined live interval.
2685         LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
2686         if ((SMask & LaneMask).none())
2687           continue;
2688         LiveQueryResult LRQ = S.Query(Def);
2689         if (!ValueIn) {
2690           ValueIn = LRQ.valueIn();
2691           continue;
2692         }
2693         if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
2694           return std::make_pair(VNI, TrackReg);
2695       }
2696     }
2697     if (ValueIn == nullptr) {
2698       // Reaching an undefined value is legitimate, for example:
2699       //
2700       // 1   undef %0.sub1 = ...  ;; %0.sub0 == undef
2701       // 2   %1 = COPY %0         ;; %1 is defined here.
2702       // 3   %0 = COPY %1         ;; Now %0.sub0 has a definition,
2703       //                          ;; but it's equivalent to "undef".
2704       return std::make_pair(nullptr, SrcReg);
2705     }
2706     VNI = ValueIn;
2707     TrackReg = SrcReg;
2708   }
2709   return std::make_pair(VNI, TrackReg);
2710 }
2711 
2712 bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
2713                                const JoinVals &Other) const {
2714   const VNInfo *Orig0;
2715   Register Reg0;
2716   std::tie(Orig0, Reg0) = followCopyChain(Value0);
2717   if (Orig0 == Value1 && Reg0 == Other.Reg)
2718     return true;
2719 
2720   const VNInfo *Orig1;
2721   Register Reg1;
2722   std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
2723   // If both values are undefined, and the source registers are the same
2724   // register, the values are identical. Filter out cases where only one
2725   // value is defined.
2726   if (Orig0 == nullptr || Orig1 == nullptr)
2727     return Orig0 == Orig1 && Reg0 == Reg1;
2728 
2729   // The values are equal if they are defined at the same place and use the
2730   // same register. Note that we cannot compare VNInfos directly as some of
2731   // them might be from a copy created in mergeSubRangeInto()  while the other
2732   // is from the original LiveInterval.
2733   return Orig0->def == Orig1->def && Reg0 == Reg1;
2734 }
2735 
2736 JoinVals::ConflictResolution
2737 JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
2738   Val &V = Vals[ValNo];
2739   assert(!V.isAnalyzed() && "Value has already been analyzed!");
2740   VNInfo *VNI = LR.getValNumInfo(ValNo);
2741   if (VNI->isUnused()) {
2742     V.WriteLanes = LaneBitmask::getAll();
2743     return CR_Keep;
2744   }
2745 
2746   // Get the instruction defining this value, compute the lanes written.
2747   const MachineInstr *DefMI = nullptr;
2748   if (VNI->isPHIDef()) {
2749     // Conservatively assume that all lanes in a PHI are valid.
2750     LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
2751                                      : TRI->getSubRegIndexLaneMask(SubIdx);
2752     V.ValidLanes = V.WriteLanes = Lanes;
2753   } else {
2754     DefMI = Indexes->getInstructionFromIndex(VNI->def);
2755     assert(DefMI != nullptr);
2756     if (SubRangeJoin) {
2757       // We don't care about the lanes when joining subregister ranges.
2758       V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
2759       if (DefMI->isImplicitDef()) {
2760         V.ValidLanes = LaneBitmask::getNone();
2761         V.ErasableImplicitDef = true;
2762       }
2763     } else {
2764       bool Redef = false;
2765       V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
2766 
2767       // If this is a read-modify-write instruction, there may be more valid
2768       // lanes than the ones written by this instruction.
2769       // This only covers partial redef operands. DefMI may have normal use
2770       // operands reading the register. They don't contribute valid lanes.
2771       //
2772       // This adds ssub1 to the set of valid lanes in %src:
2773       //
2774       //   %src:ssub1 = FOO
2775       //
2776       // This leaves only ssub1 valid, making any other lanes undef:
2777       //
2778       //   %src:ssub1<def,read-undef> = FOO %src:ssub2
2779       //
2780       // The <read-undef> flag on the def operand means that old lane values are
2781       // not important.
2782       if (Redef) {
2783         V.RedefVNI = LR.Query(VNI->def).valueIn();
2784         assert((TrackSubRegLiveness || V.RedefVNI) &&
2785                "Instruction is reading nonexistent value");
2786         if (V.RedefVNI != nullptr) {
2787           computeAssignment(V.RedefVNI->id, Other);
2788           V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
2789         }
2790       }
2791 
2792       // An IMPLICIT_DEF writes undef values.
2793       if (DefMI->isImplicitDef()) {
2794         // We normally expect IMPLICIT_DEF values to be live only until the end
2795         // of their block. If the value is really live longer and gets pruned in
2796         // another block, this flag is cleared again.
2797         //
2798         // Clearing the valid lanes is deferred until it is sure this can be
2799         // erased.
2800         V.ErasableImplicitDef = true;
2801       }
2802     }
2803   }
2804 
2805   // Find the value in Other that overlaps VNI->def, if any.
2806   LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
2807 
2808   // It is possible that both values are defined by the same instruction, or
2809   // the values are PHIs defined in the same block. When that happens, the two
2810   // values should be merged into one, but not into any preceding value.
2811   // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
2812   if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
2813     assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
2814 
2815     // One value stays, the other is merged. Keep the earlier one, or the first
2816     // one we see.
2817     if (OtherVNI->def < VNI->def)
2818       Other.computeAssignment(OtherVNI->id, *this);
2819     else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
2820       // This is an early-clobber def overlapping a live-in value in the other
2821       // register. Not mergeable.
2822       V.OtherVNI = OtherLRQ.valueIn();
2823       return CR_Impossible;
2824     }
2825     V.OtherVNI = OtherVNI;
2826     Val &OtherV = Other.Vals[OtherVNI->id];
2827     // Keep this value, check for conflicts when analyzing OtherVNI. Avoid
2828     // revisiting OtherVNI->id in JoinVals::computeAssignment() below before it
2829     // is assigned.
2830     if (!OtherV.isAnalyzed() || Other.Assignments[OtherVNI->id] == -1)
2831       return CR_Keep;
2832     // Both sides have been analyzed now.
2833     // Allow overlapping PHI values. Any real interference would show up in a
2834     // predecessor, the PHI itself can't introduce any conflicts.
2835     if (VNI->isPHIDef())
2836       return CR_Merge;
2837     if ((V.ValidLanes & OtherV.ValidLanes).any())
2838       // Overlapping lanes can't be resolved.
2839       return CR_Impossible;
2840     else
2841       return CR_Merge;
2842   }
2843 
2844   // No simultaneous def. Is Other live at the def?
2845   V.OtherVNI = OtherLRQ.valueIn();
2846   if (!V.OtherVNI)
2847     // No overlap, no conflict.
2848     return CR_Keep;
2849 
2850   assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
2851 
2852   // We have overlapping values, or possibly a kill of Other.
2853   // Recursively compute assignments up the dominator tree.
2854   Other.computeAssignment(V.OtherVNI->id, *this);
2855   Val &OtherV = Other.Vals[V.OtherVNI->id];
2856 
2857   if (OtherV.ErasableImplicitDef) {
2858     // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
2859     // This shouldn't normally happen, but ProcessImplicitDefs can leave such
2860     // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
2861     // technically.
2862     //
2863     // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
2864     // to erase the IMPLICIT_DEF instruction.
2865     //
2866     // Additionally we must keep an IMPLICIT_DEF if we're redefining an incoming
2867     // value.
2868 
2869     MachineInstr *OtherImpDef =
2870         Indexes->getInstructionFromIndex(V.OtherVNI->def);
2871     MachineBasicBlock *OtherMBB = OtherImpDef->getParent();
2872     if (DefMI &&
2873         (DefMI->getParent() != OtherMBB || LIS->isLiveInToMBB(LR, OtherMBB))) {
2874       LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2875                  << " extends into "
2876                  << printMBBReference(*DefMI->getParent())
2877                  << ", keeping it.\n");
2878       OtherV.mustKeepImplicitDef(*TRI, *OtherImpDef);
2879     } else if (OtherMBB->hasEHPadSuccessor()) {
2880       // If OtherV is defined in a basic block that has EH pad successors then
2881       // we get the same problem not just if OtherV is live beyond its basic
2882       // block, but beyond the last call instruction in its basic block. Handle
2883       // this case conservatively.
2884       LLVM_DEBUG(
2885           dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2886                  << " may be live into EH pad successors, keeping it.\n");
2887       OtherV.mustKeepImplicitDef(*TRI, *OtherImpDef);
2888     } else {
2889       // We deferred clearing these lanes in case we needed to save them
2890       OtherV.ValidLanes &= ~OtherV.WriteLanes;
2891     }
2892   }
2893 
2894   // Allow overlapping PHI values. Any real interference would show up in a
2895   // predecessor, the PHI itself can't introduce any conflicts.
2896   if (VNI->isPHIDef())
2897     return CR_Replace;
2898 
2899   // Check for simple erasable conflicts.
2900   if (DefMI->isImplicitDef())
2901     return CR_Erase;
2902 
2903   // Include the non-conflict where DefMI is a coalescable copy that kills
2904   // OtherVNI. We still want the copy erased and value numbers merged.
2905   if (CP.isCoalescable(DefMI)) {
2906     // Some of the lanes copied from OtherVNI may be undef, making them undef
2907     // here too.
2908     V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
2909     return CR_Erase;
2910   }
2911 
2912   // This may not be a real conflict if DefMI simply kills Other and defines
2913   // VNI.
2914   if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
2915     return CR_Keep;
2916 
2917   // Handle the case where VNI and OtherVNI can be proven to be identical:
2918   //
2919   //   %other = COPY %ext
2920   //   %this  = COPY %ext <-- Erase this copy
2921   //
2922   if (DefMI->isFullCopy() && !CP.isPartial() &&
2923       valuesIdentical(VNI, V.OtherVNI, Other)) {
2924     V.Identical = true;
2925     return CR_Erase;
2926   }
2927 
2928   // The remaining checks apply to the lanes, which aren't tracked here.  This
2929   // was already decided to be OK via the following CR_Replace condition.
2930   // CR_Replace.
2931   if (SubRangeJoin)
2932     return CR_Replace;
2933 
2934   // If the lanes written by this instruction were all undef in OtherVNI, it is
2935   // still safe to join the live ranges. This can't be done with a simple value
2936   // mapping, though - OtherVNI will map to multiple values:
2937   //
2938   //   1 %dst:ssub0 = FOO                <-- OtherVNI
2939   //   2 %src = BAR                      <-- VNI
2940   //   3 %dst:ssub1 = COPY killed %src    <-- Eliminate this copy.
2941   //   4 BAZ killed %dst
2942   //   5 QUUX killed %src
2943   //
2944   // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
2945   // handles this complex value mapping.
2946   if ((V.WriteLanes & OtherV.ValidLanes).none())
2947     return CR_Replace;
2948 
2949   // If the other live range is killed by DefMI and the live ranges are still
2950   // overlapping, it must be because we're looking at an early clobber def:
2951   //
2952   //   %dst<def,early-clobber> = ASM killed %src
2953   //
2954   // In this case, it is illegal to merge the two live ranges since the early
2955   // clobber def would clobber %src before it was read.
2956   if (OtherLRQ.isKill()) {
2957     // This case where the def doesn't overlap the kill is handled above.
2958     assert(VNI->def.isEarlyClobber() &&
2959            "Only early clobber defs can overlap a kill");
2960     return CR_Impossible;
2961   }
2962 
2963   // VNI is clobbering live lanes in OtherVNI, but there is still the
2964   // possibility that no instructions actually read the clobbered lanes.
2965   // If we're clobbering all the lanes in OtherVNI, at least one must be read.
2966   // Otherwise Other.RI wouldn't be live here.
2967   if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
2968     return CR_Impossible;
2969 
2970   if (TrackSubRegLiveness) {
2971     auto &OtherLI = LIS->getInterval(Other.Reg);
2972     // If OtherVNI does not have subranges, it means all the lanes of OtherVNI
2973     // share the same live range, so we just need to check whether they have
2974     // any conflict bit in their LaneMask.
2975     if (!OtherLI.hasSubRanges()) {
2976       LaneBitmask OtherMask = TRI->getSubRegIndexLaneMask(Other.SubIdx);
2977       return (OtherMask & V.WriteLanes).none() ? CR_Replace : CR_Impossible;
2978     }
2979 
2980     // If we are clobbering some active lanes of OtherVNI at VNI->def, it is
2981     // impossible to resolve the conflict. Otherwise, we can just replace
2982     // OtherVNI because of no real conflict.
2983     for (LiveInterval::SubRange &OtherSR : OtherLI.subranges()) {
2984       LaneBitmask OtherMask =
2985           TRI->composeSubRegIndexLaneMask(Other.SubIdx, OtherSR.LaneMask);
2986       if ((OtherMask & V.WriteLanes).none())
2987         continue;
2988 
2989       auto OtherSRQ = OtherSR.Query(VNI->def);
2990       if (OtherSRQ.valueIn() && OtherSRQ.endPoint() > VNI->def) {
2991         // VNI is clobbering some lanes of OtherVNI, they have real conflict.
2992         return CR_Impossible;
2993       }
2994     }
2995 
2996     // VNI is NOT clobbering any lane of OtherVNI, just replace OtherVNI.
2997     return CR_Replace;
2998   }
2999 
3000   // We need to verify that no instructions are reading the clobbered lanes.
3001   // To save compile time, we'll only check that locally. Don't allow the
3002   // tainted value to escape the basic block.
3003   MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
3004   if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
3005     return CR_Impossible;
3006 
3007   // There are still some things that could go wrong besides clobbered lanes
3008   // being read, for example OtherVNI may be only partially redefined in MBB,
3009   // and some clobbered lanes could escape the block. Save this analysis for
3010   // resolveConflicts() when all values have been mapped. We need to know
3011   // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
3012   // that now - the recursive analyzeValue() calls must go upwards in the
3013   // dominator tree.
3014   return CR_Unresolved;
3015 }
3016 
3017 void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
3018   Val &V = Vals[ValNo];
3019   if (V.isAnalyzed()) {
3020     // Recursion should always move up the dominator tree, so ValNo is not
3021     // supposed to reappear before it has been assigned.
3022     assert(Assignments[ValNo] != -1 && "Bad recursion?");
3023     return;
3024   }
3025   switch ((V.Resolution = analyzeValue(ValNo, Other))) {
3026   case CR_Erase:
3027   case CR_Merge:
3028     // Merge this ValNo into OtherVNI.
3029     assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
3030     assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
3031     Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
3032     LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
3033                       << LR.getValNumInfo(ValNo)->def << " into "
3034                       << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
3035                       << V.OtherVNI->def << " --> @"
3036                       << NewVNInfo[Assignments[ValNo]]->def << '\n');
3037     break;
3038   case CR_Replace:
3039   case CR_Unresolved: {
3040     // The other value is going to be pruned if this join is successful.
3041     assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
3042     Val &OtherV = Other.Vals[V.OtherVNI->id];
3043     OtherV.Pruned = true;
3044     [[fallthrough]];
3045   }
3046   default:
3047     // This value number needs to go in the final joined live range.
3048     Assignments[ValNo] = NewVNInfo.size();
3049     NewVNInfo.push_back(LR.getValNumInfo(ValNo));
3050     break;
3051   }
3052 }
3053 
3054 bool JoinVals::mapValues(JoinVals &Other) {
3055   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3056     computeAssignment(i, Other);
3057     if (Vals[i].Resolution == CR_Impossible) {
3058       LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
3059                         << '@' << LR.getValNumInfo(i)->def << '\n');
3060       return false;
3061     }
3062   }
3063   return true;
3064 }
3065 
3066 bool JoinVals::
3067 taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
3068             SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
3069   VNInfo *VNI = LR.getValNumInfo(ValNo);
3070   MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
3071   SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
3072 
3073   // Scan Other.LR from VNI.def to MBBEnd.
3074   LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
3075   assert(OtherI != Other.LR.end() && "No conflict?");
3076   do {
3077     // OtherI is pointing to a tainted value. Abort the join if the tainted
3078     // lanes escape the block.
3079     SlotIndex End = OtherI->end;
3080     if (End >= MBBEnd) {
3081       LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
3082                         << OtherI->valno->id << '@' << OtherI->start << '\n');
3083       return false;
3084     }
3085     LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
3086                       << OtherI->valno->id << '@' << OtherI->start << " to "
3087                       << End << '\n');
3088     // A dead def is not a problem.
3089     if (End.isDead())
3090       break;
3091     TaintExtent.push_back(std::make_pair(End, TaintedLanes));
3092 
3093     // Check for another def in the MBB.
3094     if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
3095       break;
3096 
3097     // Lanes written by the new def are no longer tainted.
3098     const Val &OV = Other.Vals[OtherI->valno->id];
3099     TaintedLanes &= ~OV.WriteLanes;
3100     if (!OV.RedefVNI)
3101       break;
3102   } while (TaintedLanes.any());
3103   return true;
3104 }
3105 
3106 bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
3107                          LaneBitmask Lanes) const {
3108   if (MI.isDebugOrPseudoInstr())
3109     return false;
3110   for (const MachineOperand &MO : MI.all_uses()) {
3111     if (MO.getReg() != Reg)
3112       continue;
3113     if (!MO.readsReg())
3114       continue;
3115     unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
3116     if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
3117       return true;
3118   }
3119   return false;
3120 }
3121 
3122 bool JoinVals::resolveConflicts(JoinVals &Other) {
3123   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3124     Val &V = Vals[i];
3125     assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
3126     if (V.Resolution != CR_Unresolved)
3127       continue;
3128     LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
3129                       << LR.getValNumInfo(i)->def
3130                       << ' ' << PrintLaneMask(LaneMask) << '\n');
3131     if (SubRangeJoin)
3132       return false;
3133 
3134     ++NumLaneConflicts;
3135     assert(V.OtherVNI && "Inconsistent conflict resolution.");
3136     VNInfo *VNI = LR.getValNumInfo(i);
3137     const Val &OtherV = Other.Vals[V.OtherVNI->id];
3138 
3139     // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
3140     // join, those lanes will be tainted with a wrong value. Get the extent of
3141     // the tainted lanes.
3142     LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
3143     SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
3144     if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
3145       // Tainted lanes would extend beyond the basic block.
3146       return false;
3147 
3148     assert(!TaintExtent.empty() && "There should be at least one conflict.");
3149 
3150     // Now look at the instructions from VNI->def to TaintExtent (inclusive).
3151     MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
3152     MachineBasicBlock::iterator MI = MBB->begin();
3153     if (!VNI->isPHIDef()) {
3154       MI = Indexes->getInstructionFromIndex(VNI->def);
3155       if (!VNI->def.isEarlyClobber()) {
3156         // No need to check the instruction defining VNI for reads.
3157         ++MI;
3158       }
3159     }
3160     assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
3161            "Interference ends on VNI->def. Should have been handled earlier");
3162     MachineInstr *LastMI =
3163       Indexes->getInstructionFromIndex(TaintExtent.front().first);
3164     assert(LastMI && "Range must end at a proper instruction");
3165     unsigned TaintNum = 0;
3166     while (true) {
3167       assert(MI != MBB->end() && "Bad LastMI");
3168       if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
3169         LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
3170         return false;
3171       }
3172       // LastMI is the last instruction to use the current value.
3173       if (&*MI == LastMI) {
3174         if (++TaintNum == TaintExtent.size())
3175           break;
3176         LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
3177         assert(LastMI && "Range must end at a proper instruction");
3178         TaintedLanes = TaintExtent[TaintNum].second;
3179       }
3180       ++MI;
3181     }
3182 
3183     // The tainted lanes are unused.
3184     V.Resolution = CR_Replace;
3185     ++NumLaneResolves;
3186   }
3187   return true;
3188 }
3189 
3190 bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
3191   Val &V = Vals[ValNo];
3192   if (V.Pruned || V.PrunedComputed)
3193     return V.Pruned;
3194 
3195   if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
3196     return V.Pruned;
3197 
3198   // Follow copies up the dominator tree and check if any intermediate value
3199   // has been pruned.
3200   V.PrunedComputed = true;
3201   V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
3202   return V.Pruned;
3203 }
3204 
3205 void JoinVals::pruneValues(JoinVals &Other,
3206                            SmallVectorImpl<SlotIndex> &EndPoints,
3207                            bool changeInstrs) {
3208   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3209     SlotIndex Def = LR.getValNumInfo(i)->def;
3210     switch (Vals[i].Resolution) {
3211     case CR_Keep:
3212       break;
3213     case CR_Replace: {
3214       // This value takes precedence over the value in Other.LR.
3215       LIS->pruneValue(Other.LR, Def, &EndPoints);
3216       // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
3217       // instructions are only inserted to provide a live-out value for PHI
3218       // predecessors, so the instruction should simply go away once its value
3219       // has been replaced.
3220       Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
3221       bool EraseImpDef = OtherV.ErasableImplicitDef &&
3222                          OtherV.Resolution == CR_Keep;
3223       if (!Def.isBlock()) {
3224         if (changeInstrs) {
3225           // Remove <def,read-undef> flags. This def is now a partial redef.
3226           // Also remove dead flags since the joined live range will
3227           // continue past this instruction.
3228           for (MachineOperand &MO :
3229                Indexes->getInstructionFromIndex(Def)->operands()) {
3230             if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
3231               if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
3232                 MO.setIsUndef(false);
3233               MO.setIsDead(false);
3234             }
3235           }
3236         }
3237         // This value will reach instructions below, but we need to make sure
3238         // the live range also reaches the instruction at Def.
3239         if (!EraseImpDef)
3240           EndPoints.push_back(Def);
3241       }
3242       LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
3243                         << ": " << Other.LR << '\n');
3244       break;
3245     }
3246     case CR_Erase:
3247     case CR_Merge:
3248       if (isPrunedValue(i, Other)) {
3249         // This value is ultimately a copy of a pruned value in LR or Other.LR.
3250         // We can no longer trust the value mapping computed by
3251         // computeAssignment(), the value that was originally copied could have
3252         // been replaced.
3253         LIS->pruneValue(LR, Def, &EndPoints);
3254         LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
3255                           << Def << ": " << LR << '\n');
3256       }
3257       break;
3258     case CR_Unresolved:
3259     case CR_Impossible:
3260       llvm_unreachable("Unresolved conflicts");
3261     }
3262   }
3263 }
3264 
3265 // Check if the segment consists of a copied live-through value (i.e. the copy
3266 // in the block only extended the liveness, of an undef value which we may need
3267 // to handle).
3268 static bool isLiveThrough(const LiveQueryResult Q) {
3269   return Q.valueIn() && Q.valueIn()->isPHIDef() && Q.valueIn() == Q.valueOut();
3270 }
3271 
3272 /// Consider the following situation when coalescing the copy between
3273 /// %31 and %45 at 800. (The vertical lines represent live range segments.)
3274 ///
3275 ///                              Main range         Subrange 0004 (sub2)
3276 ///                              %31    %45           %31    %45
3277 ///  544    %45 = COPY %28               +                    +
3278 ///                                      | v1                 | v1
3279 ///  560B bb.1:                          +                    +
3280 ///  624        = %45.sub2               | v2                 | v2
3281 ///  800    %31 = COPY %45        +      +             +      +
3282 ///                               | v0                 | v0
3283 ///  816    %31.sub1 = ...        +                    |
3284 ///  880    %30 = COPY %31        | v1                 +
3285 ///  928    %45 = COPY %30        |      +                    +
3286 ///                               |      | v0                 | v0  <--+
3287 ///  992B   ; backedge -> bb.1    |      +                    +        |
3288 /// 1040        = %31.sub0        +                                    |
3289 ///                                                 This value must remain
3290 ///                                                 live-out!
3291 ///
3292 /// Assuming that %31 is coalesced into %45, the copy at 928 becomes
3293 /// redundant, since it copies the value from %45 back into it. The
3294 /// conflict resolution for the main range determines that %45.v0 is
3295 /// to be erased, which is ok since %31.v1 is identical to it.
3296 /// The problem happens with the subrange for sub2: it has to be live
3297 /// on exit from the block, but since 928 was actually a point of
3298 /// definition of %45.sub2, %45.sub2 was not live immediately prior
3299 /// to that definition. As a result, when 928 was erased, the value v0
3300 /// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
3301 /// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
3302 /// providing an incorrect value to the use at 624.
3303 ///
3304 /// Since the main-range values %31.v1 and %45.v0 were proved to be
3305 /// identical, the corresponding values in subranges must also be the
3306 /// same. A redundant copy is removed because it's not needed, and not
3307 /// because it copied an undefined value, so any liveness that originated
3308 /// from that copy cannot disappear. When pruning a value that started
3309 /// at the removed copy, the corresponding identical value must be
3310 /// extended to replace it.
3311 void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
3312   // Look for values being erased.
3313   bool DidPrune = false;
3314   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3315     Val &V = Vals[i];
3316     // We should trigger in all cases in which eraseInstrs() does something.
3317     // match what eraseInstrs() is doing, print a message so
3318     if (V.Resolution != CR_Erase &&
3319         (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
3320       continue;
3321 
3322     // Check subranges at the point where the copy will be removed.
3323     SlotIndex Def = LR.getValNumInfo(i)->def;
3324     SlotIndex OtherDef;
3325     if (V.Identical)
3326       OtherDef = V.OtherVNI->def;
3327 
3328     // Print message so mismatches with eraseInstrs() can be diagnosed.
3329     LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
3330                       << '\n');
3331     for (LiveInterval::SubRange &S : LI.subranges()) {
3332       LiveQueryResult Q = S.Query(Def);
3333 
3334       // If a subrange starts at the copy then an undefined value has been
3335       // copied and we must remove that subrange value as well.
3336       VNInfo *ValueOut = Q.valueOutOrDead();
3337       if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
3338                                   (V.Identical && V.Resolution == CR_Erase &&
3339                                    ValueOut->def == Def))) {
3340         LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
3341                           << " at " << Def << "\n");
3342         SmallVector<SlotIndex,8> EndPoints;
3343         LIS->pruneValue(S, Def, &EndPoints);
3344         DidPrune = true;
3345         // Mark value number as unused.
3346         ValueOut->markUnused();
3347 
3348         if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
3349           // If V is identical to V.OtherVNI (and S was live at OtherDef),
3350           // then we can't simply prune V from S. V needs to be replaced
3351           // with V.OtherVNI.
3352           LIS->extendToIndices(S, EndPoints);
3353         }
3354 
3355         // We may need to eliminate the subrange if the copy introduced a live
3356         // out undef value.
3357         if (ValueOut->isPHIDef())
3358           ShrinkMask |= S.LaneMask;
3359         continue;
3360       }
3361 
3362       // If a subrange ends at the copy, then a value was copied but only
3363       // partially used later. Shrink the subregister range appropriately.
3364       //
3365       // Ultimately this calls shrinkToUses, so assuming ShrinkMask is
3366       // conservatively correct.
3367       if ((Q.valueIn() != nullptr && Q.valueOut() == nullptr) ||
3368           (V.Resolution == CR_Erase && isLiveThrough(Q))) {
3369         LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
3370                           << PrintLaneMask(S.LaneMask) << " at " << Def
3371                           << "\n");
3372         ShrinkMask |= S.LaneMask;
3373       }
3374     }
3375   }
3376   if (DidPrune)
3377     LI.removeEmptySubRanges();
3378 }
3379 
3380 /// Check if any of the subranges of @p LI contain a definition at @p Def.
3381 static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) {
3382   for (LiveInterval::SubRange &SR : LI.subranges()) {
3383     if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
3384       if (VNI->def == Def)
3385         return true;
3386   }
3387   return false;
3388 }
3389 
3390 void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
3391   assert(&static_cast<LiveRange&>(LI) == &LR);
3392 
3393   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3394     if (Vals[i].Resolution != CR_Keep)
3395       continue;
3396     VNInfo *VNI = LR.getValNumInfo(i);
3397     if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
3398       continue;
3399     Vals[i].Pruned = true;
3400     ShrinkMainRange = true;
3401   }
3402 }
3403 
3404 void JoinVals::removeImplicitDefs() {
3405   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3406     Val &V = Vals[i];
3407     if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
3408       continue;
3409 
3410     VNInfo *VNI = LR.getValNumInfo(i);
3411     VNI->markUnused();
3412     LR.removeValNo(VNI);
3413   }
3414 }
3415 
3416 void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
3417                            SmallVectorImpl<Register> &ShrinkRegs,
3418                            LiveInterval *LI) {
3419   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
3420     // Get the def location before markUnused() below invalidates it.
3421     VNInfo *VNI = LR.getValNumInfo(i);
3422     SlotIndex Def = VNI->def;
3423     switch (Vals[i].Resolution) {
3424     case CR_Keep: {
3425       // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
3426       // longer. The IMPLICIT_DEF instructions are only inserted by
3427       // PHIElimination to guarantee that all PHI predecessors have a value.
3428       if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
3429         break;
3430       // Remove value number i from LR.
3431       // For intervals with subranges, removing a segment from the main range
3432       // may require extending the previous segment: for each definition of
3433       // a subregister, there will be a corresponding def in the main range.
3434       // That def may fall in the middle of a segment from another subrange.
3435       // In such cases, removing this def from the main range must be
3436       // complemented by extending the main range to account for the liveness
3437       // of the other subrange.
3438       // The new end point of the main range segment to be extended.
3439       SlotIndex NewEnd;
3440       if (LI != nullptr) {
3441         LiveRange::iterator I = LR.FindSegmentContaining(Def);
3442         assert(I != LR.end());
3443         // Do not extend beyond the end of the segment being removed.
3444         // The segment may have been pruned in preparation for joining
3445         // live ranges.
3446         NewEnd = I->end;
3447       }
3448 
3449       LR.removeValNo(VNI);
3450       // Note that this VNInfo is reused and still referenced in NewVNInfo,
3451       // make it appear like an unused value number.
3452       VNI->markUnused();
3453 
3454       if (LI != nullptr && LI->hasSubRanges()) {
3455         assert(static_cast<LiveRange*>(LI) == &LR);
3456         // Determine the end point based on the subrange information:
3457         // minimum of (earliest def of next segment,
3458         //             latest end point of containing segment)
3459         SlotIndex ED, LE;
3460         for (LiveInterval::SubRange &SR : LI->subranges()) {
3461           LiveRange::iterator I = SR.find(Def);
3462           if (I == SR.end())
3463             continue;
3464           if (I->start > Def)
3465             ED = ED.isValid() ? std::min(ED, I->start) : I->start;
3466           else
3467             LE = LE.isValid() ? std::max(LE, I->end) : I->end;
3468         }
3469         if (LE.isValid())
3470           NewEnd = std::min(NewEnd, LE);
3471         if (ED.isValid())
3472           NewEnd = std::min(NewEnd, ED);
3473 
3474         // We only want to do the extension if there was a subrange that
3475         // was live across Def.
3476         if (LE.isValid()) {
3477           LiveRange::iterator S = LR.find(Def);
3478           if (S != LR.begin())
3479             std::prev(S)->end = NewEnd;
3480         }
3481       }
3482       LLVM_DEBUG({
3483         dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
3484         if (LI != nullptr)
3485           dbgs() << "\t\t  LHS = " << *LI << '\n';
3486       });
3487       [[fallthrough]];
3488     }
3489 
3490     case CR_Erase: {
3491       MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
3492       assert(MI && "No instruction to erase");
3493       if (MI->isCopy()) {
3494         Register Reg = MI->getOperand(1).getReg();
3495         if (Reg.isVirtual() && Reg != CP.getSrcReg() && Reg != CP.getDstReg())
3496           ShrinkRegs.push_back(Reg);
3497       }
3498       ErasedInstrs.insert(MI);
3499       LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
3500       LIS->RemoveMachineInstrFromMaps(*MI);
3501       MI->eraseFromParent();
3502       break;
3503     }
3504     default:
3505       break;
3506     }
3507   }
3508 }
3509 
3510 void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
3511                                          LaneBitmask LaneMask,
3512                                          const CoalescerPair &CP) {
3513   SmallVector<VNInfo*, 16> NewVNInfo;
3514   JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
3515                    NewVNInfo, CP, LIS, TRI, true, true);
3516   JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
3517                    NewVNInfo, CP, LIS, TRI, true, true);
3518 
3519   // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
3520   // We should be able to resolve all conflicts here as we could successfully do
3521   // it on the mainrange already. There is however a problem when multiple
3522   // ranges get mapped to the "overflow" lane mask bit which creates unexpected
3523   // interferences.
3524   if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
3525     // We already determined that it is legal to merge the intervals, so this
3526     // should never fail.
3527     llvm_unreachable("*** Couldn't join subrange!\n");
3528   }
3529   if (!LHSVals.resolveConflicts(RHSVals) ||
3530       !RHSVals.resolveConflicts(LHSVals)) {
3531     // We already determined that it is legal to merge the intervals, so this
3532     // should never fail.
3533     llvm_unreachable("*** Couldn't join subrange!\n");
3534   }
3535 
3536   // The merging algorithm in LiveInterval::join() can't handle conflicting
3537   // value mappings, so we need to remove any live ranges that overlap a
3538   // CR_Replace resolution. Collect a set of end points that can be used to
3539   // restore the live range after joining.
3540   SmallVector<SlotIndex, 8> EndPoints;
3541   LHSVals.pruneValues(RHSVals, EndPoints, false);
3542   RHSVals.pruneValues(LHSVals, EndPoints, false);
3543 
3544   LHSVals.removeImplicitDefs();
3545   RHSVals.removeImplicitDefs();
3546 
3547   LRange.verify();
3548   RRange.verify();
3549 
3550   // Join RRange into LHS.
3551   LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
3552               NewVNInfo);
3553 
3554   LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
3555                     << ' ' << LRange << "\n");
3556   if (EndPoints.empty())
3557     return;
3558 
3559   // Recompute the parts of the live range we had to remove because of
3560   // CR_Replace conflicts.
3561   LLVM_DEBUG({
3562     dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3563     for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3564       dbgs() << EndPoints[i];
3565       if (i != n-1)
3566         dbgs() << ',';
3567     }
3568     dbgs() << ":  " << LRange << '\n';
3569   });
3570   LIS->extendToIndices(LRange, EndPoints);
3571 }
3572 
3573 void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
3574                                           const LiveRange &ToMerge,
3575                                           LaneBitmask LaneMask,
3576                                           CoalescerPair &CP,
3577                                           unsigned ComposeSubRegIdx) {
3578   BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
3579   LI.refineSubRanges(
3580       Allocator, LaneMask,
3581       [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
3582         if (SR.empty()) {
3583           SR.assign(ToMerge, Allocator);
3584         } else {
3585           // joinSubRegRange() destroys the merged range, so we need a copy.
3586           LiveRange RangeCopy(ToMerge, Allocator);
3587           joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
3588         }
3589       },
3590       *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx);
3591 }
3592 
3593 bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
3594   if (LI.valnos.size() < LargeIntervalSizeThreshold)
3595     return false;
3596   auto &Counter = LargeLIVisitCounter[LI.reg()];
3597   if (Counter < LargeIntervalFreqThreshold) {
3598     Counter++;
3599     return false;
3600   }
3601   return true;
3602 }
3603 
3604 bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
3605   SmallVector<VNInfo*, 16> NewVNInfo;
3606   LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
3607   LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
3608   bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
3609   JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
3610                    NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3611   JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
3612                    NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
3613 
3614   LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
3615 
3616   if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
3617     return false;
3618 
3619   // First compute NewVNInfo and the simple value mappings.
3620   // Detect impossible conflicts early.
3621   if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
3622     return false;
3623 
3624   // Some conflicts can only be resolved after all values have been mapped.
3625   if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
3626     return false;
3627 
3628   // All clear, the live ranges can be merged.
3629   if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
3630     BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
3631 
3632     // Transform lanemasks from the LHS to masks in the coalesced register and
3633     // create initial subranges if necessary.
3634     unsigned DstIdx = CP.getDstIdx();
3635     if (!LHS.hasSubRanges()) {
3636       LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
3637                                      : TRI->getSubRegIndexLaneMask(DstIdx);
3638       // LHS must support subregs or we wouldn't be in this codepath.
3639       assert(Mask.any());
3640       LHS.createSubRangeFrom(Allocator, Mask, LHS);
3641     } else if (DstIdx != 0) {
3642       // Transform LHS lanemasks to new register class if necessary.
3643       for (LiveInterval::SubRange &R : LHS.subranges()) {
3644         LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
3645         R.LaneMask = Mask;
3646       }
3647     }
3648     LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
3649                       << '\n');
3650 
3651     // Determine lanemasks of RHS in the coalesced register and merge subranges.
3652     unsigned SrcIdx = CP.getSrcIdx();
3653     if (!RHS.hasSubRanges()) {
3654       LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
3655                                      : TRI->getSubRegIndexLaneMask(SrcIdx);
3656       mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx);
3657     } else {
3658       // Pair up subranges and merge.
3659       for (LiveInterval::SubRange &R : RHS.subranges()) {
3660         LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
3661         mergeSubRangeInto(LHS, R, Mask, CP, DstIdx);
3662       }
3663     }
3664     LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
3665 
3666     // Pruning implicit defs from subranges may result in the main range
3667     // having stale segments.
3668     LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
3669 
3670     LHSVals.pruneSubRegValues(LHS, ShrinkMask);
3671     RHSVals.pruneSubRegValues(LHS, ShrinkMask);
3672   }
3673 
3674   // The merging algorithm in LiveInterval::join() can't handle conflicting
3675   // value mappings, so we need to remove any live ranges that overlap a
3676   // CR_Replace resolution. Collect a set of end points that can be used to
3677   // restore the live range after joining.
3678   SmallVector<SlotIndex, 8> EndPoints;
3679   LHSVals.pruneValues(RHSVals, EndPoints, true);
3680   RHSVals.pruneValues(LHSVals, EndPoints, true);
3681 
3682   // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
3683   // registers to require trimming.
3684   SmallVector<Register, 8> ShrinkRegs;
3685   LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
3686   RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
3687   while (!ShrinkRegs.empty())
3688     shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
3689 
3690   // Scan and mark undef any DBG_VALUEs that would refer to a different value.
3691   checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals);
3692 
3693   // If the RHS covers any PHI locations that were tracked for debug-info, we
3694   // must update tracking information to reflect the join.
3695   auto RegIt = RegToPHIIdx.find(CP.getSrcReg());
3696   if (RegIt != RegToPHIIdx.end()) {
3697     // Iterate over all the debug instruction numbers assigned this register.
3698     for (unsigned InstID : RegIt->second) {
3699       auto PHIIt = PHIValToPos.find(InstID);
3700       assert(PHIIt != PHIValToPos.end());
3701       const SlotIndex &SI = PHIIt->second.SI;
3702 
3703       // Does the RHS cover the position of this PHI?
3704       auto LII = RHS.find(SI);
3705       if (LII == RHS.end() || LII->start > SI)
3706         continue;
3707 
3708       // Accept two kinds of subregister movement:
3709       //  * When we merge from one register class into a larger register:
3710       //        %1:gr16 = some-inst
3711       //                ->
3712       //        %2:gr32.sub_16bit = some-inst
3713       //  * When the PHI is already in a subregister, and the larger class
3714       //    is coalesced:
3715       //        %2:gr32.sub_16bit = some-inst
3716       //        %3:gr32 = COPY %2
3717       //                ->
3718       //        %3:gr32.sub_16bit = some-inst
3719       // Test for subregister move:
3720       if (CP.getSrcIdx() != 0 || CP.getDstIdx() != 0)
3721         // If we're moving between different subregisters, ignore this join.
3722         // The PHI will not get a location, dropping variable locations.
3723         if (PHIIt->second.SubReg && PHIIt->second.SubReg != CP.getSrcIdx())
3724           continue;
3725 
3726       // Update our tracking of where the PHI is.
3727       PHIIt->second.Reg = CP.getDstReg();
3728 
3729       // If we merge into a sub-register of a larger class (test above),
3730       // update SubReg.
3731       if (CP.getSrcIdx() != 0)
3732         PHIIt->second.SubReg = CP.getSrcIdx();
3733     }
3734 
3735     // Rebuild the register index in RegToPHIIdx to account for PHIs tracking
3736     // different VRegs now. Copy old collection of debug instruction numbers and
3737     // erase the old one:
3738     auto InstrNums = RegIt->second;
3739     RegToPHIIdx.erase(RegIt);
3740 
3741     // There might already be PHIs being tracked in the destination VReg. Insert
3742     // into an existing tracking collection, or insert a new one.
3743     RegIt = RegToPHIIdx.find(CP.getDstReg());
3744     if (RegIt != RegToPHIIdx.end())
3745       RegIt->second.insert(RegIt->second.end(), InstrNums.begin(),
3746                            InstrNums.end());
3747     else
3748       RegToPHIIdx.insert({CP.getDstReg(), InstrNums});
3749   }
3750 
3751   // Join RHS into LHS.
3752   LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
3753 
3754   // Kill flags are going to be wrong if the live ranges were overlapping.
3755   // Eventually, we should simply clear all kill flags when computing live
3756   // ranges. They are reinserted after register allocation.
3757   MRI->clearKillFlags(LHS.reg());
3758   MRI->clearKillFlags(RHS.reg());
3759 
3760   if (!EndPoints.empty()) {
3761     // Recompute the parts of the live range we had to remove because of
3762     // CR_Replace conflicts.
3763     LLVM_DEBUG({
3764       dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3765       for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3766         dbgs() << EndPoints[i];
3767         if (i != n-1)
3768           dbgs() << ',';
3769       }
3770       dbgs() << ":  " << LHS << '\n';
3771     });
3772     LIS->extendToIndices((LiveRange&)LHS, EndPoints);
3773   }
3774 
3775   return true;
3776 }
3777 
3778 bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
3779   return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
3780 }
3781 
3782 void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
3783 {
3784   const SlotIndexes &Slots = *LIS->getSlotIndexes();
3785   SmallVector<MachineInstr *, 8> ToInsert;
3786 
3787   // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
3788   // vreg => DbgValueLoc map.
3789   auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
3790     for (auto *X : ToInsert) {
3791       for (const auto &Op : X->debug_operands()) {
3792         if (Op.isReg() && Op.getReg().isVirtual())
3793           DbgVRegToValues[Op.getReg()].push_back({Slot, X});
3794       }
3795     }
3796 
3797     ToInsert.clear();
3798   };
3799 
3800   // Iterate over all instructions, collecting them into the ToInsert vector.
3801   // Once a non-debug instruction is found, record the slot index of the
3802   // collected DBG_VALUEs.
3803   for (auto &MBB : MF) {
3804     SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB);
3805 
3806     for (auto &MI : MBB) {
3807       if (MI.isDebugValue()) {
3808         if (any_of(MI.debug_operands(), [](const MachineOperand &MO) {
3809               return MO.isReg() && MO.getReg().isVirtual();
3810             }))
3811           ToInsert.push_back(&MI);
3812       } else if (!MI.isDebugOrPseudoInstr()) {
3813         CurrentSlot = Slots.getInstructionIndex(MI);
3814         CloseNewDVRange(CurrentSlot);
3815       }
3816     }
3817 
3818     // Close range of DBG_VALUEs at the end of blocks.
3819     CloseNewDVRange(Slots.getMBBEndIdx(&MBB));
3820   }
3821 
3822   // Sort all DBG_VALUEs we've seen by slot number.
3823   for (auto &Pair : DbgVRegToValues)
3824     llvm::sort(Pair.second);
3825 }
3826 
3827 void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
3828                                                      LiveRange &LHS,
3829                                                      JoinVals &LHSVals,
3830                                                      LiveRange &RHS,
3831                                                      JoinVals &RHSVals) {
3832   auto ScanForDstReg = [&](Register Reg) {
3833     checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals);
3834   };
3835 
3836   auto ScanForSrcReg = [&](Register Reg) {
3837     checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
3838   };
3839 
3840   // Scan for unsound updates of both the source and destination register.
3841   ScanForSrcReg(CP.getSrcReg());
3842   ScanForDstReg(CP.getDstReg());
3843 }
3844 
3845 void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
3846                                                          LiveRange &OtherLR,
3847                                                          LiveRange &RegLR,
3848                                                          JoinVals &RegVals) {
3849   // Are there any DBG_VALUEs to examine?
3850   auto VRegMapIt = DbgVRegToValues.find(Reg);
3851   if (VRegMapIt == DbgVRegToValues.end())
3852     return;
3853 
3854   auto &DbgValueSet = VRegMapIt->second;
3855   auto DbgValueSetIt = DbgValueSet.begin();
3856   auto SegmentIt = OtherLR.begin();
3857 
3858   bool LastUndefResult = false;
3859   SlotIndex LastUndefIdx;
3860 
3861   // If the "Other" register is live at a slot Idx, test whether Reg can
3862   // safely be merged with it, or should be marked undef.
3863   auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult,
3864                       &LastUndefIdx](SlotIndex Idx) -> bool {
3865     // Our worst-case performance typically happens with asan, causing very
3866     // many DBG_VALUEs of the same location. Cache a copy of the most recent
3867     // result for this edge-case.
3868     if (LastUndefIdx == Idx)
3869       return LastUndefResult;
3870 
3871     // If the other range was live, and Reg's was not, the register coalescer
3872     // will not have tried to resolve any conflicts. We don't know whether
3873     // the DBG_VALUE will refer to the same value number, so it must be made
3874     // undef.
3875     auto OtherIt = RegLR.find(Idx);
3876     if (OtherIt == RegLR.end())
3877       return true;
3878 
3879     // Both the registers were live: examine the conflict resolution record for
3880     // the value number Reg refers to. CR_Keep meant that this value number
3881     // "won" and the merged register definitely refers to that value. CR_Erase
3882     // means the value number was a redundant copy of the other value, which
3883     // was coalesced and Reg deleted. It's safe to refer to the other register
3884     // (which will be the source of the copy).
3885     auto Resolution = RegVals.getResolution(OtherIt->valno->id);
3886     LastUndefResult = Resolution != JoinVals::CR_Keep &&
3887                       Resolution != JoinVals::CR_Erase;
3888     LastUndefIdx = Idx;
3889     return LastUndefResult;
3890   };
3891 
3892   // Iterate over both the live-range of the "Other" register, and the set of
3893   // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest
3894   // slot index. This relies on the DbgValueSet being ordered.
3895   while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) {
3896     if (DbgValueSetIt->first < SegmentIt->end) {
3897       // "Other" is live and there is a DBG_VALUE of Reg: test if we should
3898       // set it undef.
3899       if (DbgValueSetIt->first >= SegmentIt->start) {
3900         bool HasReg = DbgValueSetIt->second->hasDebugOperandForReg(Reg);
3901         bool ShouldUndefReg = ShouldUndef(DbgValueSetIt->first);
3902         if (HasReg && ShouldUndefReg) {
3903           // Mark undef, erase record of this DBG_VALUE to avoid revisiting.
3904           DbgValueSetIt->second->setDebugValueUndef();
3905           continue;
3906         }
3907       }
3908       ++DbgValueSetIt;
3909     } else {
3910       ++SegmentIt;
3911     }
3912   }
3913 }
3914 
3915 namespace {
3916 
3917 /// Information concerning MBB coalescing priority.
3918 struct MBBPriorityInfo {
3919   MachineBasicBlock *MBB;
3920   unsigned Depth;
3921   bool IsSplit;
3922 
3923   MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
3924     : MBB(mbb), Depth(depth), IsSplit(issplit) {}
3925 };
3926 
3927 } // end anonymous namespace
3928 
3929 /// C-style comparator that sorts first based on the loop depth of the basic
3930 /// block (the unsigned), and then on the MBB number.
3931 ///
3932 /// EnableGlobalCopies assumes that the primary sort key is loop depth.
3933 static int compareMBBPriority(const MBBPriorityInfo *LHS,
3934                               const MBBPriorityInfo *RHS) {
3935   // Deeper loops first
3936   if (LHS->Depth != RHS->Depth)
3937     return LHS->Depth > RHS->Depth ? -1 : 1;
3938 
3939   // Try to unsplit critical edges next.
3940   if (LHS->IsSplit != RHS->IsSplit)
3941     return LHS->IsSplit ? -1 : 1;
3942 
3943   // Prefer blocks that are more connected in the CFG. This takes care of
3944   // the most difficult copies first while intervals are short.
3945   unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
3946   unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
3947   if (cl != cr)
3948     return cl > cr ? -1 : 1;
3949 
3950   // As a last resort, sort by block number.
3951   return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
3952 }
3953 
3954 /// \returns true if the given copy uses or defines a local live range.
3955 static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
3956   if (!Copy->isCopy())
3957     return false;
3958 
3959   if (Copy->getOperand(1).isUndef())
3960     return false;
3961 
3962   Register SrcReg = Copy->getOperand(1).getReg();
3963   Register DstReg = Copy->getOperand(0).getReg();
3964   if (SrcReg.isPhysical() || DstReg.isPhysical())
3965     return false;
3966 
3967   return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
3968     || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
3969 }
3970 
3971 void RegisterCoalescer::lateLiveIntervalUpdate() {
3972   for (Register reg : ToBeUpdated) {
3973     if (!LIS->hasInterval(reg))
3974       continue;
3975     LiveInterval &LI = LIS->getInterval(reg);
3976     shrinkToUses(&LI, &DeadDefs);
3977     if (!DeadDefs.empty())
3978       eliminateDeadDefs();
3979   }
3980   ToBeUpdated.clear();
3981 }
3982 
3983 bool RegisterCoalescer::
3984 copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
3985   bool Progress = false;
3986   for (MachineInstr *&MI : CurrList) {
3987     if (!MI)
3988       continue;
3989     // Skip instruction pointers that have already been erased, for example by
3990     // dead code elimination.
3991     if (ErasedInstrs.count(MI)) {
3992       MI = nullptr;
3993       continue;
3994     }
3995     bool Again = false;
3996     bool Success = joinCopy(MI, Again);
3997     Progress |= Success;
3998     if (Success || !Again)
3999       MI = nullptr;
4000   }
4001   return Progress;
4002 }
4003 
4004 /// Check if DstReg is a terminal node.
4005 /// I.e., it does not have any affinity other than \p Copy.
4006 static bool isTerminalReg(Register DstReg, const MachineInstr &Copy,
4007                           const MachineRegisterInfo *MRI) {
4008   assert(Copy.isCopyLike());
4009   // Check if the destination of this copy as any other affinity.
4010   for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
4011     if (&MI != &Copy && MI.isCopyLike())
4012       return false;
4013   return true;
4014 }
4015 
4016 bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
4017   assert(Copy.isCopyLike());
4018   if (!UseTerminalRule)
4019     return false;
4020   Register SrcReg, DstReg;
4021   unsigned SrcSubReg = 0, DstSubReg = 0;
4022   if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
4023     return false;
4024   // Check if the destination of this copy has any other affinity.
4025   if (DstReg.isPhysical() ||
4026       // If SrcReg is a physical register, the copy won't be coalesced.
4027       // Ignoring it may have other side effect (like missing
4028       // rematerialization). So keep it.
4029       SrcReg.isPhysical() || !isTerminalReg(DstReg, Copy, MRI))
4030     return false;
4031 
4032   // DstReg is a terminal node. Check if it interferes with any other
4033   // copy involving SrcReg.
4034   const MachineBasicBlock *OrigBB = Copy.getParent();
4035   const LiveInterval &DstLI = LIS->getInterval(DstReg);
4036   for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
4037     // Technically we should check if the weight of the new copy is
4038     // interesting compared to the other one and update the weight
4039     // of the copies accordingly. However, this would only work if
4040     // we would gather all the copies first then coalesce, whereas
4041     // right now we interleave both actions.
4042     // For now, just consider the copies that are in the same block.
4043     if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
4044       continue;
4045     Register OtherSrcReg, OtherReg;
4046     unsigned OtherSrcSubReg = 0, OtherSubReg = 0;
4047     if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
4048                 OtherSubReg))
4049       return false;
4050     if (OtherReg == SrcReg)
4051       OtherReg = OtherSrcReg;
4052     // Check if OtherReg is a non-terminal.
4053     if (OtherReg.isPhysical() || isTerminalReg(OtherReg, MI, MRI))
4054       continue;
4055     // Check that OtherReg interfere with DstReg.
4056     if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
4057       LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
4058                         << '\n');
4059       return true;
4060     }
4061   }
4062   return false;
4063 }
4064 
4065 void
4066 RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
4067   LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");
4068 
4069   // Collect all copy-like instructions in MBB. Don't start coalescing anything
4070   // yet, it might invalidate the iterator.
4071   const unsigned PrevSize = WorkList.size();
4072   if (JoinGlobalCopies) {
4073     SmallVector<MachineInstr*, 2> LocalTerminals;
4074     SmallVector<MachineInstr*, 2> GlobalTerminals;
4075     // Coalesce copies bottom-up to coalesce local defs before local uses. They
4076     // are not inherently easier to resolve, but slightly preferable until we
4077     // have local live range splitting. In particular this is required by
4078     // cmp+jmp macro fusion.
4079     for (MachineInstr &MI : *MBB) {
4080       if (!MI.isCopyLike())
4081         continue;
4082       bool ApplyTerminalRule = applyTerminalRule(MI);
4083       if (isLocalCopy(&MI, LIS)) {
4084         if (ApplyTerminalRule)
4085           LocalTerminals.push_back(&MI);
4086         else
4087           LocalWorkList.push_back(&MI);
4088       } else {
4089         if (ApplyTerminalRule)
4090           GlobalTerminals.push_back(&MI);
4091         else
4092           WorkList.push_back(&MI);
4093       }
4094     }
4095     // Append the copies evicted by the terminal rule at the end of the list.
4096     LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
4097     WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
4098   }
4099   else {
4100     SmallVector<MachineInstr*, 2> Terminals;
4101     for (MachineInstr &MII : *MBB)
4102       if (MII.isCopyLike()) {
4103         if (applyTerminalRule(MII))
4104           Terminals.push_back(&MII);
4105         else
4106           WorkList.push_back(&MII);
4107       }
4108     // Append the copies evicted by the terminal rule at the end of the list.
4109     WorkList.append(Terminals.begin(), Terminals.end());
4110   }
4111   // Try coalescing the collected copies immediately, and remove the nulls.
4112   // This prevents the WorkList from getting too large since most copies are
4113   // joinable on the first attempt.
4114   MutableArrayRef<MachineInstr*>
4115     CurrList(WorkList.begin() + PrevSize, WorkList.end());
4116   if (copyCoalesceWorkList(CurrList))
4117     WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
4118                                nullptr), WorkList.end());
4119 }
4120 
4121 void RegisterCoalescer::coalesceLocals() {
4122   copyCoalesceWorkList(LocalWorkList);
4123   for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
4124     if (LocalWorkList[j])
4125       WorkList.push_back(LocalWorkList[j]);
4126   }
4127   LocalWorkList.clear();
4128 }
4129 
4130 void RegisterCoalescer::joinAllIntervals() {
4131   LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
4132   assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
4133 
4134   std::vector<MBBPriorityInfo> MBBs;
4135   MBBs.reserve(MF->size());
4136   for (MachineBasicBlock &MBB : *MF) {
4137     MBBs.push_back(MBBPriorityInfo(&MBB, Loops->getLoopDepth(&MBB),
4138                                    JoinSplitEdges && isSplitEdge(&MBB)));
4139   }
4140   array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
4141 
4142   // Coalesce intervals in MBB priority order.
4143   unsigned CurrDepth = std::numeric_limits<unsigned>::max();
4144   for (MBBPriorityInfo &MBB : MBBs) {
4145     // Try coalescing the collected local copies for deeper loops.
4146     if (JoinGlobalCopies && MBB.Depth < CurrDepth) {
4147       coalesceLocals();
4148       CurrDepth = MBB.Depth;
4149     }
4150     copyCoalesceInMBB(MBB.MBB);
4151   }
4152   lateLiveIntervalUpdate();
4153   coalesceLocals();
4154 
4155   // Joining intervals can allow other intervals to be joined.  Iteratively join
4156   // until we make no progress.
4157   while (copyCoalesceWorkList(WorkList))
4158     /* empty */ ;
4159   lateLiveIntervalUpdate();
4160 }
4161 
4162 void RegisterCoalescer::releaseMemory() {
4163   ErasedInstrs.clear();
4164   WorkList.clear();
4165   DeadDefs.clear();
4166   InflateRegs.clear();
4167   LargeLIVisitCounter.clear();
4168 }
4169 
4170 bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
4171   LLVM_DEBUG(dbgs() << "********** REGISTER COALESCER **********\n"
4172                     << "********** Function: " << fn.getName() << '\n');
4173 
4174   // Variables changed between a setjmp and a longjump can have undefined value
4175   // after the longjmp. This behaviour can be observed if such a variable is
4176   // spilled, so longjmp won't restore the value in the spill slot.
4177   // RegisterCoalescer should not run in functions with a setjmp to avoid
4178   // merging such undefined variables with predictable ones.
4179   //
4180   // TODO: Could specifically disable coalescing registers live across setjmp
4181   // calls
4182   if (fn.exposesReturnsTwice()) {
4183     LLVM_DEBUG(
4184         dbgs() << "* Skipped as it exposes functions that returns twice.\n");
4185     return false;
4186   }
4187 
4188   MF = &fn;
4189   MRI = &fn.getRegInfo();
4190   const TargetSubtargetInfo &STI = fn.getSubtarget();
4191   TRI = STI.getRegisterInfo();
4192   TII = STI.getInstrInfo();
4193   LIS = &getAnalysis<LiveIntervals>();
4194   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
4195   Loops = &getAnalysis<MachineLoopInfo>();
4196   if (EnableGlobalCopies == cl::BOU_UNSET)
4197     JoinGlobalCopies = STI.enableJoinGlobalCopies();
4198   else
4199     JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
4200 
4201   // If there are PHIs tracked by debug-info, they will need updating during
4202   // coalescing. Build an index of those PHIs to ease updating.
4203   SlotIndexes *Slots = LIS->getSlotIndexes();
4204   for (const auto &DebugPHI : MF->DebugPHIPositions) {
4205     MachineBasicBlock *MBB = DebugPHI.second.MBB;
4206     Register Reg = DebugPHI.second.Reg;
4207     unsigned SubReg = DebugPHI.second.SubReg;
4208     SlotIndex SI = Slots->getMBBStartIdx(MBB);
4209     PHIValPos P = {SI, Reg, SubReg};
4210     PHIValToPos.insert(std::make_pair(DebugPHI.first, P));
4211     RegToPHIIdx[Reg].push_back(DebugPHI.first);
4212   }
4213 
4214   // The MachineScheduler does not currently require JoinSplitEdges. This will
4215   // either be enabled unconditionally or replaced by a more general live range
4216   // splitting optimization.
4217   JoinSplitEdges = EnableJoinSplits;
4218 
4219   if (VerifyCoalescing)
4220     MF->verify(this, "Before register coalescing");
4221 
4222   DbgVRegToValues.clear();
4223   buildVRegToDbgValueMap(fn);
4224 
4225   RegClassInfo.runOnMachineFunction(fn);
4226 
4227   // Join (coalesce) intervals if requested.
4228   if (EnableJoining)
4229     joinAllIntervals();
4230 
4231   // After deleting a lot of copies, register classes may be less constrained.
4232   // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
4233   // DPR inflation.
4234   array_pod_sort(InflateRegs.begin(), InflateRegs.end());
4235   InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
4236                     InflateRegs.end());
4237   LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
4238                     << " regs.\n");
4239   for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
4240     Register Reg = InflateRegs[i];
4241     if (MRI->reg_nodbg_empty(Reg))
4242       continue;
4243     if (MRI->recomputeRegClass(Reg)) {
4244       LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
4245                         << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
4246       ++NumInflated;
4247 
4248       LiveInterval &LI = LIS->getInterval(Reg);
4249       if (LI.hasSubRanges()) {
4250         // If the inflated register class does not support subregisters anymore
4251         // remove the subranges.
4252         if (!MRI->shouldTrackSubRegLiveness(Reg)) {
4253           LI.clearSubRanges();
4254         } else {
4255 #ifndef NDEBUG
4256           LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
4257           // If subranges are still supported, then the same subregs
4258           // should still be supported.
4259           for (LiveInterval::SubRange &S : LI.subranges()) {
4260             assert((S.LaneMask & ~MaxMask).none());
4261           }
4262 #endif
4263         }
4264       }
4265     }
4266   }
4267 
4268   // After coalescing, update any PHIs that are being tracked by debug-info
4269   // with their new VReg locations.
4270   for (auto &p : MF->DebugPHIPositions) {
4271     auto it = PHIValToPos.find(p.first);
4272     assert(it != PHIValToPos.end());
4273     p.second.Reg = it->second.Reg;
4274     p.second.SubReg = it->second.SubReg;
4275   }
4276 
4277   PHIValToPos.clear();
4278   RegToPHIIdx.clear();
4279 
4280   LLVM_DEBUG(dump());
4281   if (VerifyCoalescing)
4282     MF->verify(this, "After register coalescing");
4283   return true;
4284 }
4285 
4286 void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
4287    LIS->print(O, m);
4288 }
4289