1 //===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the RAGreedy function pass for register allocation in
10 // optimized builds.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "RegAllocGreedy.h"
15 #include "AllocationOrder.h"
16 #include "InterferenceCache.h"
17 #include "LiveDebugVariables.h"
18 #include "RegAllocBase.h"
19 #include "RegAllocEvictionAdvisor.h"
20 #include "RegAllocPriorityAdvisor.h"
21 #include "SpillPlacement.h"
22 #include "SplitKit.h"
23 #include "llvm/ADT/ArrayRef.h"
24 #include "llvm/ADT/BitVector.h"
25 #include "llvm/ADT/IndexedMap.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallSet.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/Statistic.h"
30 #include "llvm/ADT/StringRef.h"
31 #include "llvm/Analysis/AliasAnalysis.h"
32 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
33 #include "llvm/CodeGen/CalcSpillWeights.h"
34 #include "llvm/CodeGen/EdgeBundles.h"
35 #include "llvm/CodeGen/LiveInterval.h"
36 #include "llvm/CodeGen/LiveIntervalUnion.h"
37 #include "llvm/CodeGen/LiveIntervals.h"
38 #include "llvm/CodeGen/LiveRangeEdit.h"
39 #include "llvm/CodeGen/LiveRegMatrix.h"
40 #include "llvm/CodeGen/LiveStacks.h"
41 #include "llvm/CodeGen/MachineBasicBlock.h"
42 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
43 #include "llvm/CodeGen/MachineDominators.h"
44 #include "llvm/CodeGen/MachineFrameInfo.h"
45 #include "llvm/CodeGen/MachineFunction.h"
46 #include "llvm/CodeGen/MachineFunctionPass.h"
47 #include "llvm/CodeGen/MachineInstr.h"
48 #include "llvm/CodeGen/MachineLoopInfo.h"
49 #include "llvm/CodeGen/MachineOperand.h"
50 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
51 #include "llvm/CodeGen/MachineRegisterInfo.h"
52 #include "llvm/CodeGen/RegAllocRegistry.h"
53 #include "llvm/CodeGen/RegisterClassInfo.h"
54 #include "llvm/CodeGen/SlotIndexes.h"
55 #include "llvm/CodeGen/Spiller.h"
56 #include "llvm/CodeGen/TargetInstrInfo.h"
57 #include "llvm/CodeGen/TargetRegisterInfo.h"
58 #include "llvm/CodeGen/TargetSubtargetInfo.h"
59 #include "llvm/CodeGen/VirtRegMap.h"
60 #include "llvm/IR/DebugInfoMetadata.h"
61 #include "llvm/IR/Function.h"
62 #include "llvm/IR/LLVMContext.h"
63 #include "llvm/InitializePasses.h"
64 #include "llvm/MC/MCRegisterInfo.h"
65 #include "llvm/Pass.h"
66 #include "llvm/Support/BlockFrequency.h"
67 #include "llvm/Support/BranchProbability.h"
68 #include "llvm/Support/CommandLine.h"
69 #include "llvm/Support/Debug.h"
70 #include "llvm/Support/MathExtras.h"
71 #include "llvm/Support/Timer.h"
72 #include "llvm/Support/raw_ostream.h"
73 #include <algorithm>
74 #include <cassert>
75 #include <cstdint>
76 #include <utility>
77 
78 using namespace llvm;
79 
80 #define DEBUG_TYPE "regalloc"
81 
82 STATISTIC(NumGlobalSplits, "Number of split global live ranges");
83 STATISTIC(NumLocalSplits,  "Number of split local live ranges");
84 STATISTIC(NumEvicted,      "Number of interferences evicted");
85 
86 static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
87     "split-spill-mode", cl::Hidden,
88     cl::desc("Spill mode for splitting live ranges"),
89     cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
90                clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
91                clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
92     cl::init(SplitEditor::SM_Speed));
93 
94 static cl::opt<unsigned>
95 LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
96                              cl::desc("Last chance recoloring max depth"),
97                              cl::init(5));
98 
99 static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
100     "lcr-max-interf", cl::Hidden,
101     cl::desc("Last chance recoloring maximum number of considered"
102              " interference at a time"),
103     cl::init(8));
104 
105 static cl::opt<bool> ExhaustiveSearch(
106     "exhaustive-register-search", cl::NotHidden,
107     cl::desc("Exhaustive Search for registers bypassing the depth "
108              "and interference cutoffs of last chance recoloring"),
109     cl::Hidden);
110 
111 static cl::opt<bool> EnableDeferredSpilling(
112     "enable-deferred-spilling", cl::Hidden,
113     cl::desc("Instead of spilling a variable right away, defer the actual "
114              "code insertion to the end of the allocation. That way the "
115              "allocator might still find a suitable coloring for this "
116              "variable because of other evicted variables."),
117     cl::init(false));
118 
119 // FIXME: Find a good default for this flag and remove the flag.
120 static cl::opt<unsigned>
121 CSRFirstTimeCost("regalloc-csr-first-time-cost",
122               cl::desc("Cost for first time use of callee-saved register."),
123               cl::init(0), cl::Hidden);
124 
125 static cl::opt<unsigned long> GrowRegionComplexityBudget(
126     "grow-region-complexity-budget",
127     cl::desc("growRegion() does not scale with the number of BB edges, so "
128              "limit its budget and bail out once we reach the limit."),
129     cl::init(10000), cl::Hidden);
130 
131 static cl::opt<bool> GreedyRegClassPriorityTrumpsGlobalness(
132     "greedy-regclass-priority-trumps-globalness",
133     cl::desc("Change the greedy register allocator's live range priority "
134              "calculation to make the AllocationPriority of the register class "
135              "more important then whether the range is global"),
136     cl::Hidden);
137 
138 static cl::opt<bool> GreedyReverseLocalAssignment(
139     "greedy-reverse-local-assignment",
140     cl::desc("Reverse allocation order of local live ranges, such that "
141              "shorter local live ranges will tend to be allocated first"),
142     cl::Hidden);
143 
144 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
145                                        createGreedyRegisterAllocator);
146 
147 char RAGreedy::ID = 0;
148 char &llvm::RAGreedyID = RAGreedy::ID;
149 
150 INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
151                 "Greedy Register Allocator", false, false)
152 INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
153 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
154 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
155 INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
156 INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
157 INITIALIZE_PASS_DEPENDENCY(LiveStacks)
158 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
159 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
160 INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
161 INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
162 INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
163 INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
164 INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
165 INITIALIZE_PASS_DEPENDENCY(RegAllocEvictionAdvisorAnalysis)
166 INITIALIZE_PASS_DEPENDENCY(RegAllocPriorityAdvisorAnalysis)
167 INITIALIZE_PASS_END(RAGreedy, "greedy",
168                 "Greedy Register Allocator", false, false)
169 
170 #ifndef NDEBUG
171 const char *const RAGreedy::StageName[] = {
172     "RS_New",
173     "RS_Assign",
174     "RS_Split",
175     "RS_Split2",
176     "RS_Spill",
177     "RS_Memory",
178     "RS_Done"
179 };
180 #endif
181 
182 // Hysteresis to use when comparing floats.
183 // This helps stabilize decisions based on float comparisons.
184 const float Hysteresis = (2007 / 2048.0f); // 0.97998046875
185 
186 FunctionPass* llvm::createGreedyRegisterAllocator() {
187   return new RAGreedy();
188 }
189 
190 FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) {
191   return new RAGreedy(Ftor);
192 }
193 
194 RAGreedy::RAGreedy(RegClassFilterFunc F):
195   MachineFunctionPass(ID),
196   RegAllocBase(F) {
197 }
198 
199 void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
200   AU.setPreservesCFG();
201   AU.addRequired<MachineBlockFrequencyInfo>();
202   AU.addPreserved<MachineBlockFrequencyInfo>();
203   AU.addRequired<LiveIntervals>();
204   AU.addPreserved<LiveIntervals>();
205   AU.addRequired<SlotIndexes>();
206   AU.addPreserved<SlotIndexes>();
207   AU.addRequired<LiveDebugVariables>();
208   AU.addPreserved<LiveDebugVariables>();
209   AU.addRequired<LiveStacks>();
210   AU.addPreserved<LiveStacks>();
211   AU.addRequired<MachineDominatorTree>();
212   AU.addPreserved<MachineDominatorTree>();
213   AU.addRequired<MachineLoopInfo>();
214   AU.addPreserved<MachineLoopInfo>();
215   AU.addRequired<VirtRegMap>();
216   AU.addPreserved<VirtRegMap>();
217   AU.addRequired<LiveRegMatrix>();
218   AU.addPreserved<LiveRegMatrix>();
219   AU.addRequired<EdgeBundles>();
220   AU.addRequired<SpillPlacement>();
221   AU.addRequired<MachineOptimizationRemarkEmitterPass>();
222   AU.addRequired<RegAllocEvictionAdvisorAnalysis>();
223   AU.addRequired<RegAllocPriorityAdvisorAnalysis>();
224   MachineFunctionPass::getAnalysisUsage(AU);
225 }
226 
227 //===----------------------------------------------------------------------===//
228 //                     LiveRangeEdit delegate methods
229 //===----------------------------------------------------------------------===//
230 
231 bool RAGreedy::LRE_CanEraseVirtReg(Register VirtReg) {
232   LiveInterval &LI = LIS->getInterval(VirtReg);
233   if (VRM->hasPhys(VirtReg)) {
234     Matrix->unassign(LI);
235     aboutToRemoveInterval(LI);
236     return true;
237   }
238   // Unassigned virtreg is probably in the priority queue.
239   // RegAllocBase will erase it after dequeueing.
240   // Nonetheless, clear the live-range so that the debug
241   // dump will show the right state for that VirtReg.
242   LI.clear();
243   return false;
244 }
245 
246 void RAGreedy::LRE_WillShrinkVirtReg(Register VirtReg) {
247   if (!VRM->hasPhys(VirtReg))
248     return;
249 
250   // Register is assigned, put it back on the queue for reassignment.
251   LiveInterval &LI = LIS->getInterval(VirtReg);
252   Matrix->unassign(LI);
253   RegAllocBase::enqueue(&LI);
254 }
255 
256 void RAGreedy::LRE_DidCloneVirtReg(Register New, Register Old) {
257   ExtraInfo->LRE_DidCloneVirtReg(New, Old);
258 }
259 
260 void RAGreedy::ExtraRegInfo::LRE_DidCloneVirtReg(Register New, Register Old) {
261   // Cloning a register we haven't even heard about yet?  Just ignore it.
262   if (!Info.inBounds(Old))
263     return;
264 
  // LRE may clone a virtual register because dead code elimination causes it
  // to be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned:
  // reset the parent to RS_Assign and copy its info to the clone.
269   Info[Old].Stage = RS_Assign;
270   Info.grow(New.id());
271   Info[New] = Info[Old];
272 }
273 
274 void RAGreedy::releaseMemory() {
275   SpillerInstance.reset();
276   GlobalCand.clear();
277 }
278 
279 void RAGreedy::enqueueImpl(const LiveInterval *LI) { enqueue(Queue, LI); }
280 
281 void RAGreedy::enqueue(PQueue &CurQueue, const LiveInterval *LI) {
  // Prioritize live ranges by the priority computed by the advisor, assigning
  // higher-priority ranges first. The queue holds (priority, ~reg) pairs.
284   const Register Reg = LI->reg();
285   assert(Reg.isVirtual() && "Can only enqueue virtual registers");
286 
287   auto Stage = ExtraInfo->getOrInitStage(Reg);
288   if (Stage == RS_New) {
289     Stage = RS_Assign;
290     ExtraInfo->setStage(Reg, Stage);
291   }
292 
293   unsigned Ret = PriorityAdvisor->getPriority(*LI);
294 
295   // The virtual register number is a tie breaker for same-sized ranges.
296   // Give lower vreg numbers higher priority to assign them first.
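  // Pushing ~Reg makes a smaller vreg number produce a larger key, so it
  // wins ties in the max-heap; dequeue() undoes the complement.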
297   CurQueue.push(std::make_pair(Ret, ~Reg));
298 }
299 
300 unsigned DefaultPriorityAdvisor::getPriority(const LiveInterval &LI) const {
301   const unsigned Size = LI.getSize();
302   const Register Reg = LI.reg();
303   unsigned Prio;
304   LiveRangeStage Stage = RA.getExtraInfo().getStage(LI);
305 
306   if (Stage == RS_Split) {
307     // Unsplit ranges that couldn't be allocated immediately are deferred until
308     // everything else has been allocated.
309     Prio = Size;
310   } else if (Stage == RS_Memory) {
    // Memory operands should be considered last.
    // Change the priority such that memory operands are assigned in
    // the reverse order that they came in.
314     // TODO: Make this a member variable and probably do something about hints.
315     static unsigned MemOp = 0;
316     Prio = MemOp++;
317   } else {
318     // Giant live ranges fall back to the global assignment heuristic, which
319     // prevents excessive spilling in pathological cases.
320     const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
321     bool ForceGlobal = RC.GlobalPriority ||
322                        (!ReverseLocalAssignment &&
323                         (Size / SlotIndex::InstrDist) >
324                             (2 * RegClassInfo.getNumAllocatableRegs(&RC)));
325     unsigned GlobalBit = 0;
326 
327     if (Stage == RS_Assign && !ForceGlobal && !LI.empty() &&
328         LIS->intervalIsInOneMBB(LI)) {
329       // Allocate original local ranges in linear instruction order. Since they
330       // are singly defined, this produces optimal coloring in the absence of
331       // global interference and other constraints.
332       if (!ReverseLocalAssignment)
333         Prio = LI.beginIndex().getApproxInstrDistance(Indexes->getLastIndex());
334       else {
        // Allocating bottom up may allow many short live ranges to be assigned
        // first to one of the cheap registers. This could be much faster for
        // very large blocks on targets with many physical registers.
338         Prio = Indexes->getZeroIndex().getApproxInstrDistance(LI.endIndex());
339       }
340     } else {
341       // Allocate global and split ranges in long->short order. Long ranges that
342       // don't fit should be spilled (or split) ASAP so they don't create
343       // interference.  Mark a bit to prioritize global above local ranges.
344       Prio = Size;
345       GlobalBit = 1;
346     }
347 
348     // Priority bit layout:
349     // 31 RS_Assign priority
350     // 30 Preference priority
351     // if (RegClassPriorityTrumpsGlobalness)
352     //   29-25 AllocPriority
353     //   24 GlobalBit
354     // else
355     //   29 Global bit
356     //   28-24 AllocPriority
357     // 0-23 Size/Instr distance
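    //
    // For example, without RegClassPriorityTrumpsGlobalness, a global range
    // (GlobalBit = 1) of clamped size S in a class with AllocationPriority P
    // and a known register hint ends up with
    //   Prio = (1 << 31) | (1 << 30) | (1 << 29) | (P << 24) | S.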
358 
359     // Clamp the size to fit with the priority masking scheme
360     Prio = std::min(Prio, (unsigned)maxUIntN(24));
361     assert(isUInt<5>(RC.AllocationPriority) && "allocation priority overflow");
362 
363     if (RegClassPriorityTrumpsGlobalness)
364       Prio |= RC.AllocationPriority << 25 | GlobalBit << 24;
365     else
366       Prio |= GlobalBit << 29 | RC.AllocationPriority << 24;
367 
368     // Mark a higher bit to prioritize global and local above RS_Split.
369     Prio |= (1u << 31);
370 
371     // Boost ranges that have a physical register hint.
372     if (VRM->hasKnownPreference(Reg))
373       Prio |= (1u << 30);
374   }
375 
376   return Prio;
377 }
378 
379 const LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
380 
381 const LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
382   if (CurQueue.empty())
383     return nullptr;
384   LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
385   CurQueue.pop();
386   return LI;
387 }
388 
389 //===----------------------------------------------------------------------===//
390 //                            Direct Assignment
391 //===----------------------------------------------------------------------===//
392 
393 /// tryAssign - Try to assign VirtReg to an available register.
394 MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
395                                AllocationOrder &Order,
396                                SmallVectorImpl<Register> &NewVRegs,
397                                const SmallVirtRegSet &FixedRegisters) {
398   MCRegister PhysReg;
399   for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
400     assert(*I);
401     if (!Matrix->checkInterference(VirtReg, *I)) {
402       if (I.isHint())
403         return *I;
404       else
405         PhysReg = *I;
406     }
407   }
408   if (!PhysReg.isValid())
409     return PhysReg;
410 
411   // PhysReg is available, but there may be a better choice.
412 
413   // If we missed a simple hint, try to cheaply evict interference from the
414   // preferred register.
415   if (Register Hint = MRI->getSimpleHint(VirtReg.reg()))
416     if (Order.isHint(Hint)) {
417       MCRegister PhysHint = Hint.asMCReg();
418       LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
419 
420       if (EvictAdvisor->canEvictHintInterference(VirtReg, PhysHint,
421                                                  FixedRegisters)) {
422         evictInterference(VirtReg, PhysHint, NewVRegs);
423         return PhysHint;
424       }
425       // Record the missed hint, we may be able to recover
426       // at the end if the surrounding allocation changed.
427       SetOfBrokenHints.insert(&VirtReg);
428     }
429 
430   // Try to evict interference from a cheaper alternative.
431   uint8_t Cost = RegCosts[PhysReg];
432 
433   // Most registers have 0 additional cost.
434   if (!Cost)
435     return PhysReg;
436 
437   LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
438                     << (unsigned)Cost << '\n');
439   MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
440   return CheapReg ? CheapReg : PhysReg;
441 }
442 
443 //===----------------------------------------------------------------------===//
444 //                         Interference eviction
445 //===----------------------------------------------------------------------===//
446 
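/// Return the first physreg in \p VirtReg's allocation order, other than
/// \p PrevReg, to which VirtReg could be assigned without any interference,
/// or 0 if no such register exists.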
447 Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
448                                               Register PrevReg) const {
449   auto Order =
450       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
451   MCRegister PhysReg;
452   for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
453     if ((*I).id() == PrevReg.id())
454       continue;
455 
456     MCRegUnitIterator Units(*I, TRI);
457     for (; Units.isValid(); ++Units) {
458       // Instantiate a "subquery", not to be confused with the Queries array.
459       LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
460       if (subQ.checkInterference())
461         break;
462     }
463     // If no units have interference, break out with the current PhysReg.
464     if (!Units.isValid())
465       PhysReg = *I;
466   }
467   if (PhysReg)
468     LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
469                       << printReg(PrevReg, TRI) << " to "
470                       << printReg(PhysReg, TRI) << '\n');
471   return PhysReg;
472 }
473 
/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
477 void RAGreedy::evictInterference(const LiveInterval &VirtReg,
478                                  MCRegister PhysReg,
479                                  SmallVectorImpl<Register> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
483   unsigned Cascade = ExtraInfo->getOrAssignNewCascade(VirtReg.reg());
484 
485   LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
486                     << " interference: Cascade " << Cascade << '\n');
487 
488   // Collect all interfering virtregs first.
489   SmallVector<const LiveInterval *, 8> Intfs;
490   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
491     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // We usually have the interfering VRegs cached so collectInterferingVRegs()
    // should be fast. We may need to recalculate when different physregs
    // overlap the same register unit, since different SubRanges may then have
    // been queried against it.
496     ArrayRef<const LiveInterval *> IVR = Q.interferingVRegs();
497     Intfs.append(IVR.begin(), IVR.end());
498   }
499 
500   // Evict them second. This will invalidate the queries.
501   for (const LiveInterval *Intf : Intfs) {
502     // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
503     if (!VRM->hasPhys(Intf->reg()))
504       continue;
505 
506     Matrix->unassign(*Intf);
507     assert((ExtraInfo->getCascade(Intf->reg()) < Cascade ||
508             VirtReg.isSpillable() < Intf->isSpillable()) &&
509            "Cannot decrease cascade number, illegal eviction");
510     ExtraInfo->setCascade(Intf->reg(), Cascade);
511     ++NumEvicted;
512     NewVRegs.push_back(Intf->reg());
513   }
514 }
515 
516 /// Returns true if the given \p PhysReg is a callee saved register and has not
517 /// been used for allocation yet.
518 bool RegAllocEvictionAdvisor::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
519   MCRegister CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg);
520   if (!CSR)
521     return false;
522 
523   return !Matrix->isPhysRegUsed(PhysReg);
524 }
525 
526 std::optional<unsigned>
527 RegAllocEvictionAdvisor::getOrderLimit(const LiveInterval &VirtReg,
528                                        const AllocationOrder &Order,
529                                        unsigned CostPerUseLimit) const {
530   unsigned OrderLimit = Order.getOrder().size();
531 
532   if (CostPerUseLimit < uint8_t(~0u)) {
    // Check if any registers in RC are below CostPerUseLimit.
534     const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg());
535     uint8_t MinCost = RegClassInfo.getMinCost(RC);
536     if (MinCost >= CostPerUseLimit) {
537       LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = "
538                         << MinCost << ", no cheaper registers to be found.\n");
539       return std::nullopt;
540     }
541 
542     // It is normal for register classes to have a long tail of registers with
543     // the same cost. We don't need to look at them if they're too expensive.
544     if (RegCosts[Order.getOrder().back()] >= CostPerUseLimit) {
545       OrderLimit = RegClassInfo.getLastCostChange(RC);
546       LLVM_DEBUG(dbgs() << "Only trying the first " << OrderLimit
547                         << " regs.\n");
548     }
549   }
550   return OrderLimit;
551 }
552 
553 bool RegAllocEvictionAdvisor::canAllocatePhysReg(unsigned CostPerUseLimit,
554                                                  MCRegister PhysReg) const {
555   if (RegCosts[PhysReg] >= CostPerUseLimit)
556     return false;
557   // The first use of a callee-saved register in a function has cost 1.
558   // Don't start using a CSR when the CostPerUseLimit is low.
559   if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) {
560     LLVM_DEBUG(
561         dbgs() << printReg(PhysReg, TRI) << " would clobber CSR "
562                << printReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI)
563                << '\n');
564     return false;
565   }
566   return true;
567 }
568 
569 /// tryEvict - Try to evict all interferences for a physreg.
570 /// @param  VirtReg Currently unassigned virtual register.
571 /// @param  Order   Physregs to try.
572 /// @return         Physreg to assign VirtReg, or 0.
573 MCRegister RAGreedy::tryEvict(const LiveInterval &VirtReg,
574                               AllocationOrder &Order,
575                               SmallVectorImpl<Register> &NewVRegs,
576                               uint8_t CostPerUseLimit,
577                               const SmallVirtRegSet &FixedRegisters) {
578   NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
579                      TimePassesIsEnabled);
580 
581   MCRegister BestPhys = EvictAdvisor->tryFindEvictionCandidate(
582       VirtReg, Order, CostPerUseLimit, FixedRegisters);
583   if (BestPhys.isValid())
584     evictInterference(VirtReg, BestPhys, NewVRegs);
585   return BestPhys;
586 }
587 
588 //===----------------------------------------------------------------------===//
589 //                              Region Splitting
590 //===----------------------------------------------------------------------===//
591 
592 /// addSplitConstraints - Fill out the SplitConstraints vector based on the
593 /// interference pattern in Physreg and its aliases. Add the constraints to
594 /// SpillPlacement and return the static cost of this split in Cost, assuming
595 /// that all preferences in SplitConstraints are met.
596 /// Return false if there are no bundles with positive bias.
597 bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
598                                    BlockFrequency &Cost) {
599   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
600 
601   // Reset interference dependent info.
602   SplitConstraints.resize(UseBlocks.size());
603   BlockFrequency StaticCost = 0;
604   for (unsigned I = 0; I != UseBlocks.size(); ++I) {
605     const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
606     SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
607 
608     BC.Number = BI.MBB->getNumber();
609     Intf.moveToBlock(BC.Number);
610     BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
611     BC.Exit = (BI.LiveOut &&
612                !LIS->getInstructionFromIndex(BI.LastInstr)->isImplicitDef())
613                   ? SpillPlacement::PrefReg
614                   : SpillPlacement::DontCare;
615     BC.ChangesValue = BI.FirstDef.isValid();
616 
617     if (!Intf.hasInterference())
618       continue;
619 
620     // Number of spill code instructions to insert.
621     unsigned Ins = 0;
622 
623     // Interference for the live-in value.
624     if (BI.LiveIn) {
625       if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) {
626         BC.Entry = SpillPlacement::MustSpill;
627         ++Ins;
628       } else if (Intf.first() < BI.FirstInstr) {
629         BC.Entry = SpillPlacement::PrefSpill;
630         ++Ins;
631       } else if (Intf.first() < BI.LastInstr) {
632         ++Ins;
633       }
634 
      // Abort if the spill cannot be inserted at the MBB's start
636       if (((BC.Entry == SpillPlacement::MustSpill) ||
637            (BC.Entry == SpillPlacement::PrefSpill)) &&
638           SlotIndex::isEarlierInstr(BI.FirstInstr,
639                                     SA->getFirstSplitPoint(BC.Number)))
640         return false;
641     }
642 
643     // Interference for the live-out value.
644     if (BI.LiveOut) {
645       if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) {
646         BC.Exit = SpillPlacement::MustSpill;
647         ++Ins;
648       } else if (Intf.last() > BI.LastInstr) {
649         BC.Exit = SpillPlacement::PrefSpill;
650         ++Ins;
651       } else if (Intf.last() > BI.FirstInstr) {
652         ++Ins;
653       }
654     }
655 
656     // Accumulate the total frequency of inserted spill code.
657     while (Ins--)
658       StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
659   }
660   Cost = StaticCost;
661 
662   // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias; it is downhill from here.
664   SpillPlacer->addConstraints(SplitConstraints);
665   return SpillPlacer->scanActiveBundles();
666 }
667 
668 /// addThroughConstraints - Add constraints and links to SpillPlacer from the
669 /// live-through blocks in Blocks.
670 bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
671                                      ArrayRef<unsigned> Blocks) {
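  // BCS accumulates pending block constraints and TBS pending transparent
  // (interference-free) blocks; each is flushed to SpillPlacer in groups of
  // GroupSize, with any remainder flushed at the end.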
672   const unsigned GroupSize = 8;
673   SpillPlacement::BlockConstraint BCS[GroupSize];
674   unsigned TBS[GroupSize];
675   unsigned B = 0, T = 0;
676 
677   for (unsigned Number : Blocks) {
678     Intf.moveToBlock(Number);
679 
680     if (!Intf.hasInterference()) {
681       assert(T < GroupSize && "Array overflow");
682       TBS[T] = Number;
683       if (++T == GroupSize) {
684         SpillPlacer->addLinks(ArrayRef(TBS, T));
685         T = 0;
686       }
687       continue;
688     }
689 
690     assert(B < GroupSize && "Array overflow");
691     BCS[B].Number = Number;
692 
    // Abort if the spill cannot be inserted at the MBB's start
694     MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
695     auto FirstNonDebugInstr = MBB->getFirstNonDebugInstr();
696     if (FirstNonDebugInstr != MBB->end() &&
697         SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*FirstNonDebugInstr),
698                                   SA->getFirstSplitPoint(Number)))
699       return false;
700     // Interference for the live-in value.
701     if (Intf.first() <= Indexes->getMBBStartIdx(Number))
702       BCS[B].Entry = SpillPlacement::MustSpill;
703     else
704       BCS[B].Entry = SpillPlacement::PrefSpill;
705 
706     // Interference for the live-out value.
707     if (Intf.last() >= SA->getLastSplitPoint(Number))
708       BCS[B].Exit = SpillPlacement::MustSpill;
709     else
710       BCS[B].Exit = SpillPlacement::PrefSpill;
711 
712     if (++B == GroupSize) {
713       SpillPlacer->addConstraints(ArrayRef(BCS, B));
714       B = 0;
715     }
716   }
717 
718   SpillPlacer->addConstraints(ArrayRef(BCS, B));
719   SpillPlacer->addLinks(ArrayRef(TBS, T));
720   return true;
721 }
722 
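/// growRegion - Iteratively add the live-through blocks surrounding the
/// bundles that the spill placer currently marks as preferring a register,
/// feeding their constraints back to the placer until the region stops
/// growing. Returns false if the complexity budget is exhausted or the
/// through-block constraints cannot be added.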
723 bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
724   // Keep track of through blocks that have not been added to SpillPlacer.
725   BitVector Todo = SA->getThroughBlocks();
726   SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
727   unsigned AddedTo = 0;
728 #ifndef NDEBUG
729   unsigned Visited = 0;
730 #endif
731 
732   unsigned long Budget = GrowRegionComplexityBudget;
733   while (true) {
734     ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of the bundles that recently
    // became positive (prefer a register).
736     for (unsigned Bundle : NewBundles) {
737       // Look at all blocks connected to Bundle in the full graph.
738       ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
739       // Limit compilation time by bailing out after we use all our budget.
740       if (Blocks.size() >= Budget)
741         return false;
742       Budget -= Blocks.size();
743       for (unsigned Block : Blocks) {
744         if (!Todo.test(Block))
745           continue;
746         Todo.reset(Block);
747         // This is a new through block. Add it to SpillPlacer later.
748         ActiveBlocks.push_back(Block);
749 #ifndef NDEBUG
750         ++Visited;
751 #endif
752       }
753     }
754     // Any new blocks to add?
755     if (ActiveBlocks.size() == AddedTo)
756       break;
757 
758     // Compute through constraints from the interference, or assume that all
759     // through blocks prefer spilling when forming compact regions.
760     auto NewBlocks = ArrayRef(ActiveBlocks).slice(AddedTo);
761     if (Cand.PhysReg) {
762       if (!addThroughConstraints(Cand.Intf, NewBlocks))
763         return false;
764     } else
765       // Provide a strong negative bias on through blocks to prevent unwanted
766       // liveness on loop backedges.
767       SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
768     AddedTo = ActiveBlocks.size();
769 
770     // Perhaps iterating can enable more bundles?
771     SpillPlacer->iterate();
772   }
773   LLVM_DEBUG(dbgs() << ", v=" << Visited);
774   return true;
775 }
776 
777 /// calcCompactRegion - Compute the set of edge bundles that should be live
778 /// when splitting the current live range into compact regions.  Compact
779 /// regions can be computed without looking at interference.  They are the
780 /// regions formed by removing all the live-through blocks from the live range.
781 ///
782 /// Returns false if the current live range is already compact, or if the
783 /// compact regions would form single block regions anyway.
784 bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
785   // Without any through blocks, the live range is already compact.
786   if (!SA->getNumThroughBlocks())
787     return false;
788 
789   // Compact regions don't correspond to any physreg.
790   Cand.reset(IntfCache, MCRegister::NoRegister);
791 
792   LLVM_DEBUG(dbgs() << "Compact region bundles");
793 
794   // Use the spill placer to determine the live bundles. GrowRegion pretends
795   // that all the through blocks have interference when PhysReg is unset.
796   SpillPlacer->prepare(Cand.LiveBundles);
797 
798   // The static split cost will be zero since Cand.Intf reports no interference.
799   BlockFrequency Cost;
800   if (!addSplitConstraints(Cand.Intf, Cost)) {
801     LLVM_DEBUG(dbgs() << ", none.\n");
802     return false;
803   }
804 
805   if (!growRegion(Cand)) {
806     LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
807     return false;
808   }
809 
810   SpillPlacer->finish();
811 
812   if (!Cand.LiveBundles.any()) {
813     LLVM_DEBUG(dbgs() << ", none.\n");
814     return false;
815   }
816 
817   LLVM_DEBUG({
818     for (int I : Cand.LiveBundles.set_bits())
819       dbgs() << " EB#" << I;
820     dbgs() << ".\n";
821   });
822   return true;
823 }
824 
825 /// calcSpillCost - Compute how expensive it would be to split the live range in
826 /// SA around all use blocks instead of forming bundle regions.
827 BlockFrequency RAGreedy::calcSpillCost() {
828   BlockFrequency Cost = 0;
829   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
830   for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
831     unsigned Number = BI.MBB->getNumber();
832     // We normally only need one spill instruction - a load or a store.
833     Cost += SpillPlacer->getBlockFrequency(Number);
834 
835     // Unless the value is redefined in the block.
836     if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
837       Cost += SpillPlacer->getBlockFrequency(Number);
838   }
839   return Cost;
840 }
841 
842 /// calcGlobalSplitCost - Return the global split cost of following the split
843 /// pattern in LiveBundles. This cost should be added to the local cost of the
844 /// interference pattern in SplitConstraints.
845 ///
846 BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
847                                              const AllocationOrder &Order) {
848   BlockFrequency GlobalCost = 0;
849   const BitVector &LiveBundles = Cand.LiveBundles;
850   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
851   for (unsigned I = 0; I != UseBlocks.size(); ++I) {
852     const SplitAnalysis::BlockInfo &BI = UseBlocks[I];
853     SpillPlacement::BlockConstraint &BC = SplitConstraints[I];
854     bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, false)];
855     bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, true)];
856     unsigned Ins = 0;
857 
858     Cand.Intf.moveToBlock(BC.Number);
859 
860     if (BI.LiveIn)
861       Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
862     if (BI.LiveOut)
863       Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
864     while (Ins--)
865       GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
866   }
867 
868   for (unsigned Number : Cand.ActiveBlocks) {
869     bool RegIn  = LiveBundles[Bundles->getBundle(Number, false)];
870     bool RegOut = LiveBundles[Bundles->getBundle(Number, true)];
871     if (!RegIn && !RegOut)
872       continue;
873     if (RegIn && RegOut) {
874       // We need double spill code if this block has interference.
875       Cand.Intf.moveToBlock(Number);
876       if (Cand.Intf.hasInterference()) {
877         GlobalCost += SpillPlacer->getBlockFrequency(Number);
878         GlobalCost += SpillPlacer->getBlockFrequency(Number);
879       }
880       continue;
881     }
882     // live-in / stack-out or stack-in live-out.
883     GlobalCost += SpillPlacer->getBlockFrequency(Number);
884   }
885   return GlobalCost;
886 }
887 
888 /// splitAroundRegion - Split the current live range around the regions
889 /// determined by BundleCand and GlobalCand.
890 ///
891 /// Before calling this function, GlobalCand and BundleCand must be initialized
892 /// so each bundle is assigned to a valid candidate, or NoCand for the
893 /// stack-bound bundles.  The shared SA/SE SplitAnalysis and SplitEditor
894 /// objects must be initialized for the current live range, and intervals
895 /// created for the used candidates.
896 ///
897 /// @param LREdit    The LiveRangeEdit object handling the current split.
898 /// @param UsedCands List of used GlobalCand entries. Every BundleCand value
899 ///                  must appear in this list.
900 void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
901                                  ArrayRef<unsigned> UsedCands) {
902   // These are the intervals created for new global ranges. We may create more
903   // intervals for local ranges.
904   const unsigned NumGlobalIntvs = LREdit.size();
905   LLVM_DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs
906                     << " globals.\n");
907   assert(NumGlobalIntvs && "No global intervals configured");
908 
909   // Isolate even single instructions when dealing with a proper sub-class.
910   // That guarantees register class inflation for the stack interval because it
911   // is all copies.
912   Register Reg = SA->getParent().reg();
913   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
914 
915   // First handle all the blocks with uses.
916   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
917   for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
918     unsigned Number = BI.MBB->getNumber();
919     unsigned IntvIn = 0, IntvOut = 0;
920     SlotIndex IntfIn, IntfOut;
921     if (BI.LiveIn) {
922       unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
923       if (CandIn != NoCand) {
924         GlobalSplitCandidate &Cand = GlobalCand[CandIn];
925         IntvIn = Cand.IntvIdx;
926         Cand.Intf.moveToBlock(Number);
927         IntfIn = Cand.Intf.first();
928       }
929     }
930     if (BI.LiveOut) {
931       unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
932       if (CandOut != NoCand) {
933         GlobalSplitCandidate &Cand = GlobalCand[CandOut];
934         IntvOut = Cand.IntvIdx;
935         Cand.Intf.moveToBlock(Number);
936         IntfOut = Cand.Intf.last();
937       }
938     }
939 
940     // Create separate intervals for isolated blocks with multiple uses.
941     if (!IntvIn && !IntvOut) {
942       LLVM_DEBUG(dbgs() << printMBBReference(*BI.MBB) << " isolated.\n");
943       if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
944         SE->splitSingleBlock(BI);
945       continue;
946     }
947 
948     if (IntvIn && IntvOut)
949       SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
950     else if (IntvIn)
951       SE->splitRegInBlock(BI, IntvIn, IntfIn);
952     else
953       SE->splitRegOutBlock(BI, IntvOut, IntfOut);
954   }
955 
956   // Handle live-through blocks. The relevant live-through blocks are stored in
957   // the ActiveBlocks list with each candidate. We need to filter out
958   // duplicates.
959   BitVector Todo = SA->getThroughBlocks();
960   for (unsigned UsedCand : UsedCands) {
961     ArrayRef<unsigned> Blocks = GlobalCand[UsedCand].ActiveBlocks;
962     for (unsigned Number : Blocks) {
963       if (!Todo.test(Number))
964         continue;
965       Todo.reset(Number);
966 
967       unsigned IntvIn = 0, IntvOut = 0;
968       SlotIndex IntfIn, IntfOut;
969 
970       unsigned CandIn = BundleCand[Bundles->getBundle(Number, false)];
971       if (CandIn != NoCand) {
972         GlobalSplitCandidate &Cand = GlobalCand[CandIn];
973         IntvIn = Cand.IntvIdx;
974         Cand.Intf.moveToBlock(Number);
975         IntfIn = Cand.Intf.first();
976       }
977 
978       unsigned CandOut = BundleCand[Bundles->getBundle(Number, true)];
979       if (CandOut != NoCand) {
980         GlobalSplitCandidate &Cand = GlobalCand[CandOut];
981         IntvOut = Cand.IntvIdx;
982         Cand.Intf.moveToBlock(Number);
983         IntfOut = Cand.Intf.last();
984       }
985       if (!IntvIn && !IntvOut)
986         continue;
987       SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
988     }
989   }
990 
991   ++NumGlobalSplits;
992 
993   SmallVector<unsigned, 8> IntvMap;
994   SE->finish(&IntvMap);
995   DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
996 
997   unsigned OrigBlocks = SA->getNumLiveBlocks();
998 
999   // Sort out the new intervals created by splitting. We get four kinds:
1000   // - Remainder intervals should not be split again.
1001   // - Candidate intervals can be assigned to Cand.PhysReg.
1002   // - Block-local splits are candidates for local splitting.
1003   // - DCE leftovers should go back on the queue.
1004   for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
1005     const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
1006 
1007     // Ignore old intervals from DCE.
1008     if (ExtraInfo->getOrInitStage(Reg.reg()) != RS_New)
1009       continue;
1010 
1011     // Remainder interval. Don't try splitting again, spill if it doesn't
1012     // allocate.
1013     if (IntvMap[I] == 0) {
1014       ExtraInfo->setStage(Reg, RS_Spill);
1015       continue;
1016     }
1017 
1018     // Global intervals. Allow repeated splitting as long as the number of live
1019     // blocks is strictly decreasing.
1020     if (IntvMap[I] < NumGlobalIntvs) {
1021       if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
1022         LLVM_DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
1023                           << " blocks as original.\n");
        // Don't allow repeated splitting as a safeguard against looping.
1025         ExtraInfo->setStage(Reg, RS_Split2);
1026       }
1027       continue;
1028     }
1029 
1030     // Other intervals are treated as new. This includes local intervals created
1031     // for blocks with multiple uses, and anything created by DCE.
1032   }
1033 
1034   if (VerifyEnabled)
1035     MF->verify(this, "After splitting live range around region");
1036 }
1037 
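/// tryRegionSplit - Evaluate region-split candidates for \p VirtReg; if a
/// candidate beats the spill cost, or a compact region exists, split the live
/// range around the chosen regions. Never returns a physical register; any
/// new virtual registers are appended to \p NewVRegs.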
1038 MCRegister RAGreedy::tryRegionSplit(const LiveInterval &VirtReg,
1039                                     AllocationOrder &Order,
1040                                     SmallVectorImpl<Register> &NewVRegs) {
1041   if (!TRI->shouldRegionSplitForVirtReg(*MF, VirtReg))
1042     return MCRegister::NoRegister;
1043   unsigned NumCands = 0;
1044   BlockFrequency SpillCost = calcSpillCost();
1045   BlockFrequency BestCost;
1046 
1047   // Check if we can split this live range around a compact region.
1048   bool HasCompact = calcCompactRegion(GlobalCand.front());
1049   if (HasCompact) {
1050     // Yes, keep GlobalCand[0] as the compact region candidate.
1051     NumCands = 1;
1052     BestCost = BlockFrequency::getMaxFrequency();
1053   } else {
    // No benefit from the compact region; our fallback will be per-block
1055     // splitting. Make sure we find a solution that is cheaper than spilling.
1056     BestCost = SpillCost;
1057     LLVM_DEBUG(dbgs() << "Cost of isolating all blocks = ";
1058                MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
1059   }
1060 
1061   unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
1062                                                NumCands, false /*IgnoreCSR*/);
1063 
1064   // No solutions found, fall back to single block splitting.
1065   if (!HasCompact && BestCand == NoCand)
1066     return MCRegister::NoRegister;
1067 
1068   return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
1069 }
1070 
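/// calculateRegionSplitCost - Evaluate a region-split candidate for each
/// physreg in \p Order, filling in GlobalCand. On return, \p BestCost holds
/// the cheapest total cost found and \p NumCands the number of candidates
/// kept. Returns the index of the best candidate, or NoCand if no candidate
/// beats \p BestCost.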
1071 unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg,
1072                                             AllocationOrder &Order,
1073                                             BlockFrequency &BestCost,
1074                                             unsigned &NumCands,
1075                                             bool IgnoreCSR) {
1076   unsigned BestCand = NoCand;
1077   for (MCPhysReg PhysReg : Order) {
1078     assert(PhysReg);
1079     if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg))
1080       continue;
1081 
1082     // Discard bad candidates before we run out of interference cache cursors.
1083     // This will only affect register classes with a lot of registers (>32).
1084     if (NumCands == IntfCache.getMaxCursors()) {
1085       unsigned WorstCount = ~0u;
1086       unsigned Worst = 0;
1087       for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) {
1088         if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg)
1089           continue;
1090         unsigned Count = GlobalCand[CandIndex].LiveBundles.count();
1091         if (Count < WorstCount) {
1092           Worst = CandIndex;
1093           WorstCount = Count;
1094         }
1095       }
1096       --NumCands;
1097       GlobalCand[Worst] = GlobalCand[NumCands];
1098       if (BestCand == NumCands)
1099         BestCand = Worst;
1100     }
1101 
1102     if (GlobalCand.size() <= NumCands)
1103       GlobalCand.resize(NumCands+1);
1104     GlobalSplitCandidate &Cand = GlobalCand[NumCands];
1105     Cand.reset(IntfCache, PhysReg);
1106 
1107     SpillPlacer->prepare(Cand.LiveBundles);
1108     BlockFrequency Cost;
1109     if (!addSplitConstraints(Cand.Intf, Cost)) {
1110       LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n");
1111       continue;
1112     }
1113     LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = ";
1114                MBFI->printBlockFreq(dbgs(), Cost));
1115     if (Cost >= BestCost) {
1116       LLVM_DEBUG({
1117         if (BestCand == NoCand)
1118           dbgs() << " worse than no bundles\n";
1119         else
1120           dbgs() << " worse than "
1121                  << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
1122       });
1123       continue;
1124     }
1125     if (!growRegion(Cand)) {
1126       LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n");
1127       continue;
1128     }
1129 
1130     SpillPlacer->finish();
1131 
1132     // No live bundles, defer to splitSingleBlocks().
1133     if (!Cand.LiveBundles.any()) {
1134       LLVM_DEBUG(dbgs() << " no bundles.\n");
1135       continue;
1136     }
1137 
1138     Cost += calcGlobalSplitCost(Cand, Order);
1139     LLVM_DEBUG({
1140       dbgs() << ", total = ";
1141       MBFI->printBlockFreq(dbgs(), Cost) << " with bundles";
1142       for (int I : Cand.LiveBundles.set_bits())
1143         dbgs() << " EB#" << I;
1144       dbgs() << ".\n";
1145     });
1146     if (Cost < BestCost) {
1147       BestCand = NumCands;
1148       BestCost = Cost;
1149     }
1150     ++NumCands;
1151   }
1152 
1153   return BestCand;
1154 }
1155 
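/// doRegionSplit - Commit the region split selected by tryRegionSplit: assign
/// edge bundles to \p BestCand (and to the compact region when \p HasCompact),
/// open an interval for each used candidate, and split the live range around
/// the resulting regions.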
1156 unsigned RAGreedy::doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand,
1157                                  bool HasCompact,
1158                                  SmallVectorImpl<Register> &NewVRegs) {
1159   SmallVector<unsigned, 8> UsedCands;
1160   // Prepare split editor.
1161   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1162   SE->reset(LREdit, SplitSpillMode);
1163 
1164   // Assign all edge bundles to the preferred candidate, or NoCand.
1165   BundleCand.assign(Bundles->getNumBundles(), NoCand);
1166 
1167   // Assign bundles for the best candidate region.
1168   if (BestCand != NoCand) {
1169     GlobalSplitCandidate &Cand = GlobalCand[BestCand];
1170     if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
1171       UsedCands.push_back(BestCand);
1172       Cand.IntvIdx = SE->openIntv();
1173       LLVM_DEBUG(dbgs() << "Split for " << printReg(Cand.PhysReg, TRI) << " in "
1174                         << B << " bundles, intv " << Cand.IntvIdx << ".\n");
1175       (void)B;
1176     }
1177   }
1178 
1179   // Assign bundles for the compact region.
1180   if (HasCompact) {
1181     GlobalSplitCandidate &Cand = GlobalCand.front();
1182     assert(!Cand.PhysReg && "Compact region has no physreg");
1183     if (unsigned B = Cand.getBundles(BundleCand, 0)) {
1184       UsedCands.push_back(0);
1185       Cand.IntvIdx = SE->openIntv();
1186       LLVM_DEBUG(dbgs() << "Split for compact region in " << B
1187                         << " bundles, intv " << Cand.IntvIdx << ".\n");
1188       (void)B;
1189     }
1190   }
1191 
1192   splitAroundRegion(LREdit, UsedCands);
1193   return 0;
1194 }
1195 
1196 //===----------------------------------------------------------------------===//
1197 //                            Per-Block Splitting
1198 //===----------------------------------------------------------------------===//
1199 
/// tryBlockSplit - Split a global live range around every block with uses. This
/// creates a lot of local live ranges that will be split by tryLocalSplit if
/// they don't allocate.
1203 unsigned RAGreedy::tryBlockSplit(const LiveInterval &VirtReg,
1204                                  AllocationOrder &Order,
1205                                  SmallVectorImpl<Register> &NewVRegs) {
1206   assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
1207   Register Reg = VirtReg.reg();
1208   bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
1209   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1210   SE->reset(LREdit, SplitSpillMode);
1211   ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
1212   for (const SplitAnalysis::BlockInfo &BI : UseBlocks) {
1213     if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
1214       SE->splitSingleBlock(BI);
1215   }
1216   // No blocks were split.
1217   if (LREdit.empty())
1218     return 0;
1219 
1220   // We did split for some blocks.
1221   SmallVector<unsigned, 8> IntvMap;
1222   SE->finish(&IntvMap);
1223 
1224   // Tell LiveDebugVariables about the new ranges.
1225   DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
1226 
1227   // Sort out the new intervals created by splitting. The remainder interval
1228   // goes straight to spilling, the new local ranges get to stay RS_New.
1229   for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
1230     const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
1231     if (ExtraInfo->getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
1232       ExtraInfo->setStage(LI, RS_Spill);
1233   }
1234 
1235   if (VerifyEnabled)
1236     MF->verify(this, "After splitting live range around basic blocks");
1237   return 0;
1238 }
1239 
1240 //===----------------------------------------------------------------------===//
1241 //                         Per-Instruction Splitting
1242 //===----------------------------------------------------------------------===//
1243 
1244 /// Get the number of allocatable registers that match the constraints of \p Reg
1245 /// on \p MI and that are also in \p SuperRC.
1246 static unsigned getNumAllocatableRegsForConstraints(
1247     const MachineInstr *MI, Register Reg, const TargetRegisterClass *SuperRC,
1248     const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
1249     const RegisterClassInfo &RCI) {
1250   assert(SuperRC && "Invalid register class");
1251 
1252   const TargetRegisterClass *ConstrainedRC =
1253       MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
1254                                              /* ExploreBundle */ true);
1255   if (!ConstrainedRC)
1256     return 0;
1257   return RCI.getNumAllocatableRegs(ConstrainedRC);
1258 }
1259 
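/// Compute the lanes of \p Reg that \p MI reads. A use without a sub-register
/// index reads all lanes; for a partial definition that is not marked undef,
/// the lanes it does not write stay live through the instruction and are
/// included as well.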
1260 static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
1261                                        const TargetRegisterInfo &TRI,
1262                                        const MachineInstr &MI, Register Reg) {
1263   LaneBitmask Mask;
1264   for (const MachineOperand &MO : MI.operands()) {
1265     if (!MO.isReg() || MO.getReg() != Reg)
1266       continue;
1267 
1268     unsigned SubReg = MO.getSubReg();
1269     if (SubReg == 0 && MO.isUse()) {
1270       Mask |= MRI.getMaxLaneMaskForVReg(Reg);
1271       continue;
1272     }
1273 
1274     LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
1275     if (MO.isDef()) {
1276       if (!MO.isUndef())
1277         Mask |= ~SubRegMask;
1278     } else
1279       Mask |= SubRegMask;
1280   }
1281 
1282   return Mask;
1283 }
1284 
/// Return true if \p MI at \p Use reads a subset of the lanes live in \p
1286 /// VirtReg.
1287 static bool readsLaneSubset(const MachineRegisterInfo &MRI,
1288                             const MachineInstr *MI, const LiveInterval &VirtReg,
1289                             const TargetRegisterInfo *TRI, SlotIndex Use) {
1290   // Early check the common case.
1291   if (MI->isCopy() &&
1292       MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg())
1293     return false;
1294 
  // FIXME: We're only considering uses, but should we consider defs too?
1296   LaneBitmask ReadMask = getInstReadLaneMask(MRI, *TRI, *MI, VirtReg.reg());
1297 
1298   LaneBitmask LiveAtMask;
1299   for (const LiveInterval::SubRange &S : VirtReg.subranges()) {
1300     if (S.liveAt(Use))
1301       LiveAtMask |= S.LaneMask;
1302   }
1303 
1304   // If the live lanes aren't different from the lanes used by the instruction,
1305   // this doesn't help.
1306   return (ReadMask & ~(LiveAtMask & TRI->getCoveringLanes())).any();
1307 }
1308 
1309 /// tryInstructionSplit - Split a live range around individual instructions.
1310 /// This is normally not worthwhile since the spiller is doing essentially the
1311 /// same thing. However, when the live range is in a constrained register
1312 /// class, it may help to insert copies such that parts of the live range can
1313 /// be moved to a larger register class.
1314 ///
1315 /// This is similar to spilling to a larger register class.
1316 unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
1317                                        AllocationOrder &Order,
1318                                        SmallVectorImpl<Register> &NewVRegs) {
1319   const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1320   // There is no point to this if there are no larger sub-classes.
1321 
1322   bool SplitSubClass = true;
1323   if (!RegClassInfo.isProperSubClass(CurRC)) {
1324     if (!VirtReg.hasSubRanges())
1325       return 0;
1326     SplitSubClass = false;
1327   }
1328 
1329   // Always enable split spill mode, since we're effectively spilling to a
1330   // register.
1331   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1332   SE->reset(LREdit, SplitEditor::SM_Size);
1333 
1334   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1335   if (Uses.size() <= 1)
1336     return 0;
1337 
1338   LLVM_DEBUG(dbgs() << "Split around " << Uses.size()
1339                     << " individual instrs.\n");
1340 
1341   const TargetRegisterClass *SuperRC =
1342       TRI->getLargestLegalSuperClass(CurRC, *MF);
1343   unsigned SuperRCNumAllocatableRegs =
1344       RegClassInfo.getNumAllocatableRegs(SuperRC);
1345   // Split around every non-copy instruction if this split will relax
1346   // the constraints on the virtual register.
1347   // Otherwise, splitting just inserts uncoalescable copies that do not help
1348   // the allocation.
1349   for (const SlotIndex Use : Uses) {
1350     if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
1351       if (MI->isFullCopy() ||
1352           (SplitSubClass &&
1353            SuperRCNumAllocatableRegs ==
1354                getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
1355                                                    TII, TRI, RegClassInfo)) ||
1356           // TODO: Handle split for subranges with subclass constraints?
1357           (!SplitSubClass && VirtReg.hasSubRanges() &&
1358            !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) {
1359         LLVM_DEBUG(dbgs() << "    skip:\t" << Use << '\t' << *MI);
1360         continue;
1361       }
1362     }
1363     SE->openIntv();
1364     SlotIndex SegStart = SE->enterIntvBefore(Use);
1365     SlotIndex SegStop = SE->leaveIntvAfter(Use);
1366     SE->useIntv(SegStart, SegStop);
1367   }
1368 
1369   if (LREdit.empty()) {
1370     LLVM_DEBUG(dbgs() << "All uses were copies.\n");
1371     return 0;
1372   }
1373 
1374   SmallVector<unsigned, 8> IntvMap;
1375   SE->finish(&IntvMap);
1376   DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1377   // Assign all new registers to RS_Spill. This was the last chance.
1378   ExtraInfo->setStage(LREdit.begin(), LREdit.end(), RS_Spill);
1379   return 0;
1380 }
1381 
1382 //===----------------------------------------------------------------------===//
1383 //                             Local Splitting
1384 //===----------------------------------------------------------------------===//
1385 
1386 /// calcGapWeights - Compute the maximum spill weight that needs to be evicted
1387 /// in order to use PhysReg between two entries in SA->UseSlots.
1388 ///
1389 /// GapWeight[I] represents the gap between UseSlots[I] and UseSlots[I + 1].
1390 ///
1391 void RAGreedy::calcGapWeights(MCRegister PhysReg,
1392                               SmallVectorImpl<float> &GapWeight) {
1393   assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
1394   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1395   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1396   const unsigned NumGaps = Uses.size()-1;
1397 
1398   // Start and end points for the interference check.
1399   SlotIndex StartIdx =
1400     BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
1401   SlotIndex StopIdx =
1402     BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;
1403 
1404   GapWeight.assign(NumGaps, 0.0f);
1405 
1406   // Add interference from each overlapping register.
1407   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1408     if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
1409           .checkInterference())
1410       continue;
1411 
1412     // We know that VirtReg is a continuous interval from FirstInstr to
1413     // LastInstr, so we don't need InterferenceQuery.
1414     //
1415     // Interference that overlaps an instruction is counted in both gaps
1416     // surrounding the instruction. The exception is interference before
1417     // StartIdx and after StopIdx.
1418     //
1419     LiveIntervalUnion::SegmentIter IntI =
      Matrix->getLiveUnions()[*Units].find(StartIdx);
1421     for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
1422       // Skip the gaps before IntI.
1423       while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
1424         if (++Gap == NumGaps)
1425           break;
1426       if (Gap == NumGaps)
1427         break;
1428 
1429       // Update the gaps covered by IntI.
1430       const float weight = IntI.value()->weight();
1431       for (; Gap != NumGaps; ++Gap) {
1432         GapWeight[Gap] = std::max(GapWeight[Gap], weight);
1433         if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
1434           break;
1435       }
1436       if (Gap == NumGaps)
1437         break;
1438     }
1439   }
1440 
1441   // Add fixed interference.
1442   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1443     const LiveRange &LR = LIS->getRegUnit(*Units);
1444     LiveRange::const_iterator I = LR.find(StartIdx);
1445     LiveRange::const_iterator E = LR.end();
1446 
    // Same loop as above. Mark any overlapped gaps as huge_valf.
1448     for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
1449       while (Uses[Gap+1].getBoundaryIndex() < I->start)
1450         if (++Gap == NumGaps)
1451           break;
1452       if (Gap == NumGaps)
1453         break;
1454 
1455       for (; Gap != NumGaps; ++Gap) {
1456         GapWeight[Gap] = huge_valf;
1457         if (Uses[Gap+1].getBaseIndex() >= I->end)
1458           break;
1459       }
1460       if (Gap == NumGaps)
1461         break;
1462     }
1463   }
1464 }
1465 
1466 /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
1467 /// basic block.
1468 ///
1469 unsigned RAGreedy::tryLocalSplit(const LiveInterval &VirtReg,
1470                                  AllocationOrder &Order,
1471                                  SmallVectorImpl<Register> &NewVRegs) {
1472   // TODO: the function currently only handles a single UseBlock; it should be
1473   // possible to generalize.
1474   if (SA->getUseBlocks().size() != 1)
1475     return 0;
1476 
1477   const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
1478 
  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block: a phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case; we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.
1485 
1486   ArrayRef<SlotIndex> Uses = SA->getUseSlots();
1487   if (Uses.size() <= 2)
1488     return 0;
1489   const unsigned NumGaps = Uses.size()-1;
1490 
1491   LLVM_DEBUG({
1492     dbgs() << "tryLocalSplit: ";
1493     for (const auto &Use : Uses)
1494       dbgs() << ' ' << Use;
1495     dbgs() << '\n';
1496   });
1497 
1498   // If VirtReg is live across any register mask operands, compute a list of
1499   // gaps with register masks.
1500   SmallVector<unsigned, 8> RegMaskGaps;
1501   if (Matrix->checkRegMaskInterference(VirtReg)) {
1502     // Get regmask slots for the whole block.
1503     ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
1504     LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
1505     // Constrain to VirtReg's live range.
1506     unsigned RI =
1507         llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
1508     unsigned RE = RMS.size();
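    // RI now indexes the first regmask slot at or after the first use.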
1509     for (unsigned I = 0; I != NumGaps && RI != RE; ++I) {
1510       // Look for Uses[I] <= RMS <= Uses[I + 1].
1511       assert(!SlotIndex::isEarlierInstr(RMS[RI], Uses[I]));
1512       if (SlotIndex::isEarlierInstr(Uses[I + 1], RMS[RI]))
1513         continue;
1514       // Skip a regmask on the same instruction as the last use. It doesn't
1515       // overlap the live range.
1516       if (SlotIndex::isSameInstr(Uses[I + 1], RMS[RI]) && I + 1 == NumGaps)
1517         break;
1518       LLVM_DEBUG(dbgs() << ' ' << RMS[RI] << ':' << Uses[I] << '-'
1519                         << Uses[I + 1]);
1520       RegMaskGaps.push_back(I);
      // Advance RI to the next gap. A regmask on one of the uses counts in
1522       // both gaps.
1523       while (RI != RE && SlotIndex::isEarlierInstr(RMS[RI], Uses[I + 1]))
1524         ++RI;
1525     }
1526     LLVM_DEBUG(dbgs() << '\n');
1527   }
1528 
1529   // Since we allow local split results to be split again, there is a risk of
1530   // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split into 2+3 (including the COPY), and we want to allow that.
1534   //
1535   // Instead we use these rules:
1536   //
1537   // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
1538   //    noop split, of course).
1539   // 2. Require progress be made for ranges with getStage() == RS_Split2. All
1540   //    the new ranges must have fewer instructions than before the split.
1541   // 3. New ranges with the same number of instructions are marked RS_Split2,
1542   //    smaller ranges are marked RS_New.
1543   //
1544   // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
1545   // excessive splitting and infinite loops.
1546   //
1547   bool ProgressRequired = ExtraInfo->getStage(VirtReg) >= RS_Split2;
1548 
1549   // Best split candidate.
1550   unsigned BestBefore = NumGaps;
1551   unsigned BestAfter = 0;
1552   float BestDiff = 0;
1553 
1554   const float blockFreq =
1555     SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
1556     (1.0f / MBFI->getEntryFreq());
1557   SmallVector<float, 8> GapWeight;
1558 
1559   for (MCPhysReg PhysReg : Order) {
1560     assert(PhysReg);
1561     // Keep track of the largest spill weight that would need to be evicted in
1562     // order to make use of PhysReg between UseSlots[I] and UseSlots[I + 1].
1563     calcGapWeights(PhysReg, GapWeight);
1564 
1565     // Remove any gaps with regmask clobbers.
1566     if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
1567       for (unsigned I = 0, E = RegMaskGaps.size(); I != E; ++I)
1568         GapWeight[RegMaskGaps[I]] = huge_valf;
1569 
1570     // Try to find the best sequence of gaps to close.
1571     // The new spill weight must be larger than any gap interference.
1572 
1573     // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
1574     unsigned SplitBefore = 0, SplitAfter = 1;
1575 
1576     // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
1577     // It is the spill weight that needs to be evicted.
1578     float MaxGap = GapWeight[0];
1579 
1580     while (true) {
1581       // Live before/after split?
1582       const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
1583       const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
1584 
1585       LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << ' ' << Uses[SplitBefore]
1586                         << '-' << Uses[SplitAfter] << " I=" << MaxGap);
1587 
1588       // Stop before the interval gets so big we wouldn't be making progress.
1589       if (!LiveBefore && !LiveAfter) {
1590         LLVM_DEBUG(dbgs() << " all\n");
1591         break;
1592       }
1593       // Should the interval be extended or shrunk?
1594       bool Shrink = true;
1595 
1596       // How many gaps would the new range have?
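      // SplitAfter - SplitBefore gaps separate the retained uses, and each
      // live-in/live-out boundary adds one more gap for the inserted copy.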
1597       unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
1598 
1599       // Legally, without causing looping?
1600       bool Legal = !ProgressRequired || NewGaps < NumGaps;
1601 
1602       if (Legal && MaxGap < huge_valf) {
1603         // Estimate the new spill weight. Each instruction reads or writes the
1604         // register. Conservatively assume there are no read-modify-write
1605         // instructions.
1606         //
1607         // Try to guess the size of the new interval.
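        // The first argument approximates the combined frequency of the
        // NewGaps + 1 instructions in the new range; the second is the
        // estimated length of the range in slot units, including the
        // inserted copies.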
1608         const float EstWeight = normalizeSpillWeight(
1609             blockFreq * (NewGaps + 1),
1610             Uses[SplitBefore].distance(Uses[SplitAfter]) +
1611                 (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
1612             1);
1613         // Would this split be possible to allocate?
        // Never allocate all gaps; we wouldn't be making progress.
1615         LLVM_DEBUG(dbgs() << " w=" << EstWeight);
1616         if (EstWeight * Hysteresis >= MaxGap) {
1617           Shrink = false;
1618           float Diff = EstWeight - MaxGap;
1619           if (Diff > BestDiff) {
1620             LLVM_DEBUG(dbgs() << " (best)");
1621             BestDiff = Hysteresis * Diff;
1622             BestBefore = SplitBefore;
1623             BestAfter = SplitAfter;
1624           }
1625         }
1626       }
1627 
1628       // Try to shrink.
1629       if (Shrink) {
1630         if (++SplitBefore < SplitAfter) {
1631           LLVM_DEBUG(dbgs() << " shrink\n");
1632           // Recompute the max when necessary.
1633           if (GapWeight[SplitBefore - 1] >= MaxGap) {
1634             MaxGap = GapWeight[SplitBefore];
1635             for (unsigned I = SplitBefore + 1; I != SplitAfter; ++I)
1636               MaxGap = std::max(MaxGap, GapWeight[I]);
1637           }
1638           continue;
1639         }
1640         MaxGap = 0;
1641       }
1642 
1643       // Try to extend the interval.
1644       if (SplitAfter >= NumGaps) {
1645         LLVM_DEBUG(dbgs() << " end\n");
1646         break;
1647       }
1648 
1649       LLVM_DEBUG(dbgs() << " extend\n");
1650       MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
1651     }
1652   }
1653 
1654   // Didn't find any candidates?
1655   if (BestBefore == NumGaps)
1656     return 0;
1657 
1658   LLVM_DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] << '-'
1659                     << Uses[BestAfter] << ", " << BestDiff << ", "
1660                     << (BestAfter - BestBefore + 1) << " instrs\n");
1661 
1662   LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
1663   SE->reset(LREdit);
1664 
1665   SE->openIntv();
1666   SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
1667   SlotIndex SegStop  = SE->leaveIntvAfter(Uses[BestAfter]);
1668   SE->useIntv(SegStart, SegStop);
1669   SmallVector<unsigned, 8> IntvMap;
1670   SE->finish(&IntvMap);
1671   DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
1672   // If the new range has the same number of instructions as before, mark it as
1673   // RS_Split2 so the next split will be forced to make progress. Otherwise,
1674   // leave the new intervals as RS_New so they can compete.
1675   bool LiveBefore = BestBefore != 0 || BI.LiveIn;
1676   bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
1677   unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
1678   if (NewGaps >= NumGaps) {
1679     LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
1680     assert(!ProgressRequired && "Didn't make progress when it was required.");
1681     for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
1682       if (IntvMap[I] == 1) {
1683         ExtraInfo->setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
1684         LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
1685       }
1686     LLVM_DEBUG(dbgs() << '\n');
1687   }
1688   ++NumLocalSplits;
1689 
1690   return 0;
1691 }
1692 
1693 //===----------------------------------------------------------------------===//
1694 //                          Live Range Splitting
1695 //===----------------------------------------------------------------------===//
1696 
1697 /// trySplit - Try to split VirtReg or one of its interferences, making it
1698 /// assignable.
/// @return PhysReg when VirtReg may be assigned and/or new NewVRegs.
1700 unsigned RAGreedy::trySplit(const LiveInterval &VirtReg, AllocationOrder &Order,
1701                             SmallVectorImpl<Register> &NewVRegs,
1702                             const SmallVirtRegSet &FixedRegisters) {
1703   // Ranges must be Split2 or less.
1704   if (ExtraInfo->getStage(VirtReg) >= RS_Spill)
1705     return 0;
1706 
1707   // Local intervals are handled separately.
1708   if (LIS->intervalIsInOneMBB(VirtReg)) {
1709     NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
1710                        TimerGroupDescription, TimePassesIsEnabled);
1711     SA->analyze(&VirtReg);
1712     Register PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
1713     if (PhysReg || !NewVRegs.empty())
1714       return PhysReg;
1715     return tryInstructionSplit(VirtReg, Order, NewVRegs);
1716   }
1717 
1718   NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
1719                      TimerGroupDescription, TimePassesIsEnabled);
1720 
1721   SA->analyze(&VirtReg);
1722 
1723   // First try to split around a region spanning multiple blocks. RS_Split2
1724   // ranges already made dubious progress with region splitting, so they go
1725   // straight to single block splitting.
1726   if (ExtraInfo->getStage(VirtReg) < RS_Split2) {
1727     MCRegister PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
1728     if (PhysReg || !NewVRegs.empty())
1729       return PhysReg;
1730   }
1731 
1732   // Then isolate blocks.
1733   return tryBlockSplit(VirtReg, Order, NewVRegs);
1734 }
1735 
1736 //===----------------------------------------------------------------------===//
1737 //                          Last Chance Recoloring
1738 //===----------------------------------------------------------------------===//
1739 
1740 /// Return true if \p reg has any tied def operand.
1741 static bool hasTiedDef(MachineRegisterInfo *MRI, unsigned reg) {
1742   for (const MachineOperand &MO : MRI->def_operands(reg))
1743     if (MO.isTied())
1744       return true;
1745 
1746   return false;
1747 }
1748 
/// Return true if the existing assignment of \p Intf overlaps \p PhysReg but
/// is not the same register.
1751 static bool assignedRegPartiallyOverlaps(const TargetRegisterInfo &TRI,
1752                                          const VirtRegMap &VRM,
1753                                          MCRegister PhysReg,
1754                                          const LiveInterval &Intf) {
1755   MCRegister AssignedReg = VRM.getPhys(Intf.reg());
1756   if (PhysReg == AssignedReg)
1757     return false;
1758   return TRI.regsOverlap(PhysReg, AssignedReg);
1759 }
1760 
1761 /// mayRecolorAllInterferences - Check if the virtual registers that
1762 /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
1763 /// recolored to free \p PhysReg.
1764 /// When true is returned, \p RecoloringCandidates has been augmented with all
1765 /// the live intervals that need to be recolored in order to free \p PhysReg
1766 /// for \p VirtReg.
1767 /// \p FixedRegisters contains all the virtual registers that cannot be
1768 /// recolored.
1769 bool RAGreedy::mayRecolorAllInterferences(
1770     MCRegister PhysReg, const LiveInterval &VirtReg,
1771     SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
1772   const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
1773 
1774   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
1775     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there are LastChanceRecoloringMaxInterference or more interferences,
1777     // chances are one would not be recolorable.
1778     if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
1779             LastChanceRecoloringMaxInterference &&
1780         !ExhaustiveSearch) {
1781       LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
1782       CutOffInfo |= CO_Interf;
1783       return false;
1784     }
1785     for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
      // If Intf is done and sits on the same register class as VirtReg, it
      // would not be recolorable as it is in the same state as VirtReg.
      // However, there are at least two exceptions.
      //
      // If VirtReg has tied defs and Intf doesn't, then there is still a
      // point in examining whether it can be recolored.
1792       //
1793       // Additionally, if the register class has overlapping tuple members, it
1794       // may still be recolorable using a different tuple. This is more likely
1795       // if the existing assignment aliases with the candidate.
1796       //
1797       if (((ExtraInfo->getStage(*Intf) == RS_Done &&
1798             MRI->getRegClass(Intf->reg()) == CurRC &&
1799             !assignedRegPartiallyOverlaps(*TRI, *VRM, PhysReg, *Intf)) &&
1800            !(hasTiedDef(MRI, VirtReg.reg()) &&
1801              !hasTiedDef(MRI, Intf->reg()))) ||
1802           FixedRegisters.count(Intf->reg())) {
1803         LLVM_DEBUG(
1804             dbgs() << "Early abort: the interference is not recolorable.\n");
1805         return false;
1806       }
1807       RecoloringCandidates.insert(Intf);
1808     }
1809   }
1810   return true;
1811 }
1812 
1813 /// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
1814 /// its interferences.
1815 /// Last chance recoloring chooses a color for \p VirtReg and recolors every
1816 /// virtual register that was using it. The recoloring process may recursively
1817 /// use the last chance recoloring. Therefore, when a virtual register has been
1818 /// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
1819 /// be last-chance-recolored again during this recoloring "session".
1820 /// E.g.,
1821 /// Let
1822 /// vA can use {R1, R2    }
1823 /// vB can use {    R2, R3}
1824 /// vC can use {R1        }
1825 /// Where vA, vB, and vC cannot be split anymore (they are reloads for
1826 /// instance) and they all interfere.
1827 ///
1828 /// vA is assigned R1
1829 /// vB is assigned R2
1830 /// vC tries to evict vA but vA is already done.
1831 /// Regular register allocation fails.
1832 ///
1833 /// Last chance recoloring kicks in:
/// vC acts as if vA were evicted => vC uses R1.
1835 /// vC is marked as fixed.
1836 /// vA needs to find a color.
1837 /// None are available.
1838 /// vA cannot evict vC: vC is a fixed virtual register now.
/// vA acts as if vB were evicted => vA uses R2.
1840 /// vB needs to find a color.
1841 /// R3 is available.
1842 /// Recoloring => vC = R1, vA = R2, vB = R3
1843 ///
1844 /// \p Order defines the preferred allocation order for \p VirtReg.
/// \p NewRegs will contain any new virtual registers that have been created
1846 /// (split, spill) during the process and that must be assigned.
1847 /// \p FixedRegisters contains all the virtual registers that cannot be
1848 /// recolored.
1849 ///
1850 /// \p RecolorStack tracks the original assignments of successfully recolored
1851 /// registers.
1852 ///
1853 /// \p Depth gives the current depth of the last chance recoloring.
1854 /// \return a physical register that can be used for VirtReg or ~0u if none
1855 /// exists.
1856 unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
1857                                            AllocationOrder &Order,
1858                                            SmallVectorImpl<Register> &NewVRegs,
1859                                            SmallVirtRegSet &FixedRegisters,
1860                                            RecoloringStack &RecolorStack,
1861                                            unsigned Depth) {
1862   if (!TRI->shouldUseLastChanceRecoloringForVirtReg(*MF, VirtReg))
1863     return ~0u;
1864 
1865   LLVM_DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
1866 
1867   const ssize_t EntryStackSize = RecolorStack.size();
1868 
1869   // Ranges must be Done.
1870   assert((ExtraInfo->getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
1871          "Last chance recoloring should really be last chance");
1872   // Set the max depth to LastChanceRecoloringMaxDepth.
  // We may want to reconsider that if we end up with a search space that is
  // too large for targets with hundreds of registers.
  // Indeed, in that case we may want to cut off the search earlier.
1876   if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
1877     LLVM_DEBUG(dbgs() << "Abort because max depth has been reached.\n");
1878     CutOffInfo |= CO_Depth;
1879     return ~0u;
1880   }
1881 
  // Set of live intervals that will need to be recolored.
1883   SmallLISet RecoloringCandidates;
1884 
  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
1886   // this recoloring "session".
1887   assert(!FixedRegisters.count(VirtReg.reg()));
1888   FixedRegisters.insert(VirtReg.reg());
1889   SmallVector<Register, 4> CurrentNewVRegs;
1890 
1891   for (MCRegister PhysReg : Order) {
1892     assert(PhysReg.isValid());
1893     LLVM_DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
1894                       << printReg(PhysReg, TRI) << '\n');
1895     RecoloringCandidates.clear();
1896     CurrentNewVRegs.clear();
1897 
1898     // It is only possible to recolor virtual register interference.
1899     if (Matrix->checkInterference(VirtReg, PhysReg) >
1900         LiveRegMatrix::IK_VirtReg) {
1901       LLVM_DEBUG(
1902           dbgs() << "Some interferences are not with virtual registers.\n");
1903 
1904       continue;
1905     }
1906 
    // Give up early on this PhysReg if it is obvious we cannot recolor all
1908     // the interferences.
1909     if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
1910                                     FixedRegisters)) {
1911       LLVM_DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
1912       continue;
1913     }
1914 
1915     // RecoloringCandidates contains all the virtual registers that interfere
1916     // with VirtReg on PhysReg (or one of its aliases). Enqueue them for
1917     // recoloring and perform the actual recoloring.
1918     PQueue RecoloringQueue;
1919     for (const LiveInterval *RC : RecoloringCandidates) {
1920       Register ItVirtReg = RC->reg();
1921       enqueue(RecoloringQueue, RC);
1922       assert(VRM->hasPhys(ItVirtReg) &&
1923              "Interferences are supposed to be with allocated variables");
1924 
1925       // Record the current allocation.
1926       RecolorStack.push_back(std::make_pair(RC, VRM->getPhys(ItVirtReg)));
1927 
      // Unassign the interference so its physical register becomes available.
1929       Matrix->unassign(*RC);
1930     }
1931 
    // Act as if VirtReg were assigned to PhysReg so that the underlying
    // recoloring has the right information about the interferences and
    // available colors.
1935     Matrix->assign(VirtReg, PhysReg);
1936 
1937     // Save the current recoloring state.
1938     // If we cannot recolor all the interferences, we will have to start again
1939     // at this point for the next physical register.
1940     SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
1941     if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
1942                                 FixedRegisters, RecolorStack, Depth)) {
1943       // Push the queued vregs into the main queue.
1944       for (Register NewVReg : CurrentNewVRegs)
1945         NewVRegs.push_back(NewVReg);
      // Do not mess up the global assignment process, i.e., VirtReg must be
      // unassigned.
1948       Matrix->unassign(VirtReg);
1949       return PhysReg;
1950     }
1951 
1952     LLVM_DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to "
1953                       << printReg(PhysReg, TRI) << '\n');
1954 
1955     // The recoloring attempt failed, undo the changes.
1956     FixedRegisters = SaveFixedRegisters;
1957     Matrix->unassign(VirtReg);
1958 
1959     // For a newly created vreg which is also in RecoloringCandidates,
1960     // don't add it to NewVRegs because its physical register will be restored
1961     // below. Other vregs in CurrentNewVRegs are created by calling
1962     // selectOrSplit and should be added into NewVRegs.
1963     for (Register &R : CurrentNewVRegs) {
1964       if (RecoloringCandidates.count(&LIS->getInterval(R)))
1965         continue;
1966       NewVRegs.push_back(R);
1967     }
1968 
1969     // Roll back our unsuccessful recoloring. Also roll back any successful
1970     // recolorings in any recursive recoloring attempts, since it's possible
1971     // they would have introduced conflicts with assignments we will be
1972     // restoring further up the stack. Perform all unassignments prior to
1973     // reassigning, since sub-recolorings may have conflicted with the registers
1974     // we are going to restore to their original assignments.
1975     for (ssize_t I = RecolorStack.size() - 1; I >= EntryStackSize; --I) {
1976       const LiveInterval *LI;
1977       MCRegister PhysReg;
1978       std::tie(LI, PhysReg) = RecolorStack[I];
1979 
1980       if (VRM->hasPhys(LI->reg()))
1981         Matrix->unassign(*LI);
1982     }
1983 
1984     for (size_t I = EntryStackSize; I != RecolorStack.size(); ++I) {
1985       const LiveInterval *LI;
1986       MCRegister PhysReg;
1987       std::tie(LI, PhysReg) = RecolorStack[I];
1988       if (!LI->empty() && !MRI->reg_nodbg_empty(LI->reg()))
1989         Matrix->assign(*LI, PhysReg);
1990     }
1991 
1992     // Pop the stack of recoloring attempts.
1993     RecolorStack.resize(EntryStackSize);
1994   }
1995 
  // Last chance recoloring did not work either; give up.
1997   return ~0u;
1998 }
1999 
2000 /// tryRecoloringCandidates - Try to assign a new color to every register
/// in \p RecoloringQueue.
2002 /// \p NewRegs will contain any new virtual register created during the
2003 /// recoloring process.
2004 /// \p FixedRegisters[in/out] contains all the registers that have been
2005 /// recolored.
2006 /// \return true if all virtual registers in RecoloringQueue were successfully
2007 /// recolored, false otherwise.
2008 bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
2009                                        SmallVectorImpl<Register> &NewVRegs,
2010                                        SmallVirtRegSet &FixedRegisters,
2011                                        RecoloringStack &RecolorStack,
2012                                        unsigned Depth) {
2013   while (!RecoloringQueue.empty()) {
2014     const LiveInterval *LI = dequeue(RecoloringQueue);
2015     LLVM_DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
2016     MCRegister PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
2017                                            RecolorStack, Depth + 1);
2018     // When splitting happens, the live-range may actually be empty.
    // In that case, it is okay to continue the recoloring even
2020     // if we did not find an alternative color for it. Indeed,
2021     // there will not be anything to color for LI in the end.
2022     if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
2023       return false;
2024 
2025     if (!PhysReg) {
      assert(LI->empty() &&
             "Only an empty live-range does not require a register");
2027       LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2028                         << " succeeded. Empty LI.\n");
2029       continue;
2030     }
2031     LLVM_DEBUG(dbgs() << "Recoloring of " << *LI
2032                       << " succeeded with: " << printReg(PhysReg, TRI) << '\n');
2033 
2034     Matrix->assign(*LI, PhysReg);
2035     FixedRegisters.insert(LI->reg());
2036   }
2037   return true;
2038 }
2039 
2040 //===----------------------------------------------------------------------===//
2041 //                            Main Entry Point
2042 //===----------------------------------------------------------------------===//
2043 
2044 MCRegister RAGreedy::selectOrSplit(const LiveInterval &VirtReg,
2045                                    SmallVectorImpl<Register> &NewVRegs) {
2046   CutOffInfo = CO_None;
2047   LLVMContext &Ctx = MF->getFunction().getContext();
2048   SmallVirtRegSet FixedRegisters;
2049   RecoloringStack RecolorStack;
2050   MCRegister Reg =
2051       selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters, RecolorStack);
2052   if (Reg == ~0U && (CutOffInfo != CO_None)) {
2053     uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
2054     if (CutOffEncountered == CO_Depth)
2055       Ctx.emitError("register allocation failed: maximum depth for recoloring "
2056                     "reached. Use -fexhaustive-register-search to skip "
2057                     "cutoffs");
2058     else if (CutOffEncountered == CO_Interf)
2059       Ctx.emitError("register allocation failed: maximum interference for "
2060                     "recoloring reached. Use -fexhaustive-register-search "
2061                     "to skip cutoffs");
2062     else if (CutOffEncountered == (CO_Depth | CO_Interf))
2063       Ctx.emitError("register allocation failed: maximum interference and "
2064                     "depth for recoloring reached. Use "
2065                     "-fexhaustive-register-search to skip cutoffs");
2066   }
2067   return Reg;
2068 }
2069 
/// Using a CSR for the first time has a cost because it causes push|pop
/// to be added to the prologue|epilogue. Splitting a cold section of the
/// live range, or spilling the live range in the cold path, can have lower
/// cost than using the CSR for the first time. Returns the physical register
/// if we decide to use the CSR; otherwise returns 0.
2076 MCRegister RAGreedy::tryAssignCSRFirstTime(
2077     const LiveInterval &VirtReg, AllocationOrder &Order, MCRegister PhysReg,
2078     uint8_t &CostPerUseLimit, SmallVectorImpl<Register> &NewVRegs) {
2079   if (ExtraInfo->getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
2080     // We choose spill over using the CSR for the first time if the spill cost
2081     // is lower than CSRCost.
2082     SA->analyze(&VirtReg);
2083     if (calcSpillCost() >= CSRCost)
2084       return PhysReg;
2085 
2086     // We are going to spill, set CostPerUseLimit to 1 to make sure that
2087     // we will not use a callee-saved register in tryEvict.
2088     CostPerUseLimit = 1;
2089     return 0;
2090   }
2091   if (ExtraInfo->getStage(VirtReg) < RS_Split) {
2092     // We choose pre-splitting over using the CSR for the first time if
2093     // the cost of splitting is lower than CSRCost.
2094     SA->analyze(&VirtReg);
2095     unsigned NumCands = 0;
2096     BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
2097     unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
2098                                                  NumCands, true /*IgnoreCSR*/);
2099     if (BestCand == NoCand)
2100       // Use the CSR if we can't find a region split below CSRCost.
2101       return PhysReg;
2102 
2103     // Perform the actual pre-splitting.
2104     doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
2105     return 0;
2106   }
2107   return PhysReg;
2108 }
2109 
2110 void RAGreedy::aboutToRemoveInterval(const LiveInterval &LI) {
2111   // Do not keep invalid information around.
2112   SetOfBrokenHints.remove(&LI);
2113 }
2114 
2115 void RAGreedy::initializeCSRCost() {
  // We use the larger of the command-line option and the value reported
  // by TRI.
2118   CSRCost = BlockFrequency(
2119       std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
2120   if (!CSRCost.getFrequency())
2121     return;
2122 
2123   // Raw cost is relative to Entry == 2^14; scale it appropriately.
2124   uint64_t ActualEntry = MBFI->getEntryFreq();
2125   if (!ActualEntry) {
2126     CSRCost = 0;
2127     return;
2128   }
2129   uint64_t FixedEntry = 1 << 14;
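  // For example, if the actual entry frequency is 1 << 13 (half of
  // FixedEntry), the raw CSR cost is scaled down by a factor of two.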
2130   if (ActualEntry < FixedEntry)
2131     CSRCost *= BranchProbability(ActualEntry, FixedEntry);
2132   else if (ActualEntry <= UINT32_MAX)
2133     // Invert the fraction and divide.
2134     CSRCost /= BranchProbability(FixedEntry, ActualEntry);
2135   else
2136     // Can't use BranchProbability in general, since it takes 32-bit numbers.
2137     CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
2138 }
2139 
2140 /// Collect the hint info for \p Reg.
2141 /// The results are stored into \p Out.
2142 /// \p Out is not cleared before being populated.
2143 void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
2144   for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
2145     if (!Instr.isFullCopy())
2146       continue;
2147     // Look for the other end of the copy.
2148     Register OtherReg = Instr.getOperand(0).getReg();
2149     if (OtherReg == Reg) {
2150       OtherReg = Instr.getOperand(1).getReg();
2151       if (OtherReg == Reg)
2152         continue;
2153     }
2154     // Get the current assignment.
2155     MCRegister OtherPhysReg =
2156         OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
2157     // Push the collected information.
2158     Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
2159                            OtherPhysReg));
2160   }
2161 }
2162 
2163 /// Using the given \p List, compute the cost of the broken hints if
2164 /// \p PhysReg was used.
2165 /// \return The cost of \p List for \p PhysReg.
2166 BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
2167                                            MCRegister PhysReg) {
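  // Sum the frequencies of the copies whose assigned register differs from
  // PhysReg, i.e., the hints that would remain broken if PhysReg were used.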
2168   BlockFrequency Cost = 0;
2169   for (const HintInfo &Info : List) {
2170     if (Info.PhysReg != PhysReg)
2171       Cost += Info.Freq;
2172   }
2173   return Cost;
2174 }
2175 
2176 /// Using the register assigned to \p VirtReg, try to recolor
2177 /// all the live ranges that are copy-related with \p VirtReg.
2178 /// The recoloring is then propagated to all the live-ranges that have
2179 /// been recolored and so on, until no more copies can be coalesced or
2180 /// it is not profitable.
2181 /// For a given live range, profitability is determined by the sum of the
2182 /// frequencies of the non-identity copies it would introduce with the old
2183 /// and new register.
2184 void RAGreedy::tryHintRecoloring(const LiveInterval &VirtReg) {
2185   // We have a broken hint, check if it is possible to fix it by
2186   // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
2187   // some register and PhysReg may be available for the other live-ranges.
2188   SmallSet<Register, 4> Visited;
2189   SmallVector<unsigned, 2> RecoloringCandidates;
2190   HintsInfo Info;
2191   Register Reg = VirtReg.reg();
2192   MCRegister PhysReg = VRM->getPhys(Reg);
2193   // Start the recoloring algorithm from the input live-interval, then
2194   // it will propagate to the ones that are copy-related with it.
2195   Visited.insert(Reg);
2196   RecoloringCandidates.push_back(Reg);
2197 
2198   LLVM_DEBUG(dbgs() << "Trying to reconcile hints for: " << printReg(Reg, TRI)
2199                     << '(' << printReg(PhysReg, TRI) << ")\n");
2200 
2201   do {
2202     Reg = RecoloringCandidates.pop_back_val();
2203 
    // We cannot recolor physical registers.
2205     if (Reg.isPhysical())
2206       continue;
2207 
    // This may be a skipped register class.
2209     if (!VRM->hasPhys(Reg)) {
2210       assert(!ShouldAllocateClass(*TRI, *MRI->getRegClass(Reg)) &&
2211              "We have an unallocated variable which should have been handled");
2212       continue;
2213     }
2214 
2215     // Get the live interval mapped with this virtual register to be able
2216     // to check for the interference with the new color.
2217     LiveInterval &LI = LIS->getInterval(Reg);
2218     MCRegister CurrPhys = VRM->getPhys(Reg);
2219     // Check that the new color matches the register class constraints and
2220     // that it is free for this live range.
2221     if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
2222                                 Matrix->checkInterference(LI, PhysReg)))
2223       continue;
2224 
2225     LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << '(' << printReg(CurrPhys, TRI)
2226                       << ") is recolorable.\n");
2227 
2228     // Gather the hint info.
2229     Info.clear();
2230     collectHintInfo(Reg, Info);
2231     // Check if recoloring the live-range will increase the cost of the
2232     // non-identity copies.
2233     if (CurrPhys != PhysReg) {
2234       LLVM_DEBUG(dbgs() << "Checking profitability:\n");
2235       BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
2236       BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
2237       LLVM_DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
2238                         << "\nNew Cost: " << NewCopiesCost.getFrequency()
2239                         << '\n');
2240       if (OldCopiesCost < NewCopiesCost) {
2241         LLVM_DEBUG(dbgs() << "=> Not profitable.\n");
2242         continue;
2243       }
      // At this point, the new cost is lower than or equal to the old one.
      // If it is equal, we consider this profitable because it may expose
      // more recoloring opportunities.
2247       LLVM_DEBUG(dbgs() << "=> Profitable.\n");
2248       // Recolor the live-range.
2249       Matrix->unassign(LI);
2250       Matrix->assign(LI, PhysReg);
2251     }
2252     // Push all copy-related live-ranges to keep reconciling the broken
2253     // hints.
2254     for (const HintInfo &HI : Info) {
2255       if (Visited.insert(HI.Reg).second)
2256         RecoloringCandidates.push_back(HI.Reg);
2257     }
2258   } while (!RecoloringCandidates.empty());
2259 }
2260 
2261 /// Try to recolor broken hints.
2262 /// Broken hints may be repaired by recoloring when an evicted variable
2263 /// freed up a register for a larger live-range.
2264 /// Consider the following example:
2265 /// BB1:
2266 ///   a =
2267 ///   b =
2268 /// BB2:
2269 ///   ...
2270 ///   = b
2271 ///   = a
2272 /// Let us assume b gets split:
2273 /// BB1:
2274 ///   a =
2275 ///   b =
2276 /// BB2:
2277 ///   c = b
2278 ///   ...
2279 ///   d = c
2280 ///   = d
2281 ///   = a
/// Because of how the allocation works, b, c, and d may be assigned different
2283 /// colors. Now, if a gets evicted later:
2284 /// BB1:
2285 ///   a =
2286 ///   st a, SpillSlot
2287 ///   b =
2288 /// BB2:
2289 ///   c = b
2290 ///   ...
2291 ///   d = c
2292 ///   = d
2293 ///   e = ld SpillSlot
2294 ///   = e
/// It is likely that we can assign the same register to b, c, and d,
2296 /// getting rid of 2 copies.
2297 void RAGreedy::tryHintsRecoloring() {
2298   for (const LiveInterval *LI : SetOfBrokenHints) {
2299     assert(LI->reg().isVirtual() &&
2300            "Recoloring is possible only for virtual registers");
2301     // Some dead defs may be around (e.g., because of debug uses).
2302     // Ignore those.
2303     if (!VRM->hasPhys(LI->reg()))
2304       continue;
2305     tryHintRecoloring(*LI);
2306   }
2307 }
2308 
2309 MCRegister RAGreedy::selectOrSplitImpl(const LiveInterval &VirtReg,
2310                                        SmallVectorImpl<Register> &NewVRegs,
2311                                        SmallVirtRegSet &FixedRegisters,
2312                                        RecoloringStack &RecolorStack,
2313                                        unsigned Depth) {
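  // Start with no restriction on the per-register cost. tryAssignCSRFirstTime
  // may tighten this so that tryEvict will not pick a callee-saved register.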
2314   uint8_t CostPerUseLimit = uint8_t(~0u);
2315   // First try assigning a free register.
2316   auto Order =
2317       AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
2318   if (MCRegister PhysReg =
2319           tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
2320     // When NewVRegs is not empty, we may have made decisions such as evicting
    // a virtual register; go with the earlier decisions and use the physical
2322     // register.
2323     if (CSRCost.getFrequency() &&
2324         EvictAdvisor->isUnusedCalleeSavedReg(PhysReg) && NewVRegs.empty()) {
2325       MCRegister CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
2326                                                 CostPerUseLimit, NewVRegs);
2327       if (CSRReg || !NewVRegs.empty())
2328         // Return now if we decide to use a CSR or create new vregs due to
2329         // pre-splitting.
2330         return CSRReg;
2331     } else
2332       return PhysReg;
2333   }
2334 
2335   LiveRangeStage Stage = ExtraInfo->getStage(VirtReg);
2336   LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
2337                     << ExtraInfo->getCascade(VirtReg.reg()) << '\n');
2338 
2339   // Try to evict a less worthy live range, but only for ranges from the primary
2340   // queue. The RS_Split ranges already failed to do this, and they should not
2341   // get a second chance until they have been split.
2342   if (Stage != RS_Split)
2343     if (Register PhysReg =
2344             tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
2345                      FixedRegisters)) {
2346       Register Hint = MRI->getSimpleHint(VirtReg.reg());
2347       // If VirtReg has a hint and that hint is broken record this
2348       // virtual register as a recoloring candidate for broken hint.
2349       // Indeed, since we evicted a variable in its neighborhood it is
2350       // likely we can at least partially recolor some of the
2351       // copy-related live-ranges.
2352       if (Hint && Hint != PhysReg)
2353         SetOfBrokenHints.insert(&VirtReg);
2354       return PhysReg;
2355     }
2356 
2357   assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
2358 
2359   // The first time we see a live range, don't try to split or spill.
2360   // Wait until the second time, when all smaller ranges have been allocated.
2361   // This gives a better picture of the interference to split around.
2362   if (Stage < RS_Split) {
2363     ExtraInfo->setStage(VirtReg, RS_Split);
2364     LLVM_DEBUG(dbgs() << "wait for second round\n");
2365     NewVRegs.push_back(VirtReg.reg());
2366     return 0;
2367   }
2368 
2369   if (Stage < RS_Spill) {
2370     // Try splitting VirtReg or interferences.
2371     unsigned NewVRegSizeBefore = NewVRegs.size();
2372     Register PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
2373     if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
2374       return PhysReg;
2375   }
2376 
2377   // If we couldn't allocate a register from spilling, there is probably some
2378   // invalid inline assembly. The base class will report it.
2379   if (Stage >= RS_Done || !VirtReg.isSpillable()) {
2380     return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
2381                                    RecolorStack, Depth);
2382   }
2383 
2384   // Finally spill VirtReg itself.
2385   if ((EnableDeferredSpilling ||
2386        TRI->shouldUseDeferredSpillingForVirtReg(*MF, VirtReg)) &&
2387       ExtraInfo->getStage(VirtReg) < RS_Memory) {
2388     // TODO: This is experimental and in particular, we do not model
2389     // the live range splitting done by spilling correctly.
2390     // We would need a deep integration with the spiller to do the
2391     // right thing here. Anyway, that is still good for early testing.
2392     ExtraInfo->setStage(VirtReg, RS_Memory);
2393     LLVM_DEBUG(dbgs() << "Do as if this register is in memory\n");
2394     NewVRegs.push_back(VirtReg.reg());
2395   } else {
2396     NamedRegionTimer T("spill", "Spiller", TimerGroupName,
2397                        TimerGroupDescription, TimePassesIsEnabled);
2398     LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
2399     spiller().spill(LRE);
2400     ExtraInfo->setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
2401 
    // Tell LiveDebugVariables about the new ranges. Ranges not covered by
    // the new regs are kept in LDV (still mapping to the old register), until
2404     // we rewrite spilled locations in LDV at a later stage.
2405     DebugVars->splitRegister(VirtReg.reg(), LRE.regs(), *LIS);
2406 
2407     if (VerifyEnabled)
2408       MF->verify(this, "After spilling");
2409   }
2410 
2411   // The live virtual register requesting allocation was spilled, so tell
2412   // the caller not to allocate anything during this round.
2413   return 0;
2414 }
2415 
2416 void RAGreedy::RAGreedyStats::report(MachineOptimizationRemarkMissed &R) {
2417   using namespace ore;
2418   if (Spills) {
2419     R << NV("NumSpills", Spills) << " spills ";
2420     R << NV("TotalSpillsCost", SpillsCost) << " total spills cost ";
2421   }
2422   if (FoldedSpills) {
2423     R << NV("NumFoldedSpills", FoldedSpills) << " folded spills ";
2424     R << NV("TotalFoldedSpillsCost", FoldedSpillsCost)
2425       << " total folded spills cost ";
2426   }
2427   if (Reloads) {
2428     R << NV("NumReloads", Reloads) << " reloads ";
2429     R << NV("TotalReloadsCost", ReloadsCost) << " total reloads cost ";
2430   }
2431   if (FoldedReloads) {
2432     R << NV("NumFoldedReloads", FoldedReloads) << " folded reloads ";
2433     R << NV("TotalFoldedReloadsCost", FoldedReloadsCost)
2434       << " total folded reloads cost ";
2435   }
2436   if (ZeroCostFoldedReloads)
2437     R << NV("NumZeroCostFoldedReloads", ZeroCostFoldedReloads)
2438       << " zero cost folded reloads ";
2439   if (Copies) {
2440     R << NV("NumVRCopies", Copies) << " virtual registers copies ";
2441     R << NV("TotalCopiesCost", CopiesCost) << " total copies cost ";
2442   }
2443 }
2444 
2445 RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
2446   RAGreedyStats Stats;
2447   const MachineFrameInfo &MFI = MF->getFrameInfo();
2448   int FI;
2449 
2450   auto isSpillSlotAccess = [&MFI](const MachineMemOperand *A) {
2451     return MFI.isSpillSlotObjectIndex(cast<FixedStackPseudoSourceValue>(
2452         A->getPseudoValue())->getFrameIndex());
2453   };
2454   auto isPatchpointInstr = [](const MachineInstr &MI) {
2455     return MI.getOpcode() == TargetOpcode::PATCHPOINT ||
2456            MI.getOpcode() == TargetOpcode::STACKMAP ||
2457            MI.getOpcode() == TargetOpcode::STATEPOINT;
2458   };
2459   for (MachineInstr &MI : MBB) {
2460     if (MI.isCopy()) {
2461       const MachineOperand &Dest = MI.getOperand(0);
2462       const MachineOperand &Src = MI.getOperand(1);
2463       Register SrcReg = Src.getReg();
2464       Register DestReg = Dest.getReg();
2465       // Only count `COPY`s with a virtual register as source or destination.
2466       if (SrcReg.isVirtual() || DestReg.isVirtual()) {
2467         if (SrcReg.isVirtual()) {
2468           SrcReg = VRM->getPhys(SrcReg);
2469           if (Src.getSubReg())
2470             SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
2471         }
2472         if (DestReg.isVirtual()) {
2473           DestReg = VRM->getPhys(DestReg);
2474           if (Dest.getSubReg())
2475             DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
2476         }
2477         if (SrcReg != DestReg)
2478           ++Stats.Copies;
2479       }
2480       continue;
2481     }
2482 
2483     SmallVector<const MachineMemOperand *, 2> Accesses;
2484     if (TII->isLoadFromStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2485       ++Stats.Reloads;
2486       continue;
2487     }
2488     if (TII->isStoreToStackSlot(MI, FI) && MFI.isSpillSlotObjectIndex(FI)) {
2489       ++Stats.Spills;
2490       continue;
2491     }
2492     if (TII->hasLoadFromStackSlot(MI, Accesses) &&
2493         llvm::any_of(Accesses, isSpillSlotAccess)) {
2494       if (!isPatchpointInstr(MI)) {
2495         Stats.FoldedReloads += Accesses.size();
2496         continue;
2497       }
      // For statepoints there may be both folded and zero-cost folded stack
      // reloads.
2499       std::pair<unsigned, unsigned> NonZeroCostRange =
2500           TII->getPatchpointUnfoldableRange(MI);
2501       SmallSet<unsigned, 16> FoldedReloads;
2502       SmallSet<unsigned, 16> ZeroCostFoldedReloads;
2503       for (unsigned Idx = 0, E = MI.getNumOperands(); Idx < E; ++Idx) {
2504         MachineOperand &MO = MI.getOperand(Idx);
2505         if (!MO.isFI() || !MFI.isSpillSlotObjectIndex(MO.getIndex()))
2506           continue;
2507         if (Idx >= NonZeroCostRange.first && Idx < NonZeroCostRange.second)
2508           FoldedReloads.insert(MO.getIndex());
2509         else
2510           ZeroCostFoldedReloads.insert(MO.getIndex());
2511       }
      // If a stack slot is used in a folded reload, it is not zero cost.
2513       for (unsigned Slot : FoldedReloads)
2514         ZeroCostFoldedReloads.erase(Slot);
2515       Stats.FoldedReloads += FoldedReloads.size();
2516       Stats.ZeroCostFoldedReloads += ZeroCostFoldedReloads.size();
2517       continue;
2518     }
2519     Accesses.clear();
2520     if (TII->hasStoreToStackSlot(MI, Accesses) &&
2521         llvm::any_of(Accesses, isSpillSlotAccess)) {
2522       Stats.FoldedSpills += Accesses.size();
2523     }
2524   }
  // Weight the collected statistics by the relative frequency of this basic
  // block.
2527   float RelFreq = MBFI->getBlockFreqRelativeToEntryBlock(&MBB);
2528   Stats.ReloadsCost = RelFreq * Stats.Reloads;
2529   Stats.FoldedReloadsCost = RelFreq * Stats.FoldedReloads;
2530   Stats.SpillsCost = RelFreq * Stats.Spills;
2531   Stats.FoldedSpillsCost = RelFreq * Stats.FoldedSpills;
2532   Stats.CopiesCost = RelFreq * Stats.Copies;
2533   return Stats;
2534 }
2535 
2536 RAGreedy::RAGreedyStats RAGreedy::reportStats(MachineLoop *L) {
2537   RAGreedyStats Stats;
2538 
  // Sum up the spills and reloads in subloops.
2540   for (MachineLoop *SubLoop : *L)
2541     Stats.add(reportStats(SubLoop));
2542 
2543   for (MachineBasicBlock *MBB : L->getBlocks())
2544     // Handle blocks that were not included in subloops.
2545     if (Loops->getLoopFor(MBB) == L)
2546       Stats.add(computeStats(*MBB));
2547 
2548   if (!Stats.isEmpty()) {
2549     using namespace ore;
2550 
2551     ORE->emit([&]() {
2552       MachineOptimizationRemarkMissed R(DEBUG_TYPE, "LoopSpillReloadCopies",
2553                                         L->getStartLoc(), L->getHeader());
2554       Stats.report(R);
2555       R << "generated in loop";
2556       return R;
2557     });
2558   }
2559   return Stats;
2560 }
2561 
2562 void RAGreedy::reportStats() {
2563   if (!ORE->allowExtraAnalysis(DEBUG_TYPE))
2564     return;
2565   RAGreedyStats Stats;
2566   for (MachineLoop *L : *Loops)
2567     Stats.add(reportStats(L));
2568   // Process non-loop blocks.
2569   for (MachineBasicBlock &MBB : *MF)
2570     if (!Loops->getLoopFor(&MBB))
2571       Stats.add(computeStats(MBB));
2572   if (!Stats.isEmpty()) {
2573     using namespace ore;
2574 
2575     ORE->emit([&]() {
2576       DebugLoc Loc;
2577       if (auto *SP = MF->getFunction().getSubprogram())
2578         Loc = DILocation::get(SP->getContext(), SP->getLine(), 1, SP);
2579       MachineOptimizationRemarkMissed R(DEBUG_TYPE, "SpillReloadCopies", Loc,
2580                                         &MF->front());
2581       Stats.report(R);
2582       R << "generated in function";
2583       return R;
2584     });
2585   }
2586 }
2587 
2588 bool RAGreedy::hasVirtRegAlloc() {
2589   for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
2590     Register Reg = Register::index2VirtReg(I);
2591     if (MRI->reg_nodbg_empty(Reg))
2592       continue;
2593     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2594     if (!RC)
2595       continue;
2596     if (ShouldAllocateClass(*TRI, *RC))
2597       return true;
2598   }
2599 
2600   return false;
2601 }
2602 
2603 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
2604   LLVM_DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
2605                     << "********** Function: " << mf.getName() << '\n');
2606 
2607   MF = &mf;
2608   TII = MF->getSubtarget().getInstrInfo();
2609 
2610   if (VerifyEnabled)
2611     MF->verify(this, "Before greedy register allocator");
2612 
2613   RegAllocBase::init(getAnalysis<VirtRegMap>(),
2614                      getAnalysis<LiveIntervals>(),
2615                      getAnalysis<LiveRegMatrix>());
2616 
2617   // Early return if there is no virtual register to be allocated to a
2618   // physical register.
2619   if (!hasVirtRegAlloc())
2620     return false;
2621 
2622   Indexes = &getAnalysis<SlotIndexes>();
2623   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
2624   DomTree = &getAnalysis<MachineDominatorTree>();
2625   ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
2626   Loops = &getAnalysis<MachineLoopInfo>();
2627   Bundles = &getAnalysis<EdgeBundles>();
2628   SpillPlacer = &getAnalysis<SpillPlacement>();
2629   DebugVars = &getAnalysis<LiveDebugVariables>();
2630 
2631   initializeCSRCost();
2632 
2633   RegCosts = TRI->getRegisterCosts(*MF);
2634   RegClassPriorityTrumpsGlobalness =
2635       GreedyRegClassPriorityTrumpsGlobalness.getNumOccurrences()
2636           ? GreedyRegClassPriorityTrumpsGlobalness
2637           : TRI->regClassPriorityTrumpsGlobalness(*MF);
2638 
2639   ReverseLocalAssignment = GreedyReverseLocalAssignment.getNumOccurrences()
2640                                ? GreedyReverseLocalAssignment
2641                                : TRI->reverseLocalAssignment();
2642 
2643   ExtraInfo.emplace();
2644   EvictAdvisor =
2645       getAnalysis<RegAllocEvictionAdvisorAnalysis>().getAdvisor(*MF, *this);
2646   PriorityAdvisor =
2647       getAnalysis<RegAllocPriorityAdvisorAnalysis>().getAdvisor(*MF, *this);
2648 
2649   VRAI = std::make_unique<VirtRegAuxInfo>(*MF, *LIS, *VRM, *Loops, *MBFI);
2650   SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM, *VRAI));
2651 
2652   VRAI->calculateSpillWeightsAndHints();
2653 
2654   LLVM_DEBUG(LIS->dump());
2655 
2656   SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
2657   SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
2658 
2659   IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
2660   GlobalCand.resize(32);  // This will grow as needed.
2661   SetOfBrokenHints.clear();
2662 
2663   allocatePhysRegs();
2664   tryHintsRecoloring();
2665 
2666   if (VerifyEnabled)
2667     MF->verify(this, "Before post optimization");
2668   postOptimization();
2669   reportStats();
2670 
2671   releaseMemory();
2672   return true;
2673 }
2674