#![allow(non_snake_case)]
#![allow(non_camel_case_types)]

//! Core implementation of the backtracking allocator.

use log::{debug, info, log_enabled, Level};
use smallvec::SmallVec;
use std::default;
use std::fmt;

use crate::analysis_data_flow::{add_raw_reg_vecs_for_insn, does_inst_use_def_or_mod_reg};
use crate::analysis_main::{run_analysis, AnalysisInfo};
use crate::avl_tree::{AVLTree, AVL_NULL};
use crate::bt_coalescing_analysis::{do_coalescing_analysis, Hint};
use crate::bt_commitment_map::{CommitmentMap, RangeFragAndVLRIx};
use crate::bt_spillslot_allocator::SpillSlotAllocator;
use crate::bt_vlr_priority_queue::VirtualRangePrioQ;
use crate::data_structures::{
    BlockIx, InstIx, InstPoint, Point, RangeFrag, RangeFragIx, RealRange, RealReg, RealRegUniverse,
    Reg, RegVecBounds, RegVecs, Set, SortedRangeFrags, SpillCost, SpillSlot, TypedIxVec,
    VirtualRange, VirtualRangeIx, VirtualReg, Writable,
};
use crate::inst_stream::{edit_inst_stream, InstToInsert, InstToInsertAndPoint};
use crate::sparse_set::SparseSetU;
use crate::union_find::UnionFindEquivClasses;
use crate::{Function, RegAllocError, RegAllocResult};

#[derive(Clone)]
pub struct BacktrackingOptions {
    /// Should the register allocator generate block annotations?
    pub request_block_annotations: bool,
}

impl default::Default for BacktrackingOptions {
    fn default() -> Self {
        Self {
            request_block_annotations: false,
        }
    }
}

impl fmt::Debug for BacktrackingOptions {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(
            fmt,
            "backtracking (block annotations: {})",
            self.request_block_annotations
        )
    }
}
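
// A minimal usage sketch (illustrative only; `alloc_main` below is the real
// entry point):
//
//     let opts = BacktrackingOptions {
//         request_block_annotations: true,
//         ..Default::default()
//     };
//     // ... then pass `&opts` to `alloc_main`.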

//=============================================================================
// The per-real-register state
//
// Relevant methods are expected to be parameterised by the same VirtualRange
// env as used in calls to `VirtualRangePrioQ`.

struct PerRealReg {
    // The current committed fragments for this RealReg.
    committed: CommitmentMap,

    // The set of VirtualRanges which have been assigned to this RealReg.  The
    // union of their frags will be equal to `committed` only if this RealReg
    // has no RealRanges.  If this RealReg does have RealRanges the
    // aforementioned union will be exactly the subset of `committed` not used
    // by the RealRanges.
    vlrixs_assigned: Set<VirtualRangeIx>,
}
impl PerRealReg {
    fn new() -> Self {
        Self {
            committed: CommitmentMap::new(),
            vlrixs_assigned: Set::<VirtualRangeIx>::empty(),
        }
    }

    #[inline(never)]
    fn add_RealRange(&mut self, to_add: &RealRange, frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) {
        // Commit this register to `to_add`, irrevocably.  Don't add it to
        // `vlrixs_assigned` since we will never want to later evict the
        // assignment.
        self.committed
            .add_indirect(&to_add.sorted_frags, None, frag_env);
    }

    #[inline(never)]
    fn add_VirtualRange(
        &mut self,
        to_add_vlrix: VirtualRangeIx,
        vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    ) {
        let to_add_vlr = &vlr_env[to_add_vlrix];
        self.committed
            .add(&to_add_vlr.sorted_frags, Some(to_add_vlrix));
        assert!(!self.vlrixs_assigned.contains(to_add_vlrix));
        self.vlrixs_assigned.insert(to_add_vlrix);
    }

    #[inline(never)]
    fn del_VirtualRange(
        &mut self,
        to_del_vlrix: VirtualRangeIx,
        vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    ) {
        // Remove it from `vlrixs_assigned`
        if self.vlrixs_assigned.contains(to_del_vlrix) {
            self.vlrixs_assigned.delete(to_del_vlrix);
        } else {
            panic!("PerRealReg: del_VirtualRange on VR not in vlrixs_assigned");
        }
        // Remove it from `committed`
        let to_del_vlr = &vlr_env[to_del_vlrix];
        self.committed.del(&to_del_vlr.sorted_frags);
    }
}
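
// Typical lifecycle of a (register, range) pairing during allocation -- a
// sketch only, with hypothetical variable names:
//
//     let mut prr = PerRealReg::new();
//     prr.add_VirtualRange(vlrix, &vlr_env);   // tentative assignment
//     // ... later, if evicted in favour of a costlier range ...
//     prr.del_VirtualRange(vlrix, &vlr_env);   // back to the priority queue
//
// whereas `add_RealRange` has no matching `del_`: real-reg commitments are
// permanent.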

// HELPER FUNCTION
// For a given `RangeFrag`, traverse the commitment tree rooted at `root`,
// adding to `running_set` the set of VLRIxs that the frag intersects, and
// adding their spill costs to `running_cost`.  Return false if, for one of
// the four reasons documented on `find_evict_set` below, the traversal has
// been abandoned, and true if the search completed successfully.
fn search_commitment_tree<IsAllowedToEvict>(
    // The running state, threaded through the tree traversal.  These
    // accumulate ranges and costs as we traverse the tree.  These are mutable
    // because our caller (`find_evict_set`) will want to try and allocate
    // multiple `RangeFrag`s in this tree, not just a single one, and so it
    // needs a way to accumulate the total evict-cost and evict-set for all
    // the `RangeFrag`s it iterates over.
    running_set: &mut SparseSetU<[VirtualRangeIx; 4]>,
    running_cost: &mut SpillCost,
    // The tree to search.
    tree: &AVLTree<RangeFragAndVLRIx>,
    // The RangeFrag we want to accommodate.
    pair_frag: &RangeFrag,
    spill_cost_budget: &SpillCost,
    allowed_to_evict: &IsAllowedToEvict,
    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
) -> bool
where
    IsAllowedToEvict: Fn(VirtualRangeIx) -> bool,
{
    let mut stack = SmallVec::<[u32; 32]>::new();
    assert!(tree.root != AVL_NULL);
    stack.push(tree.root);

    while let Some(curr) = stack.pop() {
        let curr_node = &tree.pool[curr as usize];
        let curr_node_item = &curr_node.item;
        let curr_frag = &curr_node_item.frag;

        // Figure out whether `pair_frag` overlaps the root of the current
        // subtree.
        let overlaps_curr = pair_frag.last >= curr_frag.first && pair_frag.first <= curr_frag.last;

        // Let's first consider the current node.  If we need it but it's not
        // evictable, we might as well stop now.
        if overlaps_curr {
            // This frag has no associated VirtualRangeIx, so it is part of a
            // RealRange, and hence not evictable.
            if curr_node_item.mb_vlrix.is_none() {
                return false;
            }
            // Maybe this one is a spill range, in which case, it can't be
            // evicted.
            let vlrix_to_evict = curr_node_item.mb_vlrix.unwrap();
            let vlr_to_evict = &vlr_env[vlrix_to_evict];
            if vlr_to_evict.spill_cost.is_infinite() {
                return false;
            }
            // Check that this range alone doesn't exceed our total spill
            // cost.  NB: given the identical check just below (at the point
            // where the cost is added to the running total), this isn't
            // actually necessary; however it means that we avoid doing two
            // SparseSet::contains operations before exiting.  This saves
            // around 0.3% instruction count for large inputs.
            if !vlr_to_evict.spill_cost.is_less_than(spill_cost_budget) {
                return false;
            }
            // Maybe our caller doesn't want us to evict this one.
            if !allowed_to_evict(vlrix_to_evict) {
                return false;
            }
            // Ok!  We can evict the current node.  Update the running state
            // accordingly.  Note that we may be presented with the same VLRIx
            // to evict multiple times, so we must be careful to add the cost
            // of it only once.
            if !running_set.contains(vlrix_to_evict) {
                let mut tmp_cost = *running_cost;
                tmp_cost.add(&vlr_to_evict.spill_cost);
                // See the comment above about why this check is duplicated.
                if !tmp_cost.is_less_than(spill_cost_budget) {
                    return false;
                }
                *running_cost = tmp_cost;
                running_set.insert(vlrix_to_evict);
            }
        }

        // Now figure out if we need to visit the subtrees, and if so push the
        // relevant nodes.  Whether we visit the left or right subtree first
        // is unimportant, at least from a correctness perspective.
        let must_check_left = pair_frag.first < curr_frag.first;
        if must_check_left {
            let left_of_curr = tree.pool[curr as usize].left;
            if left_of_curr != AVL_NULL {
                stack.push(left_of_curr);
            }
        }

        let must_check_right = pair_frag.last > curr_frag.last;
        if must_check_right {
            let right_of_curr = tree.pool[curr as usize].right;
            if right_of_curr != AVL_NULL {
                stack.push(right_of_curr);
            }
        }
    }

    // If we get here, it means that `pair_frag` can be accommodated if we
    // evict all the frags it overlaps in `tree`.
    //
    // Stay sane ..
    assert!(running_cost.is_finite());
    assert!(running_cost.is_less_than(spill_cost_budget));
    true
}
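
// The overlap and descent tests above treat fragments as closed (inclusive)
// intervals.  As a self-contained illustration (hypothetical helper, not
// part of the allocator):
//
//     fn overlaps(a: (u32, u32), b: (u32, u32)) -> bool {
//         // `a` and `b` are inclusive (first, last) pairs.
//         a.1 >= b.0 && a.0 <= b.1
//     }
//
//     assert!(overlaps((3, 7), (7, 9)));   // shared endpoint counts
//     assert!(!overlaps((3, 7), (8, 9)));  // strictly disjoint
//
// The subtree pruning follows from the tree being ordered by fragment
// position: the left subtree can only matter if `pair_frag` starts before
// `curr_frag`, and the right subtree only if it ends after `curr_frag`.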

impl PerRealReg {
    // Find the set of VirtualRangeIxs that would need to be evicted in order to
    // allocate `would_like_to_add` to this register.  Virtual ranges for which
    // `allowed_to_evict` returns false must not be considered as candidates for
    // eviction.  Also returns the total associated spill cost.  That spill cost
    // cannot be infinite.
    //
    // This can fail (return None) for four different reasons:
    //
    // - `would_like_to_add` interferes with a real-register-live-range
    //   commitment, so the register would be unavailable even if we evicted
    //   *all* virtual ranges assigned to it.
    //
    // - `would_like_to_add` interferes with a virtual range which is a spill
    //   range (has infinite cost).  We cannot evict those without risking
    //   non-termination of the overall allocation algorithm.
    //
    // - `would_like_to_add` interferes with a virtual range which
    //   `allowed_to_evict` rejects.  Our caller uses this mechanism when trying
    //   to do coalescing, to avoid the nonsensicality of evicting some part of
    //   a virtual live range group in order to allocate a member of the same
    //   group.
    //
    // - The total spill cost of the candidate set exceeds the spill cost of
    //   `would_like_to_add`.  This means that spilling them would be a net loss
    //   per our cost model.  Note that `would_like_to_add` may have an infinite
    //   spill cost, in which case it will "win" over all other
    //   non-infinite-cost eviction candidates.  This is by design (so as to
    //   guarantee that we can always allocate spill/reload bridges).
    #[inline(never)]
    fn find_evict_set<IsAllowedToEvict>(
        &self,
        would_like_to_add: VirtualRangeIx,
        allowed_to_evict: &IsAllowedToEvict,
        vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    ) -> Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)>
    where
        IsAllowedToEvict: Fn(VirtualRangeIx) -> bool,
    {
        // Firstly, if the commitment tree for this reg is empty, we can
        // declare success immediately.
        if self.committed.tree.root == AVL_NULL {
            let evict_set = SparseSetU::<[VirtualRangeIx; 4]>::empty();
            let evict_cost = SpillCost::zero();
            return Some((evict_set, evict_cost));
        }

        // The tree isn't empty, so we will have to do this the hard way: iterate
        // over all fragments in `would_like_to_add` and check them against the
        // tree.

        // Useful constants for the main loop
        let would_like_to_add_vlr = &vlr_env[would_like_to_add];
        let evict_cost_budget = would_like_to_add_vlr.spill_cost;
        // Note that `evict_cost_budget` can be infinite because
        // `would_like_to_add` might be a spill/reload range.

        // The overall evict set and cost so far.  These are updated as we iterate
        // over the fragments that make up `would_like_to_add`.
        let mut running_set = SparseSetU::<[VirtualRangeIx; 4]>::empty();
        let mut running_cost = SpillCost::zero();

        // "wlta" = would like to add
        for wlta_frag in &would_like_to_add_vlr.sorted_frags.frags {
            let wlta_frag_ok = search_commitment_tree(
                &mut running_set,
                &mut running_cost,
                &self.committed.tree,
                &wlta_frag,
                &evict_cost_budget,
                allowed_to_evict,
                vlr_env,
            );
            if !wlta_frag_ok {
                // This fragment won't fit, for one of the four reasons listed
                // above.  So give up now.
                return None;
            }
            // And move on to the next fragment.
        }

        // If we got here, it means that `would_like_to_add` can be accommodated \o/
        assert!(running_cost.is_finite());
        assert!(running_cost.is_less_than(&evict_cost_budget));
        Some((running_set, running_cost))
    }
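
    // A caller sketch (hypothetical names, mirroring the real call sites in
    // `alloc_main` below):
    //
    //     if let Some((evict_set, evict_cost)) =
    //         per_real_reg[rreg_no].find_evict_set(vlrix, &|_| true, &vlr_env)
    //     {
    //         // Eviction is worthwhile: evict_cost < vlr_env[vlrix].spill_cost.
    //         for to_evict in evict_set.iter() { /* del_VirtualRange, requeue */ }
    //     }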

    #[allow(dead_code)]
    #[inline(never)]
    fn show1_with_envs(&self, _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) -> String {
        //"in_use:   ".to_string() + &self.committed.show_with_frag_env(&frag_env)
        "(show1_with_envs:FIXME)".to_string()
    }
    #[allow(dead_code)]
    #[inline(never)]
    fn show2_with_envs(&self, _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) -> String {
        //"assigned: ".to_string() + &format!("{:?}", &self.vlrixs_assigned)
        "(show2_with_envs:FIXME)".to_string()
    }
}

//=============================================================================
// Printing the allocator's top level state

#[inline(never)]
fn print_RA_state(
    who: &str,
    _universe: &RealRegUniverse,
    // State components
    prioQ: &VirtualRangePrioQ,
    _perRealReg: &Vec<PerRealReg>,
    edit_list_move: &Vec<EditListItem>,
    edit_list_other: &Vec<EditListItem>,
    // The context (environment)
    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
) {
    debug!("<<<<====---- RA state at '{}' ----====", who);
    //for ix in 0..perRealReg.len() {
    //    if !&perRealReg[ix].committed.pairs.is_empty() {
    //        debug!(
    //            "{:<5}  {}",
    //            universe.regs[ix].1,
    //            &perRealReg[ix].show1_with_envs(&frag_env)
    //        );
    //        debug!("       {}", &perRealReg[ix].show2_with_envs(&frag_env));
    //        debug!("");
    //    }
    //}
    if !prioQ.is_empty() {
        for s in prioQ.show_with_envs(vlr_env) {
            debug!("{}", s);
        }
    }
    for eli in edit_list_move {
        debug!("ELI MOVE: {:?}", eli);
    }
    for eli in edit_list_other {
        debug!("ELI other: {:?}", eli);
    }
    debug!(">>>>");
}

//=============================================================================
// Allocator top level

/* (const) For each virtual live range
   - its sorted RangeFrags
   - its total size
   - its spill cost
   - (eventually) its assigned real register
   New VirtualRanges are created as we go, but existing ones are unchanged,
   apart from being tagged with their assigned real register.

   (mut) For each real register
   - the sorted RangeFrags assigned to it (todo: rm the M)
   - the virtual LR indices assigned to it.  This is so we can eject
     a VirtualRange from the commitment, as needed

   (mut) the set of VirtualRanges not yet allocated, prioritised by total size

   (mut) the edit list that is produced
*/
/*
fn show_commit_tab(commit_tab: &Vec::<SortedRangeFragIxs>,
                   who: &str,
                   context: &TypedIxVec::<RangeFragIx, RangeFrag>) -> String {
    let mut res = "Commit Table at '".to_string()
                  + &who.to_string() + &"'\n".to_string();
    let mut rregNo = 0;
    for smf in commit_tab.iter() {
        res += &"  ".to_string();
        res += &mkRealReg(rregNo).show();
        res += &" ".to_string();
        res += &smf.show_with_fenv(&context);
        res += &"\n".to_string();
        rregNo += 1;
    }
    res
}
*/

// VirtualRanges created by spilling all pertain to a single InstIx.  But
// within that InstIx, there are three kinds of "bridges":
#[derive(Clone, Copy, PartialEq)]
pub(crate) enum BridgeKind {
    RtoU, // A bridge for a USE.  This connects the reload to the use.
    RtoS, // A bridge for a MOD.  This connects the reload, the use/def
    // and the spill, since the value must first be reloaded, then
    // operated on, and finally re-spilled.
    DtoS, // A bridge for a DEF.  This connects the def to the spill.
}

impl fmt::Debug for BridgeKind {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        match self {
            BridgeKind::RtoU => write!(fmt, "R->U"),
            BridgeKind::RtoS => write!(fmt, "R->S"),
            BridgeKind::DtoS => write!(fmt, "D->S"),
        }
    }
}
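
// In terms of the four `Point`s within a single instruction (Reload, Use,
// Def, Spill -- the mapping used when the bridges are materialised further
// down, in `alloc_main`), the three bridge kinds span:
//
//     RtoU:  Reload ----> Use
//     RtoS:  Reload ---------------------> Spill
//     DtoS:                  Def --------> Spill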

#[derive(Clone, Copy)]
struct EditListItem {
    // This holds enough information to create a spill or reload instruction,
    // or both, and also specifies where in the instruction stream it/they
    // should be added.  Note that if the edit list as a whole specifies
    // multiple items for the same location, then it is assumed that the order
    // in which they execute isn't important.
    //
    // Some of the relevant info can be found via the VirtualRangeIx link:
    // (1) the real reg involved
    // (2) the place where the insn should go, since the VirtualRange should
    //    only have one RangeFrag, and we can deduce the correct location
    //    from that.
    // Despite (2) we also carry here the InstIx of the affected instruction
    // (there should be only one) since computing it via (2) is expensive.
    // This however gives a redundancy in representation against (2).  Beware!
    slot: SpillSlot,
    vlrix: VirtualRangeIx,
    kind: BridgeKind,
    iix: InstIx,
}

impl fmt::Debug for EditListItem {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(
            fmt,
            "(ELI: at {:?} for {:?} add {:?}, slot={:?})",
            self.iix, self.vlrix, self.kind, self.slot
        )
    }
}

// Allocator top level.  This function returns a result struct that contains
// the final sequence of instructions, possibly with fills/spills/moves
// spliced in and redundant moves elided, and with all virtual registers
// replaced with real registers. Allocation can fail if there are insufficient
// registers to even generate spill/reload code, or if the function appears to
// have any undefined VirtualReg/RealReg uses.

#[inline(never)]
pub fn alloc_main<F: Function>(
    func: &mut F,
    reg_universe: &RealRegUniverse,
    use_checker: bool,
    opts: &BacktrackingOptions,
) -> Result<RegAllocResult<F>, RegAllocError> {
    // -------- Perform initial liveness analysis --------
    // Note that the analysis phase can fail; hence we propagate any error.
    let AnalysisInfo {
        reg_vecs_and_bounds,
        real_ranges: rlr_env,
        virtual_ranges: mut vlr_env,
        range_frags: frag_env,
        range_metrics: frag_metrics_env,
        estimated_frequencies: est_freqs,
        inst_to_block_map,
        ..
    } = run_analysis(func, reg_universe).map_err(|err| RegAllocError::Analysis(err))?;

    assert!(reg_vecs_and_bounds.is_sanitized());
    assert!(frag_env.len() == frag_metrics_env.len());

    // Also perform analysis that finds all coalescing opportunities.
    let coalescing_info = do_coalescing_analysis(
        func,
        &reg_vecs_and_bounds,
        &rlr_env,
        &mut vlr_env,
        &frag_env,
        &est_freqs,
        &reg_universe,
    );
    let mut hints: TypedIxVec<VirtualRangeIx, SmallVec<[Hint; 8]>> = coalescing_info.0;
    let vlrEquivClasses: UnionFindEquivClasses<VirtualRangeIx> = coalescing_info.1;
    let is_vv_boundary_move: TypedIxVec<InstIx, bool> = coalescing_info.2;
    let vreg_to_vlrs_map: Vec</*vreg index,*/ SmallVec<[VirtualRangeIx; 3]>> = coalescing_info.3;
    assert!(hints.len() == vlr_env.len());

    // -------- Alloc main --------

    // Create initial state
    info!("alloc_main: begin");
    info!(
        "alloc_main:   in: {} insns in {} blocks",
        func.insns().len(),
        func.blocks().len()
    );
    let num_vlrs_initial = vlr_env.len();
    info!(
        "alloc_main:   in: {} VLRs, {} RLRs",
        num_vlrs_initial,
        rlr_env.len()
    );

    // This is fully populated by the ::new call.
    let mut prioQ = VirtualRangePrioQ::new(&vlr_env);

    // Whereas this is empty.  We have to populate it "by hand", by
    // effectively cloning the allocatable part (prefix) of the universe.
    let mut per_real_reg = Vec::<PerRealReg>::new();
    for _ in 0..reg_universe.allocable {
        // Doing this instead of simply .resize avoids needing Clone for
        // PerRealReg
        per_real_reg.push(PerRealReg::new());
    }
    for rlr in rlr_env.iter() {
        let rregIndex = rlr.rreg.get_index();
        // Ignore RealRanges for RealRegs that are not part of the allocatable
        // set.  As far as the allocator is concerned, such RealRegs simply
        // don't exist.
        if rregIndex >= reg_universe.allocable {
            continue;
        }
        per_real_reg[rregIndex].add_RealRange(&rlr, &frag_env);
    }

    let mut edit_list_move = Vec::<EditListItem>::new();
    let mut edit_list_other = Vec::<EditListItem>::new();
    if log_enabled!(Level::Debug) {
        debug!("");
        print_RA_state(
            "Initial",
            &reg_universe,
            &prioQ,
            &per_real_reg,
            &edit_list_move,
            &edit_list_other,
            &vlr_env,
            &frag_env,
        );
    }

    // This is also part of the running state.  `vlr_slot_env` tells us the
    // assigned spill slot for each VirtualRange, if any.
    // `spill_slot_allocator` decides on the assignments and writes them into
    // `vlr_slot_env`.
    let mut vlr_slot_env = TypedIxVec::<VirtualRangeIx, Option<SpillSlot>>::new();
    vlr_slot_env.resize(num_vlrs_initial, None);
    let mut spill_slot_allocator = SpillSlotAllocator::new();

    // Main allocation loop.  Each time round, pull out the longest
    // unallocated VirtualRange, and do one of three things:
    //
    // * allocate it to a RealReg, possibly by ejecting some existing
    //   allocation, but only one with a lower spill cost than this one, or
    //
    // * spill it.  This causes the VirtualRange to disappear.  It is replaced
    //   by a set of very short VirtualRanges to carry the spill and reload
    //   values.  Or,
    //
    // * split it.  This causes it to disappear but be replaced by two
    //   VirtualRanges which together constitute the original.
    debug!("");
    debug!("-- MAIN ALLOCATION LOOP (DI means 'direct', CO means 'coalesced'):");

    info!("alloc_main:   main allocation loop: begin");

    // ======== BEGIN Main allocation loop ========
    let mut num_vlrs_processed = 0; // stats only
    let mut num_vlrs_spilled = 0; // stats only
    let mut num_vlrs_evicted = 0; // stats only

    'main_allocation_loop: loop {
        debug!("-- still TODO          {}", prioQ.len());
        if false {
            if log_enabled!(Level::Debug) {
                debug!("");
                print_RA_state(
                    "Loop Top",
                    &reg_universe,
                    &prioQ,
                    &per_real_reg,
                    &edit_list_move,
                    &edit_list_other,
                    &vlr_env,
                    &frag_env,
                );
                debug!("");
            }
        }

        let mb_curr_vlrix = prioQ.get_longest_VirtualRange();
        if mb_curr_vlrix.is_none() {
            break 'main_allocation_loop;
        }

        num_vlrs_processed += 1;
        let curr_vlrix = mb_curr_vlrix.unwrap();
        let curr_vlr = &vlr_env[curr_vlrix];

        debug!("--   considering       {:?}:  {:?}", curr_vlrix, curr_vlr);

        assert!(curr_vlr.vreg.to_reg().is_virtual());
        assert!(curr_vlr.rreg.is_none());
        let curr_vlr_regclass = curr_vlr.vreg.get_class();
        let curr_vlr_rc = curr_vlr_regclass.rc_to_usize();

        // ====== BEGIN Try to do coalescing ======
        //
        // First, look through the hints for `curr_vlr`, collecting up candidate
        // real regs, in decreasing order of preference, in `hinted_regs`.  Note
        // that we don't have to consider the weights here, because the coalescing
        // analysis phase has already sorted the hints for the VLR so as to
        // present the most favoured (weighty) first, so we merely need to retain
        // that ordering when copying into `hinted_regs`.
        assert!(hints.len() == vlr_env.len());
        let mut hinted_regs = SmallVec::<[RealReg; 8]>::new();

        // === BEGIN collect all hints for `curr_vlr` ===
        // `hints` has one entry per VLR, but only for VLRs which existed
        // initially (viz, not for any created by spilling/splitting/whatever).
        // Similarly, `vlrEquivClasses` can only map VLRs that existed initially,
        // and will panic otherwise.  Hence the following check:
        if curr_vlrix.get() < hints.len() {
            for hint in &hints[curr_vlrix] {
                // BEGIN for each hint
                let mb_cand = match hint {
                    Hint::SameAs(other_vlrix, _weight) => {
                        // It wants the same reg as some other VLR, but we can only honour
                        // that if the other VLR actually *has* a reg at this point.  Its
                        // `rreg` field will tell us exactly that.
                        vlr_env[*other_vlrix].rreg
                    }
                    Hint::Exactly(rreg, _weight) => Some(*rreg),
                };
                // So now `mb_cand` might have a preferred real reg.  If so, add it to
                // the list of cands.  De-dup as we go, since that is way cheaper than
                // effectively doing the same via repeated lookups in the
                // CommitmentMaps.
                if let Some(rreg) = mb_cand {
                    if !hinted_regs.iter().any(|r| *r == rreg) {
                        hinted_regs.push(rreg);
                    }
                }
                // END for each hint
            }

            // At this point, we have in `hinted_regs` the hint candidates that
            // arise from copies between `curr_vlr` and its immediate neighbouring
            // VLRs or RLRs, in order of declining preference.  And that is a good
            // start.
            //
            // However, it may be the case that there is some other VLR which
            // is in the same equivalence class as `curr_vlr`, but is not a
            // direct neighbour, and which has already been assigned a
            // register.  We really ought to take those into account too, as
            // the least-preferred candidates.  Hence we need to iterate over
            // the equivalence class and "round up the secondary candidates."
            //
            // Note that the equivalence class might contain VirtualRanges
            // that are mutually overlapping.  That is handled correctly,
            // since we always consult the relevant CommitmentMaps (in the
            // PerRealRegs) to detect interference.  To more fully understand
            // this, see the big block comment at the top of
            // bt_coalescing_analysis.rs.
            let n_primary_cands = hinted_regs.len();

            // Work the equivalence class set for `curr_vlrix` to pick up any
            // rreg hints.  Equivalence class info exists only for "initial" VLRs.
            if curr_vlrix.get() < num_vlrs_initial {
                // `curr_vlrix` is an "initial" VLR.
                for vlrix in vlrEquivClasses.equiv_class_elems_iter(curr_vlrix) {
                    if vlrix != curr_vlrix {
                        if let Some(rreg) = vlr_env[vlrix].rreg {
                            // Add `rreg` as a cand, if we don't already have it.
                            if !hinted_regs.iter().any(|r| *r == rreg) {
                                hinted_regs.push(rreg);
                            }
                        }
                    }
                }

                // Sort the secondary cands, so as to try and impose more consistency
                // across the group.  I don't know if this is worthwhile, but it seems
                // sensible.
                hinted_regs[n_primary_cands..].sort_by(|rreg1, rreg2| {
                    rreg1.get_index().partial_cmp(&rreg2.get_index()).unwrap()
                });
            }

            if log_enabled!(Level::Debug) {
                if !hinted_regs.is_empty() {
                    let mut candStr = "pri {".to_string();
                    for (rreg, n) in hinted_regs.iter().zip(0..) {
                        if n == n_primary_cands {
                            candStr = candStr + &" } sec {".to_string();
                        }
                        candStr =
                            candStr + &" ".to_string() + &reg_universe.regs[rreg.get_index()].1;
                    }
                    candStr = candStr + &" }";
                    debug!("--   CO candidates     {}", candStr);
                }
            }
        }
        // === END collect all hints for `curr_vlr` ===

        // === BEGIN try to use the hints for `curr_vlr` ===
        // Now work through the list of preferences, to see if we can honour any
        // of them.
        for rreg in &hinted_regs {
            let rregNo = rreg.get_index();

            // Find the set of ranges which we'd have to evict in order to honour
            // this hint.  In the best case the set will be empty.  In the worst
            // case, we will get None either because (1) it would require evicting a
            // spill range, which is disallowed so as to guarantee termination of
            // the algorithm, or (2) because it would require evicting a real reg
            // live range, which we can't do.
            //
            // We take care not to evict any range which is in the same equivalence
            // class as `curr_vlr` since those are (by definition) connected to
            // `curr_vlr` via V-V copies, and so evicting any of them would be
            // counterproductive from the point of view of removing copies.

            let mb_evict_info: Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)> =
                per_real_reg[rregNo].find_evict_set(
                    curr_vlrix,
                    &|vlrix_to_evict| {
                        // What this means is: don't evict `vlrix_to_evict` if
                        // it is in the same equivalence class as `curr_vlrix`
                        // (the VLR which we're trying to allocate) AND we
                        // actually know the equivalence classes for both
                        // (hence the `Some`).  Spill/reload ("non-original")
                        // VLRs don't have entries in `vlrEquivClasses`.
                        vlrEquivClasses.in_same_equivalence_class(vlrix_to_evict, curr_vlrix)
                            != Some(true)
                    },
                    &vlr_env,
                );
            if let Some((vlrixs_to_evict, total_evict_cost)) = mb_evict_info {
                // Stay sane #1
                assert!(curr_vlr.rreg.is_none());
                // Stay sane #2
                assert!(vlrixs_to_evict.is_empty() == total_evict_cost.is_zero());
                // Can't evict if any in the set are spill ranges
                assert!(total_evict_cost.is_finite());
                // Ensure forward progress
                assert!(total_evict_cost.is_less_than(&curr_vlr.spill_cost));
                // Evict all evictees in the set
                for vlrix_to_evict in vlrixs_to_evict.iter() {
                    // Ensure we're not evicting anything in `curr_vlrix`'s eclass.
                    // This should be guaranteed to us by find_evict_set.
                    assert!(
                        vlrEquivClasses.in_same_equivalence_class(*vlrix_to_evict, curr_vlrix)
                            != Some(true)
                    );
                    // Evict ..
                    debug!(
                        "--   CO evict          {:?}:  {:?}",
                        *vlrix_to_evict, &vlr_env[*vlrix_to_evict]
                    );
                    per_real_reg[rregNo].del_VirtualRange(*vlrix_to_evict, &vlr_env);
                    prioQ.add_VirtualRange(&vlr_env, *vlrix_to_evict);
                    // Directly modify bits of vlr_env.  This means we have to abandon
                    // the immutable borrow for curr_vlr, but that's OK -- we won't need
                    // it again (in this loop iteration).
                    debug_assert!(vlr_env[*vlrix_to_evict].rreg.is_some());
                    vlr_env[*vlrix_to_evict].rreg = None;
                    num_vlrs_evicted += 1;
                }
                // .. and reassign.
                debug!("--   CO alloc to       {}", reg_universe.regs[rregNo].1);
                per_real_reg[rregNo].add_VirtualRange(curr_vlrix, &vlr_env);
                vlr_env[curr_vlrix].rreg = Some(*rreg);
                // We're done!
                continue 'main_allocation_loop;
            }
        } /* for rreg in hinted_regs */
        // === END try to use the hints for `curr_vlr` ===

        // ====== END Try to do coalescing ======

        // We get here if we failed to find a viable assignment by the process of
        // looking at the coalescing hints.
        //
        // So: do almost exactly as we did in the "try for coalescing" stage
        // above.  Except, instead of trying each coalescing candidate
        // individually, iterate over all the registers in the class, to find the
        // one (if any) that has the lowest total evict cost.  If we find one that
        // has zero cost -- that is, we can make the assignment without evicting
        // anything -- then stop the search at that point, since searching further
        // is pointless.

        let (first_in_rc, last_in_rc) = match &reg_universe.allocable_by_class[curr_vlr_rc] {
            &None => {
                return Err(RegAllocError::OutOfRegisters(curr_vlr_regclass));
            }
            &Some(ref info) => (info.first, info.last),
        };

        let mut best_so_far: Option<(
            /*rreg index*/ usize,
            SparseSetU<[VirtualRangeIx; 4]>,
            SpillCost,
        )> = None;

        'search_through_cand_rregs_loop: for rregNo in first_in_rc..last_in_rc + 1 {
            //debug!("--   Cand              {} ...",
            //       reg_universe.regs[rregNo].1);

            let mb_evict_info: Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)> =
                per_real_reg[rregNo].find_evict_set(
                    curr_vlrix,
                    // We pass a closure that ignores its arg and returns `true`.
                    // Meaning, "we are not specifying any particular
                    // can't-be-evicted VLRs in this call."
                    &|_vlrix_to_evict| true,
                    &vlr_env,
                );
            //
            //match mb_evict_info {
            //  None => debug!("--   Cand              {}: Unavail",
            //                 reg_universe.regs[rregNo].1),
            //  Some((ref evict_set, ref evict_cost)) =>
            //    debug!("--   Cand              {}: Avail, evict cost {:?}, set {:?}",
            //            reg_universe.regs[rregNo].1, evict_cost, evict_set)
            //}
            //
            if let Some((cand_vlrixs_to_evict, cand_total_evict_cost)) = mb_evict_info {
                // Stay sane ..
                assert!(cand_vlrixs_to_evict.is_empty() == cand_total_evict_cost.is_zero());
                // We can't evict if any in the set are spill ranges, and
                // find_evict_set should not offer us that possibility.
                assert!(cand_total_evict_cost.is_finite());
                // Ensure forward progress
                assert!(cand_total_evict_cost.is_less_than(&curr_vlr.spill_cost));
                // Update the "best so far".  First, if the evict set is empty, then
                // the candidate is by definition better than all others, and we will
                // quit searching just below.
                let mut cand_is_better = cand_vlrixs_to_evict.is_empty();
                if !cand_is_better {
                    if let Some((_, _, best_spill_cost)) = best_so_far {
                        // If we've already got a candidate, this one is better if it has
                        // a lower total spill cost.
                        if cand_total_evict_cost.is_less_than(&best_spill_cost) {
                            cand_is_better = true;
                        }
                    } else {
                        // We don't have any candidate so far, so what we just got is
                        // better (than nothing).
                        cand_is_better = true;
                    }
                }
                // Remember the candidate if required.
                let cand_vlrixs_to_evict_is_empty = cand_vlrixs_to_evict.is_empty();
                if cand_is_better {
                    best_so_far = Some((rregNo, cand_vlrixs_to_evict, cand_total_evict_cost));
                }
                // If we've found a no-evictions-necessary candidate, quit searching
                // immediately.  We won't find anything better.
                if cand_vlrixs_to_evict_is_empty {
                    break 'search_through_cand_rregs_loop;
                }
            }
        } // for rregNo in first_in_rc..last_in_rc + 1
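
        // To illustrate the selection policy with made-up numbers: if r3
        // needs evictions costing 4.0 in total and r5 needs only 1.5, the
        // scan prefers r5; and if some r7 needs no evictions at all, the
        // scan stops at r7 immediately, since no candidate can beat an
        // empty evict set.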

        // Examine the results of the search.  Did we find any usable candidate?
        if let Some((rregNo, vlrixs_to_evict, total_spill_cost)) = best_so_far {
            // We are still Totally Paranoid (tm)
            // Stay sane #1
            debug_assert!(curr_vlr.rreg.is_none());
            // Can't spill a spill range
            assert!(total_spill_cost.is_finite());
            // Ensure forward progress
            assert!(total_spill_cost.is_less_than(&curr_vlr.spill_cost));
            // Now the same evict-reassign section as with the coalescing logic above.
            // Evict all evictees in the set
            for vlrix_to_evict in vlrixs_to_evict.iter() {
                // Evict ..
                debug!(
                    "--   DI evict          {:?}:  {:?}",
                    *vlrix_to_evict, &vlr_env[*vlrix_to_evict]
                );
                per_real_reg[rregNo].del_VirtualRange(*vlrix_to_evict, &vlr_env);
                prioQ.add_VirtualRange(&vlr_env, *vlrix_to_evict);
                debug_assert!(vlr_env[*vlrix_to_evict].rreg.is_some());
                vlr_env[*vlrix_to_evict].rreg = None;
                num_vlrs_evicted += 1;
            }
            // .. and reassign.
            debug!("--   DI alloc to       {}", reg_universe.regs[rregNo].1);
            per_real_reg[rregNo].add_VirtualRange(curr_vlrix, &vlr_env);
            let rreg = reg_universe.regs[rregNo].0;
            vlr_env[curr_vlrix].rreg = Some(rreg);
            // We're done!
            continue 'main_allocation_loop;
        }

        // Still no luck.  We can't find a register to put it in, so we'll
        // have to spill it, since splitting it isn't yet implemented.
        debug!("--   spill");

        // If the live range already pertains to a spill or restore, then
        // it's Game Over.  The constraints (availability of RealRegs vs
        // requirement for them) are impossible to fulfill, and so we cannot
        // generate any valid allocation for this function.
        if curr_vlr.spill_cost.is_infinite() {
            return Err(RegAllocError::OutOfRegisters(curr_vlr_regclass));
        }

        // Generate a new spill slot number, S
        /* Spilling vreg V with virtual live range VirtualRange to slot S:
              for each frag F in VirtualRange {
                 for each insn I in F.first.iix .. F.last.iix {
                    if I does not mention V
                       continue
                    if I mentions V in a Read role {
                       // invar: I cannot mention V in a Mod role
                       add new VirtualRange I.reload -> I.use,
                                            vreg V, spillcost Inf
                       add eli @ I.reload V SpillSlot
                    }
                    if I mentions V in a Mod role {
                       // invar: I cannot mention V in a Read or Write Role
                       add new VirtualRange I.reload -> I.spill,
                                            vreg V, spillcost Inf
                       add eli @ I.reload V SpillSlot
                       add eli @ I.spill  SpillSlot V
                    }
                    if I mentions V in a Write role {
                       // invar: I cannot mention V in a Mod role
                       add new VirtualRange I.def -> I.spill,
                                            vreg V, spillcost Inf
                       add eli @ I.spill V SpillSlot
                    }
                 }
              }
        */
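
        // A concrete instance of the scheme above (hypothetical indices): if
        // v3 is spilled to slot S2 and insn 17 both reads and writes v3 (a
        // Mod), the loop below creates one RtoS bridge VirtualRange covering
        // 17.reload .. 17.spill, plus two edit list items: a reload of v3
        // from S2 at 17.reload and a spill of v3 back to S2 at 17.spill.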
960 
961         // We will be spilling vreg `curr_vlr.reg` with VirtualRange `curr_vlr` to
962         // a spill slot that the spill slot allocator will choose for us, just a
963         // bit further down this function.
964 
965         // This holds enough info to create reload or spill (or both)
966         // instructions around an instruction that references a VirtualReg
967         // that has been spilled.
968         struct SpillAndOrReloadInfo {
969             bix: BlockIx,     // that `iix` is in
970             iix: InstIx,      // this is the Inst we are spilling/reloading for
971             kind: BridgeKind, // says whether to create a spill or reload or both
972         }
973         let mut sri_vec = Vec::<SpillAndOrReloadInfo>::new();
974         let curr_vlr_vreg = curr_vlr.vreg;
975         let curr_vlr_reg = curr_vlr_vreg.to_reg();
976 
977         for frag in &curr_vlr.sorted_frags.frags {
978             for iix in frag.first.iix().dotdot(frag.last.iix().plus(1)) {
979                 let (iix_uses_curr_vlr_reg, iix_defs_curr_vlr_reg, iix_mods_curr_vlr_reg) =
980                     does_inst_use_def_or_mod_reg(&reg_vecs_and_bounds, iix, curr_vlr_reg);
981                 // If this insn doesn't mention the vreg we're spilling for,
982                 // move on.
983                 if !iix_defs_curr_vlr_reg && !iix_mods_curr_vlr_reg && !iix_uses_curr_vlr_reg {
984                     continue;
985                 }
986                 // USES: Do we need to create a reload-to-use bridge
987                 // (VirtualRange) ?
988                 if iix_uses_curr_vlr_reg && frag.contains(&InstPoint::new_use(iix)) {
989                     debug_assert!(!iix_mods_curr_vlr_reg);
990                     // Stash enough info that we can create a new VirtualRange
991                     // and a new edit list entry for the reload.
992                     let bix = inst_to_block_map.map(iix);
993                     let sri = SpillAndOrReloadInfo {
994                         bix,
995                         iix,
996                         kind: BridgeKind::RtoU,
997                     };
998                     sri_vec.push(sri);
999                 }
1000                 // MODS: Do we need to create a reload-to-spill bridge?  This
1001                 // can only happen for instructions which modify a register.
1002                 // Note this has to be a single VirtualRange, since if it were
1003                 // two (one for the reload, one for the spill) they could
1004                 // later end up being assigned to different RealRegs, which is
1005                 // obviously nonsensical.
1006                 if iix_mods_curr_vlr_reg
1007                     && frag.contains(&InstPoint::new_use(iix))
1008                     && frag.contains(&InstPoint::new_def(iix))
1009                 {
1010                     debug_assert!(!iix_uses_curr_vlr_reg);
1011                     debug_assert!(!iix_defs_curr_vlr_reg);
1012                     let bix = inst_to_block_map.map(iix);
1013                     let sri = SpillAndOrReloadInfo {
1014                         bix,
1015                         iix,
1016                         kind: BridgeKind::RtoS,
1017                     };
1018                     sri_vec.push(sri);
1019                 }
1020                 // DEFS: Do we need to create a def-to-spill bridge?
1021                 if iix_defs_curr_vlr_reg && frag.contains(&InstPoint::new_def(iix)) {
1022                     debug_assert!(!iix_mods_curr_vlr_reg);
1023                     let bix = inst_to_block_map.map(iix);
1024                     let sri = SpillAndOrReloadInfo {
1025                         bix,
1026                         iix,
1027                         kind: BridgeKind::DtoS,
1028                     };
1029                     sri_vec.push(sri);
1030                 }
1031             }
1032         }
1033 
1034         // Now that we no longer need to access `frag_env` or `vlr_env` for
1035         // the remainder of this iteration of the main allocation loop, we can
1036         // actually generate the required spill/reload artefacts.
1037 
1038         // First off, poke the spill slot allocator to get an intelligent choice
1039         // of slot.  Note that this will fail for "non-initial" VirtualRanges; but
1040         // the only non-initial ones will have been created by spilling anyway,
1041         // any we definitely shouldn't be trying to spill them again.  Hence we
1042         // can assert.
1043         assert!(vlr_slot_env.len() == num_vlrs_initial);
1044         assert!(curr_vlrix < VirtualRangeIx::new(num_vlrs_initial));
1045         if vlr_slot_env[curr_vlrix].is_none() {
1046             // It hasn't been decided yet.  Cause it to be so by asking for an
1047             // allocation for the entire eclass that `curr_vlrix` belongs to.
1048             spill_slot_allocator.alloc_spill_slots(
1049                 &mut vlr_slot_env,
1050                 func,
1051                 &vlr_env,
1052                 &vlrEquivClasses,
1053                 curr_vlrix,
1054             );
1055             assert!(vlr_slot_env[curr_vlrix].is_some());
1056         }
1057         let spill_slot_to_use = vlr_slot_env[curr_vlrix].unwrap();
1058 
1059         for sri in sri_vec {
1060             let (new_vlr_first_pt, new_vlr_last_pt) = match sri.kind {
1061                 BridgeKind::RtoU => (Point::Reload, Point::Use),
1062                 BridgeKind::RtoS => (Point::Reload, Point::Spill),
1063                 BridgeKind::DtoS => (Point::Def, Point::Spill),
1064             };
1065             let new_vlr_frag = RangeFrag {
1066                 first: InstPoint::new(sri.iix, new_vlr_first_pt),
1067                 last: InstPoint::new(sri.iix, new_vlr_last_pt),
1068             };
1069             debug!("--     new RangeFrag    {:?}", &new_vlr_frag);
1070             let new_vlr_sfrags = SortedRangeFrags::unit(new_vlr_frag);
1071             let new_vlr = VirtualRange {
1072                 vreg: curr_vlr_vreg,
1073                 rreg: None,
1074                 sorted_frags: new_vlr_sfrags,
1075                 size: 1,
1076                 // Effectively infinite.  We'll never look at this again anyway.
1077                 total_cost: 0xFFFF_FFFFu32,
1078                 spill_cost: SpillCost::infinite(),
1079             };
1080             let new_vlrix = VirtualRangeIx::new(vlr_env.len() as u32);
1081             debug!(
1082                 "--     new VirtRange    {:?}  :=  {:?}",
1083                 new_vlrix, &new_vlr
1084             );
1085             vlr_env.push(new_vlr);
1086             prioQ.add_VirtualRange(&vlr_env, new_vlrix);
1087 
1088             // BEGIN (optimisation only) see if we can create any coalescing hints
1089             // for this new VLR.
1090             let mut new_vlr_hint = SmallVec::<[Hint; 8]>::new();
1091             if is_vv_boundary_move[sri.iix] {
1092                 // Collect the src and dst regs for the move.  It must be a
1093                 // move because `is_vv_boundary_move` claims that to be true.
1094                 let im = func.is_move(&func.get_insn(sri.iix));
1095                 assert!(im.is_some());
1096                 let (wdst_reg, src_reg): (Writable<Reg>, Reg) = im.unwrap();
1097                 let dst_reg: Reg = wdst_reg.to_reg();
1098                 assert!(src_reg.is_virtual() && dst_reg.is_virtual());
1099                 let dst_vreg: VirtualReg = dst_reg.to_virtual_reg();
1100                 let src_vreg: VirtualReg = src_reg.to_virtual_reg();
1101                 let bridge_eef = est_freqs[sri.bix];
1102                 match sri.kind {
1103                     BridgeKind::RtoU => {
1104                         // Reload-to-Use bridge.  Hint that we want to be
1105                         // allocated to the same reg as the destination of the
1106                         // move.  That means we have to find the VLR that owns
1107                         // the destination vreg.
1108                         for vlrix in &vreg_to_vlrs_map[dst_vreg.get_index()] {
1109                             if vlr_env[*vlrix].vreg == dst_vreg {
1110                                 new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
1111                                 break;
1112                             }
1113                         }
1114                     }
1115                     BridgeKind::DtoS => {
1116                         // Def-to-Spill bridge.  Hint that we want to be
1117                         // allocated to the same reg as the source of the
1118                         // move.
1119                         for vlrix in &vreg_to_vlrs_map[src_vreg.get_index()] {
1120                             if vlr_env[*vlrix].vreg == src_vreg {
1121                                 new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
1122                                 break;
1123                             }
1124                         }
1125                     }
1126                     BridgeKind::RtoS => {
1127                         // A Reload-to-Spill bridge.  This can't happen.  It
1128                         // implies that the instruction modifies (both reads
1129                         // and writes) one of its operands, but the client's
1130                         // `is_move` function claims this instruction is a
1131                         // plain "move" (reads source, writes dest).  These
1132                         // claims are mutually contradictory.
1133                         panic!("RtoS bridge for v-v boundary move");
1134                     }
1135                 }
1136             }
1137             hints.push(new_vlr_hint);
1138             // END see if we can create any coalescing hints for this new VLR.
1139 
1140             // Finally, create a new EditListItem.  This holds enough
1141             // information that a suitable spill or reload instruction can
1142             // later be created.
1143             let new_eli = EditListItem {
1144                 slot: spill_slot_to_use,
1145                 vlrix: new_vlrix,
1146                 kind: sri.kind,
1147                 iix: sri.iix,
1148             };
1149             if is_vv_boundary_move[sri.iix] {
1150                 debug!("--     new ELI MOVE     {:?}", &new_eli);
1151                 edit_list_move.push(new_eli);
1152             } else {
1153                 debug!("--     new ELI other    {:?}", &new_eli);
1154                 edit_list_other.push(new_eli);
1155             }
1156         }
1157 
1158         num_vlrs_spilled += 1;
1159         // And implicitly "continue 'main_allocation_loop"
1160     }
1161     // ======== END Main allocation loop ========
1162 
1163     info!("alloc_main:   main allocation loop: end");
1164 
1165     if log_enabled!(Level::Debug) {
1166         debug!("");
1167         print_RA_state(
1168             "Final",
1169             &reg_universe,
1170             &prioQ,
1171             &per_real_reg,
1172             &edit_list_move,
1173             &edit_list_other,
1174             &vlr_env,
1175             &frag_env,
1176         );
1177     }
1178 
1179     // ======== BEGIN Do spill slot coalescing ========
1180 
1181     debug!("");
1182     info!("alloc_main:   create spills_n_reloads for MOVE insns");
1183 
1184     // Sort `edit_list_move` by the insn with which each item is associated.
1185     edit_list_move.sort_unstable_by(|eli1, eli2| eli1.iix.cmp(&eli2.iix));
1186 
1187     // Now go through `edit_list_move` and find pairs which constitute a
1188     // spillslot-to-the-same-spillslot move.  What we have in `edit_list_move` is
1189     // heavily constrained, as follows:
1190     //
1191     // * each entry should reference an InstIx which the coalescing analysis
1192     //   identified as a virtual-to-virtual copy which exists at the boundary
1193     //   between two VirtualRanges.  The "boundary" aspect is important; we
1194     //   can't coalesce out moves in which the source vreg is not the "last use"
1195     //   or for which the destination vreg is not the "first def".  (The same is
1196     //   true for coalescing of unspilled moves).
1197     //
    // * each entry must reference a VirtualRange which has only a single
    //   RangeFrag, and that frag must exist entirely "within" the referenced
    //   instruction.  Specifically, it may only be an R->U frag (bridge) or a
    //   D->S frag.
1202     //
1203     // * For a referenced instruction, there may be at most two entries in this
1204     //   list: one that references the R->U frag and one that references the
1205     //   D->S frag.  Furthermore, the two entries must carry values of the same
1206     //   RegClass; if that isn't true, the client's `is_move` function is
1207     //   defective.
1208     //
1209     // For any such pair identified, if both frags mention the same spill slot,
1210     // we skip generating both the reload and the spill instruction.  We also
1211     // note that the instruction itself is to be deleted (converted to a
1212     // zero-len nop).  In a sense we have "cancelled out" a reload/spill pair.
    // Entries that can't be cancelled out are handled the same way as entries
    // in `edit_list_other`, by simply copying them there.
1215     //
1216     // Since `edit_list_move` is sorted by insn ix, we can scan linearly over
1217     // it, looking for adjacent pairs.  We'll have to accept them in either
1218     // order though (first R->U then D->S, or the other way round).  There's no
1219     // fixed ordering since there is no particular ordering in the way
1220     // VirtualRanges are allocated.
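    //
    // To illustrate (hypothetical values): a v-v boundary move at insn I100
    // might give rise to two entries
    //
    //   EditListItem { slot: S3, kind: RtoU, iix: I100, .. }
    //   EditListItem { slot: S3, kind: DtoS, iix: I100, .. }
    //
    // Both reference the same slot S3, so neither the reload nor the spill
    // needs to be generated, and I100 itself is nopped out.  Had the slots
    // differed, both entries would simply be passed on to `edit_list_other`.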
1221 
    // As a result of spill slot coalescing, we'll need to delete the move
    // instructions that lead to a mergeable spillslot-to-the-same-spillslot
    // move.  The insn stream editor can't delete instructions, so instead
    // it'll replace them with zero-len nops obtained from the client.
    // `iixs_to_nop_out` records the insns that this has to happen to.  It is
    // in increasing order of InstIx (because `edit_list_move` is, after the
    // sort above), and the indices in it refer to the original
    // virtual-registerised code.
1229     let mut iixs_to_nop_out = Vec::<InstIx>::new();
1230 
1231     let n_edit_list_move = edit_list_move.len();
1232     let mut n_edit_list_move_processed = 0; // for assertions only
1233     let mut i_min = 0;
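    // Each iteration of this loop identifies one group of entries, all of
    // which reference the same instruction, and processes it.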
1234     loop {
1235         if i_min >= n_edit_list_move {
1236             break;
1237         }
1238         // Find the bounds of the current group.
1239         debug!("editlist entry (MOVE): min: {:?}", &edit_list_move[i_min]);
1240         let i_min_iix = edit_list_move[i_min].iix;
1241         let mut i_max = i_min;
1242         while i_max + 1 < n_edit_list_move && edit_list_move[i_max + 1].iix == i_min_iix {
1243             i_max += 1;
1244             debug!("editlist entry (MOVE): max: {:?}", &edit_list_move[i_max]);
1245         }
1246         // Current group is from i_min to i_max inclusive.  At most 2 entries are
1247         // allowed per group.
1248         assert!(i_max - i_min <= 1);
1249         // Check for a mergeable pair.
1250         if i_max - i_min == 1 {
1251             assert!(is_vv_boundary_move[i_min_iix]);
1252             let vlrix1 = edit_list_move[i_min].vlrix;
1253             let vlrix2 = edit_list_move[i_max].vlrix;
1254             assert!(vlrix1 != vlrix2);
1255             let vlr1 = &vlr_env[vlrix1];
1256             let vlr2 = &vlr_env[vlrix2];
1257             let frags1 = &vlr1.sorted_frags;
1258             let frags2 = &vlr2.sorted_frags;
1259             assert!(frags1.frags.len() == 1);
1260             assert!(frags2.frags.len() == 1);
1261             let frag1 = &frags1.frags[0];
1262             let frag2 = &frags2.frags[0];
1263             assert!(frag1.first.iix() == i_min_iix);
1264             assert!(frag1.last.iix() == i_min_iix);
1265             assert!(frag2.first.iix() == i_min_iix);
1266             assert!(frag2.last.iix() == i_min_iix);
1267             // frag1 must be R->U and frag2 must be D->S, or vice versa
1268             match (
1269                 frag1.first.pt(),
1270                 frag1.last.pt(),
1271                 frag2.first.pt(),
1272                 frag2.last.pt(),
1273             ) {
1274                 (Point::Reload, Point::Use, Point::Def, Point::Spill)
1275                 | (Point::Def, Point::Spill, Point::Reload, Point::Use) => {
1276                     let slot1 = edit_list_move[i_min].slot;
1277                     let slot2 = edit_list_move[i_max].slot;
1278                     if slot1 == slot2 {
1279                         // Yay.  We've found a coalescable pair.  We can just ignore the
1280                         // two entries and move on.  Except we have to mark the insn
1281                         // itself for deletion.
1282                         debug!("editlist entry (MOVE): delete {:?}", i_min_iix);
1283                         iixs_to_nop_out.push(i_min_iix);
1284                         i_min = i_max + 1;
1285                         n_edit_list_move_processed += 2;
1286                         continue;
1287                     }
1288                 }
1289                 (_, _, _, _) => {
1290                     panic!("spill slot coalescing, edit_list_move: unexpected frags");
1291                 }
1292             }
1293         }
        // If we get here for whatever reason, this group is uninteresting.
        // Copy it into `edit_list_other` so that it gets processed in the
        // normal way.
1296         for i in i_min..=i_max {
1297             edit_list_other.push(edit_list_move[i]);
1298             n_edit_list_move_processed += 1;
1299         }
1300         i_min = i_max + 1;
1301     }
1302     assert!(n_edit_list_move_processed == n_edit_list_move);
1303 
1304     // ======== END Do spill slot coalescing ========
1305 
1306     // ======== BEGIN Create all other spills and reloads ========
1307 
1308     debug!("");
1309     info!("alloc_main:   create spills_n_reloads for other insns");
1310 
    // Reload and spill instructions are missing.  To generate them, go through
    // the "edit list", which contains info both on how to generate the
    // instructions and on where to insert them.
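    //
    // In outline: an RtoU entry generates a reload immediately before the
    // use, a DtoS entry generates a spill immediately after the def, and an
    // RtoS entry (for an instruction which modifies its operand) generates
    // both, bracketing the instruction.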
1314     let mut spills_n_reloads = Vec::<InstToInsertAndPoint>::new();
1315     let mut num_spills = 0; // stats only
1316     let mut num_reloads = 0; // stats only
1317     for eli in &edit_list_other {
1318         debug!("editlist entry (other): {:?}", eli);
1319         let vlr = &vlr_env[eli.vlrix];
1320         let vlr_sfrags = &vlr.sorted_frags;
1321         assert!(vlr_sfrags.frags.len() == 1);
1322         let vlr_frag = &vlr_sfrags.frags[0];
1323         let rreg = vlr.rreg.expect("Gen of spill/reload: reg not assigned?!");
1324         let vreg = vlr.vreg;
1325         match eli.kind {
1326             BridgeKind::RtoU => {
1327                 debug_assert!(vlr_frag.first.pt().is_reload());
1328                 debug_assert!(vlr_frag.last.pt().is_use());
1329                 debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
1330                 let insnR = InstToInsert::Reload {
1331                     to_reg: Writable::from_reg(rreg),
1332                     from_slot: eli.slot,
1333                     for_vreg: vreg,
1334                 };
1335                 let whereToR = vlr_frag.first;
1336                 spills_n_reloads.push(InstToInsertAndPoint::new(insnR, whereToR));
1337                 num_reloads += 1;
1338             }
1339             BridgeKind::RtoS => {
1340                 debug_assert!(vlr_frag.first.pt().is_reload());
1341                 debug_assert!(vlr_frag.last.pt().is_spill());
1342                 debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
1343                 let insnR = InstToInsert::Reload {
1344                     to_reg: Writable::from_reg(rreg),
1345                     from_slot: eli.slot,
1346                     for_vreg: vreg,
1347                 };
1348                 let whereToR = vlr_frag.first;
1349                 let insnS = InstToInsert::Spill {
1350                     to_slot: eli.slot,
1351                     from_reg: rreg,
1352                     for_vreg: vreg,
1353                 };
1354                 let whereToS = vlr_frag.last;
1355                 spills_n_reloads.push(InstToInsertAndPoint::new(insnR, whereToR));
1356                 spills_n_reloads.push(InstToInsertAndPoint::new(insnS, whereToS));
1357                 num_reloads += 1;
1358                 num_spills += 1;
1359             }
1360             BridgeKind::DtoS => {
1361                 debug_assert!(vlr_frag.first.pt().is_def());
1362                 debug_assert!(vlr_frag.last.pt().is_spill());
1363                 debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
1364                 let insnS = InstToInsert::Spill {
1365                     to_slot: eli.slot,
1366                     from_reg: rreg,
1367                     for_vreg: vreg,
1368                 };
1369                 let whereToS = vlr_frag.last;
1370                 spills_n_reloads.push(InstToInsertAndPoint::new(insnS, whereToS));
1371                 num_spills += 1;
1372             }
1373         }
1374     }
1375 
1376     // ======== END Create all other spills and reloads ========
1377 
1378     // ======== BEGIN Create final instruction stream ========
1379 
1380     // Gather up a vector of (RangeFrag, VirtualReg, RealReg) resulting from
1381     // the previous phase.  This fundamentally is the result of the allocation
1382     // and tells us how the instruction stream must be edited.  Note it does
1383     // not take account of spill or reload instructions.  Dealing with those
1384     // is relatively simple and happens later.
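    //
    // Each (RangeFrag, VirtualReg, RealReg) entry means: throughout the
    // extent of the frag, mentions of the virtual register are to be
    // rewritten as the real register.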
1385 
1386     info!("alloc_main:   create frag_map");
1387 
1388     let mut frag_map = Vec::<(RangeFrag, VirtualReg, RealReg)>::new();
1389     // For each real register under our control ..
1390     for i in 0..reg_universe.allocable {
1391         let rreg = reg_universe.regs[i].0;
1392         // .. look at all the VirtualRanges assigned to it.  And for each such
1393         // VirtualRange ..
1394         for vlrix_assigned in per_real_reg[i].vlrixs_assigned.iter() {
1395             let VirtualRange {
1396                 vreg, sorted_frags, ..
1397             } = &vlr_env[*vlrix_assigned];
            // All the RangeFrags in this VirtualRange require its `vreg` to
            // be mapped to the real reg `rreg`, so ..
            // .. collect up all its constituent RangeFrags.
1401             for frag in &sorted_frags.frags {
1402                 frag_map.push((frag.clone(), *vreg, rreg));
1403             }
1404         }
1405     }
1406 
1407     info!("alloc_main:   edit_inst_stream");
1408 
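    // `edit_inst_stream` applies the vreg-to-rreg mapping in `frag_map` to
    // the instruction stream, inserts the generated spill and reload
    // instructions at their recorded points, and replaces each insn listed in
    // `iixs_to_nop_out` with a zero-len nop.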
1409     let final_insns_and_targetmap__or_err = edit_inst_stream(
1410         func,
1411         spills_n_reloads,
1412         &iixs_to_nop_out,
1413         frag_map,
1414         &reg_universe,
1415         use_checker,
1416     );
1417 
1418     // ======== END Create final instruction stream ========
1419 
1420     // ======== BEGIN Create the RegAllocResult ========
1421 
1422     match final_insns_and_targetmap__or_err {
1423         Ok((ref final_insns, ..)) => {
1424             info!(
1425                 "alloc_main:   out: VLRs: {} initially, {} processed",
1426                 num_vlrs_initial, num_vlrs_processed
1427             );
1428             info!(
1429                 "alloc_main:   out: VLRs: {} evicted, {} spilled",
1430                 num_vlrs_evicted, num_vlrs_spilled
1431             );
1432             info!(
1433                 "alloc_main:   out: insns: {} total, {} spills, {} reloads, {} nopzs",
1434                 final_insns.len(),
1435                 num_spills,
1436                 num_reloads,
1437                 iixs_to_nop_out.len()
1438             );
1439             info!(
1440                 "alloc_main:   out: spill slots: {} used",
1441                 spill_slot_allocator.num_slots_in_use()
1442             );
1443         }
1444         Err(_) => {
1445             info!("alloc_main:   allocation failed!");
1446         }
1447     }
1448 
1449     let (final_insns, target_map, orig_insn_map) = match final_insns_and_targetmap__or_err {
1450         Err(e) => {
1451             info!("alloc_main: fail");
1452             return Err(e);
1453         }
1454         Ok(pair) => {
1455             info!("alloc_main:   creating RegAllocResult");
1456             pair
1457         }
1458     };
1459 
1460     // Compute clobbered registers with one final, quick pass.
1461     //
1462     // FIXME: derive this information directly from the allocation data
1463     // structures used above.
1464     //
    // NB at this point, the `san_reg_uses` that was computed in the analysis
    // phase is no longer valid, because we've added instructions to, and
    // removed instructions from, the function relative to the one that
    // `san_reg_uses` was computed from.  So we have to re-visit all insns
    // with `add_raw_reg_vecs_for_insn`.  That's inefficient, but we don't
    // care; this should only be a temporary fix.
1471 
1472     let mut clobbered_registers: Set<RealReg> = Set::empty();
1473 
    // We'll dump all the reg uses in here.  We don't care about the bounds,
    // so just pass a dummy one in the loop.
1476     let mut reg_vecs = RegVecs::new(/*sanitized=*/ false);
1477     let mut dummy_bounds = RegVecBounds::new();
1478     for insn in &final_insns {
1479         add_raw_reg_vecs_for_insn::<F>(insn, &mut reg_vecs, &mut dummy_bounds);
1480     }
1481     for reg in reg_vecs.defs.iter().chain(reg_vecs.mods.iter()) {
1482         assert!(reg.is_real());
1483         clobbered_registers.insert(reg.to_real_reg());
1484     }
1485 
    // And now remove from the set all those regs not available to the
    // allocator.  The reserved regs are not removed, though, since we might
    // have modified those.
1488     clobbered_registers.filter_map(|&reg| {
1489         if reg.get_index() >= reg_universe.allocable {
1490             None
1491         } else {
1492             Some(reg)
1493         }
1494     });
1495 
1496     assert!(est_freqs.len() as usize == func.blocks().len());
1497     let mut block_annotations = None;
1498     if opts.request_block_annotations {
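        // Generate one annotation line per block, of the form
        // "RA: bix <block index>, estFreq <estimated frequency>".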
1499         let mut anns = TypedIxVec::<BlockIx, Vec<String>>::new();
1500         for (estFreq, i) in est_freqs.iter().zip(0..) {
1501             let bix = BlockIx::new(i);
1502             let ef_str = format!("RA: bix {:?}, estFreq {}", bix, estFreq);
1503             anns.push(vec![ef_str]);
1504         }
1505         block_annotations = Some(anns);
1506     }
1507 
1508     let ra_res = RegAllocResult {
1509         insns: final_insns,
1510         target_map,
1511         orig_insn_map,
1512         clobbered_registers,
1513         num_spill_slots: spill_slot_allocator.num_slots_in_use() as u32,
1514         block_annotations,
1515     };
1516 
1517     info!("alloc_main: end");
1518 
1519     // ======== END Create the RegAllocResult ========
1520 
1521     Ok(ra_res)
1522 }
1523