1 //! Performs dataflow and liveness analysis, including live range construction.
2 
3 use log::{debug, info, log_enabled, Level};
4 use smallvec::{smallvec, SmallVec};
5 use std::cmp::min;
6 use std::fmt;
7 
8 use crate::analysis_control_flow::CFGInfo;
9 use crate::data_structures::{
10     BlockIx, InstIx, InstPoint, MoveInfo, MoveInfoElem, Point, Queue, RangeFrag, RangeFragIx,
11     RangeFragKind, RangeFragMetrics, RealRange, RealRangeIx, RealReg, RealRegUniverse, Reg,
12     RegClass, RegSets, RegToRangesMaps, RegUsageCollector, RegVecBounds, RegVecs, RegVecsAndBounds,
13     SortedRangeFragIxs, SortedRangeFrags, SpillCost, TypedIxVec, VirtualRange, VirtualRangeIx,
14     VirtualReg,
15 };
16 use crate::sparse_set::SparseSet;
17 use crate::union_find::{ToFromU32, UnionFind};
18 use crate::Function;
19 
20 //===========================================================================//
21 //                                                                           //
22 // DATA FLOW AND LIVENESS ANALYSIS                                           //
23 //                                                                           //
24 //===========================================================================//
25 
26 //=============================================================================
27 // Data flow analysis: extraction and sanitization of reg-use information: low
28 // level interface
29 
30 // === The meaning of "sanitization" ===
31 //
32 // The meaning of "sanitization" is as follows.  Incoming virtual-registerised
33 // code may mention a mixture of virtual and real registers.  Those real
34 // registers may include some which aren't available for the allocators to
35 // use.  Rather than scatter ad-hoc logic all over the analysis phase and the
36 // allocators, we simply remove all non-available real registers from the
37 // per-instruction use/def/mod sets.  The effect is that, after this point, we
38 // can operate on the assumption that any register we come across is either a
39 // virtual register or a real register available to the allocator.
40 //
41 // A real register is available to the allocator iff its index number is less
42 // than `RealRegUniverse.allocable`.
43 //
// Furthermore, no incoming instruction may mention one of the per-class
// scratch registers listed in
// `RealRegUniverse.allocable_by_class[..].suggested_scratch` in either a use
// or mod role.  Sanitization will also detect this case and return an error.
48 // Mentions of a scratch register in a def role are tolerated; however, since
49 // no instruction may use or modify a scratch register, all such writes are
// dead.
51 //
52 // In all of the above, "mentions" of a real register really means "uses,
53 // defines or modifications of said register".  It doesn't matter whether the
54 // instruction explicitly mentions the register or whether it is an implicit
55 // mention (eg, %cl in x86 shift-by-a-variable-amount instructions).  In other
56 // words, a "mention" is any use, def or mod as detected by the client's
57 // `get_regs` routine.
58 
59 // === Filtering of register groups in `RegVec`s ===
60 //
// Filtering on a group is done by leaving the start point unchanged, sliding
// back retained registers to fill the holes left by non-retained registers, and
// reducing the group length accordingly.  This effectively "leaks" some stale
// entries at the end of the group, but that's not a problem.
65 //
// Extraction of register usages for the whole function is done by
// `get_sanitized_reg_uses_for_func`.  For each instruction, its used,
// defined and modified register sets are acquired by calling the client's
// `get_regs` function.  Then each of those three sets is cleaned up as
// follows:
71 //
72 // (1) duplicates are removed (after which they really are sets)
73 //
// (2) any registers in the modified set are removed from the used and defined
//     sets.  This enforces the invariant that
//     `intersect(modified, union(used, defined))` is the empty set.  Live range
//     fragment computation (`get_range_frags_for_block`) depends on this property.
78 //
79 // (3) real registers unavailable to the allocator are removed, per the
80 //     abovementioned sanitization rules.
81 
82 // ==== LOCAL FN ====
83 // Given a register group in `regs[start, +len)`, remove duplicates from the
84 // group.  The new group size is written to `*len`.
85 #[inline(never)]
fn remove_dups_from_group(regs: &mut Vec<Reg>, start: u32, len: &mut u8) {
87     // First sort the group, to facilitate de-duplication.
88     regs[start as usize..start as usize + *len as usize].sort_unstable();
89 
90     // Now make a compacting pass over the group.  'rd' = read point in the
91     // group, 'wr' = write point in the group.
92     let mut wr = start as usize;
93     for rd in start as usize..start as usize + *len as usize {
94         let reg = regs[rd];
95         if rd == start as usize || regs[rd - 1] != reg {
96             // It's not a duplicate.
97             if wr != rd {
98                 regs[wr] = reg;
99             }
100             wr += 1;
101         }
102     }
103 
104     let new_len_usize = wr - start as usize;
105     assert!(new_len_usize <= *len as usize);
106     // This narrowing is safe because the old `len` fitted in 8 bits.
107     *len = new_len_usize as u8;
108 }
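
// A minimal sketch (not part of the allocator) of the in-place technique used
// above, with plain `u32`s standing in for `Reg`: the "group" is the subrange
// `regs[start .. start + len]` of a larger shared vector.  Duplicates are
// removed by sorting the subrange and compacting it with separate read and
// write cursors; entries beyond the new length are simply left behind.
#[cfg(test)]
mod dedup_group_sketch {
    #[test]
    fn dedup_within_a_group() {
        let mut regs: Vec<u32> = vec![9, 9, /* group: */ 5, 3, 5, 5, 1, /* other: */ 7];
        let start = 2usize;
        let mut len = 5usize;

        // Sort the group, then make a compacting pass over it.
        regs[start..start + len].sort_unstable();
        let mut wr = start;
        for rd in start..start + len {
            if rd == start || regs[rd - 1] != regs[rd] {
                regs[wr] = regs[rd];
                wr += 1;
            }
        }
        len = wr - start;

        assert_eq!(len, 3);
        assert_eq!(regs[start..start + len], [1, 3, 5]);
        // Data outside the group is untouched.
        assert_eq!(regs[0..2], [9, 9]);
        assert_eq!(regs[7], 7);
    }
}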
109 
110 // ==== LOCAL FN ====
111 // Remove from `group[group_start, +group_len)` any registers mentioned in
112 // `mods[mods_start, +mods_len)`, and update `*group_len` accordingly.
113 #[inline(never)]
fn remove_mods_from_group(
115     group: &mut Vec<Reg>,
116     group_start: u32,
117     group_len: &mut u8,
118     mods: &Vec<Reg>,
119     mods_start: u32,
120     mods_len: u8,
121 ) {
122     let mut wr = group_start as usize;
123     for rd in group_start as usize..group_start as usize + *group_len as usize {
124         let reg = group[rd];
125         // Only retain `reg` if it is not mentioned in `mods[mods_start, +mods_len)`
126         let mut retain = true;
127         for i in mods_start as usize..mods_start as usize + mods_len as usize {
128             if reg == mods[i] {
129                 retain = false;
130                 break;
131             }
132         }
133         if retain {
134             if wr != rd {
135                 group[wr] = reg;
136             }
137             wr += 1;
138         }
139     }
140     let new_group_len_usize = wr - group_start as usize;
141     assert!(new_group_len_usize <= *group_len as usize);
142     // This narrowing is safe because the old `group_len` fitted in 8 bits.
143     *group_len = new_group_len_usize as u8;
144 }
145 
146 // ==== EXPORTED FN ====
147 // For instruction `inst`, add the register uses to the ends of `reg_vecs`,
148 // and write bounds information into `bounds`.  The register uses are raw
149 // (unsanitized) but they are guaranteed to be duplicate-free and also to have
150 // no `mod` mentions in the `use` or `def` groups.  That is, cleanups (1) and
151 // (2) above have been done.
152 #[inline(never)]
pub fn add_raw_reg_vecs_for_insn<F: Function>(
154     inst: &F::Inst,
155     reg_vecs: &mut RegVecs,
156     bounds: &mut RegVecBounds,
157 ) {
158     bounds.uses_start = reg_vecs.uses.len() as u32;
159     bounds.defs_start = reg_vecs.defs.len() as u32;
160     bounds.mods_start = reg_vecs.mods.len() as u32;
161 
162     let mut collector = RegUsageCollector::new(reg_vecs);
163     F::get_regs(inst, &mut collector);
164 
165     let uses_len = collector.reg_vecs.uses.len() as u32 - bounds.uses_start;
166     let defs_len = collector.reg_vecs.defs.len() as u32 - bounds.defs_start;
167     let mods_len = collector.reg_vecs.mods.len() as u32 - bounds.mods_start;
168 
169     // This assertion is important -- the cleanup logic also depends on it.
170     assert!((uses_len | defs_len | mods_len) < 256);
171     bounds.uses_len = uses_len as u8;
172     bounds.defs_len = defs_len as u8;
173     bounds.mods_len = mods_len as u8;
174 
175     // First, de-dup the three new groups.
176     if bounds.uses_len > 0 {
177         remove_dups_from_group(
178             &mut collector.reg_vecs.uses,
179             bounds.uses_start,
180             &mut bounds.uses_len,
181         );
182     }
183     if bounds.defs_len > 0 {
184         remove_dups_from_group(
185             &mut collector.reg_vecs.defs,
186             bounds.defs_start,
187             &mut bounds.defs_len,
188         );
189     }
190     if bounds.mods_len > 0 {
191         remove_dups_from_group(
192             &mut collector.reg_vecs.mods,
193             bounds.mods_start,
194             &mut bounds.mods_len,
195         );
196     }
197 
198     // And finally, remove modified registers from the set of used and defined
199     // registers, so we don't have to make the client do so.
200     if bounds.mods_len > 0 {
201         if bounds.uses_len > 0 {
202             remove_mods_from_group(
203                 &mut collector.reg_vecs.uses,
204                 bounds.uses_start,
205                 &mut bounds.uses_len,
206                 &collector.reg_vecs.mods,
207                 bounds.mods_start,
208                 bounds.mods_len,
209             );
210         }
211         if bounds.defs_len > 0 {
212             remove_mods_from_group(
213                 &mut collector.reg_vecs.defs,
214                 bounds.defs_start,
215                 &mut bounds.defs_len,
216                 &collector.reg_vecs.mods,
217                 bounds.mods_start,
218                 bounds.mods_len,
219             );
220         }
221     }
222 }
223 
224 // ==== LOCAL FN ====
225 // This is the fundamental keep-or-don't-keep? predicate for sanitization.  To
226 // do this exactly right we also need to know whether the register is
227 // mentioned in a def role (as opposed to a use or mod role).  Note that this
228 // function can fail, and the error must be propagated.
229 #[inline(never)]
fn sanitize_should_retain_reg(
231     reg_universe: &RealRegUniverse,
232     reg: Reg,
233     reg_is_defd: bool,
234 ) -> Result<bool, RealReg> {
235     // Retain all virtual regs.
236     if reg.is_virtual() {
237         return Ok(true);
238     }
239 
240     // So it's a RealReg.
241     let rreg_ix = reg.get_index();
242 
243     // Check that this RealReg is mentioned in the universe.
244     if rreg_ix >= reg_universe.regs.len() {
245         // This is a serious error which should be investigated.  It means the
246         // client gave us an instruction which mentions a RealReg which isn't
247         // listed in the RealRegUniverse it gave us.  That's not allowed.
248         return Err(reg.as_real_reg().unwrap());
249     }
250 
251     // Discard all real regs that aren't available to the allocator.
252     if rreg_ix >= reg_universe.allocable {
253         return Ok(false);
254     }
255 
256     // It isn't allowed for the client to give us an instruction which reads or
257     // modifies one of the scratch registers.  It is however allowed to write a
258     // scratch register.
259     for reg_info in &reg_universe.allocable_by_class {
260         if let Some(reg_info) = reg_info {
261             if let Some(scratch_idx) = &reg_info.suggested_scratch {
262                 let scratch_reg = reg_universe.regs[*scratch_idx].0;
263                 if reg.to_real_reg() == scratch_reg {
264                     if !reg_is_defd {
265                         // This is an error (on the part of the client).
266                         return Err(reg.as_real_reg().unwrap());
267                     }
268                 }
269             }
270         }
271     }
272 
273     // `reg` is mentioned in the universe, is available to the allocator, and if
274     // it is one of the scratch regs, it is only written, not read or modified.
275     Ok(true)
276 }
277 // END helper fn
278 
279 // ==== LOCAL FN ====
280 // Given a register group in `regs[start, +len)`, sanitize the group.  To do
281 // this exactly right we also need to know whether the registers in the group
282 // are mentioned in def roles (as opposed to use or mod roles).  Sanitisation
283 // can fail, in which case we must propagate the error.  If it is successful,
284 // the new group size is written to `*len`.
285 #[inline(never)]
fn sanitize_group(
287     reg_universe: &RealRegUniverse,
288     regs: &mut Vec<Reg>,
289     start: u32,
290     len: &mut u8,
291     is_def_group: bool,
292 ) -> Result<(), RealReg> {
293     // Make a single compacting pass over the group.  'rd' = read point in the
294     // group, 'wr' = write point in the group.
295     let mut wr = start as usize;
296     for rd in start as usize..start as usize + *len as usize {
297         let reg = regs[rd];
298         // This call can fail:
299         if sanitize_should_retain_reg(reg_universe, reg, is_def_group)? {
300             if wr != rd {
301                 regs[wr] = reg;
302             }
303             wr += 1;
304         }
305     }
306 
307     let new_len_usize = wr - start as usize;
308     assert!(new_len_usize <= *len as usize);
309     // This narrowing is safe because the old `len` fitted in 8 bits.
310     *len = new_len_usize as u8;
311     Ok(())
312 }
313 
314 // ==== LOCAL FN ====
315 // For instruction `inst`, add the fully cleaned-up register uses to the ends
316 // of `reg_vecs`, and write bounds information into `bounds`.  Cleanups (1)
317 // (2) and (3) mentioned above have been done.  Note, this can fail, and the
318 // error must be propagated.
319 #[inline(never)]
fn add_san_reg_vecs_for_insn<F: Function>(
321     inst: &F::Inst,
322     reg_universe: &RealRegUniverse,
323     reg_vecs: &mut RegVecs,
324     bounds: &mut RegVecBounds,
325 ) -> Result<(), RealReg> {
    // Get the raw reg usages.  These will be dup-free and mod-cleaned-up
    // (meaning cleanups (1) and (2) have been done).
328     add_raw_reg_vecs_for_insn::<F>(inst, reg_vecs, bounds);
329 
    // Finally, sanitize them.  Any errors from sanitization are propagated.
331     if bounds.uses_len > 0 {
332         sanitize_group(
333             &reg_universe,
334             &mut reg_vecs.uses,
335             bounds.uses_start,
336             &mut bounds.uses_len,
337             /*is_def_group=*/ false,
338         )?;
339     }
340     if bounds.defs_len > 0 {
341         sanitize_group(
342             &reg_universe,
343             &mut reg_vecs.defs,
344             bounds.defs_start,
345             &mut bounds.defs_len,
346             /*is_def_group=*/ true,
347         )?;
348     }
349     if bounds.mods_len > 0 {
350         sanitize_group(
351             &reg_universe,
352             &mut reg_vecs.mods,
353             bounds.mods_start,
354             &mut bounds.mods_len,
355             /*is_def_group=*/ false,
356         )?;
357     }
358 
359     Ok(())
360 }
361 
362 // ==== MAIN FN ====
363 #[inline(never)]
pub fn get_sanitized_reg_uses_for_func<F: Function>(
365     func: &F,
366     reg_universe: &RealRegUniverse,
367 ) -> Result<RegVecsAndBounds, RealReg> {
368     // These are modified by the per-insn loop.
369     let mut reg_vecs = RegVecs::new(false);
370     let mut bounds_vec = TypedIxVec::<InstIx, RegVecBounds>::new();
371     bounds_vec.reserve(func.insns().len());
372 
373     // For each insn, add their register uses to the ends of the 3 vectors in
374     // `reg_vecs`, and create an admin entry to describe the 3 new groups.  Any
375     // errors from sanitization are propagated.
376     for insn in func.insns() {
377         let mut bounds = RegVecBounds::new();
378         add_san_reg_vecs_for_insn::<F>(insn, &reg_universe, &mut reg_vecs, &mut bounds)?;
379 
380         bounds_vec.push(bounds);
381     }
382 
383     assert!(!reg_vecs.is_sanitized());
384     reg_vecs.set_sanitized(true);
385 
386     if log_enabled!(Level::Debug) {
387         let show_reg = |r: Reg| {
388             if r.is_real() {
389                 reg_universe.regs[r.get_index()].1.clone()
390             } else {
391                 format!("{:?}", r).to_string()
392             }
393         };
394         let show_regs = |r_vec: &[Reg]| {
395             let mut s = "".to_string();
396             for r in r_vec {
397                 s = s + &show_reg(*r) + &" ".to_string();
398             }
399             s
400         };
401 
402         for i in 0..bounds_vec.len() {
403             let iix = InstIx::new(i);
404             let s_use = show_regs(
405                 &reg_vecs.uses[bounds_vec[iix].uses_start as usize
406                     ..bounds_vec[iix].uses_start as usize + bounds_vec[iix].uses_len as usize],
407             );
408             let s_mod = show_regs(
409                 &reg_vecs.mods[bounds_vec[iix].mods_start as usize
410                     ..bounds_vec[iix].mods_start as usize + bounds_vec[iix].mods_len as usize],
411             );
412             let s_def = show_regs(
413                 &reg_vecs.defs[bounds_vec[iix].defs_start as usize
414                     ..bounds_vec[iix].defs_start as usize + bounds_vec[iix].defs_len as usize],
415             );
416             debug!(
417                 "{:?}  SAN_RU: use {{ {}}} mod {{ {}}} def {{ {}}}",
418                 iix, s_use, s_mod, s_def
419             );
420         }
421     }
422 
423     Ok(RegVecsAndBounds::new(reg_vecs, bounds_vec))
424 }
425 // END main function
426 
427 //=============================================================================
428 // Data flow analysis: extraction and sanitization of reg-use information:
429 // convenience interface
430 
431 // ==== EXPORTED ====
432 #[inline(always)]
pub fn does_inst_use_def_or_mod_reg(
434     rvb: &RegVecsAndBounds,
435     iix: InstIx,
436     reg: Reg,
437 ) -> (/*uses*/ bool, /*defs*/ bool, /*mods*/ bool) {
438     let bounds = &rvb.bounds[iix];
439     let vecs = &rvb.vecs;
440     let mut uses = false;
441     let mut defs = false;
442     let mut mods = false;
443     // Since each group of registers is in order and duplicate-free (as a result
444     // of `remove_dups_from_group`), we could in theory binary-search here.  But
445     // it'd almost certainly be a net loss; the group sizes are very small,
446     // often zero.
447     for i in bounds.uses_start as usize..bounds.uses_start as usize + bounds.uses_len as usize {
448         if vecs.uses[i] == reg {
449             uses = true;
450             break;
451         }
452     }
453     for i in bounds.defs_start as usize..bounds.defs_start as usize + bounds.defs_len as usize {
454         if vecs.defs[i] == reg {
455             defs = true;
456             break;
457         }
458     }
459     for i in bounds.mods_start as usize..bounds.mods_start as usize + bounds.mods_len as usize {
460         if vecs.mods[i] == reg {
461             mods = true;
462             break;
463         }
464     }
465     (uses, defs, mods)
466 }
467 
468 // ==== EXPORTED ====
469 // This is slow, really slow.  Don't use it on critical paths.  This applies
470 // `get_regs` to `inst`, performs cleanups (1) and (2), but does not sanitize
471 // the results.  The results are wrapped up as Sets for convenience.
472 // JRS 2020Apr09: remove this if no further use for it appears soon.
473 #[allow(dead_code)]
474 #[inline(never)]
pub fn get_raw_reg_sets_for_insn<F: Function>(inst: &F::Inst) -> RegSets {
476     let mut reg_vecs = RegVecs::new(false);
477     let mut bounds = RegVecBounds::new();
478 
479     add_raw_reg_vecs_for_insn::<F>(inst, &mut reg_vecs, &mut bounds);
480 
481     // Make up a fake RegVecsAndBounds for just this insn, so we can hand it to
482     // RegVecsAndBounds::get_reg_sets_for_iix.
483     let mut single_insn_bounds = TypedIxVec::<InstIx, RegVecBounds>::new();
484     single_insn_bounds.push(bounds);
485 
486     assert!(!reg_vecs.is_sanitized());
487     let single_insn_rvb = RegVecsAndBounds::new(reg_vecs, single_insn_bounds);
488     single_insn_rvb.get_reg_sets_for_iix(InstIx::new(0))
489 }
490 
491 // ==== EXPORTED ====
// This is even slower.  This applies `get_regs` to `inst` and performs cleanups
// (1), (2) and (3).  The results are wrapped up as Sets for convenience.  Note
494 // this function can fail.
495 #[inline(never)]
pub fn get_san_reg_sets_for_insn<F: Function>(
497     inst: &F::Inst,
498     reg_universe: &RealRegUniverse,
499 ) -> Result<RegSets, RealReg> {
500     let mut reg_vecs = RegVecs::new(false);
501     let mut bounds = RegVecBounds::new();
502 
503     add_san_reg_vecs_for_insn::<F>(inst, &reg_universe, &mut reg_vecs, &mut bounds)?;
504 
505     // Make up a fake RegVecsAndBounds for just this insn, so we can hand it to
506     // RegVecsAndBounds::get_reg_sets_for_iix.
507     let mut single_insn_bounds = TypedIxVec::<InstIx, RegVecBounds>::new();
508     single_insn_bounds.push(bounds);
509 
510     assert!(!reg_vecs.is_sanitized());
511     reg_vecs.set_sanitized(true);
512     let single_insn_rvb = RegVecsAndBounds::new(reg_vecs, single_insn_bounds);
513     Ok(single_insn_rvb.get_reg_sets_for_iix(InstIx::new(0)))
514 }
515 
516 //=============================================================================
517 // Data flow analysis: calculation of per-block register def and use sets
518 
519 // Returned TypedIxVecs contain one element per block
520 #[inline(never)]
pub fn calc_def_and_use<F: Function>(
522     func: &F,
523     rvb: &RegVecsAndBounds,
524     univ: &RealRegUniverse,
525 ) -> (
526     TypedIxVec<BlockIx, SparseSet<Reg>>,
527     TypedIxVec<BlockIx, SparseSet<Reg>>,
528 ) {
529     info!("    calc_def_and_use: begin");
530     assert!(rvb.is_sanitized());
531     let mut def_sets = TypedIxVec::new();
532     let mut use_sets = TypedIxVec::new();
533     for b in func.blocks() {
534         let mut def = SparseSet::empty();
535         let mut uce = SparseSet::empty();
536         for iix in func.block_insns(b) {
537             let bounds_for_iix = &rvb.bounds[iix];
            // Add to `uce` any registers for which the first event in this block
            // is a read.  Dealing with the "first event" constraint is a bit
            // tricky.  In the next two loops, `u` and `m` are registers that are
            // used (either read or modified) by the instruction.  Whether we
            // should consider such a register live-in for the block depends on
            // whether it has been written earlier in the block.  We can determine
            // that by checking whether it is already in the def set for the block.
            // FIXME: isn't this just:
            //   uce union= (regs_u minus def)   followed by
            //   uce union= (regs_m minus def)
548             for i in bounds_for_iix.uses_start as usize
549                 ..bounds_for_iix.uses_start as usize + bounds_for_iix.uses_len as usize
550             {
551                 let u = rvb.vecs.uses[i];
552                 if !def.contains(u) {
553                     uce.insert(u);
554                 }
555             }
556             for i in bounds_for_iix.mods_start as usize
557                 ..bounds_for_iix.mods_start as usize + bounds_for_iix.mods_len as usize
558             {
559                 let m = rvb.vecs.mods[i];
560                 if !def.contains(m) {
561                     uce.insert(m);
562                 }
563             }
564 
565             // Now add to `def`, all registers written by the instruction.
566             // This is simpler.
567             // FIXME: isn't this just: def union= (regs_d union regs_m) ?
568             for i in bounds_for_iix.defs_start as usize
569                 ..bounds_for_iix.defs_start as usize + bounds_for_iix.defs_len as usize
570             {
571                 let d = rvb.vecs.defs[i];
572                 def.insert(d);
573             }
574             for i in bounds_for_iix.mods_start as usize
575                 ..bounds_for_iix.mods_start as usize + bounds_for_iix.mods_len as usize
576             {
577                 let m = rvb.vecs.mods[i];
578                 def.insert(m);
579             }
580         }
581         def_sets.push(def);
582         use_sets.push(uce);
583     }
584 
585     assert!(def_sets.len() == use_sets.len());
586 
587     if log_enabled!(Level::Debug) {
588         let mut n = 0;
589         debug!("");
590         for (def_set, use_set) in def_sets.iter().zip(use_sets.iter()) {
591             let mut first = true;
592             let mut defs_str = "".to_string();
593             for def in def_set.to_vec() {
594                 if !first {
595                     defs_str = defs_str + &" ".to_string();
596                 }
597                 first = false;
598                 defs_str = defs_str + &def.show_with_rru(univ);
599             }
600             first = true;
601             let mut uses_str = "".to_string();
602             for uce in use_set.to_vec() {
603                 if !first {
604                     uses_str = uses_str + &" ".to_string();
605                 }
606                 first = false;
607                 uses_str = uses_str + &uce.show_with_rru(univ);
608             }
609             debug!(
610                 "{:<3?}   def {{{}}}  use {{{}}}",
611                 BlockIx::new(n),
612                 defs_str,
613                 uses_str
614             );
615             n += 1;
616         }
617     }
618 
619     info!("    calc_def_and_use: end");
620     (def_sets, use_sets)
621 }
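
// A minimal sketch (not part of the allocator) of the per-block def/use
// computation above, written as the set algebra hinted at by the FIXME
// comments: for each instruction in order,
//     use ∪= (regs_u ∪ regs_m) \ def      then      def ∪= regs_d ∪ regs_m
// Plain `u32`s and `HashSet`s stand in for `Reg` and `SparseSet`, and the
// three instructions are hypothetical.
#[cfg(test)]
mod def_use_sketch {
    use std::collections::HashSet;

    #[test]
    fn def_and_use_for_one_block() {
        // (uses, defs, mods) for three instructions:
        //   i0:  v2 := v1       i1:  v2 += v3       i2:  v4 := v2
        let insns: Vec<(Vec<u32>, Vec<u32>, Vec<u32>)> = vec![
            (vec![1], vec![2], vec![]),
            (vec![3], vec![], vec![2]),
            (vec![2], vec![4], vec![]),
        ];

        let mut def = HashSet::new();
        let mut uce = HashSet::new();
        for (uses, defs, mods) in &insns {
            // A register is "used" by the block iff its first event is a read
            // or modify, i.e. it has not already been written in this block.
            for r in uses.iter().chain(mods.iter()) {
                if !def.contains(r) {
                    uce.insert(*r);
                }
            }
            // Anything written or modified counts as defined by the block.
            for r in defs.iter().chain(mods.iter()) {
                def.insert(*r);
            }
        }

        assert_eq!(uce, HashSet::from([1, 3]));
        assert_eq!(def, HashSet::from([2, 4]));
    }
}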
622 
623 //=============================================================================
624 // Data flow analysis: computation of per-block register live-in and live-out
625 // sets
626 
627 // Returned vectors contain one element per block
628 #[inline(never)]
pub fn calc_livein_and_liveout<F: Function>(
630     func: &F,
631     def_sets_per_block: &TypedIxVec<BlockIx, SparseSet<Reg>>,
632     use_sets_per_block: &TypedIxVec<BlockIx, SparseSet<Reg>>,
633     cfg_info: &CFGInfo,
634     univ: &RealRegUniverse,
635 ) -> (
636     TypedIxVec<BlockIx, SparseSet<Reg>>,
637     TypedIxVec<BlockIx, SparseSet<Reg>>,
638 ) {
639     info!("    calc_livein_and_liveout: begin");
640     let num_blocks = func.blocks().len() as u32;
641     let empty = SparseSet::<Reg>::empty();
642 
643     let mut num_evals = 0;
644     let mut liveouts = TypedIxVec::<BlockIx, SparseSet<Reg>>::new();
645     liveouts.resize(num_blocks, empty.clone());
646 
647     // Initialise the work queue so as to do a reverse preorder traversal
648     // through the graph, after which blocks are re-evaluated on demand.
649     let mut work_queue = Queue::<BlockIx>::new();
650     for i in 0..num_blocks {
651         // block_ix travels in "reverse preorder"
652         let block_ix = cfg_info.pre_ord[(num_blocks - 1 - i) as usize];
653         work_queue.push_back(block_ix);
654     }
655 
    // `in_queue` is an optimisation -- this routine works fine without it.  It is
    // used to avoid inserting duplicate work items into `work_queue`, which avoids
    // some duplicate re-evaluations and gets us to a fixed point faster.  Very
    // roughly, it reduces the number of evaluations per block from around 3 to
    // around 2.
661     let mut in_queue = Vec::<bool>::new();
662     in_queue.resize(num_blocks as usize, true);
663 
664     while let Some(block_ix) = work_queue.pop_front() {
665         let i = block_ix.get() as usize;
666         assert!(in_queue[i]);
667         in_queue[i] = false;
668 
669         // Compute a new value for liveouts[block_ix]
670         let mut set = SparseSet::<Reg>::empty();
671         for block_j_ix in cfg_info.succ_map[block_ix].iter() {
672             let mut live_in_j = liveouts[*block_j_ix].clone();
673             live_in_j.remove(&def_sets_per_block[*block_j_ix]);
674             live_in_j.union(&use_sets_per_block[*block_j_ix]);
675             set.union(&live_in_j);
676         }
677         num_evals += 1;
678 
679         if !set.equals(&liveouts[block_ix]) {
680             liveouts[block_ix] = set;
681             // Add `block_ix`'s predecessors to the work queue, since their
682             // liveout values might be affected.
683             for block_j_ix in cfg_info.pred_map[block_ix].iter() {
684                 let j = block_j_ix.get() as usize;
685                 if !in_queue[j] {
686                     work_queue.push_back(*block_j_ix);
687                     in_queue[j] = true;
688                 }
689             }
690         }
691     }
692 
693     // The liveout values are done, but we need to compute the liveins
694     // too.
695     let mut liveins = TypedIxVec::<BlockIx, SparseSet<Reg>>::new();
696     liveins.resize(num_blocks, empty.clone());
697     for block_ix in BlockIx::new(0).dotdot(BlockIx::new(num_blocks)) {
698         let mut live_in = liveouts[block_ix].clone();
699         live_in.remove(&def_sets_per_block[block_ix]);
700         live_in.union(&use_sets_per_block[block_ix]);
701         liveins[block_ix] = live_in;
702     }
703 
704     if false {
705         let mut sum_card_live_in = 0;
706         let mut sum_card_live_out = 0;
707         for bix in BlockIx::new(0).dotdot(BlockIx::new(num_blocks)) {
708             sum_card_live_in += liveins[bix].card();
709             sum_card_live_out += liveouts[bix].card();
710         }
711         println!(
712             "QQQQ calc_LI/LO: num_evals {}, tot LI {}, tot LO {}",
713             num_evals, sum_card_live_in, sum_card_live_out
714         );
715     }
716 
717     let ratio: f32 = (num_evals as f32) / ((if num_blocks == 0 { 1 } else { num_blocks }) as f32);
718     info!(
719         "    calc_livein_and_liveout:   {} blocks, {} evals ({:<.2} per block)",
720         num_blocks, num_evals, ratio
721     );
722 
723     if log_enabled!(Level::Debug) {
724         let mut n = 0;
725         debug!("");
726         for (livein, liveout) in liveins.iter().zip(liveouts.iter()) {
727             let mut first = true;
728             let mut li_str = "".to_string();
729             for li in livein.to_vec() {
730                 if !first {
731                     li_str = li_str + &" ".to_string();
732                 }
733                 first = false;
734                 li_str = li_str + &li.show_with_rru(univ);
735             }
736             first = true;
737             let mut lo_str = "".to_string();
738             for lo in liveout.to_vec() {
739                 if !first {
740                     lo_str = lo_str + &" ".to_string();
741                 }
742                 first = false;
743                 lo_str = lo_str + &lo.show_with_rru(univ);
744             }
745             debug!(
746                 "{:<3?}   livein {{{}}}  liveout {{{}}}",
747                 BlockIx::new(n),
748                 li_str,
749                 lo_str
750             );
751             n += 1;
752         }
753     }
754 
755     info!("    calc_livein_and_liveout: end");
756     (liveins, liveouts)
757 }
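
// A minimal sketch (not part of the allocator) of the backward liveness
// fixed point computed above, on a hypothetical three-block CFG.  Plain
// `HashSet<u32>`s stand in for `SparseSet<Reg>`, and the work-queue/`in_queue`
// machinery is omitted: iterating until nothing changes reaches the same fixed
// point
//     liveout[b] = union over successors s of (use[s] ∪ (liveout[s] \ def[s]))
//     livein[b]  = use[b] ∪ (liveout[b] \ def[b])
#[cfg(test)]
mod liveness_sketch {
    use std::collections::HashSet;

    #[test]
    fn straight_line_with_a_loop() {
        // Block 0 defines reg 1, block 1 loops on itself, block 2 reads reg 1.
        //   block 0: def {1}, use {},  succs [1]
        //   block 1: def {},  use {},  succs [1, 2]
        //   block 2: def {},  use {1}, succs []
        let def = [HashSet::from([1u32]), HashSet::new(), HashSet::new()];
        let uses = [HashSet::new(), HashSet::new(), HashSet::from([1u32])];
        let succs: [Vec<usize>; 3] = [vec![1], vec![1, 2], vec![]];

        let mut liveout: [HashSet<u32>; 3] = [HashSet::new(), HashSet::new(), HashSet::new()];
        loop {
            let mut changed = false;
            for b in 0..3 {
                let mut new_out: HashSet<u32> = HashSet::new();
                for &s in &succs[b] {
                    // livein of the successor: use[s] ∪ (liveout[s] \ def[s])
                    new_out.extend(uses[s].iter().copied());
                    new_out.extend(liveout[s].difference(&def[s]).copied());
                }
                if new_out != liveout[b] {
                    liveout[b] = new_out;
                    changed = true;
                }
            }
            if !changed {
                break;
            }
        }

        let livein: Vec<HashSet<u32>> = (0..3)
            .map(|b| {
                let mut s: HashSet<u32> = uses[b].iter().copied().collect();
                s.extend(liveout[b].difference(&def[b]).copied());
                s
            })
            .collect();

        // Reg 1 must stay live from its definition in block 0, through the
        // loop block 1, up to its use in block 2.
        assert_eq!(liveout[0], HashSet::from([1]));
        assert_eq!(liveout[1], HashSet::from([1]));
        assert!(liveout[2].is_empty());
        assert!(livein[0].is_empty());
        assert_eq!(livein[1], HashSet::from([1]));
        assert_eq!(livein[2], HashSet::from([1]));
    }
}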
758 
759 //=============================================================================
760 // Computation of RangeFrags (Live Range Fragments), aggregated per register.
761 // This does not produce complete live ranges.  That is done later, by
762 // `merge_range_frags` below, using the information computed in this section
763 // by `get_range_frags`.
764 
765 // This is surprisingly complex, in part because of the need to correctly
766 // handle (1) live-in and live-out Regs, (2) dead writes, and (3) instructions
767 // that modify registers rather than merely reading or writing them.
768 
769 /// A ProtoRangeFrag carries information about a [write .. read] range, within a Block, which
770 /// we will later turn into a fully-fledged RangeFrag.  It basically records the first and
771 /// last-known InstPoints for appearances of a Reg.
772 ///
773 /// ProtoRangeFrag also keeps count of the number of appearances of the Reg to which it
774 /// pertains, using `uses`.  The counts get rolled into the resulting RangeFrags, and later are
775 /// used to calculate spill costs.
776 ///
/// The running state of `get_range_frags_for_block` is a map from Reg to ProtoRangeFrag.
/// Only Regs that
778 /// actually appear in the Block (or are live-in to it) are mapped.  This has the advantage of
779 /// economy, since most Regs will not appear in (or be live-in to) most Blocks.
780 #[derive(Clone)]
781 struct ProtoRangeFrag {
782     /// The InstPoint in this Block at which the associated Reg most recently became live (when
783     /// moving forwards though the Block).  If this value is the first InstPoint for the Block
784     /// (the U point for the Block's lowest InstIx), that indicates the associated Reg is
785     /// live-in to the Block.
786     first: InstPoint,
787 
788     /// This is the InstPoint which is the end point (most recently observed read, in general)
789     /// for the current RangeFrag under construction.  In general we will move `last` forwards
790     /// as we discover reads of the associated Reg.  If this is the last InstPoint for the
    /// Block (the D point for the Block's highest InstIx), that indicates that the associated
792     /// reg is live-out from the Block.
793     last: InstPoint,
794 
795     /// Number of mentions of the associated Reg in this ProtoRangeFrag.
796     num_mentions: u16,
797 }
798 
799 impl fmt::Debug for ProtoRangeFrag {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
801         write!(
802             fmt,
803             "{:?}x {:?} - {:?}",
804             self.num_mentions, self.first, self.last
805         )
806     }
807 }
808 
809 // `fn get_range_frags` and `fn get_range_frags_for_block` below work with two vectors,
810 // `out_map` and `state`, that are indexed by register.  This allows them to entirely avoid the
811 // use of hash-based `Map`s.  However, it does give a problem in that real and virtual registers
812 // occupy separate, zero-based index spaces.  To solve this, we map `Reg`s to a "unified index
813 // space" as follows:
814 //
815 // a `RealReg` is mapped to its `.get_index()` value
816 //
817 // a `VirtualReg` is mapped to its `.get_index()` value + the number of real registers
818 //
819 // To make this not too inconvenient, `fn reg_to_reg_ix` and `fn reg_ix_to_reg` convert `Reg`s
820 // to and from the unified index space.  This has the annoying side effect that reconstructing a
821 // `Reg` from an index space value requires having available both the register universe, and a
822 // table specifying the class for each virtual register.
823 //
824 // Really, we ought to rework the `Reg`/`RealReg`/`VirtualReg` abstractions, so as to (1) impose
825 // a single index space for both register kinds, and (2) so as to separate the concepts of the
826 // register index from the `Reg` itself.  This second point would have the additional benefit of
827 // making it feasible to represent sets of registers using bit sets.
828 
829 #[inline(always)]
pub(crate) fn reg_to_reg_ix(num_real_regs: u32, r: Reg) -> u32 {
831     if r.is_real() {
832         r.get_index_u32()
833     } else {
834         num_real_regs + r.get_index_u32()
835     }
836 }
837 
838 #[inline(always)]
pub(crate) fn reg_ix_to_reg(
840     reg_universe: &RealRegUniverse,
841     vreg_classes: &Vec</*vreg index,*/ RegClass>,
842     reg_ix: u32,
843 ) -> Reg {
844     let reg_ix = reg_ix as usize;
845     let num_real_regs = reg_universe.regs.len();
846     if reg_ix < num_real_regs {
847         reg_universe.regs[reg_ix].0.to_reg()
848     } else {
849         let vreg_ix = reg_ix - num_real_regs;
850         Reg::new_virtual(vreg_classes[vreg_ix], vreg_ix as u32)
851     }
852 }
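
// A minimal sketch (not part of the allocator) of the unified index space
// described above, with plain numbers standing in for registers: real register
// indices occupy [0, num_real_regs) and virtual register indices are offset by
// `num_real_regs`, so the mapping is trivially invertible.
#[cfg(test)]
mod unified_index_sketch {
    fn to_unified(num_real_regs: u32, is_virtual: bool, index: u32) -> u32 {
        if is_virtual {
            num_real_regs + index
        } else {
            index
        }
    }

    fn from_unified(num_real_regs: u32, unified: u32) -> (bool, u32) {
        if unified < num_real_regs {
            (false, unified)
        } else {
            (true, unified - num_real_regs)
        }
    }

    #[test]
    fn round_trip() {
        let num_real_regs = 16;
        // Real register 3 and virtual register 3 land in distinct slots.
        assert_eq!(to_unified(num_real_regs, false, 3), 3);
        assert_eq!(to_unified(num_real_regs, true, 3), 19);
        assert_eq!(from_unified(num_real_regs, 3), (false, 3));
        assert_eq!(from_unified(num_real_regs, 19), (true, 3));
    }
}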
853 
854 // HELPER FUNCTION
855 // Add to `out_map`, a binding from `reg` to the frags-and-metrics pair specified by `frag` and
856 // `frag_metrics`.  As a space-saving optimisation, make some attempt to avoid creating
857 // duplicate entries in `out_frags` and `out_frag_metrics`.
858 #[inline(always)]
fn emit_range_frag(
860     out_map: &mut Vec</*rreg index, then vreg index, */ SmallVec<[RangeFragIx; 8]>>,
861     out_frags: &mut TypedIxVec<RangeFragIx, RangeFrag>,
862     out_frag_metrics: &mut TypedIxVec<RangeFragIx, RangeFragMetrics>,
863     num_real_regs: u32,
864     reg: Reg,
865     frag: &RangeFrag,
866     frag_metrics: &RangeFragMetrics,
867 ) {
868     debug_assert!(out_frags.len() == out_frag_metrics.len());
869 
870     // Allocate a new RangeFragIx for `frag`, except, make some minimal effort to avoid huge
871     // numbers of duplicates by inspecting the previous two entries, and using them if
872     // possible.
873     let mut new_fix = None;
874 
875     let num_out_frags = out_frags.len();
876     if num_out_frags >= 2 {
877         let back_0 = RangeFragIx::new(num_out_frags - 1);
878         let back_1 = RangeFragIx::new(num_out_frags - 2);
879         if out_frags[back_0] == *frag && out_frag_metrics[back_0] == *frag_metrics {
880             new_fix = Some(back_0);
881         } else if out_frags[back_1] == *frag && out_frag_metrics[back_1] == *frag_metrics {
882             new_fix = Some(back_1);
883         }
884     }
885 
886     let new_fix = match new_fix {
887         Some(fix) => fix,
888         None => {
889             // We can't look back or there was no match; create a new one.
890             out_frags.push(frag.clone());
891             out_frag_metrics.push(frag_metrics.clone());
892             RangeFragIx::new(out_frags.len() as u32 - 1)
893         }
894     };
895 
896     // And use the new RangeFragIx.
897     out_map[reg_to_reg_ix(num_real_regs, reg) as usize].push(new_fix);
898 }
899 
900 /// Calculate all the RangeFrags for `bix`.  Add them to `out_frags` and corresponding metrics
901 /// data to `out_frag_metrics`.  Add to `out_map`, the associated RangeFragIxs, segregated by
902 /// Reg.  `bix`, `livein`, `liveout` and `rvb` are expected to be valid in the context of the
903 /// Func `f` (duh!).
904 #[inline(never)]
fn get_range_frags_for_block<F: Function>(
906     // Constants
907     func: &F,
908     rvb: &RegVecsAndBounds,
909     reg_universe: &RealRegUniverse,
910     vreg_classes: &Vec</*vreg index,*/ RegClass>,
911     bix: BlockIx,
912     livein: &SparseSet<Reg>,
913     liveout: &SparseSet<Reg>,
914     // Preallocated storage for use in this function.  They do not carry any useful information
915     // in between calls here.
916     visited: &mut Vec<u32>,
917     state: &mut Vec</*rreg index, then vreg index, */ Option<ProtoRangeFrag>>,
918     // These accumulate the results of RangeFrag/RangeFragMetrics across multiple calls here.
919     out_map: &mut Vec</*rreg index, then vreg index, */ SmallVec<[RangeFragIx; 8]>>,
920     out_frags: &mut TypedIxVec<RangeFragIx, RangeFrag>,
921     out_frag_metrics: &mut TypedIxVec<RangeFragIx, RangeFragMetrics>,
922 ) {
923     #[inline(always)]
    fn plus1(n: u16) -> u16 {
925         if n == 0xFFFFu16 {
926             n
927         } else {
928             n + 1
929         }
930     }
931 
932     // Invariants for the preallocated storage:
933     //
934     // * `visited` is always irrelevant (and cleared) at the start
935     //
936     // * `state` always has size (# real regs + # virtual regs).  However, all its entries
937     // should be `None` in between calls here.
938 
939     // We use `visited` to keep track of which `state` entries need processing at the end of
940     // this function.  Since `state` is indexed by unified-reg-index, it follows that `visited`
941     // is a vector of unified-reg-indices.  We add an entry to `visited` whenever we change a
942     // `state` entry from `None` to `Some`.  This guarantees that we can find all the `Some`
943     // `state` entries at the end of the function, change them back to `None`, and emit the
944     // corresponding fragment.
945     visited.clear();
946 
947     // Some handy constants.
948     assert!(func.block_insns(bix).len() >= 1);
949     let first_iix_in_block = func.block_insns(bix).first();
950     let last_iix_in_block = func.block_insns(bix).last();
951     let first_pt_in_block = InstPoint::new_use(first_iix_in_block);
952     let last_pt_in_block = InstPoint::new_def(last_iix_in_block);
953     let num_real_regs = reg_universe.regs.len() as u32;
954 
955     // First, set up `state` as if all of `livein` had been written just prior to the block.
956     for r in livein.iter() {
957         let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
958         debug_assert!(state[r_state_ix].is_none());
959         state[r_state_ix] = Some(ProtoRangeFrag {
960             num_mentions: 0,
961             first: first_pt_in_block,
962             last: first_pt_in_block,
963         });
964         visited.push(r_state_ix as u32);
965     }
966 
967     // Now visit each instruction in turn, examining first the registers it reads, then those it
968     // modifies, and finally those it writes.
969     for iix in func.block_insns(bix) {
970         let bounds_for_iix = &rvb.bounds[iix];
971 
972         // Examine reads.  This is pretty simple.  They simply extend an existing ProtoRangeFrag
973         // to the U point of the reading insn.
974         for i in
975             bounds_for_iix.uses_start..bounds_for_iix.uses_start + bounds_for_iix.uses_len as u32
976         {
977             let r = rvb.vecs.uses[i as usize];
978             let r_state_ix = reg_to_reg_ix(num_real_regs, r) as usize;
979             match &mut state[r_state_ix] {
980                 // First event for `r` is a read, but it's not listed in `livein`, since otherwise
981                 // `state` would have an entry for it.
982                 None => panic!("get_range_frags_for_block: fail #1"),
983                 Some(ref mut pf) => {
                    // This is the first or subsequent read after a write.  Note that the "write" can
985                     // be either a real write, or due to the fact that `r` is listed in `livein`.
986                     // We don't care here.
987                     pf.num_mentions = plus1(pf.num_mentions);
988                     let new_last = InstPoint::new_use(iix);
989                     debug_assert!(pf.last <= new_last);
990                     pf.last = new_last;
991                 }
992             }
993         }
994 
995         // Examine modifies.  These are handled almost identically to reads, except that they
996         // extend an existing ProtoRangeFrag down to the D point of the modifying insn.
997         for i in
998             bounds_for_iix.mods_start..bounds_for_iix.mods_start + bounds_for_iix.mods_len as u32
999         {
1000             let r = &rvb.vecs.mods[i as usize];
1001             let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
1002             match &mut state[r_state_ix] {
1003                 // First event for `r` is a read (really, since this insn modifies `r`), but it's
1004                 // not listed in `livein`, since otherwise `state` would have an entry for it.
1005                 None => panic!("get_range_frags_for_block: fail #2"),
1006                 Some(ref mut pf) => {
                    // This is the first or subsequent modify after a write.
1008                     pf.num_mentions = plus1(pf.num_mentions);
1009                     let new_last = InstPoint::new_def(iix);
1010                     debug_assert!(pf.last <= new_last);
1011                     pf.last = new_last;
1012                 }
1013             }
1014         }
1015 
1016         // Examine writes (but not writes implied by modifies).  The general idea is that a
1017         // write causes us to terminate and emit the existing ProtoRangeFrag, if any, and start
1018         // a new frag.
1019         for i in
1020             bounds_for_iix.defs_start..bounds_for_iix.defs_start + bounds_for_iix.defs_len as u32
1021         {
1022             let r = &rvb.vecs.defs[i as usize];
1023             let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
1024             match &mut state[r_state_ix] {
1025                 // First mention of a Reg we've never heard of before.  Start a new
1026                 // ProtoRangeFrag for it and keep going.
1027                 None => {
1028                     let new_pt = InstPoint::new_def(iix);
1029                     let new_pf = ProtoRangeFrag {
1030                         num_mentions: 1,
1031                         first: new_pt,
1032                         last: new_pt,
1033                     };
1034                     state[r_state_ix] = Some(new_pf);
1035                     visited.push(r_state_ix as u32);
1036                 }
1037 
1038                 // There's already a ProtoRangeFrag for `r`.  This write will start a new one,
1039                 // so emit the existing one and note this write.
1040                 Some(ProtoRangeFrag {
1041                     ref mut num_mentions,
1042                     ref mut first,
1043                     ref mut last,
1044                 }) => {
1045                     if first == last {
1046                         debug_assert!(*num_mentions == 1);
1047                     }
1048 
1049                     let (frag, frag_metrics) =
1050                         RangeFrag::new_with_metrics(func, bix, *first, *last, *num_mentions);
1051                     emit_range_frag(
1052                         out_map,
1053                         out_frags,
1054                         out_frag_metrics,
1055                         num_real_regs,
1056                         *r,
1057                         &frag,
1058                         &frag_metrics,
1059                     );
1060                     let new_pt = InstPoint::new_def(iix);
1061                     // Reuse the previous entry for this new definition of the same vreg.
1062                     *num_mentions = 1;
1063                     *first = new_pt;
1064                     *last = new_pt;
1065                 }
1066             }
1067         }
1068     }
1069 
1070     // We are at the end of the block.  We still have to deal with live-out Regs.  We must also
1071     // deal with ProtoRangeFrags in `state` that are for registers not listed as live-out.
1072 
1073     // Deal with live-out Regs.  Treat each one as if it is read just after the block.
1074     for r in liveout.iter() {
1075         let r_state_ix = reg_to_reg_ix(num_real_regs, *r) as usize;
1076         let state_elem_p = &mut state[r_state_ix];
1077         match state_elem_p {
1078             // This can't happen.  `r` is in `liveout`, but this implies that it is neither
1079             // defined in the block nor present in `livein`.
1080             None => panic!("get_range_frags_for_block: fail #3"),
1081             Some(ref pf) => {
1082                 // `r` is written (or modified), either literally or by virtue of being present
1083                 // in `livein`, and may or may not subsequently be read -- we don't care,
1084                 // because it must be read "after" the block.  Create a `LiveOut` or `Thru` frag
1085                 // accordingly.
1086                 let (frag, frag_metrics) = RangeFrag::new_with_metrics(
1087                     func,
1088                     bix,
1089                     pf.first,
1090                     last_pt_in_block,
1091                     pf.num_mentions,
1092                 );
1093                 emit_range_frag(
1094                     out_map,
1095                     out_frags,
1096                     out_frag_metrics,
1097                     num_real_regs,
1098                     *r,
1099                     &frag,
1100                     &frag_metrics,
1101                 );
1102                 // Remove the entry from `state` so that the following loop doesn't process it
1103                 // again.
1104                 *state_elem_p = None;
1105             }
1106         }
1107     }
1108 
1109     // Finally, round up any remaining ProtoRangeFrags left in `state`.  This is what `visited`
1110     // is used for.
1111     for r_state_ix in visited {
1112         let state_elem_p = &mut state[*r_state_ix as usize];
1113         match state_elem_p {
1114             None => {}
1115             Some(pf) => {
1116                 if pf.first == pf.last {
1117                     debug_assert!(pf.num_mentions == 1);
1118                 }
1119                 let (frag, frag_metrics) =
1120                     RangeFrag::new_with_metrics(func, bix, pf.first, pf.last, pf.num_mentions);
1121                 let r = reg_ix_to_reg(reg_universe, vreg_classes, *r_state_ix);
1122                 emit_range_frag(
1123                     out_map,
1124                     out_frags,
1125                     out_frag_metrics,
1126                     num_real_regs,
1127                     r,
1128                     &frag,
1129                     &frag_metrics,
1130                 );
1131                 // Maintain invariant that all `state` entries are `None` in between calls to
1132                 // this function.
1133                 *state_elem_p = None;
1134             }
1135         }
1136     }
1137 }
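
// A minimal sketch (not part of the allocator) of how the per-block scan above
// turns the reads and writes of a single register into RangeFrag-like pieces.
// Instruction points are modelled as (instruction index, point) pairs with
// point 0 = Use and 1 = Def, loosely mirroring `InstPoint`, and the register
// is assumed to be neither live-in nor live-out.  A read extends the open
// fragment; a write closes it and opens a new one.
#[cfg(test)]
mod range_frag_sketch {
    type Pt = (u32, u8); // (instruction index, 0 = Use / 1 = Def)

    #[derive(Debug, PartialEq)]
    struct Frag {
        first: Pt,
        last: Pt,
    }

    #[test]
    fn a_write_splits_fragments() {
        // Events for one register within a block, in instruction order:
        //   i0: write, i1: read, i2: read, i3: write, i4: read
        let events = [(0, 'w'), (1, 'r'), (2, 'r'), (3, 'w'), (4, 'r')];

        let mut open: Option<Frag> = None;
        let mut frags = Vec::new();
        for &(iix, ev) in &events {
            match ev {
                // A read extends the open fragment up to this insn's Use point.
                'r' => open.as_mut().expect("read before any write").last = (iix, 0),
                // A write closes any open fragment and starts a new one at the
                // Def point of this insn.
                'w' => {
                    if let Some(f) = open.take() {
                        frags.push(f);
                    }
                    open = Some(Frag {
                        first: (iix, 1),
                        last: (iix, 1),
                    });
                }
                _ => unreachable!(),
            }
        }
        // Not live-out: the final open fragment ends at its last mention.
        frags.extend(open);

        assert_eq!(
            frags,
            vec![
                Frag { first: (0, 1), last: (2, 0) },
                Frag { first: (3, 1), last: (4, 0) },
            ]
        );
    }
}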
1138 
1139 #[inline(never)]
pub fn get_range_frags<F: Function>(
1141     func: &F,
1142     rvb: &RegVecsAndBounds,
1143     reg_universe: &RealRegUniverse,
1144     livein_sets_per_block: &TypedIxVec<BlockIx, SparseSet<Reg>>,
1145     liveout_sets_per_block: &TypedIxVec<BlockIx, SparseSet<Reg>>,
1146 ) -> (
1147     Vec</*rreg index, then vreg index, */ SmallVec<[RangeFragIx; 8]>>,
1148     TypedIxVec<RangeFragIx, RangeFrag>,
1149     TypedIxVec<RangeFragIx, RangeFragMetrics>,
1150     Vec</*vreg index,*/ RegClass>,
1151 ) {
1152     info!("    get_range_frags: begin");
1153     assert!(livein_sets_per_block.len() == func.blocks().len() as u32);
1154     assert!(liveout_sets_per_block.len() == func.blocks().len() as u32);
1155     assert!(rvb.is_sanitized());
1156 
1157     // In order that we can work with unified-reg-indices (see comments above), we need to know
1158     // the `RegClass` for each virtual register.  That info is collected here.
1159     let mut vreg_classes = vec![RegClass::INVALID; func.get_num_vregs()];
1160     for r in rvb
1161         .vecs
1162         .uses
1163         .iter()
1164         .chain(rvb.vecs.defs.iter())
1165         .chain(rvb.vecs.mods.iter())
1166     {
1167         if r.is_real() {
1168             continue;
1169         }
1170         let r_ix = r.get_index();
1171         // rustc 1.43.0 appears to have problems avoiding duplicate bounds checks for
1172         // `vreg_classes[r_ix]`; hence give it a helping hand here.
1173         let vreg_classes_ptr = &mut vreg_classes[r_ix];
1174         if *vreg_classes_ptr == RegClass::INVALID {
1175             *vreg_classes_ptr = r.get_class();
1176         } else {
1177             assert_eq!(*vreg_classes_ptr, r.get_class());
1178         }
1179     }
1180 
1181     let num_real_regs = reg_universe.regs.len();
1182     let num_virtual_regs = vreg_classes.len();
1183     let num_regs = num_real_regs + num_virtual_regs;
1184 
1185     // A state variable that's reused across calls to `get_range_frags_for_block`.  When not in
1186     // a call to `get_range_frags_for_block`, all entries should be `None`.
1187     let mut state = Vec::</*rreg index, then vreg index, */ Option<ProtoRangeFrag>>::new();
1188     state.resize(num_regs, None);
1189 
1190     // Scratch storage needed by `get_range_frags_for_block`.  Doesn't carry any useful info in
1191     // between calls.  Start it off not-quite-empty since it will always get used at least a
1192     // bit.
1193     let mut visited = Vec::<u32>::with_capacity(32);
1194 
1195     // `RangeFrag`/`RangeFragMetrics` are collected across multiple calls to
    // `get_range_frags_for_block` in these three vectors.  In other words, they collect the
1197     // overall results for this function.
    let mut result_frags = TypedIxVec::<RangeFragIx, RangeFrag>::new();
    let mut result_frag_metrics = TypedIxVec::<RangeFragIx, RangeFragMetrics>::new();
    let mut result_map =
        Vec::</*rreg index, then vreg index, */ SmallVec<[RangeFragIx; 8]>>::default();
    result_map.resize(num_regs, smallvec![]);

    for bix in func.blocks() {
        get_range_frags_for_block(
            func,
            rvb,
            reg_universe,
            &vreg_classes,
            bix,
            &livein_sets_per_block[bix],
            &liveout_sets_per_block[bix],
            &mut visited,
            &mut state,
            &mut result_map,
            &mut result_frags,
            &mut result_frag_metrics,
        );
    }

    assert!(state.len() == num_regs);
    assert!(result_map.len() == num_regs);
    assert!(vreg_classes.len() == num_virtual_regs);
    // This is pretty cheap (once per fn) and any failure will be catastrophic since it means we
    // may have forgotten some live range fragments.  Hence `assert!` and not `debug_assert!`.
    for state_elem in &state {
        assert!(state_elem.is_none());
    }

    if log_enabled!(Level::Debug) {
        debug!("");
        let mut n = 0;
        for frag in result_frags.iter() {
            debug!("{:<3?}   {:?}", RangeFragIx::new(n), frag);
            n += 1;
        }

        debug!("");
        for (reg_ix, frag_ixs) in result_map.iter().enumerate() {
            if frag_ixs.len() == 0 {
                continue;
            }
            let reg = reg_ix_to_reg(reg_universe, &vreg_classes, reg_ix as u32);
            debug!(
                "frags for {}   {:?}",
                reg.show_with_rru(reg_universe),
                frag_ixs
            );
        }
    }

    info!("    get_range_frags: end");
    assert!(result_frags.len() == result_frag_metrics.len());
    (result_map, result_frags, result_frag_metrics, vreg_classes)
}

//=============================================================================
// Auxiliary tasks involved in creating a single VirtualRange from its
// constituent RangeFragIxs:
//
// * The RangeFragIxs we are given here are purely within single blocks.
//   Here, we "compress" them, that is, merge those pairs that flow from one
//   block into the one that immediately follows it in the instruction
//   stream.  This does not imply anything about control flow; it is purely a
//   scheme for reducing the total number of fragments that need to be dealt
//   with during interference detection (later on).
//
// * Computation of metrics for the VirtualRange.  This is done by examining
//   metrics of the individual fragments, and must be done before they are
//   compressed.

// HELPER FUNCTION
// Does `frag1` describe some range of instructions that is followed
// immediately by `frag2` ?  Note that this assumes (and checks) that there
// are no spill or reload ranges in play at this point; there should not be.
// Note also, this is very conservative: it only merges the case where the two
// ranges are separated by a block boundary.  From measurements, it appears that
// this is the only case where merging is actually a win, though.
fn frags_are_mergeable(
    frag1: &RangeFrag,
    frag1metrics: &RangeFragMetrics,
    frag2: &RangeFrag,
    frag2metrics: &RangeFragMetrics,
) -> bool {
    assert!(frag1.first.pt().is_use_or_def());
    assert!(frag1.last.pt().is_use_or_def());
    assert!(frag2.first.pt().is_use_or_def());
    assert!(frag2.last.pt().is_use_or_def());

    if frag1metrics.bix != frag2metrics.bix
        && frag1.last.iix().plus(1) == frag2.first.iix()
        && frag1.last.pt() == Point::Def
        && frag2.first.pt() == Point::Use
    {
        assert!(
            frag1metrics.kind == RangeFragKind::LiveOut || frag1metrics.kind == RangeFragKind::Thru
        );
        assert!(
            frag2metrics.kind == RangeFragKind::LiveIn || frag2metrics.kind == RangeFragKind::Thru
        );
        return true;
    }

    false
}
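
// A worked example (hypothetical instruction/block indices): if `frag1` ends at the
// Def point of I9, the last instruction of block B3, and `frag2` starts at the Use
// point of I10, the first instruction of block B4 which immediately follows B3 in
// the instruction stream, then all four conditions above hold and the two fragments
// can be merged into one.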

// HELPER FUNCTION
// Create a compressed version of the fragments listed in `sorted_frag_ixs`,
// taking the opportunity to dereference them (look them up in `frag_env`) in
// the process.  Assumes that `sorted_frag_ixs` is indeed ordered so that the
// dereferenced frag sequence is in instruction order.
#[inline(never)]
fn deref_and_compress_sorted_range_frag_ixs(
    stats_num_vfrags_uncompressed: &mut usize,
    stats_num_vfrags_compressed: &mut usize,
    sorted_frag_ixs: &SortedRangeFragIxs,
    frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
    frag_metrics_env: &TypedIxVec<RangeFragIx, RangeFragMetrics>,
) -> SortedRangeFrags {
    let mut res = SortedRangeFrags::empty();

    let frag_ixs = &sorted_frag_ixs.frag_ixs;
    let num_frags = frag_ixs.len();
    *stats_num_vfrags_uncompressed += num_frags;

    if num_frags == 1 {
        // Nothing we can do.  Shortcut.
        res.frags.push(frag_env[frag_ixs[0]].clone());
        *stats_num_vfrags_compressed += 1;
        return res;
    }

    // BEGIN merge this frag sequence as much as possible
    assert!(num_frags > 1);

    let mut s = 0; // start point of current group
    let mut e = 0; // end point of current group
    loop {
        if s >= num_frags {
            break;
        }
        while e + 1 < num_frags
            && frags_are_mergeable(
                &frag_env[frag_ixs[e]],
                &frag_metrics_env[frag_ixs[e]],
                &frag_env[frag_ixs[e + 1]],
                &frag_metrics_env[frag_ixs[e + 1]],
            )
        {
            e += 1;
        }
        // s to e inclusive is a maximal group
        // emit (s, e)
        if s == e {
            // Can't compress this one
            res.frags.push(frag_env[frag_ixs[s]].clone());
        } else {
            let compressed_frag = RangeFrag {
                first: frag_env[frag_ixs[s]].first,
                last: frag_env[frag_ixs[e]].last,
            };
            res.frags.push(compressed_frag);
        }
        // move on
        s = e + 1;
        e = s;
    }
    // END merge this frag sequence as much as possible

    *stats_num_vfrags_compressed += res.frags.len();
    res
}
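
// The grouping loop above can be hard to follow in the abstract.  Below is a
// minimal, self-contained sketch of the same maximal-run grouping shape, using
// hypothetical data: plain integers stand in for `RangeFrag`s, and "mergeable"
// simply means "consecutive".  It is for illustration only.
#[cfg(test)]
mod compress_loop_sketch {
    #[test]
    fn groups_maximal_runs() {
        let xs = [1u32, 2, 3, 7, 8, 10];
        let n = xs.len();
        let mut groups = Vec::new();
        let mut s = 0; // start of current group
        let mut e = 0; // end of current group
        while s < n {
            // Extend the group while adjacent elements are "mergeable".
            while e + 1 < n && xs[e] + 1 == xs[e + 1] {
                e += 1;
            }
            // s to e inclusive is a maximal group; emit it as a single element.
            groups.push((xs[s], xs[e]));
            s = e + 1;
            e = s;
        }
        assert_eq!(groups, vec![(1, 3), (7, 8), (10, 10)]);
    }
}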

// HELPER FUNCTION
// Computes the `size`, `total_cost` and `spill_cost` values for a
// VirtualRange, while being very careful to avoid overflow.
fn calc_virtual_range_metrics(
    sorted_frag_ixs: &SortedRangeFragIxs,
    frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
    frag_metrics_env: &TypedIxVec<RangeFragIx, RangeFragMetrics>,
    estimated_frequencies: &TypedIxVec<BlockIx, u32>,
) -> (u16, u32, SpillCost) {
    assert!(frag_env.len() == frag_metrics_env.len());

    let mut tot_size: u32 = 0;
    let mut tot_cost: u32 = 0;

    for fix in &sorted_frag_ixs.frag_ixs {
        let frag = &frag_env[*fix];
        let frag_metrics = &frag_metrics_env[*fix];

        // Add on the size of this fragment, but make sure we can't
        // overflow a u32 no matter how many fragments there are.
        let mut frag_size: u32 = frag.last.iix().get() - frag.first.iix().get() + 1;
        frag_size = min(frag_size, 0xFFFFu32);
        tot_size += frag_size;
        tot_size = min(tot_size, 0xFFFFu32);
        // Here, tot_size <= 0xFFFF.  frag_metrics.count is u16.  estimated_frequencies[] is u32.
        // We must be careful not to overflow tot_cost, which is u32.
        let mut new_tot_cost: u64 = frag_metrics.count as u64; // at max 16 bits
        new_tot_cost *= estimated_frequencies[frag_metrics.bix] as u64; // at max 48 bits
        new_tot_cost += tot_cost as u64; // at max 48 bits + epsilon
        new_tot_cost = min(new_tot_cost, 0xFFFF_FFFFu64);

        // Hence this is safe.
        tot_cost = new_tot_cost as u32;
    }

    debug_assert!(tot_size <= 0xFFFF);
    let size = tot_size as u16;
    let total_cost = tot_cost;

    // Divide tot_cost by the total length, so as to increase the apparent
    // spill cost of short LRs.  This is so as to give the advantage to
    // short LRs in competition for registers.  This seems a bit of a hack
    // to me, but hey ..
    debug_assert!(tot_size >= 1);
    let spill_cost = SpillCost::finite(tot_cost as f32 / tot_size as f32);

    (size, total_cost, spill_cost)
}
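
// A worked numeric check of the clamped arithmetic above, written as a
// self-contained sketch with hypothetical numbers (plain integers rather than the
// real `RangeFrag`/`RangeFragMetrics` types), for illustration only.
#[cfg(test)]
mod range_metrics_arith_sketch {
    #[test]
    fn clamped_size_and_cost() {
        // A fragment covering instructions 3 ..= 7 has size 5, clamped to 16 bits.
        let (first, last) = (3u32, 7u32);
        let frag_size = (last - first + 1).min(0xFFFF);
        assert_eq!(frag_size, 5);

        // Cost = use/def count * estimated block frequency, accumulated in u64 and
        // clamped so that narrowing back to u32 cannot overflow.
        let count = 4u64; // at most 16 bits in practice
        let est_freq = 10u64; // a u32 value
        let tot_cost = (count * est_freq).min(0xFFFF_FFFF) as u32;
        assert_eq!(tot_cost, 40);

        // The spill cost divides total cost by total size, favouring short ranges.
        assert_eq!(tot_cost as f32 / frag_size as f32, 8.0);
    }
}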

// MAIN FUNCTION in this section
#[inline(never)]
fn create_and_add_range(
    stats_num_vfrags_uncompressed: &mut usize,
    stats_num_vfrags_compressed: &mut usize,
    result_real: &mut TypedIxVec<RealRangeIx, RealRange>,
    result_virtual: &mut TypedIxVec<VirtualRangeIx, VirtualRange>,
    reg: Reg,
    sorted_frag_ixs: SortedRangeFragIxs,
    frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
    frag_metrics_env: &TypedIxVec<RangeFragIx, RangeFragMetrics>,
    estimated_frequencies: &TypedIxVec<BlockIx, u32>,
) {
    if reg.is_virtual() {
        // First, compute the VirtualRange metrics.  This has to be done
        // before fragment compression.
        let (size, total_cost, spill_cost) = calc_virtual_range_metrics(
            &sorted_frag_ixs,
            frag_env,
            frag_metrics_env,
            estimated_frequencies,
        );

        // Now it's safe to compress the fragments.
        let sorted_frags = deref_and_compress_sorted_range_frag_ixs(
            stats_num_vfrags_uncompressed,
            stats_num_vfrags_compressed,
            &sorted_frag_ixs,
            frag_env,
            frag_metrics_env,
        );

        result_virtual.push(VirtualRange {
            vreg: reg.to_virtual_reg(),
            rreg: None,
            sorted_frags,
            is_ref: false, // analysis_reftypes.rs may later change this
            size,
            total_cost,
            spill_cost,
        });
    } else {
        result_real.push(RealRange {
            rreg: reg.to_real_reg(),
            sorted_frags: sorted_frag_ixs,
            is_ref: false, // analysis_reftypes.rs may later change this
        });
    }
}

//=============================================================================
// Merging of RangeFrags, producing the final LRs, including metrication and
// compression

// We need this in order to construct a UnionFind<usize>.
impl ToFromU32 for usize {
    // 64 bit
    #[cfg(target_pointer_width = "64")]
    fn to_u32(x: usize) -> u32 {
        if x < 0x1_0000_0000usize {
            x as u32
        } else {
            panic!("impl ToFromU32 for usize: to_u32: out of range")
        }
    }
    #[cfg(target_pointer_width = "64")]
    fn from_u32(x: u32) -> usize {
        x as usize
    }
    // 32 bit
    #[cfg(target_pointer_width = "32")]
    fn to_u32(x: usize) -> u32 {
        x as u32
    }
    #[cfg(target_pointer_width = "32")]
    fn from_u32(x: u32) -> usize {
        x as usize
    }
}

#[inline(never)]
pub fn merge_range_frags(
    frag_ix_vec_per_reg: &Vec</*rreg index, then vreg index, */ SmallVec<[RangeFragIx; 8]>>,
    frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
    frag_metrics_env: &TypedIxVec<RangeFragIx, RangeFragMetrics>,
    estimated_frequencies: &TypedIxVec<BlockIx, u32>,
    cfg_info: &CFGInfo,
    reg_universe: &RealRegUniverse,
    vreg_classes: &Vec</*vreg index,*/ RegClass>,
) -> (
    TypedIxVec<RealRangeIx, RealRange>,
    TypedIxVec<VirtualRangeIx, VirtualRange>,
) {
    assert!(frag_env.len() == frag_metrics_env.len());
    let mut stats_num_total_incoming_frags = 0;
    let mut stats_num_total_incoming_regs = 0;
    for all_frag_ixs_for_reg in frag_ix_vec_per_reg {
        stats_num_total_incoming_frags += all_frag_ixs_for_reg.len();
        if all_frag_ixs_for_reg.len() > 0 {
            stats_num_total_incoming_regs += 1;
        }
    }
    info!("    merge_range_frags: begin");
    info!("      in: {} in frag_env", frag_env.len());
    info!(
        "      in: {} regs containing in total {} frags",
        stats_num_total_incoming_regs, stats_num_total_incoming_frags
    );

    let mut stats_num_single_grps = 0;
    let mut stats_num_local_frags = 0;

    let mut stats_num_multi_grps_small = 0;
    let mut stats_num_multi_grps_large = 0;
    let mut stats_size_multi_grps_small = 0;
    let mut stats_size_multi_grps_large = 0;

    let mut stats_num_vfrags_uncompressed = 0;
    let mut stats_num_vfrags_compressed = 0;

    let mut result_real = TypedIxVec::<RealRangeIx, RealRange>::new();
    let mut result_virtual = TypedIxVec::<VirtualRangeIx, VirtualRange>::new();

    // BEGIN per_reg_loop
    for (reg_ix, all_frag_ixs_for_reg) in frag_ix_vec_per_reg.iter().enumerate() {
        let n_frags_for_this_reg = all_frag_ixs_for_reg.len();

        // The reg might never have been mentioned at all, especially if it's a real reg.
        if n_frags_for_this_reg == 0 {
            continue;
        }

        let reg_ix = reg_ix as u32;
        let reg = reg_ix_to_reg(reg_universe, vreg_classes, reg_ix);

        // Do some shortcutting.  First off, if there's only one frag for this reg, we can directly
        // give it its own live range, and have done.
        if n_frags_for_this_reg == 1 {
            create_and_add_range(
                &mut stats_num_vfrags_uncompressed,
                &mut stats_num_vfrags_compressed,
                &mut result_real,
                &mut result_virtual,
                reg,
                SortedRangeFragIxs::unit(all_frag_ixs_for_reg[0], frag_env),
                frag_env,
                frag_metrics_env,
                estimated_frequencies,
            );
            stats_num_single_grps += 1;
            continue;
        }

        // BEGIN merge `all_frag_ixs_for_reg` entries as much as possible.
        //
        // but .. if we come across independents (RangeFragKind::Local), pull them out immediately.

        // Try to avoid heap allocation if at all possible.  Up to 100 entries are very
        // common, so this is sized large to be effective.  Each entry is definitely
        // 16 bytes at most, so this will use 4KB stack at most, which is reasonable.
        let mut triples = SmallVec::<[(RangeFragIx, RangeFragKind, BlockIx); 256]>::new();

        // Create `triples`.  We will use it to guide the merging phase, but it is immutable there.
        for fix in all_frag_ixs_for_reg {
            let frag_metrics = &frag_metrics_env[*fix];

            if frag_metrics.kind == RangeFragKind::Local {
                // This frag is Local (standalone).  Give it its own Range and move on.  This is an
                // optimisation, but it's also necessary: the main fragment-merging logic below
                // relies on the fact that the fragments it is presented with are all either
                // LiveIn, LiveOut or Thru.
                create_and_add_range(
                    &mut stats_num_vfrags_uncompressed,
                    &mut stats_num_vfrags_compressed,
                    &mut result_real,
                    &mut result_virtual,
                    reg,
                    SortedRangeFragIxs::unit(*fix, frag_env),
                    frag_env,
                    frag_metrics_env,
                    estimated_frequencies,
                );
                stats_num_local_frags += 1;
                continue;
            }

            // This frag isn't Local (standalone) so we have to process it the slow way.
            triples.push((*fix, frag_metrics.kind, frag_metrics.bix));
        }

        let triples_len = triples.len();

        // This is the core of the merging algorithm.
        //
        // For each ix@(fix, kind, bix) in `triples` (order unimportant):
        //
        // (1) "Merge with blocks that are live 'downstream' from here":
        //     if fix is live-out or live-through:
        //        for b in succs[bix]
        //           for each ix2@(fix2, kind2, bix2) in `triples`
        //              if bix2 == b && kind2 is live-in or live-through:
        //                  merge(ix, ix2)
        //
        // (2) "Merge with blocks that are live 'upstream' from here":
        //     if fix is live-in or live-through:
        //        for b in preds[bix]
        //           for each ix2@(fix2, kind2, bix2) in `triples`
        //              if bix2 == b && kind2 is live-out or live-through:
        //                  merge(ix, ix2)
        //
        // `triples` remains unchanged.  The equivalence class info is accumulated
        // in `eclasses_uf` instead.  `eclasses_uf` entries are indices into
        // `triples`.
        //
        // Now, you might think it necessary to do both (1) and (2).  But no, they
        // are mutually redundant, since if two blocks are connected by a live
        // flow from one to the other, then they are also connected in the other
        // direction.  Hence checking one of the directions is enough.
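        //
        // A worked example (hypothetical): suppose this reg has a LiveOut frag in
        // block B1, whose successors are B2 and B3, and the reg has a LiveIn frag
        // in B2 and a Thru frag in B3.  Rule (1) unions the B1 entry with the B2
        // and B3 entries, so all three triples land in one equivalence class and
        // therefore contribute to a single live range.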
        let mut eclasses_uf = UnionFind::<usize>::new(triples_len);

        // We have two schemes for group merging, one of which is N^2 in the
        // length of triples, the other is N-log-N, but with higher constant
        // factors.  Some experimentation with the bz2 test on a Cortex A57 puts
        // the optimal crossover point between 200 and 300; it's not critical.
        // Having this protects us against bad behaviour for huge inputs whilst
        // still being fast for small inputs.
        if triples_len <= 250 {
            // The simple way, which is N^2 in the length of `triples`.
            for (ix, (_fix, kind, bix)) in triples.iter().enumerate() {
                // Deal with liveness flows outbound from `fix`. Meaning, (1) above.
                if *kind == RangeFragKind::LiveOut || *kind == RangeFragKind::Thru {
                    for b in cfg_info.succ_map[*bix].iter() {
                        // Visit all entries in `triples` that are for `b`.
                        for (ix2, (_fix2, kind2, bix2)) in triples.iter().enumerate() {
                            if *bix2 != *b || *kind2 == RangeFragKind::LiveOut {
                                continue;
                            }
                            debug_assert!(
                                *kind2 == RangeFragKind::LiveIn || *kind2 == RangeFragKind::Thru
                            );
                            // Now we know that liveness for this reg "flows" from `triples[ix]` to
                            // `triples[ix2]`.  So those two frags must be part of the same live
                            // range.  Note this.
                            if ix != ix2 {
                                eclasses_uf.union(ix, ix2); // Order of args irrelevant
                            }
                        }
                    }
                }
            } // outermost iteration over `triples`

            stats_num_multi_grps_small += 1;
            stats_size_multi_grps_small += triples_len;
        } else {
            // The more complex way, which is N-log-N in the length of `triples`.  This is the same
            // as the simple way, except that the innermost loop, which is a linear search in
            // `triples` to find entries for some block `b`, is replaced by a binary search.  This
            // means that `triples` first needs to be sorted by block index.
            triples.sort_unstable_by_key(|(_, _, bix)| *bix);

            for (ix, (_fix, kind, bix)) in triples.iter().enumerate() {
                // Deal with liveness flows outbound from `fix`.  Meaning, (1) above.
                if *kind == RangeFragKind::LiveOut || *kind == RangeFragKind::Thru {
                    for b in cfg_info.succ_map[*bix].iter() {
                        // Visit all entries in `triples` that are for `b`.  Binary search
                        // `triples` to find the lowest-indexed entry for `b`.
                        let mut ix_left = 0;
                        let mut ix_right = triples_len;
                        while ix_left < ix_right {
                            let m = (ix_left + ix_right) >> 1;
                            if triples[m].2 < *b {
                                ix_left = m + 1;
                            } else {
                                ix_right = m;
                            }
                        }

                        // It might be that there is no block for `b` in the sequence.  That's
                        // legit; it just means that block `bix` jumps to a successor where the
                        // associated register isn't live-in/thru.  A failure to find `b` can be
                        // indicated one of two ways:
                        //
                        // * ix_left == triples_len
                        // * ix_left < triples_len and b < triples[ix_left].2
                        //
                        // In both cases I *think* the loop over the entries for `b` below will not
                        // do anything.  But this is all a bit hairy, so let's convert the second
                        // variant into the first, so as to make it obvious that the loop won't do
                        // anything.

                        // ix_left now holds the lowest index of any `triples` entry for block `b`.
                        // Assert this.
                        if ix_left < triples_len && *b < triples[ix_left].2 {
                            ix_left = triples_len;
                        }
                        if ix_left < triples_len {
                            assert!(ix_left == 0 || triples[ix_left - 1].2 < *b);
                        }
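
                        // A worked example (hypothetical): with `triples` sorted by
                        // block as [B2, B4, B4, B7] and `b` == B4, the search leaves
                        // ix_left == 1, the first of the two B4 entries.  With `b` ==
                        // B5 it leaves ix_left == 3; since B7 != B5, the fix-up above
                        // bumps ix_left to triples_len, so the loop below does nothing.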

                        // ix2 plays the same role as in the quadratic version.  ix_left and
                        // ix_right are not used after this point.
                        let mut ix2 = ix_left;
                        loop {
                            let (_fix2, kind2, bix2) = match triples.get(ix2) {
                                None => break,
                                Some(triple) => *triple,
                            };
                            if *b < bix2 {
                                // We've come to the end of the sequence of `b`-blocks.
                                break;
                            }
                            debug_assert!(*b == bix2);
                            if kind2 == RangeFragKind::LiveOut {
                                ix2 += 1;
                                continue;
                            }
                            // Now we know that liveness for this reg "flows" from `triples[ix]` to
                            // `triples[ix2]`.  So those two frags must be part of the same live
                            // range.  Note this.
                            eclasses_uf.union(ix, ix2);
                            ix2 += 1;
                        }

                        if ix2 + 1 < triples_len {
                            debug_assert!(*b < triples[ix2 + 1].2);
                        }
                    }
                }
            }

            stats_num_multi_grps_large += 1;
            stats_size_multi_grps_large += triples_len;
        }

        // Now `eclasses_uf` contains the results of the merging-search.  Visit each of its
        // equivalence classes in turn, and convert each into a virtual or real live range as
        // appropriate.
        let eclasses = eclasses_uf.get_equiv_classes();
        for leader_triple_ix in eclasses.equiv_class_leaders_iter() {
            // `leader_triple_ix` is an eclass leader.  Enumerate the whole eclass.
            let mut frag_ixs = SmallVec::<[RangeFragIx; 4]>::new();
            for triple_ix in eclasses.equiv_class_elems_iter(leader_triple_ix) {
                frag_ixs.push(triples[triple_ix].0 /*first field is frag ix*/);
            }
            let sorted_frags = SortedRangeFragIxs::new(frag_ixs, &frag_env);
            create_and_add_range(
                &mut stats_num_vfrags_uncompressed,
                &mut stats_num_vfrags_compressed,
                &mut result_real,
                &mut result_virtual,
                reg,
                sorted_frags,
                frag_env,
                frag_metrics_env,
                estimated_frequencies,
            );
        }
        // END merge `all_frag_ixs_for_reg` entries as much as possible
    } // END per reg loop

    info!("      in: {} single groups", stats_num_single_grps);
    info!(
        "      in: {} local frags in multi groups",
        stats_num_local_frags
    );
    info!(
        "      in: {} small multi groups, {} small multi group total size",
        stats_num_multi_grps_small, stats_size_multi_grps_small
    );
    info!(
        "      in: {} large multi groups, {} large multi group total size",
        stats_num_multi_grps_large, stats_size_multi_grps_large
    );
    info!(
        "      out: {} VLRs, {} RLRs",
        result_virtual.len(),
        result_real.len()
    );
    info!(
        "      compress vfrags: in {}, out {}",
        stats_num_vfrags_uncompressed, stats_num_vfrags_compressed
    );
    info!("    merge_range_frags: end");

    (result_real, result_virtual)
}

//=============================================================================
// Auxiliary activities that mostly fall under the category "dataflow analysis", but are not
// part of the main dataflow analysis pipeline.

// Dataflow and liveness together create vectors of VirtualRanges and RealRanges.  These define
// (amongst other things) mappings from VirtualRanges to VirtualRegs and from RealRanges to
// RealRegs.  However, we often need the inverse mappings: from VirtualRegs to (sets of)
// VirtualRanges and from RealRegs to (sets of) RealRanges.  This function computes those
// inverse mappings.  They are used by BT's coalescing analysis, and for the dataflow analysis
// that supports reftype handling.
#[inline(never)]
pub fn compute_reg_to_ranges_maps<F: Function>(
    func: &F,
    univ: &RealRegUniverse,
    rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
) -> RegToRangesMaps {
    // Arbitrary, but chosen after quite some profiling, so as to minimise both instruction
    // count and number of `malloc` calls.  Don't mess with this without first collecting
    // comprehensive measurements.  Note that if you set this above 255, the type of
    // `r/vreg_approx_frag_counts` below will need to change accordingly.
    const MANY_FRAGS_THRESH: u8 = 200;

    // Adds `to_add` to `*counter`, taking care not to overflow it in the process.
    let add_u8_usize_saturate_to_u8 = |counter: &mut u8, mut to_add: usize| {
        if to_add > 0xFF {
            to_add = 0xFF;
        }
        let mut n = *counter as usize;
        n += to_add as usize;
        // n is at max 0x1FE (510)
        if n > 0xFF {
            n = 0xFF;
        }
        *counter = n as u8;
    };
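
    // For example (hypothetical numbers): a counter already at 250, given 600 more
    // fragments, first clamps the addend to 255 and then clamps the sum (505) back
    // down to 255, so the counter saturates at 0xFF rather than wrapping.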

    // We have in hand the virtual live ranges.  Each of these carries its
    // associated vreg.  So in effect we have a VLR -> VReg mapping.  We now
    // invert that, so as to generate a mapping from VRegs to their containing
    // VLRs.
    //
    // Note that multiple VLRs may map to the same VReg.  So the inverse mapping
    // will actually be from VRegs to a set of VLRs.  In most cases, we expect
    // the virtual-registerised-code given to this allocator to be derived from
    // SSA, in which case each VReg will have only one VLR.  So in this case,
    // the cost of first creating the mapping, and then looking up all the VRegs
    // in moves in it, will have cost linear in the size of the input function.
    //
    // NB re the SmallVec.  That has set semantics (no dups).

    let num_vregs = func.get_num_vregs();
    let num_rregs = univ.allocable;

    let mut vreg_approx_frag_counts = vec![0u8; num_vregs];
    let mut vreg_to_vlrs_map = vec![SmallVec::<[VirtualRangeIx; 3]>::new(); num_vregs];
    for (vlr, n) in vlr_env.iter().zip(0..) {
        let vlrix = VirtualRangeIx::new(n);
        let vreg: VirtualReg = vlr.vreg;
        // Now we know that there's a VLR `vlr` that is for VReg `vreg`.  Update the inverse
        // mapping accordingly.  We know we are stepping sequentially through the VLR (index)
        // space, so we'll never see the same VLRIx twice.  Hence there's no need to check for
        // dups when adding a VLR index to an existing binding for a VReg.
        //
        // If this array-indexing fails, it means the client's `.get_num_vregs()` function
        // claims there are fewer virtual regs than we actually observe in the code it gave us.
        // So it's a bug in the client.
        let vreg_index = vreg.get_index();
        vreg_to_vlrs_map[vreg_index].push(vlrix);

        let vlr_num_frags = vlr.sorted_frags.frags.len();
        add_u8_usize_saturate_to_u8(&mut vreg_approx_frag_counts[vreg_index], vlr_num_frags);
    }

    // Same for the real live ranges.
    let mut rreg_approx_frag_counts = vec![0u8; num_rregs];
    let mut rreg_to_rlrs_map = vec![SmallVec::<[RealRangeIx; 6]>::new(); num_rregs];
    for (rlr, n) in rlr_env.iter().zip(0..) {
        let rlrix = RealRangeIx::new(n);
        let rreg: RealReg = rlr.rreg;
        // If this array-indexing fails, it means something has gone wrong with sanitisation of
        // real registers -- that should ensure that we never see a real register with an index
        // greater than `univ.allocable`.  So it's a bug in the allocator's analysis phases.
        let rreg_index = rreg.get_index();
        rreg_to_rlrs_map[rreg_index].push(rlrix);

        let rlr_num_frags = rlr.sorted_frags.frag_ixs.len();
        add_u8_usize_saturate_to_u8(&mut rreg_approx_frag_counts[rreg_index], rlr_num_frags);
    }

    // Create sets indicating which regs have "many" live ranges.  Hopefully very few.
    // Since the `push`ed-in values are supplied by the `zip(0..)` iterator, they are
    // guaranteed duplicate-free, as required by the defn of `RegToRangesMaps`.
    let mut vregs_with_many_frags = Vec::<u32 /*VirtualReg index*/>::with_capacity(16);
    for (count, vreg_ix) in vreg_approx_frag_counts.iter().zip(0..) {
        if *count >= MANY_FRAGS_THRESH {
            vregs_with_many_frags.push(vreg_ix);
        }
    }

    let mut rregs_with_many_frags = Vec::<u32 /*RealReg index*/>::with_capacity(64);
    for (count, rreg_ix) in rreg_approx_frag_counts.iter().zip(0..) {
        if *count >= MANY_FRAGS_THRESH {
            rregs_with_many_frags.push(rreg_ix);
        }
    }

    RegToRangesMaps {
        rreg_to_rlrs_map,
        vreg_to_vlrs_map,
        rregs_with_many_frags,
        vregs_with_many_frags,
        many_frags_thresh: MANY_FRAGS_THRESH as usize,
    }
}

// Collect info about registers that are connected by moves.
#[inline(never)]
pub fn collect_move_info<F: Function>(
    func: &F,
    reg_vecs_and_bounds: &RegVecsAndBounds,
    est_freqs: &TypedIxVec<BlockIx, u32>,
) -> MoveInfo {
    let mut moves = Vec::<MoveInfoElem>::new();
    for b in func.blocks() {
        let block_eef = est_freqs[b];
        for iix in func.block_insns(b) {
            let insn = &func.get_insn(iix);
            let im = func.is_move(insn);
            match im {
                None => {}
                Some((wreg, reg)) => {
                    let iix_bounds = &reg_vecs_and_bounds.bounds[iix];
                    // It might seem strange to assert that `defs_len` and/or
                    // `uses_len` is <= 1 rather than == 1.  The reason is
                    // that either or even both registers might be ones which
                    // are not available to the allocator.  Hence they will
                    // have been removed by the sanitisation machinery before
                    // we get to this point.  If either is missing, we
                    // unfortunately can't coalesce the move away, and just
                    // have to live with it.
                    //
                    // If any of the following five assertions fail, the
                    // client's `is_move` is probably lying to us.
                    assert!(iix_bounds.uses_len <= 1);
                    assert!(iix_bounds.defs_len <= 1);
                    assert!(iix_bounds.mods_len == 0);
                    if iix_bounds.uses_len == 1 && iix_bounds.defs_len == 1 {
                        let reg_vecs = &reg_vecs_and_bounds.vecs;
                        assert!(reg_vecs.uses[iix_bounds.uses_start as usize] == reg);
                        assert!(reg_vecs.defs[iix_bounds.defs_start as usize] == wreg.to_reg());
                        let dst = wreg.to_reg();
                        let src = reg;
                        let est_freq = block_eef;
                        moves.push(MoveInfoElem {
                            dst,
                            src,
                            iix,
                            est_freq,
                        });
                    }
                }
            }
        }
    }

    MoveInfo { moves }
}