1 //! Constructing Conventional SSA form.
2 //!
3 //! Conventional SSA (CSSA) form is a subset of SSA form where any (transitively) phi-related
4 //! values do not interfere. We construct CSSA by building virtual registers that are as large as
5 //! possible and inserting copies where necessary such that all argument values passed to a block
6 //! parameter will belong to the same virtual register as the block parameter value itself.
7 
8 use crate::cursor::{Cursor, EncCursor};
9 use crate::dbg::DisplayList;
10 use crate::dominator_tree::{DominatorTree, DominatorTreePreorder};
11 use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
12 use crate::fx::FxHashMap;
13 use crate::ir::{self, InstBuilder, ProgramOrder};
14 use crate::ir::{Block, ExpandedProgramPoint, Function, Inst, Value};
15 use crate::isa::{EncInfo, TargetIsa};
16 use crate::regalloc::affinity::Affinity;
17 use crate::regalloc::liveness::Liveness;
18 use crate::regalloc::virtregs::{VirtReg, VirtRegs};
19 use crate::timing;
20 use alloc::vec::Vec;
21 use core::cmp;
22 use core::fmt;
23 use core::iter;
24 use core::slice;
25 use log::debug;
26 
27 // # Implementation
28 //
29 // The coalescing algorithm implemented follows this paper fairly closely:
30 //
31 //     Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. (2002). Fast copy coalescing and
32 //     live-range identification (Vol. 37, pp. 25–32). ACM. https://doi.org/10.1145/543552.512534
33 //
34 // We use a more efficient dominator forest representation (a linear stack) described here:
35 //
36 //     Boissinot, B., Darte, A., & Rastello, F. (2009). Revisiting out-of-SSA translation for
37 //     correctness, code quality and efficiency.
38 //
39 // The algorithm has two main phases:
40 //
41 // Phase 1: Union-find.
42 //
43 // We use the union-find support in `VirtRegs` to build virtual registers such that block parameter
44 // values always belong to the same virtual register as their corresponding block arguments at the
45 // predecessor branches. Trivial interferences between parameter and argument value live ranges are
46 // detected and resolved before unioning congruence classes, but non-trivial interferences between
47 // values that end up in the same congruence class are possible.
48 //
49 // Phase 2: Dominator forests.
50 //
51 // The virtual registers formed in phase 1 can contain interferences that we need to detect and
52 // eliminate. By ordering the values in a virtual register according to a dominator tree pre-order,
53 // we can identify all interferences in the virtual register in linear time.
54 //
55 // Interfering values are isolated and virtual registers rebuilt.
56 
57 /// Data structures to be used by the coalescing pass.
58 pub struct Coalescing {
59     preorder: DominatorTreePreorder,
60     forest: DomForest,
61     vcopies: VirtualCopies,
62     values: Vec<Value>,
63     predecessors: Vec<Inst>,
64     backedges: Vec<Inst>,
65 }
66 
67 /// One-shot context created once per invocation.
68 struct Context<'a> {
69     isa: &'a dyn TargetIsa,
70     encinfo: EncInfo,
71 
72     func: &'a mut Function,
73     cfg: &'a ControlFlowGraph,
74     domtree: &'a DominatorTree,
75     preorder: &'a DominatorTreePreorder,
76     liveness: &'a mut Liveness,
77     virtregs: &'a mut VirtRegs,
78 
79     forest: &'a mut DomForest,
80     vcopies: &'a mut VirtualCopies,
81     values: &'a mut Vec<Value>,
82     predecessors: &'a mut Vec<Inst>,
83     backedges: &'a mut Vec<Inst>,
84 }
85 
86 impl Coalescing {
87     /// Create a new coalescing pass.
new() -> Self88     pub fn new() -> Self {
89         Self {
90             forest: DomForest::new(),
91             preorder: DominatorTreePreorder::new(),
92             vcopies: VirtualCopies::new(),
93             values: Vec::new(),
94             predecessors: Vec::new(),
95             backedges: Vec::new(),
96         }
97     }
98 
99     /// Clear all data structures in this coalescing pass.
clear(&mut self)100     pub fn clear(&mut self) {
101         self.forest.clear();
102         self.vcopies.clear();
103         self.values.clear();
104         self.predecessors.clear();
105         self.backedges.clear();
106     }
107 
108     /// Convert `func` to Conventional SSA form and build virtual registers in the process.
conventional_ssa( &mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph, domtree: &DominatorTree, liveness: &mut Liveness, virtregs: &mut VirtRegs, )109     pub fn conventional_ssa(
110         &mut self,
111         isa: &dyn TargetIsa,
112         func: &mut Function,
113         cfg: &ControlFlowGraph,
114         domtree: &DominatorTree,
115         liveness: &mut Liveness,
116         virtregs: &mut VirtRegs,
117     ) {
118         let _tt = timing::ra_cssa();
119         debug!("Coalescing for:\n{}", func.display(isa));
120         self.preorder.compute(domtree, &func.layout);
121         let mut context = Context {
122             isa,
123             encinfo: isa.encoding_info(),
124             func,
125             cfg,
126             domtree,
127             preorder: &self.preorder,
128             liveness,
129             virtregs,
130             forest: &mut self.forest,
131             vcopies: &mut self.vcopies,
132             values: &mut self.values,
133             predecessors: &mut self.predecessors,
134             backedges: &mut self.backedges,
135         };
136 
137         // Run phase 1 (union-find) of the coalescing algorithm on the current function.
138         for &block in domtree.cfg_postorder() {
139             context.union_find_block(block);
140         }
141         context.finish_union_find();
142 
143         // Run phase 2 (dominator forests) on the current function.
144         context.process_vregs();
145     }
146 }
147 
148 /// Phase 1: Union-find.
149 ///
150 /// The two entry points for phase 1 are `union_find_block()` and `finish_union_find`.
151 impl<'a> Context<'a> {
152     /// Run the union-find algorithm on the parameter values on `block`.
153     ///
154     /// This ensure that all block parameters will belong to the same virtual register as their
155     /// corresponding arguments at all predecessor branches.
union_find_block(&mut self, block: Block)156     pub fn union_find_block(&mut self, block: Block) {
157         let num_params = self.func.dfg.num_block_params(block);
158         if num_params == 0 {
159             return;
160         }
161 
162         self.isolate_conflicting_params(block, num_params);
163 
164         for i in 0..num_params {
165             self.union_pred_args(block, i);
166         }
167     }
168 
169     // Identify block parameter values that are live at one of the predecessor branches.
170     //
171     // Such a parameter value will conflict with any argument value at the predecessor branch, so
172     // it must be isolated by inserting a copy.
isolate_conflicting_params(&mut self, block: Block, num_params: usize)173     fn isolate_conflicting_params(&mut self, block: Block, num_params: usize) {
174         debug_assert_eq!(num_params, self.func.dfg.num_block_params(block));
175         // The only way a parameter value can interfere with a predecessor branch is if the block is
176         // dominating the predecessor branch. That is, we are looking for loop back-edges.
177         for BlockPredecessor {
178             block: pred_block,
179             inst: pred_inst,
180         } in self.cfg.pred_iter(block)
181         {
182             // The quick pre-order dominance check is accurate because the block parameter is defined
183             // at the top of the block before any branches.
184             if !self.preorder.dominates(block, pred_block) {
185                 continue;
186             }
187 
188             debug!(
189                 " - checking {} params at back-edge {}: {}",
190                 num_params,
191                 pred_block,
192                 self.func.dfg.display_inst(pred_inst, self.isa)
193             );
194 
195             // Now `pred_inst` is known to be a back-edge, so it is possible for parameter values
196             // to be live at the use.
197             for i in 0..num_params {
198                 let param = self.func.dfg.block_params(block)[i];
199                 if self.liveness[param].reaches_use(pred_inst, pred_block, &self.func.layout) {
200                     self.isolate_param(block, param);
201                 }
202             }
203         }
204     }
205 
206     // Union block parameter value `num` with the corresponding block arguments on the predecessor
207     // branches.
208     //
209     // Detect cases where the argument value is live-in to `block` so it conflicts with any block
210     // parameter. Isolate the argument in those cases before unioning it with the parameter value.
union_pred_args(&mut self, block: Block, argnum: usize)211     fn union_pred_args(&mut self, block: Block, argnum: usize) {
212         let param = self.func.dfg.block_params(block)[argnum];
213 
214         for BlockPredecessor {
215             block: pred_block,
216             inst: pred_inst,
217         } in self.cfg.pred_iter(block)
218         {
219             let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum];
220 
221             // Never coalesce incoming function parameters on the stack. These parameters are
222             // pre-spilled, and the rest of the virtual register would be forced to spill to the
223             // `incoming_arg` stack slot too.
224             if let ir::ValueDef::Param(def_block, def_num) = self.func.dfg.value_def(arg) {
225                 if Some(def_block) == self.func.layout.entry_block()
226                     && self.func.signature.params[def_num].location.is_stack()
227                 {
228                     debug!("-> isolating function stack parameter {}", arg);
229                     let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg);
230                     self.virtregs.union(param, new_arg);
231                     continue;
232                 }
233             }
234 
235             // Check for basic interference: If `arg` overlaps a value defined at the entry to
236             // `block`, it can never be used as a block argument.
237             let interference = {
238                 let lr = &self.liveness[arg];
239 
240                 // There are two ways the argument value can interfere with `block`:
241                 //
242                 // 1. It is defined in a dominating block and live-in to `block`.
243                 // 2. If is itself a parameter value for `block`. This case should already have been
244                 //    eliminated by `isolate_conflicting_params()`.
245                 debug_assert!(
246                     lr.def() != block.into(),
247                     "{} parameter {} was missed by isolate_conflicting_params()",
248                     block,
249                     arg
250                 );
251 
252                 // The only other possibility is that `arg` is live-in to `block`.
253                 lr.is_livein(block, &self.func.layout)
254             };
255 
256             if interference {
257                 let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg);
258                 self.virtregs.union(param, new_arg);
259             } else {
260                 self.virtregs.union(param, arg);
261             }
262         }
263     }
264 
265     // Isolate block parameter value `param` on `block`.
266     //
267     // When `param=v10`:
268     //
269     //     block1(v10: i32):
270     //         foo
271     //
272     // becomes:
273     //
274     //     block1(v11: i32):
275     //         v10 = copy v11
276     //         foo
277     //
278     // This function inserts the copy and updates the live ranges of the old and new parameter
279     // values. Returns the new parameter value.
isolate_param(&mut self, block: Block, param: Value) -> Value280     fn isolate_param(&mut self, block: Block, param: Value) -> Value {
281         debug_assert_eq!(
282             self.func.dfg.value_def(param).pp(),
283             ExpandedProgramPoint::Block(block)
284         );
285         let ty = self.func.dfg.value_type(param);
286         let new_val = self.func.dfg.replace_block_param(param, ty);
287 
288         // Insert a copy instruction at the top of `block`.
289         let mut pos = EncCursor::new(self.func, self.isa).at_first_inst(block);
290         if let Some(inst) = pos.current_inst() {
291             pos.use_srcloc(inst);
292         }
293         pos.ins().with_result(param).copy(new_val);
294         let inst = pos.built_inst();
295         self.liveness.move_def_locally(param, inst);
296 
297         debug!(
298             "-> inserted {}, following {}({}: {})",
299             pos.display_inst(inst),
300             block,
301             new_val,
302             ty
303         );
304 
305         // Create a live range for the new value.
306         // TODO: Should we handle ghost values?
307         let affinity = Affinity::new(
308             &self
309                 .encinfo
310                 .operand_constraints(pos.func.encodings[inst])
311                 .expect("Bad copy encoding")
312                 .outs[0],
313         );
314         self.liveness.create_dead(new_val, block, affinity);
315         self.liveness
316             .extend_locally(new_val, block, inst, &pos.func.layout);
317 
318         new_val
319     }
320 
321     // Isolate the block argument `pred_val` from the predecessor `(pred_block, pred_inst)`.
322     //
323     // It is assumed that `pred_inst` is a branch instruction in `pred_block` whose `argnum`'th block
324     // argument is `pred_val`. Since the argument value interferes with the corresponding block
325     // parameter at the destination, a copy is used instead:
326     //
327     //     brnz v1, block2(v10)
328     //
329     // Becomes:
330     //
331     //     v11 = copy v10
332     //     brnz v1, block2(v11)
333     //
334     // This way the interference with the block parameter is avoided.
335     //
336     // A live range for the new value is created while the live range for `pred_val` is left
337     // unaltered.
338     //
339     // The new argument value is returned.
isolate_arg( &mut self, pred_block: Block, pred_inst: Inst, argnum: usize, pred_val: Value, ) -> Value340     fn isolate_arg(
341         &mut self,
342         pred_block: Block,
343         pred_inst: Inst,
344         argnum: usize,
345         pred_val: Value,
346     ) -> Value {
347         let mut pos = EncCursor::new(self.func, self.isa).at_inst(pred_inst);
348         pos.use_srcloc(pred_inst);
349         let copy = pos.ins().copy(pred_val);
350         let inst = pos.built_inst();
351 
352         // Create a live range for the new value.
353         // TODO: Handle affinity for ghost values.
354         let affinity = Affinity::new(
355             &self
356                 .encinfo
357                 .operand_constraints(pos.func.encodings[inst])
358                 .expect("Bad copy encoding")
359                 .outs[0],
360         );
361         self.liveness.create_dead(copy, inst, affinity);
362         self.liveness
363             .extend_locally(copy, pred_block, pred_inst, &pos.func.layout);
364 
365         pos.func.dfg.inst_variable_args_mut(pred_inst)[argnum] = copy;
366 
367         debug!(
368             "-> inserted {}, before {}: {}",
369             pos.display_inst(inst),
370             pred_block,
371             pos.display_inst(pred_inst)
372         );
373 
374         copy
375     }
376 
377     /// Finish the union-find part of the coalescing algorithm.
378     ///
379     /// This builds the initial set of virtual registers as the transitive/reflexive/symmetric
380     /// closure of the relation formed by block parameter-argument pairs found by `union_find_block()`.
finish_union_find(&mut self)381     fn finish_union_find(&mut self) {
382         self.virtregs.finish_union_find(None);
383         debug!("After union-find phase:{}", self.virtregs);
384     }
385 }
386 
387 /// Phase 2: Dominator forests.
388 ///
389 /// The main entry point is `process_vregs()`.
390 impl<'a> Context<'a> {
391     /// Check al virtual registers for interference and fix conflicts.
process_vregs(&mut self)392     pub fn process_vregs(&mut self) {
393         for vreg in self.virtregs.all_virtregs() {
394             self.process_vreg(vreg);
395         }
396     }
397 
398     // Check `vreg` for interferences and fix conflicts.
process_vreg(&mut self, vreg: VirtReg)399     fn process_vreg(&mut self, vreg: VirtReg) {
400         if !self.check_vreg(vreg) {
401             self.synthesize_vreg(vreg);
402         }
403     }
404 
405     // Check `vreg` for interferences.
406     //
407     // We use a Budimlic dominator forest to check for interferences between the values in `vreg`
408     // and identify values that should be isolated.
409     //
410     // Returns true if `vreg` is free of interference.
check_vreg(&mut self, vreg: VirtReg) -> bool411     fn check_vreg(&mut self, vreg: VirtReg) -> bool {
412         // Order the values according to the dominator pre-order of their definition.
413         let values = self.virtregs.sort_values(vreg, self.func, self.preorder);
414         debug!("Checking {} = {}", vreg, DisplayList(values));
415 
416         // Now push the values in order to the dominator forest.
417         // This gives us the closest dominating value def for each of the values.
418         self.forest.clear();
419         for &value in values {
420             let node = Node::value(value, 0, self.func);
421 
422             // Push this value and get the nearest dominating def back.
423             let parent = match self
424                 .forest
425                 .push_node(node, self.func, self.domtree, self.preorder)
426             {
427                 None => continue,
428                 Some(n) => n,
429             };
430 
431             // Check for interference between `parent` and `value`. Since `parent` dominates
432             // `value`, we only have to check if it overlaps the definition.
433             if self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout) {
434                 // The two values are interfering, so they can't be in the same virtual register.
435                 debug!("-> interference: {} overlaps def of {}", parent, value);
436                 return false;
437             }
438         }
439 
440         // No interference found.
441         true
442     }
443 
444     /// Destroy and rebuild `vreg` by iterative coalescing.
445     ///
446     /// When detecting that a virtual register formed in phase 1 contains interference, we have to
447     /// start over in a more careful way. We'll split the vreg into individual values and then
448     /// reassemble virtual registers using an iterative algorithm of pairwise merging.
449     ///
450     /// It is possible to recover multiple large virtual registers this way while still avoiding
451     /// a lot of copies.
synthesize_vreg(&mut self, vreg: VirtReg)452     fn synthesize_vreg(&mut self, vreg: VirtReg) {
453         self.vcopies.initialize(
454             self.virtregs.values(vreg),
455             self.func,
456             self.cfg,
457             self.preorder,
458         );
459         debug!(
460             "Synthesizing {} from {} branches and params {}",
461             vreg,
462             self.vcopies.branches.len(),
463             DisplayList(&self.vcopies.params)
464         );
465         self.virtregs.remove(vreg);
466 
467         while let Some(param) = self.vcopies.next_param() {
468             self.merge_param(param);
469             self.vcopies.merged_param(param, self.func);
470         }
471     }
472 
473     /// Merge block parameter value `param` with virtual registers at its predecessors.
merge_param(&mut self, param: Value)474     fn merge_param(&mut self, param: Value) {
475         let (block, argnum) = match self.func.dfg.value_def(param) {
476             ir::ValueDef::Param(e, n) => (e, n),
477             ir::ValueDef::Result(_, _) => panic!("Expected parameter"),
478         };
479 
480         // Collect all the predecessors and rearrange them.
481         //
482         // The order we process the predecessors matters because once one predecessor's virtual
483         // register is merged, it can cause interference with following merges. This means that the
484         // first predecessors processed are more likely to be copy-free. We want an ordering that
485         // is a) good for performance and b) as stable as possible. The pred_iter() iterator uses
486         // instruction numbers which is not great for reproducible test cases.
487         //
488         // First merge loop back-edges in layout order, on the theory that shorter back-edges are
489         // more sensitive to inserted copies.
490         //
491         // Second everything else in reverse layout order. Again, short forward branches get merged
492         // first. There can also be backwards branches mixed in here, though, as long as they are
493         // not loop backedges.
494         debug_assert!(self.predecessors.is_empty());
495         debug_assert!(self.backedges.is_empty());
496         for BlockPredecessor {
497             block: pred_block,
498             inst: pred_inst,
499         } in self.cfg.pred_iter(block)
500         {
501             if self.preorder.dominates(block, pred_block) {
502                 self.backedges.push(pred_inst);
503             } else {
504                 self.predecessors.push(pred_inst);
505             }
506         }
507         // Order instructions in reverse order so we can pop them off the back.
508         {
509             let l = &self.func.layout;
510             self.backedges.sort_unstable_by(|&a, &b| l.cmp(b, a));
511             self.predecessors.sort_unstable_by(|&a, &b| l.cmp(a, b));
512             self.predecessors.extend_from_slice(&self.backedges);
513             self.backedges.clear();
514         }
515 
516         while let Some(pred_inst) = self.predecessors.pop() {
517             let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum];
518 
519             // We want to merge the vreg containing `param` with the vreg containing `arg`.
520             if self.try_merge_vregs(param, arg) {
521                 continue;
522             }
523 
524             // Can't merge because of interference. Insert a copy instead.
525             let pred_block = self.func.layout.pp_block(pred_inst);
526             let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg);
527             self.virtregs
528                 .insert_single(param, new_arg, self.func, self.preorder);
529         }
530     }
531 
532     /// Merge the virtual registers containing `param` and `arg` if possible.
533     ///
534     /// Use self.vcopies to check for virtual copy interference too.
535     ///
536     /// Returns true if the virtual registers are successfully merged.
try_merge_vregs(&mut self, param: Value, arg: Value) -> bool537     fn try_merge_vregs(&mut self, param: Value, arg: Value) -> bool {
538         if self.virtregs.same_class(param, arg) {
539             return true;
540         }
541 
542         if !self.can_merge_vregs(param, arg) {
543             return false;
544         }
545 
546         let _vreg = self.virtregs.unify(self.values);
547         debug!("-> merged into {} = {}", _vreg, DisplayList(self.values));
548         true
549     }
550 
551     /// Check if it is possible to merge two virtual registers.
552     ///
553     /// Also leave `self.values` with the ordered list of values in the merged vreg.
can_merge_vregs(&mut self, param: Value, arg: Value) -> bool554     fn can_merge_vregs(&mut self, param: Value, arg: Value) -> bool {
555         // We only need an immutable function reference.
556         let func = &*self.func;
557         let domtree = self.domtree;
558         let preorder = self.preorder;
559 
560         // Restrict the virtual copy nodes we look at and key the `set_id` and `value` properties
561         // of the nodes. Set_id 0 will be `param` and set_id 1 will be `arg`.
562         self.vcopies
563             .set_filter([param, arg], func, self.virtregs, preorder);
564 
565         // Now create an ordered sequence of dom-forest nodes from three sources: The two virtual
566         // registers and the filtered virtual copies.
567         let v0 = self.virtregs.congruence_class(&param);
568         let v1 = self.virtregs.congruence_class(&arg);
569         debug!(
570             " - set 0: {}\n - set 1: {}",
571             DisplayList(v0),
572             DisplayList(v1)
573         );
574         let nodes = MergeNodes::new(
575             func,
576             preorder,
577             MergeNodes::new(
578                 func,
579                 preorder,
580                 v0.iter().map(|&value| Node::value(value, 0, func)),
581                 v1.iter().map(|&value| Node::value(value, 1, func)),
582             ),
583             self.vcopies.iter(func),
584         );
585 
586         // Now push the values in order to the dominator forest.
587         // This gives us the closest dominating value def for each of the values.
588         self.forest.clear();
589         self.values.clear();
590         for node in nodes {
591             // Accumulate ordered values for the new vreg.
592             if node.is_value() {
593                 self.values.push(node.value);
594             }
595 
596             // Push this value and get the nearest dominating def back.
597             let parent = match self.forest.push_node(node, func, domtree, preorder) {
598                 None => {
599                     if node.is_vcopy {
600                         self.forest.pop_last();
601                     }
602                     continue;
603                 }
604                 Some(n) => n,
605             };
606 
607             if node.is_vcopy {
608                 // Vcopy nodes don't represent interference if they are copies of the parent value.
609                 // In that case, the node must be removed because the parent value can still be
610                 // live belong the vcopy.
611                 if parent.is_vcopy || node.value == parent.value {
612                     self.forest.pop_last();
613                     continue;
614                 }
615 
616                 // Check if the parent value interferes with the virtual copy.
617                 let inst = node.def.unwrap_inst();
618                 if node.set_id != parent.set_id
619                     && self.liveness[parent.value].reaches_use(inst, node.block, &self.func.layout)
620                 {
621                     debug!(
622                         " - interference: {} overlaps vcopy at {}:{}",
623                         parent,
624                         node.block,
625                         self.func.dfg.display_inst(inst, self.isa)
626                     );
627                     return false;
628                 }
629 
630                 // Keep this vcopy on the stack. It will save us a few interference checks.
631                 continue;
632             }
633 
634             // Parent vcopies never represent any interference. We only keep them on the stack to
635             // avoid an interference check against a value higher up.
636             if parent.is_vcopy {
637                 continue;
638             }
639 
640             // Both node and parent are values, so check for interference.
641             debug_assert!(node.is_value() && parent.is_value());
642             if node.set_id != parent.set_id
643                 && self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout)
644             {
645                 // The two values are interfering.
646                 debug!(" - interference: {} overlaps def of {}", parent, node.value);
647                 return false;
648             }
649         }
650 
651         // The values vector should receive all values.
652         debug_assert_eq!(v0.len() + v1.len(), self.values.len());
653 
654         // No interference found.
655         true
656     }
657 }
658 
659 /// Dominator forest.
660 ///
661 /// This is a utility type used for detecting interference in virtual registers, where each virtual
662 /// register is a list of values ordered according to the dominator tree pre-order.
663 ///
664 /// The idea of a dominator forest was introduced on the Budimlic paper and the linear stack
665 /// representation in the Boissinot paper. Our version of the linear stack is slightly modified
666 /// because we have a pre-order of the dominator tree at the block granularity, not basic block
667 /// granularity.
668 ///
669 /// Values are pushed in dominator tree pre-order of their definitions, and for each value pushed,
670 /// `push_node` will return the nearest previously pushed value that dominates the definition.
671 #[allow(dead_code)]
672 struct DomForest {
673     // Stack representing the rightmost edge of the dominator forest so far, ending in the last
674     // element of `values`.
675     //
676     // At all times, the block of each element in the stack dominates the block of the next one.
677     stack: Vec<Node>,
678 }
679 
680 /// A node in the dominator forest.
681 #[derive(Clone, Copy, Debug)]
682 #[allow(dead_code)]
683 struct Node {
684     /// The program point where the live range is defined.
685     def: ExpandedProgramPoint,
686     /// block containing `def`.
687     block: Block,
688     /// Is this a virtual copy or a value?
689     is_vcopy: bool,
690     /// Set identifier.
691     set_id: u8,
692     /// For a value node: The value defined at `def`.
693     /// For a vcopy node: The relevant branch argument at `def`.
694     value: Value,
695 }
696 
697 impl Node {
698     /// Create a node representing `value`.
value(value: Value, set_id: u8, func: &Function) -> Self699     pub fn value(value: Value, set_id: u8, func: &Function) -> Self {
700         let def = func.dfg.value_def(value).pp();
701         let block = func.layout.pp_block(def);
702         Self {
703             def,
704             block,
705             is_vcopy: false,
706             set_id,
707             value,
708         }
709     }
710 
711     /// Create a node representing a virtual copy.
vcopy(branch: Inst, value: Value, set_id: u8, func: &Function) -> Self712     pub fn vcopy(branch: Inst, value: Value, set_id: u8, func: &Function) -> Self {
713         let def = branch.into();
714         let block = func.layout.pp_block(def);
715         Self {
716             def,
717             block,
718             is_vcopy: true,
719             set_id,
720             value,
721         }
722     }
723 
724     /// IF this a value node?
is_value(&self) -> bool725     pub fn is_value(&self) -> bool {
726         !self.is_vcopy
727     }
728 }
729 
730 impl fmt::Display for Node {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result731     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
732         if self.is_vcopy {
733             write!(f, "{}:vcopy({})@{}", self.set_id, self.value, self.block)
734         } else {
735             write!(f, "{}:{}@{}", self.set_id, self.value, self.block)
736         }
737     }
738 }
739 
740 impl DomForest {
741     /// Create a new empty dominator forest.
new() -> Self742     pub fn new() -> Self {
743         Self { stack: Vec::new() }
744     }
745 
746     /// Clear all data structures in this dominator forest.
clear(&mut self)747     pub fn clear(&mut self) {
748         self.stack.clear();
749     }
750 
751     /// Add a single node to the forest.
752     ///
753     /// Update the stack so its dominance invariants are preserved. Detect a parent node on the
754     /// stack which is the closest one dominating the new node and return it.
push_node( &mut self, node: Node, func: &Function, domtree: &DominatorTree, preorder: &DominatorTreePreorder, ) -> Option<Node>755     fn push_node(
756         &mut self,
757         node: Node,
758         func: &Function,
759         domtree: &DominatorTree,
760         preorder: &DominatorTreePreorder,
761     ) -> Option<Node> {
762         // The stack contains the current sequence of dominating defs. Pop elements until we
763         // find one whose block dominates `node.block`.
764         while let Some(top) = self.stack.pop() {
765             if preorder.dominates(top.block, node.block) {
766                 // This is the right insertion spot for `node`.
767                 self.stack.push(top);
768                 self.stack.push(node);
769 
770                 // We know here that `top.block` dominates `node.block`, and thus `node.def`. This does
771                 // not necessarily mean that `top.def` dominates `node.def`, though. The `top.def`
772                 // program point may be below the last branch in `top.block` that dominates
773                 // `node.def`.
774                 //
775                 // We do know, though, that if there is a nearest value dominating `node.def`, it
776                 // will be on the stack. We just need to find the last stack entry that actually
777                 // dominates.
778                 let mut last_dom = node.def;
779                 for &n in self.stack.iter().rev().skip(1) {
780                     // If the node is defined at the block header, it does in fact dominate
781                     // everything else pushed on the stack.
782                     let def_inst = match n.def {
783                         ExpandedProgramPoint::Block(_) => return Some(n),
784                         ExpandedProgramPoint::Inst(i) => i,
785                     };
786 
787                     // We need to find the last program point in `n.block` to dominate `node.def`.
788                     last_dom = match domtree.last_dominator(n.block, last_dom, &func.layout) {
789                         None => n.block.into(),
790                         Some(inst) => {
791                             if func.layout.cmp(def_inst, inst) != cmp::Ordering::Greater {
792                                 return Some(n);
793                             }
794                             inst.into()
795                         }
796                     };
797                 }
798 
799                 // No real dominator found on the stack.
800                 return None;
801             }
802         }
803 
804         // No dominators, start a new tree in the forest.
805         self.stack.push(node);
806         None
807     }
808 
pop_last(&mut self)809     pub fn pop_last(&mut self) {
810         self.stack.pop().expect("Stack is empty");
811     }
812 }
813 
814 /// Virtual copies.
815 ///
816 /// When building a full virtual register at once, like phase 1 does with union-find, it is good
817 /// enough to check for interference between the values in the full virtual register like
818 /// `check_vreg()` does. However, in phase 2 we are doing pairwise merges of partial virtual
819 /// registers that don't represent the full transitive closure of the block argument-parameter
820 /// relation. This means that just checking for interference between values is inadequate.
821 ///
822 /// Example:
823 ///
824 ///   v1 = iconst.i32 1
825 ///   brnz v10, block1(v1)
826 ///   v2 = iconst.i32 2
827 ///   brnz v11, block1(v2)
828 ///   return v1
829 ///
830 /// block1(v3: i32):
831 ///   v4 = iadd v3, v1
832 ///
833 /// With just value interference checking, we could build the virtual register [v3, v1] since those
834 /// two values don't interfere. We can't merge v2 into this virtual register because v1 and v2
835 /// interfere. However, we can't resolve that interference either by inserting a copy:
836 ///
837 ///   v1 = iconst.i32 1
838 ///   brnz v10, block1(v1)
839 ///   v2 = iconst.i32 2
840 ///   v20 = copy v2          <-- new value
841 ///   brnz v11, block1(v20)
842 ///   return v1
843 ///
844 /// block1(v3: i32):
845 ///   v4 = iadd v3, v1
846 ///
847 /// The new value v20 still interferes with v1 because v1 is live across the "brnz v11" branch. We
848 /// shouldn't have placed v1 and v3 in the same virtual register to begin with.
849 ///
850 /// LLVM detects this form of interference by inserting copies in the predecessors of all phi
851 /// instructions, then attempting to delete the copies. This is quite expensive because it involves
852 /// creating a large number of copies and value.
853 ///
854 /// We'll detect this form of interference with *virtual copies*: Each block parameter value that
855 /// hasn't yet been fully merged with its block argument values is given a set of virtual copies at
856 /// the predecessors. Any candidate value to be merged is checked for interference against both the
857 /// virtual register and the virtual copies.
858 ///
859 /// In the general case, we're checking if two virtual registers can be merged, and both can
860 /// contain incomplete block parameter values with associated virtual copies.
861 ///
862 /// The `VirtualCopies` struct represents a set of incomplete parameters and their associated
863 /// virtual copies. Given two virtual registers, it can produce an ordered sequence of nodes
864 /// representing the virtual copies in both vregs.
865 struct VirtualCopies {
866     // Incomplete block parameters. These don't need to belong to the same virtual register.
867     params: Vec<Value>,
868 
869     // Set of `(branch, destination)` pairs. These are all the predecessor branches for the blocks
870     // whose parameters can be found in `params`.
871     //
872     // Ordered by dominator tree pre-order of the branch instructions.
873     branches: Vec<(Inst, Block)>,
874 
875     // Filter for the currently active node iterator.
876     //
877     // A block => (set_id, num) entry means that branches to `block` are active in `set_id` with
878     // branch argument number `num`.
879     filter: FxHashMap<Block, (u8, usize)>,
880 }
881 
882 impl VirtualCopies {
883     /// Create an empty VirtualCopies struct.
new() -> Self884     pub fn new() -> Self {
885         Self {
886             params: Vec::new(),
887             branches: Vec::new(),
888             filter: FxHashMap(),
889         }
890     }
891 
892     /// Clear all state.
clear(&mut self)893     pub fn clear(&mut self) {
894         self.params.clear();
895         self.branches.clear();
896         self.filter.clear();
897     }
898 
899     /// Initialize virtual copies from the (interfering) values in a union-find virtual register
900     /// that is going to be broken up and reassembled iteratively.
901     ///
902     /// The values are assumed to be in domtree pre-order.
903     ///
904     /// This will extract the block parameter values and associate virtual copies all of them.
initialize( &mut self, values: &[Value], func: &Function, cfg: &ControlFlowGraph, preorder: &DominatorTreePreorder, )905     pub fn initialize(
906         &mut self,
907         values: &[Value],
908         func: &Function,
909         cfg: &ControlFlowGraph,
910         preorder: &DominatorTreePreorder,
911     ) {
912         self.clear();
913 
914         let mut last_block = None;
915         for &val in values {
916             if let ir::ValueDef::Param(block, _) = func.dfg.value_def(val) {
917                 self.params.push(val);
918 
919                 // We may have multiple parameters from the same block, but we only need to collect
920                 // predecessors once. Also verify the ordering of values.
921                 if let Some(last) = last_block {
922                     match preorder.pre_cmp_block(last, block) {
923                         cmp::Ordering::Less => {}
924                         cmp::Ordering::Equal => continue,
925                         cmp::Ordering::Greater => panic!("values in wrong order"),
926                     }
927                 }
928 
929                 // This block hasn't been seen before.
930                 for BlockPredecessor {
931                     inst: pred_inst, ..
932                 } in cfg.pred_iter(block)
933                 {
934                     self.branches.push((pred_inst, block));
935                 }
936                 last_block = Some(block);
937             }
938         }
939 
940         // Reorder the predecessor branches as required by the dominator forest.
941         self.branches
942             .sort_unstable_by(|&(a, _), &(b, _)| preorder.pre_cmp(a, b, &func.layout));
943     }
944 
945     /// Get the next unmerged parameter value.
next_param(&self) -> Option<Value>946     pub fn next_param(&self) -> Option<Value> {
947         self.params.last().cloned()
948     }
949 
950     /// Indicate that `param` is now fully merged.
merged_param(&mut self, param: Value, func: &Function)951     pub fn merged_param(&mut self, param: Value, func: &Function) {
952         let popped = self.params.pop();
953         debug_assert_eq!(popped, Some(param));
954 
955         // The domtree pre-order in `self.params` guarantees that all parameters defined at the
956         // same block will be adjacent. This means we can see when all parameters at a block have been
957         // merged.
958         //
959         // We don't care about the last parameter - when that is merged we are done.
960         let last = match self.params.last() {
961             None => return,
962             Some(x) => *x,
963         };
964         let block = func.dfg.value_def(param).unwrap_block();
965         if func.dfg.value_def(last).unwrap_block() == block {
966             // We're not done with `block` parameters yet.
967             return;
968         }
969 
970         // Alright, we know there are no remaining `block` parameters in `self.params`. This means we
971         // can get rid of the `block` predecessors in `self.branches`. We don't have to, the
972         // `VCopyIter` will just skip them, but this reduces its workload.
973         self.branches.retain(|&(_, dest)| dest != block);
974     }
975 
976     /// Set a filter for the virtual copy nodes we're generating.
977     ///
978     /// Only generate nodes for parameter values that are in the same congruence class as `reprs`.
979     /// Assign a set_id to each node corresponding to the index into `reprs` of the parameter's
980     /// congruence class.
set_filter( &mut self, reprs: [Value; 2], func: &Function, virtregs: &VirtRegs, preorder: &DominatorTreePreorder, )981     pub fn set_filter(
982         &mut self,
983         reprs: [Value; 2],
984         func: &Function,
985         virtregs: &VirtRegs,
986         preorder: &DominatorTreePreorder,
987     ) {
988         self.filter.clear();
989 
990         // Parameters in `self.params` are ordered according to the domtree per-order, and they are
991         // removed from the back once they are fully merged. This means we can stop looking for
992         // parameters once we're beyond the last one.
993         let last_param = *self.params.last().expect("No more parameters");
994         let limit = func.dfg.value_def(last_param).unwrap_block();
995 
996         for (set_id, repr) in reprs.iter().enumerate() {
997             let set_id = set_id as u8;
998             for &value in virtregs.congruence_class(repr) {
999                 if let ir::ValueDef::Param(block, num) = func.dfg.value_def(value) {
1000                     if preorder.pre_cmp_block(block, limit) == cmp::Ordering::Greater {
1001                         // Stop once we're outside the bounds of `self.params`.
1002                         break;
1003                     }
1004                     self.filter.insert(block, (set_id, num));
1005                 }
1006             }
1007         }
1008     }
1009 
1010     /// Look up the set_id and argument number for `block` in the current filter.
1011     ///
1012     /// Returns `None` if none of the currently active parameters are defined at `block`. Otherwise
1013     /// returns `(set_id, argnum)` for an active parameter defined at `block`.
lookup(&self, block: Block) -> Option<(u8, usize)>1014     fn lookup(&self, block: Block) -> Option<(u8, usize)> {
1015         self.filter.get(&block).cloned()
1016     }
1017 
1018     /// Get an iterator of dom-forest nodes corresponding to the current filter.
iter<'a>(&'a self, func: &'a Function) -> VCopyIter1019     pub fn iter<'a>(&'a self, func: &'a Function) -> VCopyIter {
1020         VCopyIter {
1021             func,
1022             vcopies: self,
1023             branches: self.branches.iter(),
1024         }
1025     }
1026 }
1027 
1028 /// Virtual copy iterator.
1029 ///
1030 /// This iterator produces dom-forest nodes corresponding to the current filter in the virtual
1031 /// copies container.
1032 struct VCopyIter<'a> {
1033     func: &'a Function,
1034     vcopies: &'a VirtualCopies,
1035     branches: slice::Iter<'a, (Inst, Block)>,
1036 }
1037 
1038 impl<'a> Iterator for VCopyIter<'a> {
1039     type Item = Node;
1040 
next(&mut self) -> Option<Node>1041     fn next(&mut self) -> Option<Node> {
1042         while let Some(&(branch, dest)) = self.branches.next() {
1043             if let Some((set_id, argnum)) = self.vcopies.lookup(dest) {
1044                 let arg = self.func.dfg.inst_variable_args(branch)[argnum];
1045                 return Some(Node::vcopy(branch, arg, set_id, self.func));
1046             }
1047         }
1048         None
1049     }
1050 }
1051 
1052 /// Node-merging iterator.
1053 ///
1054 /// Given two ordered sequences of nodes, yield an ordered sequence containing all of them.
1055 struct MergeNodes<'a, IA, IB>
1056 where
1057     IA: Iterator<Item = Node>,
1058     IB: Iterator<Item = Node>,
1059 {
1060     a: iter::Peekable<IA>,
1061     b: iter::Peekable<IB>,
1062     layout: &'a ir::Layout,
1063     preorder: &'a DominatorTreePreorder,
1064 }
1065 
1066 impl<'a, IA, IB> MergeNodes<'a, IA, IB>
1067 where
1068     IA: Iterator<Item = Node>,
1069     IB: Iterator<Item = Node>,
1070 {
new(func: &'a Function, preorder: &'a DominatorTreePreorder, a: IA, b: IB) -> Self1071     pub fn new(func: &'a Function, preorder: &'a DominatorTreePreorder, a: IA, b: IB) -> Self {
1072         MergeNodes {
1073             a: a.peekable(),
1074             b: b.peekable(),
1075             layout: &func.layout,
1076             preorder,
1077         }
1078     }
1079 }
1080 
1081 impl<'a, IA, IB> Iterator for MergeNodes<'a, IA, IB>
1082 where
1083     IA: Iterator<Item = Node>,
1084     IB: Iterator<Item = Node>,
1085 {
1086     type Item = Node;
1087 
next(&mut self) -> Option<Node>1088     fn next(&mut self) -> Option<Node> {
1089         let ord = match (self.a.peek(), self.b.peek()) {
1090             (Some(a), Some(b)) => {
1091                 let layout = self.layout;
1092                 self.preorder
1093                     .pre_cmp_block(a.block, b.block)
1094                     .then_with(|| layout.cmp(a.def, b.def))
1095             }
1096             (Some(_), None) => cmp::Ordering::Less,
1097             (None, Some(_)) => cmp::Ordering::Greater,
1098             (None, None) => return None,
1099         };
1100         // When the nodes compare equal, prefer the `a` side.
1101         if ord != cmp::Ordering::Greater {
1102             self.a.next()
1103         } else {
1104             self.b.next()
1105         }
1106     }
1107 }
1108