1 //! Constructing Conventional SSA form. 2 //! 3 //! Conventional SSA (CSSA) form is a subset of SSA form where any (transitively) phi-related 4 //! values do not interfere. We construct CSSA by building virtual registers that are as large as 5 //! possible and inserting copies where necessary such that all argument values passed to a block 6 //! parameter will belong to the same virtual register as the block parameter value itself. 7 8 use crate::cursor::{Cursor, EncCursor}; 9 use crate::dbg::DisplayList; 10 use crate::dominator_tree::{DominatorTree, DominatorTreePreorder}; 11 use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; 12 use crate::fx::FxHashMap; 13 use crate::ir::{self, InstBuilder, ProgramOrder}; 14 use crate::ir::{Block, ExpandedProgramPoint, Function, Inst, Value}; 15 use crate::isa::{EncInfo, TargetIsa}; 16 use crate::regalloc::affinity::Affinity; 17 use crate::regalloc::liveness::Liveness; 18 use crate::regalloc::virtregs::{VirtReg, VirtRegs}; 19 use crate::timing; 20 use alloc::vec::Vec; 21 use core::cmp; 22 use core::fmt; 23 use core::iter; 24 use core::slice; 25 use log::debug; 26 27 // # Implementation 28 // 29 // The coalescing algorithm implemented follows this paper fairly closely: 30 // 31 // Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. (2002). Fast copy coalescing and 32 // live-range identification (Vol. 37, pp. 25–32). ACM. https://doi.org/10.1145/543552.512534 33 // 34 // We use a more efficient dominator forest representation (a linear stack) described here: 35 // 36 // Boissinot, B., Darte, A., & Rastello, F. (2009). Revisiting out-of-SSA translation for 37 // correctness, code quality and efficiency. 38 // 39 // The algorithm has two main phases: 40 // 41 // Phase 1: Union-find. 42 // 43 // We use the union-find support in `VirtRegs` to build virtual registers such that block parameter 44 // values always belong to the same virtual register as their corresponding block arguments at the 45 // predecessor branches. 
Trivial interferences between parameter and argument value live ranges are 46 // detected and resolved before unioning congruence classes, but non-trivial interferences between 47 // values that end up in the same congruence class are possible. 48 // 49 // Phase 2: Dominator forests. 50 // 51 // The virtual registers formed in phase 1 can contain interferences that we need to detect and 52 // eliminate. By ordering the values in a virtual register according to a dominator tree pre-order, 53 // we can identify all interferences in the virtual register in linear time. 54 // 55 // Interfering values are isolated and virtual registers rebuilt. 56 57 /// Data structures to be used by the coalescing pass. 58 pub struct Coalescing { 59 preorder: DominatorTreePreorder, 60 forest: DomForest, 61 vcopies: VirtualCopies, 62 values: Vec<Value>, 63 predecessors: Vec<Inst>, 64 backedges: Vec<Inst>, 65 } 66 67 /// One-shot context created once per invocation. 68 struct Context<'a> { 69 isa: &'a dyn TargetIsa, 70 encinfo: EncInfo, 71 72 func: &'a mut Function, 73 cfg: &'a ControlFlowGraph, 74 domtree: &'a DominatorTree, 75 preorder: &'a DominatorTreePreorder, 76 liveness: &'a mut Liveness, 77 virtregs: &'a mut VirtRegs, 78 79 forest: &'a mut DomForest, 80 vcopies: &'a mut VirtualCopies, 81 values: &'a mut Vec<Value>, 82 predecessors: &'a mut Vec<Inst>, 83 backedges: &'a mut Vec<Inst>, 84 } 85 86 impl Coalescing { 87 /// Create a new coalescing pass. new() -> Self88 pub fn new() -> Self { 89 Self { 90 forest: DomForest::new(), 91 preorder: DominatorTreePreorder::new(), 92 vcopies: VirtualCopies::new(), 93 values: Vec::new(), 94 predecessors: Vec::new(), 95 backedges: Vec::new(), 96 } 97 } 98 99 /// Clear all data structures in this coalescing pass. 
clear(&mut self)100 pub fn clear(&mut self) { 101 self.forest.clear(); 102 self.vcopies.clear(); 103 self.values.clear(); 104 self.predecessors.clear(); 105 self.backedges.clear(); 106 } 107 108 /// Convert `func` to Conventional SSA form and build virtual registers in the process. conventional_ssa( &mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph, domtree: &DominatorTree, liveness: &mut Liveness, virtregs: &mut VirtRegs, )109 pub fn conventional_ssa( 110 &mut self, 111 isa: &dyn TargetIsa, 112 func: &mut Function, 113 cfg: &ControlFlowGraph, 114 domtree: &DominatorTree, 115 liveness: &mut Liveness, 116 virtregs: &mut VirtRegs, 117 ) { 118 let _tt = timing::ra_cssa(); 119 debug!("Coalescing for:\n{}", func.display(isa)); 120 self.preorder.compute(domtree, &func.layout); 121 let mut context = Context { 122 isa, 123 encinfo: isa.encoding_info(), 124 func, 125 cfg, 126 domtree, 127 preorder: &self.preorder, 128 liveness, 129 virtregs, 130 forest: &mut self.forest, 131 vcopies: &mut self.vcopies, 132 values: &mut self.values, 133 predecessors: &mut self.predecessors, 134 backedges: &mut self.backedges, 135 }; 136 137 // Run phase 1 (union-find) of the coalescing algorithm on the current function. 138 for &block in domtree.cfg_postorder() { 139 context.union_find_block(block); 140 } 141 context.finish_union_find(); 142 143 // Run phase 2 (dominator forests) on the current function. 144 context.process_vregs(); 145 } 146 } 147 148 /// Phase 1: Union-find. 149 /// 150 /// The two entry points for phase 1 are `union_find_block()` and `finish_union_find`. 151 impl<'a> Context<'a> { 152 /// Run the union-find algorithm on the parameter values on `block`. 153 /// 154 /// This ensure that all block parameters will belong to the same virtual register as their 155 /// corresponding arguments at all predecessor branches. 
union_find_block(&mut self, block: Block)156 pub fn union_find_block(&mut self, block: Block) { 157 let num_params = self.func.dfg.num_block_params(block); 158 if num_params == 0 { 159 return; 160 } 161 162 self.isolate_conflicting_params(block, num_params); 163 164 for i in 0..num_params { 165 self.union_pred_args(block, i); 166 } 167 } 168 169 // Identify block parameter values that are live at one of the predecessor branches. 170 // 171 // Such a parameter value will conflict with any argument value at the predecessor branch, so 172 // it must be isolated by inserting a copy. isolate_conflicting_params(&mut self, block: Block, num_params: usize)173 fn isolate_conflicting_params(&mut self, block: Block, num_params: usize) { 174 debug_assert_eq!(num_params, self.func.dfg.num_block_params(block)); 175 // The only way a parameter value can interfere with a predecessor branch is if the block is 176 // dominating the predecessor branch. That is, we are looking for loop back-edges. 177 for BlockPredecessor { 178 block: pred_block, 179 inst: pred_inst, 180 } in self.cfg.pred_iter(block) 181 { 182 // The quick pre-order dominance check is accurate because the block parameter is defined 183 // at the top of the block before any branches. 184 if !self.preorder.dominates(block, pred_block) { 185 continue; 186 } 187 188 debug!( 189 " - checking {} params at back-edge {}: {}", 190 num_params, 191 pred_block, 192 self.func.dfg.display_inst(pred_inst, self.isa) 193 ); 194 195 // Now `pred_inst` is known to be a back-edge, so it is possible for parameter values 196 // to be live at the use. 197 for i in 0..num_params { 198 let param = self.func.dfg.block_params(block)[i]; 199 if self.liveness[param].reaches_use(pred_inst, pred_block, &self.func.layout) { 200 self.isolate_param(block, param); 201 } 202 } 203 } 204 } 205 206 // Union block parameter value `num` with the corresponding block arguments on the predecessor 207 // branches. 
208 // 209 // Detect cases where the argument value is live-in to `block` so it conflicts with any block 210 // parameter. Isolate the argument in those cases before unioning it with the parameter value. union_pred_args(&mut self, block: Block, argnum: usize)211 fn union_pred_args(&mut self, block: Block, argnum: usize) { 212 let param = self.func.dfg.block_params(block)[argnum]; 213 214 for BlockPredecessor { 215 block: pred_block, 216 inst: pred_inst, 217 } in self.cfg.pred_iter(block) 218 { 219 let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum]; 220 221 // Never coalesce incoming function parameters on the stack. These parameters are 222 // pre-spilled, and the rest of the virtual register would be forced to spill to the 223 // `incoming_arg` stack slot too. 224 if let ir::ValueDef::Param(def_block, def_num) = self.func.dfg.value_def(arg) { 225 if Some(def_block) == self.func.layout.entry_block() 226 && self.func.signature.params[def_num].location.is_stack() 227 { 228 debug!("-> isolating function stack parameter {}", arg); 229 let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg); 230 self.virtregs.union(param, new_arg); 231 continue; 232 } 233 } 234 235 // Check for basic interference: If `arg` overlaps a value defined at the entry to 236 // `block`, it can never be used as a block argument. 237 let interference = { 238 let lr = &self.liveness[arg]; 239 240 // There are two ways the argument value can interfere with `block`: 241 // 242 // 1. It is defined in a dominating block and live-in to `block`. 243 // 2. If is itself a parameter value for `block`. This case should already have been 244 // eliminated by `isolate_conflicting_params()`. 245 debug_assert!( 246 lr.def() != block.into(), 247 "{} parameter {} was missed by isolate_conflicting_params()", 248 block, 249 arg 250 ); 251 252 // The only other possibility is that `arg` is live-in to `block`. 
253 lr.is_livein(block, &self.func.layout) 254 }; 255 256 if interference { 257 let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg); 258 self.virtregs.union(param, new_arg); 259 } else { 260 self.virtregs.union(param, arg); 261 } 262 } 263 } 264 265 // Isolate block parameter value `param` on `block`. 266 // 267 // When `param=v10`: 268 // 269 // block1(v10: i32): 270 // foo 271 // 272 // becomes: 273 // 274 // block1(v11: i32): 275 // v10 = copy v11 276 // foo 277 // 278 // This function inserts the copy and updates the live ranges of the old and new parameter 279 // values. Returns the new parameter value. isolate_param(&mut self, block: Block, param: Value) -> Value280 fn isolate_param(&mut self, block: Block, param: Value) -> Value { 281 debug_assert_eq!( 282 self.func.dfg.value_def(param).pp(), 283 ExpandedProgramPoint::Block(block) 284 ); 285 let ty = self.func.dfg.value_type(param); 286 let new_val = self.func.dfg.replace_block_param(param, ty); 287 288 // Insert a copy instruction at the top of `block`. 289 let mut pos = EncCursor::new(self.func, self.isa).at_first_inst(block); 290 if let Some(inst) = pos.current_inst() { 291 pos.use_srcloc(inst); 292 } 293 pos.ins().with_result(param).copy(new_val); 294 let inst = pos.built_inst(); 295 self.liveness.move_def_locally(param, inst); 296 297 debug!( 298 "-> inserted {}, following {}({}: {})", 299 pos.display_inst(inst), 300 block, 301 new_val, 302 ty 303 ); 304 305 // Create a live range for the new value. 306 // TODO: Should we handle ghost values? 307 let affinity = Affinity::new( 308 &self 309 .encinfo 310 .operand_constraints(pos.func.encodings[inst]) 311 .expect("Bad copy encoding") 312 .outs[0], 313 ); 314 self.liveness.create_dead(new_val, block, affinity); 315 self.liveness 316 .extend_locally(new_val, block, inst, &pos.func.layout); 317 318 new_val 319 } 320 321 // Isolate the block argument `pred_val` from the predecessor `(pred_block, pred_inst)`. 
322 // 323 // It is assumed that `pred_inst` is a branch instruction in `pred_block` whose `argnum`'th block 324 // argument is `pred_val`. Since the argument value interferes with the corresponding block 325 // parameter at the destination, a copy is used instead: 326 // 327 // brnz v1, block2(v10) 328 // 329 // Becomes: 330 // 331 // v11 = copy v10 332 // brnz v1, block2(v11) 333 // 334 // This way the interference with the block parameter is avoided. 335 // 336 // A live range for the new value is created while the live range for `pred_val` is left 337 // unaltered. 338 // 339 // The new argument value is returned. isolate_arg( &mut self, pred_block: Block, pred_inst: Inst, argnum: usize, pred_val: Value, ) -> Value340 fn isolate_arg( 341 &mut self, 342 pred_block: Block, 343 pred_inst: Inst, 344 argnum: usize, 345 pred_val: Value, 346 ) -> Value { 347 let mut pos = EncCursor::new(self.func, self.isa).at_inst(pred_inst); 348 pos.use_srcloc(pred_inst); 349 let copy = pos.ins().copy(pred_val); 350 let inst = pos.built_inst(); 351 352 // Create a live range for the new value. 353 // TODO: Handle affinity for ghost values. 354 let affinity = Affinity::new( 355 &self 356 .encinfo 357 .operand_constraints(pos.func.encodings[inst]) 358 .expect("Bad copy encoding") 359 .outs[0], 360 ); 361 self.liveness.create_dead(copy, inst, affinity); 362 self.liveness 363 .extend_locally(copy, pred_block, pred_inst, &pos.func.layout); 364 365 pos.func.dfg.inst_variable_args_mut(pred_inst)[argnum] = copy; 366 367 debug!( 368 "-> inserted {}, before {}: {}", 369 pos.display_inst(inst), 370 pred_block, 371 pos.display_inst(pred_inst) 372 ); 373 374 copy 375 } 376 377 /// Finish the union-find part of the coalescing algorithm. 378 /// 379 /// This builds the initial set of virtual registers as the transitive/reflexive/symmetric 380 /// closure of the relation formed by block parameter-argument pairs found by `union_find_block()`. 
finish_union_find(&mut self)381 fn finish_union_find(&mut self) { 382 self.virtregs.finish_union_find(None); 383 debug!("After union-find phase:{}", self.virtregs); 384 } 385 } 386 387 /// Phase 2: Dominator forests. 388 /// 389 /// The main entry point is `process_vregs()`. 390 impl<'a> Context<'a> { 391 /// Check al virtual registers for interference and fix conflicts. process_vregs(&mut self)392 pub fn process_vregs(&mut self) { 393 for vreg in self.virtregs.all_virtregs() { 394 self.process_vreg(vreg); 395 } 396 } 397 398 // Check `vreg` for interferences and fix conflicts. process_vreg(&mut self, vreg: VirtReg)399 fn process_vreg(&mut self, vreg: VirtReg) { 400 if !self.check_vreg(vreg) { 401 self.synthesize_vreg(vreg); 402 } 403 } 404 405 // Check `vreg` for interferences. 406 // 407 // We use a Budimlic dominator forest to check for interferences between the values in `vreg` 408 // and identify values that should be isolated. 409 // 410 // Returns true if `vreg` is free of interference. check_vreg(&mut self, vreg: VirtReg) -> bool411 fn check_vreg(&mut self, vreg: VirtReg) -> bool { 412 // Order the values according to the dominator pre-order of their definition. 413 let values = self.virtregs.sort_values(vreg, self.func, self.preorder); 414 debug!("Checking {} = {}", vreg, DisplayList(values)); 415 416 // Now push the values in order to the dominator forest. 417 // This gives us the closest dominating value def for each of the values. 418 self.forest.clear(); 419 for &value in values { 420 let node = Node::value(value, 0, self.func); 421 422 // Push this value and get the nearest dominating def back. 423 let parent = match self 424 .forest 425 .push_node(node, self.func, self.domtree, self.preorder) 426 { 427 None => continue, 428 Some(n) => n, 429 }; 430 431 // Check for interference between `parent` and `value`. Since `parent` dominates 432 // `value`, we only have to check if it overlaps the definition. 
433 if self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout) { 434 // The two values are interfering, so they can't be in the same virtual register. 435 debug!("-> interference: {} overlaps def of {}", parent, value); 436 return false; 437 } 438 } 439 440 // No interference found. 441 true 442 } 443 444 /// Destroy and rebuild `vreg` by iterative coalescing. 445 /// 446 /// When detecting that a virtual register formed in phase 1 contains interference, we have to 447 /// start over in a more careful way. We'll split the vreg into individual values and then 448 /// reassemble virtual registers using an iterative algorithm of pairwise merging. 449 /// 450 /// It is possible to recover multiple large virtual registers this way while still avoiding 451 /// a lot of copies. synthesize_vreg(&mut self, vreg: VirtReg)452 fn synthesize_vreg(&mut self, vreg: VirtReg) { 453 self.vcopies.initialize( 454 self.virtregs.values(vreg), 455 self.func, 456 self.cfg, 457 self.preorder, 458 ); 459 debug!( 460 "Synthesizing {} from {} branches and params {}", 461 vreg, 462 self.vcopies.branches.len(), 463 DisplayList(&self.vcopies.params) 464 ); 465 self.virtregs.remove(vreg); 466 467 while let Some(param) = self.vcopies.next_param() { 468 self.merge_param(param); 469 self.vcopies.merged_param(param, self.func); 470 } 471 } 472 473 /// Merge block parameter value `param` with virtual registers at its predecessors. merge_param(&mut self, param: Value)474 fn merge_param(&mut self, param: Value) { 475 let (block, argnum) = match self.func.dfg.value_def(param) { 476 ir::ValueDef::Param(e, n) => (e, n), 477 ir::ValueDef::Result(_, _) => panic!("Expected parameter"), 478 }; 479 480 // Collect all the predecessors and rearrange them. 481 // 482 // The order we process the predecessors matters because once one predecessor's virtual 483 // register is merged, it can cause interference with following merges. 
This means that the 484 // first predecessors processed are more likely to be copy-free. We want an ordering that 485 // is a) good for performance and b) as stable as possible. The pred_iter() iterator uses 486 // instruction numbers which is not great for reproducible test cases. 487 // 488 // First merge loop back-edges in layout order, on the theory that shorter back-edges are 489 // more sensitive to inserted copies. 490 // 491 // Second everything else in reverse layout order. Again, short forward branches get merged 492 // first. There can also be backwards branches mixed in here, though, as long as they are 493 // not loop backedges. 494 debug_assert!(self.predecessors.is_empty()); 495 debug_assert!(self.backedges.is_empty()); 496 for BlockPredecessor { 497 block: pred_block, 498 inst: pred_inst, 499 } in self.cfg.pred_iter(block) 500 { 501 if self.preorder.dominates(block, pred_block) { 502 self.backedges.push(pred_inst); 503 } else { 504 self.predecessors.push(pred_inst); 505 } 506 } 507 // Order instructions in reverse order so we can pop them off the back. 508 { 509 let l = &self.func.layout; 510 self.backedges.sort_unstable_by(|&a, &b| l.cmp(b, a)); 511 self.predecessors.sort_unstable_by(|&a, &b| l.cmp(a, b)); 512 self.predecessors.extend_from_slice(&self.backedges); 513 self.backedges.clear(); 514 } 515 516 while let Some(pred_inst) = self.predecessors.pop() { 517 let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum]; 518 519 // We want to merge the vreg containing `param` with the vreg containing `arg`. 520 if self.try_merge_vregs(param, arg) { 521 continue; 522 } 523 524 // Can't merge because of interference. Insert a copy instead. 525 let pred_block = self.func.layout.pp_block(pred_inst); 526 let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg); 527 self.virtregs 528 .insert_single(param, new_arg, self.func, self.preorder); 529 } 530 } 531 532 /// Merge the virtual registers containing `param` and `arg` if possible. 
533 /// 534 /// Use self.vcopies to check for virtual copy interference too. 535 /// 536 /// Returns true if the virtual registers are successfully merged. try_merge_vregs(&mut self, param: Value, arg: Value) -> bool537 fn try_merge_vregs(&mut self, param: Value, arg: Value) -> bool { 538 if self.virtregs.same_class(param, arg) { 539 return true; 540 } 541 542 if !self.can_merge_vregs(param, arg) { 543 return false; 544 } 545 546 let _vreg = self.virtregs.unify(self.values); 547 debug!("-> merged into {} = {}", _vreg, DisplayList(self.values)); 548 true 549 } 550 551 /// Check if it is possible to merge two virtual registers. 552 /// 553 /// Also leave `self.values` with the ordered list of values in the merged vreg. can_merge_vregs(&mut self, param: Value, arg: Value) -> bool554 fn can_merge_vregs(&mut self, param: Value, arg: Value) -> bool { 555 // We only need an immutable function reference. 556 let func = &*self.func; 557 let domtree = self.domtree; 558 let preorder = self.preorder; 559 560 // Restrict the virtual copy nodes we look at and key the `set_id` and `value` properties 561 // of the nodes. Set_id 0 will be `param` and set_id 1 will be `arg`. 562 self.vcopies 563 .set_filter([param, arg], func, self.virtregs, preorder); 564 565 // Now create an ordered sequence of dom-forest nodes from three sources: The two virtual 566 // registers and the filtered virtual copies. 567 let v0 = self.virtregs.congruence_class(¶m); 568 let v1 = self.virtregs.congruence_class(&arg); 569 debug!( 570 " - set 0: {}\n - set 1: {}", 571 DisplayList(v0), 572 DisplayList(v1) 573 ); 574 let nodes = MergeNodes::new( 575 func, 576 preorder, 577 MergeNodes::new( 578 func, 579 preorder, 580 v0.iter().map(|&value| Node::value(value, 0, func)), 581 v1.iter().map(|&value| Node::value(value, 1, func)), 582 ), 583 self.vcopies.iter(func), 584 ); 585 586 // Now push the values in order to the dominator forest. 587 // This gives us the closest dominating value def for each of the values. 
588 self.forest.clear(); 589 self.values.clear(); 590 for node in nodes { 591 // Accumulate ordered values for the new vreg. 592 if node.is_value() { 593 self.values.push(node.value); 594 } 595 596 // Push this value and get the nearest dominating def back. 597 let parent = match self.forest.push_node(node, func, domtree, preorder) { 598 None => { 599 if node.is_vcopy { 600 self.forest.pop_last(); 601 } 602 continue; 603 } 604 Some(n) => n, 605 }; 606 607 if node.is_vcopy { 608 // Vcopy nodes don't represent interference if they are copies of the parent value. 609 // In that case, the node must be removed because the parent value can still be 610 // live belong the vcopy. 611 if parent.is_vcopy || node.value == parent.value { 612 self.forest.pop_last(); 613 continue; 614 } 615 616 // Check if the parent value interferes with the virtual copy. 617 let inst = node.def.unwrap_inst(); 618 if node.set_id != parent.set_id 619 && self.liveness[parent.value].reaches_use(inst, node.block, &self.func.layout) 620 { 621 debug!( 622 " - interference: {} overlaps vcopy at {}:{}", 623 parent, 624 node.block, 625 self.func.dfg.display_inst(inst, self.isa) 626 ); 627 return false; 628 } 629 630 // Keep this vcopy on the stack. It will save us a few interference checks. 631 continue; 632 } 633 634 // Parent vcopies never represent any interference. We only keep them on the stack to 635 // avoid an interference check against a value higher up. 636 if parent.is_vcopy { 637 continue; 638 } 639 640 // Both node and parent are values, so check for interference. 641 debug_assert!(node.is_value() && parent.is_value()); 642 if node.set_id != parent.set_id 643 && self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout) 644 { 645 // The two values are interfering. 646 debug!(" - interference: {} overlaps def of {}", parent, node.value); 647 return false; 648 } 649 } 650 651 // The values vector should receive all values. 
652 debug_assert_eq!(v0.len() + v1.len(), self.values.len()); 653 654 // No interference found. 655 true 656 } 657 } 658 659 /// Dominator forest. 660 /// 661 /// This is a utility type used for detecting interference in virtual registers, where each virtual 662 /// register is a list of values ordered according to the dominator tree pre-order. 663 /// 664 /// The idea of a dominator forest was introduced on the Budimlic paper and the linear stack 665 /// representation in the Boissinot paper. Our version of the linear stack is slightly modified 666 /// because we have a pre-order of the dominator tree at the block granularity, not basic block 667 /// granularity. 668 /// 669 /// Values are pushed in dominator tree pre-order of their definitions, and for each value pushed, 670 /// `push_node` will return the nearest previously pushed value that dominates the definition. 671 #[allow(dead_code)] 672 struct DomForest { 673 // Stack representing the rightmost edge of the dominator forest so far, ending in the last 674 // element of `values`. 675 // 676 // At all times, the block of each element in the stack dominates the block of the next one. 677 stack: Vec<Node>, 678 } 679 680 /// A node in the dominator forest. 681 #[derive(Clone, Copy, Debug)] 682 #[allow(dead_code)] 683 struct Node { 684 /// The program point where the live range is defined. 685 def: ExpandedProgramPoint, 686 /// block containing `def`. 687 block: Block, 688 /// Is this a virtual copy or a value? 689 is_vcopy: bool, 690 /// Set identifier. 691 set_id: u8, 692 /// For a value node: The value defined at `def`. 693 /// For a vcopy node: The relevant branch argument at `def`. 694 value: Value, 695 } 696 697 impl Node { 698 /// Create a node representing `value`. 
value(value: Value, set_id: u8, func: &Function) -> Self699 pub fn value(value: Value, set_id: u8, func: &Function) -> Self { 700 let def = func.dfg.value_def(value).pp(); 701 let block = func.layout.pp_block(def); 702 Self { 703 def, 704 block, 705 is_vcopy: false, 706 set_id, 707 value, 708 } 709 } 710 711 /// Create a node representing a virtual copy. vcopy(branch: Inst, value: Value, set_id: u8, func: &Function) -> Self712 pub fn vcopy(branch: Inst, value: Value, set_id: u8, func: &Function) -> Self { 713 let def = branch.into(); 714 let block = func.layout.pp_block(def); 715 Self { 716 def, 717 block, 718 is_vcopy: true, 719 set_id, 720 value, 721 } 722 } 723 724 /// IF this a value node? is_value(&self) -> bool725 pub fn is_value(&self) -> bool { 726 !self.is_vcopy 727 } 728 } 729 730 impl fmt::Display for Node { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result731 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 732 if self.is_vcopy { 733 write!(f, "{}:vcopy({})@{}", self.set_id, self.value, self.block) 734 } else { 735 write!(f, "{}:{}@{}", self.set_id, self.value, self.block) 736 } 737 } 738 } 739 740 impl DomForest { 741 /// Create a new empty dominator forest. new() -> Self742 pub fn new() -> Self { 743 Self { stack: Vec::new() } 744 } 745 746 /// Clear all data structures in this dominator forest. clear(&mut self)747 pub fn clear(&mut self) { 748 self.stack.clear(); 749 } 750 751 /// Add a single node to the forest. 752 /// 753 /// Update the stack so its dominance invariants are preserved. Detect a parent node on the 754 /// stack which is the closest one dominating the new node and return it. push_node( &mut self, node: Node, func: &Function, domtree: &DominatorTree, preorder: &DominatorTreePreorder, ) -> Option<Node>755 fn push_node( 756 &mut self, 757 node: Node, 758 func: &Function, 759 domtree: &DominatorTree, 760 preorder: &DominatorTreePreorder, 761 ) -> Option<Node> { 762 // The stack contains the current sequence of dominating defs. 
Pop elements until we 763 // find one whose block dominates `node.block`. 764 while let Some(top) = self.stack.pop() { 765 if preorder.dominates(top.block, node.block) { 766 // This is the right insertion spot for `node`. 767 self.stack.push(top); 768 self.stack.push(node); 769 770 // We know here that `top.block` dominates `node.block`, and thus `node.def`. This does 771 // not necessarily mean that `top.def` dominates `node.def`, though. The `top.def` 772 // program point may be below the last branch in `top.block` that dominates 773 // `node.def`. 774 // 775 // We do know, though, that if there is a nearest value dominating `node.def`, it 776 // will be on the stack. We just need to find the last stack entry that actually 777 // dominates. 778 let mut last_dom = node.def; 779 for &n in self.stack.iter().rev().skip(1) { 780 // If the node is defined at the block header, it does in fact dominate 781 // everything else pushed on the stack. 782 let def_inst = match n.def { 783 ExpandedProgramPoint::Block(_) => return Some(n), 784 ExpandedProgramPoint::Inst(i) => i, 785 }; 786 787 // We need to find the last program point in `n.block` to dominate `node.def`. 788 last_dom = match domtree.last_dominator(n.block, last_dom, &func.layout) { 789 None => n.block.into(), 790 Some(inst) => { 791 if func.layout.cmp(def_inst, inst) != cmp::Ordering::Greater { 792 return Some(n); 793 } 794 inst.into() 795 } 796 }; 797 } 798 799 // No real dominator found on the stack. 800 return None; 801 } 802 } 803 804 // No dominators, start a new tree in the forest. 805 self.stack.push(node); 806 None 807 } 808 pop_last(&mut self)809 pub fn pop_last(&mut self) { 810 self.stack.pop().expect("Stack is empty"); 811 } 812 } 813 814 /// Virtual copies. 815 /// 816 /// When building a full virtual register at once, like phase 1 does with union-find, it is good 817 /// enough to check for interference between the values in the full virtual register like 818 /// `check_vreg()` does. 
/// However, in phase 2 we are doing pairwise merges of partial virtual
/// registers that don't represent the full transitive closure of the block argument-parameter
/// relation. This means that just checking for interference between values is inadequate.
///
/// Example:
///
///     v1 = iconst.i32 1
///     brnz v10, block1(v1)
///     v2 = iconst.i32 2
///     brnz v11, block1(v2)
///     return v1
///
/// block1(v3: i32):
///     v4 = iadd v3, v1
///
/// With just value interference checking, we could build the virtual register [v3, v1] since those
/// two values don't interfere. We can't merge v2 into this virtual register because v1 and v2
/// interfere. However, we can't resolve that interference either by inserting a copy:
///
///     v1 = iconst.i32 1
///     brnz v10, block1(v1)
///     v2 = iconst.i32 2
///     v20 = copy v2            <-- new value
///     brnz v11, block1(v20)
///     return v1
///
/// block1(v3: i32):
///     v4 = iadd v3, v1
///
/// The new value v20 still interferes with v1 because v1 is live across the "brnz v11" branch. We
/// shouldn't have placed v1 and v3 in the same virtual register to begin with.
///
/// LLVM detects this form of interference by inserting copies in the predecessors of all phi
/// instructions, then attempting to delete the copies. This is quite expensive because it involves
/// creating a large number of copies and values.
///
/// We'll detect this form of interference with *virtual copies*: Each block parameter value that
/// hasn't yet been fully merged with its block argument values is given a set of virtual copies at
/// the predecessors. Any candidate value to be merged is checked for interference against both the
/// virtual register and the virtual copies.
///
/// In the general case, we're checking if two virtual registers can be merged, and both can
/// contain incomplete block parameter values with associated virtual copies.
///
/// The `VirtualCopies` struct represents a set of incomplete parameters and their associated
/// virtual copies. Given two virtual registers, it can produce an ordered sequence of nodes
/// representing the virtual copies in both vregs.
struct VirtualCopies {
    // Incomplete block parameters. These don't need to belong to the same virtual register.
    params: Vec<Value>,

    // Set of `(branch, destination)` pairs. These are all the predecessor branches for the blocks
    // whose parameters can be found in `params`.
    //
    // Ordered by dominator tree pre-order of the branch instructions.
    branches: Vec<(Inst, Block)>,

    // Filter for the currently active node iterator.
    //
    // A block => (set_id, num) entry means that branches to `block` are active in `set_id` with
    // branch argument number `num`.
    filter: FxHashMap<Block, (u8, usize)>,
}

impl VirtualCopies {
    /// Create an empty VirtualCopies struct.
    pub fn new() -> Self {
        Self {
            params: Vec::new(),
            branches: Vec::new(),
            filter: FxHashMap(),
        }
    }

    /// Clear all state.
    pub fn clear(&mut self) {
        self.params.clear();
        self.branches.clear();
        self.filter.clear();
    }

    /// Initialize virtual copies from the (interfering) values in a union-find virtual register
    /// that is going to be broken up and reassembled iteratively.
    ///
    /// The values are assumed to be in domtree pre-order.
    ///
    /// This will extract the block parameter values and associate virtual copies with all of them.
    pub fn initialize(
        &mut self,
        values: &[Value],
        func: &Function,
        cfg: &ControlFlowGraph,
        preorder: &DominatorTreePreorder,
    ) {
        self.clear();

        let mut last_block = None;
        for &val in values {
            if let ir::ValueDef::Param(block, _) = func.dfg.value_def(val) {
                self.params.push(val);

                // We may have multiple parameters from the same block, but we only need to collect
                // predecessors once. Also verify the ordering of values.
                if let Some(last) = last_block {
                    match preorder.pre_cmp_block(last, block) {
                        cmp::Ordering::Less => {}
                        cmp::Ordering::Equal => continue,
                        cmp::Ordering::Greater => panic!("values in wrong order"),
                    }
                }

                // This block hasn't been seen before.
                for BlockPredecessor {
                    inst: pred_inst, ..
                } in cfg.pred_iter(block)
                {
                    self.branches.push((pred_inst, block));
                }
                last_block = Some(block);
            }
        }

        // Reorder the predecessor branches as required by the dominator forest.
        self.branches
            .sort_unstable_by(|&(a, _), &(b, _)| preorder.pre_cmp(a, b, &func.layout));
    }

    /// Get the next unmerged parameter value.
    pub fn next_param(&self) -> Option<Value> {
        self.params.last().cloned()
    }

    /// Indicate that `param` is now fully merged.
    pub fn merged_param(&mut self, param: Value, func: &Function) {
        let popped = self.params.pop();
        debug_assert_eq!(popped, Some(param));

        // The domtree pre-order in `self.params` guarantees that all parameters defined at the
        // same block will be adjacent. This means we can see when all parameters at a block have been
        // merged.
        //
        // We don't care about the last parameter - when that is merged we are done.
960 let last = match self.params.last() { 961 None => return, 962 Some(x) => *x, 963 }; 964 let block = func.dfg.value_def(param).unwrap_block(); 965 if func.dfg.value_def(last).unwrap_block() == block { 966 // We're not done with `block` parameters yet. 967 return; 968 } 969 970 // Alright, we know there are no remaining `block` parameters in `self.params`. This means we 971 // can get rid of the `block` predecessors in `self.branches`. We don't have to, the 972 // `VCopyIter` will just skip them, but this reduces its workload. 973 self.branches.retain(|&(_, dest)| dest != block); 974 } 975 976 /// Set a filter for the virtual copy nodes we're generating. 977 /// 978 /// Only generate nodes for parameter values that are in the same congruence class as `reprs`. 979 /// Assign a set_id to each node corresponding to the index into `reprs` of the parameter's 980 /// congruence class. set_filter( &mut self, reprs: [Value; 2], func: &Function, virtregs: &VirtRegs, preorder: &DominatorTreePreorder, )981 pub fn set_filter( 982 &mut self, 983 reprs: [Value; 2], 984 func: &Function, 985 virtregs: &VirtRegs, 986 preorder: &DominatorTreePreorder, 987 ) { 988 self.filter.clear(); 989 990 // Parameters in `self.params` are ordered according to the domtree per-order, and they are 991 // removed from the back once they are fully merged. This means we can stop looking for 992 // parameters once we're beyond the last one. 993 let last_param = *self.params.last().expect("No more parameters"); 994 let limit = func.dfg.value_def(last_param).unwrap_block(); 995 996 for (set_id, repr) in reprs.iter().enumerate() { 997 let set_id = set_id as u8; 998 for &value in virtregs.congruence_class(repr) { 999 if let ir::ValueDef::Param(block, num) = func.dfg.value_def(value) { 1000 if preorder.pre_cmp_block(block, limit) == cmp::Ordering::Greater { 1001 // Stop once we're outside the bounds of `self.params`. 
1002 break; 1003 } 1004 self.filter.insert(block, (set_id, num)); 1005 } 1006 } 1007 } 1008 } 1009 1010 /// Look up the set_id and argument number for `block` in the current filter. 1011 /// 1012 /// Returns `None` if none of the currently active parameters are defined at `block`. Otherwise 1013 /// returns `(set_id, argnum)` for an active parameter defined at `block`. lookup(&self, block: Block) -> Option<(u8, usize)>1014 fn lookup(&self, block: Block) -> Option<(u8, usize)> { 1015 self.filter.get(&block).cloned() 1016 } 1017 1018 /// Get an iterator of dom-forest nodes corresponding to the current filter. iter<'a>(&'a self, func: &'a Function) -> VCopyIter1019 pub fn iter<'a>(&'a self, func: &'a Function) -> VCopyIter { 1020 VCopyIter { 1021 func, 1022 vcopies: self, 1023 branches: self.branches.iter(), 1024 } 1025 } 1026 } 1027 1028 /// Virtual copy iterator. 1029 /// 1030 /// This iterator produces dom-forest nodes corresponding to the current filter in the virtual 1031 /// copies container. 1032 struct VCopyIter<'a> { 1033 func: &'a Function, 1034 vcopies: &'a VirtualCopies, 1035 branches: slice::Iter<'a, (Inst, Block)>, 1036 } 1037 1038 impl<'a> Iterator for VCopyIter<'a> { 1039 type Item = Node; 1040 next(&mut self) -> Option<Node>1041 fn next(&mut self) -> Option<Node> { 1042 while let Some(&(branch, dest)) = self.branches.next() { 1043 if let Some((set_id, argnum)) = self.vcopies.lookup(dest) { 1044 let arg = self.func.dfg.inst_variable_args(branch)[argnum]; 1045 return Some(Node::vcopy(branch, arg, set_id, self.func)); 1046 } 1047 } 1048 None 1049 } 1050 } 1051 1052 /// Node-merging iterator. 1053 /// 1054 /// Given two ordered sequences of nodes, yield an ordered sequence containing all of them. 
1055 struct MergeNodes<'a, IA, IB> 1056 where 1057 IA: Iterator<Item = Node>, 1058 IB: Iterator<Item = Node>, 1059 { 1060 a: iter::Peekable<IA>, 1061 b: iter::Peekable<IB>, 1062 layout: &'a ir::Layout, 1063 preorder: &'a DominatorTreePreorder, 1064 } 1065 1066 impl<'a, IA, IB> MergeNodes<'a, IA, IB> 1067 where 1068 IA: Iterator<Item = Node>, 1069 IB: Iterator<Item = Node>, 1070 { new(func: &'a Function, preorder: &'a DominatorTreePreorder, a: IA, b: IB) -> Self1071 pub fn new(func: &'a Function, preorder: &'a DominatorTreePreorder, a: IA, b: IB) -> Self { 1072 MergeNodes { 1073 a: a.peekable(), 1074 b: b.peekable(), 1075 layout: &func.layout, 1076 preorder, 1077 } 1078 } 1079 } 1080 1081 impl<'a, IA, IB> Iterator for MergeNodes<'a, IA, IB> 1082 where 1083 IA: Iterator<Item = Node>, 1084 IB: Iterator<Item = Node>, 1085 { 1086 type Item = Node; 1087 next(&mut self) -> Option<Node>1088 fn next(&mut self) -> Option<Node> { 1089 let ord = match (self.a.peek(), self.b.peek()) { 1090 (Some(a), Some(b)) => { 1091 let layout = self.layout; 1092 self.preorder 1093 .pre_cmp_block(a.block, b.block) 1094 .then_with(|| layout.cmp(a.def, b.def)) 1095 } 1096 (Some(_), None) => cmp::Ordering::Less, 1097 (None, Some(_)) => cmp::Ordering::Greater, 1098 (None, None) => return None, 1099 }; 1100 // When the nodes compare equal, prefer the `a` side. 1101 if ord != cmp::Ordering::Greater { 1102 self.a.next() 1103 } else { 1104 self.b.next() 1105 } 1106 } 1107 } 1108