1 //! Routine to compute the strongly connected components (SCCs) of a graph. 2 //! 3 //! Also computes as the resulting DAG if each SCC is replaced with a 4 //! node in the graph. This uses [Tarjan's algorithm]( 5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm) 6 //! that completes in *O(n)* time. 7 8 use crate::fx::FxHashSet; 9 use crate::graph::vec_graph::VecGraph; 10 use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors}; 11 use rustc_index::vec::{Idx, IndexVec}; 12 use std::ops::Range; 13 14 #[cfg(test)] 15 mod tests; 16 17 /// Strongly connected components (SCC) of a graph. The type `N` is 18 /// the index type for the graph nodes and `S` is the index type for 19 /// the SCCs. We can map from each node to the SCC that it 20 /// participates in, and we also have the successors of each SCC. 21 pub struct Sccs<N: Idx, S: Idx> { 22 /// For each node, what is the SCC index of the SCC to which it 23 /// belongs. 24 scc_indices: IndexVec<N, S>, 25 26 /// Data about each SCC. 27 scc_data: SccData<S>, 28 } 29 30 struct SccData<S: Idx> { 31 /// For each SCC, the range of `all_successors` where its 32 /// successors can be found. 33 ranges: IndexVec<S, Range<usize>>, 34 35 /// Contains the successors for all the Sccs, concatenated. The 36 /// range of indices corresponding to a given SCC is found in its 37 /// SccData. 38 all_successors: Vec<S>, 39 } 40 41 impl<N: Idx, S: Idx> Sccs<N, S> { new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self42 pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self { 43 SccsConstruction::construct(graph) 44 } 45 46 /// Returns the number of SCCs in the graph. num_sccs(&self) -> usize47 pub fn num_sccs(&self) -> usize { 48 self.scc_data.len() 49 } 50 51 /// Returns an iterator over the SCCs in the graph. 52 /// 53 /// The SCCs will be iterated in **dependency order** (or **post order**), 54 /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after. 55 /// This is convenient when the edges represent dependencies: when you visit 56 /// `S1`, the value for `S2` will already have been computed. all_sccs(&self) -> impl Iterator<Item = S>57 pub fn all_sccs(&self) -> impl Iterator<Item = S> { 58 (0..self.scc_data.len()).map(S::new) 59 } 60 61 /// Returns the SCC to which a node `r` belongs. scc(&self, r: N) -> S62 pub fn scc(&self, r: N) -> S { 63 self.scc_indices[r] 64 } 65 66 /// Returns the successors of the given SCC. successors(&self, scc: S) -> &[S]67 pub fn successors(&self, scc: S) -> &[S] { 68 self.scc_data.successors(scc) 69 } 70 71 /// Construct the reverse graph of the SCC graph. reverse(&self) -> VecGraph<S>72 pub fn reverse(&self) -> VecGraph<S> { 73 VecGraph::new( 74 self.num_sccs(), 75 self.all_sccs() 76 .flat_map(|source| { 77 self.successors(source).iter().map(move |&target| (target, source)) 78 }) 79 .collect(), 80 ) 81 } 82 } 83 84 impl<N: Idx, S: Idx> DirectedGraph for Sccs<N, S> { 85 type Node = S; 86 } 87 88 impl<N: Idx, S: Idx> WithNumNodes for Sccs<N, S> { num_nodes(&self) -> usize89 fn num_nodes(&self) -> usize { 90 self.num_sccs() 91 } 92 } 93 94 impl<N: Idx, S: Idx> WithNumEdges for Sccs<N, S> { num_edges(&self) -> usize95 fn num_edges(&self) -> usize { 96 self.scc_data.all_successors.len() 97 } 98 } 99 100 impl<N: Idx, S: Idx> GraphSuccessors<'graph> for Sccs<N, S> { 101 type Item = S; 102 103 type Iter = std::iter::Cloned<std::slice::Iter<'graph, S>>; 104 } 105 106 impl<N: Idx, S: Idx> WithSuccessors for Sccs<N, S> { successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter107 fn successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter { 108 self.successors(node).iter().cloned() 109 } 110 } 111 112 impl<S: Idx> SccData<S> { 113 /// Number of SCCs, len(&self) -> usize114 fn len(&self) -> usize { 115 self.ranges.len() 116 } 117 118 /// Returns the successors of the given SCC. successors(&self, scc: S) -> &[S]119 fn successors(&self, scc: S) -> &[S] { 120 // Annoyingly, `range` does not implement `Copy`, so we have 121 // to do `range.start..range.end`: 122 let range = &self.ranges[scc]; 123 &self.all_successors[range.start..range.end] 124 } 125 126 /// Creates a new SCC with `successors` as its successors and 127 /// returns the resulting index. create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S128 fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S { 129 // Store the successors on `scc_successors_vec`, remembering 130 // the range of indices. 131 let all_successors_start = self.all_successors.len(); 132 self.all_successors.extend(successors); 133 let all_successors_end = self.all_successors.len(); 134 135 debug!( 136 "create_scc({:?}) successors={:?}", 137 self.ranges.len(), 138 &self.all_successors[all_successors_start..all_successors_end], 139 ); 140 141 self.ranges.push(all_successors_start..all_successors_end) 142 } 143 } 144 145 struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors, S: Idx> { 146 graph: &'c G, 147 148 /// The state of each node; used during walk to record the stack 149 /// and after walk to record what cycle each node ended up being 150 /// in. 151 node_states: IndexVec<G::Node, NodeState<G::Node, S>>, 152 153 /// The stack of nodes that we are visiting as part of the DFS. 154 node_stack: Vec<G::Node>, 155 156 /// The stack of successors: as we visit a node, we mark our 157 /// position in this stack, and when we encounter a successor SCC, 158 /// we push it on the stack. When we complete an SCC, we can pop 159 /// everything off the stack that was found along the way. 160 successors_stack: Vec<S>, 161 162 /// A set used to strip duplicates. As we accumulate successors 163 /// into the successors_stack, we sometimes get duplicate entries. 164 /// We use this set to remove those -- we also keep its storage 165 /// around between successors to amortize memory allocation costs. 166 duplicate_set: FxHashSet<S>, 167 168 scc_data: SccData<S>, 169 } 170 171 #[derive(Copy, Clone, Debug)] 172 enum NodeState<N, S> { 173 /// This node has not yet been visited as part of the DFS. 174 /// 175 /// After SCC construction is complete, this state ought to be 176 /// impossible. 177 NotVisited, 178 179 /// This node is currently being walk as part of our DFS. It is on 180 /// the stack at the depth `depth`. 181 /// 182 /// After SCC construction is complete, this state ought to be 183 /// impossible. 184 BeingVisited { depth: usize }, 185 186 /// Indicates that this node is a member of the given cycle. 187 InCycle { scc_index: S }, 188 189 /// Indicates that this node is a member of whatever cycle 190 /// `parent` is a member of. This state is transient: whenever we 191 /// see it, we try to overwrite it with the current state of 192 /// `parent` (this is the "path compression" step of a union-find 193 /// algorithm). 194 InCycleWith { parent: N }, 195 } 196 197 #[derive(Copy, Clone, Debug)] 198 enum WalkReturn<S> { 199 Cycle { min_depth: usize }, 200 Complete { scc_index: S }, 201 } 202 203 impl<'c, G, S> SccsConstruction<'c, G, S> 204 where 205 G: DirectedGraph + WithNumNodes + WithSuccessors, 206 S: Idx, 207 { 208 /// Identifies SCCs in the graph `G` and computes the resulting 209 /// DAG. This uses a variant of [Tarjan's 210 /// algorithm][wikipedia]. The high-level summary of the algorithm 211 /// is that we do a depth-first search. Along the way, we keep a 212 /// stack of each node whose successors are being visited. We 213 /// track the depth of each node on this stack (there is no depth 214 /// if the node is not on the stack). When we find that some node 215 /// N with depth D can reach some other node N' with lower depth 216 /// D' (i.e., D' < D), we know that N, N', and all nodes in 217 /// between them on the stack are part of an SCC. 218 /// 219 /// [wikipedia]: https://bit.ly/2EZIx84 construct(graph: &'c G) -> Sccs<G::Node, S>220 fn construct(graph: &'c G) -> Sccs<G::Node, S> { 221 let num_nodes = graph.num_nodes(); 222 223 let mut this = Self { 224 graph, 225 node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes), 226 node_stack: Vec::with_capacity(num_nodes), 227 successors_stack: Vec::new(), 228 scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() }, 229 duplicate_set: FxHashSet::default(), 230 }; 231 232 let scc_indices = (0..num_nodes) 233 .map(G::Node::new) 234 .map(|node| match this.start_walk_from(node) { 235 WalkReturn::Complete { scc_index } => scc_index, 236 WalkReturn::Cycle { min_depth } => panic!( 237 "`start_walk_node({:?})` returned cycle with depth {:?}", 238 node, min_depth 239 ), 240 }) 241 .collect(); 242 243 Sccs { scc_indices, scc_data: this.scc_data } 244 } 245 start_walk_from(&mut self, node: G::Node) -> WalkReturn<S>246 fn start_walk_from(&mut self, node: G::Node) -> WalkReturn<S> { 247 if let Some(result) = self.inspect_node(node) { 248 result 249 } else { 250 self.walk_unvisited_node(node) 251 } 252 } 253 254 /// Inspect a node during the DFS. We first examine its current 255 /// state -- if it is not yet visited (`NotVisited`), return `None` so 256 /// that the caller might push it onto the stack and start walking its 257 /// successors. 258 /// 259 /// If it is already on the DFS stack it will be in the state 260 /// `BeingVisited`. In that case, we have found a cycle and we 261 /// return the depth from the stack. 262 /// 263 /// Otherwise, we are looking at a node that has already been 264 /// completely visited. We therefore return `WalkReturn::Complete` 265 /// with its associated SCC index. inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>>266 fn inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>> { 267 Some(match self.find_state(node) { 268 NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index }, 269 270 NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth }, 271 272 NodeState::NotVisited => return None, 273 274 NodeState::InCycleWith { parent } => panic!( 275 "`find_state` returned `InCycleWith({:?})`, which ought to be impossible", 276 parent 277 ), 278 }) 279 } 280 281 /// Fetches the state of the node `r`. If `r` is recorded as being 282 /// in a cycle with some other node `r2`, then fetches the state 283 /// of `r2` (and updates `r` to reflect current result). This is 284 /// basically the "find" part of a standard union-find algorithm 285 /// (with path compression). find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S>286 fn find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S> { 287 // To avoid recursion we temporarily reuse the `parent` of each 288 // InCycleWith link to encode a downwards link while compressing 289 // the path. After we have found the root or deepest node being 290 // visited, we traverse the reverse links and correct the node 291 // states on the way. 292 // 293 // **Note**: This mutation requires that this is a leaf function 294 // or at least that none of the called functions inspects the 295 // current node states. Luckily, we are a leaf. 296 297 // Remember one previous link. The termination condition when 298 // following links downwards is then simply as soon as we have 299 // found the initial self-loop. 300 let mut previous_node = node; 301 302 // Ultimately assigned by the parent when following 303 // `InCycleWith` upwards. 304 let node_state = loop { 305 debug!("find_state(r = {:?} in state {:?})", node, self.node_states[node]); 306 match self.node_states[node] { 307 NodeState::InCycle { scc_index } => break NodeState::InCycle { scc_index }, 308 NodeState::BeingVisited { depth } => break NodeState::BeingVisited { depth }, 309 NodeState::NotVisited => break NodeState::NotVisited, 310 NodeState::InCycleWith { parent } => { 311 // We test this, to be extremely sure that we never 312 // ever break our termination condition for the 313 // reverse iteration loop. 314 assert!(node != parent, "Node can not be in cycle with itself"); 315 // Store the previous node as an inverted list link 316 self.node_states[node] = NodeState::InCycleWith { parent: previous_node }; 317 // Update to parent node. 318 previous_node = node; 319 node = parent; 320 } 321 } 322 }; 323 324 // The states form a graph where up to one outgoing link is stored at 325 // each node. Initially in general, 326 // 327 // E 328 // ^ 329 // | 330 // InCycleWith/BeingVisited/NotVisited 331 // | 332 // A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+ 333 // | 334 // = node, previous_node 335 // 336 // After the first loop, this will look like 337 // E 338 // ^ 339 // | 340 // InCycleWith/BeingVisited/NotVisited 341 // | 342 // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+ 343 // | | | | 344 // | InCycleWith | = node 345 // +-+ =previous_node 346 // 347 // Note in particular that A will be linked to itself in a self-cycle 348 // and no other self-cycles occur due to how InCycleWith is assigned in 349 // the find phase implemented by `walk_unvisited_node`. 350 // 351 // We now want to compress the path, that is assign the state of the 352 // link D-E to all other links. 353 // 354 // We can then walk backwards, starting from `previous_node`, and assign 355 // each node in the list with the updated state. The loop terminates 356 // when we reach the self-cycle. 357 358 // Move backwards until we found the node where we started. We 359 // will know when we hit the state where previous_node == node. 360 loop { 361 // Back at the beginning, we can return. 362 if previous_node == node { 363 return node_state; 364 } 365 // Update to previous node in the link. 366 match self.node_states[previous_node] { 367 NodeState::InCycleWith { parent: previous } => { 368 node = previous_node; 369 previous_node = previous; 370 } 371 // Only InCycleWith nodes were added to the reverse linked list. 372 other => panic!("Invalid previous link while compressing cycle: {:?}", other), 373 } 374 375 debug!("find_state: parent_state = {:?}", node_state); 376 377 // Update the node state from the parent state. The assigned 378 // state is actually a loop invariant but it will only be 379 // evaluated if there is at least one backlink to follow. 380 // Fully trusting llvm here to find this loop optimization. 381 match node_state { 382 // Path compression, make current node point to the same root. 383 NodeState::InCycle { .. } => { 384 self.node_states[node] = node_state; 385 } 386 // Still visiting nodes, compress to cycle to the node 387 // at that depth. 388 NodeState::BeingVisited { depth } => { 389 self.node_states[node] = 390 NodeState::InCycleWith { parent: self.node_stack[depth] }; 391 } 392 // These are never allowed as parent nodes. InCycleWith 393 // should have been followed to a real parent and 394 // NotVisited can not be part of a cycle since it should 395 // have instead gotten explored. 396 NodeState::NotVisited | NodeState::InCycleWith { .. } => { 397 panic!("invalid parent state: {:?}", node_state) 398 } 399 } 400 } 401 } 402 403 /// Walks a node that has never been visited before. 404 /// 405 /// Call this method when `inspect_node` has returned `None`. Having the 406 /// caller decide avoids mutual recursion between the two methods and allows 407 /// us to maintain an allocated stack for nodes on the path between calls. walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S>408 fn walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S> { 409 struct VisitingNodeFrame<G: DirectedGraph, Successors> { 410 node: G::Node, 411 iter: Option<Successors>, 412 depth: usize, 413 min_depth: usize, 414 successors_len: usize, 415 min_cycle_root: G::Node, 416 successor_node: G::Node, 417 } 418 419 // Move the stack to a local variable. We want to utilize the existing allocation and 420 // mutably borrow it without borrowing self at the same time. 421 let mut successors_stack = core::mem::take(&mut self.successors_stack); 422 debug_assert_eq!(successors_stack.len(), 0); 423 424 let mut stack: Vec<VisitingNodeFrame<G, _>> = vec![VisitingNodeFrame { 425 node: initial, 426 depth: 0, 427 min_depth: 0, 428 iter: None, 429 successors_len: 0, 430 min_cycle_root: initial, 431 successor_node: initial, 432 }]; 433 434 let mut return_value = None; 435 436 'recurse: while let Some(frame) = stack.last_mut() { 437 let VisitingNodeFrame { 438 node, 439 depth, 440 iter, 441 successors_len, 442 min_depth, 443 min_cycle_root, 444 successor_node, 445 } = frame; 446 447 let node = *node; 448 let depth = *depth; 449 450 let successors = match iter { 451 Some(iter) => iter, 452 None => { 453 // This None marks that we still have the initialize this node's frame. 454 debug!("walk_unvisited_node(depth = {:?}, node = {:?})", depth, node); 455 456 debug_assert!(matches!(self.node_states[node], NodeState::NotVisited)); 457 458 // Push `node` onto the stack. 459 self.node_states[node] = NodeState::BeingVisited { depth }; 460 self.node_stack.push(node); 461 462 // Walk each successor of the node, looking to see if any of 463 // them can reach a node that is presently on the stack. If 464 // so, that means they can also reach us. 465 *successors_len = successors_stack.len(); 466 // Set and return a reference, this is currently empty. 467 iter.get_or_insert(self.graph.successors(node)) 468 } 469 }; 470 471 // Now that iter is initialized, this is a constant for this frame. 472 let successors_len = *successors_len; 473 474 // Construct iterators for the nodes and walk results. There are two cases: 475 // * The walk of a successor node returned. 476 // * The remaining successor nodes. 477 let returned_walk = 478 return_value.take().into_iter().map(|walk| (*successor_node, Some(walk))); 479 480 let successor_walk = successors.by_ref().map(|successor_node| { 481 debug!( 482 "walk_unvisited_node: node = {:?} successor_ode = {:?}", 483 node, successor_node 484 ); 485 (successor_node, self.inspect_node(successor_node)) 486 }); 487 488 for (successor_node, walk) in returned_walk.chain(successor_walk) { 489 match walk { 490 Some(WalkReturn::Cycle { min_depth: successor_min_depth }) => { 491 // Track the minimum depth we can reach. 492 assert!(successor_min_depth <= depth); 493 if successor_min_depth < *min_depth { 494 debug!( 495 "walk_unvisited_node: node = {:?} successor_min_depth = {:?}", 496 node, successor_min_depth 497 ); 498 *min_depth = successor_min_depth; 499 *min_cycle_root = successor_node; 500 } 501 } 502 503 Some(WalkReturn::Complete { scc_index: successor_scc_index }) => { 504 // Push the completed SCC indices onto 505 // the `successors_stack` for later. 506 debug!( 507 "walk_unvisited_node: node = {:?} successor_scc_index = {:?}", 508 node, successor_scc_index 509 ); 510 successors_stack.push(successor_scc_index); 511 } 512 513 None => { 514 let depth = depth + 1; 515 debug!("walk_node(depth = {:?}, node = {:?})", depth, successor_node); 516 // Remember which node the return value will come from. 517 frame.successor_node = successor_node; 518 // Start a new stack frame the step into it. 519 stack.push(VisitingNodeFrame { 520 node: successor_node, 521 depth, 522 iter: None, 523 successors_len: 0, 524 min_depth: depth, 525 min_cycle_root: successor_node, 526 successor_node, 527 }); 528 continue 'recurse; 529 } 530 } 531 } 532 533 // Completed walk, remove `node` from the stack. 534 let r = self.node_stack.pop(); 535 debug_assert_eq!(r, Some(node)); 536 537 // Remove the frame, it's done. 538 let frame = stack.pop().unwrap(); 539 540 // If `min_depth == depth`, then we are the root of the 541 // cycle: we can't reach anyone further down the stack. 542 543 // Pass the 'return value' down the stack. 544 // We return one frame at a time so there can't be another return value. 545 debug_assert!(return_value.is_none()); 546 return_value = Some(if frame.min_depth == depth { 547 // Note that successor stack may have duplicates, so we 548 // want to remove those: 549 let deduplicated_successors = { 550 let duplicate_set = &mut self.duplicate_set; 551 duplicate_set.clear(); 552 successors_stack 553 .drain(successors_len..) 554 .filter(move |&i| duplicate_set.insert(i)) 555 }; 556 let scc_index = self.scc_data.create_scc(deduplicated_successors); 557 self.node_states[node] = NodeState::InCycle { scc_index }; 558 WalkReturn::Complete { scc_index } 559 } else { 560 // We are not the head of the cycle. Return back to our 561 // caller. They will take ownership of the 562 // `self.successors` data that we pushed. 563 self.node_states[node] = NodeState::InCycleWith { parent: frame.min_cycle_root }; 564 WalkReturn::Cycle { min_depth: frame.min_depth } 565 }); 566 } 567 568 // Keep the allocation we used for successors_stack. 569 self.successors_stack = successors_stack; 570 debug_assert_eq!(self.successors_stack.len(), 0); 571 572 return_value.unwrap() 573 } 574 } 575