1 //! Routine to compute the strongly connected components (SCCs) of a graph.
2 //!
3 //! Also computes as the resulting DAG if each SCC is replaced with a
4 //! node in the graph. This uses [Tarjan's algorithm](
5 //! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm)
6 //! that completes in *O*(*n*) time.
7 
8 use crate::fx::FxHashSet;
9 use crate::graph::vec_graph::VecGraph;
10 use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors};
11 use rustc_index::vec::{Idx, IndexVec};
12 use std::ops::Range;
13 
14 #[cfg(test)]
15 mod tests;
16 
17 /// Strongly connected components (SCC) of a graph. The type `N` is
18 /// the index type for the graph nodes and `S` is the index type for
19 /// the SCCs. We can map from each node to the SCC that it
20 /// participates in, and we also have the successors of each SCC.
21 pub struct Sccs<N: Idx, S: Idx> {
22     /// For each node, what is the SCC index of the SCC to which it
23     /// belongs.
24     scc_indices: IndexVec<N, S>,
25 
26     /// Data about each SCC.
27     scc_data: SccData<S>,
28 }
29 
30 struct SccData<S: Idx> {
31     /// For each SCC, the range of `all_successors` where its
32     /// successors can be found.
33     ranges: IndexVec<S, Range<usize>>,
34 
35     /// Contains the successors for all the Sccs, concatenated. The
36     /// range of indices corresponding to a given SCC is found in its
37     /// SccData.
38     all_successors: Vec<S>,
39 }
40 
41 impl<N: Idx, S: Idx> Sccs<N, S> {
new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self42     pub fn new(graph: &(impl DirectedGraph<Node = N> + WithNumNodes + WithSuccessors)) -> Self {
43         SccsConstruction::construct(graph)
44     }
45 
46     /// Returns the number of SCCs in the graph.
num_sccs(&self) -> usize47     pub fn num_sccs(&self) -> usize {
48         self.scc_data.len()
49     }
50 
51     /// Returns an iterator over the SCCs in the graph.
52     ///
53     /// The SCCs will be iterated in **dependency order** (or **post order**),
54     /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after.
55     /// This is convenient when the edges represent dependencies: when you visit
56     /// `S1`, the value for `S2` will already have been computed.
all_sccs(&self) -> impl Iterator<Item = S>57     pub fn all_sccs(&self) -> impl Iterator<Item = S> {
58         (0..self.scc_data.len()).map(S::new)
59     }
60 
61     /// Returns the SCC to which a node `r` belongs.
scc(&self, r: N) -> S62     pub fn scc(&self, r: N) -> S {
63         self.scc_indices[r]
64     }
65 
66     /// Returns the successors of the given SCC.
successors(&self, scc: S) -> &[S]67     pub fn successors(&self, scc: S) -> &[S] {
68         self.scc_data.successors(scc)
69     }
70 
71     /// Construct the reverse graph of the SCC graph.
reverse(&self) -> VecGraph<S>72     pub fn reverse(&self) -> VecGraph<S> {
73         VecGraph::new(
74             self.num_sccs(),
75             self.all_sccs()
76                 .flat_map(|source| {
77                     self.successors(source).iter().map(move |&target| (target, source))
78                 })
79                 .collect(),
80         )
81     }
82 }
83 
84 impl<N: Idx, S: Idx> DirectedGraph for Sccs<N, S> {
85     type Node = S;
86 }
87 
88 impl<N: Idx, S: Idx> WithNumNodes for Sccs<N, S> {
num_nodes(&self) -> usize89     fn num_nodes(&self) -> usize {
90         self.num_sccs()
91     }
92 }
93 
94 impl<N: Idx, S: Idx> WithNumEdges for Sccs<N, S> {
num_edges(&self) -> usize95     fn num_edges(&self) -> usize {
96         self.scc_data.all_successors.len()
97     }
98 }
99 
100 impl<N: Idx, S: Idx> GraphSuccessors<'graph> for Sccs<N, S> {
101     type Item = S;
102 
103     type Iter = std::iter::Cloned<std::slice::Iter<'graph, S>>;
104 }
105 
106 impl<N: Idx, S: Idx> WithSuccessors for Sccs<N, S> {
successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter107     fn successors(&self, node: S) -> <Self as GraphSuccessors<'_>>::Iter {
108         self.successors(node).iter().cloned()
109     }
110 }
111 
112 impl<S: Idx> SccData<S> {
113     /// Number of SCCs,
len(&self) -> usize114     fn len(&self) -> usize {
115         self.ranges.len()
116     }
117 
118     /// Returns the successors of the given SCC.
successors(&self, scc: S) -> &[S]119     fn successors(&self, scc: S) -> &[S] {
120         // Annoyingly, `range` does not implement `Copy`, so we have
121         // to do `range.start..range.end`:
122         let range = &self.ranges[scc];
123         &self.all_successors[range.start..range.end]
124     }
125 
126     /// Creates a new SCC with `successors` as its successors and
127     /// returns the resulting index.
create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S128     fn create_scc(&mut self, successors: impl IntoIterator<Item = S>) -> S {
129         // Store the successors on `scc_successors_vec`, remembering
130         // the range of indices.
131         let all_successors_start = self.all_successors.len();
132         self.all_successors.extend(successors);
133         let all_successors_end = self.all_successors.len();
134 
135         debug!(
136             "create_scc({:?}) successors={:?}",
137             self.ranges.len(),
138             &self.all_successors[all_successors_start..all_successors_end],
139         );
140 
141         self.ranges.push(all_successors_start..all_successors_end)
142     }
143 }
144 
145 struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors, S: Idx> {
146     graph: &'c G,
147 
148     /// The state of each node; used during walk to record the stack
149     /// and after walk to record what cycle each node ended up being
150     /// in.
151     node_states: IndexVec<G::Node, NodeState<G::Node, S>>,
152 
153     /// The stack of nodes that we are visiting as part of the DFS.
154     node_stack: Vec<G::Node>,
155 
156     /// The stack of successors: as we visit a node, we mark our
157     /// position in this stack, and when we encounter a successor SCC,
158     /// we push it on the stack. When we complete an SCC, we can pop
159     /// everything off the stack that was found along the way.
160     successors_stack: Vec<S>,
161 
162     /// A set used to strip duplicates. As we accumulate successors
163     /// into the successors_stack, we sometimes get duplicate entries.
164     /// We use this set to remove those -- we also keep its storage
165     /// around between successors to amortize memory allocation costs.
166     duplicate_set: FxHashSet<S>,
167 
168     scc_data: SccData<S>,
169 }
170 
171 #[derive(Copy, Clone, Debug)]
172 enum NodeState<N, S> {
173     /// This node has not yet been visited as part of the DFS.
174     ///
175     /// After SCC construction is complete, this state ought to be
176     /// impossible.
177     NotVisited,
178 
179     /// This node is currently being walk as part of our DFS. It is on
180     /// the stack at the depth `depth`.
181     ///
182     /// After SCC construction is complete, this state ought to be
183     /// impossible.
184     BeingVisited { depth: usize },
185 
186     /// Indicates that this node is a member of the given cycle.
187     InCycle { scc_index: S },
188 
189     /// Indicates that this node is a member of whatever cycle
190     /// `parent` is a member of. This state is transient: whenever we
191     /// see it, we try to overwrite it with the current state of
192     /// `parent` (this is the "path compression" step of a union-find
193     /// algorithm).
194     InCycleWith { parent: N },
195 }
196 
197 #[derive(Copy, Clone, Debug)]
198 enum WalkReturn<S> {
199     Cycle { min_depth: usize },
200     Complete { scc_index: S },
201 }
202 
203 impl<'c, G, S> SccsConstruction<'c, G, S>
204 where
205     G: DirectedGraph + WithNumNodes + WithSuccessors,
206     S: Idx,
207 {
208     /// Identifies SCCs in the graph `G` and computes the resulting
209     /// DAG. This uses a variant of [Tarjan's
210     /// algorithm][wikipedia]. The high-level summary of the algorithm
211     /// is that we do a depth-first search. Along the way, we keep a
212     /// stack of each node whose successors are being visited. We
213     /// track the depth of each node on this stack (there is no depth
214     /// if the node is not on the stack). When we find that some node
215     /// N with depth D can reach some other node N' with lower depth
216     /// D' (i.e., D' < D), we know that N, N', and all nodes in
217     /// between them on the stack are part of an SCC.
218     ///
219     /// [wikipedia]: https://bit.ly/2EZIx84
construct(graph: &'c G) -> Sccs<G::Node, S>220     fn construct(graph: &'c G) -> Sccs<G::Node, S> {
221         let num_nodes = graph.num_nodes();
222 
223         let mut this = Self {
224             graph,
225             node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes),
226             node_stack: Vec::with_capacity(num_nodes),
227             successors_stack: Vec::new(),
228             scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() },
229             duplicate_set: FxHashSet::default(),
230         };
231 
232         let scc_indices = (0..num_nodes)
233             .map(G::Node::new)
234             .map(|node| match this.start_walk_from(node) {
235                 WalkReturn::Complete { scc_index } => scc_index,
236                 WalkReturn::Cycle { min_depth } => panic!(
237                     "`start_walk_node({:?})` returned cycle with depth {:?}",
238                     node, min_depth
239                 ),
240             })
241             .collect();
242 
243         Sccs { scc_indices, scc_data: this.scc_data }
244     }
245 
start_walk_from(&mut self, node: G::Node) -> WalkReturn<S>246     fn start_walk_from(&mut self, node: G::Node) -> WalkReturn<S> {
247         if let Some(result) = self.inspect_node(node) {
248             result
249         } else {
250             self.walk_unvisited_node(node)
251         }
252     }
253 
254     /// Inspect a node during the DFS. We first examine its current
255     /// state -- if it is not yet visited (`NotVisited`), return `None` so
256     /// that the caller might push it onto the stack and start walking its
257     /// successors.
258     ///
259     /// If it is already on the DFS stack it will be in the state
260     /// `BeingVisited`. In that case, we have found a cycle and we
261     /// return the depth from the stack.
262     ///
263     /// Otherwise, we are looking at a node that has already been
264     /// completely visited. We therefore return `WalkReturn::Complete`
265     /// with its associated SCC index.
inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>>266     fn inspect_node(&mut self, node: G::Node) -> Option<WalkReturn<S>> {
267         Some(match self.find_state(node) {
268             NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index },
269 
270             NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth },
271 
272             NodeState::NotVisited => return None,
273 
274             NodeState::InCycleWith { parent } => panic!(
275                 "`find_state` returned `InCycleWith({:?})`, which ought to be impossible",
276                 parent
277             ),
278         })
279     }
280 
281     /// Fetches the state of the node `r`. If `r` is recorded as being
282     /// in a cycle with some other node `r2`, then fetches the state
283     /// of `r2` (and updates `r` to reflect current result). This is
284     /// basically the "find" part of a standard union-find algorithm
285     /// (with path compression).
find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S>286     fn find_state(&mut self, mut node: G::Node) -> NodeState<G::Node, S> {
287         // To avoid recursion we temporarily reuse the `parent` of each
288         // InCycleWith link to encode a downwards link while compressing
289         // the path. After we have found the root or deepest node being
290         // visited, we traverse the reverse links and correct the node
291         // states on the way.
292         //
293         // **Note**: This mutation requires that this is a leaf function
294         // or at least that none of the called functions inspects the
295         // current node states. Luckily, we are a leaf.
296 
297         // Remember one previous link. The termination condition when
298         // following links downwards is then simply as soon as we have
299         // found the initial self-loop.
300         let mut previous_node = node;
301 
302         // Ultimately assigned by the parent when following
303         // `InCycleWith` upwards.
304         let node_state = loop {
305             debug!("find_state(r = {:?} in state {:?})", node, self.node_states[node]);
306             match self.node_states[node] {
307                 NodeState::InCycle { scc_index } => break NodeState::InCycle { scc_index },
308                 NodeState::BeingVisited { depth } => break NodeState::BeingVisited { depth },
309                 NodeState::NotVisited => break NodeState::NotVisited,
310                 NodeState::InCycleWith { parent } => {
311                     // We test this, to be extremely sure that we never
312                     // ever break our termination condition for the
313                     // reverse iteration loop.
314                     assert!(node != parent, "Node can not be in cycle with itself");
315                     // Store the previous node as an inverted list link
316                     self.node_states[node] = NodeState::InCycleWith { parent: previous_node };
317                     // Update to parent node.
318                     previous_node = node;
319                     node = parent;
320                 }
321             }
322         };
323 
324         // The states form a graph where up to one outgoing link is stored at
325         // each node. Initially in general,
326         //
327         //                                                  E
328         //                                                  ^
329         //                                                  |
330         //                                InCycleWith/BeingVisited/NotVisited
331         //                                                  |
332         //   A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+
333         //   |
334         //   = node, previous_node
335         //
336         // After the first loop, this will look like
337         //                                                  E
338         //                                                  ^
339         //                                                  |
340         //                                InCycleWith/BeingVisited/NotVisited
341         //                                                  |
342         // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+
343         // | |                             |              |
344         // | InCycleWith                   |              = node
345         // +-+                             =previous_node
346         //
347         // Note in particular that A will be linked to itself in a self-cycle
348         // and no other self-cycles occur due to how InCycleWith is assigned in
349         // the find phase implemented by `walk_unvisited_node`.
350         //
351         // We now want to compress the path, that is assign the state of the
352         // link D-E to all other links.
353         //
354         // We can then walk backwards, starting from `previous_node`, and assign
355         // each node in the list with the updated state. The loop terminates
356         // when we reach the self-cycle.
357 
358         // Move backwards until we found the node where we started. We
359         // will know when we hit the state where previous_node == node.
360         loop {
361             // Back at the beginning, we can return.
362             if previous_node == node {
363                 return node_state;
364             }
365             // Update to previous node in the link.
366             match self.node_states[previous_node] {
367                 NodeState::InCycleWith { parent: previous } => {
368                     node = previous_node;
369                     previous_node = previous;
370                 }
371                 // Only InCycleWith nodes were added to the reverse linked list.
372                 other => panic!("Invalid previous link while compressing cycle: {:?}", other),
373             }
374 
375             debug!("find_state: parent_state = {:?}", node_state);
376 
377             // Update the node state from the parent state. The assigned
378             // state is actually a loop invariant but it will only be
379             // evaluated if there is at least one backlink to follow.
380             // Fully trusting llvm here to find this loop optimization.
381             match node_state {
382                 // Path compression, make current node point to the same root.
383                 NodeState::InCycle { .. } => {
384                     self.node_states[node] = node_state;
385                 }
386                 // Still visiting nodes, compress to cycle to the node
387                 // at that depth.
388                 NodeState::BeingVisited { depth } => {
389                     self.node_states[node] =
390                         NodeState::InCycleWith { parent: self.node_stack[depth] };
391                 }
392                 // These are never allowed as parent nodes. InCycleWith
393                 // should have been followed to a real parent and
394                 // NotVisited can not be part of a cycle since it should
395                 // have instead gotten explored.
396                 NodeState::NotVisited | NodeState::InCycleWith { .. } => {
397                     panic!("invalid parent state: {:?}", node_state)
398                 }
399             }
400         }
401     }
402 
403     /// Walks a node that has never been visited before.
404     ///
405     /// Call this method when `inspect_node` has returned `None`. Having the
406     /// caller decide avoids mutual recursion between the two methods and allows
407     /// us to maintain an allocated stack for nodes on the path between calls.
408     #[instrument(skip(self, initial), level = "debug")]
walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S>409     fn walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn<S> {
410         struct VisitingNodeFrame<G: DirectedGraph, Successors> {
411             node: G::Node,
412             iter: Option<Successors>,
413             depth: usize,
414             min_depth: usize,
415             successors_len: usize,
416             min_cycle_root: G::Node,
417             successor_node: G::Node,
418         }
419 
420         // Move the stack to a local variable. We want to utilize the existing allocation and
421         // mutably borrow it without borrowing self at the same time.
422         let mut successors_stack = core::mem::take(&mut self.successors_stack);
423         debug_assert_eq!(successors_stack.len(), 0);
424 
425         let mut stack: Vec<VisitingNodeFrame<G, _>> = vec![VisitingNodeFrame {
426             node: initial,
427             depth: 0,
428             min_depth: 0,
429             iter: None,
430             successors_len: 0,
431             min_cycle_root: initial,
432             successor_node: initial,
433         }];
434 
435         let mut return_value = None;
436 
437         'recurse: while let Some(frame) = stack.last_mut() {
438             let VisitingNodeFrame {
439                 node,
440                 depth,
441                 iter,
442                 successors_len,
443                 min_depth,
444                 min_cycle_root,
445                 successor_node,
446             } = frame;
447 
448             let node = *node;
449             let depth = *depth;
450 
451             let successors = match iter {
452                 Some(iter) => iter,
453                 None => {
454                     // This None marks that we still have the initialize this node's frame.
455                     debug!(?depth, ?node);
456 
457                     debug_assert!(matches!(self.node_states[node], NodeState::NotVisited));
458 
459                     // Push `node` onto the stack.
460                     self.node_states[node] = NodeState::BeingVisited { depth };
461                     self.node_stack.push(node);
462 
463                     // Walk each successor of the node, looking to see if any of
464                     // them can reach a node that is presently on the stack. If
465                     // so, that means they can also reach us.
466                     *successors_len = successors_stack.len();
467                     // Set and return a reference, this is currently empty.
468                     iter.get_or_insert(self.graph.successors(node))
469                 }
470             };
471 
472             // Now that iter is initialized, this is a constant for this frame.
473             let successors_len = *successors_len;
474 
475             // Construct iterators for the nodes and walk results. There are two cases:
476             // * The walk of a successor node returned.
477             // * The remaining successor nodes.
478             let returned_walk =
479                 return_value.take().into_iter().map(|walk| (*successor_node, Some(walk)));
480 
481             let successor_walk = successors.by_ref().map(|successor_node| {
482                 debug!(?node, ?successor_node);
483                 (successor_node, self.inspect_node(successor_node))
484             });
485 
486             for (successor_node, walk) in returned_walk.chain(successor_walk) {
487                 match walk {
488                     Some(WalkReturn::Cycle { min_depth: successor_min_depth }) => {
489                         // Track the minimum depth we can reach.
490                         assert!(successor_min_depth <= depth);
491                         if successor_min_depth < *min_depth {
492                             debug!(?node, ?successor_min_depth);
493                             *min_depth = successor_min_depth;
494                             *min_cycle_root = successor_node;
495                         }
496                     }
497 
498                     Some(WalkReturn::Complete { scc_index: successor_scc_index }) => {
499                         // Push the completed SCC indices onto
500                         // the `successors_stack` for later.
501                         debug!(?node, ?successor_scc_index);
502                         successors_stack.push(successor_scc_index);
503                     }
504 
505                     None => {
506                         let depth = depth + 1;
507                         debug!(?depth, ?successor_node);
508                         // Remember which node the return value will come from.
509                         frame.successor_node = successor_node;
510                         // Start a new stack frame the step into it.
511                         stack.push(VisitingNodeFrame {
512                             node: successor_node,
513                             depth,
514                             iter: None,
515                             successors_len: 0,
516                             min_depth: depth,
517                             min_cycle_root: successor_node,
518                             successor_node,
519                         });
520                         continue 'recurse;
521                     }
522                 }
523             }
524 
525             // Completed walk, remove `node` from the stack.
526             let r = self.node_stack.pop();
527             debug_assert_eq!(r, Some(node));
528 
529             // Remove the frame, it's done.
530             let frame = stack.pop().unwrap();
531 
532             // If `min_depth == depth`, then we are the root of the
533             // cycle: we can't reach anyone further down the stack.
534 
535             // Pass the 'return value' down the stack.
536             // We return one frame at a time so there can't be another return value.
537             debug_assert!(return_value.is_none());
538             return_value = Some(if frame.min_depth == depth {
539                 // Note that successor stack may have duplicates, so we
540                 // want to remove those:
541                 let deduplicated_successors = {
542                     let duplicate_set = &mut self.duplicate_set;
543                     duplicate_set.clear();
544                     successors_stack
545                         .drain(successors_len..)
546                         .filter(move |&i| duplicate_set.insert(i))
547                 };
548                 let scc_index = self.scc_data.create_scc(deduplicated_successors);
549                 self.node_states[node] = NodeState::InCycle { scc_index };
550                 WalkReturn::Complete { scc_index }
551             } else {
552                 // We are not the head of the cycle. Return back to our
553                 // caller. They will take ownership of the
554                 // `self.successors` data that we pushed.
555                 self.node_states[node] = NodeState::InCycleWith { parent: frame.min_cycle_root };
556                 WalkReturn::Cycle { min_depth: frame.min_depth }
557             });
558         }
559 
560         // Keep the allocation we used for successors_stack.
561         self.successors_stack = successors_stack;
562         debug_assert_eq!(self.successors_stack.len(), 0);
563 
564         return_value.unwrap()
565     }
566 }
567