1 // dagops.rs
2 //
3 // Copyright 2019 Georges Racinet <georges.racinet@octobus.net>
4 //
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
7 
8 //! Miscellaneous DAG operations
9 //!
10 //! # Terminology
11 //! - By *relative heads* of a collection of revision numbers (`Revision`), we
12 //!   mean those revisions that have no children among the collection.
//! - Similarly, by *relative roots* of a collection of `Revision`, we mean
//!   those whose parents, if any, don't belong to the collection.
15 use super::{Graph, GraphError, Revision, NULL_REVISION};
16 use crate::ancestors::AncestorsIterator;
17 use std::collections::{BTreeSet, HashSet};
18 
remove_parents<S: std::hash::BuildHasher>( graph: &impl Graph, rev: Revision, set: &mut HashSet<Revision, S>, ) -> Result<(), GraphError>19 fn remove_parents<S: std::hash::BuildHasher>(
20     graph: &impl Graph,
21     rev: Revision,
22     set: &mut HashSet<Revision, S>,
23 ) -> Result<(), GraphError> {
24     for parent in graph.parents(rev)?.iter() {
25         if *parent != NULL_REVISION {
26             set.remove(parent);
27         }
28     }
29     Ok(())
30 }
31 
32 /// Relative heads out of some revisions, passed as an iterator.
33 ///
34 /// These heads are defined as those revisions that have no children
35 /// among those emitted by the iterator.
36 ///
37 /// # Performance notes
38 /// Internally, this clones the iterator, and builds a `HashSet` out of it.
39 ///
40 /// This function takes an `Iterator` instead of `impl IntoIterator` to
41 /// guarantee that cloning the iterator doesn't result in cloning the full
42 /// construct it comes from.
heads<'a>( graph: &impl Graph, iter_revs: impl Clone + Iterator<Item = &'a Revision>, ) -> Result<HashSet<Revision>, GraphError>43 pub fn heads<'a>(
44     graph: &impl Graph,
45     iter_revs: impl Clone + Iterator<Item = &'a Revision>,
46 ) -> Result<HashSet<Revision>, GraphError> {
47     let mut heads: HashSet<Revision> = iter_revs.clone().cloned().collect();
48     heads.remove(&NULL_REVISION);
49     for rev in iter_revs {
50         if *rev != NULL_REVISION {
51             remove_parents(graph, *rev, &mut heads)?;
52         }
53     }
54     Ok(heads)
55 }
56 
57 /// Retain in `revs` only its relative heads.
58 ///
59 /// This is an in-place operation, so that control of the incoming
60 /// set is left to the caller.
61 /// - a direct Python binding would probably need to build its own `HashSet`
62 ///   from an incoming iterable, even if its sole purpose is to extract the
63 ///   heads.
64 /// - a Rust caller can decide whether cloning beforehand is appropriate
65 ///
66 /// # Performance notes
67 /// Internally, this function will store a full copy of `revs` in a `Vec`.
retain_heads<S: std::hash::BuildHasher>( graph: &impl Graph, revs: &mut HashSet<Revision, S>, ) -> Result<(), GraphError>68 pub fn retain_heads<S: std::hash::BuildHasher>(
69     graph: &impl Graph,
70     revs: &mut HashSet<Revision, S>,
71 ) -> Result<(), GraphError> {
72     revs.remove(&NULL_REVISION);
73     // we need to construct an iterable copy of revs to avoid itering while
74     // mutating
75     let as_vec: Vec<Revision> = revs.iter().cloned().collect();
76     for rev in as_vec {
77         if rev != NULL_REVISION {
78             remove_parents(graph, rev, revs)?;
79         }
80     }
81     Ok(())
82 }
83 
84 /// Roots of `revs`, passed as a `HashSet`
85 ///
86 /// They are returned in arbitrary order
roots<G: Graph, S: std::hash::BuildHasher>( graph: &G, revs: &HashSet<Revision, S>, ) -> Result<Vec<Revision>, GraphError>87 pub fn roots<G: Graph, S: std::hash::BuildHasher>(
88     graph: &G,
89     revs: &HashSet<Revision, S>,
90 ) -> Result<Vec<Revision>, GraphError> {
91     let mut roots: Vec<Revision> = Vec::new();
92     for rev in revs {
93         if graph
94             .parents(*rev)?
95             .iter()
96             .filter(|p| **p != NULL_REVISION)
97             .all(|p| !revs.contains(p))
98         {
99             roots.push(*rev);
100         }
101     }
102     Ok(roots)
103 }
104 
105 /// Compute the topological range between two collections of revisions
106 ///
107 /// This is equivalent to the revset `<roots>::<heads>`.
108 ///
109 /// Currently, the given `Graph` has to implement `Clone`, which means
110 /// actually cloning just a reference-counted Python pointer if
111 /// it's passed over through `rust-cpython`. This is due to the internal
112 /// use of `AncestorsIterator`
113 ///
114 /// # Algorithmic details
115 ///
116 /// This is a two-pass swipe inspired from what `reachableroots2` from
117 /// `mercurial.cext.parsers` does to obtain the same results.
118 ///
119 /// - first, we climb up the DAG from `heads` in topological order, keeping
120 ///   them in the vector `heads_ancestors` vector, and adding any element of
121 ///   `roots` we find among them to the resulting range.
122 /// - Then, we iterate on that recorded vector so that a revision is always
123 ///   emitted after its parents and add all revisions whose parents are already
124 ///   in the range to the results.
125 ///
126 /// # Performance notes
127 ///
128 /// The main difference with the C implementation is that
129 /// the latter uses a flat array with bit flags, instead of complex structures
130 /// like `HashSet`, making it faster in most scenarios. In theory, it's
131 /// possible that the present implementation could be more memory efficient
132 /// for very large repositories with many branches.
range( graph: &(impl Graph + Clone), roots: impl IntoIterator<Item = Revision>, heads: impl IntoIterator<Item = Revision>, ) -> Result<BTreeSet<Revision>, GraphError>133 pub fn range(
134     graph: &(impl Graph + Clone),
135     roots: impl IntoIterator<Item = Revision>,
136     heads: impl IntoIterator<Item = Revision>,
137 ) -> Result<BTreeSet<Revision>, GraphError> {
138     let mut range = BTreeSet::new();
139     let roots: HashSet<Revision> = roots.into_iter().collect();
140     let min_root: Revision = match roots.iter().cloned().min() {
141         None => {
142             return Ok(range);
143         }
144         Some(r) => r,
145     };
146 
147     // Internally, AncestorsIterator currently maintains a `HashSet`
148     // of all seen revision, which is also what we record, albeit in an ordered
149     // way. There's room for improvement on this duplication.
150     let ait = AncestorsIterator::new(graph.clone(), heads, min_root, true)?;
151     let mut heads_ancestors: Vec<Revision> = Vec::new();
152     for revres in ait {
153         let rev = revres?;
154         if roots.contains(&rev) {
155             range.insert(rev);
156         }
157         heads_ancestors.push(rev);
158     }
159 
160     for rev in heads_ancestors.into_iter().rev() {
161         for parent in graph.parents(rev)?.iter() {
162             if *parent != NULL_REVISION && range.contains(parent) {
163                 range.insert(rev);
164             }
165         }
166     }
167     Ok(range)
168 }
169 
#[cfg(test)]
mod tests {

    use super::*;
    use crate::testing::SampleGraph;

    /// Run `retain_heads()` on a set built from the given slice and return
    /// what is left as a sorted `Vec`
    fn retain_heads_sorted(
        graph: &impl Graph,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        let mut set: HashSet<Revision> = revs.iter().cloned().collect();
        retain_heads(graph, &mut set)?;
        let mut sorted: Vec<Revision> = set.into_iter().collect();
        sorted.sort_unstable();
        Ok(sorted)
    }

    #[test]
    fn test_retain_heads() -> Result<(), GraphError> {
        let graph = SampleGraph;
        assert_eq!(retain_heads_sorted(&graph, &[4, 5, 6])?, vec![5, 6]);
        assert_eq!(
            retain_heads_sorted(&graph, &[4, 1, 6, 12, 0])?,
            vec![1, 6, 12]
        );
        assert_eq!(
            retain_heads_sorted(&graph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
            vec![3, 5, 8, 9]
        );
        Ok(())
    }

    /// Run `heads()` on the given slice and return the result as a sorted
    /// `Vec`
    fn heads_sorted(
        graph: &impl Graph,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        let mut sorted: Vec<Revision> =
            heads(graph, revs.iter())?.into_iter().collect();
        sorted.sort_unstable();
        Ok(sorted)
    }

    #[test]
    fn test_heads() -> Result<(), GraphError> {
        let graph = SampleGraph;
        assert_eq!(heads_sorted(&graph, &[4, 5, 6])?, vec![5, 6]);
        assert_eq!(
            heads_sorted(&graph, &[4, 1, 6, 12, 0])?,
            vec![1, 6, 12]
        );
        assert_eq!(
            heads_sorted(&graph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
            vec![3, 5, 8, 9]
        );
        Ok(())
    }

    /// Run `roots()` on a set built from the given slice and sort the
    /// result for easier comparison
    fn roots_sorted(
        graph: &impl Graph,
        revs: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        let as_set: HashSet<_> = revs.iter().cloned().collect();
        let mut sorted = roots(graph, &as_set)?;
        sorted.sort_unstable();
        Ok(sorted)
    }

    #[test]
    fn test_roots() -> Result<(), GraphError> {
        let graph = SampleGraph;
        assert_eq!(roots_sorted(&graph, &[4, 5, 6])?, vec![4]);
        assert_eq!(
            roots_sorted(&graph, &[4, 1, 6, 12, 0])?,
            vec![0, 4, 12]
        );
        assert_eq!(
            roots_sorted(&graph, &[1, 2, 3, 4, 5, 6, 7, 8, 9])?,
            vec![1, 8]
        );
        Ok(())
    }

    /// Run `range()` and flatten the resulting set into a `Vec` (in
    /// increasing order) for easier comparison
    fn range_vec(
        graph: impl Graph + Clone,
        roots: &[Revision],
        heads: &[Revision],
    ) -> Result<Vec<Revision>, GraphError> {
        let as_set =
            range(&graph, roots.iter().cloned(), heads.iter().cloned())?;
        Ok(as_set.into_iter().collect())
    }

    #[test]
    fn test_range() -> Result<(), GraphError> {
        assert_eq!(range_vec(SampleGraph, &[0], &[4])?, vec![0, 1, 2, 4]);
        assert_eq!(range_vec(SampleGraph, &[0], &[8])?, vec![]);
        assert_eq!(
            range_vec(SampleGraph, &[5, 6], &[10, 11, 13])?,
            vec![5, 10]
        );
        assert_eq!(
            range_vec(SampleGraph, &[5, 6], &[10, 12])?,
            vec![5, 6, 9, 10, 12]
        );
        Ok(())
    }
}
277