1 //! Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
2 //!
3 //! * time: `O((NM)D log (M)D)`
4 //! * space `O(MN)`
5 use std::collections::BTreeMap;
6 use std::ops::{Index, Range};
7 use std::time::Instant;
8 
9 use crate::algorithms::utils::{common_prefix_len, common_suffix_len, is_empty_range};
10 use crate::algorithms::DiffHook;
11 
12 /// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
13 ///
14 /// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
15 ///
16 /// This diff is done with an optional deadline that defines the maximal
17 /// execution time permitted before it bails and falls back to an very bad
18 /// approximation.  Deadlines with LCS do not make a lot of sense and should
19 /// not be used.
diff<Old, New, D>( d: &mut D, old: &Old, old_range: Range<usize>, new: &New, new_range: Range<usize>, ) -> Result<(), D::Error> where Old: Index<usize> + ?Sized, New: Index<usize> + ?Sized, D: DiffHook, New::Output: PartialEq<Old::Output>,20 pub fn diff<Old, New, D>(
21     d: &mut D,
22     old: &Old,
23     old_range: Range<usize>,
24     new: &New,
25     new_range: Range<usize>,
26 ) -> Result<(), D::Error>
27 where
28     Old: Index<usize> + ?Sized,
29     New: Index<usize> + ?Sized,
30     D: DiffHook,
31     New::Output: PartialEq<Old::Output>,
32 {
33     diff_deadline(d, old, old_range, new, new_range, None)
34 }
35 
36 /// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
37 ///
38 /// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
39 ///
40 /// This diff is done with an optional deadline that defines the maximal
41 /// execution time permitted before it bails and falls back to an approximation.
diff_deadline<Old, New, D>( d: &mut D, old: &Old, old_range: Range<usize>, new: &New, new_range: Range<usize>, deadline: Option<Instant>, ) -> Result<(), D::Error> where Old: Index<usize> + ?Sized, New: Index<usize> + ?Sized, D: DiffHook, New::Output: PartialEq<Old::Output>,42 pub fn diff_deadline<Old, New, D>(
43     d: &mut D,
44     old: &Old,
45     old_range: Range<usize>,
46     new: &New,
47     new_range: Range<usize>,
48     deadline: Option<Instant>,
49 ) -> Result<(), D::Error>
50 where
51     Old: Index<usize> + ?Sized,
52     New: Index<usize> + ?Sized,
53     D: DiffHook,
54     New::Output: PartialEq<Old::Output>,
55 {
56     if is_empty_range(&new_range) {
57         d.delete(old_range.start, old_range.len(), new_range.start)?;
58         return Ok(());
59     } else if is_empty_range(&old_range) {
60         d.insert(old_range.start, new_range.start, new_range.len())?;
61         return Ok(());
62     }
63 
64     let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
65     let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
66 
67     let maybe_table = make_table(
68         old,
69         common_prefix_len..(old_range.len() - common_suffix_len),
70         new,
71         common_prefix_len..(new_range.len() - common_suffix_len),
72         deadline,
73     );
74     let mut old_idx = 0;
75     let mut new_idx = 0;
76     let new_len = new_range.len() - common_prefix_len - common_suffix_len;
77     let old_len = old_range.len() - common_prefix_len - common_suffix_len;
78 
79     if common_prefix_len > 0 {
80         d.equal(old_range.start, new_range.start, common_prefix_len)?;
81     }
82 
83     if let Some(table) = maybe_table {
84         while new_idx < new_len && old_idx < old_len {
85             let old_orig_idx = old_range.start + common_prefix_len + old_idx;
86             let new_orig_idx = new_range.start + common_prefix_len + new_idx;
87 
88             if new[new_orig_idx] == old[old_orig_idx] {
89                 d.equal(old_orig_idx, new_orig_idx, 1)?;
90                 old_idx += 1;
91                 new_idx += 1;
92             } else if table.get(&(new_idx, old_idx + 1)).map_or(0, |&x| x)
93                 >= table.get(&(new_idx + 1, old_idx)).map_or(0, |&x| x)
94             {
95                 d.delete(old_orig_idx, 1, new_orig_idx)?;
96                 old_idx += 1;
97             } else {
98                 d.insert(old_orig_idx, new_orig_idx, 1)?;
99                 new_idx += 1;
100             }
101         }
102     } else {
103         let old_orig_idx = old_range.start + common_prefix_len + old_idx;
104         let new_orig_idx = new_range.start + common_prefix_len + new_idx;
105         d.delete(old_orig_idx, old_len, new_orig_idx)?;
106         d.insert(old_orig_idx, new_orig_idx, new_len)?;
107     }
108 
109     if old_idx < old_len {
110         d.delete(
111             old_range.start + common_prefix_len + old_idx,
112             old_len - old_idx,
113             new_range.start + common_prefix_len + new_idx,
114         )?;
115         old_idx += old_len - old_idx;
116     }
117 
118     if new_idx < new_len {
119         d.insert(
120             old_range.start + common_prefix_len + old_idx,
121             new_range.start + common_prefix_len + new_idx,
122             new_len - new_idx,
123         )?;
124     }
125 
126     if common_suffix_len > 0 {
127         d.equal(
128             old_range.start + old_len + common_prefix_len,
129             new_range.start + new_len + common_prefix_len,
130             common_suffix_len,
131         )?;
132     }
133 
134     d.finish()
135 }
136 
137 /// Shortcut for diffing slices.
138 #[deprecated(
139     since = "1.4.0",
140     note = "slice utility function is now only available via similar::algorithms::diff_slices"
141 )]
diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error> where D: DiffHook, T: PartialEq,142 pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
143 where
144     D: DiffHook,
145     T: PartialEq,
146 {
147     diff(d, old, 0..old.len(), new, 0..new.len())
148 }
149 
make_table<Old, New>( old: &Old, old_range: Range<usize>, new: &New, new_range: Range<usize>, deadline: Option<Instant>, ) -> Option<BTreeMap<(usize, usize), u32>> where Old: Index<usize> + ?Sized, New: Index<usize> + ?Sized, New::Output: PartialEq<Old::Output>,150 fn make_table<Old, New>(
151     old: &Old,
152     old_range: Range<usize>,
153     new: &New,
154     new_range: Range<usize>,
155     deadline: Option<Instant>,
156 ) -> Option<BTreeMap<(usize, usize), u32>>
157 where
158     Old: Index<usize> + ?Sized,
159     New: Index<usize> + ?Sized,
160     New::Output: PartialEq<Old::Output>,
161 {
162     let old_len = old_range.len();
163     let new_len = new_range.len();
164     let mut table = BTreeMap::new();
165 
166     for i in (0..new_len).rev() {
167         // are we running for too long?  give up on the table
168         if let Some(deadline) = deadline {
169             if Instant::now() > deadline {
170                 return None;
171             }
172         }
173 
174         for j in (0..old_len).rev() {
175             let val = if new[i] == old[j] {
176                 table.get(&(i + 1, j + 1)).map_or(0, |&x| x) + 1
177             } else {
178                 table
179                     .get(&(i + 1, j))
180                     .map_or(0, |&x| x)
181                     .max(table.get(&(i, j + 1)).map_or(0, |&x| x))
182             };
183             if val > 0 {
184                 table.insert((i, j), val);
185             }
186         }
187     }
188 
189     Some(table)
190 }
191 
192 #[test]
test_table()193 fn test_table() {
194     let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3, None).unwrap();
195     let expected = {
196         let mut m = BTreeMap::new();
197         m.insert((1, 0), 1);
198         m.insert((0, 0), 1);
199         m.insert((2, 0), 1);
200         m
201     };
202     assert_eq!(table, expected);
203 }
204 
205 #[test]
test_diff()206 fn test_diff() {
207     let a: &[usize] = &[0, 1, 2, 3, 4];
208     let b: &[usize] = &[0, 1, 2, 9, 4];
209 
210     let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
211     diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
212     insta::assert_debug_snapshot!(d.into_inner().ops());
213 }
214 
215 #[test]
test_contiguous()216 fn test_contiguous() {
217     let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
218     let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
219 
220     let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
221     diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
222     insta::assert_debug_snapshot!(d.into_inner().ops());
223 }
224 
225 #[test]
test_pat()226 fn test_pat() {
227     let a: &[usize] = &[0, 1, 3, 4, 5];
228     let b: &[usize] = &[0, 1, 4, 5, 8, 9];
229 
230     let mut d = crate::algorithms::Capture::new();
231     diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
232     insta::assert_debug_snapshot!(d.ops());
233 }
234