1 use sequencematcher::SequenceMatcher;
2 use std::cmp;
3 use utils::{count_leading, str_with_similar_chars};
4 
5 #[derive(Default)]
6 pub struct Differ {
7     pub line_junk: Option<fn(&&str) -> bool>,
8     pub char_junk: Option<fn(&char) -> bool>,
9 }
10 
11 impl Differ {
new() -> Differ12     pub fn new() -> Differ {
13         Differ {
14             line_junk: None,
15             char_junk: None,
16         }
17     }
18 
compare(&self, first_sequence: &[&str], second_sequence: &[&str]) -> Vec<String>19     pub fn compare(&self, first_sequence: &[&str], second_sequence: &[&str]) -> Vec<String> {
20         let mut matcher = SequenceMatcher::new(first_sequence, second_sequence);
21         matcher.set_is_junk(self.line_junk);
22         let mut res = Vec::new();
23         for opcode in matcher.get_opcodes() {
24             let mut gen = Vec::new();
25             match opcode.tag.as_ref() {
26                 "replace" => {
27                     gen = self.fancy_replace(
28                         first_sequence,
29                         opcode.first_start,
30                         opcode.first_end,
31                         second_sequence,
32                         opcode.second_start,
33                         opcode.second_end,
34                     )
35                 }
36                 "delete" => {
37                     gen = self.dump("-", first_sequence, opcode.first_start, opcode.first_end)
38                 }
39                 "insert" => {
40                     gen = self.dump("+", second_sequence, opcode.second_start, opcode.second_end)
41                 }
42                 "equal" => {
43                     gen = self.dump(" ", first_sequence, opcode.first_start, opcode.first_end)
44                 }
45                 _ => {}
46             }
47             for i in gen {
48                 res.push(i);
49             }
50         }
51         res
52     }
53 
dump(&self, tag: &str, sequence: &[&str], start: usize, end: usize) -> Vec<String>54     fn dump(&self, tag: &str, sequence: &[&str], start: usize, end: usize) -> Vec<String> {
55         let mut res = Vec::new();
56         for i in start..end {
57             if let Some(s) = sequence.get(i) {
58                 res.push(format!("{} {}", tag, s))
59             }
60         }
61         res
62     }
63 
plain_replace( &self, first_sequence: &[&str], first_start: usize, first_end: usize, second_sequence: &[&str], second_start: usize, second_end: usize, ) -> Vec<String>64     fn plain_replace(
65         &self,
66         first_sequence: &[&str],
67         first_start: usize,
68         first_end: usize,
69         second_sequence: &[&str],
70         second_start: usize,
71         second_end: usize,
72     ) -> Vec<String> {
73         if !(first_start < first_end && second_start < second_end) {
74             return Vec::new();
75         }
76         let (mut first, second) = if second_end - second_start < first_end - first_start {
77             (
78                 self.dump("+", second_sequence, second_start, second_end),
79                 self.dump("-", first_sequence, first_start, first_end),
80             )
81         } else {
82             (
83                 self.dump("-", first_sequence, first_start, first_end),
84                 self.dump("+", second_sequence, second_start, second_end),
85             )
86         };
87         for s in second {
88             first.push(s);
89         }
90         first
91     }
92 
fancy_replace( &self, first_sequence: &[&str], first_start: usize, first_end: usize, second_sequence: &[&str], second_start: usize, second_end: usize, ) -> Vec<String>93     fn fancy_replace(
94         &self,
95         first_sequence: &[&str],
96         first_start: usize,
97         first_end: usize,
98         second_sequence: &[&str],
99         second_start: usize,
100         second_end: usize,
101     ) -> Vec<String> {
102         let mut res = Vec::new();
103         let (mut best_ratio, cutoff) = (0.74, 0.75);
104         let (mut best_i, mut best_j) = (0, 0);
105         let mut eqi: Option<usize> = None;
106         let mut eqj: Option<usize> = None;
107         for (j, second_sequence_str) in second_sequence
108             .iter()
109             .enumerate()
110             .take(second_end)
111             .skip(second_start)
112         {
113             for (i, first_sequence_str) in first_sequence
114                 .iter()
115                 .enumerate()
116                 .take(second_end)
117                 .skip(second_start)
118             {
119                 if first_sequence_str == second_sequence_str {
120                     if eqi.is_none() {
121                         eqi = Some(i);
122                         eqj = Some(j);
123                     }
124                     continue;
125                 }
126                 let (first_sequence_chars, second_sequence_chars) = (
127                     first_sequence_str.chars().collect::<Vec<char>>(),
128                     second_sequence_str.chars().collect::<Vec<char>>(),
129                 );
130                 let mut cruncher =
131                     SequenceMatcher::new(&first_sequence_chars, &second_sequence_chars);
132                 cruncher.set_is_junk(self.char_junk);
133                 if cruncher.ratio() > best_ratio {
134                     best_ratio = cruncher.ratio();
135                     best_i = i;
136                     best_j = j;
137                 }
138             }
139         }
140         if best_ratio < cutoff {
141             if eqi.is_none() {
142                 res.extend(
143                     self.plain_replace(
144                         first_sequence,
145                         first_start,
146                         first_end,
147                         second_sequence,
148                         second_start,
149                         second_end,
150                     ).iter()
151                         .cloned(),
152                 );
153                 return res;
154             }
155             best_i = eqi.unwrap();
156             best_j = eqj.unwrap();
157         } else {
158             eqi = None;
159         }
160         res.extend(
161             self.fancy_helper(
162                 first_sequence,
163                 first_start,
164                 best_i,
165                 second_sequence,
166                 second_start,
167                 best_j,
168             ).iter()
169                 .cloned(),
170         );
171         let first_element = &first_sequence[best_i];
172         let second_element = &second_sequence[best_j];
173         if eqi.is_none() {
174             let (mut first_tag, mut second_tag) = (String::new(), String::new());
175             let first_element_chars: Vec<char> = first_element.chars().collect();
176             let second_element_chars: Vec<char> = second_element.chars().collect();
177             let mut cruncher = SequenceMatcher::new(&first_element_chars, &second_element_chars);
178             cruncher.set_is_junk(self.char_junk);
179             for opcode in &cruncher.get_opcodes() {
180                 let (first_length, second_length) = (
181                     opcode.first_end - opcode.first_start,
182                     opcode.second_end - opcode.second_start,
183                 );
184                 match opcode.tag.as_ref() {
185                     "replace" => {
186                         first_tag.push_str(&str_with_similar_chars('^', first_length));
187                         second_tag.push_str(&str_with_similar_chars('^', second_length));
188                     }
189                     "delete" => {
190                         first_tag.push_str(&str_with_similar_chars('-', first_length));
191                     }
192                     "insert" => {
193                         second_tag.push_str(&str_with_similar_chars('+', second_length));
194                     }
195                     "equal" => {
196                         first_tag.push_str(&str_with_similar_chars(' ', first_length));
197                         second_tag.push_str(&str_with_similar_chars(' ', second_length));
198                     }
199                     _ => {}
200                 }
201             }
202             res.extend(
203                 self.qformat(&first_element, &second_element, &first_tag, &second_tag)
204                     .iter()
205                     .cloned(),
206             );
207         } else {
208             let mut s = String::from("  ");
209             s.push_str(&first_element);
210             res.push(s);
211         }
212         res.extend(
213             self.fancy_helper(
214                 first_sequence,
215                 best_i + 1,
216                 first_end,
217                 second_sequence,
218                 best_j + 1,
219                 second_end,
220             ).iter()
221                 .cloned(),
222         );
223         res
224     }
225 
fancy_helper( &self, first_sequence: &[&str], first_start: usize, first_end: usize, second_sequence: &[&str], second_start: usize, second_end: usize, ) -> Vec<String>226     fn fancy_helper(
227         &self,
228         first_sequence: &[&str],
229         first_start: usize,
230         first_end: usize,
231         second_sequence: &[&str],
232         second_start: usize,
233         second_end: usize,
234     ) -> Vec<String> {
235         let mut res = Vec::new();
236         if first_start < first_end {
237             if second_start < second_end {
238                 res = self.fancy_replace(
239                     first_sequence,
240                     first_start,
241                     first_end,
242                     second_sequence,
243                     second_start,
244                     second_end,
245                 );
246             } else {
247                 res = self.dump("-", first_sequence, first_start, first_end);
248             }
249         } else if second_start < second_end {
250             res = self.dump("+", second_sequence, second_start, second_end);
251         }
252         res
253     }
254 
qformat( &self, first_line: &str, second_line: &str, first_tags: &str, second_tags: &str, ) -> Vec<String>255     fn qformat(
256         &self,
257         first_line: &str,
258         second_line: &str,
259         first_tags: &str,
260         second_tags: &str,
261     ) -> Vec<String> {
262         let mut res = Vec::new();
263         let mut first_tags = first_tags;
264         let mut second_tags = second_tags;
265         let mut common = cmp::min(
266             count_leading(first_line, '\t'),
267             count_leading(second_line, '\t'),
268         );
269         common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
270         common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
271         first_tags = first_tags.split_at(common).1.trim_right();
272         second_tags = second_tags.split_at(common).1.trim_right();
273         let mut s = format!("- {}", first_line);
274         res.push(s);
275         if first_tags != "" {
276             s = format!("? {}{}\n", str_with_similar_chars('\t', common), first_tags);
277             res.push(s);
278         }
279         s = format!("+ {}", second_line);
280         res.push(s);
281         if second_tags != "" {
282             s = format!(
283                 "? {}{}\n",
284                 str_with_similar_chars('\t', common),
285                 second_tags
286             );
287             res.push(s);
288         }
289         res
290     }
291 
restore(delta: &[String], which: usize) -> Vec<String>292     pub fn restore(delta: &[String], which: usize) -> Vec<String> {
293         if !(which == 1 || which == 2) {
294             panic!("Second parameter must be 1 or 2");
295         }
296         let mut res = Vec::new();
297         let tag = if which == 1 { "- " } else { "+ " }.to_string();
298         let prefixes = vec![tag, "  ".to_string()];
299         for line in delta {
300             for prefix in &prefixes {
301                 if line.starts_with(prefix) {
302                     res.push(line.split_at(2).1.to_string());
303                 }
304             }
305         }
306         res
307     }
308 }
309 
310 #[test]
test_fancy_replace()311 fn test_fancy_replace() {
312     let differ = Differ::new();
313     let result = differ
314         .fancy_replace(&vec!["abcDefghiJkl\n"], 0, 1, &vec!["abcdefGhijkl\n"], 0, 1)
315         .join("");
316     assert_eq!(
317         result,
318         "- abcDefghiJkl\n?    ^  ^  ^\n+ abcdefGhijkl\n?    ^  ^  ^\n"
319     );
320 }
321 
322 #[test]
test_qformat()323 fn test_qformat() {
324     let differ = Differ::new();
325     let result = differ.qformat(
326         "\tabcDefghiJkl\n",
327         "\tabcdefGhijkl\n",
328         "  ^ ^  ^      ",
329         "  ^ ^  ^      ",
330     );
331     assert_eq!(
332         result,
333         vec![
334             "- \tabcDefghiJkl\n",
335             "? \t ^ ^  ^\n",
336             "+ \tabcdefGhijkl\n",
337             "? \t ^ ^  ^\n",
338         ]
339     );
340 }
341