1 //! Text diffing utilities.
2 use std::borrow::Cow;
3 use std::cmp::Reverse;
4 use std::collections::BinaryHeap;
5 use std::time::{Duration, Instant};
6
7 mod abstraction;
8 #[cfg(feature = "inline")]
9 mod inline;
10 mod utils;
11
12 pub use self::abstraction::{DiffableStr, DiffableStrRef};
13 #[cfg(feature = "inline")]
14 pub use self::inline::InlineChange;
15
16 use self::utils::{upper_seq_ratio, QuickSeqRatio};
17 use crate::algorithms::IdentifyDistinct;
18 use crate::iter::{AllChangesIter, ChangesIter};
19 use crate::udiff::UnifiedDiff;
20 use crate::{capture_diff_deadline, get_diff_ratio, group_diff_ops, Algorithm, DiffOp};
21
/// Time limit for a diff, either as a fixed point in time or as a timeout
/// that is only anchored once the diff actually starts.
#[derive(Debug, Clone, Copy)]
enum Deadline {
    /// A fixed point in time.
    Absolute(Instant),
    /// A timeout measured from the moment [`Deadline::into_instant`] is called.
    Relative(Duration),
}

impl Deadline {
    /// Resolves the deadline into a concrete [`Instant`].
    ///
    /// Absolute deadlines are returned unchanged. Relative deadlines are added
    /// to `Instant::now()`. If the timeout is so large that the sum cannot be
    /// represented, the timeout is halved until it fits instead of panicking,
    /// so huge timeouts behave like "practically no limit".
    fn into_instant(self) -> Instant {
        match self {
            Deadline::Absolute(instant) => instant,
            Deadline::Relative(duration) => {
                let now = Instant::now();
                let mut timeout = duration;
                loop {
                    // `Instant + Duration` panics on overflow while
                    // `checked_add` reports it.  The loop terminates because
                    // the timeout eventually reaches zero, which always fits.
                    if let Some(instant) = now.checked_add(timeout) {
                        return instant;
                    }
                    timeout /= 2;
                }
            }
        }
    }
}
36
37 /// A builder type config for more complex uses of [`TextDiff`].
38 ///
39 /// Requires the `text` feature.
40 #[derive(Clone, Debug)]
41 pub struct TextDiffConfig {
42 algorithm: Algorithm,
43 newline_terminated: Option<bool>,
44 deadline: Option<Deadline>,
45 }
46
47 impl Default for TextDiffConfig {
default() -> TextDiffConfig48 fn default() -> TextDiffConfig {
49 TextDiffConfig {
50 algorithm: Algorithm::default(),
51 newline_terminated: None,
52 deadline: None,
53 }
54 }
55 }
56
57 impl TextDiffConfig {
58 /// Changes the algorithm.
59 ///
60 /// The default algorithm is [`Algorithm::Myers`].
algorithm(&mut self, alg: Algorithm) -> &mut Self61 pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self {
62 self.algorithm = alg;
63 self
64 }
65
66 /// Sets a deadline for the diff operation.
67 ///
68 /// By default a diff will take as long as it takes. For certain diff
69 /// algorthms like Myer's and Patience a maximum running time can be
70 /// defined after which the algorithm gives up and approximates.
deadline(&mut self, deadline: Instant) -> &mut Self71 pub fn deadline(&mut self, deadline: Instant) -> &mut Self {
72 self.deadline = Some(Deadline::Absolute(deadline));
73 self
74 }
75
76 /// Sets a timeout for thediff operation.
77 ///
78 /// This is like [`deadline`](Self::deadline) but accepts a duration.
timeout(&mut self, timeout: Duration) -> &mut Self79 pub fn timeout(&mut self, timeout: Duration) -> &mut Self {
80 self.deadline = Some(Deadline::Relative(timeout));
81 self
82 }
83
84 /// Changes the newline termination flag.
85 ///
86 /// The default is automatic based on input. This flag controls the
87 /// behavior of [`TextDiff::iter_changes`] and unified diff generation
88 /// with regards to newlines. When the flag is set to `false` (which
89 /// is the default) then newlines are added. Otherwise the newlines
90 /// from the source sequences are reused.
newline_terminated(&mut self, yes: bool) -> &mut Self91 pub fn newline_terminated(&mut self, yes: bool) -> &mut Self {
92 self.newline_terminated = Some(yes);
93 self
94 }
95
96 /// Creates a diff of lines.
97 ///
98 /// This splits the text `old` and `new` into lines preserving newlines
99 /// in the input. Line diffs are very common and because of that enjoy
100 /// special handling in similar. When a line diff is created with this
101 /// method the `newline_terminated` flag is flipped to `true` and will
102 /// influence the behavior of unified diff generation.
103 ///
104 /// ```rust
105 /// use similar::{TextDiff, ChangeTag};
106 ///
107 /// let diff = TextDiff::configure().diff_lines("a\nb\nc", "a\nb\nC");
108 /// let changes: Vec<_> = diff
109 /// .iter_all_changes()
110 /// .map(|x| (x.tag(), x.value()))
111 /// .collect();
112 ///
113 /// assert_eq!(changes, vec![
114 /// (ChangeTag::Equal, "a\n"),
115 /// (ChangeTag::Equal, "b\n"),
116 /// (ChangeTag::Delete, "c"),
117 /// (ChangeTag::Insert, "C"),
118 /// ]);
119 /// ```
diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( &self, old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output>120 pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
121 &self,
122 old: &'old T,
123 new: &'new T,
124 ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
125 self.diff(
126 Cow::Owned(old.as_diffable_str().tokenize_lines()),
127 Cow::Owned(new.as_diffable_str().tokenize_lines()),
128 true,
129 )
130 }
131
132 /// Creates a diff of words.
133 ///
134 /// This splits the text into words and whitespace.
135 ///
136 /// Note on word diffs: because the text differ will tokenize the strings
137 /// into small segments it can be inconvenient to work with the results
138 /// depending on the use case. You might also want to combine word level
139 /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
140 /// which lets you remap the diffs back to the original input strings.
141 ///
142 /// ```rust
143 /// use similar::{TextDiff, ChangeTag};
144 ///
145 /// let diff = TextDiff::configure().diff_words("foo bar baz", "foo BAR baz");
146 /// let changes: Vec<_> = diff
147 /// .iter_all_changes()
148 /// .map(|x| (x.tag(), x.value()))
149 /// .collect();
150 ///
151 /// assert_eq!(changes, vec![
152 /// (ChangeTag::Equal, "foo"),
153 /// (ChangeTag::Equal, " "),
154 /// (ChangeTag::Delete, "bar"),
155 /// (ChangeTag::Insert, "BAR"),
156 /// (ChangeTag::Equal, " "),
157 /// (ChangeTag::Equal, "baz"),
158 /// ]);
159 /// ```
diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( &self, old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output>160 pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
161 &self,
162 old: &'old T,
163 new: &'new T,
164 ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
165 self.diff(
166 Cow::Owned(old.as_diffable_str().tokenize_words()),
167 Cow::Owned(new.as_diffable_str().tokenize_words()),
168 false,
169 )
170 }
171
172 /// Creates a diff of characters.
173 ///
174 /// Note on character diffs: because the text differ will tokenize the strings
175 /// into small segments it can be inconvenient to work with the results
176 /// depending on the use case. You might also want to combine word level
177 /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
178 /// which lets you remap the diffs back to the original input strings.
179 ///
180 /// ```rust
181 /// use similar::{TextDiff, ChangeTag};
182 ///
183 /// let diff = TextDiff::configure().diff_chars("abcdef", "abcDDf");
184 /// let changes: Vec<_> = diff
185 /// .iter_all_changes()
186 /// .map(|x| (x.tag(), x.value()))
187 /// .collect();
188 ///
189 /// assert_eq!(changes, vec![
190 /// (ChangeTag::Equal, "a"),
191 /// (ChangeTag::Equal, "b"),
192 /// (ChangeTag::Equal, "c"),
193 /// (ChangeTag::Delete, "d"),
194 /// (ChangeTag::Delete, "e"),
195 /// (ChangeTag::Insert, "D"),
196 /// (ChangeTag::Insert, "D"),
197 /// (ChangeTag::Equal, "f"),
198 /// ]);
199 /// ```
diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( &self, old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output>200 pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
201 &self,
202 old: &'old T,
203 new: &'new T,
204 ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
205 self.diff(
206 Cow::Owned(old.as_diffable_str().tokenize_chars()),
207 Cow::Owned(new.as_diffable_str().tokenize_chars()),
208 false,
209 )
210 }
211
212 /// Creates a diff of unicode words.
213 ///
214 /// This splits the text into words according to unicode rules. This is
215 /// generally recommended over [`TextDiffConfig::diff_words`] but
216 /// requires a dependency.
217 ///
218 /// This requires the `unicode` feature.
219 ///
220 /// Note on word diffs: because the text differ will tokenize the strings
221 /// into small segments it can be inconvenient to work with the results
222 /// depending on the use case. You might also want to combine word level
223 /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
224 /// which lets you remap the diffs back to the original input strings.
225 ///
226 /// ```rust
227 /// use similar::{TextDiff, ChangeTag};
228 ///
229 /// let diff = TextDiff::configure().diff_unicode_words("ah(be)ce", "ah(ah)ce");
230 /// let changes: Vec<_> = diff
231 /// .iter_all_changes()
232 /// .map(|x| (x.tag(), x.value()))
233 /// .collect();
234 ///
235 /// assert_eq!(changes, vec![
236 /// (ChangeTag::Equal, "ah"),
237 /// (ChangeTag::Equal, "("),
238 /// (ChangeTag::Delete, "be"),
239 /// (ChangeTag::Insert, "ah"),
240 /// (ChangeTag::Equal, ")"),
241 /// (ChangeTag::Equal, "ce"),
242 /// ]);
243 /// ```
244 #[cfg(feature = "unicode")]
diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( &self, old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output>245 pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
246 &self,
247 old: &'old T,
248 new: &'new T,
249 ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
250 self.diff(
251 Cow::Owned(old.as_diffable_str().tokenize_unicode_words()),
252 Cow::Owned(new.as_diffable_str().tokenize_unicode_words()),
253 false,
254 )
255 }
256
257 /// Creates a diff of graphemes.
258 ///
259 /// This requires the `unicode` feature.
260 ///
261 /// Note on grapheme diffs: because the text differ will tokenize the strings
262 /// into small segments it can be inconvenient to work with the results
263 /// depending on the use case. You might also want to combine word level
264 /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
265 /// which lets you remap the diffs back to the original input strings.
266 ///
267 /// ```rust
268 /// use similar::{TextDiff, ChangeTag};
269 ///
270 /// let diff = TextDiff::configure().diff_graphemes("", "❄️");
271 /// let changes: Vec<_> = diff
272 /// .iter_all_changes()
273 /// .map(|x| (x.tag(), x.value()))
274 /// .collect();
275 ///
276 /// assert_eq!(changes, vec![
277 /// (ChangeTag::Equal, ""),
278 /// (ChangeTag::Delete, ""),
279 /// (ChangeTag::Delete, ""),
280 /// (ChangeTag::Insert, ""),
281 /// (ChangeTag::Insert, "❄️"),
282 /// ]);
283 /// ```
284 #[cfg(feature = "unicode")]
diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>( &self, old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output>285 pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
286 &self,
287 old: &'old T,
288 new: &'new T,
289 ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
290 self.diff(
291 Cow::Owned(old.as_diffable_str().tokenize_graphemes()),
292 Cow::Owned(new.as_diffable_str().tokenize_graphemes()),
293 false,
294 )
295 }
296
297 /// Creates a diff of arbitrary slices.
298 ///
299 /// ```rust
300 /// use similar::{TextDiff, ChangeTag};
301 ///
302 /// let old = &["foo", "bar", "baz"];
303 /// let new = &["foo", "BAR", "baz"];
304 /// let diff = TextDiff::configure().diff_slices(old, new);
305 /// let changes: Vec<_> = diff
306 /// .iter_all_changes()
307 /// .map(|x| (x.tag(), x.value()))
308 /// .collect();
309 ///
310 /// assert_eq!(changes, vec![
311 /// (ChangeTag::Equal, "foo"),
312 /// (ChangeTag::Delete, "bar"),
313 /// (ChangeTag::Insert, "BAR"),
314 /// (ChangeTag::Equal, "baz"),
315 /// ]);
316 /// ```
diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>( &self, old: &'bufs [&'old T], new: &'bufs [&'new T], ) -> TextDiff<'old, 'new, 'bufs, T>317 pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
318 &self,
319 old: &'bufs [&'old T],
320 new: &'bufs [&'new T],
321 ) -> TextDiff<'old, 'new, 'bufs, T> {
322 self.diff(Cow::Borrowed(old), Cow::Borrowed(new), false)
323 }
324
diff<'old, 'new, 'bufs, T: DiffableStr + ?Sized>( &self, old: Cow<'bufs, [&'old T]>, new: Cow<'bufs, [&'new T]>, newline_terminated: bool, ) -> TextDiff<'old, 'new, 'bufs, T>325 fn diff<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
326 &self,
327 old: Cow<'bufs, [&'old T]>,
328 new: Cow<'bufs, [&'new T]>,
329 newline_terminated: bool,
330 ) -> TextDiff<'old, 'new, 'bufs, T> {
331 let deadline = self.deadline.map(|x| x.into_instant());
332 let ops = if old.len() > 100 || new.len() > 100 {
333 let ih = IdentifyDistinct::<u32>::new(&old[..], 0..old.len(), &new[..], 0..new.len());
334 capture_diff_deadline(
335 self.algorithm,
336 ih.old_lookup(),
337 ih.old_range(),
338 ih.new_lookup(),
339 ih.new_range(),
340 deadline,
341 )
342 } else {
343 capture_diff_deadline(
344 self.algorithm,
345 &old[..],
346 0..old.len(),
347 &new[..],
348 0..new.len(),
349 deadline,
350 )
351 };
352 TextDiff {
353 old,
354 new,
355 ops,
356 newline_terminated: self.newline_terminated.unwrap_or(newline_terminated),
357 algorithm: self.algorithm,
358 }
359 }
360 }
361
362 /// Captures diff op codes for textual diffs.
363 ///
364 /// The exact diff behavior is depending on the underlying [`DiffableStr`].
365 /// For instance diffs on bytes and strings are slightly different. You can
366 /// create a text diff from constructors such as [`TextDiff::from_lines`] or
367 /// the [`TextDiffConfig`] created by [`TextDiff::configure`].
368 ///
369 /// Requires the `text` feature.
370 pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
371 old: Cow<'bufs, [&'old T]>,
372 new: Cow<'bufs, [&'new T]>,
373 ops: Vec<DiffOp>,
374 newline_terminated: bool,
375 algorithm: Algorithm,
376 }
377
378 impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
379 /// Configures a text differ before diffing.
configure() -> TextDiffConfig380 pub fn configure() -> TextDiffConfig {
381 TextDiffConfig::default()
382 }
383
384 /// Creates a diff of lines.
385 ///
386 /// For more information see [`TextDiffConfig::diff_lines`].
from_lines<T: DiffableStrRef + ?Sized>( old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output>387 pub fn from_lines<T: DiffableStrRef + ?Sized>(
388 old: &'old T,
389 new: &'new T,
390 ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
391 TextDiff::configure().diff_lines(old, new)
392 }
393
394 /// Creates a diff of words.
395 ///
396 /// For more information see [`TextDiffConfig::diff_words`].
from_words<T: DiffableStrRef + ?Sized>( old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output>397 pub fn from_words<T: DiffableStrRef + ?Sized>(
398 old: &'old T,
399 new: &'new T,
400 ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
401 TextDiff::configure().diff_words(old, new)
402 }
403
404 /// Creates a diff of chars.
405 ///
406 /// For more information see [`TextDiffConfig::diff_chars`].
from_chars<T: DiffableStrRef + ?Sized>( old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output>407 pub fn from_chars<T: DiffableStrRef + ?Sized>(
408 old: &'old T,
409 new: &'new T,
410 ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
411 TextDiff::configure().diff_chars(old, new)
412 }
413
414 /// Creates a diff of unicode words.
415 ///
416 /// For more information see [`TextDiffConfig::diff_unicode_words`].
417 ///
418 /// This requires the `unicode` feature.
419 #[cfg(feature = "unicode")]
from_unicode_words<T: DiffableStrRef + ?Sized>( old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output>420 pub fn from_unicode_words<T: DiffableStrRef + ?Sized>(
421 old: &'old T,
422 new: &'new T,
423 ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
424 TextDiff::configure().diff_unicode_words(old, new)
425 }
426
427 /// Creates a diff of graphemes.
428 ///
429 /// For more information see [`TextDiffConfig::diff_graphemes`].
430 ///
431 /// This requires the `unicode` feature.
432 #[cfg(feature = "unicode")]
from_graphemes<T: DiffableStrRef + ?Sized>( old: &'old T, new: &'new T, ) -> TextDiff<'old, 'new, 'bufs, T::Output>433 pub fn from_graphemes<T: DiffableStrRef + ?Sized>(
434 old: &'old T,
435 new: &'new T,
436 ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
437 TextDiff::configure().diff_graphemes(old, new)
438 }
439 }
440
441 impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> {
442 /// Creates a diff of arbitrary slices.
443 ///
444 /// For more information see [`TextDiffConfig::diff_slices`].
from_slices( old: &'bufs [&'old T], new: &'bufs [&'new T], ) -> TextDiff<'old, 'new, 'bufs, T>445 pub fn from_slices(
446 old: &'bufs [&'old T],
447 new: &'bufs [&'new T],
448 ) -> TextDiff<'old, 'new, 'bufs, T> {
449 TextDiff::configure().diff_slices(old, new)
450 }
451
452 /// The name of the algorithm that created the diff.
algorithm(&self) -> Algorithm453 pub fn algorithm(&self) -> Algorithm {
454 self.algorithm
455 }
456
457 /// Returns `true` if items in the slice are newline terminated.
458 ///
459 /// This flag is used by the unified diff writer to determine if extra
460 /// newlines have to be added.
newline_terminated(&self) -> bool461 pub fn newline_terminated(&self) -> bool {
462 self.newline_terminated
463 }
464
465 /// Returns all old slices.
old_slices(&self) -> &[&'old T]466 pub fn old_slices(&self) -> &[&'old T] {
467 &self.old
468 }
469
470 /// Returns all new slices.
new_slices(&self) -> &[&'new T]471 pub fn new_slices(&self) -> &[&'new T] {
472 &self.new
473 }
474
475 /// Return a measure of the sequences' similarity in the range `0..=1`.
476 ///
477 /// A ratio of `1.0` means the two sequences are a complete match, a
478 /// ratio of `0.0` would indicate completely distinct sequences.
479 ///
480 /// ```rust
481 /// # use similar::TextDiff;
482 /// let diff = TextDiff::from_chars("abcd", "bcde");
483 /// assert_eq!(diff.ratio(), 0.75);
484 /// ```
ratio(&self) -> f32485 pub fn ratio(&self) -> f32 {
486 get_diff_ratio(self.ops(), self.old.len(), self.new.len())
487 }
488
489 /// Iterates over the changes the op expands to.
490 ///
491 /// This method is a convenient way to automatically resolve the different
492 /// ways in which a change could be encoded (insert/delete vs replace), look
493 /// up the value from the appropriate slice and also handle correct index
494 /// handling.
iter_changes<'x, 'slf>( &'slf self, op: &DiffOp, ) -> ChangesIter<'slf, [&'x T], [&'x T], &'x T> where 'x: 'slf, 'old: 'x, 'new: 'x,495 pub fn iter_changes<'x, 'slf>(
496 &'slf self,
497 op: &DiffOp,
498 ) -> ChangesIter<'slf, [&'x T], [&'x T], &'x T>
499 where
500 'x: 'slf,
501 'old: 'x,
502 'new: 'x,
503 {
504 op.iter_changes(self.old_slices(), self.new_slices())
505 }
506
507 /// Returns the captured diff ops.
ops(&self) -> &[DiffOp]508 pub fn ops(&self) -> &[DiffOp] {
509 &self.ops
510 }
511
512 /// Isolate change clusters by eliminating ranges with no changes.
513 ///
514 /// This is equivalent to calling [`group_diff_ops`] on [`TextDiff::ops`].
grouped_ops(&self, n: usize) -> Vec<Vec<DiffOp>>515 pub fn grouped_ops(&self, n: usize) -> Vec<Vec<DiffOp>> {
516 group_diff_ops(self.ops().to_vec(), n)
517 }
518
519 /// Flattens out the diff into all changes.
520 ///
521 /// This is a shortcut for combining [`TextDiff::ops`] with
522 /// [`TextDiff::iter_changes`].
iter_all_changes<'x, 'slf>(&'slf self) -> AllChangesIter<'slf, 'x, T> where 'x: 'slf + 'old + 'new, 'old: 'x, 'new: 'x,523 pub fn iter_all_changes<'x, 'slf>(&'slf self) -> AllChangesIter<'slf, 'x, T>
524 where
525 'x: 'slf + 'old + 'new,
526 'old: 'x,
527 'new: 'x,
528 {
529 AllChangesIter::new(&self.old[..], &self.new[..], self.ops())
530 }
531
532 /// Utility to return a unified diff formatter.
unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs, T>533 pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> {
534 UnifiedDiff::from_text_diff(self)
535 }
536
537 /// Iterates over the changes the op expands to with inline emphasis.
538 ///
539 /// This is very similar to [`TextDiff::iter_changes`] but it performs a second
540 /// level diff on adjacent line replacements. The exact behavior of
541 /// this function with regards to how it detects those inline changes
542 /// is currently not defined and will likely change over time.
543 ///
544 /// As of similar 1.2.0 the behavior of this function changes depending on
545 /// if the `unicode` feature is enabled or not. It will prefer unicode word
546 /// splitting over word splitting depending on the feature flag.
547 ///
548 /// Requires the `inline` feature.
549 #[cfg(feature = "inline")]
iter_inline_changes<'slf>( &'slf self, op: &DiffOp, ) -> impl Iterator<Item = InlineChange<'slf, T>> + '_ where 'slf: 'old + 'new,550 pub fn iter_inline_changes<'slf>(
551 &'slf self,
552 op: &DiffOp,
553 ) -> impl Iterator<Item = InlineChange<'slf, T>> + '_
554 where
555 'slf: 'old + 'new,
556 {
557 inline::iter_inline_changes(self, op)
558 }
559 }
560
561 /// Use the text differ to find `n` close matches.
562 ///
563 /// `cutoff` defines the threshold which needs to be reached for a word
564 /// to be considered similar. See [`TextDiff::ratio`] for more information.
565 ///
566 /// ```
567 /// # use similar::get_close_matches;
568 /// let matches = get_close_matches(
569 /// "appel",
570 /// &["ape", "apple", "peach", "puppy"][..],
571 /// 3,
572 /// 0.6
573 /// );
574 /// assert_eq!(matches, vec!["apple", "ape"]);
575 /// ```
576 ///
577 /// Requires the `text` feature.
get_close_matches<'a, T: DiffableStr + ?Sized>( word: &T, possibilities: &[&'a T], n: usize, cutoff: f32, ) -> Vec<&'a T>578 pub fn get_close_matches<'a, T: DiffableStr + ?Sized>(
579 word: &T,
580 possibilities: &[&'a T],
581 n: usize,
582 cutoff: f32,
583 ) -> Vec<&'a T> {
584 let mut matches = BinaryHeap::new();
585 let seq1 = word.tokenize_chars();
586 let quick_ratio = QuickSeqRatio::new(&seq1);
587
588 for &possibility in possibilities {
589 let seq2 = possibility.tokenize_chars();
590
591 if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff {
592 continue;
593 }
594
595 let diff = TextDiff::from_slices(&seq1, &seq2);
596 let ratio = diff.ratio();
597 if ratio >= cutoff {
598 // we're putting the word itself in reverse in so that matches with
599 // the same ratio are ordered lexicographically.
600 matches.push(((ratio * std::u32::MAX as f32) as u32, Reverse(possibility)));
601 }
602 }
603
604 let mut rv = vec![];
605 for _ in 0..n {
606 if let Some((_, elt)) = matches.pop() {
607 rv.push(elt.0);
608 } else {
609 break;
610 }
611 }
612
613 rv
614 }
615
// Snapshots the ops of a line diff in which one line was changed.
#[test]
fn test_captured_ops() {
    let old = "Hello World\nsome stuff here\nsome more stuff here\n";
    let new = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
    let diff = TextDiff::from_lines(old, new);
    insta::assert_debug_snapshot!(&diff.ops());
}
624
// Snapshots the expanded changes of a word diff.
#[test]
fn test_captured_word_ops() {
    let old = "Hello World\nsome stuff here\nsome more stuff here\n";
    let new = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
    let diff = TextDiff::from_words(old, new);
    let changes: Vec<_> = diff
        .ops()
        .iter()
        .flat_map(|op| diff.iter_changes(op))
        .collect();
    insta::assert_debug_snapshot!(&changes);
}
638
// Line diffs are newline terminated and render as a unified diff.
#[test]
fn test_unified_diff() {
    let old = "Hello World\nsome stuff here\nsome more stuff here\n";
    let new = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
    let diff = TextDiff::from_lines(old, new);
    assert!(diff.newline_terminated());
    insta::assert_snapshot!(&diff
        .unified_diff()
        .context_radius(3)
        .header("old", "new")
        .to_string());
}
652
// Snapshots the changes of a line diff and, with the `bytes` feature,
// checks that diffing the same input as bytes yields the same text.
#[test]
fn test_line_ops() {
    let a = "Hello World\nsome stuff here\nsome more stuff here\n";
    let b = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
    let diff = TextDiff::from_lines(a, b);
    assert!(diff.newline_terminated());
    let changes: Vec<_> = diff
        .ops()
        .iter()
        .flat_map(|op| diff.iter_changes(op))
        .collect();
    insta::assert_debug_snapshot!(&changes);

    #[cfg(feature = "bytes")]
    {
        let byte_diff = TextDiff::from_lines(a.as_bytes(), b.as_bytes());
        let byte_changes: Vec<_> = byte_diff
            .ops()
            .iter()
            .flat_map(|op| byte_diff.iter_changes(op))
            .collect();
        for (change, byte_change) in changes.iter().zip(&byte_changes) {
            assert_eq!(change.to_string_lossy(), byte_change.to_string_lossy());
        }
    }
}
679
// One input lacks the trailing newline; the diff is still newline terminated.
#[test]
fn test_virtual_newlines() {
    let (old, new) = ("a\nb", "a\nc\n");
    let diff = TextDiff::from_lines(old, new);
    assert!(diff.newline_terminated());
    let changes: Vec<_> = diff
        .ops()
        .iter()
        .flat_map(|op| diff.iter_changes(op))
        .collect();
    insta::assert_debug_snapshot!(&changes);
}
691
// Snapshots a char diff and, with the `bytes` feature, checks that the
// byte diff of the same input captures identical ops.
#[test]
fn test_char_diff() {
    let (old, new) = ("Hello World", "Hallo Welt");
    let diff = TextDiff::from_chars(old, new);
    insta::assert_debug_snapshot!(diff.ops());

    #[cfg(feature = "bytes")]
    {
        let byte_diff = TextDiff::from_chars(old.as_bytes(), new.as_bytes());
        assert_eq!(diff.ops(), byte_diff.ops());
    }
}
703
// Checks the similarity ratio for a partial match and for two empty inputs.
#[test]
fn test_ratio() {
    let cases = [("abcd", "bcde", 0.75_f32), ("", "", 1.0)];
    for &(old, new, expected) in cases.iter() {
        let diff = TextDiff::from_chars(old, new);
        assert_eq!(diff.ratio(), expected);
    }
}
711
// Checks ranking by score and the lexicographic order of equal scores.
#[test]
fn test_get_close_matches() {
    let fruits = &["ape", "apple", "peach", "puppy"][..];
    let matches = get_close_matches("appel", fruits, 3, 0.6);
    assert_eq!(matches, vec!["apple", "ape"]);

    let words = &[
        "hi", "hulu", "hali", "hoho", "amaz", "zulo", "blah", "hopp", "uulo", "aulo",
    ][..];
    let matches = get_close_matches("hulo", words, 5, 0.7);
    assert_eq!(matches, vec!["aulo", "hulu", "uulo", "zulo"]);
}
726
// The collected changes must be able to outlive the `TextDiff` they were
// produced from; this only compiles if the iterator lifetimes allow it.
#[test]
fn test_lifetimes_on_iter() {
    use crate::Change;

    fn diff_lines<'x, T: DiffableStrRef + ?Sized>(
        old: &'x T,
        new: &'x T,
    ) -> Vec<Change<&'x T::Output>> {
        TextDiff::from_lines(old, new).iter_all_changes().collect()
    }

    let a = String::from("1\n2\n3\n");
    let b = String::from("1\n99\n3\n");
    let changes = diff_lines(&a, &b);
    insta::assert_debug_snapshot!(&changes);
}
743
// Snapshots the JSON serialization of the expanded changes.
#[test]
#[cfg(feature = "serde")]
fn test_serde() {
    let old =
        "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff";
    let new = "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n";
    let diff = TextDiff::from_lines(old, new);
    let changes: Vec<_> = diff
        .ops()
        .iter()
        .flat_map(|op| diff.iter_changes(op))
        .collect();
    let json = serde_json::to_string_pretty(&changes).unwrap();
    insta::assert_snapshot!(&json);
}
759
// Snapshots the JSON serialization of the raw diff ops.
#[test]
#[cfg(feature = "serde")]
fn test_serde_ops() {
    let old =
        "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff";
    let new = "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n";
    let diff = TextDiff::from_lines(old, new);
    let ops = diff.ops();
    let json = serde_json::to_string_pretty(&ops).unwrap();
    insta::assert_snapshot!(&json);
}
771