dissimilar-1.0.3/src/lib.rs

//! [![github]](https://github.com/dtolnay/dissimilar)&ensp;[![crates-io]](https://crates.io/crates/dissimilar)&ensp;[![docs-rs]](https://docs.rs/dissimilar)
//!
//! [github]: https://img.shields.io/badge/github-8da0cb?style=for-the-badge&labelColor=555555&logo=github
//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust
//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logoColor=white&logo=data:image/svg+xml;base64,PHN2ZyByb2xlPSJpbWciIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgdmlld0JveD0iMCAwIDUxMiA1MTIiPjxwYXRoIGZpbGw9IiNmNWY1ZjUiIGQ9Ik00ODguNiAyNTAuMkwzOTIgMjE0VjEwNS41YzAtMTUtOS4zLTI4LjQtMjMuNC0zMy43bC0xMDAtMzcuNWMtOC4xLTMuMS0xNy4xLTMuMS0yNS4zIDBsLTEwMCAzNy41Yy0xNC4xIDUuMy0yMy40IDE4LjctMjMuNCAzMy43VjIxNGwtOTYuNiAzNi4yQzkuMyAyNTUuNSAwIDI2OC45IDAgMjgzLjlWMzk0YzAgMTMuNiA3LjcgMjYuMSAxOS45IDMyLjJsMTAwIDUwYzEwLjEgNS4xIDIyLjEgNS4xIDMyLjIgMGwxMDMuOS01MiAxMDMuOSA1MmMxMC4xIDUuMSAyMi4xIDUuMSAzMi4yIDBsMTAwLTUwYzEyLjItNi4xIDE5LjktMTguNiAxOS45LTMyLjJWMjgzLjljMC0xNS05LjMtMjguNC0yMy40LTMzLjd6TTM1OCAyMTQuOGwtODUgMzEuOXYtNjguMmw4NS0zN3Y3My4zek0xNTQgMTA0LjFsMTAyLTM4LjIgMTAyIDM4LjJ2LjZsLTEwMiA0MS40LTEwMi00MS40di0uNnptODQgMjkxLjFsLTg1IDQyLjV2LTc5LjFsODUtMzguOHY3NS40em0wLTExMmwtMTAyIDQxLjQtMTAyLTQxLjR2LS42bDEwMi0zOC4yIDEwMiAzOC4ydi42em0yNDAgMTEybC04NSA0Mi41di03OS4xbDg1LTM4Ljh2NzUuNHptMC0xMTJsLTEwMiA0MS40LTEwMi00MS40di0uNmwxMDItMzguMiAxMDIgMzguMnYuNnoiPjwvcGF0aD48L3N2Zz4K
//!
//! <br>
//!
//! ## Diff library with semantic cleanup, based on Google's diff-match-patch
//!
//! This library is a port of the Diff component of [Diff Match Patch] to Rust.
//! The diff implementation is based on [Myers' diff algorithm] but includes
//! some [semantic cleanups] to increase human readability by factoring out
//! commonalities which are likely to be coincidental.
//!
//! Diff Match Patch was originally built in 2006 to power Google Docs.
//!
//! # Interface
//!
//! Here is the entire API of the Rust implementation. It operates on borrowed
//! strings and the return value of the diff algorithm is a vector of chunks
//! pointing into slices of those input strings.
//!
//! ```
//! pub enum Chunk<'a> {
//!     Equal(&'a str),
//!     Delete(&'a str),
//!     Insert(&'a str),
//! }
//!
//! # const IGNORE: &str = stringify! {
//! pub fn diff(text1: &str, text2: &str) -> Vec<Chunk>;
//! # };
//! ```
//!
//! [Diff Match Patch]: https://github.com/google/diff-match-patch
//! [Myers' diff algorithm]: https://neil.fraser.name/writing/diff/myers.pdf
//! [semantic cleanups]: https://neil.fraser.name/writing/diff/

#![doc(html_root_url = "https://docs.rs/dissimilar/1.0.3")]
#![allow(
    clippy::blocks_in_if_conditions,
    clippy::cast_possible_wrap,
    clippy::cast_sign_loss,
    clippy::cloned_instead_of_copied, // https://github.com/rust-lang/rust-clippy/issues/7127
    clippy::collapsible_else_if,
    clippy::comparison_chain,
    clippy::match_same_arms,
    clippy::module_name_repetitions,
    clippy::must_use_candidate,
    clippy::new_without_default,
    clippy::shadow_unrelated,
    clippy::similar_names,
    clippy::too_many_lines,
    clippy::unseparated_literal_suffix
)]

mod find;
mod range;

#[cfg(test)]
mod tests;

use crate::range::{bytes, str, Range};
use std::cmp;
use std::collections::VecDeque;
use std::fmt::{self, Debug};

#[derive(Copy, Clone, PartialEq, Eq)]
pub enum Chunk<'a> {
    Equal(&'a str),
    Delete(&'a str),
    Insert(&'a str),
}

#[derive(Copy, Clone)]
enum Diff<'a, 'b> {
    Equal(Range<'a>, Range<'b>),
    Delete(Range<'a>),
    Insert(Range<'b>),
}

impl<'tmp, 'a: 'tmp, 'b: 'tmp> Diff<'a, 'b> {
    fn text(&self) -> Range<'tmp> {
        match *self {
            Diff::Equal(range, _) | Diff::Delete(range) | Diff::Insert(range) => range,
        }
    }

    fn grow_left(&mut self, increment: usize) {
        self.for_each(|range| {
            range.offset -= increment;
            range.len += increment;
        });
    }

    fn grow_right(&mut self, increment: usize) {
        self.for_each(|range| range.len += increment);
    }

    fn shift_left(&mut self, increment: usize) {
        self.for_each(|range| range.offset -= increment);
    }

    fn shift_right(&mut self, increment: usize) {
        self.for_each(|range| range.offset += increment);
    }

    fn for_each(&mut self, f: impl Fn(&mut Range)) {
        match self {
            Diff::Equal(range1, range2) => {
                f(range1);
                f(range2);
            }
            Diff::Delete(range) => f(range),
            Diff::Insert(range) => f(range),
        }
    }
}

pub fn diff<'a>(text1: &'a str, text2: &'a str) -> Vec<Chunk<'a>> {
    let text1 = Range::new(text1, ..);
    let text2 = Range::new(text2, ..);
    let mut solution = main(text1, text2);
    cleanup_char_boundary(&mut solution);
    cleanup_semantic(&mut solution);
    cleanup_merge(&mut solution);
    solution.diffs.into_iter().map(Chunk::from).collect()
}

struct Solution<'a, 'b> {
    text1: Range<'a>,
    text2: Range<'b>,
    diffs: Vec<Diff<'a, 'b>>,
    utf8: bool,
}

fn main<'a, 'b>(mut text1: Range<'a>, mut text2: Range<'b>) -> Solution<'a, 'b> {
    let whole1 = text1;
    let whole2 = text2;

    // Trim off common prefix.
    let common_prefix_len = common_prefix_bytes(text1, text2);
    let common_prefix = Diff::Equal(
        text1.substring(..common_prefix_len),
        text2.substring(..common_prefix_len),
    );
    text1 = text1.substring(common_prefix_len..);
    text2 = text2.substring(common_prefix_len..);

    // Trim off common suffix.
    let common_suffix_len = common_suffix_bytes(text1, text2);
    let common_suffix = Diff::Equal(
        text1.substring(text1.len - common_suffix_len..),
        text2.substring(text2.len - common_suffix_len..),
    );
    text1 = text1.substring(..text1.len - common_suffix_len);
    text2 = text2.substring(..text2.len - common_suffix_len);

    // Compute the diff on the middle block.
    let mut solution = Solution {
        text1: whole1,
        text2: whole2,
        diffs: compute(text1, text2),
        utf8: false,
    };

    // Restore the prefix and suffix.
    if common_prefix_len > 0 {
        solution.diffs.insert(0, common_prefix);
    }
    if common_suffix_len > 0 {
        solution.diffs.push(common_suffix);
    }

    cleanup_merge(&mut solution);

    solution
}

// Find the differences between two texts. Assumes that the texts do not have
// any common prefix or suffix.
fn compute<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> {
    match (text1.is_empty(), text2.is_empty()) {
        (true, true) => return Vec::new(),
        (true, false) => return vec![Diff::Insert(text2)],
        (false, true) => return vec![Diff::Delete(text1)],
        (false, false) => {}
    }

    // Check for entire shorter text inside the longer text.
    if text1.len > text2.len {
        if let Some(i) = text1.find(text2) {
            return vec![
                Diff::Delete(text1.substring(..i)),
                Diff::Equal(text1.substring(i..i + text2.len), text2),
                Diff::Delete(text1.substring(i + text2.len..)),
            ];
        }
    } else {
        if let Some(i) = text2.find(text1) {
            return vec![
                Diff::Insert(text2.substring(..i)),
                Diff::Equal(text1, text2.substring(i..i + text1.len)),
                Diff::Insert(text2.substring(i + text1.len..)),
            ];
        }
    }

    if text1.len == 1 || text2.len == 1 {
        // Single character string.
        // After the previous check, the character can't be an equality.
        return vec![Diff::Delete(text1), Diff::Insert(text2)];
    }

    bisect(text1, text2)
}

// Find the 'middle snake' of a diff, split the problem in two and return the
// recursively constructed diff.
//
// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
fn bisect<'a, 'b>(text1: Range<'a>, text2: Range<'b>) -> Vec<Diff<'a, 'b>> {
    let max_d = (text1.len + text2.len + 1) / 2;
    let v_offset = max_d;
    let v_len = 2 * max_d;
    let mut v1 = vec![-1isize; v_len];
    let mut v2 = vec![-1isize; v_len];
    v1[v_offset + 1] = 0;
    v2[v_offset + 1] = 0;
    let delta = text1.len as isize - text2.len as isize;
    // If the total number of characters is odd, then the front path will
    // collide with the reverse path.
    let front = delta % 2 != 0;
    // Offsets for start and end of k loop.
    // Prevents mapping of space beyond the grid.
    let mut k1start = 0;
    let mut k1end = 0;
    let mut k2start = 0;
    let mut k2end = 0;
    for d in 0..max_d as isize {
        // Walk the front path one step.
        let mut k1 = -d + k1start;
        while k1 <= d - k1end {
            let k1_offset = (v_offset as isize + k1) as usize;
            let mut x1 = if k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1]) {
                v1[k1_offset + 1]
            } else {
                v1[k1_offset - 1] + 1
            } as usize;
            let mut y1 = (x1 as isize - k1) as usize;
            if let (Some(s1), Some(s2)) = (text1.get(x1..), text2.get(y1..)) {
                let advance = common_prefix_bytes(s1, s2);
                x1 += advance;
                y1 += advance;
            }
            v1[k1_offset] = x1 as isize;
            if x1 > text1.len {
                // Ran off the right of the graph.
                k1end += 2;
            } else if y1 > text2.len {
                // Ran off the bottom of the graph.
                k1start += 2;
            } else if front {
                let k2_offset = v_offset as isize + delta - k1;
                if k2_offset >= 0 && k2_offset < v_len as isize && v2[k2_offset as usize] != -1 {
                    // Mirror x2 onto top-left coordinate system.
                    let x2 = text1.len as isize - v2[k2_offset as usize];
                    if x1 as isize >= x2 {
                        // Overlap detected.
                        return bisect_split(text1, text2, x1, y1);
                    }
                }
            }
            k1 += 2;
        }

        // Walk the reverse path one step.
        let mut k2 = -d + k2start;
        while k2 <= d - k2end {
            let k2_offset = (v_offset as isize + k2) as usize;
            let mut x2 = if k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1]) {
                v2[k2_offset + 1]
            } else {
                v2[k2_offset - 1] + 1
            } as usize;
            let mut y2 = (x2 as isize - k2) as usize;
            if x2 < text1.len && y2 < text2.len {
                let advance = common_suffix_bytes(
                    text1.substring(..text1.len - x2),
                    text2.substring(..text2.len - y2),
                );
                x2 += advance;
                y2 += advance;
            }
            v2[k2_offset] = x2 as isize;
            if x2 > text1.len {
                // Ran off the left of the graph.
                k2end += 2;
            } else if y2 > text2.len {
                // Ran off the top of the graph.
                k2start += 2;
            } else if !front {
                let k1_offset = v_offset as isize + delta - k2;
                if k1_offset >= 0 && k1_offset < v_len as isize && v1[k1_offset as usize] != -1 {
                    let x1 = v1[k1_offset as usize] as usize;
                    let y1 = v_offset + x1 - k1_offset as usize;
                    // Mirror x2 onto top-left coordinate system.
                    x2 = text1.len - x2;
                    if x1 >= x2 {
                        // Overlap detected.
                        return bisect_split(text1, text2, x1, y1);
                    }
                }
            }
            k2 += 2;
        }
    }
    // Number of diffs equals number of characters, no commonality at all.
    vec![Diff::Delete(text1), Diff::Insert(text2)]
}

// Given the location of the 'middle snake', split the diff in two parts and
// recurse.
fn bisect_split<'a, 'b>(
    text1: Range<'a>,
    text2: Range<'b>,
    x: usize,
    y: usize,
) -> Vec<Diff<'a, 'b>> {
    let (text1a, text1b) = text1.split_at(x);
    let (text2a, text2b) = text2.split_at(y);

    // Compute both diffs serially.
    let mut diffs = main(text1a, text2a).diffs;
    diffs.extend(main(text1b, text2b).diffs);

    diffs
}

// Determine the length of the common prefix of two strings.
fn common_prefix(text1: Range, text2: Range) -> usize {
    for ((i, ch1), ch2) in text1.char_indices().zip(text2.chars()) {
        if ch1 != ch2 {
            return i;
        }
    }
    cmp::min(text1.len, text2.len)
}

// Determine the length of the common suffix of two strings.
fn common_suffix(text1: Range, text2: Range) -> usize {
    for ((i, ch1), ch2) in text1.char_indices().rev().zip(text2.chars().rev()) {
        if ch1 != ch2 {
            return text1.len - i - ch1.len_utf8();
        }
    }
    cmp::min(text1.len, text2.len)
}

fn common_prefix_bytes(text1: Range, text2: Range) -> usize {
    for (i, (b1, b2)) in text1.bytes().zip(text2.bytes()).enumerate() {
        if b1 != b2 {
            return i;
        }
    }
    cmp::min(text1.len, text2.len)
}

fn common_suffix_bytes(text1: Range, text2: Range) -> usize {
    for (i, (b1, b2)) in text1.bytes().rev().zip(text2.bytes().rev()).enumerate() {
        if b1 != b2 {
            return i;
        }
    }
    cmp::min(text1.len, text2.len)
}

// Determine if the suffix of one string is the prefix of another.
//
// Returns the number of characters common to the end of the first string and
// the start of the second string.
fn common_overlap(mut text1: Range, mut text2: Range) -> usize {
    // Eliminate the null case.
    if text1.is_empty() || text2.is_empty() {
        return 0;
    }
    // Truncate the longer string.
    if text1.len > text2.len {
        text1 = text1.substring(text1.len - text2.len..);
    } else if text1.len < text2.len {
        text2 = text2.substring(..text1.len);
    }
    // Quick check for the worst case.
    if bytes(text1) == bytes(text2) {
        return text1.len;
    }

    // Start by looking for a single character match
    // and increase length until no match is found.
    // Performance analysis: https://neil.fraser.name/news/2010/11/04/
    let mut best = 0;
    let mut length = 1;
    loop {
        let pattern = text1.substring(text1.len - length..);
        let found = match text2.find(pattern) {
            Some(found) => found,
            None => return best,
        };
        length += found;
        if found == 0
            || bytes(text1.substring(text1.len - length..)) == bytes(text2.substring(..length))
        {
            best = length;
            length += 1;
        }
    }
}

fn cleanup_char_boundary(solution: &mut Solution) {
    fn boundary_down(doc: &str, pos: usize) -> usize {
        let mut adjust = 0;
        while !doc.is_char_boundary(pos - adjust) {
            adjust += 1;
        }
        adjust
    }

    fn boundary_up(doc: &str, pos: usize) -> usize {
        let mut adjust = 0;
        while !doc.is_char_boundary(pos + adjust) {
            adjust += 1;
        }
        adjust
    }

    fn skip_overlap<'a>(prev: &Range<'a>, range: &mut Range<'a>) {
        let prev_end = prev.offset + prev.len;
        if prev_end > range.offset {
            let delta = cmp::min(prev_end - range.offset, range.len);
            range.offset += delta;
            range.len -= delta;
        }
    }

    let mut read = 0;
    let mut retain = 0;
    let mut last_delete = Range::empty();
    let mut last_insert = Range::empty();
    while let Some(&(mut diff)) = solution.diffs.get(read) {
        read += 1;
        match &mut diff {
            Diff::Equal(range1, range2) => {
                let adjust = boundary_up(range1.doc, range1.offset);
                // If the whole range is sub-character, skip it.
                if range1.len <= adjust {
                    continue;
                }
                range1.offset += adjust;
                range1.len -= adjust;
                range2.offset += adjust;
                range2.len -= adjust;
                let adjust = boundary_down(range1.doc, range1.offset + range1.len);
                range1.len -= adjust;
                range2.len -= adjust;
                last_delete = Range::empty();
                last_insert = Range::empty();
            }
            Diff::Delete(range) => {
                skip_overlap(&last_delete, range);
                if range.len == 0 {
                    continue;
                }
                let adjust = boundary_down(range.doc, range.offset);
                range.offset -= adjust;
                range.len += adjust;
                let adjust = boundary_up(range.doc, range.offset + range.len);
                range.len += adjust;
                last_delete = *range;
            }
            Diff::Insert(range) => {
                skip_overlap(&last_insert, range);
                if range.len == 0 {
                    continue;
                }
                let adjust = boundary_down(range.doc, range.offset);
                range.offset -= adjust;
                range.len += adjust;
                let adjust = boundary_up(range.doc, range.offset + range.len);
                range.len += adjust;
                last_insert = *range;
            }
        }
        solution.diffs[retain] = diff;
        retain += 1;
    }

    solution.diffs.truncate(retain);
    solution.utf8 = true;
}

// Reduce the number of edits by eliminating semantically trivial equalities.
fn cleanup_semantic(solution: &mut Solution) {
    let mut diffs = &mut solution.diffs;
    if diffs.is_empty() {
        return;
    }

    let mut changes = false;
    let mut equalities = VecDeque::new(); // Double-ended queue of equalities.
    let mut last_equality = None; // Always equal to equalities.peek().text
    let mut pointer = 0;
    // Number of characters that changed prior to the equality.
    let mut len_insertions1 = 0;
    let mut len_deletions1 = 0;
    // Number of characters that changed after the equality.
    let mut len_insertions2 = 0;
    let mut len_deletions2 = 0;
    while let Some(&this_diff) = diffs.get(pointer) {
        match this_diff {
            Diff::Equal(text1, text2) => {
                equalities.push_back(pointer);
                len_insertions1 = len_insertions2;
                len_deletions1 = len_deletions2;
                len_insertions2 = 0;
                len_deletions2 = 0;
                last_equality = Some((text1, text2));
                pointer += 1;
                continue;
            }
            Diff::Delete(text) => len_deletions2 += text.len,
            Diff::Insert(text) => len_insertions2 += text.len,
        }
        // Eliminate an equality that is smaller or equal to the edits on both
        // sides of it.
        if last_equality.map_or(false, |(last_equality, _)| {
            last_equality.len <= cmp::max(len_insertions1, len_deletions1)
                && last_equality.len <= cmp::max(len_insertions2, len_deletions2)
        }) {
            // Jump back to offending equality.
            pointer = equalities.pop_back().unwrap();

            // Replace equality with a delete.
            diffs[pointer] = Diff::Delete(last_equality.unwrap().0);
            // Insert a corresponding insert.
            diffs.insert(pointer + 1, Diff::Insert(last_equality.unwrap().1));

            len_insertions1 = 0; // Reset the counters.
            len_insertions2 = 0;
            len_deletions1 = 0;
            len_deletions2 = 0;
            last_equality = None;
            changes = true;

            // Throw away the previous equality (it needs to be reevaluated).
            equalities.pop_back();
            if let Some(back) = equalities.back() {
                // There is a safe equality we can fall back to.
                pointer = *back;
            } else {
                // There are no previous equalities, jump back to the start.
                pointer = 0;
                continue;
            }
        }
        pointer += 1;
    }

    // Normalize the diff.
    if changes {
        cleanup_merge(solution);
    }
    cleanup_semantic_lossless(solution);
    diffs = &mut solution.diffs;

    // Find any overlaps between deletions and insertions.
    // e.g: <del>abcxxx</del><ins>xxxdef</ins>
    //   -> <del>abc</del>xxx<ins>def</ins>
    // e.g: <del>xxxabc</del><ins>defxxx</ins>
    //   -> <ins>def</ins>xxx<del>abc</del>
    // Only extract an overlap if it is as big as the edit ahead or behind it.
    let mut pointer = 1;
    while let Some(&this_diff) = diffs.get(pointer) {
        let prev_diff = diffs[pointer - 1];
        if let (Diff::Delete(deletion), Diff::Insert(insertion)) = (prev_diff, this_diff) {
            let overlap_len1 = common_overlap(deletion, insertion);
            let overlap_len2 = common_overlap(insertion, deletion);
            let overlap_min = cmp::min(deletion.len, insertion.len);
            if overlap_len1 >= overlap_len2 && 2 * overlap_len1 >= overlap_min {
                // Overlap found. Insert an equality and trim the surrounding edits.
                diffs.insert(
                    pointer,
                    Diff::Equal(
                        deletion.substring(deletion.len - overlap_len1..deletion.len),
                        insertion.substring(..overlap_len1),
                    ),
                );
                diffs[pointer - 1] =
                    Diff::Delete(deletion.substring(..deletion.len - overlap_len1));
                diffs[pointer + 1] = Diff::Insert(insertion.substring(overlap_len1..));
            } else if overlap_len1 < overlap_len2 && 2 * overlap_len2 >= overlap_min {
                // Reverse overlap found.
                // Insert an equality and swap and trim the surrounding edits.
                diffs.insert(
                    pointer,
                    Diff::Equal(
                        deletion.substring(..overlap_len2),
                        insertion.substring(insertion.len - overlap_len2..insertion.len),
                    ),
                );
                diffs[pointer - 1] =
                    Diff::Insert(insertion.substring(..insertion.len - overlap_len2));
                diffs[pointer + 1] = Diff::Delete(deletion.substring(overlap_len2..));
            }
            pointer += 1;
        }
        pointer += 1;
    }
}

// Look for single edits surrounded on both sides by equalities which can be
// shifted sideways to align the edit to a word boundary.
//
// e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
fn cleanup_semantic_lossless(solution: &mut Solution) {
    let diffs = &mut solution.diffs;
    let mut pointer = 1;
    while let Some(&next_diff) = diffs.get(pointer + 1) {
        let prev_diff = diffs[pointer - 1];
        if let (
            Diff::Equal(mut prev_equal1, mut prev_equal2),
            Diff::Equal(mut next_equal1, mut next_equal2),
        ) = (prev_diff, next_diff)
        {
            // This is a single edit surrounded by equalities.
            let mut edit = diffs[pointer];

            // First, shift the edit as far left as possible.
            let common_offset = common_suffix(prev_equal1, edit.text());
            let original_prev_len = prev_equal1.len;
            prev_equal1.len -= common_offset;
            prev_equal2.len -= common_offset;
            edit.shift_left(common_offset);
            next_equal1.offset -= common_offset;
            next_equal1.len += common_offset;
            next_equal2.offset -= common_offset;
            next_equal2.len += common_offset;

            // Second, step character by character right, looking for the best fit.
            let mut best_prev_equal = (prev_equal1, prev_equal2);
            let mut best_edit = edit;
            let mut best_next_equal = (next_equal1, next_equal2);
            let mut best_score = cleanup_semantic_score(prev_equal1, edit.text())
                + cleanup_semantic_score(edit.text(), next_equal1);
            while !edit.text().is_empty()
                && !next_equal1.is_empty()
                && edit.text().chars().next().unwrap() == next_equal1.chars().next().unwrap()
            {
                let increment = edit.text().chars().next().unwrap().len_utf8();
                prev_equal1.len += increment;
                prev_equal2.len += increment;
                edit.shift_right(increment);
                next_equal1.offset += increment;
                next_equal1.len -= increment;
                next_equal2.offset += increment;
                next_equal2.len -= increment;
                let score = cleanup_semantic_score(prev_equal1, edit.text())
                    + cleanup_semantic_score(edit.text(), next_equal1);
                // The >= encourages trailing rather than leading whitespace on edits.
                if score >= best_score {
                    best_score = score;
                    best_prev_equal = (prev_equal1, prev_equal2);
                    best_edit = edit;
                    best_next_equal = (next_equal1, next_equal2);
                }
            }

            if original_prev_len != best_prev_equal.0.len {
                // We have an improvement, save it back to the diff.
                if best_next_equal.0.is_empty() {
                    diffs.remove(pointer + 1);
                } else {
                    diffs[pointer + 1] = Diff::Equal(best_next_equal.0, best_next_equal.1);
                }
                diffs[pointer] = best_edit;
                if best_prev_equal.0.is_empty() {
                    diffs.remove(pointer - 1);
                    pointer -= 1;
                } else {
                    diffs[pointer - 1] = Diff::Equal(best_prev_equal.0, best_prev_equal.1);
                }
            }
        }
        pointer += 1;
    }
}

// Given two strings, compute a score representing whether the internal boundary
// falls on logical boundaries.
//
// Scores range from 6 (best) to 0 (worst).
fn cleanup_semantic_score(one: Range, two: Range) -> usize {
    if one.is_empty() || two.is_empty() {
        // Edges are the best.
        return 6;
    }

    // Each port of this function behaves slightly differently due to subtle
    // differences in each language's definition of things like 'whitespace'.
    // Since this function's purpose is largely cosmetic, the choice has been
    // made to use each language's native features rather than force total
    // conformity.
    let char1 = one.chars().next_back().unwrap();
    let char2 = two.chars().next().unwrap();
    let non_alphanumeric1 = !char1.is_ascii_alphanumeric();
    let non_alphanumeric2 = !char2.is_ascii_alphanumeric();
    let whitespace1 = non_alphanumeric1 && char1.is_ascii_whitespace();
    let whitespace2 = non_alphanumeric2 && char2.is_ascii_whitespace();
    let line_break1 = whitespace1 && char1.is_control();
    let line_break2 = whitespace2 && char2.is_control();
    let blank_line1 = line_break1 && (one.ends_with("\n\n") || one.ends_with("\n\r\n"));
    let blank_line2 = line_break2 && (two.starts_with("\n\n") || two.starts_with("\r\n\r\n"));

    if blank_line1 || blank_line2 {
        // Five points for blank lines.
        5
    } else if line_break1 || line_break2 {
        // Four points for line breaks.
        4
    } else if non_alphanumeric1 && !whitespace1 && whitespace2 {
        // Three points for end of sentences.
        3
    } else if whitespace1 || whitespace2 {
        // Two points for whitespace.
        2
    } else if non_alphanumeric1 || non_alphanumeric2 {
        // One point for non-alphanumeric.
        1
    } else {
        0
    }
}

// Reorder and merge like edit sections. Merge equalities. Any edit section can
// move as long as it doesn't cross an equality.
fn cleanup_merge(solution: &mut Solution) {
    let diffs = &mut solution.diffs;
    let common_prefix = if solution.utf8 {
        common_prefix
    } else {
        common_prefix_bytes
    };
    let common_suffix = if solution.utf8 {
        common_suffix
    } else {
        common_suffix_bytes
    };

    loop {
        if diffs.is_empty() {
            return;
        }

        diffs.push(Diff::Equal(
            solution.text1.substring(solution.text1.len..),
            solution.text2.substring(solution.text2.len..),
        )); // Add a dummy entry at the end.
        let mut pointer = 0;
        let mut count_delete = 0;
        let mut count_insert = 0;
        let mut text_delete = Range::empty();
        let mut text_insert = Range::empty();
        while let Some(&this_diff) = diffs.get(pointer) {
            match this_diff {
                Diff::Insert(text) => {
                    count_insert += 1;
                    if text_insert.is_empty() {
                        text_insert = text;
                    } else {
                        text_insert.len += text.len;
                    }
                }
                Diff::Delete(text) => {
                    count_delete += 1;
                    if text_delete.is_empty() {
                        text_delete = text;
                    } else {
                        text_delete.len += text.len;
                    }
                }
                Diff::Equal(text, _) => {
                    let count_both = count_delete + count_insert;
                    if count_both > 1 {
                        let both_types = count_delete != 0 && count_insert != 0;
                        // Delete the offending records.
                        diffs.splice(pointer - count_both..pointer, None);
                        pointer -= count_both;
                        if both_types {
                            // Factor out any common prefix.
                            let common_length = common_prefix(text_insert, text_delete);
                            if common_length != 0 {
                                if pointer > 0 {
                                    match &mut diffs[pointer - 1] {
                                        Diff::Equal(this_diff1, this_diff2) => {
                                            this_diff1.len += common_length;
                                            this_diff2.len += common_length;
                                        }
                                        _ => unreachable!(
                                            "previous diff should have been an equality"
                                        ),
                                    }
                                } else {
                                    diffs.insert(
                                        pointer,
                                        Diff::Equal(
                                            text_delete.substring(..common_length),
                                            text_insert.substring(..common_length),
                                        ),
                                    );
                                    pointer += 1;
                                }
                                text_insert = text_insert.substring(common_length..);
                                text_delete = text_delete.substring(common_length..);
                            }
                            // Factor out any common suffix.
                            let common_length = common_suffix(text_insert, text_delete);
                            if common_length != 0 {
                                diffs[pointer].grow_left(common_length);
                                text_insert.len -= common_length;
                                text_delete.len -= common_length;
                            }
                        }
                        // Insert the merged records.
                        if !text_delete.is_empty() {
                            diffs.insert(pointer, Diff::Delete(text_delete));
                            pointer += 1;
                        }
                        if !text_insert.is_empty() {
                            diffs.insert(pointer, Diff::Insert(text_insert));
                            pointer += 1;
                        }
                    } else if pointer > 0 {
                        if let Some(Diff::Equal(prev_equal1, prev_equal2)) =
                            diffs.get_mut(pointer - 1)
                        {
                            // Merge this equality with the previous one.
                            prev_equal1.len += text.len;
                            prev_equal2.len += text.len;
                            diffs.remove(pointer);
                            pointer -= 1;
                        }
                    }
                    count_insert = 0;
                    count_delete = 0;
                    text_delete = Range::empty();
                    text_insert = Range::empty();
                }
            }
            pointer += 1;
        }
        if diffs.last().unwrap().text().is_empty() {
            diffs.pop(); // Remove the dummy entry at the end.
        }

        // Second pass: look for single edits surrounded on both sides by equalities
        // which can be shifted sideways to eliminate an equality.
        // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
        let mut changes = false;
        let mut pointer = 1;
        // Intentionally ignore the first and last element (don't need checking).
        while let Some(&next_diff) = diffs.get(pointer + 1) {
            let prev_diff = diffs[pointer - 1];
            let this_diff = diffs[pointer];
            if let (Diff::Equal(prev_diff, _), Diff::Equal(next_diff, _)) = (prev_diff, next_diff) {
                // This is a single edit surrounded by equalities.
                if this_diff.text().ends_with(prev_diff) {
                    // Shift the edit over the previous equality.
                    diffs[pointer].shift_left(prev_diff.len);
                    diffs[pointer + 1].grow_left(prev_diff.len);
                    diffs.remove(pointer - 1); // Delete prev_diff.
                    changes = true;
                } else if this_diff.text().starts_with(next_diff) {
                    // Shift the edit over the next equality.
                    diffs[pointer - 1].grow_right(next_diff.len);
                    diffs[pointer].shift_right(next_diff.len);
                    diffs.remove(pointer + 1); // Delete next_diff.
                    changes = true;
                }
            }
            pointer += 1;
        }
        // If shifts were made, the diff needs reordering and another shift sweep.
        if !changes {
            return;
        }
    }
}

impl Debug for Chunk<'_> {
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        let (name, text) = match *self {
            Chunk::Equal(text) => ("Equal", text),
            Chunk::Delete(text) => ("Delete", text),
            Chunk::Insert(text) => ("Insert", text),
        };
        write!(formatter, "{}({:?})", name, text)
    }
}

impl Debug for Diff<'_, '_> {
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        let (name, bytes) = match *self {
            Diff::Equal(range, _) => ("Equal", bytes(range)),
            Diff::Delete(range) => ("Delete", bytes(range)),
            Diff::Insert(range) => ("Insert", bytes(range)),
        };
        let text = String::from_utf8_lossy(bytes);
        write!(formatter, "{}({:?})", name, text)
    }
}

impl<'a> From<Diff<'a, 'a>> for Chunk<'a> {
    fn from(diff: Diff<'a, 'a>) -> Self {
        match diff {
            Diff::Equal(range, _) => Chunk::Equal(str(range)),
            Diff::Delete(range) => Chunk::Delete(str(range)),
            Diff::Insert(range) => Chunk::Insert(str(range)),
        }
    }
}