//! Parallel merge sort.
//!
//! This implementation is copied verbatim from `std::slice::sort` and then parallelized.
//! The only difference from the original is that the sequential `mergesort` returns
//! `MergesortResult` and leaves descending arrays intact.

use crate::iter::*;
use crate::slice::ParallelSliceMut;
use std::mem;
use std::mem::size_of;
use std::ptr;
use std::slice;

unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
    let old = *ptr;
    *ptr = ptr.offset(1);
    old
}

unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
    *ptr = ptr.offset(-1);
    *ptr
}

/// When dropped, copies from `src` into `dest` a sequence of length `len`.
struct CopyOnDrop<T> {
    src: *mut T,
    dest: *mut T,
    len: usize,
}

impl<T> Drop for CopyOnDrop<T> {
    fn drop(&mut self) {
        unsafe {
            ptr::copy_nonoverlapping(self.src, self.dest, self.len);
        }
    }
}

/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that the whole `v[..]` becomes sorted.
///
/// This is the integral subroutine of insertion sort.
fn insert_head<T, F>(v: &mut [T], is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    if v.len() >= 2 && is_less(&v[1], &v[0]) {
        unsafe {
            // There are three ways to implement insertion here:
            //
            // 1. Swap adjacent elements until the first one gets to its final destination.
            //    However, this way we copy data around more than is necessary. If elements are big
            //    structures (costly to copy), this method will be slow.
            //
            // 2. Iterate until the right place for the first element is found. Then shift the
            //    elements succeeding it to make room for it and finally place it into the
            //    remaining hole. This is a good method.
            //
            // 3. Copy the first element into a temporary variable. Iterate until the right place
            //    for it is found. As we go along, copy every traversed element into the slot
            //    preceding it. Finally, copy data from the temporary variable into the remaining
            //    hole. This method is very good. Benchmarks demonstrated slightly better
            //    performance than with the 2nd method.
            //
            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
            let mut tmp = NoDrop {
                value: Some(ptr::read(&v[0])),
            };

            // Intermediate state of the insertion process is always tracked by `hole`, which
            // serves two purposes:
            // 1. Protects integrity of `v` from panics in `is_less`.
            // 2. Fills the remaining hole in `v` in the end.
            //
            // Panic safety:
            //
            // If `is_less` panics at any point during the process, `hole` will get dropped and
            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
            // initially held exactly once.
            let mut hole = InsertionHole {
                src: tmp.value.as_mut().unwrap(),
                dest: &mut v[1],
            };
            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);

            for i in 2..v.len() {
                if !is_less(&v[i], tmp.value.as_ref().unwrap()) {
                    break;
                }
                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
                hole.dest = &mut v[i];
            }
            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
        }
    }

    // Holds a value, but never drops it.
    struct NoDrop<T> {
        value: Option<T>,
    }

    impl<T> Drop for NoDrop<T> {
        fn drop(&mut self) {
            mem::forget(self.value.take());
        }
    }

    // When dropped, copies from `src` into `dest`.
    struct InsertionHole<T> {
        src: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for InsertionHole<T> {
        fn drop(&mut self) {
            unsafe {
                ptr::copy_nonoverlapping(self.src, self.dest, 1);
            }
        }
    }
}
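
// The test below is an added illustration, not part of the original `std::slice::sort` code:
// a minimal sanity check that `insert_head` places `v[0]` into the already-sorted tail `v[1..]`.
#[cfg(test)]
mod insert_head_tests {
    use super::insert_head;

    #[test]
    fn inserts_head_into_sorted_tail() {
        // `v[1..]` is pre-sorted, as the function requires.
        let mut v = [3, 1, 2, 4];
        insert_head(&mut v, &|a: &i32, b: &i32| a < b);
        assert_eq!(v, [1, 2, 3, 4]);
    }
}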

/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
/// stores the result into `v[..]`.
///
/// # Safety
///
/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    let len = v.len();
    let v = v.as_mut_ptr();
    let v_mid = v.add(mid);
    let v_end = v.add(len);

    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
    // copying the lesser (or greater) one into `v`.
    //
    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
    // consumed first, then we must copy whatever is left of the shorter run into the remaining
    // hole in `v`.
    //
    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
    // 1. Protects integrity of `v` from panics in `is_less`.
    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
    // object it initially held exactly once.
    let mut hole;

    if mid <= len - mid {
        // The left run is shorter.
        ptr::copy_nonoverlapping(v, buf, mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(mid),
            dest: v,
        };

        // Initially, these pointers point to the beginnings of their arrays.
        let left = &mut hole.start;
        let mut right = v_mid;
        let out = &mut hole.dest;

        while *left < hole.end && right < v_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*right, &**left) {
                get_and_increment(&mut right)
            } else {
                get_and_increment(left)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
        }
    } else {
        // The right run is shorter.
        ptr::copy_nonoverlapping(v_mid, buf, len - mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(len - mid),
            dest: v_mid,
        };

        // Initially, these pointers point past the ends of their arrays.
        let left = &mut hole.dest;
        let right = &mut hole.end;
        let mut out = v_end;

        while v < *left && buf < *right {
            // Consume the greater side.
            // If equal, prefer the right run to maintain stability.
            let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
                decrement_and_get(left)
            } else {
                decrement_and_get(right)
            };
            ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
        }
    }
    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
    // it will now be copied into the hole in `v`.

    // When dropped, copies the range `start..end` into `dest..`.
    struct MergeHole<T> {
        start: *mut T,
        end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for MergeHole<T> {
        fn drop(&mut self) {
            // `T` is not a zero-sized type, so it's okay to divide by its size.
            let len = (self.end as usize - self.start as usize) / size_of::<T>();
            unsafe {
                ptr::copy_nonoverlapping(self.start, self.dest, len);
            }
        }
    }
}
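
// Added sketch, not from the original source: `merge` combines two pre-sorted runs in place,
// given a scratch buffer with capacity for the shorter run (here, 3 elements).
#[cfg(test)]
mod merge_tests {
    use super::merge;

    #[test]
    fn merges_two_sorted_runs() {
        let mut v = [1, 3, 5, 2, 4, 6];
        // Capacity 3 is enough because the shorter run has length 3.
        let mut buf = Vec::<i32>::with_capacity(3);
        unsafe {
            merge(&mut v, 3, buf.as_mut_ptr(), &|a: &i32, b: &i32| a < b);
        }
        assert_eq!(v, [1, 2, 3, 4, 5, 6]);
    }
}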

/// The result of merge sort.
#[must_use]
#[derive(Clone, Copy, PartialEq, Eq)]
enum MergesortResult {
    /// The slice has already been sorted.
    NonDescending,
    /// The slice has been descending and therefore it was left intact.
    Descending,
    /// The slice was sorted.
    Sorted,
}

/// A sorted run that starts at index `start` and is of length `len`.
#[derive(Clone, Copy)]
struct Run {
    start: usize,
    len: usize,
}

/// Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
/// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
/// algorithm should continue building a new run instead, `None` is returned.
///
/// TimSort is infamous for its buggy implementations, as described here:
/// http://envisage-project.eu/timsort-specification-and-verification/
///
/// The gist of the story is: we must enforce the invariants on the top four runs on the stack.
/// Enforcing them on just the top three is not sufficient to ensure that the invariants will
/// still hold for *all* runs in the stack.
///
/// This function correctly checks invariants for the top four runs. Additionally, if the top
/// run starts at index 0, it will always demand a merge operation until the stack is fully
/// collapsed, in order to complete the sort.
#[inline]
fn collapse(runs: &[Run]) -> Option<usize> {
    let n = runs.len();

    if n >= 2
        && (runs[n - 1].start == 0
            || runs[n - 2].len <= runs[n - 1].len
            || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
            || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
    {
        if n >= 3 && runs[n - 3].len < runs[n - 1].len {
            Some(n - 3)
        } else {
            Some(n - 2)
        }
    } else {
        None
    }
}
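
// Added illustration, not part of the original code: a few hand-built stacks showing which pair
// of runs `collapse` asks to merge. Runs are discovered while scanning `v` backwards, so the most
// recently pushed run has the smallest `start`.
#[cfg(test)]
mod collapse_tests {
    use super::{collapse, Run};

    #[test]
    fn picks_expected_merge_candidate() {
        let run = |start, len| Run { start, len };

        // Both invariants hold (100 > 50 > 20 and 100 > 50 + 20): keep building runs.
        assert_eq!(collapse(&[run(170, 100), run(120, 50), run(100, 20)]), None);

        // Invariant 1 is violated at the top (20 <= 25): merge `runs[1]` and `runs[2]`.
        assert_eq!(collapse(&[run(55, 30), run(35, 20), run(10, 25)]), Some(1));

        // The top run starts at index 0, so merging is demanded until the stack collapses.
        assert_eq!(collapse(&[run(5, 3), run(0, 5)]), Some(0));
    }
}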

/// Sorts a slice using merge sort, unless it is already in descending order.
///
/// This function doesn't modify the slice if it is already non-descending or descending.
/// Otherwise, it sorts the slice into non-descending order.
///
/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
/// [here](https://svn.python.org/projects/python/trunk/Objects/listsort.txt).
///
/// The algorithm identifies strictly descending and non-descending subsequences, which are called
/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
/// satisfied:
///
/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
///
/// The invariants ensure that the total running time is `O(n log n)` worst-case.
///
/// # Safety
///
/// The argument `buf` is used as a temporary buffer and must be at least as long as `v`.
unsafe fn mergesort<T, F>(v: &mut [T], buf: *mut T, is_less: &F) -> MergesortResult
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Very short runs are extended using insertion sort to span at least this many elements.
    const MIN_RUN: usize = 10;

    let len = v.len();

    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
    // strange decision, but consider the fact that merges more often go in the opposite direction
    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
    // backwards. To conclude, identifying runs by traversing backwards improves performance.
    let mut runs = vec![];
    let mut end = len;
    while end > 0 {
        // Find the next natural run, and reverse it if it's strictly descending.
        let mut start = end - 1;

        if start > 0 {
            start -= 1;

            if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
                while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this descending run covers the whole slice, return immediately.
                if start == 0 && end == len {
                    return MergesortResult::Descending;
                } else {
                    v[start..end].reverse();
                }
            } else {
                while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this non-descending run covers the whole slice, return immediately.
                if end - start == len {
                    return MergesortResult::NonDescending;
                }
            }
        }

        // Insert some more elements into the run if it's too short. Insertion sort is faster than
        // merge sort on short sequences, so this significantly improves performance.
        while start > 0 && end - start < MIN_RUN {
            start -= 1;
            insert_head(&mut v[start..end], &is_less);
        }

        // Push this run onto the stack.
        runs.push(Run {
            start,
            len: end - start,
        });
        end = start;

        // Merge some pairs of adjacent runs to satisfy the invariants.
        while let Some(r) = collapse(&runs) {
            let left = runs[r + 1];
            let right = runs[r];
            merge(
                &mut v[left.start..right.start + right.len],
                left.len,
                buf,
                &is_less,
            );

            runs[r] = Run {
                start: left.start,
                len: left.len + right.len,
            };
            runs.remove(r + 1);
        }
    }

    // Finally, exactly one run must remain in the stack.
    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);

    // The original order of the slice was neither non-descending nor descending.
    MergesortResult::Sorted
}
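
// Added sketch, not from the original source: the sequential `mergesort` reports whether the
// slice was already non-descending, strictly descending (and therefore left intact), or had to
// be sorted.
#[cfg(test)]
mod mergesort_tests {
    use super::{mergesort, MergesortResult};

    #[test]
    fn reports_presorted_and_descending_inputs() {
        let less = |a: &i32, b: &i32| a < b;

        let mut asc = vec![1, 2, 3, 4, 5];
        let mut desc = vec![5, 4, 3, 2, 1];
        let mut mixed = vec![3, 1, 4, 1, 5, 9, 2, 6];

        // Scratch buffer at least as long as the longest slice sorted below.
        let mut buf = Vec::<i32>::with_capacity(mixed.len());
        unsafe {
            assert!(mergesort(&mut asc, buf.as_mut_ptr(), &less) == MergesortResult::NonDescending);
            assert!(mergesort(&mut desc, buf.as_mut_ptr(), &less) == MergesortResult::Descending);
            assert!(mergesort(&mut mixed, buf.as_mut_ptr(), &less) == MergesortResult::Sorted);
        }
        // The descending slice is left untouched; the caller is expected to reverse it.
        assert_eq!(desc, [5, 4, 3, 2, 1]);
        assert_eq!(mixed, [1, 1, 2, 3, 4, 5, 6, 9]);
    }
}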

////////////////////////////////////////////////////////////////////////////
// Everything above this line is copied from `std::slice::sort` (with very minor tweaks).
// Everything below this line is parallelization.
////////////////////////////////////////////////////////////////////////////

/// Splits two sorted slices so that they can be merged in parallel.
///
/// Returns two indices `(a, b)` so that slices `left[..a]` and `right[..b]` come before
/// `left[a..]` and `right[b..]`.
fn split_for_merge<T, F>(left: &[T], right: &[T], is_less: &F) -> (usize, usize)
where
    F: Fn(&T, &T) -> bool,
{
    let left_len = left.len();
    let right_len = right.len();

    if left_len >= right_len {
        let left_mid = left_len / 2;

        // Find the first element in `right` that is greater than or equal to `left[left_mid]`.
        let mut a = 0;
        let mut b = right_len;
        while a < b {
            let m = a + (b - a) / 2;
            if is_less(&right[m], &left[left_mid]) {
                a = m + 1;
            } else {
                b = m;
            }
        }

        (left_mid, a)
    } else {
        let right_mid = right_len / 2;

        // Find the first element in `left` that is greater than `right[right_mid]`.
        let mut a = 0;
        let mut b = left_len;
        while a < b {
            let m = a + (b - a) / 2;
            if is_less(&right[right_mid], &left[m]) {
                b = m;
            } else {
                a = m + 1;
            }
        }

        (a, right_mid)
    }
}

/// Merges slices `left` and `right` in parallel and stores the result into `dest`.
///
/// # Safety
///
/// The `dest` pointer must have enough space to store the result.
///
/// Even if `is_less` panics at any point during the merge process, this function will fully copy
/// all elements from `left` and `right` into `dest` (not necessarily in sorted order).
unsafe fn par_merge<T, F>(left: &mut [T], right: &mut [T], dest: *mut T, is_less: &F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices whose lengths sum up to this value are merged sequentially. This number is slightly
    // larger than `CHUNK_LENGTH`, and the reason is that merging is faster than merge sorting, so
    // merging needs a bit coarser granularity in order to hide the overhead of Rayon's task
    // scheduling.
    const MAX_SEQUENTIAL: usize = 5000;

    let left_len = left.len();
    let right_len = right.len();

    // Intermediate state of the merge process, which serves two purposes:
    // 1. Protects integrity of `dest` from panics in `is_less`.
    // 2. Copies the remaining elements as soon as one of the two sides is exhausted.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the merge process, `s` will get dropped and copy the
    // remaining parts of `left` and `right` into `dest`.
    let mut s = State {
        left_start: left.as_mut_ptr(),
        left_end: left.as_mut_ptr().add(left_len),
        right_start: right.as_mut_ptr(),
        right_end: right.as_mut_ptr().add(right_len),
        dest,
    };

    if left_len == 0 || right_len == 0 || left_len + right_len < MAX_SEQUENTIAL {
        while s.left_start < s.left_end && s.right_start < s.right_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*s.right_start, &*s.left_start) {
                get_and_increment(&mut s.right_start)
            } else {
                get_and_increment(&mut s.left_start)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(&mut s.dest), 1);
        }
    } else {
        // Function `split_for_merge` might panic. If that happens, `s` will get destructed and copy
        // the whole `left` and `right` into `dest`.
        let (left_mid, right_mid) = split_for_merge(left, right, is_less);
        let (left_l, left_r) = left.split_at_mut(left_mid);
        let (right_l, right_r) = right.split_at_mut(right_mid);

        // Prevent the destructor of `s` from running. Rayon will ensure that both calls to
        // `par_merge` happen. If one of the two calls panics, they will ensure that elements still
        // get copied into `dest_l` and `dest_r`.
        mem::forget(s);

        // Convert the pointers to `usize` because `*mut T` is not `Send`.
        let dest_l = dest as usize;
        let dest_r = dest.add(left_l.len() + right_l.len()) as usize;
        rayon_core::join(
            || par_merge(left_l, right_l, dest_l as *mut T, is_less),
            || par_merge(left_r, right_r, dest_r as *mut T, is_less),
        );
    }
    // Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements
    // all at once.

    // When dropped, copies arrays `left_start..left_end` and `right_start..right_end` into `dest`,
    // in that order.
    struct State<T> {
        left_start: *mut T,
        left_end: *mut T,
        right_start: *mut T,
        right_end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for State<T> {
        fn drop(&mut self) {
            let size = size_of::<T>();
            let left_len = (self.left_end as usize - self.left_start as usize) / size;
            let right_len = (self.right_end as usize - self.right_start as usize) / size;

            // Copy array `left`, followed by `right`.
            unsafe {
                ptr::copy_nonoverlapping(self.left_start, self.dest, left_len);
                self.dest = self.dest.add(left_len);
                ptr::copy_nonoverlapping(self.right_start, self.dest, right_len);
            }
        }
    }
}
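
// Added sketch, not from the original source: `par_merge` fully copies both runs into the
// destination; this small input takes the sequential fallback path.
#[cfg(test)]
mod par_merge_tests {
    use super::par_merge;

    #[test]
    fn merges_into_destination() {
        let mut left = vec![1, 4, 7];
        let mut right = vec![2, 3, 9];
        let mut out = Vec::<i32>::with_capacity(left.len() + right.len());
        unsafe {
            par_merge(&mut left, &mut right, out.as_mut_ptr(), &|a: &i32, b: &i32| a < b);
            // All six elements were copied into `out`, so its length may now be set.
            out.set_len(6);
        }
        assert_eq!(out, [1, 2, 3, 4, 7, 9]);
    }
}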

/// Recursively merges pre-sorted chunks inside `v`.
///
/// Chunks of `v` are stored in `chunks` as intervals (inclusive left and exclusive right bound).
/// Argument `buf` is an auxiliary buffer that will be used during the procedure.
/// If `into_buf` is true, the result will be stored into `buf`, otherwise it will be in `v`.
///
/// # Safety
///
/// The number of chunks must be positive and they must be adjacent: the right bound of each chunk
/// must equal the left bound of the following chunk.
///
/// The buffer must be at least as long as `v`.
unsafe fn recurse<T, F>(
    v: *mut T,
    buf: *mut T,
    chunks: &[(usize, usize)],
    into_buf: bool,
    is_less: &F,
) where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    let len = chunks.len();
    debug_assert!(len > 0);

    // Base case of the algorithm.
    // If only one chunk is remaining, there's no more work to split and merge.
    if len == 1 {
        if into_buf {
            // Copy the chunk from `v` into `buf`.
            let (start, end) = chunks[0];
            let src = v.add(start);
            let dest = buf.add(start);
            ptr::copy_nonoverlapping(src, dest, end - start);
        }
        return;
    }

    // Split the chunks into two halves.
    let (start, _) = chunks[0];
    let (mid, _) = chunks[len / 2];
    let (_, end) = chunks[len - 1];
    let (left, right) = chunks.split_at(len / 2);

    // After recursive calls finish we'll have to merge chunks `(start, mid)` and `(mid, end)` from
    // `src` into `dest`. If the current invocation has to store the result into `buf`, we'll
    // merge chunks from `v` into `buf`, and vice versa.
    //
    // Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge`
    // merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second
    // level etc.
    let (src, dest) = if into_buf { (v, buf) } else { (buf, v) };

    // Panic safety:
    //
    // If `is_less` panics at any point during the recursive calls, the destructor of `guard` will
    // be executed, thus copying everything from `src` into `dest`. This way we ensure that all
    // chunks are in fact copied into `dest`, even if the merge process doesn't finish.
    let guard = CopyOnDrop {
        src: src.add(start),
        dest: dest.add(start),
        len: end - start,
    };

    // Convert the pointers to `usize` because `*mut T` is not `Send`.
    let v = v as usize;
    let buf = buf as usize;
    rayon_core::join(
        || recurse(v as *mut T, buf as *mut T, left, !into_buf, is_less),
        || recurse(v as *mut T, buf as *mut T, right, !into_buf, is_less),
    );

    // Everything went all right - recursive calls didn't panic.
    // Forget the guard in order to prevent its destructor from running.
    mem::forget(guard);

    // Merge chunks `(start, mid)` and `(mid, end)` from `src` into `dest`.
    let src_left = slice::from_raw_parts_mut(src.add(start), mid - start);
    let src_right = slice::from_raw_parts_mut(src.add(mid), end - mid);
    par_merge(src_left, src_right, dest.add(start), is_less);
}
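
// Added sketch, not from the original source: two adjacent pre-sorted chunks of `v` get merged;
// with `into_buf == false` the merged result ends up back in `v`.
#[cfg(test)]
mod recurse_tests {
    use super::recurse;

    #[test]
    fn merges_adjacent_chunks_in_place() {
        let mut v = vec![1, 2, 3, 0, 5, 9];
        let mut buf = Vec::<i32>::with_capacity(v.len());
        let chunks: [(usize, usize); 2] = [(0, 3), (3, 6)];
        unsafe {
            recurse(v.as_mut_ptr(), buf.as_mut_ptr(), &chunks, false, &|a: &i32, b: &i32| a < b);
        }
        assert_eq!(v, [0, 1, 2, 3, 5, 9]);
    }
}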

/// Sorts `v` using merge sort in parallel.
///
/// The algorithm is stable, allocates memory, and runs in `O(n log n)` worst-case time.
/// The allocated temporary buffer has the same length as `v`.
pub(super) fn par_mergesort<T, F>(v: &mut [T], is_less: F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices of up to this length get sorted using insertion sort in order to avoid the cost of
    // buffer allocation.
    const MAX_INSERTION: usize = 20;
    // The length of initial chunks. This number is as small as possible while keeping the
    // overhead of Rayon's task scheduling negligible.
    const CHUNK_LENGTH: usize = 2000;

    // Sorting has no meaningful behavior on zero-sized types.
    if size_of::<T>() == 0 {
        return;
    }

    let len = v.len();

    // Short slices get sorted in-place via insertion sort to avoid allocations.
    if len <= MAX_INSERTION {
        if len >= 2 {
            for i in (0..len - 1).rev() {
                insert_head(&mut v[i..], &is_less);
            }
        }
        return;
    }

    // Allocate a buffer to use as scratch memory. We keep its length at 0 so that it can hold
    // shallow copies of the contents of `v` without risking destructors running on those copies
    // if `is_less` panics.
    let mut buf = Vec::<T>::with_capacity(len);
    let buf = buf.as_mut_ptr();

    // If the slice is not longer than one chunk would be, do sequential merge sort and return.
    if len <= CHUNK_LENGTH {
        let res = unsafe { mergesort(v, buf, &is_less) };
        if res == MergesortResult::Descending {
            v.reverse();
        }
        return;
    }

    // Split the slice into chunks and merge sort them in parallel.
    // However, descending chunks will not be sorted - they will be simply left intact.
    let mut iter = {
        // Convert the pointer to `usize` because `*mut T` is not `Send`.
        let buf = buf as usize;

        v.par_chunks_mut(CHUNK_LENGTH)
            .with_max_len(1)
            .enumerate()
            .map(|(i, chunk)| {
                let l = CHUNK_LENGTH * i;
                let r = l + chunk.len();
                unsafe {
                    let buf = (buf as *mut T).add(l);
                    (l, r, mergesort(chunk, buf, &is_less))
                }
            })
            .collect::<Vec<_>>()
            .into_iter()
            .peekable()
    };

    // Now attempt to concatenate adjacent chunks that were left intact.
    let mut chunks = Vec::with_capacity(iter.len());

    while let Some((a, mut b, res)) = iter.next() {
        // If this chunk was not modified by the sort procedure...
        if res != MergesortResult::Sorted {
            while let Some(&(x, y, r)) = iter.peek() {
                // If the following chunk is of the same type and can be concatenated...
                if r == res && (r == MergesortResult::Descending) == is_less(&v[x], &v[x - 1]) {
                    // Concatenate them.
                    b = y;
                    iter.next();
                } else {
                    break;
                }
            }
        }

        // Descending chunks must be reversed.
        if res == MergesortResult::Descending {
            v[a..b].reverse();
        }

        chunks.push((a, b));
    }

    // All chunks are properly sorted.
    // Now we just have to merge them together.
    unsafe {
        recurse(v.as_mut_ptr(), buf, &chunks, false, &is_less);
    }
}
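
// Added sketch, not from the original source: a slice longer than `CHUNK_LENGTH` exercises the
// parallel chunked path of `par_mergesort`.
#[cfg(test)]
mod par_mergesort_tests {
    use super::par_mergesort;

    #[test]
    fn sorts_large_slice() {
        // A deterministic pseudo-shuffle of 0..10_000 (7919 is coprime with 10_000).
        let mut v: Vec<u32> = (0..10_000).map(|i| (i * 7919) % 10_000).collect();
        let mut expected = v.clone();
        expected.sort();

        par_mergesort(&mut v, |a: &u32, b: &u32| a < b);
        assert_eq!(v, expected);
    }
}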

#[cfg(test)]
mod tests {
    use super::split_for_merge;
    use rand::distributions::Uniform;
    use rand::{thread_rng, Rng};

    #[test]
    fn test_split_for_merge() {
        fn check(left: &[u32], right: &[u32]) {
            let (l, r) = split_for_merge(left, right, &|&a, &b| a < b);
            assert!(left[..l]
                .iter()
                .all(|&x| right[r..].iter().all(|&y| x <= y)));
            assert!(right[..r].iter().all(|&x| left[l..].iter().all(|&y| x < y)));
        }

        check(&[1, 2, 2, 2, 2, 3], &[1, 2, 2, 2, 2, 3]);
        check(&[1, 2, 2, 2, 2, 3], &[]);
        check(&[], &[1, 2, 2, 2, 2, 3]);

        let ref mut rng = thread_rng();

        for _ in 0..100 {
            let limit: u32 = rng.gen_range(1..21);
            let left_len: usize = rng.gen_range(0..20);
            let right_len: usize = rng.gen_range(0..20);

            let mut left = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(left_len)
                .collect::<Vec<_>>();
            let mut right = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(right_len)
                .collect::<Vec<_>>();

            left.sort();
            right.sort();
            check(&left, &right);
        }
    }
}