//! Parallel merge sort.
//!
//! This implementation is copied verbatim from `std::slice::sort` and then parallelized.
//! The only difference from the original is that the sequential `mergesort` returns
//! `MergesortResult` and leaves descending arrays intact.
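//!
//! A minimal sketch of how this module is normally reached, through the public
//! slice API rather than by calling anything here directly (`par_sort` is the
//! stable sort built on `par_mergesort`):
//!
//! ```ignore
//! use rayon::prelude::*;
//!
//! let mut v = vec![5, 1, 4, 2, 3];
//! v.par_sort(); // stable, parallel, O(n log n)
//! assert_eq!(v, [1, 2, 3, 4, 5]);
//! ```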

use iter::*;
use rayon_core;
use slice::ParallelSliceMut;
use std::mem;
use std::mem::size_of;
use std::ptr;
use std::slice;

unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
    let old = *ptr;
    *ptr = ptr.offset(1);
    old
}

unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
    *ptr = ptr.offset(-1);
    *ptr
}

/// When dropped, copies from `src` into `dest` a sequence of length `len`.
struct CopyOnDrop<T> {
    src: *mut T,
    dest: *mut T,
    len: usize,
}

impl<T> Drop for CopyOnDrop<T> {
    fn drop(&mut self) {
        unsafe {
            ptr::copy_nonoverlapping(self.src, self.dest, self.len);
        }
    }
}

/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
///
/// This is the integral subroutine of insertion sort.
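///
/// A minimal sketch of the intended behavior (illustrative only; this function is
/// private, so the fence is ignored rather than run as a doctest):
///
/// ```ignore
/// let mut v = [3, 1, 2, 4]; // `v[1..]` is already sorted
/// insert_head(&mut v, &|a, b| a < b);
/// assert_eq!(v, [1, 2, 3, 4]);
/// ```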
fn insert_head<T, F>(v: &mut [T], is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    if v.len() >= 2 && is_less(&v[1], &v[0]) {
        unsafe {
            // There are three ways to implement insertion here:
            //
            // 1. Swap adjacent elements until the first one gets to its final destination.
            //    However, this way we copy data around more than is necessary. If elements are big
            //    structures (costly to copy), this method will be slow.
            //
            // 2. Iterate until the right place for the first element is found. Then shift the
            //    elements succeeding it to make room for it and finally place it into the
            //    remaining hole. This is a good method.
            //
            // 3. Copy the first element into a temporary variable. Iterate until the right place
            //    for it is found. As we go along, copy every traversed element into the slot
            //    preceding it. Finally, copy data from the temporary variable into the remaining
            //    hole. This method is very good. Benchmarks demonstrated slightly better
            //    performance than with the 2nd method.
            //
            // All methods were benchmarked, and the 3rd showed best results. So we chose that one.
            let mut tmp = NoDrop {
                value: Some(ptr::read(&v[0])),
            };

            // Intermediate state of the insertion process is always tracked by `hole`, which
            // serves two purposes:
            // 1. Protects integrity of `v` from panics in `is_less`.
            // 2. Fills the remaining hole in `v` in the end.
            //
            // Panic safety:
            //
            // If `is_less` panics at any point during the process, `hole` will get dropped and
            // fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
            // initially held exactly once.
            let mut hole = InsertionHole {
                src: tmp.value.as_mut().unwrap(),
                dest: &mut v[1],
            };
            ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);

            for i in 2..v.len() {
                if !is_less(&v[i], tmp.value.as_ref().unwrap()) {
                    break;
                }
                ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
                hole.dest = &mut v[i];
            }
            // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
        }
    }

    // Holds a value, but never drops it.
    struct NoDrop<T> {
        value: Option<T>,
    }

    impl<T> Drop for NoDrop<T> {
        fn drop(&mut self) {
            mem::forget(self.value.take());
        }
    }

    // When dropped, copies from `src` into `dest`.
    struct InsertionHole<T> {
        src: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for InsertionHole<T> {
        fn drop(&mut self) {
            unsafe {
                ptr::copy_nonoverlapping(self.src, self.dest, 1);
            }
        }
    }
}

/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
/// stores the result into `v[..]`.
///
/// # Safety
///
/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
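///
/// A minimal sketch of a call site (illustrative only, not a doctest; `merge` is
/// private and `unsafe`):
///
/// ```ignore
/// let mut v = [1, 3, 5, 2, 4, 6]; // two sorted runs: v[..3] and v[3..]
/// let mut buf = Vec::with_capacity(3); // long enough for the shorter run
/// unsafe { merge(&mut v, 3, buf.as_mut_ptr(), &|a, b| a < b) };
/// assert_eq!(v, [1, 2, 3, 4, 5, 6]);
/// ```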
unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &F)
where
    F: Fn(&T, &T) -> bool,
{
    let len = v.len();
    let v = v.as_mut_ptr();
    let v_mid = v.add(mid);
    let v_end = v.add(len);

    // The merge process first copies the shorter run into `buf`. Then it traces the newly copied
    // run and the longer run forwards (or backwards), comparing their next unconsumed elements and
    // copying the lesser (or greater) one into `v`.
    //
    // As soon as the shorter run is fully consumed, the process is done. If the longer run gets
    // consumed first, then we must copy whatever is left of the shorter run into the remaining
    // hole in `v`.
    //
    // Intermediate state of the process is always tracked by `hole`, which serves two purposes:
    // 1. Protects integrity of `v` from panics in `is_less`.
    // 2. Fills the remaining hole in `v` if the longer run gets consumed first.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the process, `hole` will get dropped and fill the
    // hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
    // object it initially held exactly once.
    let mut hole;

    if mid <= len - mid {
        // The left run is shorter.
        ptr::copy_nonoverlapping(v, buf, mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(mid),
            dest: v,
        };

        // Initially, these pointers point to the beginnings of their arrays.
        let left = &mut hole.start;
        let mut right = v_mid;
        let out = &mut hole.dest;

        while *left < hole.end && right < v_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*right, &**left) {
                get_and_increment(&mut right)
            } else {
                get_and_increment(left)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
        }
    } else {
        // The right run is shorter.
        ptr::copy_nonoverlapping(v_mid, buf, len - mid);
        hole = MergeHole {
            start: buf,
            end: buf.add(len - mid),
            dest: v_mid,
        };

        // Initially, these pointers point past the ends of their arrays.
        let left = &mut hole.dest;
        let right = &mut hole.end;
        let mut out = v_end;

        while v < *left && buf < *right {
            // Consume the greater side.
            // If equal, prefer the right run to maintain stability.
            let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
                decrement_and_get(left)
            } else {
                decrement_and_get(right)
            };
            ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
        }
    }
    // Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
    // it will now be copied into the hole in `v`.

    // When dropped, copies the range `start..end` into `dest..`.
    struct MergeHole<T> {
        start: *mut T,
        end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for MergeHole<T> {
        fn drop(&mut self) {
            // `T` is not a zero-sized type, so it's okay to divide by its size.
            let len = (self.end as usize - self.start as usize) / size_of::<T>();
            unsafe {
                ptr::copy_nonoverlapping(self.start, self.dest, len);
            }
        }
    }
}

/// The result of merge sort.
#[must_use]
#[derive(Clone, Copy, PartialEq, Eq)]
enum MergesortResult {
    /// The slice has already been sorted.
    NonDescending,
    /// The slice has been descending and therefore it was left intact.
    Descending,
    /// The slice was sorted.
    Sorted,
}

/// A sorted run that starts at index `start` and is of length `len`.
#[derive(Clone, Copy)]
struct Run {
    start: usize,
    len: usize,
}

/// Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
/// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
/// algorithm should continue building a new run instead, `None` is returned.
///
/// TimSort is infamous for its buggy implementations, as described here:
/// http://envisage-project.eu/timsort-specification-and-verification/
///
/// The gist of the story is: we must enforce the invariants on the top four runs on the stack.
/// Enforcing them on just the top three is not sufficient to ensure that the invariants will
/// still hold for *all* runs in the stack.
///
/// This function correctly checks invariants for the top four runs. Additionally, if the top
/// run starts at index 0, it will always demand a merge operation until the stack is fully
/// collapsed, in order to complete the sort.
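///
/// For example (illustrative values; not a doctest since `collapse` is private):
///
/// ```ignore
/// // Runs are pushed while traversing backwards, so later runs start earlier.
/// let runs = [
///     Run { start: 40, len: 20 },
///     Run { start: 30, len: 10 },
///     Run { start: 10, len: 20 },
/// ];
/// // `runs[1].len <= runs[2].len` violates invariant 1, so a merge is demanded;
/// // since `runs[0].len >= runs[2].len`, the top pair `runs[1]` and `runs[2]` merges.
/// assert_eq!(collapse(&runs), Some(1));
/// ```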
#[inline]
fn collapse(runs: &[Run]) -> Option<usize> {
    let n = runs.len();

    if n >= 2
        && (runs[n - 1].start == 0
            || runs[n - 2].len <= runs[n - 1].len
            || (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
            || (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
    {
        if n >= 3 && runs[n - 3].len < runs[n - 1].len {
            Some(n - 3)
        } else {
            Some(n - 2)
        }
    } else {
        None
    }
}

/// Sorts a slice using merge sort, unless it is already in descending order.
///
/// This function doesn't modify the slice if it is already non-descending or descending.
/// Otherwise, it sorts the slice into non-descending order.
///
/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
/// [here](http://svn.python.org/projects/python/trunk/Objects/listsort.txt).
///
/// The algorithm identifies strictly descending and non-descending subsequences, which are called
/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
/// satisfied:
///
/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
///
/// The invariants ensure that the total running time is `O(n log n)` worst-case.
///
/// # Safety
///
/// The argument `buf` is used as a temporary buffer and must be at least as long as `v`.
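///
/// A sketch of handling the three possible outcomes (illustrative only; mirrors the
/// call site in `par_mergesort` below):
///
/// ```ignore
/// let mut buf = Vec::with_capacity(v.len());
/// match unsafe { mergesort(v, buf.as_mut_ptr(), &is_less) } {
///     MergesortResult::NonDescending => {} // was already sorted; left intact
///     MergesortResult::Descending => v.reverse(), // left intact; caller must reverse
///     MergesortResult::Sorted => {} // sorted in place
/// }
/// ```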
unsafe fn mergesort<T, F>(v: &mut [T], buf: *mut T, is_less: &F) -> MergesortResult
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Very short runs are extended using insertion sort to span at least this many elements.
    const MIN_RUN: usize = 10;

    let len = v.len();

    // In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
    // strange decision, but consider the fact that merges more often go in the opposite direction
    // (forwards). According to benchmarks, merging forwards is slightly faster than merging
    // backwards. To conclude, identifying runs by traversing backwards improves performance.
    let mut runs = vec![];
    let mut end = len;
    while end > 0 {
        // Find the next natural run, and reverse it if it's strictly descending.
        let mut start = end - 1;

        if start > 0 {
            start -= 1;

            if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
                while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this descending run covers the whole slice, return immediately.
                if start == 0 && end == len {
                    return MergesortResult::Descending;
                } else {
                    v[start..end].reverse();
                }
            } else {
                while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
                    start -= 1;
                }

                // If this non-descending run covers the whole slice, return immediately.
                if end - start == len {
                    return MergesortResult::NonDescending;
                }
            }
        }

        // Insert some more elements into the run if it's too short. Insertion sort is faster than
        // merge sort on short sequences, so this significantly improves performance.
        while start > 0 && end - start < MIN_RUN {
            start -= 1;
            insert_head(&mut v[start..end], &is_less);
        }

        // Push this run onto the stack.
        runs.push(Run {
            start,
            len: end - start,
        });
        end = start;

        // Merge some pairs of adjacent runs to satisfy the invariants.
        while let Some(r) = collapse(&runs) {
            let left = runs[r + 1];
            let right = runs[r];
            merge(
                &mut v[left.start..right.start + right.len],
                left.len,
                buf,
                &is_less,
            );

            runs[r] = Run {
                start: left.start,
                len: left.len + right.len,
            };
            runs.remove(r + 1);
        }
    }

    // Finally, exactly one run must remain in the stack.
    debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);

    // The original order of the slice was neither non-descending nor descending.
    MergesortResult::Sorted
}

////////////////////////////////////////////////////////////////////////////
// Everything above this line is copied from `std::slice::sort` (with very minor tweaks).
// Everything below this line is parallelization.
////////////////////////////////////////////////////////////////////////////

/// Splits two sorted slices so that they can be merged in parallel.
///
/// Returns two indices `(a, b)` so that slices `left[..a]` and `right[..b]` come before
/// `left[a..]` and `right[b..]`.
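///
/// A small worked example (illustrative; the function is private so the fence is
/// not run as a doctest):
///
/// ```ignore
/// // `left` is longer, so it is split at its midpoint `left[2] == 5`, and the
/// // matching split point in `right` is found by binary search.
/// let (a, b) = split_for_merge(&[1, 3, 5, 7], &[2, 4, 6], &|x, y| x < y);
/// assert_eq!((a, b), (2, 2)); // [1, 3] and [2, 4] precede [5, 7] and [6]
/// ```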
fn split_for_merge<T, F>(left: &[T], right: &[T], is_less: &F) -> (usize, usize)
where
    F: Fn(&T, &T) -> bool,
{
    let left_len = left.len();
    let right_len = right.len();

    if left_len >= right_len {
        let left_mid = left_len / 2;

        // Find the first element in `right` that is greater than or equal to `left[left_mid]`.
        let mut a = 0;
        let mut b = right_len;
        while a < b {
            let m = a + (b - a) / 2;
            if is_less(&right[m], &left[left_mid]) {
                a = m + 1;
            } else {
                b = m;
            }
        }

        (left_mid, a)
    } else {
        let right_mid = right_len / 2;

        // Find the first element in `left` that is greater than `right[right_mid]`.
        let mut a = 0;
        let mut b = left_len;
        while a < b {
            let m = a + (b - a) / 2;
            if is_less(&right[right_mid], &left[m]) {
                b = m;
            } else {
                a = m + 1;
            }
        }

        (a, right_mid)
    }
}

/// Merges slices `left` and `right` in parallel and stores the result into `dest`.
///
/// # Safety
///
/// The `dest` pointer must have enough space to store the result.
///
/// Even if `is_less` panics at any point during the merge process, this function will fully copy
/// all elements from `left` and `right` into `dest` (not necessarily in sorted order).
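///
/// A sketch of a call site (illustrative only; the destination is uninitialized
/// scratch memory, as at the call sites in this module):
///
/// ```ignore
/// let mut left = [1, 3, 5];
/// let mut right = [2, 4];
/// let mut out = Vec::with_capacity(5);
/// unsafe {
///     par_merge(&mut left, &mut right, out.as_mut_ptr(), &|a, b| a < b);
///     out.set_len(5); // all 5 elements were copied into `out`
/// }
/// assert_eq!(out, [1, 2, 3, 4, 5]);
/// ```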
unsafe fn par_merge<T, F>(left: &mut [T], right: &mut [T], dest: *mut T, is_less: &F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices whose lengths sum up to this value are merged sequentially. This number is slightly
    // larger than `CHUNK_LENGTH`, and the reason is that merging is faster than merge sorting, so
    // merging needs a bit coarser granularity in order to hide the overhead of Rayon's task
    // scheduling.
    const MAX_SEQUENTIAL: usize = 5000;

    let left_len = left.len();
    let right_len = right.len();

    // Intermediate state of the merge process, which serves two purposes:
    // 1. Protects integrity of `dest` from panics in `is_less`.
    // 2. Copies the remaining elements as soon as one of the two sides is exhausted.
    //
    // Panic safety:
    //
    // If `is_less` panics at any point during the merge process, `s` will get dropped and copy the
    // remaining parts of `left` and `right` into `dest`.
    let mut s = State {
        left_start: left.as_mut_ptr(),
        left_end: left.as_mut_ptr().add(left_len),
        right_start: right.as_mut_ptr(),
        right_end: right.as_mut_ptr().add(right_len),
        dest,
    };

    if left_len == 0 || right_len == 0 || left_len + right_len < MAX_SEQUENTIAL {
        while s.left_start < s.left_end && s.right_start < s.right_end {
            // Consume the lesser side.
            // If equal, prefer the left run to maintain stability.
            let to_copy = if is_less(&*s.right_start, &*s.left_start) {
                get_and_increment(&mut s.right_start)
            } else {
                get_and_increment(&mut s.left_start)
            };
            ptr::copy_nonoverlapping(to_copy, get_and_increment(&mut s.dest), 1);
        }
    } else {
        // Function `split_for_merge` might panic. If that happens, `s` will get dropped and copy
        // the whole `left` and `right` into `dest`.
        let (left_mid, right_mid) = split_for_merge(left, right, is_less);
        let (left_l, left_r) = left.split_at_mut(left_mid);
        let (right_l, right_r) = right.split_at_mut(right_mid);

        // Prevent the destructor of `s` from running. Rayon will ensure that both calls to
        // `par_merge` happen. If one of the two calls panics, Rayon will still ensure that
        // elements get copied into `dest_l` and `dest_r`.
        mem::forget(s);

        // Convert the pointers to `usize` because `*mut T` is not `Send`.
        let dest_l = dest as usize;
        let dest_r = dest.add(left_l.len() + right_l.len()) as usize;
        rayon_core::join(
            || par_merge(left_l, right_l, dest_l as *mut T, is_less),
            || par_merge(left_r, right_r, dest_r as *mut T, is_less),
        );
    }
    // Finally, `s` gets dropped if we used sequential merge, thus copying the remaining elements
    // all at once.

    // When dropped, copies arrays `left_start..left_end` and `right_start..right_end` into `dest`,
    // in that order.
    struct State<T> {
        left_start: *mut T,
        left_end: *mut T,
        right_start: *mut T,
        right_end: *mut T,
        dest: *mut T,
    }

    impl<T> Drop for State<T> {
        fn drop(&mut self) {
            let size = size_of::<T>();
            let left_len = (self.left_end as usize - self.left_start as usize) / size;
            let right_len = (self.right_end as usize - self.right_start as usize) / size;

            // Copy array `left`, followed by `right`.
            unsafe {
                ptr::copy_nonoverlapping(self.left_start, self.dest, left_len);
                self.dest = self.dest.add(left_len);
                ptr::copy_nonoverlapping(self.right_start, self.dest, right_len);
            }
        }
    }
}

/// Recursively merges pre-sorted chunks inside `v`.
///
/// Chunks of `v` are stored in `chunks` as intervals (inclusive left and exclusive right bound).
/// Argument `buf` is an auxiliary buffer that will be used during the procedure.
/// If `into_buf` is true, the result will be stored into `buf`, otherwise it will be in `v`.
///
/// # Safety
///
/// The number of chunks must be positive and they must be adjacent: the right bound of each chunk
/// must equal the left bound of the following chunk.
///
/// The buffer must be at least as long as `v`.
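///
/// A sketch of the call shape (illustrative; mirrors the invocation at the bottom of
/// `par_mergesort`):
///
/// ```ignore
/// // Three adjacent pre-sorted chunks of `v` are merged into `v` itself
/// // (`into_buf == false`), ping-ponging through `buf` at deeper levels.
/// unsafe {
///     recurse(v.as_mut_ptr(), buf, &[(0, 2000), (2000, 4000), (4000, 5000)], false, &is_less);
/// }
/// ```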
unsafe fn recurse<T, F>(
    v: *mut T,
    buf: *mut T,
    chunks: &[(usize, usize)],
    into_buf: bool,
    is_less: &F,
) where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    let len = chunks.len();
    debug_assert!(len > 0);

    // Base case of the algorithm.
    // If only one chunk is remaining, there's no more work to split and merge.
    if len == 1 {
        if into_buf {
            // Copy the chunk from `v` into `buf`.
            let (start, end) = chunks[0];
            let src = v.add(start);
            let dest = buf.add(start);
            ptr::copy_nonoverlapping(src, dest, end - start);
        }
        return;
    }

    // Split the chunks into two halves.
    let (start, _) = chunks[0];
    let (mid, _) = chunks[len / 2];
    let (_, end) = chunks[len - 1];
    let (left, right) = chunks.split_at(len / 2);

    // After recursive calls finish we'll have to merge chunks `(start, mid)` and `(mid, end)` from
    // `src` into `dest`. If the current invocation has to store the result into `buf`, we'll
    // merge chunks from `v` into `buf`, and vice versa.
    //
    // Recursive calls flip `into_buf` at each level of recursion. More concretely, `par_merge`
    // merges chunks from `buf` into `v` at the first level, from `v` into `buf` at the second
    // level etc.
    let (src, dest) = if into_buf { (v, buf) } else { (buf, v) };

    // Panic safety:
    //
    // If `is_less` panics at any point during the recursive calls, the destructor of `guard` will
    // be executed, thus copying everything from `src` into `dest`. This way we ensure that all
    // chunks are in fact copied into `dest`, even if the merge process doesn't finish.
    let guard = CopyOnDrop {
        src: src.add(start),
        dest: dest.add(start),
        len: end - start,
    };

    // Convert the pointers to `usize` because `*mut T` is not `Send`.
    let v = v as usize;
    let buf = buf as usize;
    rayon_core::join(
        || recurse(v as *mut T, buf as *mut T, left, !into_buf, is_less),
        || recurse(v as *mut T, buf as *mut T, right, !into_buf, is_less),
    );

    // Everything went all right - recursive calls didn't panic.
    // Forget the guard in order to prevent its destructor from running.
    mem::forget(guard);

    // Merge chunks `(start, mid)` and `(mid, end)` from `src` into `dest`.
    let src_left = slice::from_raw_parts_mut(src.add(start), mid - start);
    let src_right = slice::from_raw_parts_mut(src.add(mid), end - mid);
    par_merge(src_left, src_right, dest.add(start), is_less);
}

/// Sorts `v` using merge sort in parallel.
///
/// The algorithm is stable, allocates memory, and `O(n log n)` worst-case.
/// The allocated temporary buffer has the same length as `v`.
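///
/// A sketch of a direct call (illustrative; in practice this function is reached
/// through `ParallelSliceMut::par_sort` and related methods):
///
/// ```ignore
/// let mut v = vec![5, 1, 4, 2, 3];
/// par_mergesort(&mut v, |a, b| a < b);
/// assert_eq!(v, [1, 2, 3, 4, 5]);
/// ```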
pub(super) fn par_mergesort<T, F>(v: &mut [T], is_less: F)
where
    T: Send,
    F: Fn(&T, &T) -> bool + Sync,
{
    // Slices of up to this length get sorted using insertion sort in order to avoid the cost of
    // buffer allocation.
    const MAX_INSERTION: usize = 20;
    // The length of initial chunks. This number is as small as possible but so that the overhead
    // of Rayon's task scheduling is still negligible.
    const CHUNK_LENGTH: usize = 2000;

    // Sorting has no meaningful behavior on zero-sized types.
    if size_of::<T>() == 0 {
        return;
    }

    let len = v.len();

    // Short slices get sorted in-place via insertion sort to avoid allocations.
    if len <= MAX_INSERTION {
        if len >= 2 {
            for i in (0..len - 1).rev() {
                insert_head(&mut v[i..], &is_less);
            }
        }
        return;
    }

    // Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
    // shallow copies of the contents of `v` without risking the dtors running on copies if
    // `is_less` panics.
    let mut buf = Vec::<T>::with_capacity(len);
    let buf = buf.as_mut_ptr();

    // If the slice is not longer than one chunk would be, do sequential merge sort and return.
    if len <= CHUNK_LENGTH {
        let res = unsafe { mergesort(v, buf, &is_less) };
        if res == MergesortResult::Descending {
            v.reverse();
        }
        return;
    }

    // Split the slice into chunks and merge sort them in parallel.
    // However, descending chunks will not be sorted - they will be simply left intact.
    let mut iter = {
        // Convert the pointer to `usize` because `*mut T` is not `Send`.
        let buf = buf as usize;

        v.par_chunks_mut(CHUNK_LENGTH)
            .with_max_len(1)
            .enumerate()
            .map(|(i, chunk)| {
                let l = CHUNK_LENGTH * i;
                let r = l + chunk.len();
                unsafe {
                    let buf = (buf as *mut T).add(l);
                    (l, r, mergesort(chunk, buf, &is_less))
                }
            })
            .collect::<Vec<_>>()
            .into_iter()
            .peekable()
    };

    // Now attempt to concatenate adjacent chunks that were left intact.
    let mut chunks = Vec::with_capacity(iter.len());

    while let Some((a, mut b, res)) = iter.next() {
        // If this chunk was not modified by the sort procedure...
        if res != MergesortResult::Sorted {
            while let Some(&(x, y, r)) = iter.peek() {
                // If the following chunk is of the same type and can be concatenated...
                if r == res && (r == MergesortResult::Descending) == is_less(&v[x], &v[x - 1]) {
                    // Concatenate them.
                    b = y;
                    iter.next();
                } else {
                    break;
                }
            }
        }

        // Descending chunks must be reversed.
        if res == MergesortResult::Descending {
            v[a..b].reverse();
        }

        chunks.push((a, b));
    }

    // All chunks are properly sorted.
    // Now we just have to merge them together.
    unsafe {
        recurse(v.as_mut_ptr(), buf, &chunks, false, &is_less);
    }
}

#[cfg(test)]
mod tests {
    use super::split_for_merge;
    use rand::distributions::Uniform;
    use rand::{thread_rng, Rng};

    #[test]
    fn test_split_for_merge() {
        fn check(left: &[u32], right: &[u32]) {
            let (l, r) = split_for_merge(left, right, &|&a, &b| a < b);
            assert!(left[..l]
                .iter()
                .all(|&x| right[r..].iter().all(|&y| x <= y)));
            assert!(right[..r].iter().all(|&x| left[l..].iter().all(|&y| x < y)));
        }

        check(&[1, 2, 2, 2, 2, 3], &[1, 2, 2, 2, 2, 3]);
        check(&[1, 2, 2, 2, 2, 3], &[]);
        check(&[], &[1, 2, 2, 2, 2, 3]);

        let mut rng = thread_rng();

        for _ in 0..100 {
            let limit: u32 = rng.gen_range(1, 21);
            let left_len: usize = rng.gen_range(0, 20);
            let right_len: usize = rng.gen_range(0, 20);

            let mut left = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(left_len)
                .collect::<Vec<_>>();
            let mut right = rng
                .sample_iter(&Uniform::new(0, limit))
                .take(right_len)
                .collect::<Vec<_>>();

            left.sort();
            right.sort();
            check(&left, &right);
        }
    }
}