1 // Pattern-defeating quicksort
2 
3 //              Copyright Orson Peters 2021.
4 // Distributed under the Boost Software License, Version 1.0.
5 //    (See accompanying file LICENSE_1_0.txt or copy at
6 //          http://www.boost.org/LICENSE_1_0.txt)
7 
8 // See http://www.boost.org/libs/sort/ for library home page.
9 
10 
11 #ifndef BOOST_SORT_PDQSORT_HPP
12 #define BOOST_SORT_PDQSORT_HPP
13 
14 #include <algorithm>
15 #include <cstddef>
16 #include <functional>
17 #include <iterator>
18 #include <utility>
19 #include <boost/type_traits.hpp>
20 
21 #if __cplusplus >= 201103L
22     #include <cstdint>
23     #define BOOST_PDQSORT_PREFER_MOVE(x) std::move(x)
24 #else
25     #define BOOST_PDQSORT_PREFER_MOVE(x) (x)
26 #endif
27 
28 namespace boost {
29 namespace sort {
30 
31 namespace pdqsort_detail {
32     enum {
33         // Partitions below this size are sorted using insertion sort.
34         insertion_sort_threshold = 24,
35 
36         // Partitions above this size use Tukey's ninther to select the pivot.
37         ninther_threshold = 128,
38 
39         // When we detect an already sorted partition, attempt an insertion sort that allows this
40         // amount of element moves before giving up.
41         partial_insertion_sort_limit = 8,
42 
43         // Must be multiple of 8 due to loop unrolling, and < 256 to fit in unsigned char.
44         block_size = 64,
45 
46         // Cacheline size, assumes power of two.
47         cacheline_size = 64
48     };
49 
50     template<class T> struct is_default_compare : boost::false_type { };
51     template<class T> struct is_default_compare<std::less<T> > : boost::true_type { };
52     template<class T> struct is_default_compare<std::greater<T> > : boost::true_type { };
53 
54     // Returns floor(log2(n)), assumes n > 0.
55     template<class T>
log2(T n)56     inline int log2(T n) {
57         int log = 0;
58         while (n >>= 1) ++log;
59         return log;
60     }
61 
62     // Sorts [begin, end) using insertion sort with the given comparison function.
63     template<class Iter, class Compare>
insertion_sort(Iter begin,Iter end,Compare comp)64     inline void insertion_sort(Iter begin, Iter end, Compare comp) {
65         typedef typename std::iterator_traits<Iter>::value_type T;
66         if (begin == end) return;
67 
68         for (Iter cur = begin + 1; cur != end; ++cur) {
69             Iter sift = cur;
70             Iter sift_1 = cur - 1;
71 
72             // Compare first so we can avoid 2 moves for an element already positioned correctly.
73             if (comp(*sift, *sift_1)) {
74                 T tmp = BOOST_PDQSORT_PREFER_MOVE(*sift);
75 
76                 do { *sift-- = BOOST_PDQSORT_PREFER_MOVE(*sift_1); }
77                 while (sift != begin && comp(tmp, *--sift_1));
78 
79                 *sift = BOOST_PDQSORT_PREFER_MOVE(tmp);
80             }
81         }
82     }
83 
84     // Sorts [begin, end) using insertion sort with the given comparison function. Assumes
85     // *(begin - 1) is an element smaller than or equal to any element in [begin, end).
86     template<class Iter, class Compare>
unguarded_insertion_sort(Iter begin,Iter end,Compare comp)87     inline void unguarded_insertion_sort(Iter begin, Iter end, Compare comp) {
88         typedef typename std::iterator_traits<Iter>::value_type T;
89         if (begin == end) return;
90 
91         for (Iter cur = begin + 1; cur != end; ++cur) {
92             Iter sift = cur;
93             Iter sift_1 = cur - 1;
94 
95             // Compare first so we can avoid 2 moves for an element already positioned correctly.
96             if (comp(*sift, *sift_1)) {
97                 T tmp = BOOST_PDQSORT_PREFER_MOVE(*sift);
98 
99                 do { *sift-- = BOOST_PDQSORT_PREFER_MOVE(*sift_1); }
100                 while (comp(tmp, *--sift_1));
101 
102                 *sift = BOOST_PDQSORT_PREFER_MOVE(tmp);
103             }
104         }
105     }
106 
107     // Attempts to use insertion sort on [begin, end). Will return false if more than
108     // partial_insertion_sort_limit elements were moved, and abort sorting. Otherwise it will
109     // successfully sort and return true.
110     template<class Iter, class Compare>
partial_insertion_sort(Iter begin,Iter end,Compare comp)111     inline bool partial_insertion_sort(Iter begin, Iter end, Compare comp) {
112         typedef typename std::iterator_traits<Iter>::value_type T;
113         if (begin == end) return true;
114 
115         std::size_t limit = 0;
116         for (Iter cur = begin + 1; cur != end; ++cur) {
117             Iter sift = cur;
118             Iter sift_1 = cur - 1;
119 
120             // Compare first so we can avoid 2 moves for an element already positioned correctly.
121             if (comp(*sift, *sift_1)) {
122                 T tmp = BOOST_PDQSORT_PREFER_MOVE(*sift);
123 
124                 do { *sift-- = BOOST_PDQSORT_PREFER_MOVE(*sift_1); }
125                 while (sift != begin && comp(tmp, *--sift_1));
126 
127                 *sift = BOOST_PDQSORT_PREFER_MOVE(tmp);
128                 limit += cur - sift;
129             }
130 
131             if (limit > partial_insertion_sort_limit) return false;
132         }
133 
134         return true;
135     }
136 
137     template<class Iter, class Compare>
sort2(Iter a,Iter b,Compare comp)138     inline void sort2(Iter a, Iter b, Compare comp) {
139         if (comp(*b, *a)) std::iter_swap(a, b);
140     }
141 
142     // Sorts the elements *a, *b and *c using comparison function comp.
143     template<class Iter, class Compare>
sort3(Iter a,Iter b,Iter c,Compare comp)144     inline void sort3(Iter a, Iter b, Iter c, Compare comp) {
145         sort2(a, b, comp);
146         sort2(b, c, comp);
147         sort2(a, b, comp);
148     }
149 
150     template<class T>
align_cacheline(T * p)151     inline T* align_cacheline(T* p) {
152 #if defined(UINTPTR_MAX) && __cplusplus >= 201103L
153         std::uintptr_t ip = reinterpret_cast<std::uintptr_t>(p);
154 #else
155         std::size_t ip = reinterpret_cast<std::size_t>(p);
156 #endif
157         ip = (ip + cacheline_size - 1) & -cacheline_size;
158         return reinterpret_cast<T*>(ip);
159     }
160 
161     template<class Iter>
swap_offsets(Iter first,Iter last,unsigned char * offsets_l,unsigned char * offsets_r,size_t num,bool use_swaps)162     inline void swap_offsets(Iter first, Iter last,
163                              unsigned char* offsets_l, unsigned char* offsets_r,
164                              size_t num, bool use_swaps) {
165         typedef typename std::iterator_traits<Iter>::value_type T;
166         if (use_swaps) {
167             // This case is needed for the descending distribution, where we need
168             // to have proper swapping for pdqsort to remain O(n).
169             for (size_t i = 0; i < num; ++i) {
170                 std::iter_swap(first + offsets_l[i], last - offsets_r[i]);
171             }
172         } else if (num > 0) {
173             Iter l = first + offsets_l[0]; Iter r = last - offsets_r[0];
174             T tmp(BOOST_PDQSORT_PREFER_MOVE(*l)); *l = BOOST_PDQSORT_PREFER_MOVE(*r);
175             for (size_t i = 1; i < num; ++i) {
176                 l = first + offsets_l[i]; *r = BOOST_PDQSORT_PREFER_MOVE(*l);
177                 r = last - offsets_r[i]; *l = BOOST_PDQSORT_PREFER_MOVE(*r);
178             }
179             *r = BOOST_PDQSORT_PREFER_MOVE(tmp);
180         }
181     }
182 
183     // Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
184     // to the pivot are put in the right-hand partition. Returns the position of the pivot after
185     // partitioning and whether the passed sequence already was correctly partitioned. Assumes the
186     // pivot is a median of at least 3 elements and that [begin, end) is at least
187     // insertion_sort_threshold long. Uses branchless partitioning.
188     template<class Iter, class Compare>
partition_right_branchless(Iter begin,Iter end,Compare comp)189     inline std::pair<Iter, bool> partition_right_branchless(Iter begin, Iter end, Compare comp) {
190         typedef typename std::iterator_traits<Iter>::value_type T;
191 
192         // Move pivot into local for speed.
193         T pivot(BOOST_PDQSORT_PREFER_MOVE(*begin));
194         Iter first = begin;
195         Iter last = end;
196 
197         // Find the first element greater than or equal than the pivot (the median of 3 guarantees
198         // this exists).
199         while (comp(*++first, pivot));
200 
201         // Find the first element strictly smaller than the pivot. We have to guard this search if
202         // there was no element before *first.
203         if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
204         else                    while (                !comp(*--last, pivot));
205 
206         // If the first pair of elements that should be swapped to partition are the same element,
207         // the passed in sequence already was correctly partitioned.
208         bool already_partitioned = first >= last;
209         if (!already_partitioned) {
210             std::iter_swap(first, last);
211             ++first;
212 
213             // The following branchless partitioning is derived from "BlockQuicksort: How Branch
214             // Mispredictions don’t affect Quicksort" by Stefan Edelkamp and Armin Weiss, but
215             // heavily micro-optimized.
216             unsigned char offsets_l_storage[block_size + cacheline_size];
217             unsigned char offsets_r_storage[block_size + cacheline_size];
218             unsigned char* offsets_l = align_cacheline(offsets_l_storage);
219             unsigned char* offsets_r = align_cacheline(offsets_r_storage);
220 
221             Iter offsets_l_base = first;
222             Iter offsets_r_base = last;
223             size_t num_l, num_r, start_l, start_r;
224             num_l = num_r = start_l = start_r = 0;
225 
226             while (first < last) {
227                 // Fill up offset blocks with elements that are on the wrong side.
228                 // First we determine how much elements are considered for each offset block.
229                 size_t num_unknown = last - first;
230                 size_t left_split = num_l == 0 ? (num_r == 0 ? num_unknown / 2 : num_unknown) : 0;
231                 size_t right_split = num_r == 0 ? (num_unknown - left_split) : 0;
232 
233                 // Fill the offset blocks.
234                 if (left_split >= block_size) {
235                     for (size_t i = 0; i < block_size;) {
236                         offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
237                         offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
238                         offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
239                         offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
240                         offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
241                         offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
242                         offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
243                         offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
244                     }
245                 } else {
246                     for (size_t i = 0; i < left_split;) {
247                         offsets_l[num_l] = i++; num_l += !comp(*first, pivot); ++first;
248                     }
249                 }
250 
251                 if (right_split >= block_size) {
252                     for (size_t i = 0; i < block_size;) {
253                         offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
254                         offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
255                         offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
256                         offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
257                         offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
258                         offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
259                         offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
260                         offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
261                     }
262                 } else {
263                     for (size_t i = 0; i < right_split;) {
264                         offsets_r[num_r] = ++i; num_r += comp(*--last, pivot);
265                     }
266                 }
267 
268                 // Swap elements and update block sizes and first/last boundaries.
269                 size_t num = (std::min)(num_l, num_r);
270                 swap_offsets(offsets_l_base, offsets_r_base,
271                              offsets_l + start_l, offsets_r + start_r,
272                              num, num_l == num_r);
273                 num_l -= num; num_r -= num;
274                 start_l += num; start_r += num;
275 
276                 if (num_l == 0) {
277                     start_l = 0;
278                     offsets_l_base = first;
279                 }
280 
281                 if (num_r == 0) {
282                     start_r = 0;
283                     offsets_r_base = last;
284                 }
285             }
286 
287             // We have now fully identified [first, last)'s proper position. Swap the last elements.
288             if (num_l) {
289                 offsets_l += start_l;
290                 while (num_l--) std::iter_swap(offsets_l_base + offsets_l[num_l], --last);
291                 first = last;
292             }
293             if (num_r) {
294                 offsets_r += start_r;
295                 while (num_r--) std::iter_swap(offsets_r_base - offsets_r[num_r], first), ++first;
296                 last = first;
297             }
298         }
299 
300         // Put the pivot in the right place.
301         Iter pivot_pos = first - 1;
302         *begin = BOOST_PDQSORT_PREFER_MOVE(*pivot_pos);
303         *pivot_pos = BOOST_PDQSORT_PREFER_MOVE(pivot);
304 
305         return std::make_pair(pivot_pos, already_partitioned);
306     }
307 
308     // Partitions [begin, end) around pivot *begin using comparison function comp. Elements equal
309     // to the pivot are put in the right-hand partition. Returns the position of the pivot after
310     // partitioning and whether the passed sequence already was correctly partitioned. Assumes the
311     // pivot is a median of at least 3 elements and that [begin, end) is at least
312     // insertion_sort_threshold long.
313     template<class Iter, class Compare>
partition_right(Iter begin,Iter end,Compare comp)314     inline std::pair<Iter, bool> partition_right(Iter begin, Iter end, Compare comp) {
315         typedef typename std::iterator_traits<Iter>::value_type T;
316 
317         // Move pivot into local for speed.
318         T pivot(BOOST_PDQSORT_PREFER_MOVE(*begin));
319 
320         Iter first = begin;
321         Iter last = end;
322 
323         // Find the first element greater than or equal than the pivot (the median of 3 guarantees
324         // this exists).
325         while (comp(*++first, pivot));
326 
327         // Find the first element strictly smaller than the pivot. We have to guard this search if
328         // there was no element before *first.
329         if (first - 1 == begin) while (first < last && !comp(*--last, pivot));
330         else                    while (                !comp(*--last, pivot));
331 
332         // If the first pair of elements that should be swapped to partition are the same element,
333         // the passed in sequence already was correctly partitioned.
334         bool already_partitioned = first >= last;
335 
336         // Keep swapping pairs of elements that are on the wrong side of the pivot. Previously
337         // swapped pairs guard the searches, which is why the first iteration is special-cased
338         // above.
339         while (first < last) {
340             std::iter_swap(first, last);
341             while (comp(*++first, pivot));
342             while (!comp(*--last, pivot));
343         }
344 
345         // Put the pivot in the right place.
346         Iter pivot_pos = first - 1;
347         *begin = BOOST_PDQSORT_PREFER_MOVE(*pivot_pos);
348         *pivot_pos = BOOST_PDQSORT_PREFER_MOVE(pivot);
349 
350         return std::make_pair(pivot_pos, already_partitioned);
351     }
352 
353     // Similar function to the one above, except elements equal to the pivot are put to the left of
354     // the pivot and it doesn't check or return if the passed sequence already was partitioned.
355     // Since this is rarely used (the many equal case), and in that case pdqsort already has O(n)
356     // performance, no block quicksort is applied here for simplicity.
357     template<class Iter, class Compare>
partition_left(Iter begin,Iter end,Compare comp)358     inline Iter partition_left(Iter begin, Iter end, Compare comp) {
359         typedef typename std::iterator_traits<Iter>::value_type T;
360 
361         T pivot(BOOST_PDQSORT_PREFER_MOVE(*begin));
362         Iter first = begin;
363         Iter last = end;
364 
365         while (comp(pivot, *--last));
366 
367         if (last + 1 == end) while (first < last && !comp(pivot, *++first));
368         else                 while (                !comp(pivot, *++first));
369 
370         while (first < last) {
371             std::iter_swap(first, last);
372             while (comp(pivot, *--last));
373             while (!comp(pivot, *++first));
374         }
375 
376         Iter pivot_pos = last;
377         *begin = BOOST_PDQSORT_PREFER_MOVE(*pivot_pos);
378         *pivot_pos = BOOST_PDQSORT_PREFER_MOVE(pivot);
379 
380         return pivot_pos;
381     }
382 
383 
384     template<class Iter, class Compare, bool Branchless>
pdqsort_loop(Iter begin,Iter end,Compare comp,int bad_allowed,bool leftmost=true)385     inline void pdqsort_loop(Iter begin, Iter end, Compare comp, int bad_allowed, bool leftmost = true) {
386         typedef typename std::iterator_traits<Iter>::difference_type diff_t;
387 
388         // Use a while loop for tail recursion elimination.
389         while (true) {
390             diff_t size = end - begin;
391 
392             // Insertion sort is faster for small arrays.
393             if (size < insertion_sort_threshold) {
394                 if (leftmost) insertion_sort(begin, end, comp);
395                 else unguarded_insertion_sort(begin, end, comp);
396                 return;
397             }
398 
399             // Choose pivot as median of 3 or pseudomedian of 9.
400             diff_t s2 = size / 2;
401             if (size > ninther_threshold) {
402                 sort3(begin, begin + s2, end - 1, comp);
403                 sort3(begin + 1, begin + (s2 - 1), end - 2, comp);
404                 sort3(begin + 2, begin + (s2 + 1), end - 3, comp);
405                 sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp);
406                 std::iter_swap(begin, begin + s2);
407             } else sort3(begin + s2, begin, end - 1, comp);
408 
409             // If *(begin - 1) is the end of the right partition of a previous partition operation
410             // there is no element in [begin, end) that is smaller than *(begin - 1). Then if our
411             // pivot compares equal to *(begin - 1) we change strategy, putting equal elements in
412             // the left partition, greater elements in the right partition. We do not have to
413             // recurse on the left partition, since it's sorted (all equal).
414             if (!leftmost && !comp(*(begin - 1), *begin)) {
415                 begin = partition_left(begin, end, comp) + 1;
416                 continue;
417             }
418 
419             // Partition and get results.
420             std::pair<Iter, bool> part_result =
421                 Branchless ? partition_right_branchless(begin, end, comp)
422                            : partition_right(begin, end, comp);
423             Iter pivot_pos = part_result.first;
424             bool already_partitioned = part_result.second;
425 
426             // Check for a highly unbalanced partition.
427             diff_t l_size = pivot_pos - begin;
428             diff_t r_size = end - (pivot_pos + 1);
429             bool highly_unbalanced = l_size < size / 8 || r_size < size / 8;
430 
431             // If we got a highly unbalanced partition we shuffle elements to break many patterns.
432             if (highly_unbalanced) {
433                 // If we had too many bad partitions, switch to heapsort to guarantee O(n log n).
434                 if (--bad_allowed == 0) {
435                     std::make_heap(begin, end, comp);
436                     std::sort_heap(begin, end, comp);
437                     return;
438                 }
439 
440                 if (l_size >= insertion_sort_threshold) {
441                     std::iter_swap(begin,             begin + l_size / 4);
442                     std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4);
443 
444                     if (l_size > ninther_threshold) {
445                         std::iter_swap(begin + 1,         begin + (l_size / 4 + 1));
446                         std::iter_swap(begin + 2,         begin + (l_size / 4 + 2));
447                         std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1));
448                         std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2));
449                     }
450                 }
451 
452                 if (r_size >= insertion_sort_threshold) {
453                     std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4));
454                     std::iter_swap(end - 1,                   end - r_size / 4);
455 
456                     if (r_size > ninther_threshold) {
457                         std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4));
458                         std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4));
459                         std::iter_swap(end - 2,             end - (1 + r_size / 4));
460                         std::iter_swap(end - 3,             end - (2 + r_size / 4));
461                     }
462                 }
463             } else {
464                 // If we were decently balanced and we tried to sort an already partitioned
465                 // sequence try to use insertion sort.
466                 if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp)
467                                         && partial_insertion_sort(pivot_pos + 1, end, comp)) return;
468             }
469 
470             // Sort the left partition first using recursion and do tail recursion elimination for
471             // the right-hand partition.
472             pdqsort_loop<Iter, Compare, Branchless>(begin, pivot_pos, comp, bad_allowed, leftmost);
473             begin = pivot_pos + 1;
474             leftmost = false;
475         }
476     }
477 }
478 
479 
480 /*! \brief Generic sort algorithm using random access iterators and a user-defined comparison operator.
481 
482     \details @c pdqsort is a fast generic sorting algorithm that is similar in concept to introsort
483 but runs faster on certain patterns. @c pdqsort is in-place, unstable, deterministic, has a worst
484 case runtime of <em>O(N * lg(N))</em> and a best case of <em>O(N)</em>. Even without patterns, the
485 quicksort has been very efficiently implemented, and @c pdqsort runs 1-5% faster than GCC 6.2's
486 @c std::sort. If the type being sorted is @c std::is_arithmetic and Compare is @c std::less or
487 @c std::greater this function will automatically use @c pdqsort_branchless for far greater speedups.
488 
489    \param[in] first Iterator pointer to first element.
490    \param[in] last Iterator pointing to one beyond the end of data.
491    \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
492    \pre [@c first, @c last) is a valid range.
493    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveAssignable">MoveAssignable</a>
494    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveConstructible">MoveConstructible</a>
495    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
496    \post The elements in the range [@c first, @c last) are sorted in ascending order.
497 
498    \return @c void.
499 
500    \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps
501    (or moves), functors, or any operations on iterators throw.
502    \warning Invalid arguments cause undefined behaviour.
503    \warning Throwing an exception may cause data loss.
504 */
505 template<class Iter, class Compare>
pdqsort(Iter first,Iter last,Compare comp)506 inline void pdqsort(Iter first, Iter last, Compare comp) {
507     if (first == last) return;
508     pdqsort_detail::pdqsort_loop<Iter, Compare,
509         pdqsort_detail::is_default_compare<typename boost::decay<Compare>::type>::value &&
510         boost::is_arithmetic<typename std::iterator_traits<Iter>::value_type>::value>(
511         first, last, comp, pdqsort_detail::log2(last - first));
512 }
513 
514 
515 /*! \brief Generic sort algorithm using random access iterators and a user-defined comparison operator.
516 
517     \details @c pdqsort_branchless is a fast generic sorting algorithm that is similar in concept to
518 introsort but runs faster on certain patterns. @c pdqsort_branchless is in-place, unstable,
519 deterministic, has a worst case runtime of <em>O(N * lg(N))</em> and a best case of <em>O(N)</em>.
520 Even without patterns, the quicksort has been very efficiently implemented with block based
521 partitioning, and @c pdqsort_branchless runs 80-90% faster than GCC 6.2's @c std::sort when sorting
522 small data such as integers. However, this speedup is gained by totally bypassing the branch
523 predictor, if your comparison operator or iterator contains branches you will most likely see little
524 gain or a small loss in performance.
525 
526    \param[in] first Iterator pointer to first element.
527    \param[in] last Iterator pointing to one beyond the end of data.
528    \param[in] comp A binary functor that returns whether the first element passed to it should go before the second in order.
529    \pre [@c first, @c last) is a valid range.
530    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveAssignable">MoveAssignable</a>
531    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveConstructible">MoveConstructible</a>
532    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
533    \post The elements in the range [@c first, @c last) are sorted in ascending order.
534 
535    \return @c void.
536 
537    \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps
538    (or moves), functors, or any operations on iterators throw.
539    \warning Invalid arguments cause undefined behaviour.
540    \warning Throwing an exception may cause data loss.
541 */
542 template<class Iter, class Compare>
pdqsort_branchless(Iter first,Iter last,Compare comp)543 inline void pdqsort_branchless(Iter first, Iter last, Compare comp) {
544     if (first == last) return;
545     pdqsort_detail::pdqsort_loop<Iter, Compare, true>(
546         first, last, comp, pdqsort_detail::log2(last - first));
547 }
548 
549 
550 /*! \brief Generic sort algorithm using random access iterators.
551 
552     \details @c pdqsort is a fast generic sorting algorithm that is similar in concept to introsort
553 but runs faster on certain patterns. @c pdqsort is in-place, unstable, deterministic, has a worst
554 case runtime of <em>O(N * lg(N))</em> and a best case of <em>O(N)</em>. Even without patterns, the
555 quicksort partitioning has been very efficiently implemented, and @c pdqsort runs 80-90% faster than
556 GCC 6.2's @c std::sort. If the type being sorted is @c std::is_arithmetic this function will
557 automatically use @c pdqsort_branchless.
558 
559    \param[in] first Iterator pointer to first element.
560    \param[in] last Iterator pointing to one beyond the end of data.
561    \pre [@c first, @c last) is a valid range.
562    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveAssignable">MoveAssignable</a>
563    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveConstructible">MoveConstructible</a>
564    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
565    \post The elements in the range [@c first, @c last) are sorted in ascending order.
566 
567    \return @c void.
568 
569    \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps
570    (or moves), functors, or any operations on iterators throw.
571    \warning Invalid arguments cause undefined behaviour.
572    \warning Throwing an exception may cause data loss.
573 */
574 template<class Iter>
pdqsort(Iter first,Iter last)575 inline void pdqsort(Iter first, Iter last) {
576     typedef typename std::iterator_traits<Iter>::value_type T;
577     pdqsort(first, last, std::less<T>());
578 }
579 
580 
581 /*! \brief Generic sort algorithm using random access iterators.
582 
583     \details @c pdqsort_branchless is a fast generic sorting algorithm that is similar in concept to
584 introsort but runs faster on certain patterns. @c pdqsort_branchless is in-place, unstable,
585 deterministic, has a worst case runtime of <em>O(N * lg(N))</em> and a best case of <em>O(N)</em>.
586 Even without patterns, the quicksort has been very efficiently implemented with block based
587 partitioning, and @c pdqsort_branchless runs 80-90% faster than GCC 6.2's @c std::sort when sorting
588 small data such as integers. However, this speedup is gained by totally bypassing the branch
589 predictor, if your comparison operator or iterator contains branches you will most likely see little
590 gain or a small loss in performance.
591 
592    \param[in] first Iterator pointer to first element.
593    \param[in] last Iterator pointing to one beyond the end of data.
594    \pre [@c first, @c last) is a valid range.
595    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveAssignable">MoveAssignable</a>
596    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/MoveConstructible">MoveConstructible</a>
597    \pre @c RandomAccessIter @c value_type is <a href="http://en.cppreference.com/w/cpp/concept/LessThanComparable">LessThanComparable</a>
598    \post The elements in the range [@c first, @c last) are sorted in ascending order.
599 
600    \return @c void.
601 
602    \throws std::exception Propagates exceptions if any of the element comparisons, the element swaps
603    (or moves), functors, or any operations on iterators throw.
604    \warning Invalid arguments cause undefined behaviour.
605    \warning Throwing an exception may cause data loss.
606 */
607 template<class Iter>
pdqsort_branchless(Iter first,Iter last)608 inline void pdqsort_branchless(Iter first, Iter last) {
609     typedef typename std::iterator_traits<Iter>::value_type T;
610     pdqsort_branchless(first, last, std::less<T>());
611 }
612 
613 }
614 }
615 
616 #undef BOOST_PDQSORT_PREFER_MOVE
617 
618 #endif
619