1 //////////////////////////////////////////////////////////////////////////////
2 //
3 // (C) Copyright Ion Gaztanaga 2015-2016.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // See http://www.boost.org/libs/move for documentation.
9 //
10 //////////////////////////////////////////////////////////////////////////////
11 //
12 // Stable sorting that works in O(N*log(N)) worst time
13 // and uses O(1) extra memory
14 //
15 //////////////////////////////////////////////////////////////////////////////
16 //
17 // The main idea of the adaptive_sort algorithm was developed by Andrey Astrelin
18 // and explained in the article from the russian collaborative blog
19 // Habrahabr (http://habrahabr.ru/post/205290/). The algorithm is based on
20 // ideas from B-C. Huang and M. A. Langston explained in their article
21 // "Fast Stable Merging and Sorting in Constant Extra Space (1989-1992)"
22 // (http://comjnl.oxfordjournals.org/content/35/6/643.full.pdf).
23 //
24 // This implementation by Ion Gaztanaga uses previous ideas with additional changes:
25 //
26 // - Use of GCD-based rotation.
27 // - Non power of two buffer-sizes.
28 // - Tries to find sqrt(len)*2 unique keys, so that the merge sort
29 //   phase can form up to sqrt(len)*4 segments if enough keys are found.
30 // - The merge-sort phase can take advantage of external memory to
31 //   save some additional combination steps.
32 // - Combination phase: Blocks are selection sorted and merged in parallel.
33 // - The combination phase is performed alternating merge to left and merge
34 //   to right phases minimizing swaps due to internal buffer repositioning.
35 // - When merging blocks special optimizations are made to avoid moving some
36 //   elements twice.
37 //
38 // The adaptive_merge algorithm was developed by Ion Gaztanaga reusing some parts
39 // from the sorting algorithm and implementing an additional block merge algorithm
40 // without moving elements to left or right.
41 //////////////////////////////////////////////////////////////////////////////
42 #ifndef BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP
43 #define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP
44 
45 #include <boost/move/detail/config_begin.hpp>
46 #include <boost/move/detail/reverse_iterator.hpp>
47 #include <boost/move/algo/move.hpp>
48 #include <boost/move/algo/detail/merge.hpp>
49 #include <boost/move/adl_move_swap.hpp>
50 #include <boost/move/algo/detail/insertion_sort.hpp>
51 #include <boost/move/algo/detail/merge_sort.hpp>
52 #include <boost/move/algo/detail/merge.hpp>
53 #include <boost/assert.hpp>
54 #include <boost/cstdint.hpp>
55 
56 #ifndef BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL
57    #define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 1
58 #endif
59 
60 #ifdef BOOST_MOVE_ADAPTIVE_SORT_STATS
61    #if BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL == 2
62       #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L) \
63          print_stats(STR, L)\
64       //
65 
66       #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L) \
67          print_stats(STR, L)\
68       //
69    #else
70       #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L) \
71          print_stats(STR, L)\
72       //
73 
74       #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L)
75    #endif
76 #else
77    #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L)
78    #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L)
79 #endif
80 
81 #ifdef BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS
82    #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT  BOOST_ASSERT
83 #else
84    #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(L)
85 #endif
86 
87 
88 
89 namespace boost {
90 namespace movelib {
91 
92 namespace detail_adaptive {
93 
94 static const std::size_t AdaptiveSortInsertionSortThreshold = 16;
95 //static const std::size_t AdaptiveSortInsertionSortThreshold = 4;
96 BOOST_STATIC_ASSERT((AdaptiveSortInsertionSortThreshold&(AdaptiveSortInsertionSortThreshold-1)) == 0);
97 
98 #if defined BOOST_HAS_INTPTR_T
99    typedef ::boost::uintptr_t uintptr_t;
100 #else
101    typedef std::size_t uintptr_t;
102 #endif
103 
104 template<class T>
105 const T &min_value(const T &a, const T &b)
106 {
107    return a < b ? a : b;
108 }
109 
110 template<class T>
111 const T &max_value(const T &a, const T &b)
112 {
113    return a > b ? a : b;
114 }
115 
116 template<class ForwardIt, class Pred>
117 bool is_sorted(ForwardIt const first, ForwardIt last, Pred pred)
118 {
119    if (first != last) {
120       ForwardIt next = first, cur(first);
121       while (++next != last) {
122          if (pred(*next, *cur))
123             return false;
124          cur = next;
125       }
126    }
127    return true;
128 }
129 
130 #if defined(BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS)
131 
132 bool is_sorted(::order_perf_type *first, ::order_perf_type *last, ::order_type_less)
133 {
134    if (first != last) {
135       const order_perf_type *next = first, *cur(first);
136       while (++next != last) {
137          if (!(cur->key < next->key || (cur->key == next->key && cur->val < next->val)))
138             return false;
139          cur = next;
140       }
141    }
142    return true;
143 }
144 
145 #endif   //BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS
146 
147 template<class ForwardIt, class Pred>
148 bool is_sorted_and_unique(ForwardIt first, ForwardIt last, Pred pred)
149 {
150    if (first != last) {
151       ForwardIt next = first;
152       while (++next != last) {
153          if (!pred(*first, *next))
154             return false;
155          first = next;
156       }
157    }
158    return true;
159 }
160 
161 template<class ForwardIt, class Pred, class V>
162 typename iterator_traits<ForwardIt>::size_type
163    count_if_with(ForwardIt first, ForwardIt last, Pred pred, const V &v)
164 {
165    typedef typename iterator_traits<ForwardIt>::size_type size_type;
166    size_type count = 0;
167    while(first != last) {
168       count += static_cast<size_type>(0 != pred(*first, v));
169       ++first;
170    }
171    return count;
172 }
173 
174 template<class T, class RandRawIt = T*>
175 class adaptive_xbuf
176 {
177    adaptive_xbuf(const adaptive_xbuf &);
178    adaptive_xbuf & operator=(const adaptive_xbuf &);
179 
180    public:
181    typedef RandRawIt iterator;
182 
183    adaptive_xbuf()
184       : m_ptr(), m_size(0), m_capacity(0)
185    {}
186 
187    adaptive_xbuf(RandRawIt raw_memory, std::size_t capacity)
188       : m_ptr(raw_memory), m_size(0), m_capacity(capacity)
189    {}
190 
191    template<class RandIt>
192    void move_assign(RandIt first, std::size_t n)
193    {
194       if(n <= m_size){
195          boost::move(first, first+n, m_ptr);
196          std::size_t size = m_size;
197          while(size-- != n){
198             m_ptr[size].~T();
199          }
200          m_size = n;
201       }
202       else{
203          RandRawIt result = boost::move(first, first+m_size, m_ptr);
204          boost::uninitialized_move(first+m_size, first+n, result);
205          m_size = n;
206       }
207    }
208 
209    template<class RandIt>
210    void push_back(RandIt first, std::size_t n)
211    {
212       BOOST_ASSERT(m_capacity - m_size >= n);
213       boost::uninitialized_move(first, first+n, m_ptr+m_size);
214       m_size += n;
215    }
216 
217    template<class RandIt>
218    iterator add(RandIt it)
219    {
220       BOOST_ASSERT(m_size < m_capacity);
221       RandRawIt p_ret = m_ptr + m_size;
222       ::new(&*p_ret) T(::boost::move(*it));
223       ++m_size;
224       return p_ret;
225    }
226 
227    template<class RandIt>
228    void insert(iterator pos, RandIt it)
229    {
230       if(pos == (m_ptr + m_size)){
231          this->add(it);
232       }
233       else{
234          this->add(m_ptr+m_size-1);
235          //m_size updated
236          boost::move_backward(pos, m_ptr+m_size-2, m_ptr+m_size-1);
237          *pos = boost::move(*it);
238       }
239    }
240 
241    void set_size(std::size_t size)
242    {
243       m_size = size;
244    }
245 
246    void shrink_to_fit(std::size_t const size)
247    {
248       if(m_size > size){
249          for(std::size_t szt_i = size; szt_i != m_size; ++szt_i){
250             m_ptr[szt_i].~T();
251          }
252          m_size = size;
253       }
254    }
255 
256    void initialize_until(std::size_t const size, T &t)
257    {
258       BOOST_ASSERT(m_size < m_capacity);
259       if(m_size < size){
260          ::new((void*)&m_ptr[m_size]) T(::boost::move(t));
261          ++m_size;
262          for(; m_size != size; ++m_size){
263             ::new((void*)&m_ptr[m_size]) T(::boost::move(m_ptr[m_size-1]));
264          }
265          t = ::boost::move(m_ptr[m_size-1]);
266       }
267    }
268 
269    private:
270    template<class RIt>
271    static bool is_raw_ptr(RIt)
272    {
273       return false;
274    }
275 
276    static bool is_raw_ptr(T*)
277    {
278       return true;
279    }
280 
281    public:
282    template<class U>
283    bool supports_aligned_trailing(std::size_t size, std::size_t trail_count) const
284    {
285       if(this->is_raw_ptr(this->data()) && m_capacity){
286          uintptr_t u_addr_sz = uintptr_t(&*(this->data()+size));
287          uintptr_t u_addr_cp = uintptr_t(&*(this->data()+this->capacity()));
288          u_addr_sz = ((u_addr_sz + sizeof(U)-1)/sizeof(U))*sizeof(U);
289          return (u_addr_cp >= u_addr_sz) && ((u_addr_cp - u_addr_sz)/sizeof(U) >= trail_count);
290       }
291       return false;
292    }
293 
294    template<class U>
295    U *aligned_trailing() const
296    {
297       return this->aligned_trailing<U>(this->size());
298    }
299 
300    template<class U>
301    U *aligned_trailing(std::size_t pos) const
302    {
303       uintptr_t u_addr = uintptr_t(&*(this->data()+pos));
304       u_addr = ((u_addr + sizeof(U)-1)/sizeof(U))*sizeof(U);
305       return (U*)u_addr;
306    }
307 
308    ~adaptive_xbuf()
309    {
310       this->clear();
311    }
312 
313    std::size_t capacity() const
314    {  return m_capacity;   }
315 
316    iterator data() const
317    {  return m_ptr;   }
318 
319    iterator end() const
320    {  return m_ptr+m_size;   }
321 
322    std::size_t size() const
323    {  return m_size;   }
324 
325    bool empty() const
326    {  return !m_size;   }
327 
328    void clear()
329    {
330       this->shrink_to_fit(0u);
331    }
332 
333    private:
334    RandRawIt m_ptr;
335    std::size_t m_size;
336    std::size_t m_capacity;
337 };
338 
339 template<class Iterator, class Op>
340 class range_xbuf
341 {
342    range_xbuf(const range_xbuf &);
343    range_xbuf & operator=(const range_xbuf &);
344 
345    public:
346    typedef typename iterator_traits<Iterator>::size_type size_type;
347    typedef Iterator iterator;
348 
349    range_xbuf(Iterator first, Iterator last)
350       : m_first(first), m_last(first), m_cap(last)
351    {}
352 
353    template<class RandIt>
354    void move_assign(RandIt first, std::size_t n)
355    {
356       BOOST_ASSERT(size_type(n) <= size_type(m_cap-m_first));
357       m_last = Op()(forward_t(), first, first+n, m_first);
358    }
359 
360    ~range_xbuf()
361    {}
362 
363    std::size_t capacity() const
364    {  return m_cap-m_first;   }
365 
366    Iterator data() const
367    {  return m_first;   }
368 
369    Iterator end() const
370    {  return m_last;   }
371 
372    std::size_t size() const
373    {  return m_last-m_first;   }
374 
375    bool empty() const
376    {  return m_first == m_last;   }
377 
378    void clear()
379    {
380       m_last = m_first;
381    }
382 
383    template<class RandIt>
384    iterator add(RandIt it)
385    {
386       Iterator pos(m_last);
387       *pos = boost::move(*it);
388       ++m_last;
389       return pos;
390    }
391 
392    void set_size(std::size_t size)
393    {
394       m_last  = m_first;
395       m_last += size;
396    }
397 
398    private:
399    Iterator const m_first;
400    Iterator m_last;
401    Iterator const m_cap;
402 };
403 
404 
405 template<class RandIt, class Compare>
406 RandIt skip_until_merge
407    ( RandIt first1, RandIt const last1
408    , const typename iterator_traits<RandIt>::value_type &next_key, Compare comp)
409 {
410    while(first1 != last1 && !comp(next_key, *first1)){
411       ++first1;
412    }
413    return first1;
414 }
415 
416 
417 template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op>
418 RandItB op_buffered_partial_merge_to_range1_and_buffer
419    ( RandIt1 first1, RandIt1 const last1
420    , RandIt2 &rfirst2, RandIt2 const last2
421    , RandItB &rfirstb, Compare comp, Op op )
422 {
423    RandItB firstb = rfirstb;
424    RandItB lastb  = firstb;
425    RandIt2 first2 = rfirst2;
426 
427    //Move to buffer while merging
428    //Three way moves need less moves when op is swap_op so use it
429    //when merging elements from range2 to the destination occupied by range1
430    if(first1 != last1 && first2 != last2){
431       op(three_way_t(), first2++, first1++, lastb++);
432 
433       while(true){
434          if(first1 == last1){
435             break;
436          }
437          if(first2 == last2){
438             lastb = op(forward_t(), first1, last1, firstb);
439             break;
440          }
441          if (comp(*first2, *firstb)) {
442             op(three_way_t(), first2++, first1++, lastb++);
443          }
444          else {
445             op(three_way_t(), firstb++, first1++, lastb++);
446          }
447       }
448       rfirst2 = first2;
449       rfirstb = firstb;
450    }
451 
452    return lastb;
453 }
454 
455 template<class RandItKeys, class RandIt>
456 void swap_and_update_key
457    ( RandItKeys const key_next
458    , RandItKeys const key_range2
459    , RandItKeys &key_mid
460    , RandIt const begin
461    , RandIt const end
462    , RandIt const with)
463 {
464    if(begin != with){
465       ::boost::adl_move_swap_ranges(begin, end, with);
466       ::boost::adl_move_swap(*key_next, *key_range2);
467       if(key_next == key_mid){
468          key_mid = key_range2;
469       }
470       else if(key_mid == key_range2){
471          key_mid = key_next;
472       }
473    }
474 }
475 
476 ///////////////////////////////////////////////////////////////////////////////
477 //
478 //                         MERGE BUFFERLESS
479 //
480 ///////////////////////////////////////////////////////////////////////////////
481 
482 // [first1, last1) merge [last1,last2) -> [first1,last2)
483 template<class RandIt, class Compare>
484 RandIt partial_merge_bufferless_impl
485    (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp)
486 {
487    if(last1 == last2){
488       return first1;
489    }
490    bool const is_range1_A = *pis_range1_A;
491    if(first1 != last1 && comp(*last1, last1[-1])){
492       do{
493          RandIt const old_last1 = last1;
494          last1  = boost::movelib::lower_bound(last1, last2, *first1, comp);
495          first1 = rotate_gcd(first1, old_last1, last1);//old_last1 == last1 supported
496          if(last1 == last2){
497             return first1;
498          }
499          do{
500             ++first1;
501          } while(last1 != first1 && !comp(*last1, *first1) );
502       } while(first1 != last1);
503    }
504    *pis_range1_A = !is_range1_A;
505    return last1;
506 }
507 
508 // [first1, last1) merge [last1,last2) -> [first1,last2)
509 template<class RandIt, class Compare>
510 RandIt partial_merge_bufferless
511    (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp)
512 {
513    return *pis_range1_A ? partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, comp)
514                         : partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, antistable<Compare>(comp));
515 }
516 
517 template<class SizeType>
518 static SizeType needed_keys_count(SizeType n_block_a, SizeType n_block_b)
519 {
520    return n_block_a + n_block_b;
521 }
522 
523 template<class RandItKeys, class KeyCompare, class RandIt, class Compare>
524 typename iterator_traits<RandIt>::size_type
525    find_next_block
526       ( RandItKeys key_first
527       , KeyCompare key_comp
528       , RandIt const first
529       , typename iterator_traits<RandIt>::size_type const l_block
530       , typename iterator_traits<RandIt>::size_type const ix_first_block
531       , typename iterator_traits<RandIt>::size_type const ix_last_block
532       , Compare comp)
533 {
534    typedef typename iterator_traits<RandIt>::size_type      size_type;
535    typedef typename iterator_traits<RandIt>::value_type     value_type;
536    typedef typename iterator_traits<RandItKeys>::value_type key_type;
537    BOOST_ASSERT(ix_first_block <= ix_last_block);
538    size_type ix_min_block = 0u;
539    for (size_type szt_i = ix_first_block; szt_i < ix_last_block; ++szt_i) {
540       const value_type &min_val = first[ix_min_block*l_block];
541       const value_type &cur_val = first[szt_i*l_block];
542       const key_type   &min_key = key_first[ix_min_block];
543       const key_type   &cur_key = key_first[szt_i];
544 
545       bool const less_than_minimum = comp(cur_val, min_val) ||
546          (!comp(min_val, cur_val) && key_comp(cur_key, min_key));
547 
548       if (less_than_minimum) {
549          ix_min_block = szt_i;
550       }
551    }
552    return ix_min_block;
553 }
554 
555 template<class RandItKeys, class KeyCompare, class RandIt, class Compare>
556 void merge_blocks_bufferless
557    ( RandItKeys key_first
558    , KeyCompare key_comp
559    , RandIt const first
560    , typename iterator_traits<RandIt>::size_type const l_block
561    , typename iterator_traits<RandIt>::size_type const l_irreg1
562    , typename iterator_traits<RandIt>::size_type const n_block_a
563    , typename iterator_traits<RandIt>::size_type const n_block_b
564    , typename iterator_traits<RandIt>::size_type const l_irreg2
565    , Compare comp)
566 {
567    typedef typename iterator_traits<RandIt>::size_type size_type;
568    size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count;
569    //BOOST_ASSERT(n_block_a || n_block_b);
570    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp));
571    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a]));
572 
573    size_type n_bef_irreg2 = 0;
574    bool l_irreg_pos_count = true;
575    RandItKeys key_mid(key_first + n_block_a);
576    RandIt const first_irr2 = first + l_irreg1 + (n_block_a+n_block_b)*l_block;
577    RandIt const last_irr2  = first_irr2 + l_irreg2;
578 
579    {  //Selection sort blocks
580       size_type n_block_left = n_block_b + n_block_a;
581       RandItKeys key_range2(key_first);
582 
583       size_type min_check = n_block_a == n_block_left ? 0u : n_block_a;
584       size_type max_check = min_value(min_check+1, n_block_left);
585       for (RandIt f = first+l_irreg1; n_block_left; --n_block_left, ++key_range2, f += l_block, min_check -= min_check != 0, max_check -= max_check != 0) {
586          size_type const next_key_idx = find_next_block(key_range2, key_comp, f, l_block, min_check, max_check, comp);
587          RandItKeys const key_next(key_range2 + next_key_idx);
588          max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left);
589 
590          RandIt const first_min = f + next_key_idx*l_block;
591 
592          //Check if irregular b block should go here.
593          //If so, break to the special code handling the irregular block
594          if (l_irreg_pos_count && l_irreg2 && comp(*first_irr2, *first_min)){
595             l_irreg_pos_count = false;
596          }
597          n_bef_irreg2 += l_irreg_pos_count;
598 
599          swap_and_update_key(key_next, key_range2, key_mid, f, f + l_block, first_min);
600          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(f, f+l_block, comp));
601          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, first_min + l_block, comp));
602          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((f == (first+l_irreg1)) || !comp(*f, *(f-l_block)));
603       }
604    }
605    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first+l_irreg1+n_bef_irreg2*l_block, first_irr2, comp));
606 
607    RandIt first1 = first;
608    RandIt last1  = first+l_irreg1;
609    RandItKeys const key_end (key_first+n_bef_irreg2);
610    bool is_range1_A = true;
611 
612    for( ; key_first != key_end; ++key_first){
613       bool is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_first, *key_mid);
614       first1 = is_range1_A == is_range2_A
615          ? last1 : partial_merge_bufferless(first1, last1, last1 + l_block, &is_range1_A, comp);
616       last1 += l_block;
617       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, first1, comp));
618    }
619 
620    merge_bufferless(is_range1_A ? first1 : last1, first_irr2, last_irr2, comp);
621    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp));
622 }
623 
624 ///////////////////////////////////////////////////////////////////////////////
625 //
626 //                            BUFFERED MERGE
627 //
628 ///////////////////////////////////////////////////////////////////////////////
629 template<class RandIt, class Compare, class Op, class Buf>
630 void op_buffered_merge
631       ( RandIt first, RandIt const middle, RandIt last
632       , Compare comp, Op op
633       , Buf &xbuf)
634 {
635    if(first != middle && middle != last && comp(*middle, middle[-1])){
636       typedef typename iterator_traits<RandIt>::size_type   size_type;
637       size_type const len1 = size_type(middle-first);
638       size_type const len2 = size_type(last-middle);
639       if(len1 <= len2){
640          first = boost::movelib::upper_bound(first, middle, *middle, comp);
641          xbuf.move_assign(first, size_type(middle-first));
642          op_merge_with_right_placed
643             (xbuf.data(), xbuf.end(), first, middle, last, comp, op);
644       }
645       else{
646          last = boost::movelib::lower_bound(middle, last, middle[-1], comp);
647          xbuf.move_assign(middle, size_type(last-middle));
648          op_merge_with_left_placed
649             (first, middle, last, xbuf.data(), xbuf.end(), comp, op);
650       }
651    }
652 }
653 
654 template<class RandIt, class Compare, class XBuf>
655 void buffered_merge
656       ( RandIt first, RandIt const middle, RandIt last
657       , Compare comp
658       , XBuf &xbuf)
659 {
660    op_buffered_merge(first, middle, last, comp, move_op(), xbuf);
661 }
662 
663 // Complexity: 2*distance(first, last)+max_collected^2/2
664 //
665 // Tries to collect at most n_keys unique elements from [first, last),
666 // in the begining of the range, and ordered according to comp
667 //
668 // Returns the number of collected keys
669 template<class RandIt, class Compare, class XBuf>
670 typename iterator_traits<RandIt>::size_type
671    collect_unique
672       ( RandIt const first, RandIt const last
673       , typename iterator_traits<RandIt>::size_type const max_collected, Compare comp
674       , XBuf & xbuf)
675 {
676    typedef typename iterator_traits<RandIt>::size_type size_type;
677    size_type h = 0;
678    if(max_collected){
679       ++h;  // first key is always here
680       RandIt h0 = first;
681       RandIt u = first; ++u;
682       RandIt search_end = u;
683 
684       if(xbuf.capacity() >= max_collected){
685          typename XBuf::iterator const ph0 = xbuf.add(first);
686          while(u != last && h < max_collected){
687             typename XBuf::iterator const r = boost::movelib::lower_bound(ph0, xbuf.end(), *u, comp);
688             //If key not found add it to [h, h+h0)
689             if(r == xbuf.end() || comp(*u, *r) ){
690                RandIt const new_h0 = boost::move(search_end, u, h0);
691                search_end = u;
692                ++search_end;
693                ++h;
694                xbuf.insert(r, u);
695                h0 = new_h0;
696             }
697             ++u;
698          }
699          boost::move_backward(first, h0, h0+h);
700          boost::move(xbuf.data(), xbuf.end(), first);
701       }
702       else{
703          while(u != last && h < max_collected){
704             RandIt const r = boost::movelib::lower_bound(h0, search_end, *u, comp);
705             //If key not found add it to [h, h+h0)
706             if(r == search_end || comp(*u, *r) ){
707                RandIt const new_h0 = rotate_gcd(h0, search_end, u);
708                search_end = u;
709                ++search_end;
710                ++h;
711                rotate_gcd(r+(new_h0-h0), u, search_end);
712                h0 = new_h0;
713             }
714             ++u;
715          }
716          rotate_gcd(first, h0, h0+h);
717       }
718    }
719    return h;
720 }
721 
722 template<class Unsigned>
723 Unsigned floor_sqrt(Unsigned const n)
724 {
725    Unsigned x = n;
726    Unsigned y = x/2 + (x&1);
727    while (y < x){
728       x = y;
729       y = (x + n / x)/2;
730    }
731    return x;
732 }
733 
734 template<class Unsigned>
735 Unsigned ceil_sqrt(Unsigned const n)
736 {
737    Unsigned r = floor_sqrt(n);
738    return r + Unsigned((n%r) != 0);
739 }
740 
741 template<class Unsigned>
742 Unsigned floor_merge_multiple(Unsigned const n, Unsigned &base, Unsigned &pow)
743 {
744    Unsigned s = n;
745    Unsigned p = 0;
746    while(s > AdaptiveSortInsertionSortThreshold){
747       s /= 2;
748       ++p;
749    }
750    base = s;
751    pow = p;
752    return s << p;
753 }
754 
755 template<class Unsigned>
756 Unsigned ceil_merge_multiple(Unsigned const n, Unsigned &base, Unsigned &pow)
757 {
758    Unsigned fm = floor_merge_multiple(n, base, pow);
759 
760    if(fm != n){
761       if(base < AdaptiveSortInsertionSortThreshold){
762          ++base;
763       }
764       else{
765          base = AdaptiveSortInsertionSortThreshold/2 + 1;
766          ++pow;
767       }
768    }
769    return base << pow;
770 }
771 
772 template<class Unsigned>
773 Unsigned ceil_sqrt_multiple(Unsigned const n, Unsigned *pbase = 0)
774 {
775    Unsigned const r = ceil_sqrt(n);
776    Unsigned pow = 0;
777    Unsigned base = 0;
778    Unsigned const res = ceil_merge_multiple(r, base, pow);
779    if(pbase) *pbase = base;
780    return res;
781 }
782 
783 struct less
784 {
785    template<class T>
786    bool operator()(const T &l, const T &r)
787    {  return l < r;  }
788 };
789 
790 ///////////////////////////////////////////////////////////////////////////////
791 //
792 //                            MERGE BLOCKS
793 //
794 ///////////////////////////////////////////////////////////////////////////////
795 
796 //#define ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN
797 
798 #if defined ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN
799 template<class RandIt, class Compare>
800 void slow_stable_sort
801    ( RandIt const first, RandIt const last, Compare comp)
802 {
803    boost::movelib::inplace_stable_sort(first, last, comp);
804 }
805 
806 #else //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN
807 
808 template<class RandIt, class Compare>
809 void slow_stable_sort
810    ( RandIt const first, RandIt const last, Compare comp)
811 {
812    typedef typename iterator_traits<RandIt>::size_type size_type;
813    size_type L = size_type(last - first);
814    {  //Use insertion sort to merge first elements
815       size_type m = 0;
816       while((L - m) > size_type(AdaptiveSortInsertionSortThreshold)){
817          insertion_sort(first+m, first+m+size_type(AdaptiveSortInsertionSortThreshold), comp);
818          m += AdaptiveSortInsertionSortThreshold;
819       }
820       insertion_sort(first+m, last, comp);
821    }
822 
823    size_type h = AdaptiveSortInsertionSortThreshold;
824    for(bool do_merge = L > h; do_merge; h*=2){
825       do_merge = (L - h) > h;
826       size_type p0 = 0;
827       if(do_merge){
828          size_type const h_2 = 2*h;
829          while((L-p0) > h_2){
830             merge_bufferless(first+p0, first+p0+h, first+p0+h_2, comp);
831             p0 += h_2;
832          }
833       }
834       if((L-p0) > h){
835          merge_bufferless(first+p0, first+p0+h, last, comp);
836       }
837    }
838 }
839 
840 #endif   //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN
841 
842 //Returns new l_block and updates use_buf
843 template<class Unsigned>
844 Unsigned lblock_for_combine
845    (Unsigned const l_block, Unsigned const n_keys, Unsigned const l_data, bool &use_buf)
846 {
847    BOOST_ASSERT(l_data > 1);
848 
849    //We need to guarantee lblock >= l_merged/(n_keys/2) keys for the combination.
850    //We have at least 4 keys guaranteed (which are the minimum to merge 2 ranges)
851    //If l_block != 0, then n_keys is already enough to merge all blocks in all
852    //phases as we've found all needed keys for that buffer and length before.
853    //If l_block == 0 then see if half keys can be used as buffer and the rest
854    //as keys guaranteeing that n_keys >= (2*l_merged)/lblock =
855    if(!l_block){
856       //If l_block == 0 then n_keys is power of two
857       //(guaranteed by build_params(...))
858       BOOST_ASSERT(n_keys >= 4);
859       //BOOST_ASSERT(0 == (n_keys &(n_keys-1)));
860 
861       //See if half keys are at least 4 and if half keys fulfill
862       Unsigned const new_buf  = n_keys/2;
863       Unsigned const new_keys = n_keys-new_buf;
864       use_buf = new_keys >= 4 && new_keys >= l_data/new_buf;
865       if(use_buf){
866          return new_buf;
867       }
868       else{
869          return l_data/n_keys;
870       }
871    }
872    else{
873       use_buf = true;
874       return l_block;
875    }
876 }
877 
878 template<class RandIt, class Compare, class XBuf>
879 void stable_sort( RandIt first, RandIt last, Compare comp, XBuf & xbuf)
880 {
881    typedef typename iterator_traits<RandIt>::size_type size_type;
882    size_type const len = size_type(last - first);
883    size_type const half_len = len/2 + (len&1);
884    if(std::size_t(xbuf.capacity() - xbuf.size()) >= half_len) {
885       merge_sort(first, last, comp, xbuf.data()+xbuf.size());
886    }
887    else{
888       slow_stable_sort(first, last, comp);
889    }
890 }
891 
892 template<class RandIt, class Comp, class XBuf>
893 void initialize_keys( RandIt first, RandIt last
894                     , Comp comp
895                     , XBuf & xbuf)
896 {
897    stable_sort(first, last, comp, xbuf);
898 }
899 
900 template<class RandIt, class U>
901 void initialize_keys( RandIt first, RandIt last
902                     , less
903                     , U &)
904 {
905    typedef typename iterator_traits<RandIt>::value_type value_type;
906    std::size_t count = std::size_t(last - first);
907    for(std::size_t i = 0; i != count; ++i){
908       *first = value_type(i);
909       ++first;
910    }
911 }
912 
913 template<class RandIt>
914 void move_data_backward( RandIt cur_pos
915               , typename iterator_traits<RandIt>::size_type const l_data
916               , RandIt new_pos
917               , bool const xbuf_used)
918 {
919    //Move buffer to the total combination right
920    if(xbuf_used){
921       boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
922    }
923    else{
924       boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data);
925       //Rotate does less moves but it seems slower due to cache issues
926       //rotate_gcd(first-l_block, first+len-l_block, first+len);
927    }
928 }
929 
930 template<class RandIt>
931 void move_data_forward( RandIt cur_pos
932               , typename iterator_traits<RandIt>::size_type const l_data
933               , RandIt new_pos
934               , bool const xbuf_used)
935 {
936    //Move buffer to the total combination right
937    if(xbuf_used){
938       boost::move(cur_pos, cur_pos+l_data, new_pos);
939    }
940    else{
941       boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos);
942       //Rotate does less moves but it seems slower due to cache issues
943       //rotate_gcd(first-l_block, first+len-l_block, first+len);
944    }
945 }
946 
947 template <class Unsigned>
948 Unsigned calculate_total_combined(Unsigned const len, Unsigned const l_prev_merged, Unsigned *pl_irreg_combined = 0)
949 {
950    typedef Unsigned size_type;
951 
952    size_type const l_combined = 2*l_prev_merged;
953    size_type l_irreg_combined = len%l_combined;
954    size_type l_total_combined = len;
955    if(l_irreg_combined <= l_prev_merged){
956       l_total_combined -= l_irreg_combined;
957       l_irreg_combined = 0;
958    }
959    if(pl_irreg_combined)
960       *pl_irreg_combined = l_irreg_combined;
961    return l_total_combined;
962 }
963 
964 template<class RandItKeys, class KeyCompare, class SizeType, class XBuf>
965 void combine_params
966    ( RandItKeys const keys
967    , KeyCompare key_comp
968    , SizeType l_combined
969    , SizeType const l_prev_merged
970    , SizeType const l_block
971    , XBuf & xbuf
972    //Output
973    , SizeType &n_block_a
974    , SizeType &n_block_b
975    , SizeType &l_irreg1
976    , SizeType &l_irreg2
977    //Options
978    , bool do_initialize_keys = true)
979 {
980    typedef SizeType   size_type;
981 
982    //Initial parameters for selection sort blocks
983    l_irreg1 = l_prev_merged%l_block;
984    l_irreg2 = (l_combined-l_irreg1)%l_block;
985    BOOST_ASSERT(((l_combined-l_irreg1-l_irreg2)%l_block) == 0);
986    size_type const n_reg_block = (l_combined-l_irreg1-l_irreg2)/l_block;
987    n_block_a = l_prev_merged/l_block;
988    n_block_b = n_reg_block - n_block_a;
989    BOOST_ASSERT(n_reg_block>=n_block_a);
990 
991    //Key initialization
992    if (do_initialize_keys) {
993       initialize_keys(keys, keys + needed_keys_count(n_block_a, n_block_b), key_comp, xbuf);
994    }
995 }
996 
997 template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op>
998 RandItB op_buffered_partial_merge_and_swap_to_range1_and_buffer
999    ( RandIt1 first1, RandIt1 const last1
1000    , RandIt2 &rfirst2, RandIt2 const last2, RandIt2 &rfirst_min
1001    , RandItB &rfirstb, Compare comp, Op op )
1002 {
1003    RandItB firstb = rfirstb;
1004    RandItB lastb  = firstb;
1005    RandIt2 first2 = rfirst2;
1006 
1007    //Move to buffer while merging
1008    //Three way moves need less moves when op is swap_op so use it
1009    //when merging elements from range2 to the destination occupied by range1
1010    if(first1 != last1 && first2 != last2){
1011       RandIt2 first_min = rfirst_min;
1012       op(four_way_t(), first2++, first_min++, first1++, lastb++);
1013 
1014       while(first1 != last1){
1015          if(first2 == last2){
1016             lastb = op(forward_t(), first1, last1, firstb);
1017             break;
1018          }
1019 
1020          if(comp(*first_min, *firstb)){
1021             op( four_way_t(), first2++, first_min++, first1++, lastb++);
1022          }
1023          else{
1024             op(three_way_t(), firstb++, first1++, lastb++);
1025          }
1026       }
1027       rfirst2 = first2;
1028       rfirstb = firstb;
1029       rfirst_min = first_min;
1030    }
1031 
1032    return lastb;
1033 }
1034 
1035 //////////////////////////////////
1036 //
1037 //          partial_merge
1038 //
1039 //////////////////////////////////
1040 template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op>
1041 OutputIt op_partial_merge_impl
1042    (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op)
1043 {
1044    InputIt1 first1(r_first1);
1045    InputIt2 first2(r_first2);
1046    if(first2 != last2 && last1 != first1)
1047    while(1){
1048       if(comp(*first2, *first1)) {
1049          op(first2++, d_first++);
1050          if(first2 == last2){
1051             break;
1052          }
1053       }
1054       else{
1055          op(first1++, d_first++);
1056          if(first1 == last1){
1057             break;
1058          }
1059       }
1060    }
1061    r_first1 = first1;
1062    r_first2 = first2;
1063    return d_first;
1064 }
1065 
1066 template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op>
1067 OutputIt op_partial_merge
1068    (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op, bool is_stable)
1069 {
1070    return is_stable ? op_partial_merge_impl(r_first1, last1, r_first2, last2, d_first, comp, op)
1071                     : op_partial_merge_impl(r_first1, last1, r_first2, last2, d_first, antistable<Compare>(comp), op);
1072 }
1073 
1074 //////////////////////////////////
1075 //
1076 //    partial_merge_and_swap
1077 //
1078 //////////////////////////////////
1079 template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op>
1080 OutputIt op_partial_merge_and_swap_impl
1081    (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op)
1082 {
1083    InputIt1 first1(r_first1);
1084    InputIt2 first2(r_first2);
1085 
1086    if(first2 != last2 && last1 != first1) {
1087       InputIt2 first_min(r_first_min);
1088       bool non_empty_ranges = true;
1089       do{
1090          if(comp(*first_min, *first1)) {
1091             op(three_way_t(), first2++, first_min++, d_first++);
1092             non_empty_ranges = first2 != last2;
1093          }
1094          else{
1095             op(first1++, d_first++);
1096             non_empty_ranges = first1 != last1;
1097          }
1098       } while(non_empty_ranges);
1099       r_first_min = first_min;
1100       r_first1 = first1;
1101       r_first2 = first2;
1102    }
1103    return d_first;
1104 }
1105 
1106 template<class RandIt, class InputIt2, class OutputIt, class Compare, class Op>
1107 OutputIt op_partial_merge_and_swap
1108    (RandIt &r_first1, RandIt const last1, InputIt2 &r_first2, InputIt2 const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op, bool is_stable)
1109 {
1110    return is_stable ? op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, comp, op)
1111                     : op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, antistable<Compare>(comp), op);
1112 }
1113 
1114 template<class RandIt, class RandItBuf, class Compare, class Op>
1115 RandIt op_partial_merge_and_save_impl
1116    ( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min
1117    , RandItBuf &buf_first1_in_out, RandItBuf &buf_last1_in_out
1118    , Compare comp, Op op
1119    )
1120 {
1121    RandItBuf buf_first1 = buf_first1_in_out;
1122    RandItBuf buf_last1  = buf_last1_in_out;
1123    RandIt first2(rfirst2);
1124 
1125    bool const do_swap = first2 != first_min;
1126    if(buf_first1 == buf_last1){
1127       //Skip any element that does not need to be moved
1128       RandIt new_first1 = skip_until_merge(first1, last1, *first_min, comp);
1129       buf_first1 += (new_first1-first1);
1130       first1 = new_first1;
1131       buf_last1  = do_swap ? op_buffered_partial_merge_and_swap_to_range1_and_buffer(first1, last1, first2, last2, first_min, buf_first1, comp, op)
1132                            : op_buffered_partial_merge_to_range1_and_buffer    (first1, last1, first2, last2, buf_first1, comp, op);
1133       first1 = last1;
1134    }
1135    else{
1136       BOOST_ASSERT((last1-first1) == (buf_last1 - buf_first1));
1137    }
1138 
1139    //Now merge from buffer
1140    first1 = do_swap ? op_partial_merge_and_swap_impl(buf_first1, buf_last1, first2, last2, first_min, first1, comp, op)
1141                     : op_partial_merge_impl    (buf_first1, buf_last1, first2, last2, first1, comp, op);
1142    buf_first1_in_out = buf_first1;
1143    buf_last1_in_out  = buf_last1;
1144    rfirst2 = first2;
1145    return first1;
1146 }
1147 
1148 template<class RandIt, class RandItBuf, class Compare, class Op>
1149 RandIt op_partial_merge_and_save
1150    ( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min
1151    , RandItBuf &buf_first1_in_out
1152    , RandItBuf &buf_last1_in_out
1153    , Compare comp
1154    , Op op
1155    , bool is_stable)
1156 {
1157    return is_stable
1158       ? op_partial_merge_and_save_impl
1159          (first1, last1, rfirst2, last2, first_min, buf_first1_in_out, buf_last1_in_out, comp, op)
1160       : op_partial_merge_and_save_impl
1161          (first1, last1, rfirst2, last2, first_min, buf_first1_in_out, buf_last1_in_out, antistable<Compare>(comp), op)
1162       ;
1163 }
1164 
1165 
1166 
1167 template<class RandItKeys, class KeyCompare, class RandIt, class RandIt2, class OutputIt, class Compare, class Op>
1168 OutputIt op_merge_blocks_with_irreg
1169    ( RandItKeys key_first
1170    , RandItKeys key_mid
1171    , KeyCompare key_comp
1172    , RandIt first_reg
1173    , RandIt2 &first_irr
1174    , RandIt2 const last_irr
1175    , OutputIt dest
1176    , typename iterator_traits<RandIt>::size_type const l_block
1177    , typename iterator_traits<RandIt>::size_type n_block_left
1178    , typename iterator_traits<RandIt>::size_type min_check
1179    , typename iterator_traits<RandIt>::size_type max_check
1180    , Compare comp, bool const is_stable, Op op)
1181 {
1182    typedef typename iterator_traits<RandIt>::size_type size_type;
1183 
1184    for(; n_block_left; --n_block_left, ++key_first, min_check -= min_check != 0, max_check -= max_check != 0){
1185       size_type next_key_idx = find_next_block(key_first, key_comp, first_reg, l_block, min_check, max_check, comp);
1186       max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left);
1187       RandIt const last_reg  = first_reg + l_block;
1188       RandIt first_min = first_reg + next_key_idx*l_block;
1189       RandIt const last_min  = first_min + l_block; (void)last_min;
1190 
1191       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_reg, last_reg, comp));
1192       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!next_key_idx || is_sorted(first_min, last_min, comp));
1193       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((!next_key_idx || !comp(*first_reg, *first_min )));
1194 
1195       OutputIt orig_dest = dest; (void)orig_dest;
1196       dest = next_key_idx ? op_partial_merge_and_swap(first_irr, last_irr, first_reg, last_reg, first_min, dest, comp, op, is_stable)
1197                           : op_partial_merge         (first_irr, last_irr, first_reg, last_reg, dest, comp, op, is_stable);
1198       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(orig_dest, dest, comp));
1199 
1200       if(first_reg == dest){
1201          dest = next_key_idx ? ::boost::adl_move_swap_ranges(first_min, last_min, first_reg)
1202                              : last_reg;
1203       }
1204       else{
1205          dest = next_key_idx ? op(three_way_forward_t(), first_reg, last_reg, first_min, dest)
1206                              : op(forward_t(), first_reg, last_reg, dest);
1207       }
1208 
1209       RandItKeys const key_next(key_first + next_key_idx);
1210       swap_and_update_key(key_next, key_first, key_mid, last_reg, last_reg, first_min);
1211 
1212       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(orig_dest, dest, comp));
1213       first_reg = last_reg;
1214    }
1215    return dest;
1216 }
1217 
1218 template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op>
1219 void op_merge_blocks_left
1220    ( RandItKeys const key_first
1221    , KeyCompare key_comp
1222    , RandIt const first
1223    , typename iterator_traits<RandIt>::size_type const l_block
1224    , typename iterator_traits<RandIt>::size_type const l_irreg1
1225    , typename iterator_traits<RandIt>::size_type const n_block_a
1226    , typename iterator_traits<RandIt>::size_type const n_block_b
1227    , typename iterator_traits<RandIt>::size_type const l_irreg2
1228    , Compare comp, Op op)
1229 {
1230    typedef typename iterator_traits<RandIt>::size_type size_type;
1231    size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count;
1232 //   BOOST_ASSERT(n_block_a || n_block_b);
1233    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp));
1234    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a]));
1235 
1236    size_type n_block_b_left = n_block_b;
1237    size_type n_block_a_left = n_block_a;
1238    size_type n_block_left = n_block_b + n_block_a;
1239    RandItKeys key_mid(key_first + n_block_a);
1240 
1241    RandIt buffer = first - l_block;
1242    RandIt first1 = first;
1243    RandIt last1  = first1 + l_irreg1;
1244    RandIt first2 = last1;
1245    RandIt const irreg2 = first2 + n_block_left*l_block;
1246    bool is_range1_A = true;
1247 
1248    RandItKeys key_range2(key_first);
1249 
1250    ////////////////////////////////////////////////////////////////////////////
1251    //Process all regular blocks before the irregular B block
1252    ////////////////////////////////////////////////////////////////////////////
1253    size_type min_check = n_block_a == n_block_left ? 0u : n_block_a;
1254    size_type max_check = min_value(min_check+1, n_block_left);
1255    for (; n_block_left; --n_block_left, ++key_range2, min_check -= min_check != 0, max_check -= max_check != 0) {
1256       size_type const next_key_idx = find_next_block(key_range2, key_comp, first2, l_block, min_check, max_check, comp);
1257       max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left);
1258       RandIt const first_min = first2 + next_key_idx*l_block;
1259       RandIt const last_min  = first_min + l_block; (void)last_min;
1260       RandIt const last2  = first2 + l_block;
1261 
1262       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first1, last1, comp));
1263       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp));
1264       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_left || is_sorted(first_min, last_min, comp));
1265 
1266       //Check if irregular b block should go here.
1267       //If so, break to the special code handling the irregular block
1268       if (!n_block_b_left &&
1269             ( (l_irreg2 && comp(*irreg2, *first_min)) || (!l_irreg2 && is_range1_A)) ){
1270          break;
1271       }
1272 
1273       RandItKeys const key_next(key_range2 + next_key_idx);
1274       bool const is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid);
1275 
1276       bool const is_buffer_middle = last1 == buffer;
1277       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT( ( is_buffer_middle && size_type(first2-buffer) == l_block && buffer == last1) ||
1278                                           (!is_buffer_middle && size_type(first1-buffer) == l_block && first2 == last1));
1279 
1280       if(is_range1_A == is_range2_A){
1281          BOOST_ASSERT((first1 == last1) || !comp(*first_min, last1[-1]));
1282          if(!is_buffer_middle){
1283             buffer = op(forward_t(), first1, last1, buffer);
1284          }
1285          swap_and_update_key(key_next, key_range2, key_mid, first2, last2, first_min);
1286          first1 = first2;
1287          last1  = last2;
1288       }
1289       else {
1290          RandIt unmerged;
1291          RandIt buf_beg;
1292          RandIt buf_end;
1293          if(is_buffer_middle){
1294             buf_end = buf_beg = first2 - (last1-first1);
1295             unmerged = op_partial_merge_and_save( first1, last1, first2, last2, first_min
1296                                                 , buf_beg, buf_end, comp, op, is_range1_A);
1297          }
1298          else{
1299             buf_beg = first1;
1300             buf_end = last1;
1301             unmerged = op_partial_merge_and_save
1302                (buffer, buffer+(last1-first1), first2, last2, first_min, buf_beg, buf_end, comp, op, is_range1_A);
1303          }
1304          (void)unmerged;
1305          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, unmerged, comp));
1306 
1307          swap_and_update_key( key_next, key_range2, key_mid, first2, last2
1308                             , last_min - size_type(last2 - first2));
1309 
1310          if(buf_beg != buf_end){  //range2 exhausted: is_buffer_middle for the next iteration
1311             first1 = buf_beg;
1312             last1  = buf_end;
1313             BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(buf_end == (last2-l_block));
1314             buffer = last1;
1315          }
1316          else{ //range1 exhausted: !is_buffer_middle for the next iteration
1317             first1 = first2;
1318             last1  = last2;
1319             buffer = first2 - l_block;
1320             is_range1_A = is_range2_A;
1321          }
1322       }
1323       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT( (is_range2_A && n_block_a_left) || (!is_range2_A && n_block_b_left));
1324       is_range2_A ? --n_block_a_left : --n_block_b_left;
1325       first2 = last2;
1326    }
1327 
1328    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_range2 + n_block_left, key_comp, *key_mid));
1329    BOOST_ASSERT(!n_block_b_left);
1330 
1331    ////////////////////////////////////////////////////////////////////////////
1332    //Process remaining range 1 left before the irregular B block
1333    ////////////////////////////////////////////////////////////////////////////
1334    bool const is_buffer_middle = last1 == buffer;
1335    RandIt first_irr2 = irreg2;
1336    RandIt const last_irr2  = first_irr2 + l_irreg2;
1337    if(l_irreg2 && is_range1_A){
1338       if(is_buffer_middle){
1339          first1 = skip_until_merge(first1, last1, *first_irr2, comp);
1340          //Even if we copy backward, no overlapping occurs so use forward copy
1341          //that can be faster specially with trivial types
1342          RandIt const new_first1 = first2 - (last1 - first1);
1343          op(forward_t(), first1, last1, new_first1);
1344          first1 = new_first1;
1345          last1 = first2;
1346          buffer = first1 - l_block;
1347       }
1348       buffer = op_partial_merge_impl(first1, last1, first_irr2, last_irr2, buffer, comp, op);
1349       buffer = op(forward_t(), first1, last1, buffer);
1350    }
1351    else if(!is_buffer_middle){
1352       buffer = op(forward_t(), first1, last1, buffer);
1353    }
1354    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, buffer, comp));
1355 
1356    ////////////////////////////////////////////////////////////////////////////
1357    //Process irregular B block and remaining A blocks
1358    ////////////////////////////////////////////////////////////////////////////
1359    buffer = op_merge_blocks_with_irreg
1360       ( key_range2, key_mid, key_comp, first2, first_irr2, last_irr2
1361       , buffer, l_block, n_block_left, min_check, max_check, comp, false, op);
1362    buffer = op(forward_t(), first_irr2, last_irr2, buffer);(void)buffer;
1363    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, buffer, comp));
1364 }
1365 
1366 // first - first element to merge.
1367 // first[-l_block, 0) - buffer (if use_buf == true)
1368 // l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded
1369 // keys - sequence of keys, in same order as blocks. key<midkey means stream A
1370 // n_bef_irreg2/n_aft_irreg2 are regular blocks
1371 // l_irreg2 is a irregular block, that is to be combined after n_bef_irreg2 blocks and before n_aft_irreg2 blocks
1372 // If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks).
1373 template<class RandItKeys, class KeyCompare, class RandIt, class Compare>
1374 void merge_blocks_left
1375    ( RandItKeys const key_first
1376    , KeyCompare key_comp
1377    , RandIt const first
1378    , typename iterator_traits<RandIt>::size_type const l_block
1379    , typename iterator_traits<RandIt>::size_type const l_irreg1
1380    , typename iterator_traits<RandIt>::size_type const n_block_a
1381    , typename iterator_traits<RandIt>::size_type const n_block_b
1382    , typename iterator_traits<RandIt>::size_type const l_irreg2
1383    , Compare comp
1384    , bool const xbuf_used)
1385 {
1386    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + needed_keys_count(n_block_a, n_block_b), key_comp, key_first[n_block_a]));
1387    if(xbuf_used){
1388       op_merge_blocks_left
1389          (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op());
1390    }
1391    else{
1392       op_merge_blocks_left
1393          (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op());
1394    }
1395 }
1396 
1397 
1398 // first - first element to merge.
1399 // [first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2, first+l_block*(n_bef_irreg2+n_aft_irreg2+1)+l_irreg2) - buffer
1400 // l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded
1401 // keys - sequence of keys, in same order as blocks. key<midkey means stream A
1402 // n_bef_irreg2/n_aft_irreg2 are regular blocks
1403 // l_irreg2 is a irregular block, that is to be combined after n_bef_irreg2 blocks and before n_aft_irreg2 blocks
1404 // If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks).
1405 template<class RandItKeys, class KeyCompare, class RandIt, class Compare>
1406 void merge_blocks_right
1407    ( RandItKeys const key_first
1408    , KeyCompare key_comp
1409    , RandIt const first
1410    , typename iterator_traits<RandIt>::size_type const l_block
1411    , typename iterator_traits<RandIt>::size_type const n_block_a
1412    , typename iterator_traits<RandIt>::size_type const n_block_b
1413    , typename iterator_traits<RandIt>::size_type const l_irreg2
1414    , Compare comp
1415    , bool const xbuf_used)
1416 {
1417    merge_blocks_left
1418       ( (make_reverse_iterator)(key_first + needed_keys_count(n_block_a, n_block_b))
1419       , inverse<KeyCompare>(key_comp)
1420       , (make_reverse_iterator)(first + ((n_block_a+n_block_b)*l_block+l_irreg2))
1421       , l_block
1422       , l_irreg2
1423       , n_block_b
1424       , n_block_a
1425       , 0
1426       , inverse<Compare>(comp), xbuf_used);
1427 }
1428 
1429 template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op, class RandItBuf>
1430 void op_merge_blocks_with_buf
1431    ( RandItKeys key_first
1432    , KeyCompare key_comp
1433    , RandIt const first
1434    , typename iterator_traits<RandIt>::size_type const l_block
1435    , typename iterator_traits<RandIt>::size_type const l_irreg1
1436    , typename iterator_traits<RandIt>::size_type const n_block_a
1437    , typename iterator_traits<RandIt>::size_type const n_block_b
1438    , typename iterator_traits<RandIt>::size_type const l_irreg2
1439    , Compare comp
1440    , Op op
1441    , RandItBuf const buf_first)
1442 {
1443    typedef typename iterator_traits<RandIt>::size_type size_type;
1444    size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count;
1445    //BOOST_ASSERT(n_block_a || n_block_b);
1446    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp));
1447    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a]));
1448 
1449    size_type n_block_b_left = n_block_b;
1450    size_type n_block_a_left = n_block_a;
1451    size_type n_block_left = n_block_b + n_block_a;
1452    RandItKeys key_mid(key_first + n_block_a);
1453 
1454    RandItBuf buffer = buf_first;
1455    RandItBuf buffer_end = buffer;
1456    RandIt first1 = first;
1457    RandIt last1  = first1 + l_irreg1;
1458    RandIt first2 = last1;
1459    RandIt const first_irr2 = first2 + n_block_left*l_block;
1460    bool is_range1_A = true;
1461 
1462    RandItKeys key_range2(key_first);
1463 
1464    ////////////////////////////////////////////////////////////////////////////
1465    //Process all regular blocks before the irregular B block
1466    ////////////////////////////////////////////////////////////////////////////
1467    size_type min_check = n_block_a == n_block_left ? 0u : n_block_a;
1468    size_type max_check = min_value(min_check+1, n_block_left);
1469    for (; n_block_left; --n_block_left, ++key_range2, min_check -= min_check != 0, max_check -= max_check != 0) {
1470       size_type const next_key_idx = find_next_block(key_range2, key_comp, first2, l_block, min_check, max_check, comp);
1471       max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left);
1472       RandIt       first_min = first2 + next_key_idx*l_block;
1473       RandIt const last_min  = first_min + l_block; (void)last_min;
1474       RandIt const last2  = first2 + l_block;
1475 
1476       bool const buffer_empty = buffer == buffer_end; (void)buffer_empty;
1477       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(buffer_empty ? is_sorted(first1, last1, comp) : is_sorted(buffer, buffer_end, comp));
1478       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp));
1479       BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_left || is_sorted(first_min, last_min, comp));
1480 
1481       //Check if irregular b block should go here.
1482       //If so, break to the special code handling the irregular block
1483       if (!n_block_b_left &&
1484             ( (l_irreg2 && comp(*first_irr2, *first_min)) || (!l_irreg2 && is_range1_A)) ){
1485          break;
1486       }
1487 
1488       RandItKeys const key_next(key_range2 + next_key_idx);
1489       bool const is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid);
1490 
1491       if(is_range1_A == is_range2_A){
1492          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((first1 == last1) || (buffer_empty ? !comp(*first_min, last1[-1]) : !comp(*first_min, buffer_end[-1])));
1493          //If buffered, put those elements in place
1494          RandIt res = op(forward_t(), buffer, buffer_end, first1);
1495          buffer    = buffer_end = buf_first;
1496          BOOST_ASSERT(buffer_empty || res == last1); (void)res;
1497          swap_and_update_key(key_next, key_range2, key_mid, first2, last2, first_min);
1498          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp));
1499          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, last_min, comp));
1500          first1 = first2;
1501          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, first1, comp));
1502       }
1503       else {
1504          RandIt const unmerged = op_partial_merge_and_save(first1, last1, first2, last2, first_min, buffer, buffer_end, comp, op, is_range1_A);
1505          bool const is_range_1_empty = buffer == buffer_end;
1506          BOOST_ASSERT(is_range_1_empty || (buffer_end-buffer) == (last1+l_block-unmerged));
1507          if(is_range_1_empty){
1508             buffer    = buffer_end = buf_first;
1509             first_min = last_min - (last2 - first2);
1510          }
1511          else{
1512             first_min = last_min;
1513          }
1514          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!is_range_1_empty || (last_min-first_min) == (last2-unmerged));
1515          swap_and_update_key(key_next, key_range2, key_mid, first2, last2, first_min);
1516 
1517          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, last_min, comp));
1518          is_range1_A ^= is_range_1_empty;
1519          first1 = unmerged;
1520          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, unmerged, comp));
1521       }
1522       BOOST_ASSERT( (is_range2_A && n_block_a_left) || (!is_range2_A && n_block_b_left));
1523       is_range2_A ? --n_block_a_left : --n_block_b_left;
1524       last1 += l_block;
1525       first2 = last2;
1526    }
1527 
1528    RandIt res = op(forward_t(), buffer, buffer_end, first1); (void)res;
1529    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, res, comp));
1530 
1531    ////////////////////////////////////////////////////////////////////////////
1532    //Process irregular B block and remaining A blocks
1533    ////////////////////////////////////////////////////////////////////////////
1534    RandIt const last_irr2 = first_irr2 + l_irreg2;
1535    op(forward_t(), first_irr2, first_irr2+l_irreg2, buf_first);
1536    buffer = buf_first;
1537    buffer_end = buffer+l_irreg2;
1538 
1539    reverse_iterator<RandItBuf> rbuf_beg(buffer_end);
1540    RandIt dest = op_merge_blocks_with_irreg
1541       ((make_reverse_iterator)(key_first + n_block_b + n_block_a), (make_reverse_iterator)(key_mid), inverse<KeyCompare>(key_comp)
1542       , (make_reverse_iterator)(first_irr2), rbuf_beg
1543       , (make_reverse_iterator)(buffer), (make_reverse_iterator)(last_irr2)
1544       , l_block, n_block_left, 0, n_block_left
1545       , inverse<Compare>(comp), true, op).base();
1546    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(dest, last_irr2, comp));
1547 
1548    buffer_end = rbuf_beg.base();
1549    BOOST_ASSERT((dest-last1) == (buffer_end-buffer));
1550    op_merge_with_left_placed(is_range1_A ? first1 : last1, last1, dest, buffer, buffer_end, comp, op);
1551 
1552    BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp));
1553 }
1554 
1555 template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class RandItBuf>
1556 void merge_blocks_with_buf
1557    ( RandItKeys key_first
1558    , KeyCompare key_comp
1559    , RandIt const first
1560    , typename iterator_traits<RandIt>::size_type const l_block
1561    , typename iterator_traits<RandIt>::size_type const l_irreg1
1562    , typename iterator_traits<RandIt>::size_type const n_block_a
1563    , typename iterator_traits<RandIt>::size_type const n_block_b
1564    , typename iterator_traits<RandIt>::size_type const l_irreg2
1565    , Compare comp
1566    , RandItBuf const buf_first
1567    , bool const xbuf_used)
1568 {
1569    if(xbuf_used){
1570       op_merge_blocks_with_buf
1571          (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), buf_first);
1572    }
1573    else{
1574       op_merge_blocks_with_buf
1575          (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), buf_first);
1576    }
1577 }
1578 
1579 template<class RandIt, class Compare, class Op>
1580 typename iterator_traits<RandIt>::size_type
1581    op_insertion_sort_step_left
1582       ( RandIt const first
1583       , typename iterator_traits<RandIt>::size_type const length
1584       , typename iterator_traits<RandIt>::size_type const step
1585       , Compare comp, Op op)
1586 {
1587    typedef typename iterator_traits<RandIt>::size_type size_type;
1588    size_type const s = min_value<size_type>(step, AdaptiveSortInsertionSortThreshold);
1589    size_type m = 0;
1590 
1591    while((length - m) > s){
1592       insertion_sort_op(first+m, first+m+s, first+m-s, comp, op);
1593       m += s;
1594    }
1595    insertion_sort_op(first+m, first+length, first+m-s, comp, op);
1596    return s;
1597 }
1598 
1599 template<class RandIt, class Compare>
1600 typename iterator_traits<RandIt>::size_type
1601    insertion_sort_step
1602       ( RandIt const first
1603       , typename iterator_traits<RandIt>::size_type const length
1604       , typename iterator_traits<RandIt>::size_type const step
1605       , Compare comp)
1606 {
1607    typedef typename iterator_traits<RandIt>::size_type size_type;
1608    size_type const s = min_value<size_type>(step, AdaptiveSortInsertionSortThreshold);
1609    size_type m = 0;
1610 
1611    while((length - m) > s){
1612       insertion_sort(first+m, first+m+s, comp);
1613       m += s;
1614    }
1615    insertion_sort(first+m, first+length, comp);
1616    return s;
1617 }
1618 
1619 template<class RandIt, class Compare, class Op>
1620 typename iterator_traits<RandIt>::size_type
1621    op_merge_left_step_multiple
1622       ( RandIt first_block
1623       , typename iterator_traits<RandIt>::size_type const elements_in_blocks
1624       , typename iterator_traits<RandIt>::size_type l_merged
1625       , typename iterator_traits<RandIt>::size_type const l_build_buf
1626       , typename iterator_traits<RandIt>::size_type l_left_space
1627       , Compare comp
1628       , Op op)
1629 {
1630    typedef typename iterator_traits<RandIt>::size_type size_type;
1631    for(; l_merged < l_build_buf && l_left_space >= l_merged; l_merged*=2){
1632       size_type p0=0;
1633       RandIt pos = first_block;
1634       while((elements_in_blocks - p0) > 2*l_merged) {
1635          op_merge_left(pos-l_merged, pos, pos+l_merged, pos+2*l_merged, comp, op);
1636          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, pos+l_merged, comp));
1637          p0 += 2*l_merged;
1638          pos = first_block+p0;
1639       }
1640       if((elements_in_blocks-p0) > l_merged) {
1641          op_merge_left(pos-l_merged, pos, pos+l_merged, first_block+elements_in_blocks, comp, op);
1642          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, pos-l_merged+(first_block+elements_in_blocks-pos), comp));
1643       }
1644       else {
1645          op(forward_t(), pos, first_block+elements_in_blocks, pos-l_merged);
1646          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, first_block+elements_in_blocks-l_merged, comp));
1647       }
1648       first_block -= l_merged;
1649       l_left_space -= l_merged;
1650    }
1651    return l_merged;
1652 }
1653 
1654 template<class RandIt, class Compare, class Op>
1655 void op_merge_right_step_once
1656       ( RandIt first_block
1657       , typename iterator_traits<RandIt>::size_type const elements_in_blocks
1658       , typename iterator_traits<RandIt>::size_type const l_build_buf
1659       , Compare comp
1660       , Op op)
1661 {
1662    typedef typename iterator_traits<RandIt>::size_type size_type;
1663    size_type restk = elements_in_blocks%(2*l_build_buf);
1664    size_type p = elements_in_blocks - restk;
1665    BOOST_ASSERT(0 == (p%(2*l_build_buf)));
1666 
1667    if(restk <= l_build_buf){
1668       op(backward_t(),first_block+p, first_block+p+restk, first_block+p+restk+l_build_buf);
1669    }
1670    else{
1671       op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+restk, first_block+p+restk+l_build_buf, comp, op);
1672    }
1673    while(p>0){
1674       p -= 2*l_build_buf;
1675       op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op);
1676    }
1677 }
1678 
1679 
1680 // build blocks of length 2*l_build_buf. l_build_buf is power of two
1681 // input: [0, l_build_buf) elements are buffer, rest unsorted elements
1682 // output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted
1683 //
1684 // First elements are merged from right to left until elements start
1685 // at first. All old elements [first, first + l_build_buf) are placed at the end
1686 // [first+len-l_build_buf, first+len). To achieve this:
1687 // - If we have external memory to merge, we save elements from the buffer
1688 //   so that a non-swapping merge is used. Buffer elements are restored
1689 //   at the end of the buffer from the external memory.
1690 //
1691 // - When the external memory is not available or it is insufficient
1692 //   for a merge operation, left swap merging is used.
1693 //
1694 // Once elements are merged left to right in blocks of l_build_buf, then a single left
1695 // to right merge step is performed to achieve merged blocks of size 2K.
1696 // If external memory is available, usual merge is used, swap merging otherwise.
1697 //
1698 // As a last step, if auxiliary memory is available in-place merge is performed.
1699 // until all is merged or auxiliary memory is not large enough.
1700 template<class RandIt, class Compare, class XBuf>
1701 typename iterator_traits<RandIt>::size_type
1702    adaptive_sort_build_blocks
1703       ( RandIt const first
1704       , typename iterator_traits<RandIt>::size_type const len
1705       , typename iterator_traits<RandIt>::size_type const l_base
1706       , typename iterator_traits<RandIt>::size_type const l_build_buf
1707       , XBuf & xbuf
1708       , Compare comp)
1709 {
1710    typedef typename iterator_traits<RandIt>::size_type  size_type;
1711    BOOST_ASSERT(l_build_buf <= len);
1712    BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1)));
1713 
1714    //Place the start pointer after the buffer
1715    RandIt first_block = first + l_build_buf;
1716    size_type const elements_in_blocks = len - l_build_buf;
1717 
1718    //////////////////////////////////
1719    // Start of merge to left step
1720    //////////////////////////////////
1721    size_type l_merged = 0u;
1722 
1723    BOOST_ASSERT(l_build_buf);
1724    //If there is no enough buffer for the insertion sort step, just avoid the external buffer
1725    size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity()));
1726    kbuf = kbuf < l_base ? 0 : kbuf;
1727 
1728    if(kbuf){
1729       //Backup internal buffer values in external buffer so they can be overwritten
1730       xbuf.move_assign(first+l_build_buf-kbuf, kbuf);
1731       l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op());
1732 
1733       //Now combine them using the buffer. Elements from buffer can be
1734       //overwritten since they've been saved to xbuf
1735       l_merged = op_merge_left_step_multiple
1736          ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op());
1737 
1738       //Restore internal buffer from external buffer unless kbuf was l_build_buf,
1739       //in that case restoration will happen later
1740       if(kbuf != l_build_buf){
1741          boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks);
1742       }
1743    }
1744    else{
1745       l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp);
1746       rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks);
1747    }
1748 
1749    //Now combine elements using the buffer. Elements from buffer can't be
1750    //overwritten since xbuf was not big enough, so merge swapping elements.
1751    l_merged = op_merge_left_step_multiple
1752       (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op());
1753 
1754    BOOST_ASSERT(l_merged == l_build_buf);
1755 
1756    //////////////////////////////////
1757    // Start of merge to right step
1758    //////////////////////////////////
1759 
1760    //If kbuf is l_build_buf then we can merge right without swapping
1761    //Saved data is still in xbuf
1762    if(kbuf && kbuf == l_build_buf){
1763       op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op());
1764       //Restore internal buffer from external buffer if kbuf was l_build_buf.
1765       //as this operation was previously delayed.
1766       boost::move(xbuf.data(), xbuf.data() + kbuf, first);
1767    }
1768    else{
1769       op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op());
1770    }
1771    xbuf.clear();
1772    //2*l_build_buf or total already merged
1773    return min_value(elements_in_blocks, 2*l_build_buf);
1774 }
1775 
1776 template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf>
1777 void adaptive_sort_combine_blocks
1778    ( RandItKeys const keys
1779    , KeyCompare key_comp
1780    , RandIt const first
1781    , typename iterator_traits<RandIt>::size_type const len
1782    , typename iterator_traits<RandIt>::size_type const l_prev_merged
1783    , typename iterator_traits<RandIt>::size_type const l_block
1784    , bool const use_buf
1785    , bool const xbuf_used
1786    , XBuf & xbuf
1787    , Compare comp
1788    , bool merge_left)
1789 {
1790    (void)xbuf;
1791    typedef typename iterator_traits<RandIt>::size_type   size_type;
1792 
1793    size_type const l_reg_combined   = 2*l_prev_merged;
1794    size_type l_irreg_combined = 0;
1795    size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined);
1796    size_type const n_reg_combined = len/l_reg_combined;
1797    RandIt combined_first = first;
1798 
1799    (void)l_total_combined;
1800    BOOST_ASSERT(l_total_combined <= len);
1801 
1802    size_type const max_i = n_reg_combined + (l_irreg_combined != 0);
1803 
1804    if(merge_left || !use_buf) {
1805       for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) {
1806          //Now merge blocks
1807          bool const is_last = combined_i==n_reg_combined;
1808          size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
1809 
1810          range_xbuf<RandIt, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first);
1811          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
1812          combine_params( keys, key_comp, l_cur_combined
1813                         , l_prev_merged, l_block, rbuf
1814                         , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
1815          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combpar:            ", len + l_block);
1816          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
1817             BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
1818          if(!use_buf){
1819             merge_blocks_bufferless
1820                (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp);
1821          }
1822          else{
1823             merge_blocks_left
1824                (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
1825          }
1826          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After merge_blocks_L: ", len + l_block);
1827       }
1828    }
1829    else{
1830       combined_first += l_reg_combined*(max_i-1);
1831       for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) {
1832          bool const is_last = combined_i==n_reg_combined;
1833          size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined;
1834 
1835          RandIt const combined_last(combined_first+l_cur_combined);
1836          range_xbuf<RandIt, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last);
1837          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
1838          combine_params( keys, key_comp, l_cur_combined
1839                         , l_prev_merged, l_block, rbuf
1840                         , n_block_a, n_block_b, l_irreg1, l_irreg2);  //Outputs
1841          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combpar:            ", len + l_block);
1842          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp));
1843          BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp));
1844          merge_blocks_right
1845             (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used);
1846          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After merge_blocks_R: ", len + l_block);
1847       }
1848    }
1849 }
1850 
1851 //Returns true if buffer is placed in
1852 //[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is
1853 //[buffer,buffer+l_intbuf)
1854 template<class RandIt, class Compare, class XBuf>
1855 bool adaptive_sort_combine_all_blocks
1856    ( RandIt keys
1857    , typename iterator_traits<RandIt>::size_type &n_keys
1858    , RandIt const buffer
1859    , typename iterator_traits<RandIt>::size_type const l_buf_plus_data
1860    , typename iterator_traits<RandIt>::size_type l_merged
1861    , typename iterator_traits<RandIt>::size_type &l_intbuf
1862    , XBuf & xbuf
1863    , Compare comp)
1864 {
1865    typedef typename iterator_traits<RandIt>::size_type  size_type;
1866    RandIt const first = buffer + l_intbuf;
1867    size_type const l_data = l_buf_plus_data - l_intbuf;
1868    size_type const l_unique = l_intbuf+n_keys;
1869    //Backup data to external buffer once if possible
1870    bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity();
1871    if(common_xbuf){
1872       xbuf.move_assign(buffer, l_intbuf);
1873    }
1874 
1875    bool prev_merge_left = true;
1876    size_type l_prev_total_combined = l_merged, l_prev_block = 0;
1877    bool prev_use_internal_buf = true;
1878 
1879    for( size_type n = 0; l_data > l_merged
1880       ; l_merged*=2
1881       , ++n){
1882       //If l_intbuf is non-zero, use that internal buffer.
1883       //    Implies l_block == l_intbuf && use_internal_buf == true
1884       //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer,
1885       //    Implies l_block == n_keys/2 && use_internal_buf == true
1886       //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false)
1887       bool use_internal_buf = false;
1888       size_type const l_block = lblock_for_combine(l_intbuf, n_keys, 2*l_merged, use_internal_buf);
1889       BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf));
1890       BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) );
1891       BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) );
1892 
1893       bool const is_merge_left = (n&1) == 0;
1894       size_type const l_total_combined = calculate_total_combined(l_data, l_merged);
1895       if(n && prev_use_internal_buf && prev_merge_left){
1896          if(is_merge_left || !use_internal_buf){
1897             move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf);
1898          }
1899          else{
1900             //Put the buffer just after l_total_combined
1901             RandIt const buf_end = first+l_prev_total_combined;
1902             RandIt const buf_beg = buf_end-l_block;
1903             if(l_prev_total_combined > l_total_combined){
1904                size_type const l_diff = l_prev_total_combined - l_total_combined;
1905                move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf);
1906             }
1907             else if(l_prev_total_combined < l_total_combined){
1908                size_type const l_diff = l_total_combined - l_prev_total_combined;
1909                move_data_forward(buf_end, l_diff, buf_beg, common_xbuf);
1910             }
1911          }
1912          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   After move_data     : ", l_data + l_intbuf);
1913       }
1914 
1915       //Combine to form l_merged*2 segments
1916       if(n_keys){
1917          adaptive_sort_combine_blocks
1918             ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block
1919             , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
1920       }
1921       else{
1922          size_type *const uint_keys = xbuf.template aligned_trailing<size_type>();
1923          adaptive_sort_combine_blocks
1924             ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block
1925             , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left);
1926       }
1927 
1928       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? "   After comb blocks L:  " : "   After comb blocks R:  ", l_data + l_intbuf);
1929       prev_merge_left = is_merge_left;
1930       l_prev_total_combined = l_total_combined;
1931       l_prev_block = l_block;
1932       prev_use_internal_buf = use_internal_buf;
1933    }
1934    BOOST_ASSERT(l_prev_total_combined == l_data);
1935    bool const buffer_right = prev_use_internal_buf && prev_merge_left;
1936 
1937    l_intbuf = prev_use_internal_buf ? l_prev_block : 0u;
1938    n_keys = l_unique - l_intbuf;
1939    //Restore data from to external common buffer if used
1940    if(common_xbuf){
1941       if(buffer_right){
1942          boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data);
1943       }
1944       else{
1945          boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer);
1946       }
1947    }
1948    return buffer_right;
1949 }
1950 
1951 template<class RandIt, class Compare, class XBuf>
1952 void stable_merge
1953       ( RandIt first, RandIt const middle, RandIt last
1954       , Compare comp
1955       , XBuf &xbuf)
1956 {
1957    BOOST_ASSERT(xbuf.empty());
1958    typedef typename iterator_traits<RandIt>::size_type   size_type;
1959    size_type const len1  = size_type(middle-first);
1960    size_type const len2  = size_type(last-middle);
1961    size_type const l_min = min_value(len1, len2);
1962    if(xbuf.capacity() >= l_min){
1963       buffered_merge(first, middle, last, comp, xbuf);
1964       xbuf.clear();
1965    }
1966    else{
1967       merge_bufferless(first, middle, last, comp);
1968    }
1969 }
1970 
1971 
1972 template<class RandIt, class Compare, class XBuf>
1973 void adaptive_sort_final_merge( bool buffer_right
1974                               , RandIt const first
1975                               , typename iterator_traits<RandIt>::size_type const l_intbuf
1976                               , typename iterator_traits<RandIt>::size_type const n_keys
1977                               , typename iterator_traits<RandIt>::size_type const len
1978                               , XBuf & xbuf
1979                               , Compare comp)
1980 {
1981    //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf);
1982    xbuf.clear();
1983 
1984    typedef typename iterator_traits<RandIt>::size_type  size_type;
1985    size_type const n_key_plus_buf = l_intbuf+n_keys;
1986    if(buffer_right){
1987       stable_sort(first+len-l_intbuf, first+len, comp, xbuf);
1988       stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf);
1989       stable_sort(first, first+n_keys, comp, xbuf);
1990       stable_merge(first, first+n_keys, first+len, comp, xbuf);
1991    }
1992    else{
1993       stable_sort(first, first+n_key_plus_buf, comp, xbuf);
1994       if(xbuf.capacity() >= n_key_plus_buf){
1995          buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf);
1996       }
1997       else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){
1998          stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf);
1999          stable_merge(first, first+n_keys, first+len, comp, xbuf);
2000       }
2001       else{
2002          merge_bufferless(first, first+n_key_plus_buf, first+len, comp);
2003       }
2004    }
2005    BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   After final_merge   : ", len);
2006 }
2007 
2008 template<class RandIt, class Compare, class Unsigned, class XBuf>
2009 bool adaptive_sort_build_params
2010    (RandIt first, Unsigned const len, Compare comp
2011    , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf
2012    , XBuf & xbuf
2013    )
2014 {
2015    typedef Unsigned size_type;
2016 
2017    //Calculate ideal parameters and try to collect needed unique keys
2018    l_base = 0u;
2019 
2020    //Try to find a value near sqrt(len) that is 2^N*l_base where
2021    //l_base <= AdaptiveSortInsertionSortThreshold. This property is important
2022    //as build_blocks merges to the left iteratively duplicating the
2023    //merged size and all the buffer must be used just before the final
2024    //merge to right step. This guarantees "build_blocks" produces
2025    //segments of size l_build_buf*2, maximizing the classic merge phase.
2026    l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base));
2027 
2028    //The internal buffer can be expanded if there is enough external memory
2029    while(xbuf.capacity() >= l_intbuf*2){
2030       l_intbuf *= 2;
2031    }
2032 
2033    //This is the minimum number of keys to implement the ideal algorithm
2034    //
2035    //l_intbuf is used as buffer plus the key count
2036    size_type n_min_ideal_keys = l_intbuf-1;
2037    while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){
2038       --n_min_ideal_keys;
2039    }
2040    n_min_ideal_keys += 1;
2041    BOOST_ASSERT(n_min_ideal_keys <= l_intbuf);
2042 
2043    if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){
2044       n_keys = 0u;
2045       l_build_buf = l_intbuf;
2046    }
2047    else{
2048       //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that
2049       //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half
2050       //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin.
2051       //
2052       //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed,
2053       //(to be used for keys in combine_all_blocks) as the whole l_build_buf
2054       //will be backuped in the buffer during build_blocks.
2055       bool const non_unique_buf = xbuf.capacity() >= l_intbuf;
2056       size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2;
2057       size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf);
2058 
2059       //If available memory is 2*sqrt(l), then for "build_params"
2060       //the situation is the same as if 2*l_intbuf were collected.
2061       if(non_unique_buf && collected == n_min_ideal_keys){
2062          l_build_buf = l_intbuf;
2063          n_keys = n_min_ideal_keys;
2064       }
2065       else if(collected == 2*l_intbuf){
2066          //l_intbuf*2 elements found. Use all of them in the build phase
2067          l_build_buf = l_intbuf*2;
2068          n_keys = l_intbuf;
2069       }
2070       else if(collected == (n_min_ideal_keys+l_intbuf)){
2071          l_build_buf = l_intbuf;
2072          n_keys = n_min_ideal_keys;
2073       }
2074       //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix
2075       //is possible (due to very low unique keys), then go to a slow sort based on rotations.
2076       else{
2077          BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf));
2078          if(collected < 4){  //No combination possible with less that 4 keys
2079             return false;
2080          }
2081          n_keys = l_intbuf;
2082          while(n_keys&(n_keys-1)){
2083             n_keys &= n_keys-1;  // make it power or 2
2084          }
2085          while(n_keys > collected){
2086             n_keys/=2;
2087          }
2088          //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two
2089          l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold);
2090          l_intbuf = 0;
2091          l_build_buf = n_keys;
2092       }
2093       BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf);
2094    }
2095 
2096    return true;
2097 }
2098 
2099 template<class RandIt, class Compare, class XBuf>
2100 inline void adaptive_merge_combine_blocks( RandIt first
2101                                       , typename iterator_traits<RandIt>::size_type len1
2102                                       , typename iterator_traits<RandIt>::size_type len2
2103                                       , typename iterator_traits<RandIt>::size_type collected
2104                                       , typename iterator_traits<RandIt>::size_type n_keys
2105                                       , typename iterator_traits<RandIt>::size_type l_block
2106                                       , bool use_internal_buf
2107                                       , bool xbuf_used
2108                                       , Compare comp
2109                                       , XBuf & xbuf
2110                                       )
2111 {
2112    typedef typename iterator_traits<RandIt>::size_type size_type;
2113    size_type const len = len1+len2;
2114    size_type const l_combine  = len-collected;
2115    size_type const l_combine1 = len1-collected;
2116 
2117     if(n_keys){
2118       RandIt const first_data = first+collected;
2119       RandIt const keys = first;
2120       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combine: ", len);
2121       if(xbuf_used){
2122          if(xbuf.size() < l_block){
2123             xbuf.initialize_until(l_block, *first);
2124          }
2125          BOOST_ASSERT(xbuf.size() >= l_block);
2126          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
2127          combine_params( keys, comp, l_combine
2128                            , l_combine1, l_block, xbuf
2129                            , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
2130          merge_blocks_with_buf
2131             (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), xbuf_used);
2132          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg xbf: ", len);
2133       }
2134       else{
2135          size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
2136          combine_params( keys, comp, l_combine
2137                            , l_combine1, l_block, xbuf
2138                            , n_block_a, n_block_b, l_irreg1, l_irreg2);   //Outputs
2139          if(use_internal_buf){
2140             merge_blocks_with_buf
2141                (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, first_data-l_block, xbuf_used);
2142             BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A mrg buf: ", len);
2143          }
2144          else{
2145             merge_blocks_bufferless
2146                (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp);
2147             BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg nbf: ", len);
2148          }
2149       }
2150    }
2151    else{
2152       xbuf.shrink_to_fit(l_block);
2153       if(xbuf.size() < l_block){
2154          xbuf.initialize_until(l_block, *first);
2155       }
2156       size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block);
2157       size_type n_block_a, n_block_b, l_irreg1, l_irreg2;
2158       combine_params( uint_keys, less(), l_combine
2159                      , l_combine1, l_block, xbuf
2160                      , n_block_a, n_block_b, l_irreg1, l_irreg2, true);   //Outputs
2161       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A combine: ", len);
2162       BOOST_ASSERT(xbuf.size() >= l_block);
2163       merge_blocks_with_buf
2164          (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), true);
2165       xbuf.clear();
2166       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A mrg buf: ", len);
2167    }
2168 }
2169 
2170 template<class RandIt, class Compare, class XBuf>
2171 inline void adaptive_merge_final_merge( RandIt first
2172                                       , typename iterator_traits<RandIt>::size_type len1
2173                                       , typename iterator_traits<RandIt>::size_type len2
2174                                       , typename iterator_traits<RandIt>::size_type collected
2175                                       , typename iterator_traits<RandIt>::size_type l_intbuf
2176                                       , typename iterator_traits<RandIt>::size_type l_block
2177                                       , bool use_internal_buf
2178                                       , bool xbuf_used
2179                                       , Compare comp
2180                                       , XBuf & xbuf
2181                                       )
2182 {
2183    typedef typename iterator_traits<RandIt>::size_type size_type;
2184    (void)l_block;
2185    size_type n_keys = collected-l_intbuf;
2186    size_type len = len1+len2;
2187    if(use_internal_buf){
2188       if(xbuf_used){
2189          xbuf.clear();
2190          //Nothing to do
2191          if(n_keys){
2192             stable_sort(first, first+n_keys, comp, xbuf);
2193             stable_merge(first, first+n_keys, first+len, comp, xbuf);
2194             BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A key mrg: ", len);
2195          }
2196       }
2197       else{
2198          xbuf.clear();
2199          stable_sort(first, first+collected, comp, xbuf);
2200          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A k/b srt: ", len);
2201          stable_merge(first, first+collected, first+len, comp, xbuf);
2202          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A k/b mrg: ", len);
2203       }
2204    }
2205    else{
2206       xbuf.clear();
2207       stable_sort(first, first+collected, comp, xbuf);
2208       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A k/b srt: ", len);
2209       stable_merge(first, first+collected, first+len1+len2, comp, xbuf);
2210       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2("   A k/b mrg: ", len);
2211    }
2212    BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   A fin mrg: ", len);
2213 }
2214 
2215 template<class SizeType, class Xbuf>
2216 inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout)
2217 {
2218    typedef SizeType size_type;
2219    size_type l_block = rl_block;
2220    size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block;
2221 
2222    while(xbuf.capacity() >= l_block*2){
2223       l_block *= 2;
2224    }
2225 
2226    //This is the minimum number of keys to implement the ideal algorithm
2227    size_type n_keys = len1/l_block+len2/l_block;
2228    while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){
2229       --n_keys;
2230    }
2231    ++n_keys;
2232    BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block));
2233 
2234    if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){
2235       n_keys = 0u;
2236    }
2237    l_intbuf_inout = l_intbuf;
2238    rl_block = l_block;
2239    return n_keys;
2240 }
2241 
2242 ///////////////////////////////////////////////////////////////////////////////////////////
2243 ///////////////////////////////////////////////////////////////////////////////////////////
2244 ///////////////////////////////////////////////////////////////////////////////////////////
2245 ///////////////////////////////////////////////////////////////////////////////////////////
2246 ///////////////////////////////////////////////////////////////////////////////////////////
2247 ///////////////////////////////////////////////////////////////////////////////////////////
2248 ///////////////////////////////////////////////////////////////////////////////////////////
2249 
2250 // Main explanation of the sort algorithm.
2251 //
2252 // csqrtlen = ceil(sqrt(len));
2253 //
2254 // * First, 2*csqrtlen unique elements elements are extracted from elements to be
2255 //   sorted and placed in the beginning of the range.
2256 //
2257 // * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements
2258 //   will be used as auxiliary memory, so trailing len-2*csqrtlen elements are
2259 //   are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step
2260 //   2*csqrtlen unique elements are again the leading elements of the whole range.
2261 //
2262 // * Step "combine_blocks": pairs of previously formed blocks are merged with a different
2263 //   ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the
2264 //   "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen
2265 //   elements, etc) of until all trailing (len-2*csqrtlen) elements are merged.
2266 //
2267 //   In "combine_blocks" len/csqrtlen elements used are as "keys" (markers) to
2268 //   know if elements belong to the first or second block to be merged and another
2269 //   leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step:
2270 //
2271 //   Iteratively until all trailing (len-2*csqrtlen) elements are merged:
2272 //      Iteratively for each pair of previously merged block:
2273 //         * Blocks are divided groups of csqrtlen elements and
2274 //           2*merged_block/csqrtlen keys are sorted to be used as markers
2275 //         * Groups are selection-sorted by first or last element (depending whether they are going
2276 //           to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
2277 //         * Elements of each block pair are merged using the csqrtlen buffer taking into account
2278 //           if they belong to the first half or second half (marked by the key).
2279 //
2280 // * In the final merge step leading elements (2*csqrtlen) are sorted and merged with
2281 //   rotations with the rest of sorted elements in the "combine_blocks" step.
2282 //
2283 // Corner cases:
2284 //
2285 // * If no 2*csqrtlen elements can be extracted:
2286 //
2287 //    * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used
2288 //      as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This
2289 //      means that an additional "combine_blocks" step will be needed to merge all elements.
2290 //
2291 //    * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum,
2292 //      then reduces the number of elements used as buffer and keys in the "build_blocks"
2293 //      and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction
2294 //      then uses a rotation based smart merge.
2295 //
2296 //    * If the minimum number of keys can't be extracted, a rotation-based sorting is performed.
2297 //
2298 // * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used.
2299 //
2300 // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
2301 //   then only csqrtlen elements need to be extracted and "combine_blocks" will use integral
2302 //   keys to combine blocks.
2303 //
2304 // * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks
2305 //   using classic merge and "combine_blocks" will use bigger blocks when merging.
2306 template<class RandIt, class Compare, class XBuf>
2307 void adaptive_sort_impl
2308    ( RandIt first
2309    , typename iterator_traits<RandIt>::size_type const len
2310    , Compare comp
2311    , XBuf & xbuf
2312    )
2313 {
2314    typedef typename iterator_traits<RandIt>::size_type  size_type;
2315 
2316    //Small sorts go directly to insertion sort
2317    if(len <= size_type(AdaptiveSortInsertionSortThreshold)){
2318       insertion_sort(first, first + len, comp);
2319    }
2320    else if((len-len/2) <= xbuf.capacity()){
2321       merge_sort(first, first+len, comp, xbuf.data());
2322    }
2323    else{
2324       //Make sure it is at least four
2325       BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4);
2326 
2327       size_type l_base = 0;
2328       size_type l_intbuf = 0;
2329       size_type n_keys = 0;
2330       size_type l_build_buf = 0;
2331 
2332       //Calculate and extract needed unique elements. If a minimum is not achieved
2333       //fallback to a slow stable sort
2334       if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){
2335          stable_sort(first, first+len, comp, xbuf);
2336       }
2337       else{
2338          BOOST_ASSERT(l_build_buf);
2339          //Otherwise, continue the adaptive_sort
2340          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n   After collect_unique: ", len);
2341          size_type const n_key_plus_buf = l_intbuf+n_keys;
2342          //l_build_buf is always power of two if l_intbuf is zero
2343          BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1))));
2344 
2345          //Classic merge sort until internal buffer and xbuf are exhausted
2346          size_type const l_merged = adaptive_sort_build_blocks
2347             (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp);
2348          BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("   After build_blocks:   ", len);
2349 
2350          //Non-trivial merge
2351          bool const buffer_right = adaptive_sort_combine_all_blocks
2352             (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp);
2353 
2354          //Sort keys and buffer and merge the whole sequence
2355          adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp);
2356       }
2357    }
2358 }
2359 
2360 // Main explanation of the merge algorithm.
2361 //
2362 // csqrtlen = ceil(sqrt(len));
2363 //
2364 // * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect
2365 //   unique elements are extracted from elements to be sorted and placed in the beginning of the range.
2366 //
2367 // * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements
2368 //   are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements.
2369 //
2370 //   Explanation of the "combine_blocks" step:
2371 //
2372 //         * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements.
2373 //           Remaining elements that can't form a group are grouped in front of those elements.
2374 //         * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements.
2375 //           Remaining elements that can't form a group are grouped in the back of those elements.
2376 //         * In parallel the following two steps are performed:
2377 //             *  Groups are selection-sorted by first or last element (depending whether they are going
2378 //                to be merged to left or right) and keys are reordered accordingly as an imitation-buffer.
2379 //             * Elements of each block pair are merged using the csqrtlen buffer taking into account
2380 //                if they belong to the first half or second half (marked by the key).
2381 //
2382 // * In the final merge step leading "to_collect" elements are merged with rotations
2383 //   with the rest of merged elements in the "combine_blocks" step.
2384 //
2385 // Corner cases:
2386 //
2387 // * If no "to_collect" elements can be extracted:
2388 //
2389 //    * If more than a minimum number of elements is extracted
2390 //      then reduces the number of elements used as buffer and keys in the
2391 //      and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction
2392 //      then uses a rotation based smart merge.
2393 //
2394 //    * If the minimum number of keys can't be extracted, a rotation-based merge is performed.
2395 //
2396 // * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed.
2397 //
2398 // * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed.
2399 //
2400 // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t),
2401 //   then no csqrtlen need to be extracted and "combine_blocks" will use integral
2402 //   keys to combine blocks.
2403 template<class RandIt, class Compare, class XBuf>
2404 void adaptive_merge_impl
2405    ( RandIt first
2406    , typename iterator_traits<RandIt>::size_type const len1
2407    , typename iterator_traits<RandIt>::size_type const len2
2408    , Compare comp
2409    , XBuf & xbuf
2410    )
2411 {
2412    typedef typename iterator_traits<RandIt>::size_type size_type;
2413 
2414    if(xbuf.capacity() >= min_value<size_type>(len1, len2)){
2415       buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf);
2416    }
2417    else{
2418       const size_type len = len1+len2;
2419       //Calculate ideal parameters and try to collect needed unique keys
2420       size_type l_block = size_type(ceil_sqrt(len));
2421 
2422       //One range is not big enough to extract keys and the internal buffer so a
2423       //rotation-based based merge will do just fine
2424       if(len1 <= l_block*2 || len2 <= l_block*2){
2425          merge_bufferless(first, first+len1, first+len1+len2, comp);
2426          return;
2427       }
2428 
2429       //Detail the number of keys and internal buffer. If xbuf has enough memory, no
2430       //internal buffer is needed so l_intbuf will remain 0.
2431       size_type l_intbuf = 0;
2432       size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf);
2433       size_type const to_collect = l_intbuf+n_keys;
2434       //Try to extract needed unique values from the first range
2435       size_type const collected  = collect_unique(first, first+len1, to_collect, comp, xbuf);
2436       BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n   A collect: ", len);
2437 
2438       //Not the minimum number of keys is not available on the first range, so fallback to rotations
2439       if(collected != to_collect && collected < 4){
2440          merge_bufferless(first, first+collected, first+len1, comp);
2441          merge_bufferless(first, first + len1, first + len1 + len2, comp);
2442          return;
2443       }
2444 
2445       //If not enough keys but more than minimum, adjust the internal buffer and key count
2446       bool use_internal_buf = collected == to_collect;
2447       if (!use_internal_buf){
2448          l_intbuf = 0u;
2449          n_keys = collected;
2450          l_block  = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf);
2451          //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used
2452          l_intbuf = use_internal_buf ? l_block : 0u;
2453       }
2454 
2455       bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block;
2456       //Merge trailing elements using smart merges
2457       adaptive_merge_combine_blocks(first, len1, len2, collected,   n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf);
2458       //Merge buffer and keys with the rest of the values
2459       adaptive_merge_final_merge   (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf);
2460    }
2461 }
2462 
2463 
2464 }  //namespace detail_adaptive {
2465 }  //namespace movelib {
2466 }  //namespace boost {
2467 
2468 #include <boost/move/detail/config_end.hpp>
2469 
2470 #endif   //#define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP
2471