1 ////////////////////////////////////////////////////////////////////////////// 2 // 3 // (C) Copyright Ion Gaztanaga 2015-2016. 4 // Distributed under the Boost Software License, Version 1.0. 5 // (See accompanying file LICENSE_1_0.txt or copy at 6 // http://www.boost.org/LICENSE_1_0.txt) 7 // 8 // See http://www.boost.org/libs/move for documentation. 9 // 10 ////////////////////////////////////////////////////////////////////////////// 11 // 12 // Stable sorting that works in O(N*log(N)) worst time 13 // and uses O(1) extra memory 14 // 15 ////////////////////////////////////////////////////////////////////////////// 16 // 17 // The main idea of the adaptive_sort algorithm was developed by Andrey Astrelin 18 // and explained in the article from the russian collaborative blog 19 // Habrahabr (http://habrahabr.ru/post/205290/). The algorithm is based on 20 // ideas from B-C. Huang and M. A. Langston explained in their article 21 // "Fast Stable Merging and Sorting in Constant Extra Space (1989-1992)" 22 // (http://comjnl.oxfordjournals.org/content/35/6/643.full.pdf). 23 // 24 // This implementation by Ion Gaztanaga uses previous ideas with additional changes: 25 // 26 // - Use of GCD-based rotation. 27 // - Non power of two buffer-sizes. 28 // - Tries to find sqrt(len)*2 unique keys, so that the merge sort 29 // phase can form up to sqrt(len)*4 segments if enough keys are found. 30 // - The merge-sort phase can take advantage of external memory to 31 // save some additional combination steps. 32 // - Combination phase: Blocks are selection sorted and merged in parallel. 33 // - The combination phase is performed alternating merge to left and merge 34 // to right phases minimizing swaps due to internal buffer repositioning. 35 // - When merging blocks special optimizations are made to avoid moving some 36 // elements twice. 37 // 38 // The adaptive_merge algorithm was developed by Ion Gaztanaga reusing some parts 39 // from the sorting algorithm and implementing an additional block merge algorithm 40 // without moving elements to left or right. 41 ////////////////////////////////////////////////////////////////////////////// 42 #ifndef BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP 43 #define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP 44 45 #include <boost/move/detail/config_begin.hpp> 46 #include <boost/move/detail/reverse_iterator.hpp> 47 #include <boost/move/algo/move.hpp> 48 #include <boost/move/algo/detail/merge.hpp> 49 #include <boost/move/adl_move_swap.hpp> 50 #include <boost/move/algo/detail/insertion_sort.hpp> 51 #include <boost/move/algo/detail/merge_sort.hpp> 52 #include <boost/move/algo/detail/merge.hpp> 53 #include <boost/assert.hpp> 54 #include <boost/cstdint.hpp> 55 56 #ifndef BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 57 #define BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL 1 58 #endif 59 60 #ifdef BOOST_MOVE_ADAPTIVE_SORT_STATS 61 #if BOOST_MOVE_ADAPTIVE_SORT_STATS_LEVEL == 2 62 #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L) \ 63 print_stats(STR, L)\ 64 // 65 66 #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L) \ 67 print_stats(STR, L)\ 68 // 69 #else 70 #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L) \ 71 print_stats(STR, L)\ 72 // 73 74 #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L) 75 #endif 76 #else 77 #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(STR, L) 78 #define BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(STR, L) 79 #endif 80 81 #ifdef BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS 82 #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT BOOST_ASSERT 83 #else 84 #define BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(L) 85 #endif 86 87 88 89 namespace boost { 90 namespace movelib { 91 92 namespace detail_adaptive { 93 94 static const std::size_t AdaptiveSortInsertionSortThreshold = 16; 95 //static const std::size_t AdaptiveSortInsertionSortThreshold = 4; 96 BOOST_STATIC_ASSERT((AdaptiveSortInsertionSortThreshold&(AdaptiveSortInsertionSortThreshold-1)) == 0); 97 98 #if defined BOOST_HAS_INTPTR_T 99 typedef ::boost::uintptr_t uintptr_t; 100 #else 101 typedef std::size_t uintptr_t; 102 #endif 103 104 template<class T> 105 const T &min_value(const T &a, const T &b) 106 { 107 return a < b ? a : b; 108 } 109 110 template<class T> 111 const T &max_value(const T &a, const T &b) 112 { 113 return a > b ? a : b; 114 } 115 116 template<class ForwardIt, class Pred> 117 bool is_sorted(ForwardIt const first, ForwardIt last, Pred pred) 118 { 119 if (first != last) { 120 ForwardIt next = first, cur(first); 121 while (++next != last) { 122 if (pred(*next, *cur)) 123 return false; 124 cur = next; 125 } 126 } 127 return true; 128 } 129 130 #if defined(BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS) 131 132 bool is_sorted(::order_perf_type *first, ::order_perf_type *last, ::order_type_less) 133 { 134 if (first != last) { 135 const order_perf_type *next = first, *cur(first); 136 while (++next != last) { 137 if (!(cur->key < next->key || (cur->key == next->key && cur->val < next->val))) 138 return false; 139 cur = next; 140 } 141 } 142 return true; 143 } 144 145 #endif //BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS 146 147 template<class ForwardIt, class Pred> 148 bool is_sorted_and_unique(ForwardIt first, ForwardIt last, Pred pred) 149 { 150 if (first != last) { 151 ForwardIt next = first; 152 while (++next != last) { 153 if (!pred(*first, *next)) 154 return false; 155 first = next; 156 } 157 } 158 return true; 159 } 160 161 template<class ForwardIt, class Pred, class V> 162 typename iterator_traits<ForwardIt>::size_type 163 count_if_with(ForwardIt first, ForwardIt last, Pred pred, const V &v) 164 { 165 typedef typename iterator_traits<ForwardIt>::size_type size_type; 166 size_type count = 0; 167 while(first != last) { 168 count += static_cast<size_type>(0 != pred(*first, v)); 169 ++first; 170 } 171 return count; 172 } 173 174 template<class T, class RandRawIt = T*> 175 class adaptive_xbuf 176 { 177 adaptive_xbuf(const adaptive_xbuf &); 178 adaptive_xbuf & operator=(const adaptive_xbuf &); 179 180 public: 181 typedef RandRawIt iterator; 182 183 adaptive_xbuf() 184 : m_ptr(), m_size(0), m_capacity(0) 185 {} 186 187 adaptive_xbuf(RandRawIt raw_memory, std::size_t capacity) 188 : m_ptr(raw_memory), m_size(0), m_capacity(capacity) 189 {} 190 191 template<class RandIt> 192 void move_assign(RandIt first, std::size_t n) 193 { 194 if(n <= m_size){ 195 boost::move(first, first+n, m_ptr); 196 std::size_t size = m_size; 197 while(size-- != n){ 198 m_ptr[size].~T(); 199 } 200 m_size = n; 201 } 202 else{ 203 RandRawIt result = boost::move(first, first+m_size, m_ptr); 204 boost::uninitialized_move(first+m_size, first+n, result); 205 m_size = n; 206 } 207 } 208 209 template<class RandIt> 210 void push_back(RandIt first, std::size_t n) 211 { 212 BOOST_ASSERT(m_capacity - m_size >= n); 213 boost::uninitialized_move(first, first+n, m_ptr+m_size); 214 m_size += n; 215 } 216 217 template<class RandIt> 218 iterator add(RandIt it) 219 { 220 BOOST_ASSERT(m_size < m_capacity); 221 RandRawIt p_ret = m_ptr + m_size; 222 ::new(&*p_ret) T(::boost::move(*it)); 223 ++m_size; 224 return p_ret; 225 } 226 227 template<class RandIt> 228 void insert(iterator pos, RandIt it) 229 { 230 if(pos == (m_ptr + m_size)){ 231 this->add(it); 232 } 233 else{ 234 this->add(m_ptr+m_size-1); 235 //m_size updated 236 boost::move_backward(pos, m_ptr+m_size-2, m_ptr+m_size-1); 237 *pos = boost::move(*it); 238 } 239 } 240 241 void set_size(std::size_t size) 242 { 243 m_size = size; 244 } 245 246 void shrink_to_fit(std::size_t const size) 247 { 248 if(m_size > size){ 249 for(std::size_t szt_i = size; szt_i != m_size; ++szt_i){ 250 m_ptr[szt_i].~T(); 251 } 252 m_size = size; 253 } 254 } 255 256 void initialize_until(std::size_t const size, T &t) 257 { 258 BOOST_ASSERT(m_size < m_capacity); 259 if(m_size < size){ 260 ::new((void*)&m_ptr[m_size]) T(::boost::move(t)); 261 ++m_size; 262 for(; m_size != size; ++m_size){ 263 ::new((void*)&m_ptr[m_size]) T(::boost::move(m_ptr[m_size-1])); 264 } 265 t = ::boost::move(m_ptr[m_size-1]); 266 } 267 } 268 269 private: 270 template<class RIt> 271 static bool is_raw_ptr(RIt) 272 { 273 return false; 274 } 275 276 static bool is_raw_ptr(T*) 277 { 278 return true; 279 } 280 281 public: 282 template<class U> 283 bool supports_aligned_trailing(std::size_t size, std::size_t trail_count) const 284 { 285 if(this->is_raw_ptr(this->data()) && m_capacity){ 286 uintptr_t u_addr_sz = uintptr_t(&*(this->data()+size)); 287 uintptr_t u_addr_cp = uintptr_t(&*(this->data()+this->capacity())); 288 u_addr_sz = ((u_addr_sz + sizeof(U)-1)/sizeof(U))*sizeof(U); 289 return (u_addr_cp >= u_addr_sz) && ((u_addr_cp - u_addr_sz)/sizeof(U) >= trail_count); 290 } 291 return false; 292 } 293 294 template<class U> 295 U *aligned_trailing() const 296 { 297 return this->aligned_trailing<U>(this->size()); 298 } 299 300 template<class U> 301 U *aligned_trailing(std::size_t pos) const 302 { 303 uintptr_t u_addr = uintptr_t(&*(this->data()+pos)); 304 u_addr = ((u_addr + sizeof(U)-1)/sizeof(U))*sizeof(U); 305 return (U*)u_addr; 306 } 307 308 ~adaptive_xbuf() 309 { 310 this->clear(); 311 } 312 313 std::size_t capacity() const 314 { return m_capacity; } 315 316 iterator data() const 317 { return m_ptr; } 318 319 iterator end() const 320 { return m_ptr+m_size; } 321 322 std::size_t size() const 323 { return m_size; } 324 325 bool empty() const 326 { return !m_size; } 327 328 void clear() 329 { 330 this->shrink_to_fit(0u); 331 } 332 333 private: 334 RandRawIt m_ptr; 335 std::size_t m_size; 336 std::size_t m_capacity; 337 }; 338 339 template<class Iterator, class Op> 340 class range_xbuf 341 { 342 range_xbuf(const range_xbuf &); 343 range_xbuf & operator=(const range_xbuf &); 344 345 public: 346 typedef typename iterator_traits<Iterator>::size_type size_type; 347 typedef Iterator iterator; 348 349 range_xbuf(Iterator first, Iterator last) 350 : m_first(first), m_last(first), m_cap(last) 351 {} 352 353 template<class RandIt> 354 void move_assign(RandIt first, std::size_t n) 355 { 356 BOOST_ASSERT(size_type(n) <= size_type(m_cap-m_first)); 357 m_last = Op()(forward_t(), first, first+n, m_first); 358 } 359 360 ~range_xbuf() 361 {} 362 363 std::size_t capacity() const 364 { return m_cap-m_first; } 365 366 Iterator data() const 367 { return m_first; } 368 369 Iterator end() const 370 { return m_last; } 371 372 std::size_t size() const 373 { return m_last-m_first; } 374 375 bool empty() const 376 { return m_first == m_last; } 377 378 void clear() 379 { 380 m_last = m_first; 381 } 382 383 template<class RandIt> 384 iterator add(RandIt it) 385 { 386 Iterator pos(m_last); 387 *pos = boost::move(*it); 388 ++m_last; 389 return pos; 390 } 391 392 void set_size(std::size_t size) 393 { 394 m_last = m_first; 395 m_last += size; 396 } 397 398 private: 399 Iterator const m_first; 400 Iterator m_last; 401 Iterator const m_cap; 402 }; 403 404 405 template<class RandIt, class Compare> 406 RandIt skip_until_merge 407 ( RandIt first1, RandIt const last1 408 , const typename iterator_traits<RandIt>::value_type &next_key, Compare comp) 409 { 410 while(first1 != last1 && !comp(next_key, *first1)){ 411 ++first1; 412 } 413 return first1; 414 } 415 416 417 template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op> 418 RandItB op_buffered_partial_merge_to_range1_and_buffer 419 ( RandIt1 first1, RandIt1 const last1 420 , RandIt2 &rfirst2, RandIt2 const last2 421 , RandItB &rfirstb, Compare comp, Op op ) 422 { 423 RandItB firstb = rfirstb; 424 RandItB lastb = firstb; 425 RandIt2 first2 = rfirst2; 426 427 //Move to buffer while merging 428 //Three way moves need less moves when op is swap_op so use it 429 //when merging elements from range2 to the destination occupied by range1 430 if(first1 != last1 && first2 != last2){ 431 op(three_way_t(), first2++, first1++, lastb++); 432 433 while(true){ 434 if(first1 == last1){ 435 break; 436 } 437 if(first2 == last2){ 438 lastb = op(forward_t(), first1, last1, firstb); 439 break; 440 } 441 if (comp(*first2, *firstb)) { 442 op(three_way_t(), first2++, first1++, lastb++); 443 } 444 else { 445 op(three_way_t(), firstb++, first1++, lastb++); 446 } 447 } 448 rfirst2 = first2; 449 rfirstb = firstb; 450 } 451 452 return lastb; 453 } 454 455 template<class RandItKeys, class RandIt> 456 void swap_and_update_key 457 ( RandItKeys const key_next 458 , RandItKeys const key_range2 459 , RandItKeys &key_mid 460 , RandIt const begin 461 , RandIt const end 462 , RandIt const with) 463 { 464 if(begin != with){ 465 ::boost::adl_move_swap_ranges(begin, end, with); 466 ::boost::adl_move_swap(*key_next, *key_range2); 467 if(key_next == key_mid){ 468 key_mid = key_range2; 469 } 470 else if(key_mid == key_range2){ 471 key_mid = key_next; 472 } 473 } 474 } 475 476 /////////////////////////////////////////////////////////////////////////////// 477 // 478 // MERGE BUFFERLESS 479 // 480 /////////////////////////////////////////////////////////////////////////////// 481 482 // [first1, last1) merge [last1,last2) -> [first1,last2) 483 template<class RandIt, class Compare> 484 RandIt partial_merge_bufferless_impl 485 (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp) 486 { 487 if(last1 == last2){ 488 return first1; 489 } 490 bool const is_range1_A = *pis_range1_A; 491 if(first1 != last1 && comp(*last1, last1[-1])){ 492 do{ 493 RandIt const old_last1 = last1; 494 last1 = boost::movelib::lower_bound(last1, last2, *first1, comp); 495 first1 = rotate_gcd(first1, old_last1, last1);//old_last1 == last1 supported 496 if(last1 == last2){ 497 return first1; 498 } 499 do{ 500 ++first1; 501 } while(last1 != first1 && !comp(*last1, *first1) ); 502 } while(first1 != last1); 503 } 504 *pis_range1_A = !is_range1_A; 505 return last1; 506 } 507 508 // [first1, last1) merge [last1,last2) -> [first1,last2) 509 template<class RandIt, class Compare> 510 RandIt partial_merge_bufferless 511 (RandIt first1, RandIt last1, RandIt const last2, bool *const pis_range1_A, Compare comp) 512 { 513 return *pis_range1_A ? partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, comp) 514 : partial_merge_bufferless_impl(first1, last1, last2, pis_range1_A, antistable<Compare>(comp)); 515 } 516 517 template<class SizeType> 518 static SizeType needed_keys_count(SizeType n_block_a, SizeType n_block_b) 519 { 520 return n_block_a + n_block_b; 521 } 522 523 template<class RandItKeys, class KeyCompare, class RandIt, class Compare> 524 typename iterator_traits<RandIt>::size_type 525 find_next_block 526 ( RandItKeys key_first 527 , KeyCompare key_comp 528 , RandIt const first 529 , typename iterator_traits<RandIt>::size_type const l_block 530 , typename iterator_traits<RandIt>::size_type const ix_first_block 531 , typename iterator_traits<RandIt>::size_type const ix_last_block 532 , Compare comp) 533 { 534 typedef typename iterator_traits<RandIt>::size_type size_type; 535 typedef typename iterator_traits<RandIt>::value_type value_type; 536 typedef typename iterator_traits<RandItKeys>::value_type key_type; 537 BOOST_ASSERT(ix_first_block <= ix_last_block); 538 size_type ix_min_block = 0u; 539 for (size_type szt_i = ix_first_block; szt_i < ix_last_block; ++szt_i) { 540 const value_type &min_val = first[ix_min_block*l_block]; 541 const value_type &cur_val = first[szt_i*l_block]; 542 const key_type &min_key = key_first[ix_min_block]; 543 const key_type &cur_key = key_first[szt_i]; 544 545 bool const less_than_minimum = comp(cur_val, min_val) || 546 (!comp(min_val, cur_val) && key_comp(cur_key, min_key)); 547 548 if (less_than_minimum) { 549 ix_min_block = szt_i; 550 } 551 } 552 return ix_min_block; 553 } 554 555 template<class RandItKeys, class KeyCompare, class RandIt, class Compare> 556 void merge_blocks_bufferless 557 ( RandItKeys key_first 558 , KeyCompare key_comp 559 , RandIt const first 560 , typename iterator_traits<RandIt>::size_type const l_block 561 , typename iterator_traits<RandIt>::size_type const l_irreg1 562 , typename iterator_traits<RandIt>::size_type const n_block_a 563 , typename iterator_traits<RandIt>::size_type const n_block_b 564 , typename iterator_traits<RandIt>::size_type const l_irreg2 565 , Compare comp) 566 { 567 typedef typename iterator_traits<RandIt>::size_type size_type; 568 size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; 569 //BOOST_ASSERT(n_block_a || n_block_b); 570 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); 571 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); 572 573 size_type n_bef_irreg2 = 0; 574 bool l_irreg_pos_count = true; 575 RandItKeys key_mid(key_first + n_block_a); 576 RandIt const first_irr2 = first + l_irreg1 + (n_block_a+n_block_b)*l_block; 577 RandIt const last_irr2 = first_irr2 + l_irreg2; 578 579 { //Selection sort blocks 580 size_type n_block_left = n_block_b + n_block_a; 581 RandItKeys key_range2(key_first); 582 583 size_type min_check = n_block_a == n_block_left ? 0u : n_block_a; 584 size_type max_check = min_value(min_check+1, n_block_left); 585 for (RandIt f = first+l_irreg1; n_block_left; --n_block_left, ++key_range2, f += l_block, min_check -= min_check != 0, max_check -= max_check != 0) { 586 size_type const next_key_idx = find_next_block(key_range2, key_comp, f, l_block, min_check, max_check, comp); 587 RandItKeys const key_next(key_range2 + next_key_idx); 588 max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); 589 590 RandIt const first_min = f + next_key_idx*l_block; 591 592 //Check if irregular b block should go here. 593 //If so, break to the special code handling the irregular block 594 if (l_irreg_pos_count && l_irreg2 && comp(*first_irr2, *first_min)){ 595 l_irreg_pos_count = false; 596 } 597 n_bef_irreg2 += l_irreg_pos_count; 598 599 swap_and_update_key(key_next, key_range2, key_mid, f, f + l_block, first_min); 600 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(f, f+l_block, comp)); 601 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, first_min + l_block, comp)); 602 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((f == (first+l_irreg1)) || !comp(*f, *(f-l_block))); 603 } 604 } 605 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first+l_irreg1+n_bef_irreg2*l_block, first_irr2, comp)); 606 607 RandIt first1 = first; 608 RandIt last1 = first+l_irreg1; 609 RandItKeys const key_end (key_first+n_bef_irreg2); 610 bool is_range1_A = true; 611 612 for( ; key_first != key_end; ++key_first){ 613 bool is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_first, *key_mid); 614 first1 = is_range1_A == is_range2_A 615 ? last1 : partial_merge_bufferless(first1, last1, last1 + l_block, &is_range1_A, comp); 616 last1 += l_block; 617 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, first1, comp)); 618 } 619 620 merge_bufferless(is_range1_A ? first1 : last1, first_irr2, last_irr2, comp); 621 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp)); 622 } 623 624 /////////////////////////////////////////////////////////////////////////////// 625 // 626 // BUFFERED MERGE 627 // 628 /////////////////////////////////////////////////////////////////////////////// 629 template<class RandIt, class Compare, class Op, class Buf> 630 void op_buffered_merge 631 ( RandIt first, RandIt const middle, RandIt last 632 , Compare comp, Op op 633 , Buf &xbuf) 634 { 635 if(first != middle && middle != last && comp(*middle, middle[-1])){ 636 typedef typename iterator_traits<RandIt>::size_type size_type; 637 size_type const len1 = size_type(middle-first); 638 size_type const len2 = size_type(last-middle); 639 if(len1 <= len2){ 640 first = boost::movelib::upper_bound(first, middle, *middle, comp); 641 xbuf.move_assign(first, size_type(middle-first)); 642 op_merge_with_right_placed 643 (xbuf.data(), xbuf.end(), first, middle, last, comp, op); 644 } 645 else{ 646 last = boost::movelib::lower_bound(middle, last, middle[-1], comp); 647 xbuf.move_assign(middle, size_type(last-middle)); 648 op_merge_with_left_placed 649 (first, middle, last, xbuf.data(), xbuf.end(), comp, op); 650 } 651 } 652 } 653 654 template<class RandIt, class Compare, class XBuf> 655 void buffered_merge 656 ( RandIt first, RandIt const middle, RandIt last 657 , Compare comp 658 , XBuf &xbuf) 659 { 660 op_buffered_merge(first, middle, last, comp, move_op(), xbuf); 661 } 662 663 // Complexity: 2*distance(first, last)+max_collected^2/2 664 // 665 // Tries to collect at most n_keys unique elements from [first, last), 666 // in the begining of the range, and ordered according to comp 667 // 668 // Returns the number of collected keys 669 template<class RandIt, class Compare, class XBuf> 670 typename iterator_traits<RandIt>::size_type 671 collect_unique 672 ( RandIt const first, RandIt const last 673 , typename iterator_traits<RandIt>::size_type const max_collected, Compare comp 674 , XBuf & xbuf) 675 { 676 typedef typename iterator_traits<RandIt>::size_type size_type; 677 size_type h = 0; 678 if(max_collected){ 679 ++h; // first key is always here 680 RandIt h0 = first; 681 RandIt u = first; ++u; 682 RandIt search_end = u; 683 684 if(xbuf.capacity() >= max_collected){ 685 typename XBuf::iterator const ph0 = xbuf.add(first); 686 while(u != last && h < max_collected){ 687 typename XBuf::iterator const r = boost::movelib::lower_bound(ph0, xbuf.end(), *u, comp); 688 //If key not found add it to [h, h+h0) 689 if(r == xbuf.end() || comp(*u, *r) ){ 690 RandIt const new_h0 = boost::move(search_end, u, h0); 691 search_end = u; 692 ++search_end; 693 ++h; 694 xbuf.insert(r, u); 695 h0 = new_h0; 696 } 697 ++u; 698 } 699 boost::move_backward(first, h0, h0+h); 700 boost::move(xbuf.data(), xbuf.end(), first); 701 } 702 else{ 703 while(u != last && h < max_collected){ 704 RandIt const r = boost::movelib::lower_bound(h0, search_end, *u, comp); 705 //If key not found add it to [h, h+h0) 706 if(r == search_end || comp(*u, *r) ){ 707 RandIt const new_h0 = rotate_gcd(h0, search_end, u); 708 search_end = u; 709 ++search_end; 710 ++h; 711 rotate_gcd(r+(new_h0-h0), u, search_end); 712 h0 = new_h0; 713 } 714 ++u; 715 } 716 rotate_gcd(first, h0, h0+h); 717 } 718 } 719 return h; 720 } 721 722 template<class Unsigned> 723 Unsigned floor_sqrt(Unsigned const n) 724 { 725 Unsigned x = n; 726 Unsigned y = x/2 + (x&1); 727 while (y < x){ 728 x = y; 729 y = (x + n / x)/2; 730 } 731 return x; 732 } 733 734 template<class Unsigned> 735 Unsigned ceil_sqrt(Unsigned const n) 736 { 737 Unsigned r = floor_sqrt(n); 738 return r + Unsigned((n%r) != 0); 739 } 740 741 template<class Unsigned> 742 Unsigned floor_merge_multiple(Unsigned const n, Unsigned &base, Unsigned &pow) 743 { 744 Unsigned s = n; 745 Unsigned p = 0; 746 while(s > AdaptiveSortInsertionSortThreshold){ 747 s /= 2; 748 ++p; 749 } 750 base = s; 751 pow = p; 752 return s << p; 753 } 754 755 template<class Unsigned> 756 Unsigned ceil_merge_multiple(Unsigned const n, Unsigned &base, Unsigned &pow) 757 { 758 Unsigned fm = floor_merge_multiple(n, base, pow); 759 760 if(fm != n){ 761 if(base < AdaptiveSortInsertionSortThreshold){ 762 ++base; 763 } 764 else{ 765 base = AdaptiveSortInsertionSortThreshold/2 + 1; 766 ++pow; 767 } 768 } 769 return base << pow; 770 } 771 772 template<class Unsigned> 773 Unsigned ceil_sqrt_multiple(Unsigned const n, Unsigned *pbase = 0) 774 { 775 Unsigned const r = ceil_sqrt(n); 776 Unsigned pow = 0; 777 Unsigned base = 0; 778 Unsigned const res = ceil_merge_multiple(r, base, pow); 779 if(pbase) *pbase = base; 780 return res; 781 } 782 783 struct less 784 { 785 template<class T> 786 bool operator()(const T &l, const T &r) 787 { return l < r; } 788 }; 789 790 /////////////////////////////////////////////////////////////////////////////// 791 // 792 // MERGE BLOCKS 793 // 794 /////////////////////////////////////////////////////////////////////////////// 795 796 //#define ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN 797 798 #if defined ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN 799 template<class RandIt, class Compare> 800 void slow_stable_sort 801 ( RandIt const first, RandIt const last, Compare comp) 802 { 803 boost::movelib::inplace_stable_sort(first, last, comp); 804 } 805 806 #else //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN 807 808 template<class RandIt, class Compare> 809 void slow_stable_sort 810 ( RandIt const first, RandIt const last, Compare comp) 811 { 812 typedef typename iterator_traits<RandIt>::size_type size_type; 813 size_type L = size_type(last - first); 814 { //Use insertion sort to merge first elements 815 size_type m = 0; 816 while((L - m) > size_type(AdaptiveSortInsertionSortThreshold)){ 817 insertion_sort(first+m, first+m+size_type(AdaptiveSortInsertionSortThreshold), comp); 818 m += AdaptiveSortInsertionSortThreshold; 819 } 820 insertion_sort(first+m, last, comp); 821 } 822 823 size_type h = AdaptiveSortInsertionSortThreshold; 824 for(bool do_merge = L > h; do_merge; h*=2){ 825 do_merge = (L - h) > h; 826 size_type p0 = 0; 827 if(do_merge){ 828 size_type const h_2 = 2*h; 829 while((L-p0) > h_2){ 830 merge_bufferless(first+p0, first+p0+h, first+p0+h_2, comp); 831 p0 += h_2; 832 } 833 } 834 if((L-p0) > h){ 835 merge_bufferless(first+p0, first+p0+h, last, comp); 836 } 837 } 838 } 839 840 #endif //ADAPTIVE_SORT_MERGE_SLOW_STABLE_SORT_IS_NLOGN 841 842 //Returns new l_block and updates use_buf 843 template<class Unsigned> 844 Unsigned lblock_for_combine 845 (Unsigned const l_block, Unsigned const n_keys, Unsigned const l_data, bool &use_buf) 846 { 847 BOOST_ASSERT(l_data > 1); 848 849 //We need to guarantee lblock >= l_merged/(n_keys/2) keys for the combination. 850 //We have at least 4 keys guaranteed (which are the minimum to merge 2 ranges) 851 //If l_block != 0, then n_keys is already enough to merge all blocks in all 852 //phases as we've found all needed keys for that buffer and length before. 853 //If l_block == 0 then see if half keys can be used as buffer and the rest 854 //as keys guaranteeing that n_keys >= (2*l_merged)/lblock = 855 if(!l_block){ 856 //If l_block == 0 then n_keys is power of two 857 //(guaranteed by build_params(...)) 858 BOOST_ASSERT(n_keys >= 4); 859 //BOOST_ASSERT(0 == (n_keys &(n_keys-1))); 860 861 //See if half keys are at least 4 and if half keys fulfill 862 Unsigned const new_buf = n_keys/2; 863 Unsigned const new_keys = n_keys-new_buf; 864 use_buf = new_keys >= 4 && new_keys >= l_data/new_buf; 865 if(use_buf){ 866 return new_buf; 867 } 868 else{ 869 return l_data/n_keys; 870 } 871 } 872 else{ 873 use_buf = true; 874 return l_block; 875 } 876 } 877 878 template<class RandIt, class Compare, class XBuf> 879 void stable_sort( RandIt first, RandIt last, Compare comp, XBuf & xbuf) 880 { 881 typedef typename iterator_traits<RandIt>::size_type size_type; 882 size_type const len = size_type(last - first); 883 size_type const half_len = len/2 + (len&1); 884 if(std::size_t(xbuf.capacity() - xbuf.size()) >= half_len) { 885 merge_sort(first, last, comp, xbuf.data()+xbuf.size()); 886 } 887 else{ 888 slow_stable_sort(first, last, comp); 889 } 890 } 891 892 template<class RandIt, class Comp, class XBuf> 893 void initialize_keys( RandIt first, RandIt last 894 , Comp comp 895 , XBuf & xbuf) 896 { 897 stable_sort(first, last, comp, xbuf); 898 } 899 900 template<class RandIt, class U> 901 void initialize_keys( RandIt first, RandIt last 902 , less 903 , U &) 904 { 905 typedef typename iterator_traits<RandIt>::value_type value_type; 906 std::size_t count = std::size_t(last - first); 907 for(std::size_t i = 0; i != count; ++i){ 908 *first = value_type(i); 909 ++first; 910 } 911 } 912 913 template<class RandIt> 914 void move_data_backward( RandIt cur_pos 915 , typename iterator_traits<RandIt>::size_type const l_data 916 , RandIt new_pos 917 , bool const xbuf_used) 918 { 919 //Move buffer to the total combination right 920 if(xbuf_used){ 921 boost::move_backward(cur_pos, cur_pos+l_data, new_pos+l_data); 922 } 923 else{ 924 boost::adl_move_swap_ranges_backward(cur_pos, cur_pos+l_data, new_pos+l_data); 925 //Rotate does less moves but it seems slower due to cache issues 926 //rotate_gcd(first-l_block, first+len-l_block, first+len); 927 } 928 } 929 930 template<class RandIt> 931 void move_data_forward( RandIt cur_pos 932 , typename iterator_traits<RandIt>::size_type const l_data 933 , RandIt new_pos 934 , bool const xbuf_used) 935 { 936 //Move buffer to the total combination right 937 if(xbuf_used){ 938 boost::move(cur_pos, cur_pos+l_data, new_pos); 939 } 940 else{ 941 boost::adl_move_swap_ranges(cur_pos, cur_pos+l_data, new_pos); 942 //Rotate does less moves but it seems slower due to cache issues 943 //rotate_gcd(first-l_block, first+len-l_block, first+len); 944 } 945 } 946 947 template <class Unsigned> 948 Unsigned calculate_total_combined(Unsigned const len, Unsigned const l_prev_merged, Unsigned *pl_irreg_combined = 0) 949 { 950 typedef Unsigned size_type; 951 952 size_type const l_combined = 2*l_prev_merged; 953 size_type l_irreg_combined = len%l_combined; 954 size_type l_total_combined = len; 955 if(l_irreg_combined <= l_prev_merged){ 956 l_total_combined -= l_irreg_combined; 957 l_irreg_combined = 0; 958 } 959 if(pl_irreg_combined) 960 *pl_irreg_combined = l_irreg_combined; 961 return l_total_combined; 962 } 963 964 template<class RandItKeys, class KeyCompare, class SizeType, class XBuf> 965 void combine_params 966 ( RandItKeys const keys 967 , KeyCompare key_comp 968 , SizeType l_combined 969 , SizeType const l_prev_merged 970 , SizeType const l_block 971 , XBuf & xbuf 972 //Output 973 , SizeType &n_block_a 974 , SizeType &n_block_b 975 , SizeType &l_irreg1 976 , SizeType &l_irreg2 977 //Options 978 , bool do_initialize_keys = true) 979 { 980 typedef SizeType size_type; 981 982 //Initial parameters for selection sort blocks 983 l_irreg1 = l_prev_merged%l_block; 984 l_irreg2 = (l_combined-l_irreg1)%l_block; 985 BOOST_ASSERT(((l_combined-l_irreg1-l_irreg2)%l_block) == 0); 986 size_type const n_reg_block = (l_combined-l_irreg1-l_irreg2)/l_block; 987 n_block_a = l_prev_merged/l_block; 988 n_block_b = n_reg_block - n_block_a; 989 BOOST_ASSERT(n_reg_block>=n_block_a); 990 991 //Key initialization 992 if (do_initialize_keys) { 993 initialize_keys(keys, keys + needed_keys_count(n_block_a, n_block_b), key_comp, xbuf); 994 } 995 } 996 997 template<class RandIt1, class RandIt2, class RandItB, class Compare, class Op> 998 RandItB op_buffered_partial_merge_and_swap_to_range1_and_buffer 999 ( RandIt1 first1, RandIt1 const last1 1000 , RandIt2 &rfirst2, RandIt2 const last2, RandIt2 &rfirst_min 1001 , RandItB &rfirstb, Compare comp, Op op ) 1002 { 1003 RandItB firstb = rfirstb; 1004 RandItB lastb = firstb; 1005 RandIt2 first2 = rfirst2; 1006 1007 //Move to buffer while merging 1008 //Three way moves need less moves when op is swap_op so use it 1009 //when merging elements from range2 to the destination occupied by range1 1010 if(first1 != last1 && first2 != last2){ 1011 RandIt2 first_min = rfirst_min; 1012 op(four_way_t(), first2++, first_min++, first1++, lastb++); 1013 1014 while(first1 != last1){ 1015 if(first2 == last2){ 1016 lastb = op(forward_t(), first1, last1, firstb); 1017 break; 1018 } 1019 1020 if(comp(*first_min, *firstb)){ 1021 op( four_way_t(), first2++, first_min++, first1++, lastb++); 1022 } 1023 else{ 1024 op(three_way_t(), firstb++, first1++, lastb++); 1025 } 1026 } 1027 rfirst2 = first2; 1028 rfirstb = firstb; 1029 rfirst_min = first_min; 1030 } 1031 1032 return lastb; 1033 } 1034 1035 ////////////////////////////////// 1036 // 1037 // partial_merge 1038 // 1039 ////////////////////////////////// 1040 template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op> 1041 OutputIt op_partial_merge_impl 1042 (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op) 1043 { 1044 InputIt1 first1(r_first1); 1045 InputIt2 first2(r_first2); 1046 if(first2 != last2 && last1 != first1) 1047 while(1){ 1048 if(comp(*first2, *first1)) { 1049 op(first2++, d_first++); 1050 if(first2 == last2){ 1051 break; 1052 } 1053 } 1054 else{ 1055 op(first1++, d_first++); 1056 if(first1 == last1){ 1057 break; 1058 } 1059 } 1060 } 1061 r_first1 = first1; 1062 r_first2 = first2; 1063 return d_first; 1064 } 1065 1066 template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op> 1067 OutputIt op_partial_merge 1068 (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, OutputIt d_first, Compare comp, Op op, bool is_stable) 1069 { 1070 return is_stable ? op_partial_merge_impl(r_first1, last1, r_first2, last2, d_first, comp, op) 1071 : op_partial_merge_impl(r_first1, last1, r_first2, last2, d_first, antistable<Compare>(comp), op); 1072 } 1073 1074 ////////////////////////////////// 1075 // 1076 // partial_merge_and_swap 1077 // 1078 ////////////////////////////////// 1079 template<class InputIt1, class InputIt2, class OutputIt, class Compare, class Op> 1080 OutputIt op_partial_merge_and_swap_impl 1081 (InputIt1 &r_first1, InputIt1 const last1, InputIt2 &r_first2, InputIt2 const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op) 1082 { 1083 InputIt1 first1(r_first1); 1084 InputIt2 first2(r_first2); 1085 1086 if(first2 != last2 && last1 != first1) { 1087 InputIt2 first_min(r_first_min); 1088 bool non_empty_ranges = true; 1089 do{ 1090 if(comp(*first_min, *first1)) { 1091 op(three_way_t(), first2++, first_min++, d_first++); 1092 non_empty_ranges = first2 != last2; 1093 } 1094 else{ 1095 op(first1++, d_first++); 1096 non_empty_ranges = first1 != last1; 1097 } 1098 } while(non_empty_ranges); 1099 r_first_min = first_min; 1100 r_first1 = first1; 1101 r_first2 = first2; 1102 } 1103 return d_first; 1104 } 1105 1106 template<class RandIt, class InputIt2, class OutputIt, class Compare, class Op> 1107 OutputIt op_partial_merge_and_swap 1108 (RandIt &r_first1, RandIt const last1, InputIt2 &r_first2, InputIt2 const last2, InputIt2 &r_first_min, OutputIt d_first, Compare comp, Op op, bool is_stable) 1109 { 1110 return is_stable ? op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, comp, op) 1111 : op_partial_merge_and_swap_impl(r_first1, last1, r_first2, last2, r_first_min, d_first, antistable<Compare>(comp), op); 1112 } 1113 1114 template<class RandIt, class RandItBuf, class Compare, class Op> 1115 RandIt op_partial_merge_and_save_impl 1116 ( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min 1117 , RandItBuf &buf_first1_in_out, RandItBuf &buf_last1_in_out 1118 , Compare comp, Op op 1119 ) 1120 { 1121 RandItBuf buf_first1 = buf_first1_in_out; 1122 RandItBuf buf_last1 = buf_last1_in_out; 1123 RandIt first2(rfirst2); 1124 1125 bool const do_swap = first2 != first_min; 1126 if(buf_first1 == buf_last1){ 1127 //Skip any element that does not need to be moved 1128 RandIt new_first1 = skip_until_merge(first1, last1, *first_min, comp); 1129 buf_first1 += (new_first1-first1); 1130 first1 = new_first1; 1131 buf_last1 = do_swap ? op_buffered_partial_merge_and_swap_to_range1_and_buffer(first1, last1, first2, last2, first_min, buf_first1, comp, op) 1132 : op_buffered_partial_merge_to_range1_and_buffer (first1, last1, first2, last2, buf_first1, comp, op); 1133 first1 = last1; 1134 } 1135 else{ 1136 BOOST_ASSERT((last1-first1) == (buf_last1 - buf_first1)); 1137 } 1138 1139 //Now merge from buffer 1140 first1 = do_swap ? op_partial_merge_and_swap_impl(buf_first1, buf_last1, first2, last2, first_min, first1, comp, op) 1141 : op_partial_merge_impl (buf_first1, buf_last1, first2, last2, first1, comp, op); 1142 buf_first1_in_out = buf_first1; 1143 buf_last1_in_out = buf_last1; 1144 rfirst2 = first2; 1145 return first1; 1146 } 1147 1148 template<class RandIt, class RandItBuf, class Compare, class Op> 1149 RandIt op_partial_merge_and_save 1150 ( RandIt first1, RandIt const last1, RandIt &rfirst2, RandIt last2, RandIt first_min 1151 , RandItBuf &buf_first1_in_out 1152 , RandItBuf &buf_last1_in_out 1153 , Compare comp 1154 , Op op 1155 , bool is_stable) 1156 { 1157 return is_stable 1158 ? op_partial_merge_and_save_impl 1159 (first1, last1, rfirst2, last2, first_min, buf_first1_in_out, buf_last1_in_out, comp, op) 1160 : op_partial_merge_and_save_impl 1161 (first1, last1, rfirst2, last2, first_min, buf_first1_in_out, buf_last1_in_out, antistable<Compare>(comp), op) 1162 ; 1163 } 1164 1165 1166 1167 template<class RandItKeys, class KeyCompare, class RandIt, class RandIt2, class OutputIt, class Compare, class Op> 1168 OutputIt op_merge_blocks_with_irreg 1169 ( RandItKeys key_first 1170 , RandItKeys key_mid 1171 , KeyCompare key_comp 1172 , RandIt first_reg 1173 , RandIt2 &first_irr 1174 , RandIt2 const last_irr 1175 , OutputIt dest 1176 , typename iterator_traits<RandIt>::size_type const l_block 1177 , typename iterator_traits<RandIt>::size_type n_block_left 1178 , typename iterator_traits<RandIt>::size_type min_check 1179 , typename iterator_traits<RandIt>::size_type max_check 1180 , Compare comp, bool const is_stable, Op op) 1181 { 1182 typedef typename iterator_traits<RandIt>::size_type size_type; 1183 1184 for(; n_block_left; --n_block_left, ++key_first, min_check -= min_check != 0, max_check -= max_check != 0){ 1185 size_type next_key_idx = find_next_block(key_first, key_comp, first_reg, l_block, min_check, max_check, comp); 1186 max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); 1187 RandIt const last_reg = first_reg + l_block; 1188 RandIt first_min = first_reg + next_key_idx*l_block; 1189 RandIt const last_min = first_min + l_block; (void)last_min; 1190 1191 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_reg, last_reg, comp)); 1192 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!next_key_idx || is_sorted(first_min, last_min, comp)); 1193 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((!next_key_idx || !comp(*first_reg, *first_min ))); 1194 1195 OutputIt orig_dest = dest; (void)orig_dest; 1196 dest = next_key_idx ? op_partial_merge_and_swap(first_irr, last_irr, first_reg, last_reg, first_min, dest, comp, op, is_stable) 1197 : op_partial_merge (first_irr, last_irr, first_reg, last_reg, dest, comp, op, is_stable); 1198 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(orig_dest, dest, comp)); 1199 1200 if(first_reg == dest){ 1201 dest = next_key_idx ? ::boost::adl_move_swap_ranges(first_min, last_min, first_reg) 1202 : last_reg; 1203 } 1204 else{ 1205 dest = next_key_idx ? op(three_way_forward_t(), first_reg, last_reg, first_min, dest) 1206 : op(forward_t(), first_reg, last_reg, dest); 1207 } 1208 1209 RandItKeys const key_next(key_first + next_key_idx); 1210 swap_and_update_key(key_next, key_first, key_mid, last_reg, last_reg, first_min); 1211 1212 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(orig_dest, dest, comp)); 1213 first_reg = last_reg; 1214 } 1215 return dest; 1216 } 1217 1218 template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op> 1219 void op_merge_blocks_left 1220 ( RandItKeys const key_first 1221 , KeyCompare key_comp 1222 , RandIt const first 1223 , typename iterator_traits<RandIt>::size_type const l_block 1224 , typename iterator_traits<RandIt>::size_type const l_irreg1 1225 , typename iterator_traits<RandIt>::size_type const n_block_a 1226 , typename iterator_traits<RandIt>::size_type const n_block_b 1227 , typename iterator_traits<RandIt>::size_type const l_irreg2 1228 , Compare comp, Op op) 1229 { 1230 typedef typename iterator_traits<RandIt>::size_type size_type; 1231 size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; 1232 // BOOST_ASSERT(n_block_a || n_block_b); 1233 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); 1234 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); 1235 1236 size_type n_block_b_left = n_block_b; 1237 size_type n_block_a_left = n_block_a; 1238 size_type n_block_left = n_block_b + n_block_a; 1239 RandItKeys key_mid(key_first + n_block_a); 1240 1241 RandIt buffer = first - l_block; 1242 RandIt first1 = first; 1243 RandIt last1 = first1 + l_irreg1; 1244 RandIt first2 = last1; 1245 RandIt const irreg2 = first2 + n_block_left*l_block; 1246 bool is_range1_A = true; 1247 1248 RandItKeys key_range2(key_first); 1249 1250 //////////////////////////////////////////////////////////////////////////// 1251 //Process all regular blocks before the irregular B block 1252 //////////////////////////////////////////////////////////////////////////// 1253 size_type min_check = n_block_a == n_block_left ? 0u : n_block_a; 1254 size_type max_check = min_value(min_check+1, n_block_left); 1255 for (; n_block_left; --n_block_left, ++key_range2, min_check -= min_check != 0, max_check -= max_check != 0) { 1256 size_type const next_key_idx = find_next_block(key_range2, key_comp, first2, l_block, min_check, max_check, comp); 1257 max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); 1258 RandIt const first_min = first2 + next_key_idx*l_block; 1259 RandIt const last_min = first_min + l_block; (void)last_min; 1260 RandIt const last2 = first2 + l_block; 1261 1262 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first1, last1, comp)); 1263 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp)); 1264 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_left || is_sorted(first_min, last_min, comp)); 1265 1266 //Check if irregular b block should go here. 1267 //If so, break to the special code handling the irregular block 1268 if (!n_block_b_left && 1269 ( (l_irreg2 && comp(*irreg2, *first_min)) || (!l_irreg2 && is_range1_A)) ){ 1270 break; 1271 } 1272 1273 RandItKeys const key_next(key_range2 + next_key_idx); 1274 bool const is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid); 1275 1276 bool const is_buffer_middle = last1 == buffer; 1277 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT( ( is_buffer_middle && size_type(first2-buffer) == l_block && buffer == last1) || 1278 (!is_buffer_middle && size_type(first1-buffer) == l_block && first2 == last1)); 1279 1280 if(is_range1_A == is_range2_A){ 1281 BOOST_ASSERT((first1 == last1) || !comp(*first_min, last1[-1])); 1282 if(!is_buffer_middle){ 1283 buffer = op(forward_t(), first1, last1, buffer); 1284 } 1285 swap_and_update_key(key_next, key_range2, key_mid, first2, last2, first_min); 1286 first1 = first2; 1287 last1 = last2; 1288 } 1289 else { 1290 RandIt unmerged; 1291 RandIt buf_beg; 1292 RandIt buf_end; 1293 if(is_buffer_middle){ 1294 buf_end = buf_beg = first2 - (last1-first1); 1295 unmerged = op_partial_merge_and_save( first1, last1, first2, last2, first_min 1296 , buf_beg, buf_end, comp, op, is_range1_A); 1297 } 1298 else{ 1299 buf_beg = first1; 1300 buf_end = last1; 1301 unmerged = op_partial_merge_and_save 1302 (buffer, buffer+(last1-first1), first2, last2, first_min, buf_beg, buf_end, comp, op, is_range1_A); 1303 } 1304 (void)unmerged; 1305 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, unmerged, comp)); 1306 1307 swap_and_update_key( key_next, key_range2, key_mid, first2, last2 1308 , last_min - size_type(last2 - first2)); 1309 1310 if(buf_beg != buf_end){ //range2 exhausted: is_buffer_middle for the next iteration 1311 first1 = buf_beg; 1312 last1 = buf_end; 1313 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(buf_end == (last2-l_block)); 1314 buffer = last1; 1315 } 1316 else{ //range1 exhausted: !is_buffer_middle for the next iteration 1317 first1 = first2; 1318 last1 = last2; 1319 buffer = first2 - l_block; 1320 is_range1_A = is_range2_A; 1321 } 1322 } 1323 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT( (is_range2_A && n_block_a_left) || (!is_range2_A && n_block_b_left)); 1324 is_range2_A ? --n_block_a_left : --n_block_b_left; 1325 first2 = last2; 1326 } 1327 1328 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_range2 + n_block_left, key_comp, *key_mid)); 1329 BOOST_ASSERT(!n_block_b_left); 1330 1331 //////////////////////////////////////////////////////////////////////////// 1332 //Process remaining range 1 left before the irregular B block 1333 //////////////////////////////////////////////////////////////////////////// 1334 bool const is_buffer_middle = last1 == buffer; 1335 RandIt first_irr2 = irreg2; 1336 RandIt const last_irr2 = first_irr2 + l_irreg2; 1337 if(l_irreg2 && is_range1_A){ 1338 if(is_buffer_middle){ 1339 first1 = skip_until_merge(first1, last1, *first_irr2, comp); 1340 //Even if we copy backward, no overlapping occurs so use forward copy 1341 //that can be faster specially with trivial types 1342 RandIt const new_first1 = first2 - (last1 - first1); 1343 op(forward_t(), first1, last1, new_first1); 1344 first1 = new_first1; 1345 last1 = first2; 1346 buffer = first1 - l_block; 1347 } 1348 buffer = op_partial_merge_impl(first1, last1, first_irr2, last_irr2, buffer, comp, op); 1349 buffer = op(forward_t(), first1, last1, buffer); 1350 } 1351 else if(!is_buffer_middle){ 1352 buffer = op(forward_t(), first1, last1, buffer); 1353 } 1354 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, buffer, comp)); 1355 1356 //////////////////////////////////////////////////////////////////////////// 1357 //Process irregular B block and remaining A blocks 1358 //////////////////////////////////////////////////////////////////////////// 1359 buffer = op_merge_blocks_with_irreg 1360 ( key_range2, key_mid, key_comp, first2, first_irr2, last_irr2 1361 , buffer, l_block, n_block_left, min_check, max_check, comp, false, op); 1362 buffer = op(forward_t(), first_irr2, last_irr2, buffer);(void)buffer; 1363 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first-l_block, buffer, comp)); 1364 } 1365 1366 // first - first element to merge. 1367 // first[-l_block, 0) - buffer (if use_buf == true) 1368 // l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded 1369 // keys - sequence of keys, in same order as blocks. key<midkey means stream A 1370 // n_bef_irreg2/n_aft_irreg2 are regular blocks 1371 // l_irreg2 is a irregular block, that is to be combined after n_bef_irreg2 blocks and before n_aft_irreg2 blocks 1372 // If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks). 1373 template<class RandItKeys, class KeyCompare, class RandIt, class Compare> 1374 void merge_blocks_left 1375 ( RandItKeys const key_first 1376 , KeyCompare key_comp 1377 , RandIt const first 1378 , typename iterator_traits<RandIt>::size_type const l_block 1379 , typename iterator_traits<RandIt>::size_type const l_irreg1 1380 , typename iterator_traits<RandIt>::size_type const n_block_a 1381 , typename iterator_traits<RandIt>::size_type const n_block_b 1382 , typename iterator_traits<RandIt>::size_type const l_irreg2 1383 , Compare comp 1384 , bool const xbuf_used) 1385 { 1386 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + needed_keys_count(n_block_a, n_block_b), key_comp, key_first[n_block_a])); 1387 if(xbuf_used){ 1388 op_merge_blocks_left 1389 (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op()); 1390 } 1391 else{ 1392 op_merge_blocks_left 1393 (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op()); 1394 } 1395 } 1396 1397 1398 // first - first element to merge. 1399 // [first+l_block*(n_bef_irreg2+n_aft_irreg2)+l_irreg2, first+l_block*(n_bef_irreg2+n_aft_irreg2+1)+l_irreg2) - buffer 1400 // l_block - length of regular blocks. First nblocks are stable sorted by 1st elements and key-coded 1401 // keys - sequence of keys, in same order as blocks. key<midkey means stream A 1402 // n_bef_irreg2/n_aft_irreg2 are regular blocks 1403 // l_irreg2 is a irregular block, that is to be combined after n_bef_irreg2 blocks and before n_aft_irreg2 blocks 1404 // If l_irreg2==0 then n_aft_irreg2==0 (no irregular blocks). 1405 template<class RandItKeys, class KeyCompare, class RandIt, class Compare> 1406 void merge_blocks_right 1407 ( RandItKeys const key_first 1408 , KeyCompare key_comp 1409 , RandIt const first 1410 , typename iterator_traits<RandIt>::size_type const l_block 1411 , typename iterator_traits<RandIt>::size_type const n_block_a 1412 , typename iterator_traits<RandIt>::size_type const n_block_b 1413 , typename iterator_traits<RandIt>::size_type const l_irreg2 1414 , Compare comp 1415 , bool const xbuf_used) 1416 { 1417 merge_blocks_left 1418 ( (make_reverse_iterator)(key_first + needed_keys_count(n_block_a, n_block_b)) 1419 , inverse<KeyCompare>(key_comp) 1420 , (make_reverse_iterator)(first + ((n_block_a+n_block_b)*l_block+l_irreg2)) 1421 , l_block 1422 , l_irreg2 1423 , n_block_b 1424 , n_block_a 1425 , 0 1426 , inverse<Compare>(comp), xbuf_used); 1427 } 1428 1429 template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class Op, class RandItBuf> 1430 void op_merge_blocks_with_buf 1431 ( RandItKeys key_first 1432 , KeyCompare key_comp 1433 , RandIt const first 1434 , typename iterator_traits<RandIt>::size_type const l_block 1435 , typename iterator_traits<RandIt>::size_type const l_irreg1 1436 , typename iterator_traits<RandIt>::size_type const n_block_a 1437 , typename iterator_traits<RandIt>::size_type const n_block_b 1438 , typename iterator_traits<RandIt>::size_type const l_irreg2 1439 , Compare comp 1440 , Op op 1441 , RandItBuf const buf_first) 1442 { 1443 typedef typename iterator_traits<RandIt>::size_type size_type; 1444 size_type const key_count = needed_keys_count(n_block_a, n_block_b); (void)key_count; 1445 //BOOST_ASSERT(n_block_a || n_block_b); 1446 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted_and_unique(key_first, key_first + key_count, key_comp)); 1447 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_b || n_block_a == count_if_with(key_first, key_first + key_count, key_comp, key_first[n_block_a])); 1448 1449 size_type n_block_b_left = n_block_b; 1450 size_type n_block_a_left = n_block_a; 1451 size_type n_block_left = n_block_b + n_block_a; 1452 RandItKeys key_mid(key_first + n_block_a); 1453 1454 RandItBuf buffer = buf_first; 1455 RandItBuf buffer_end = buffer; 1456 RandIt first1 = first; 1457 RandIt last1 = first1 + l_irreg1; 1458 RandIt first2 = last1; 1459 RandIt const first_irr2 = first2 + n_block_left*l_block; 1460 bool is_range1_A = true; 1461 1462 RandItKeys key_range2(key_first); 1463 1464 //////////////////////////////////////////////////////////////////////////// 1465 //Process all regular blocks before the irregular B block 1466 //////////////////////////////////////////////////////////////////////////// 1467 size_type min_check = n_block_a == n_block_left ? 0u : n_block_a; 1468 size_type max_check = min_value(min_check+1, n_block_left); 1469 for (; n_block_left; --n_block_left, ++key_range2, min_check -= min_check != 0, max_check -= max_check != 0) { 1470 size_type const next_key_idx = find_next_block(key_range2, key_comp, first2, l_block, min_check, max_check, comp); 1471 max_check = min_value(max_value(max_check, next_key_idx+2), n_block_left); 1472 RandIt first_min = first2 + next_key_idx*l_block; 1473 RandIt const last_min = first_min + l_block; (void)last_min; 1474 RandIt const last2 = first2 + l_block; 1475 1476 bool const buffer_empty = buffer == buffer_end; (void)buffer_empty; 1477 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(buffer_empty ? is_sorted(first1, last1, comp) : is_sorted(buffer, buffer_end, comp)); 1478 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp)); 1479 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!n_block_left || is_sorted(first_min, last_min, comp)); 1480 1481 //Check if irregular b block should go here. 1482 //If so, break to the special code handling the irregular block 1483 if (!n_block_b_left && 1484 ( (l_irreg2 && comp(*first_irr2, *first_min)) || (!l_irreg2 && is_range1_A)) ){ 1485 break; 1486 } 1487 1488 RandItKeys const key_next(key_range2 + next_key_idx); 1489 bool const is_range2_A = key_mid == (key_first+key_count) || key_comp(*key_next, *key_mid); 1490 1491 if(is_range1_A == is_range2_A){ 1492 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT((first1 == last1) || (buffer_empty ? !comp(*first_min, last1[-1]) : !comp(*first_min, buffer_end[-1]))); 1493 //If buffered, put those elements in place 1494 RandIt res = op(forward_t(), buffer, buffer_end, first1); 1495 buffer = buffer_end = buf_first; 1496 BOOST_ASSERT(buffer_empty || res == last1); (void)res; 1497 swap_and_update_key(key_next, key_range2, key_mid, first2, last2, first_min); 1498 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first2, last2, comp)); 1499 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, last_min, comp)); 1500 first1 = first2; 1501 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, first1, comp)); 1502 } 1503 else { 1504 RandIt const unmerged = op_partial_merge_and_save(first1, last1, first2, last2, first_min, buffer, buffer_end, comp, op, is_range1_A); 1505 bool const is_range_1_empty = buffer == buffer_end; 1506 BOOST_ASSERT(is_range_1_empty || (buffer_end-buffer) == (last1+l_block-unmerged)); 1507 if(is_range_1_empty){ 1508 buffer = buffer_end = buf_first; 1509 first_min = last_min - (last2 - first2); 1510 } 1511 else{ 1512 first_min = last_min; 1513 } 1514 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(!is_range_1_empty || (last_min-first_min) == (last2-unmerged)); 1515 swap_and_update_key(key_next, key_range2, key_mid, first2, last2, first_min); 1516 1517 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first_min, last_min, comp)); 1518 is_range1_A ^= is_range_1_empty; 1519 first1 = unmerged; 1520 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, unmerged, comp)); 1521 } 1522 BOOST_ASSERT( (is_range2_A && n_block_a_left) || (!is_range2_A && n_block_b_left)); 1523 is_range2_A ? --n_block_a_left : --n_block_b_left; 1524 last1 += l_block; 1525 first2 = last2; 1526 } 1527 1528 RandIt res = op(forward_t(), buffer, buffer_end, first1); (void)res; 1529 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, res, comp)); 1530 1531 //////////////////////////////////////////////////////////////////////////// 1532 //Process irregular B block and remaining A blocks 1533 //////////////////////////////////////////////////////////////////////////// 1534 RandIt const last_irr2 = first_irr2 + l_irreg2; 1535 op(forward_t(), first_irr2, first_irr2+l_irreg2, buf_first); 1536 buffer = buf_first; 1537 buffer_end = buffer+l_irreg2; 1538 1539 reverse_iterator<RandItBuf> rbuf_beg(buffer_end); 1540 RandIt dest = op_merge_blocks_with_irreg 1541 ((make_reverse_iterator)(key_first + n_block_b + n_block_a), (make_reverse_iterator)(key_mid), inverse<KeyCompare>(key_comp) 1542 , (make_reverse_iterator)(first_irr2), rbuf_beg 1543 , (make_reverse_iterator)(buffer), (make_reverse_iterator)(last_irr2) 1544 , l_block, n_block_left, 0, n_block_left 1545 , inverse<Compare>(comp), true, op).base(); 1546 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(dest, last_irr2, comp)); 1547 1548 buffer_end = rbuf_beg.base(); 1549 BOOST_ASSERT((dest-last1) == (buffer_end-buffer)); 1550 op_merge_with_left_placed(is_range1_A ? first1 : last1, last1, dest, buffer, buffer_end, comp, op); 1551 1552 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(first, last_irr2, comp)); 1553 } 1554 1555 template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class RandItBuf> 1556 void merge_blocks_with_buf 1557 ( RandItKeys key_first 1558 , KeyCompare key_comp 1559 , RandIt const first 1560 , typename iterator_traits<RandIt>::size_type const l_block 1561 , typename iterator_traits<RandIt>::size_type const l_irreg1 1562 , typename iterator_traits<RandIt>::size_type const n_block_a 1563 , typename iterator_traits<RandIt>::size_type const n_block_b 1564 , typename iterator_traits<RandIt>::size_type const l_irreg2 1565 , Compare comp 1566 , RandItBuf const buf_first 1567 , bool const xbuf_used) 1568 { 1569 if(xbuf_used){ 1570 op_merge_blocks_with_buf 1571 (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, move_op(), buf_first); 1572 } 1573 else{ 1574 op_merge_blocks_with_buf 1575 (key_first, key_comp, first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, swap_op(), buf_first); 1576 } 1577 } 1578 1579 template<class RandIt, class Compare, class Op> 1580 typename iterator_traits<RandIt>::size_type 1581 op_insertion_sort_step_left 1582 ( RandIt const first 1583 , typename iterator_traits<RandIt>::size_type const length 1584 , typename iterator_traits<RandIt>::size_type const step 1585 , Compare comp, Op op) 1586 { 1587 typedef typename iterator_traits<RandIt>::size_type size_type; 1588 size_type const s = min_value<size_type>(step, AdaptiveSortInsertionSortThreshold); 1589 size_type m = 0; 1590 1591 while((length - m) > s){ 1592 insertion_sort_op(first+m, first+m+s, first+m-s, comp, op); 1593 m += s; 1594 } 1595 insertion_sort_op(first+m, first+length, first+m-s, comp, op); 1596 return s; 1597 } 1598 1599 template<class RandIt, class Compare> 1600 typename iterator_traits<RandIt>::size_type 1601 insertion_sort_step 1602 ( RandIt const first 1603 , typename iterator_traits<RandIt>::size_type const length 1604 , typename iterator_traits<RandIt>::size_type const step 1605 , Compare comp) 1606 { 1607 typedef typename iterator_traits<RandIt>::size_type size_type; 1608 size_type const s = min_value<size_type>(step, AdaptiveSortInsertionSortThreshold); 1609 size_type m = 0; 1610 1611 while((length - m) > s){ 1612 insertion_sort(first+m, first+m+s, comp); 1613 m += s; 1614 } 1615 insertion_sort(first+m, first+length, comp); 1616 return s; 1617 } 1618 1619 template<class RandIt, class Compare, class Op> 1620 typename iterator_traits<RandIt>::size_type 1621 op_merge_left_step_multiple 1622 ( RandIt first_block 1623 , typename iterator_traits<RandIt>::size_type const elements_in_blocks 1624 , typename iterator_traits<RandIt>::size_type l_merged 1625 , typename iterator_traits<RandIt>::size_type const l_build_buf 1626 , typename iterator_traits<RandIt>::size_type l_left_space 1627 , Compare comp 1628 , Op op) 1629 { 1630 typedef typename iterator_traits<RandIt>::size_type size_type; 1631 for(; l_merged < l_build_buf && l_left_space >= l_merged; l_merged*=2){ 1632 size_type p0=0; 1633 RandIt pos = first_block; 1634 while((elements_in_blocks - p0) > 2*l_merged) { 1635 op_merge_left(pos-l_merged, pos, pos+l_merged, pos+2*l_merged, comp, op); 1636 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, pos+l_merged, comp)); 1637 p0 += 2*l_merged; 1638 pos = first_block+p0; 1639 } 1640 if((elements_in_blocks-p0) > l_merged) { 1641 op_merge_left(pos-l_merged, pos, pos+l_merged, first_block+elements_in_blocks, comp, op); 1642 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, pos-l_merged+(first_block+elements_in_blocks-pos), comp)); 1643 } 1644 else { 1645 op(forward_t(), pos, first_block+elements_in_blocks, pos-l_merged); 1646 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(pos-l_merged, first_block+elements_in_blocks-l_merged, comp)); 1647 } 1648 first_block -= l_merged; 1649 l_left_space -= l_merged; 1650 } 1651 return l_merged; 1652 } 1653 1654 template<class RandIt, class Compare, class Op> 1655 void op_merge_right_step_once 1656 ( RandIt first_block 1657 , typename iterator_traits<RandIt>::size_type const elements_in_blocks 1658 , typename iterator_traits<RandIt>::size_type const l_build_buf 1659 , Compare comp 1660 , Op op) 1661 { 1662 typedef typename iterator_traits<RandIt>::size_type size_type; 1663 size_type restk = elements_in_blocks%(2*l_build_buf); 1664 size_type p = elements_in_blocks - restk; 1665 BOOST_ASSERT(0 == (p%(2*l_build_buf))); 1666 1667 if(restk <= l_build_buf){ 1668 op(backward_t(),first_block+p, first_block+p+restk, first_block+p+restk+l_build_buf); 1669 } 1670 else{ 1671 op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+restk, first_block+p+restk+l_build_buf, comp, op); 1672 } 1673 while(p>0){ 1674 p -= 2*l_build_buf; 1675 op_merge_right(first_block+p, first_block+p+l_build_buf, first_block+p+2*l_build_buf, first_block+p+3*l_build_buf, comp, op); 1676 } 1677 } 1678 1679 1680 // build blocks of length 2*l_build_buf. l_build_buf is power of two 1681 // input: [0, l_build_buf) elements are buffer, rest unsorted elements 1682 // output: [0, l_build_buf) elements are buffer, blocks 2*l_build_buf and last subblock sorted 1683 // 1684 // First elements are merged from right to left until elements start 1685 // at first. All old elements [first, first + l_build_buf) are placed at the end 1686 // [first+len-l_build_buf, first+len). To achieve this: 1687 // - If we have external memory to merge, we save elements from the buffer 1688 // so that a non-swapping merge is used. Buffer elements are restored 1689 // at the end of the buffer from the external memory. 1690 // 1691 // - When the external memory is not available or it is insufficient 1692 // for a merge operation, left swap merging is used. 1693 // 1694 // Once elements are merged left to right in blocks of l_build_buf, then a single left 1695 // to right merge step is performed to achieve merged blocks of size 2K. 1696 // If external memory is available, usual merge is used, swap merging otherwise. 1697 // 1698 // As a last step, if auxiliary memory is available in-place merge is performed. 1699 // until all is merged or auxiliary memory is not large enough. 1700 template<class RandIt, class Compare, class XBuf> 1701 typename iterator_traits<RandIt>::size_type 1702 adaptive_sort_build_blocks 1703 ( RandIt const first 1704 , typename iterator_traits<RandIt>::size_type const len 1705 , typename iterator_traits<RandIt>::size_type const l_base 1706 , typename iterator_traits<RandIt>::size_type const l_build_buf 1707 , XBuf & xbuf 1708 , Compare comp) 1709 { 1710 typedef typename iterator_traits<RandIt>::size_type size_type; 1711 BOOST_ASSERT(l_build_buf <= len); 1712 BOOST_ASSERT(0 == ((l_build_buf / l_base)&(l_build_buf/l_base-1))); 1713 1714 //Place the start pointer after the buffer 1715 RandIt first_block = first + l_build_buf; 1716 size_type const elements_in_blocks = len - l_build_buf; 1717 1718 ////////////////////////////////// 1719 // Start of merge to left step 1720 ////////////////////////////////// 1721 size_type l_merged = 0u; 1722 1723 BOOST_ASSERT(l_build_buf); 1724 //If there is no enough buffer for the insertion sort step, just avoid the external buffer 1725 size_type kbuf = min_value<size_type>(l_build_buf, size_type(xbuf.capacity())); 1726 kbuf = kbuf < l_base ? 0 : kbuf; 1727 1728 if(kbuf){ 1729 //Backup internal buffer values in external buffer so they can be overwritten 1730 xbuf.move_assign(first+l_build_buf-kbuf, kbuf); 1731 l_merged = op_insertion_sort_step_left(first_block, elements_in_blocks, l_base, comp, move_op()); 1732 1733 //Now combine them using the buffer. Elements from buffer can be 1734 //overwritten since they've been saved to xbuf 1735 l_merged = op_merge_left_step_multiple 1736 ( first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, kbuf - l_merged, comp, move_op()); 1737 1738 //Restore internal buffer from external buffer unless kbuf was l_build_buf, 1739 //in that case restoration will happen later 1740 if(kbuf != l_build_buf){ 1741 boost::move(xbuf.data()+kbuf-l_merged, xbuf.data() + kbuf, first_block-l_merged+elements_in_blocks); 1742 } 1743 } 1744 else{ 1745 l_merged = insertion_sort_step(first_block, elements_in_blocks, l_base, comp); 1746 rotate_gcd(first_block - l_merged, first_block, first_block+elements_in_blocks); 1747 } 1748 1749 //Now combine elements using the buffer. Elements from buffer can't be 1750 //overwritten since xbuf was not big enough, so merge swapping elements. 1751 l_merged = op_merge_left_step_multiple 1752 (first_block - l_merged, elements_in_blocks, l_merged, l_build_buf, l_build_buf - l_merged, comp, swap_op()); 1753 1754 BOOST_ASSERT(l_merged == l_build_buf); 1755 1756 ////////////////////////////////// 1757 // Start of merge to right step 1758 ////////////////////////////////// 1759 1760 //If kbuf is l_build_buf then we can merge right without swapping 1761 //Saved data is still in xbuf 1762 if(kbuf && kbuf == l_build_buf){ 1763 op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, move_op()); 1764 //Restore internal buffer from external buffer if kbuf was l_build_buf. 1765 //as this operation was previously delayed. 1766 boost::move(xbuf.data(), xbuf.data() + kbuf, first); 1767 } 1768 else{ 1769 op_merge_right_step_once(first, elements_in_blocks, l_build_buf, comp, swap_op()); 1770 } 1771 xbuf.clear(); 1772 //2*l_build_buf or total already merged 1773 return min_value(elements_in_blocks, 2*l_build_buf); 1774 } 1775 1776 template<class RandItKeys, class KeyCompare, class RandIt, class Compare, class XBuf> 1777 void adaptive_sort_combine_blocks 1778 ( RandItKeys const keys 1779 , KeyCompare key_comp 1780 , RandIt const first 1781 , typename iterator_traits<RandIt>::size_type const len 1782 , typename iterator_traits<RandIt>::size_type const l_prev_merged 1783 , typename iterator_traits<RandIt>::size_type const l_block 1784 , bool const use_buf 1785 , bool const xbuf_used 1786 , XBuf & xbuf 1787 , Compare comp 1788 , bool merge_left) 1789 { 1790 (void)xbuf; 1791 typedef typename iterator_traits<RandIt>::size_type size_type; 1792 1793 size_type const l_reg_combined = 2*l_prev_merged; 1794 size_type l_irreg_combined = 0; 1795 size_type const l_total_combined = calculate_total_combined(len, l_prev_merged, &l_irreg_combined); 1796 size_type const n_reg_combined = len/l_reg_combined; 1797 RandIt combined_first = first; 1798 1799 (void)l_total_combined; 1800 BOOST_ASSERT(l_total_combined <= len); 1801 1802 size_type const max_i = n_reg_combined + (l_irreg_combined != 0); 1803 1804 if(merge_left || !use_buf) { 1805 for( size_type combined_i = 0; combined_i != max_i; ++combined_i, combined_first += l_reg_combined) { 1806 //Now merge blocks 1807 bool const is_last = combined_i==n_reg_combined; 1808 size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; 1809 1810 range_xbuf<RandIt, move_op> rbuf( (use_buf && xbuf_used) ? (combined_first-l_block) : combined_first, combined_first); 1811 size_type n_block_a, n_block_b, l_irreg1, l_irreg2; 1812 combine_params( keys, key_comp, l_cur_combined 1813 , l_prev_merged, l_block, rbuf 1814 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs 1815 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block); 1816 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); 1817 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); 1818 if(!use_buf){ 1819 merge_blocks_bufferless 1820 (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp); 1821 } 1822 else{ 1823 merge_blocks_left 1824 (keys, key_comp, combined_first, l_block, 0u, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); 1825 } 1826 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_L: ", len + l_block); 1827 } 1828 } 1829 else{ 1830 combined_first += l_reg_combined*(max_i-1); 1831 for( size_type combined_i = max_i; combined_i--; combined_first -= l_reg_combined) { 1832 bool const is_last = combined_i==n_reg_combined; 1833 size_type const l_cur_combined = is_last ? l_irreg_combined : l_reg_combined; 1834 1835 RandIt const combined_last(combined_first+l_cur_combined); 1836 range_xbuf<RandIt, move_op> rbuf(combined_last, xbuf_used ? (combined_last+l_block) : combined_last); 1837 size_type n_block_a, n_block_b, l_irreg1, l_irreg2; 1838 combine_params( keys, key_comp, l_cur_combined 1839 , l_prev_merged, l_block, rbuf 1840 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs 1841 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combpar: ", len + l_block); 1842 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first, combined_first + n_block_a*l_block+l_irreg1, comp)); 1843 BOOST_MOVE_ADAPTIVE_SORT_INVARIANT(is_sorted(combined_first + n_block_a*l_block+l_irreg1, combined_first + n_block_a*l_block+l_irreg1+n_block_b*l_block+l_irreg2, comp)); 1844 merge_blocks_right 1845 (keys, key_comp, combined_first, l_block, n_block_a, n_block_b, l_irreg2, comp, xbuf_used); 1846 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After merge_blocks_R: ", len + l_block); 1847 } 1848 } 1849 } 1850 1851 //Returns true if buffer is placed in 1852 //[buffer+len-l_intbuf, buffer+len). Otherwise, buffer is 1853 //[buffer,buffer+l_intbuf) 1854 template<class RandIt, class Compare, class XBuf> 1855 bool adaptive_sort_combine_all_blocks 1856 ( RandIt keys 1857 , typename iterator_traits<RandIt>::size_type &n_keys 1858 , RandIt const buffer 1859 , typename iterator_traits<RandIt>::size_type const l_buf_plus_data 1860 , typename iterator_traits<RandIt>::size_type l_merged 1861 , typename iterator_traits<RandIt>::size_type &l_intbuf 1862 , XBuf & xbuf 1863 , Compare comp) 1864 { 1865 typedef typename iterator_traits<RandIt>::size_type size_type; 1866 RandIt const first = buffer + l_intbuf; 1867 size_type const l_data = l_buf_plus_data - l_intbuf; 1868 size_type const l_unique = l_intbuf+n_keys; 1869 //Backup data to external buffer once if possible 1870 bool const common_xbuf = l_data > l_merged && l_intbuf && l_intbuf <= xbuf.capacity(); 1871 if(common_xbuf){ 1872 xbuf.move_assign(buffer, l_intbuf); 1873 } 1874 1875 bool prev_merge_left = true; 1876 size_type l_prev_total_combined = l_merged, l_prev_block = 0; 1877 bool prev_use_internal_buf = true; 1878 1879 for( size_type n = 0; l_data > l_merged 1880 ; l_merged*=2 1881 , ++n){ 1882 //If l_intbuf is non-zero, use that internal buffer. 1883 // Implies l_block == l_intbuf && use_internal_buf == true 1884 //If l_intbuf is zero, see if half keys can be reused as a reduced emergency buffer, 1885 // Implies l_block == n_keys/2 && use_internal_buf == true 1886 //Otherwise, just give up and and use all keys to merge using rotations (use_internal_buf = false) 1887 bool use_internal_buf = false; 1888 size_type const l_block = lblock_for_combine(l_intbuf, n_keys, 2*l_merged, use_internal_buf); 1889 BOOST_ASSERT(!l_intbuf || (l_block == l_intbuf)); 1890 BOOST_ASSERT(n == 0 || (!use_internal_buf || prev_use_internal_buf) ); 1891 BOOST_ASSERT(n == 0 || (!use_internal_buf || l_prev_block == l_block) ); 1892 1893 bool const is_merge_left = (n&1) == 0; 1894 size_type const l_total_combined = calculate_total_combined(l_data, l_merged); 1895 if(n && prev_use_internal_buf && prev_merge_left){ 1896 if(is_merge_left || !use_internal_buf){ 1897 move_data_backward(first-l_prev_block, l_prev_total_combined, first, common_xbuf); 1898 } 1899 else{ 1900 //Put the buffer just after l_total_combined 1901 RandIt const buf_end = first+l_prev_total_combined; 1902 RandIt const buf_beg = buf_end-l_block; 1903 if(l_prev_total_combined > l_total_combined){ 1904 size_type const l_diff = l_prev_total_combined - l_total_combined; 1905 move_data_backward(buf_beg-l_diff, l_diff, buf_end-l_diff, common_xbuf); 1906 } 1907 else if(l_prev_total_combined < l_total_combined){ 1908 size_type const l_diff = l_total_combined - l_prev_total_combined; 1909 move_data_forward(buf_end, l_diff, buf_beg, common_xbuf); 1910 } 1911 } 1912 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" After move_data : ", l_data + l_intbuf); 1913 } 1914 1915 //Combine to form l_merged*2 segments 1916 if(n_keys){ 1917 adaptive_sort_combine_blocks 1918 ( keys, comp, !use_internal_buf || is_merge_left ? first : first-l_block 1919 , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); 1920 } 1921 else{ 1922 size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(); 1923 adaptive_sort_combine_blocks 1924 ( uint_keys, less(), !use_internal_buf || is_merge_left ? first : first-l_block 1925 , l_data, l_merged, l_block, use_internal_buf, common_xbuf, xbuf, comp, is_merge_left); 1926 } 1927 1928 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(is_merge_left ? " After comb blocks L: " : " After comb blocks R: ", l_data + l_intbuf); 1929 prev_merge_left = is_merge_left; 1930 l_prev_total_combined = l_total_combined; 1931 l_prev_block = l_block; 1932 prev_use_internal_buf = use_internal_buf; 1933 } 1934 BOOST_ASSERT(l_prev_total_combined == l_data); 1935 bool const buffer_right = prev_use_internal_buf && prev_merge_left; 1936 1937 l_intbuf = prev_use_internal_buf ? l_prev_block : 0u; 1938 n_keys = l_unique - l_intbuf; 1939 //Restore data from to external common buffer if used 1940 if(common_xbuf){ 1941 if(buffer_right){ 1942 boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer+l_data); 1943 } 1944 else{ 1945 boost::move(xbuf.data(), xbuf.data() + l_intbuf, buffer); 1946 } 1947 } 1948 return buffer_right; 1949 } 1950 1951 template<class RandIt, class Compare, class XBuf> 1952 void stable_merge 1953 ( RandIt first, RandIt const middle, RandIt last 1954 , Compare comp 1955 , XBuf &xbuf) 1956 { 1957 BOOST_ASSERT(xbuf.empty()); 1958 typedef typename iterator_traits<RandIt>::size_type size_type; 1959 size_type const len1 = size_type(middle-first); 1960 size_type const len2 = size_type(last-middle); 1961 size_type const l_min = min_value(len1, len2); 1962 if(xbuf.capacity() >= l_min){ 1963 buffered_merge(first, middle, last, comp, xbuf); 1964 xbuf.clear(); 1965 } 1966 else{ 1967 merge_bufferless(first, middle, last, comp); 1968 } 1969 } 1970 1971 1972 template<class RandIt, class Compare, class XBuf> 1973 void adaptive_sort_final_merge( bool buffer_right 1974 , RandIt const first 1975 , typename iterator_traits<RandIt>::size_type const l_intbuf 1976 , typename iterator_traits<RandIt>::size_type const n_keys 1977 , typename iterator_traits<RandIt>::size_type const len 1978 , XBuf & xbuf 1979 , Compare comp) 1980 { 1981 //BOOST_ASSERT(n_keys || xbuf.size() == l_intbuf); 1982 xbuf.clear(); 1983 1984 typedef typename iterator_traits<RandIt>::size_type size_type; 1985 size_type const n_key_plus_buf = l_intbuf+n_keys; 1986 if(buffer_right){ 1987 stable_sort(first+len-l_intbuf, first+len, comp, xbuf); 1988 stable_merge(first+n_keys, first+len-l_intbuf, first+len, antistable<Compare>(comp), xbuf); 1989 stable_sort(first, first+n_keys, comp, xbuf); 1990 stable_merge(first, first+n_keys, first+len, comp, xbuf); 1991 } 1992 else{ 1993 stable_sort(first, first+n_key_plus_buf, comp, xbuf); 1994 if(xbuf.capacity() >= n_key_plus_buf){ 1995 buffered_merge(first, first+n_key_plus_buf, first+len, comp, xbuf); 1996 } 1997 else if(xbuf.capacity() >= min_value<size_type>(l_intbuf, n_keys)){ 1998 stable_merge(first+n_keys, first+n_key_plus_buf, first+len, comp, xbuf); 1999 stable_merge(first, first+n_keys, first+len, comp, xbuf); 2000 } 2001 else{ 2002 merge_bufferless(first, first+n_key_plus_buf, first+len, comp); 2003 } 2004 } 2005 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After final_merge : ", len); 2006 } 2007 2008 template<class RandIt, class Compare, class Unsigned, class XBuf> 2009 bool adaptive_sort_build_params 2010 (RandIt first, Unsigned const len, Compare comp 2011 , Unsigned &n_keys, Unsigned &l_intbuf, Unsigned &l_base, Unsigned &l_build_buf 2012 , XBuf & xbuf 2013 ) 2014 { 2015 typedef Unsigned size_type; 2016 2017 //Calculate ideal parameters and try to collect needed unique keys 2018 l_base = 0u; 2019 2020 //Try to find a value near sqrt(len) that is 2^N*l_base where 2021 //l_base <= AdaptiveSortInsertionSortThreshold. This property is important 2022 //as build_blocks merges to the left iteratively duplicating the 2023 //merged size and all the buffer must be used just before the final 2024 //merge to right step. This guarantees "build_blocks" produces 2025 //segments of size l_build_buf*2, maximizing the classic merge phase. 2026 l_intbuf = size_type(ceil_sqrt_multiple(len, &l_base)); 2027 2028 //The internal buffer can be expanded if there is enough external memory 2029 while(xbuf.capacity() >= l_intbuf*2){ 2030 l_intbuf *= 2; 2031 } 2032 2033 //This is the minimum number of keys to implement the ideal algorithm 2034 // 2035 //l_intbuf is used as buffer plus the key count 2036 size_type n_min_ideal_keys = l_intbuf-1; 2037 while(n_min_ideal_keys >= (len-l_intbuf-n_min_ideal_keys)/l_intbuf){ 2038 --n_min_ideal_keys; 2039 } 2040 n_min_ideal_keys += 1; 2041 BOOST_ASSERT(n_min_ideal_keys <= l_intbuf); 2042 2043 if(xbuf.template supports_aligned_trailing<size_type>(l_intbuf, (len-l_intbuf-1)/l_intbuf+1)){ 2044 n_keys = 0u; 2045 l_build_buf = l_intbuf; 2046 } 2047 else{ 2048 //Try to achieve a l_build_buf of length l_intbuf*2, so that we can merge with that 2049 //l_intbuf*2 buffer in "build_blocks" and use half of them as buffer and the other half 2050 //as keys in combine_all_blocks. In that case n_keys >= n_min_ideal_keys but by a small margin. 2051 // 2052 //If available memory is 2*sqrt(l), then only sqrt(l) unique keys are needed, 2053 //(to be used for keys in combine_all_blocks) as the whole l_build_buf 2054 //will be backuped in the buffer during build_blocks. 2055 bool const non_unique_buf = xbuf.capacity() >= l_intbuf; 2056 size_type const to_collect = non_unique_buf ? n_min_ideal_keys : l_intbuf*2; 2057 size_type collected = collect_unique(first, first+len, to_collect, comp, xbuf); 2058 2059 //If available memory is 2*sqrt(l), then for "build_params" 2060 //the situation is the same as if 2*l_intbuf were collected. 2061 if(non_unique_buf && collected == n_min_ideal_keys){ 2062 l_build_buf = l_intbuf; 2063 n_keys = n_min_ideal_keys; 2064 } 2065 else if(collected == 2*l_intbuf){ 2066 //l_intbuf*2 elements found. Use all of them in the build phase 2067 l_build_buf = l_intbuf*2; 2068 n_keys = l_intbuf; 2069 } 2070 else if(collected == (n_min_ideal_keys+l_intbuf)){ 2071 l_build_buf = l_intbuf; 2072 n_keys = n_min_ideal_keys; 2073 } 2074 //If collected keys are not enough, try to fix n_keys and l_intbuf. If no fix 2075 //is possible (due to very low unique keys), then go to a slow sort based on rotations. 2076 else{ 2077 BOOST_ASSERT(collected < (n_min_ideal_keys+l_intbuf)); 2078 if(collected < 4){ //No combination possible with less that 4 keys 2079 return false; 2080 } 2081 n_keys = l_intbuf; 2082 while(n_keys&(n_keys-1)){ 2083 n_keys &= n_keys-1; // make it power or 2 2084 } 2085 while(n_keys > collected){ 2086 n_keys/=2; 2087 } 2088 //AdaptiveSortInsertionSortThreshold is always power of two so the minimum is power of two 2089 l_base = min_value<Unsigned>(n_keys, AdaptiveSortInsertionSortThreshold); 2090 l_intbuf = 0; 2091 l_build_buf = n_keys; 2092 } 2093 BOOST_ASSERT((n_keys+l_intbuf) >= l_build_buf); 2094 } 2095 2096 return true; 2097 } 2098 2099 template<class RandIt, class Compare, class XBuf> 2100 inline void adaptive_merge_combine_blocks( RandIt first 2101 , typename iterator_traits<RandIt>::size_type len1 2102 , typename iterator_traits<RandIt>::size_type len2 2103 , typename iterator_traits<RandIt>::size_type collected 2104 , typename iterator_traits<RandIt>::size_type n_keys 2105 , typename iterator_traits<RandIt>::size_type l_block 2106 , bool use_internal_buf 2107 , bool xbuf_used 2108 , Compare comp 2109 , XBuf & xbuf 2110 ) 2111 { 2112 typedef typename iterator_traits<RandIt>::size_type size_type; 2113 size_type const len = len1+len2; 2114 size_type const l_combine = len-collected; 2115 size_type const l_combine1 = len1-collected; 2116 2117 if(n_keys){ 2118 RandIt const first_data = first+collected; 2119 RandIt const keys = first; 2120 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); 2121 if(xbuf_used){ 2122 if(xbuf.size() < l_block){ 2123 xbuf.initialize_until(l_block, *first); 2124 } 2125 BOOST_ASSERT(xbuf.size() >= l_block); 2126 size_type n_block_a, n_block_b, l_irreg1, l_irreg2; 2127 combine_params( keys, comp, l_combine 2128 , l_combine1, l_block, xbuf 2129 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs 2130 merge_blocks_with_buf 2131 (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), xbuf_used); 2132 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg xbf: ", len); 2133 } 2134 else{ 2135 size_type n_block_a, n_block_b, l_irreg1, l_irreg2; 2136 combine_params( keys, comp, l_combine 2137 , l_combine1, l_block, xbuf 2138 , n_block_a, n_block_b, l_irreg1, l_irreg2); //Outputs 2139 if(use_internal_buf){ 2140 merge_blocks_with_buf 2141 (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, first_data-l_block, xbuf_used); 2142 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A mrg buf: ", len); 2143 } 2144 else{ 2145 merge_blocks_bufferless 2146 (keys, comp, first_data, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp); 2147 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg nbf: ", len); 2148 } 2149 } 2150 } 2151 else{ 2152 xbuf.shrink_to_fit(l_block); 2153 if(xbuf.size() < l_block){ 2154 xbuf.initialize_until(l_block, *first); 2155 } 2156 size_type *const uint_keys = xbuf.template aligned_trailing<size_type>(l_block); 2157 size_type n_block_a, n_block_b, l_irreg1, l_irreg2; 2158 combine_params( uint_keys, less(), l_combine 2159 , l_combine1, l_block, xbuf 2160 , n_block_a, n_block_b, l_irreg1, l_irreg2, true); //Outputs 2161 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A combine: ", len); 2162 BOOST_ASSERT(xbuf.size() >= l_block); 2163 merge_blocks_with_buf 2164 (uint_keys, less(), first, l_block, l_irreg1, n_block_a, n_block_b, l_irreg2, comp, xbuf.data(), true); 2165 xbuf.clear(); 2166 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A mrg buf: ", len); 2167 } 2168 } 2169 2170 template<class RandIt, class Compare, class XBuf> 2171 inline void adaptive_merge_final_merge( RandIt first 2172 , typename iterator_traits<RandIt>::size_type len1 2173 , typename iterator_traits<RandIt>::size_type len2 2174 , typename iterator_traits<RandIt>::size_type collected 2175 , typename iterator_traits<RandIt>::size_type l_intbuf 2176 , typename iterator_traits<RandIt>::size_type l_block 2177 , bool use_internal_buf 2178 , bool xbuf_used 2179 , Compare comp 2180 , XBuf & xbuf 2181 ) 2182 { 2183 typedef typename iterator_traits<RandIt>::size_type size_type; 2184 (void)l_block; 2185 size_type n_keys = collected-l_intbuf; 2186 size_type len = len1+len2; 2187 if(use_internal_buf){ 2188 if(xbuf_used){ 2189 xbuf.clear(); 2190 //Nothing to do 2191 if(n_keys){ 2192 stable_sort(first, first+n_keys, comp, xbuf); 2193 stable_merge(first, first+n_keys, first+len, comp, xbuf); 2194 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A key mrg: ", len); 2195 } 2196 } 2197 else{ 2198 xbuf.clear(); 2199 stable_sort(first, first+collected, comp, xbuf); 2200 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); 2201 stable_merge(first, first+collected, first+len, comp, xbuf); 2202 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); 2203 } 2204 } 2205 else{ 2206 xbuf.clear(); 2207 stable_sort(first, first+collected, comp, xbuf); 2208 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b srt: ", len); 2209 stable_merge(first, first+collected, first+len1+len2, comp, xbuf); 2210 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L2(" A k/b mrg: ", len); 2211 } 2212 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" A fin mrg: ", len); 2213 } 2214 2215 template<class SizeType, class Xbuf> 2216 inline SizeType adaptive_merge_n_keys_intbuf(SizeType &rl_block, SizeType len1, SizeType len2, Xbuf & xbuf, SizeType &l_intbuf_inout) 2217 { 2218 typedef SizeType size_type; 2219 size_type l_block = rl_block; 2220 size_type l_intbuf = xbuf.capacity() >= l_block ? 0u : l_block; 2221 2222 while(xbuf.capacity() >= l_block*2){ 2223 l_block *= 2; 2224 } 2225 2226 //This is the minimum number of keys to implement the ideal algorithm 2227 size_type n_keys = len1/l_block+len2/l_block; 2228 while(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)){ 2229 --n_keys; 2230 } 2231 ++n_keys; 2232 BOOST_ASSERT(n_keys >= ((len1-l_intbuf-n_keys)/l_block + len2/l_block)); 2233 2234 if(xbuf.template supports_aligned_trailing<size_type>(l_block, n_keys)){ 2235 n_keys = 0u; 2236 } 2237 l_intbuf_inout = l_intbuf; 2238 rl_block = l_block; 2239 return n_keys; 2240 } 2241 2242 /////////////////////////////////////////////////////////////////////////////////////////// 2243 /////////////////////////////////////////////////////////////////////////////////////////// 2244 /////////////////////////////////////////////////////////////////////////////////////////// 2245 /////////////////////////////////////////////////////////////////////////////////////////// 2246 /////////////////////////////////////////////////////////////////////////////////////////// 2247 /////////////////////////////////////////////////////////////////////////////////////////// 2248 /////////////////////////////////////////////////////////////////////////////////////////// 2249 2250 // Main explanation of the sort algorithm. 2251 // 2252 // csqrtlen = ceil(sqrt(len)); 2253 // 2254 // * First, 2*csqrtlen unique elements elements are extracted from elements to be 2255 // sorted and placed in the beginning of the range. 2256 // 2257 // * Step "build_blocks": In this nearly-classic merge step, 2*csqrtlen unique elements 2258 // will be used as auxiliary memory, so trailing len-2*csqrtlen elements are 2259 // are grouped in blocks of sorted 4*csqrtlen elements. At the end of the step 2260 // 2*csqrtlen unique elements are again the leading elements of the whole range. 2261 // 2262 // * Step "combine_blocks": pairs of previously formed blocks are merged with a different 2263 // ("smart") algorithm to form blocks of 8*csqrtlen elements. This step is slower than the 2264 // "build_blocks" step and repeated iteratively (forming blocks of 16*csqrtlen, 32*csqrtlen 2265 // elements, etc) of until all trailing (len-2*csqrtlen) elements are merged. 2266 // 2267 // In "combine_blocks" len/csqrtlen elements used are as "keys" (markers) to 2268 // know if elements belong to the first or second block to be merged and another 2269 // leading csqrtlen elements are used as buffer. Explanation of the "combine_blocks" step: 2270 // 2271 // Iteratively until all trailing (len-2*csqrtlen) elements are merged: 2272 // Iteratively for each pair of previously merged block: 2273 // * Blocks are divided groups of csqrtlen elements and 2274 // 2*merged_block/csqrtlen keys are sorted to be used as markers 2275 // * Groups are selection-sorted by first or last element (depending whether they are going 2276 // to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. 2277 // * Elements of each block pair are merged using the csqrtlen buffer taking into account 2278 // if they belong to the first half or second half (marked by the key). 2279 // 2280 // * In the final merge step leading elements (2*csqrtlen) are sorted and merged with 2281 // rotations with the rest of sorted elements in the "combine_blocks" step. 2282 // 2283 // Corner cases: 2284 // 2285 // * If no 2*csqrtlen elements can be extracted: 2286 // 2287 // * If csqrtlen+len/csqrtlen are extracted, then only csqrtlen elements are used 2288 // as buffer in the "build_blocks" step forming blocks of 2*csqrtlen elements. This 2289 // means that an additional "combine_blocks" step will be needed to merge all elements. 2290 // 2291 // * If no csqrtlen+len/csqrtlen elements can be extracted, but still more than a minimum, 2292 // then reduces the number of elements used as buffer and keys in the "build_blocks" 2293 // and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction 2294 // then uses a rotation based smart merge. 2295 // 2296 // * If the minimum number of keys can't be extracted, a rotation-based sorting is performed. 2297 // 2298 // * If auxiliary memory is more or equal than ceil(len/2), half-copying mergesort is used. 2299 // 2300 // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), 2301 // then only csqrtlen elements need to be extracted and "combine_blocks" will use integral 2302 // keys to combine blocks. 2303 // 2304 // * If auxiliary memory is available, the "build_blocks" will be extended to build bigger blocks 2305 // using classic merge and "combine_blocks" will use bigger blocks when merging. 2306 template<class RandIt, class Compare, class XBuf> 2307 void adaptive_sort_impl 2308 ( RandIt first 2309 , typename iterator_traits<RandIt>::size_type const len 2310 , Compare comp 2311 , XBuf & xbuf 2312 ) 2313 { 2314 typedef typename iterator_traits<RandIt>::size_type size_type; 2315 2316 //Small sorts go directly to insertion sort 2317 if(len <= size_type(AdaptiveSortInsertionSortThreshold)){ 2318 insertion_sort(first, first + len, comp); 2319 } 2320 else if((len-len/2) <= xbuf.capacity()){ 2321 merge_sort(first, first+len, comp, xbuf.data()); 2322 } 2323 else{ 2324 //Make sure it is at least four 2325 BOOST_STATIC_ASSERT(AdaptiveSortInsertionSortThreshold >= 4); 2326 2327 size_type l_base = 0; 2328 size_type l_intbuf = 0; 2329 size_type n_keys = 0; 2330 size_type l_build_buf = 0; 2331 2332 //Calculate and extract needed unique elements. If a minimum is not achieved 2333 //fallback to a slow stable sort 2334 if(!adaptive_sort_build_params(first, len, comp, n_keys, l_intbuf, l_base, l_build_buf, xbuf)){ 2335 stable_sort(first, first+len, comp, xbuf); 2336 } 2337 else{ 2338 BOOST_ASSERT(l_build_buf); 2339 //Otherwise, continue the adaptive_sort 2340 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n After collect_unique: ", len); 2341 size_type const n_key_plus_buf = l_intbuf+n_keys; 2342 //l_build_buf is always power of two if l_intbuf is zero 2343 BOOST_ASSERT(l_intbuf || (0 == (l_build_buf & (l_build_buf-1)))); 2344 2345 //Classic merge sort until internal buffer and xbuf are exhausted 2346 size_type const l_merged = adaptive_sort_build_blocks 2347 (first+n_key_plus_buf-l_build_buf, len-n_key_plus_buf+l_build_buf, l_base, l_build_buf, xbuf, comp); 2348 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1(" After build_blocks: ", len); 2349 2350 //Non-trivial merge 2351 bool const buffer_right = adaptive_sort_combine_all_blocks 2352 (first, n_keys, first+n_keys, len-n_keys, l_merged, l_intbuf, xbuf, comp); 2353 2354 //Sort keys and buffer and merge the whole sequence 2355 adaptive_sort_final_merge(buffer_right, first, l_intbuf, n_keys, len, xbuf, comp); 2356 } 2357 } 2358 } 2359 2360 // Main explanation of the merge algorithm. 2361 // 2362 // csqrtlen = ceil(sqrt(len)); 2363 // 2364 // * First, csqrtlen [to be used as buffer] + (len/csqrtlen - 1) [to be used as keys] => to_collect 2365 // unique elements are extracted from elements to be sorted and placed in the beginning of the range. 2366 // 2367 // * Step "combine_blocks": the leading (len1-to_collect) elements plus trailing len2 elements 2368 // are merged with a non-trivial ("smart") algorithm to form an ordered range trailing "len-to_collect" elements. 2369 // 2370 // Explanation of the "combine_blocks" step: 2371 // 2372 // * Trailing [first+to_collect, first+len1) elements are divided in groups of cqrtlen elements. 2373 // Remaining elements that can't form a group are grouped in front of those elements. 2374 // * Trailing [first+len1, first+len1+len2) elements are divided in groups of cqrtlen elements. 2375 // Remaining elements that can't form a group are grouped in the back of those elements. 2376 // * In parallel the following two steps are performed: 2377 // * Groups are selection-sorted by first or last element (depending whether they are going 2378 // to be merged to left or right) and keys are reordered accordingly as an imitation-buffer. 2379 // * Elements of each block pair are merged using the csqrtlen buffer taking into account 2380 // if they belong to the first half or second half (marked by the key). 2381 // 2382 // * In the final merge step leading "to_collect" elements are merged with rotations 2383 // with the rest of merged elements in the "combine_blocks" step. 2384 // 2385 // Corner cases: 2386 // 2387 // * If no "to_collect" elements can be extracted: 2388 // 2389 // * If more than a minimum number of elements is extracted 2390 // then reduces the number of elements used as buffer and keys in the 2391 // and "combine_blocks" steps. If "combine_blocks" has no enough keys due to this reduction 2392 // then uses a rotation based smart merge. 2393 // 2394 // * If the minimum number of keys can't be extracted, a rotation-based merge is performed. 2395 // 2396 // * If auxiliary memory is more or equal than min(len1, len2), a buffered merge is performed. 2397 // 2398 // * If the len1 or len2 are less than 2*csqrtlen then a rotation-based merge is performed. 2399 // 2400 // * If auxiliary memory is more than csqrtlen+n_keys*sizeof(std::size_t), 2401 // then no csqrtlen need to be extracted and "combine_blocks" will use integral 2402 // keys to combine blocks. 2403 template<class RandIt, class Compare, class XBuf> 2404 void adaptive_merge_impl 2405 ( RandIt first 2406 , typename iterator_traits<RandIt>::size_type const len1 2407 , typename iterator_traits<RandIt>::size_type const len2 2408 , Compare comp 2409 , XBuf & xbuf 2410 ) 2411 { 2412 typedef typename iterator_traits<RandIt>::size_type size_type; 2413 2414 if(xbuf.capacity() >= min_value<size_type>(len1, len2)){ 2415 buffered_merge(first, first+len1, first+(len1+len2), comp, xbuf); 2416 } 2417 else{ 2418 const size_type len = len1+len2; 2419 //Calculate ideal parameters and try to collect needed unique keys 2420 size_type l_block = size_type(ceil_sqrt(len)); 2421 2422 //One range is not big enough to extract keys and the internal buffer so a 2423 //rotation-based based merge will do just fine 2424 if(len1 <= l_block*2 || len2 <= l_block*2){ 2425 merge_bufferless(first, first+len1, first+len1+len2, comp); 2426 return; 2427 } 2428 2429 //Detail the number of keys and internal buffer. If xbuf has enough memory, no 2430 //internal buffer is needed so l_intbuf will remain 0. 2431 size_type l_intbuf = 0; 2432 size_type n_keys = adaptive_merge_n_keys_intbuf(l_block, len1, len2, xbuf, l_intbuf); 2433 size_type const to_collect = l_intbuf+n_keys; 2434 //Try to extract needed unique values from the first range 2435 size_type const collected = collect_unique(first, first+len1, to_collect, comp, xbuf); 2436 BOOST_MOVE_ADAPTIVE_SORT_PRINT_L1("\n A collect: ", len); 2437 2438 //Not the minimum number of keys is not available on the first range, so fallback to rotations 2439 if(collected != to_collect && collected < 4){ 2440 merge_bufferless(first, first+collected, first+len1, comp); 2441 merge_bufferless(first, first + len1, first + len1 + len2, comp); 2442 return; 2443 } 2444 2445 //If not enough keys but more than minimum, adjust the internal buffer and key count 2446 bool use_internal_buf = collected == to_collect; 2447 if (!use_internal_buf){ 2448 l_intbuf = 0u; 2449 n_keys = collected; 2450 l_block = lblock_for_combine(l_intbuf, n_keys, len, use_internal_buf); 2451 //If use_internal_buf is false, then then internal buffer will be zero and rotation-based combination will be used 2452 l_intbuf = use_internal_buf ? l_block : 0u; 2453 } 2454 2455 bool const xbuf_used = collected == to_collect && xbuf.capacity() >= l_block; 2456 //Merge trailing elements using smart merges 2457 adaptive_merge_combine_blocks(first, len1, len2, collected, n_keys, l_block, use_internal_buf, xbuf_used, comp, xbuf); 2458 //Merge buffer and keys with the rest of the values 2459 adaptive_merge_final_merge (first, len1, len2, collected, l_intbuf, l_block, use_internal_buf, xbuf_used, comp, xbuf); 2460 } 2461 } 2462 2463 2464 } //namespace detail_adaptive { 2465 } //namespace movelib { 2466 } //namespace boost { 2467 2468 #include <boost/move/detail/config_end.hpp> 2469 2470 #endif //#define BOOST_MOVE_ADAPTIVE_SORT_MERGE_HPP 2471