1 /*************************************************************************** 2 * include/stxxl/bits/containers/pq_ext_merger.h 3 * 4 * Part of the STXXL. See http://stxxl.sourceforge.net 5 * 6 * Copyright (C) 1999 Peter Sanders <sanders@mpi-sb.mpg.de> 7 * Copyright (C) 2003, 2004, 2007 Roman Dementiev <dementiev@mpi-sb.mpg.de> 8 * Copyright (C) 2007-2009 Johannes Singler <singler@ira.uka.de> 9 * Copyright (C) 2007-2010 Andreas Beckmann <beckmann@cs.uni-frankfurt.de> 10 * 11 * Distributed under the Boost Software License, Version 1.0. 12 * (See accompanying file LICENSE_1_0.txt or copy at 13 * http://www.boost.org/LICENSE_1_0.txt) 14 **************************************************************************/ 15 16 #ifndef STXXL_CONTAINERS_PQ_EXT_MERGER_HEADER 17 #define STXXL_CONTAINERS_PQ_EXT_MERGER_HEADER 18 19 #include <stxxl/bits/containers/pq_helpers.h> 20 21 STXXL_BEGIN_NAMESPACE 22 23 //! \addtogroup stlcontinternals 24 //! 25 //! \{ 26 27 /*! \internal 28 */ 29 namespace priority_queue_local { 30 31 template <typename Iterator> 32 class short_sequence : public std::pair<Iterator, Iterator> 33 { 34 typedef std::pair<Iterator, Iterator> pair; 35 36 public: 37 typedef Iterator iterator; 38 typedef const iterator const_iterator; 39 typedef typename std::iterator_traits<iterator>::value_type value_type; 40 typedef typename std::iterator_traits<iterator>::difference_type size_type; 41 typedef value_type& reference; 42 typedef const value_type& const_reference; 43 typedef unsigned_type origin_type; 44 45 private: 46 origin_type m_origin; 47 48 public: short_sequence(Iterator first,Iterator last,origin_type origin)49 short_sequence(Iterator first, Iterator last, origin_type origin) 50 : pair(first, last), m_origin(origin) 51 { } 52 begin()53 iterator begin() 54 { 55 return this->first; 56 } 57 begin()58 const_iterator begin() const 59 { 60 return this->first; 61 } 62 cbegin()63 const_iterator cbegin() const 64 { 65 return begin(); 66 } 67 end()68 iterator end() 69 { 70 return this->second; 71 } 72 end()73 const_iterator end() const 74 { 75 return this->second; 76 } 77 cend()78 const_iterator cend() const 79 { 80 return end(); 81 } 82 front()83 reference front() 84 { 85 return *begin(); 86 } 87 front()88 const_reference front() const 89 { 90 return *begin(); 91 } 92 back()93 reference back() 94 { 95 return *(end() - 1); 96 } 97 back()98 const_reference back() const 99 { 100 return *(end() - 1); 101 } 102 size()103 size_type size() const 104 { 105 return end() - begin(); 106 } 107 empty()108 bool empty() const 109 { 110 return size() == 0; 111 } 112 origin()113 origin_type origin() const 114 { 115 return m_origin; 116 } 117 }; 118 119 /*! 120 * External merger, based on the loser tree data structure. 121 * \param Arity_ maximum arity of merger, does not need to be a power of 2 122 */ 123 template <class BlockType, 124 class Cmp, 125 unsigned Arity, 126 class AllocStr = STXXL_DEFAULT_ALLOC_STRATEGY> 127 class ext_merger : private noncopyable 128 { 129 public: 130 typedef stxxl::uint64 size_type; 131 typedef BlockType block_type; 132 typedef typename block_type::bid_type bid_type; 133 typedef typename block_type::value_type value_type; 134 typedef Cmp comparator_type; 135 typedef AllocStr alloc_strategy; 136 typedef read_write_pool<block_type> pool_type; 137 138 // arity_bound / 2 < arity <= arity_bound 139 enum { arity = Arity, arity_bound = 1UL << (LOG2<Arity>::ceil) }; 140 141 protected: 142 comparator_type cmp; 143 is_sentinel(const value_type & a)144 bool is_sentinel(const value_type& a) const 145 { 146 return !(cmp(cmp.min_value(), a)); // a <= cmp.min_value() 147 } 148 not_sentinel(const value_type & a)149 bool not_sentinel(const value_type& a) const 150 { 151 return cmp(cmp.min_value(), a); // a > cmp.min_value() 152 } 153 154 struct sequence_state : private noncopyable 155 { 156 block_type* block; //current block 157 unsigned_type current; //current index in current block 158 std::list<bid_type>* bids; //list of blocks forming this sequence 159 comparator_type cmp; 160 ext_merger* merger; 161 bool allocated; 162 163 //! \returns current element 164 const value_type& operator * () const 165 { 166 return (*block)[current]; 167 } 168 sequence_statesequence_state169 sequence_state() 170 : block(NULL), current(0), 171 bids(NULL), merger(NULL), 172 allocated(false) 173 { } 174 ~sequence_statesequence_state175 ~sequence_state() 176 { 177 STXXL_VERBOSE2("ext_merger sequence_state::~sequence_state()"); 178 if (bids != NULL) 179 { 180 block_manager* bm = block_manager::get_instance(); 181 bm->delete_blocks(bids->begin(), bids->end()); 182 delete bids; 183 } 184 } 185 make_infsequence_state186 void make_inf() 187 { 188 current = 0; 189 (*block)[0] = cmp.min_value(); 190 } 191 is_sentinelsequence_state192 bool is_sentinel(const value_type& a) const 193 { 194 return !(cmp(cmp.min_value(), a)); 195 } 196 not_sentinelsequence_state197 bool not_sentinel(const value_type& a) const 198 { 199 return cmp(cmp.min_value(), a); 200 } 201 swapsequence_state202 void swap(sequence_state& obj) 203 { 204 if (&obj != this) 205 { 206 std::swap(current, obj.current); 207 std::swap(block, obj.block); 208 std::swap(bids, obj.bids); 209 assert(merger == obj.merger); 210 std::swap(allocated, obj.allocated); 211 } 212 } 213 214 sequence_state& operator ++ () 215 { 216 assert(not_sentinel((*block)[current])); 217 assert(current < block->size); 218 219 ++current; 220 221 if (current == block->size) 222 { 223 STXXL_VERBOSE2("ext_merger sequence_state operator++ crossing block border "); 224 // go to the next block 225 assert(bids != NULL); 226 if (bids->empty()) // if there is no next block 227 { 228 STXXL_VERBOSE2("ext_merger sequence_state operator++ it was the last block in the sequence "); 229 delete bids; 230 bids = NULL; 231 make_inf(); 232 } 233 else 234 { 235 STXXL_VERBOSE2("ext_merger sequence_state operator++ there is another block "); 236 bid_type bid = bids->front(); 237 bids->pop_front(); 238 merger->pool->hint(bid); 239 if (!(bids->empty())) 240 { 241 STXXL_VERBOSE2("ext_merger sequence_state operator++ more blocks exist in a sequence, hinting the next"); 242 merger->pool->hint(bids->front()); 243 } 244 merger->pool->read(block, bid)->wait(); 245 STXXL_VERBOSE2("first element of read block " << bid << " " << *(block->begin()) << " cached in " << block); 246 if (!(bids->empty())) 247 merger->pool->hint(bids->front()); // re-hint, reading might have made a block free 248 block_manager::get_instance()->delete_block(bid); 249 current = 0; 250 } 251 } 252 return *this; 253 } 254 }; 255 256 #if STXXL_PQ_EXTERNAL_LOSER_TREE 257 struct Entry 258 { 259 value_type key; // key of loser element (winner for 0) 260 unsigned_type index; // the number of the losing segment 261 }; 262 #endif //STXXL_PQ_EXTERNAL_LOSER_TREE 263 264 size_type size_; // total number of elements stored 265 unsigned_type log_k; // log of current tree size 266 unsigned_type k; // invariant (k == 1 << log_k), always a power of 2 267 // only entries 0 .. arity-1 may hold actual sequences, the other 268 // entries arity .. arity_bound-1 are sentinels to make the size of the tree 269 // a power of 2 always 270 271 // stack of empty segment indices 272 internal_bounded_stack<unsigned_type, arity> free_segments; 273 274 #if STXXL_PQ_EXTERNAL_LOSER_TREE 275 // upper levels of loser trees 276 // entry[0] contains the winner info 277 Entry entry[arity_bound]; 278 #endif //STXXL_PQ_EXTERNAL_LOSER_TREE 279 280 // leaf information 281 // note that Knuth uses indices k..k-1 282 // while we use 0..k-1 283 sequence_state states[arity_bound]; // sequence including current position, dereference gives current element 284 285 pool_type* pool; 286 287 block_type* sentinel_block; 288 289 public: ext_merger()290 ext_merger() 291 : size_(0), log_k(0), k(1), pool(0) 292 { 293 init(); 294 } 295 ext_merger(pool_type * pool_)296 ext_merger(pool_type* pool_) 297 : size_(0), log_k(0), k(1), 298 pool(pool_) 299 { 300 init(); 301 } 302 ~ext_merger()303 virtual ~ext_merger() 304 { 305 STXXL_VERBOSE1("ext_merger::~ext_merger()"); 306 for (unsigned_type i = 0; i < arity; ++i) 307 { 308 delete states[i].block; 309 } 310 delete sentinel_block; 311 } 312 set_pool(pool_type * pool_)313 void set_pool(pool_type* pool_) 314 { 315 pool = pool_; 316 } 317 318 private: init()319 void init() 320 { 321 STXXL_VERBOSE2("ext_merger::init()"); 322 assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering 323 324 sentinel_block = NULL; 325 if (arity < arity_bound) 326 { 327 sentinel_block = new block_type; 328 for (unsigned_type i = 0; i < block_type::size; ++i) 329 (*sentinel_block)[i] = cmp.min_value(); 330 if (arity + 1 == arity_bound) { 331 // same memory consumption, but smaller merge width, better use arity = arity_bound 332 STXXL_ERRMSG("inefficient PQ parameters for ext_merger: arity + 1 == arity_bound"); 333 } 334 } 335 336 for (unsigned_type i = 0; i < arity_bound; ++i) 337 { 338 states[i].merger = this; 339 if (i < arity) 340 states[i].block = new block_type; 341 else 342 states[i].block = sentinel_block; 343 344 states[i].make_inf(); 345 } 346 347 assert(k == 1); 348 free_segments.push(0); //total state: one free sequence 349 350 rebuild_loser_tree(); 351 #if STXXL_PQ_EXTERNAL_LOSER_TREE 352 assert(is_sentinel(*states[entry[0].index])); 353 #endif //STXXL_PQ_EXTERNAL_LOSER_TREE 354 } 355 356 // rebuild loser tree information from the values in current rebuild_loser_tree()357 void rebuild_loser_tree() 358 { 359 #if STXXL_PQ_EXTERNAL_LOSER_TREE 360 unsigned_type winner = init_winner(1); 361 entry[0].index = winner; 362 entry[0].key = *(states[winner]); 363 #endif //STXXL_PQ_EXTERNAL_LOSER_TREE 364 } 365 366 #if STXXL_PQ_EXTERNAL_LOSER_TREE 367 // given any values in the leaves this 368 // routing recomputes upper levels of the tree 369 // from scratch in linear time 370 // initialize entry[root].index and the subtree rooted there 371 // return winner index init_winner(unsigned_type root)372 unsigned_type init_winner(unsigned_type root) 373 { 374 if (root >= k || root >= arity_bound) 375 { // leaf reached 376 return root - k; 377 } 378 else 379 { 380 unsigned_type left = init_winner(2 * root); 381 unsigned_type right = init_winner(2 * root + 1); 382 value_type lk = *(states[left]); 383 value_type rk = *(states[right]); 384 assert(root < arity_bound); 385 if (!(cmp(lk, rk))) 386 { // right subtree looses 387 entry[root].index = right; 388 entry[root].key = rk; 389 return left; 390 } 391 else 392 { 393 entry[root].index = left; 394 entry[root].key = lk; 395 return right; 396 } 397 } 398 } 399 400 // first go up the tree all the way to the root 401 // hand down old winner for the respective subtree 402 // based on new value, and old winner and loser 403 // update each node on the path to the root top down. 404 // This is implemented recursively update_on_insert(unsigned_type node,const value_type & new_key,unsigned_type new_index,value_type * winner_key,unsigned_type * winner_index,unsigned_type * mask)405 void update_on_insert( 406 unsigned_type node, 407 const value_type& new_key, 408 unsigned_type new_index, 409 value_type* winner_key, 410 unsigned_type* winner_index, // old winner 411 unsigned_type* mask) // 1 << (ceil(log KNK) - dist-from-root) 412 { 413 if (node == 0) 414 { // winner part of root 415 *mask = unsigned_type(1) << (log_k - 1); 416 *winner_key = entry[0].key; 417 *winner_index = entry[0].index; 418 if (cmp(entry[node].key, new_key)) 419 { 420 entry[node].key = new_key; 421 entry[node].index = new_index; 422 } 423 } 424 else 425 { 426 update_on_insert(node >> 1, new_key, new_index, winner_key, winner_index, mask); 427 value_type loserKey = entry[node].key; 428 unsigned_type loserIndex = entry[node].index; 429 if ((*winner_index & *mask) != (new_index & *mask)) 430 { // different subtrees 431 if (cmp(loserKey, new_key)) 432 { // new_key will have influence here 433 if (cmp(*winner_key, new_key)) 434 { // old winner loses here 435 entry[node].key = *winner_key; 436 entry[node].index = *winner_index; 437 } 438 else 439 { // new entry looses here 440 entry[node].key = new_key; 441 entry[node].index = new_index; 442 } 443 } 444 *winner_key = loserKey; 445 *winner_index = loserIndex; 446 } 447 // note that nothing needs to be done if 448 // the winner came from the same subtree 449 // a) new_key <= winner_key => even more reason for the other tree to lose 450 // b) new_key > winner_key => the old winner will beat the new 451 // entry further down the tree 452 // also the same old winner is handed down the tree 453 454 *mask >>= 1; // next level 455 } 456 } 457 #endif //STXXL_PQ_EXTERNAL_LOSER_TREE 458 459 // make the tree two times as wide double_k()460 void double_k() 461 { 462 STXXL_VERBOSE1("ext_merger::double_k (before) k=" << k << " log_k=" << log_k << " arity_bound=" << arity_bound << " arity=" << arity << " #free=" << free_segments.size()); 463 assert(k > 0); 464 assert(k < arity); 465 assert(free_segments.empty()); // stack was free (probably not needed) 466 467 // make all new entries free 468 // and push them on the free stack 469 for (unsigned_type i = 2 * k - 1; i >= k; i--) //backwards 470 { 471 states[i].make_inf(); 472 if (i < arity) 473 free_segments.push(i); 474 } 475 476 // double the size 477 k *= 2; 478 log_k++; 479 480 STXXL_VERBOSE1("ext_merger::double_k (after) k=" << k << " log_k=" << log_k << " arity_bound=" << arity_bound << " arity=" << arity << " #free=" << free_segments.size()); 481 assert(!free_segments.empty()); 482 assert(k <= arity_bound); 483 484 // recompute loser tree information 485 rebuild_loser_tree(); 486 } 487 488 // compact nonempty segments in the left half of the tree compact_tree()489 void compact_tree() 490 { 491 STXXL_VERBOSE1("ext_merger::compact_tree (before) k=" << k << " log_k=" << log_k << " #free=" << free_segments.size()); 492 assert(log_k > 0); 493 494 // compact all nonempty segments to the left 495 496 unsigned_type target = 0; 497 for (unsigned_type from = 0; from < k; from++) 498 { 499 if (!is_segment_empty(from)) 500 { 501 assert(is_segment_allocated(from)); 502 if (from != target) 503 { 504 assert(!is_segment_allocated(target)); 505 states[target].swap(states[from]); 506 } 507 ++target; 508 } 509 } 510 511 // half degree as often as possible 512 while (k > 1 && target <= (k / 2)) 513 { 514 k /= 2; 515 log_k--; 516 } 517 518 // overwrite garbage and compact the stack of free segment indices 519 free_segments.clear(); // none free 520 for ( ; target < k; target++) 521 { 522 assert(!is_segment_allocated(target)); 523 states[target].make_inf(); 524 if (target < arity) 525 free_segments.push(target); 526 } 527 528 STXXL_VERBOSE1("ext_merger::compact_tree (after) k=" << k << " log_k=" << log_k << " #free=" << free_segments.size()); 529 assert(k > 0); 530 531 // recompute loser tree information 532 rebuild_loser_tree(); 533 } 534 535 #if 0 536 void swap(ext_merger& obj) 537 { 538 std::swap(cmp, obj.cmp); 539 std::swap(free_segments, obj.free_segments); 540 std::swap(size_, obj.size_); 541 std::swap(log_k, obj.log_k); 542 std::swap(k, obj.k); 543 swap_1D_arrays(entry, obj.entry, arity_bound); 544 swap_1D_arrays(states, obj.states, arity_bound); 545 546 // std::swap(pool,obj.pool); 547 } 548 #endif 549 550 public: mem_cons()551 unsigned_type mem_cons() const // only rough estimation 552 { 553 return (STXXL_MIN<unsigned_type>(arity + 1, arity_bound) * block_type::raw_size); 554 } 555 556 // delete the (length = end-begin) smallest elements and write them to [begin..end) 557 // empty segments are deallocated 558 // requires: 559 // - there are at least length elements 560 // - segments are ended by sentinels 561 template <class OutputIterator> multi_merge(OutputIterator begin,OutputIterator end)562 void multi_merge(OutputIterator begin, OutputIterator end) 563 { 564 int_type length = end - begin; 565 566 STXXL_VERBOSE1("ext_merger::multi_merge from " << k << " sequence(s)," 567 " length = " << length); 568 569 if (length == 0) 570 return; 571 572 assert(k > 0); 573 assert(length <= (int_type)size_); 574 575 //This is the place to make statistics about external multi_merge calls. 576 577 #if STXXL_PARALLEL && STXXL_PARALLEL_PQ_MULTIWAY_MERGE_EXTERNAL 578 typedef stxxl::int64 diff_type; 579 580 typedef std::pair<typename block_type::iterator, typename block_type::iterator> sequence; 581 582 std::vector<sequence> seqs; 583 std::vector<unsigned_type> orig_seq_index; 584 585 Cmp cmp; 586 priority_queue_local::invert_order<Cmp, value_type, value_type> inv_cmp(cmp); 587 588 for (unsigned_type i = 0; i < k; ++i) //initialize sequences 589 { 590 if (states[i].current == states[i].block->size || is_sentinel(*states[i])) 591 continue; 592 593 seqs.push_back(std::make_pair(states[i].block->begin() + states[i].current, states[i].block->end())); 594 orig_seq_index.push_back(i); 595 596 #if STXXL_CHECK_ORDER_IN_SORTS 597 if (!is_sentinel(*seqs.back().first) && !stxxl::is_sorted(seqs.back().first, seqs.back().second, inv_cmp)) 598 { 599 STXXL_VERBOSE1("length " << i << " " << (seqs.back().second - seqs.back().first)); 600 for (value_type* v = seqs.back().first + 1; v < seqs.back().second; ++v) 601 { 602 if (inv_cmp(*v, *(v - 1))) 603 { 604 STXXL_VERBOSE1("Error at position " << i << "/" << (v - seqs.back().first - 1) << "/" << (v - seqs.back().first) << " " << *(v - 1) << " " << *v); 605 } 606 if (is_sentinel(*v)) 607 { 608 STXXL_VERBOSE1("Wrong sentinel at position " << (v - seqs.back().first)); 609 } 610 } 611 assert(false); 612 } 613 #endif 614 615 //Hint first non-internal (actually second) block of this sequence. 616 if (states[i].bids != NULL && !states[i].bids->empty()) 617 pool->hint(states[i].bids->front()); 618 } 619 620 assert(seqs.size() > 0); 621 622 #if STXXL_CHECK_ORDER_IN_SORTS 623 value_type last_elem; 624 #endif 625 626 diff_type rest = length; //elements still to merge for this output block 627 628 while (rest > 0) 629 { 630 value_type min_last = cmp.min_value(); // minimum of the sequences' last elements 631 diff_type total_size = 0; 632 633 for (unsigned_type i = 0; i < seqs.size(); ++i) 634 { 635 diff_type seq_i_size = seqs[i].second - seqs[i].first; 636 if (seq_i_size > 0) 637 { 638 total_size += seq_i_size; 639 if (inv_cmp(*(seqs[i].second - 1), min_last)) 640 min_last = *(seqs[i].second - 1); 641 642 STXXL_VERBOSE1("front block of seq " << i << ": front=" << *(seqs[i].first) << " back=" << *(seqs[i].second - 1) << " len=" << seq_i_size); 643 } else { 644 STXXL_VERBOSE1("front block of seq " << i << ": empty"); 645 } 646 } 647 648 assert(total_size > 0); 649 assert(!is_sentinel(min_last)); 650 651 STXXL_VERBOSE1("min_last " << min_last << " total size " << total_size << " num_seq " << seqs.size()); 652 653 diff_type less_equal_than_min_last = 0; 654 //locate this element in all sequences 655 for (unsigned_type i = 0; i < seqs.size(); ++i) 656 { 657 //assert(seqs[i].first < seqs[i].second); 658 659 typename block_type::iterator position = 660 std::upper_bound(seqs[i].first, seqs[i].second, min_last, inv_cmp); 661 662 //no element larger than min_last is merged 663 664 STXXL_VERBOSE1("seq " << i << ": " << (position - seqs[i].first) << " greater equal than " << min_last); 665 666 less_equal_than_min_last += (position - seqs[i].first); 667 } 668 669 diff_type output_size = STXXL_MIN(less_equal_than_min_last, rest); // at most rest elements 670 671 STXXL_VERBOSE1("output_size=" << output_size << " = min(leq_t_ml=" << less_equal_than_min_last << ", rest=" << rest << ")"); 672 673 assert(output_size > 0); 674 675 //main call 676 677 begin = parallel::multiway_merge(seqs.begin(), seqs.end(), begin, inv_cmp, output_size); //sequence iterators are progressed appropriately 678 679 rest -= output_size; 680 size_ -= output_size; 681 682 for (unsigned_type i = 0; i < seqs.size(); ++i) 683 { 684 sequence_state& state = states[orig_seq_index[i]]; 685 686 state.current = seqs[i].first - state.block->begin(); 687 688 assert(seqs[i].first <= seqs[i].second); 689 690 if (seqs[i].first == seqs[i].second) //has run empty 691 { 692 assert(state.current == state.block->size); 693 if (state.bids == NULL || state.bids->empty()) // if there is no next block 694 { 695 STXXL_VERBOSE1("seq " << i << ": ext_merger::multi_merge(...) it was the last block in the sequence "); 696 state.make_inf(); 697 } 698 else 699 { 700 #if STXXL_CHECK_ORDER_IN_SORTS 701 last_elem = *(seqs[i].second - 1); 702 #endif 703 STXXL_VERBOSE1("seq " << i << ": ext_merger::multi_merge(...) there is another block "); 704 bid_type bid = state.bids->front(); 705 state.bids->pop_front(); 706 pool->hint(bid); 707 if (!(state.bids->empty())) 708 { 709 STXXL_VERBOSE2("seq " << i << ": ext_merger::multi_merge(...) more blocks exist, hinting the next"); 710 pool->hint(state.bids->front()); 711 } 712 pool->read(state.block, bid)->wait(); 713 STXXL_VERBOSE1("seq " << i << ": first element of read block " << bid << " " << *(state.block->begin()) << " cached in " << state.block); 714 if (!(state.bids->empty())) 715 pool->hint(state.bids->front()); // re-hint, reading might have made a block free 716 state.current = 0; 717 seqs[i] = std::make_pair(state.block->begin() + state.current, state.block->end()); 718 block_manager::get_instance()->delete_block(bid); 719 720 #if STXXL_CHECK_ORDER_IN_SORTS 721 STXXL_VERBOSE1("before " << last_elem << " after " << *seqs[i].first << " newly loaded block " << bid); 722 if (!stxxl::is_sorted(seqs[i].first, seqs[i].second, inv_cmp)) 723 { 724 STXXL_VERBOSE1("length " << i << " " << (seqs[i].second - seqs[i].first)); 725 for (value_type* v = seqs[i].first + 1; v < seqs[i].second; ++v) 726 { 727 if (inv_cmp(*v, *(v - 1))) 728 { 729 STXXL_VERBOSE1("Error at position " << i << "/" << (v - seqs[i].first - 1) << "/" << (v - seqs[i].first) << " " << *(v - 1) << " " << *v); 730 } 731 if (is_sentinel(*v)) 732 { 733 STXXL_VERBOSE1("Wrong sentinel at position " << (v - seqs[i].first)); 734 } 735 } 736 assert(false); 737 } 738 #endif 739 } 740 } 741 } 742 } //while (rest > 1) 743 744 for (unsigned_type i = 0; i < seqs.size(); ++i) 745 { 746 unsigned_type seg = orig_seq_index[i]; 747 if (is_segment_empty(seg)) 748 { 749 STXXL_VERBOSE1("deallocated " << seg); 750 deallocate_segment(seg); 751 } 752 } 753 754 #else // STXXL_PARALLEL && STXXL_PARALLEL_PQ_MULTIWAY_MERGE_EXTERNAL 755 756 //Hint first non-internal (actually second) block of each sequence. 757 for (unsigned_type i = 0; i < k; ++i) 758 { 759 if (states[i].bids != NULL && !states[i].bids->empty()) 760 pool->hint(states[i].bids->front()); 761 } 762 763 switch (log_k) { 764 case 0: 765 assert(k == 1); 766 assert(entry[0].index == 0); 767 assert(free_segments.empty()); 768 //memcpy(target, states[0], length * sizeof(value_type)); 769 //std::copy(states[0],states[0]+length,target); 770 for (int_type i = 0; i < length; ++i, ++(states[0]), ++begin) 771 *begin = *(states[0]); 772 773 entry[0].key = **states; 774 if (is_segment_empty(0)) 775 deallocate_segment(0); 776 777 break; 778 case 1: 779 assert(k == 2); 780 merge_iterator(states[0], states[1], begin, length, cmp); 781 rebuild_loser_tree(); 782 if (is_segment_empty(0) && is_segment_allocated(0)) 783 deallocate_segment(0); 784 785 if (is_segment_empty(1) && is_segment_allocated(1)) 786 deallocate_segment(1); 787 788 break; 789 case 2: 790 assert(k == 4); 791 if (is_segment_empty(3)) 792 merge3_iterator(states[0], states[1], states[2], begin, length, cmp); 793 else 794 merge4_iterator(states[0], states[1], states[2], states[3], begin, length, cmp); 795 rebuild_loser_tree(); 796 if (is_segment_empty(0) && is_segment_allocated(0)) 797 deallocate_segment(0); 798 799 if (is_segment_empty(1) && is_segment_allocated(1)) 800 deallocate_segment(1); 801 802 if (is_segment_empty(2) && is_segment_allocated(2)) 803 deallocate_segment(2); 804 805 if (is_segment_empty(3) && is_segment_allocated(3)) 806 deallocate_segment(3); 807 808 break; 809 case 3: multi_merge_f<OutputIterator, 3>(begin, end); 810 break; 811 case 4: multi_merge_f<OutputIterator, 4>(begin, end); 812 break; 813 case 5: multi_merge_f<OutputIterator, 5>(begin, end); 814 break; 815 case 6: multi_merge_f<OutputIterator, 6>(begin, end); 816 break; 817 case 7: multi_merge_f<OutputIterator, 7>(begin, end); 818 break; 819 case 8: multi_merge_f<OutputIterator, 8>(begin, end); 820 break; 821 case 9: multi_merge_f<OutputIterator, 9>(begin, end); 822 break; 823 case 10: multi_merge_f<OutputIterator, 10>(begin, end); 824 break; 825 default: multi_merge_k(begin, end); 826 break; 827 } 828 829 size_ -= length; 830 #endif 831 832 // compact tree if it got considerably smaller 833 { 834 const unsigned_type num_segments_used = std::min<unsigned_type>(arity, k) - free_segments.size(); 835 const unsigned_type num_segments_trigger = k - (3 * k / 5); 836 // using k/2 would be worst case inefficient (for large k) 837 // for k \in {2, 4, 8} the trigger is k/2 which is good 838 // because we have special mergers for k \in {1, 2, 4} 839 // there is also a special 3-way-merger, that will be 840 // triggered if k == 4 && is_segment_empty(3) 841 STXXL_VERBOSE3("ext_merger compact? k=" << k << " #used=" << num_segments_used 842 << " <= #trigger=" << num_segments_trigger << " ==> " 843 << ((k > 1 && num_segments_used <= num_segments_trigger) ? "yes" : "no ") 844 << " || " 845 << ((k == 4 && !free_segments.empty() && !is_segment_empty(3)) ? "yes" : "no ") 846 << " #free=" << free_segments.size()); 847 if (k > 1 && ((num_segments_used <= num_segments_trigger) || 848 (k == 4 && !free_segments.empty() && !is_segment_empty(3)))) 849 { 850 compact_tree(); 851 } 852 } 853 } 854 855 private: 856 #if STXXL_PQ_EXTERNAL_LOSER_TREE 857 // multi-merge for arbitrary K 858 template <class OutputIterator> multi_merge_k(OutputIterator begin,OutputIterator end)859 void multi_merge_k(OutputIterator begin, OutputIterator end) 860 { 861 Entry* current_pos; 862 value_type current_key; 863 unsigned_type current_index; // leaf pointed to by current entry 864 unsigned_type kReg = k; 865 OutputIterator done = end; 866 OutputIterator target = begin; 867 unsigned_type winner_index = entry[0].index; 868 value_type winner_key = entry[0].key; 869 870 while (target != done) 871 { 872 // write result 873 *target = *(states[winner_index]); 874 875 // advance winner segment 876 ++(states[winner_index]); 877 878 winner_key = *(states[winner_index]); 879 880 // remove winner segment if empty now 881 if (is_sentinel(winner_key)) // 882 deallocate_segment(winner_index); 883 884 // go up the entry-tree 885 for (unsigned_type i = (winner_index + kReg) >> 1; i > 0; i >>= 1) 886 { 887 current_pos = entry + i; 888 current_key = current_pos->key; 889 if (cmp(winner_key, current_key)) 890 { 891 current_index = current_pos->index; 892 current_pos->key = winner_key; 893 current_pos->index = winner_index; 894 winner_key = current_key; 895 winner_index = current_index; 896 } 897 } 898 899 ++target; 900 } 901 entry[0].index = winner_index; 902 entry[0].key = winner_key; 903 } 904 905 template <class OutputIterator, int LogK> multi_merge_f(OutputIterator begin,OutputIterator end)906 void multi_merge_f(OutputIterator begin, OutputIterator end) 907 { 908 OutputIterator done = end; 909 OutputIterator target = begin; 910 unsigned_type winner_index = entry[0].index; 911 Entry* reg_entry = entry; 912 sequence_state* reg_states = states; 913 value_type winner_key = entry[0].key; 914 915 assert(log_k >= LogK); 916 while (target != done) 917 { 918 // write result 919 *target = *(reg_states[winner_index]); 920 921 // advance winner segment 922 ++(reg_states[winner_index]); 923 924 winner_key = *(reg_states[winner_index]); 925 926 // remove winner segment if empty now 927 if (is_sentinel(winner_key)) 928 deallocate_segment(winner_index); 929 930 ++target; 931 932 // update loser tree 933 #define TreeStep(L) \ 934 if (1 << LogK >= 1 << L) { \ 935 Entry* pos ## L = reg_entry + ((winner_index + (1 << LogK)) >> (((int(LogK - L) + 1) >= 0) ? ((LogK - L) + 1) : 0)); \ 936 value_type key ## L = pos ## L->key; \ 937 if (cmp(winner_key, key ## L)) { \ 938 unsigned_type index ## L = pos ## L->index; \ 939 pos ## L->key = winner_key; \ 940 pos ## L->index = winner_index; \ 941 winner_key = key ## L; \ 942 winner_index = index ## L; \ 943 } \ 944 } 945 TreeStep(10); 946 TreeStep(9); 947 TreeStep(8); 948 TreeStep(7); 949 TreeStep(6); 950 TreeStep(5); 951 TreeStep(4); 952 TreeStep(3); 953 TreeStep(2); 954 TreeStep(1); 955 #undef TreeStep 956 } 957 reg_entry[0].index = winner_index; 958 reg_entry[0].key = winner_key; 959 } 960 #endif //STXXL_PQ_EXTERNAL_LOSER_TREE 961 962 public: is_space_available()963 bool is_space_available() const // for new segment 964 { 965 return k < arity || !free_segments.empty(); 966 } 967 968 // insert segment beginning at target 969 // require: is_space_available() == 1 970 template <class Merger> insert_segment(Merger & another_merger,size_type segment_size)971 void insert_segment(Merger& another_merger, size_type segment_size) 972 { 973 STXXL_VERBOSE1("ext_merger::insert_segment(merger,...)" << this); 974 975 if (segment_size > 0) 976 { 977 // get a free slot 978 if (free_segments.empty()) 979 { // tree is too small 980 double_k(); 981 } 982 assert(!free_segments.empty()); 983 unsigned_type free_slot = free_segments.top(); 984 free_segments.pop(); 985 986 // link new segment 987 assert(segment_size); 988 unsigned_type nblocks = (unsigned_type)(segment_size / block_type::size); 989 //assert(nblocks); // at least one block 990 STXXL_VERBOSE1("ext_merger::insert_segment nblocks=" << nblocks); 991 if (nblocks == 0) 992 { 993 STXXL_VERBOSE1("ext_merger::insert_segment(merger,...) WARNING: inserting a segment with " << 994 nblocks << " blocks"); 995 STXXL_VERBOSE1("THIS IS INEFFICIENT: TRY TO CHANGE PRIORITY QUEUE PARAMETERS"); 996 } 997 unsigned_type first_size = (unsigned_type)(segment_size % block_type::size); 998 if (first_size == 0) 999 { 1000 first_size = block_type::size; 1001 --nblocks; 1002 } 1003 block_manager* bm = block_manager::get_instance(); 1004 std::list<bid_type>* bids = new std::list<bid_type>(nblocks); 1005 bm->new_blocks(alloc_strategy(), bids->begin(), bids->end()); 1006 block_type* first_block = new block_type; 1007 1008 another_merger.multi_merge( 1009 first_block->begin() + (block_type::size - first_size), 1010 first_block->end()); 1011 1012 STXXL_VERBOSE1("last element of first block " << *(first_block->end() - 1)); 1013 assert(!cmp(*(first_block->begin() + (block_type::size - first_size)), *(first_block->end() - 1))); 1014 1015 assert(pool->size_write() > 0); 1016 1017 for (typename std::list<bid_type>::iterator curbid = bids->begin(); curbid != bids->end(); ++curbid) 1018 { 1019 block_type* b = pool->steal(); 1020 another_merger.multi_merge(b->begin(), b->end()); 1021 STXXL_VERBOSE1("first element of following block " << *curbid << " " << *(b->begin())); 1022 STXXL_VERBOSE1("last element of following block " << *curbid << " " << *(b->end() - 1)); 1023 assert(!cmp(*(b->begin()), *(b->end() - 1))); 1024 pool->write(b, *curbid); 1025 STXXL_VERBOSE1("written to block " << *curbid << " cached in " << b); 1026 } 1027 1028 insert_segment(bids, first_block, first_size, free_slot); 1029 1030 size_ += segment_size; 1031 1032 #if STXXL_PQ_EXTERNAL_LOSER_TREE 1033 // propagate new information up the tree 1034 value_type dummyKey; 1035 unsigned_type dummyIndex; 1036 unsigned_type dummyMask; 1037 update_on_insert((free_slot + k) >> 1, *(states[free_slot]), free_slot, 1038 &dummyKey, &dummyIndex, &dummyMask); 1039 #endif //STXXL_PQ_EXTERNAL_LOSER_TREE 1040 } 1041 else 1042 { 1043 // deallocate memory ? 1044 STXXL_VERBOSE1("Merged segment with zero size."); 1045 } 1046 } 1047 size()1048 size_type size() const { return size_; } 1049 1050 protected: 1051 /*! 1052 \param bidlist list of blocks to insert 1053 \param first_block the first block of the sequence, before bidlist 1054 \param first_size number of elements in the first block 1055 \param slot slot to insert into 1056 */ insert_segment(std::list<bid_type> * bidlist,block_type * first_block,unsigned_type first_size,unsigned_type slot)1057 void insert_segment(std::list<bid_type>* bidlist, block_type* first_block, 1058 unsigned_type first_size, unsigned_type slot) 1059 { 1060 STXXL_VERBOSE1("ext_merger::insert_segment(bidlist,...) " << this << " " << bidlist->size() << " " << slot); 1061 assert(!is_segment_allocated(slot)); 1062 assert(first_size > 0); 1063 1064 sequence_state& new_sequence = states[slot]; 1065 new_sequence.current = block_type::size - first_size; 1066 std::swap(new_sequence.block, first_block); 1067 delete first_block; 1068 std::swap(new_sequence.bids, bidlist); 1069 if (bidlist) // the old list 1070 { 1071 assert(bidlist->empty()); 1072 delete bidlist; 1073 } 1074 new_sequence.allocated = true; 1075 assert(is_segment_allocated(slot)); 1076 } 1077 1078 // free an empty segment . deallocate_segment(unsigned_type slot)1079 void deallocate_segment(unsigned_type slot) 1080 { 1081 STXXL_VERBOSE1("ext_merger::deallocate_segment() deleting segment " << slot << " allocated=" << int(is_segment_allocated(slot))); 1082 assert(is_segment_allocated(slot)); 1083 states[slot].allocated = false; 1084 states[slot].make_inf(); 1085 1086 // push on the stack of free segment indices 1087 free_segments.push(slot); 1088 } 1089 1090 // is this segment empty ? is_segment_empty(unsigned_type slot)1091 bool is_segment_empty(unsigned_type slot) const 1092 { 1093 return is_sentinel(*(states[slot])); 1094 } 1095 1096 // Is this segment allocated? Otherwise it's empty, 1097 // already on the stack of free segment indices and can be reused. is_segment_allocated(unsigned_type slot)1098 bool is_segment_allocated(unsigned_type slot) const 1099 { 1100 return states[slot].allocated; 1101 } 1102 }; // class ext_merger 1103 1104 } // namespace priority_queue_local 1105 1106 //! \} 1107 1108 STXXL_END_NAMESPACE 1109 1110 #endif // !STXXL_CONTAINERS_PQ_EXT_MERGER_HEADER 1111 // vim: et:ts=4:sw=4 1112