1 /***************************************************************************
2  *  include/stxxl/bits/containers/pq_ext_merger.h
3  *
4  *  Part of the STXXL. See http://stxxl.sourceforge.net
5  *
6  *  Copyright (C) 1999 Peter Sanders <sanders@mpi-sb.mpg.de>
7  *  Copyright (C) 2003, 2004, 2007 Roman Dementiev <dementiev@mpi-sb.mpg.de>
8  *  Copyright (C) 2007-2009 Johannes Singler <singler@ira.uka.de>
9  *  Copyright (C) 2007-2010 Andreas Beckmann <beckmann@cs.uni-frankfurt.de>
10  *
11  *  Distributed under the Boost Software License, Version 1.0.
12  *  (See accompanying file LICENSE_1_0.txt or copy at
13  *  http://www.boost.org/LICENSE_1_0.txt)
14  **************************************************************************/
15 
16 #ifndef STXXL_CONTAINERS_PQ_EXT_MERGER_HEADER
17 #define STXXL_CONTAINERS_PQ_EXT_MERGER_HEADER
18 
19 #include <stxxl/bits/containers/pq_helpers.h>
20 
21 STXXL_BEGIN_NAMESPACE
22 
23 //! \addtogroup stlcontinternals
24 //!
25 //! \{
26 
27 /*! \internal
28  */
29 namespace priority_queue_local {
30 
31 template <typename Iterator>
32 class short_sequence : public std::pair<Iterator, Iterator>
33 {
34     typedef std::pair<Iterator, Iterator> pair;
35 
36 public:
37     typedef Iterator iterator;
38     typedef const iterator const_iterator;
39     typedef typename std::iterator_traits<iterator>::value_type value_type;
40     typedef typename std::iterator_traits<iterator>::difference_type size_type;
41     typedef value_type& reference;
42     typedef const value_type& const_reference;
43     typedef unsigned_type origin_type;
44 
45 private:
46     origin_type m_origin;
47 
48 public:
short_sequence(Iterator first,Iterator last,origin_type origin)49     short_sequence(Iterator first, Iterator last, origin_type origin)
50         : pair(first, last), m_origin(origin)
51     { }
52 
begin()53     iterator begin()
54     {
55         return this->first;
56     }
57 
begin()58     const_iterator begin() const
59     {
60         return this->first;
61     }
62 
cbegin()63     const_iterator cbegin() const
64     {
65         return begin();
66     }
67 
end()68     iterator end()
69     {
70         return this->second;
71     }
72 
end()73     const_iterator end() const
74     {
75         return this->second;
76     }
77 
cend()78     const_iterator cend() const
79     {
80         return end();
81     }
82 
front()83     reference front()
84     {
85         return *begin();
86     }
87 
front()88     const_reference front() const
89     {
90         return *begin();
91     }
92 
back()93     reference back()
94     {
95         return *(end() - 1);
96     }
97 
back()98     const_reference back() const
99     {
100         return *(end() - 1);
101     }
102 
size()103     size_type size() const
104     {
105         return end() - begin();
106     }
107 
empty()108     bool empty() const
109     {
110         return size() == 0;
111     }
112 
origin()113     origin_type origin() const
114     {
115         return m_origin;
116     }
117 };
118 
119 /*!
120  * External merger, based on the loser tree data structure.
121  * \param Arity_  maximum arity of merger, does not need to be a power of 2
122  */
123 template <class BlockType,
124           class Cmp,
125           unsigned Arity,
126           class AllocStr = STXXL_DEFAULT_ALLOC_STRATEGY>
127 class ext_merger : private noncopyable
128 {
129 public:
130     typedef stxxl::uint64 size_type;
131     typedef BlockType block_type;
132     typedef typename block_type::bid_type bid_type;
133     typedef typename block_type::value_type value_type;
134     typedef Cmp comparator_type;
135     typedef AllocStr alloc_strategy;
136     typedef read_write_pool<block_type> pool_type;
137 
138     // arity_bound / 2  <  arity  <=  arity_bound
139     enum { arity = Arity, arity_bound = 1UL << (LOG2<Arity>::ceil) };
140 
141 protected:
142     comparator_type cmp;
143 
is_sentinel(const value_type & a)144     bool is_sentinel(const value_type& a) const
145     {
146         return !(cmp(cmp.min_value(), a)); // a <= cmp.min_value()
147     }
148 
not_sentinel(const value_type & a)149     bool not_sentinel(const value_type& a) const
150     {
151         return cmp(cmp.min_value(), a); // a > cmp.min_value()
152     }
153 
154     struct sequence_state : private noncopyable
155     {
156         block_type* block;          //current block
157         unsigned_type current;      //current index in current block
158         std::list<bid_type>* bids;  //list of blocks forming this sequence
159         comparator_type cmp;
160         ext_merger* merger;
161         bool allocated;
162 
163         //! \returns current element
164         const value_type& operator * () const
165         {
166             return (*block)[current];
167         }
168 
sequence_statesequence_state169         sequence_state()
170             : block(NULL), current(0),
171               bids(NULL), merger(NULL),
172               allocated(false)
173         { }
174 
~sequence_statesequence_state175         ~sequence_state()
176         {
177             STXXL_VERBOSE2("ext_merger sequence_state::~sequence_state()");
178             if (bids != NULL)
179             {
180                 block_manager* bm = block_manager::get_instance();
181                 bm->delete_blocks(bids->begin(), bids->end());
182                 delete bids;
183             }
184         }
185 
make_infsequence_state186         void make_inf()
187         {
188             current = 0;
189             (*block)[0] = cmp.min_value();
190         }
191 
is_sentinelsequence_state192         bool is_sentinel(const value_type& a) const
193         {
194             return !(cmp(cmp.min_value(), a));
195         }
196 
not_sentinelsequence_state197         bool not_sentinel(const value_type& a) const
198         {
199             return cmp(cmp.min_value(), a);
200         }
201 
swapsequence_state202         void swap(sequence_state& obj)
203         {
204             if (&obj != this)
205             {
206                 std::swap(current, obj.current);
207                 std::swap(block, obj.block);
208                 std::swap(bids, obj.bids);
209                 assert(merger == obj.merger);
210                 std::swap(allocated, obj.allocated);
211             }
212         }
213 
214         sequence_state& operator ++ ()
215         {
216             assert(not_sentinel((*block)[current]));
217             assert(current < block->size);
218 
219             ++current;
220 
221             if (current == block->size)
222             {
223                 STXXL_VERBOSE2("ext_merger sequence_state operator++ crossing block border ");
224                 // go to the next block
225                 assert(bids != NULL);
226                 if (bids->empty()) // if there is no next block
227                 {
228                     STXXL_VERBOSE2("ext_merger sequence_state operator++ it was the last block in the sequence ");
229                     delete bids;
230                     bids = NULL;
231                     make_inf();
232                 }
233                 else
234                 {
235                     STXXL_VERBOSE2("ext_merger sequence_state operator++ there is another block ");
236                     bid_type bid = bids->front();
237                     bids->pop_front();
238                     merger->pool->hint(bid);
239                     if (!(bids->empty()))
240                     {
241                         STXXL_VERBOSE2("ext_merger sequence_state operator++ more blocks exist in a sequence, hinting the next");
242                         merger->pool->hint(bids->front());
243                     }
244                     merger->pool->read(block, bid)->wait();
245                     STXXL_VERBOSE2("first element of read block " << bid << " " << *(block->begin()) << " cached in " << block);
246                     if (!(bids->empty()))
247                         merger->pool->hint(bids->front());  // re-hint, reading might have made a block free
248                     block_manager::get_instance()->delete_block(bid);
249                     current = 0;
250                 }
251             }
252             return *this;
253         }
254     };
255 
256 #if STXXL_PQ_EXTERNAL_LOSER_TREE
257     struct Entry
258     {
259         value_type key;       // key of loser element (winner for 0)
260         unsigned_type index;  // the number of the losing segment
261     };
262 #endif //STXXL_PQ_EXTERNAL_LOSER_TREE
263 
264     size_type size_;          // total number of elements stored
265     unsigned_type log_k;      // log of current tree size
266     unsigned_type k;          // invariant (k == 1 << log_k), always a power of 2
267     // only entries 0 .. arity-1 may hold actual sequences, the other
268     // entries arity .. arity_bound-1 are sentinels to make the size of the tree
269     // a power of 2 always
270 
271     // stack of empty segment indices
272     internal_bounded_stack<unsigned_type, arity> free_segments;
273 
274 #if STXXL_PQ_EXTERNAL_LOSER_TREE
275     // upper levels of loser trees
276     // entry[0] contains the winner info
277     Entry entry[arity_bound];
278 #endif  //STXXL_PQ_EXTERNAL_LOSER_TREE
279 
280     // leaf information
281     // note that Knuth uses indices k..k-1
282     // while we use 0..k-1
283     sequence_state states[arity_bound]; // sequence including current position, dereference gives current element
284 
285     pool_type* pool;
286 
287     block_type* sentinel_block;
288 
289 public:
ext_merger()290     ext_merger()
291         : size_(0), log_k(0), k(1), pool(0)
292     {
293         init();
294     }
295 
ext_merger(pool_type * pool_)296     ext_merger(pool_type* pool_)
297         : size_(0), log_k(0), k(1),
298           pool(pool_)
299     {
300         init();
301     }
302 
~ext_merger()303     virtual ~ext_merger()
304     {
305         STXXL_VERBOSE1("ext_merger::~ext_merger()");
306         for (unsigned_type i = 0; i < arity; ++i)
307         {
308             delete states[i].block;
309         }
310         delete sentinel_block;
311     }
312 
set_pool(pool_type * pool_)313     void set_pool(pool_type* pool_)
314     {
315         pool = pool_;
316     }
317 
318 private:
init()319     void init()
320     {
321         STXXL_VERBOSE2("ext_merger::init()");
322         assert(!cmp(cmp.min_value(), cmp.min_value())); // verify strict weak ordering
323 
324         sentinel_block = NULL;
325         if (arity < arity_bound)
326         {
327             sentinel_block = new block_type;
328             for (unsigned_type i = 0; i < block_type::size; ++i)
329                 (*sentinel_block)[i] = cmp.min_value();
330             if (arity + 1 == arity_bound) {
331                 // same memory consumption, but smaller merge width, better use arity = arity_bound
332                 STXXL_ERRMSG("inefficient PQ parameters for ext_merger: arity + 1 == arity_bound");
333             }
334         }
335 
336         for (unsigned_type i = 0; i < arity_bound; ++i)
337         {
338             states[i].merger = this;
339             if (i < arity)
340                 states[i].block = new block_type;
341             else
342                 states[i].block = sentinel_block;
343 
344             states[i].make_inf();
345         }
346 
347         assert(k == 1);
348         free_segments.push(0); //total state: one free sequence
349 
350         rebuild_loser_tree();
351 #if STXXL_PQ_EXTERNAL_LOSER_TREE
352         assert(is_sentinel(*states[entry[0].index]));
353 #endif      //STXXL_PQ_EXTERNAL_LOSER_TREE
354     }
355 
356     // rebuild loser tree information from the values in current
rebuild_loser_tree()357     void rebuild_loser_tree()
358     {
359 #if STXXL_PQ_EXTERNAL_LOSER_TREE
360         unsigned_type winner = init_winner(1);
361         entry[0].index = winner;
362         entry[0].key = *(states[winner]);
363 #endif      //STXXL_PQ_EXTERNAL_LOSER_TREE
364     }
365 
366 #if STXXL_PQ_EXTERNAL_LOSER_TREE
367     // given any values in the leaves this
368     // routing recomputes upper levels of the tree
369     // from scratch in linear time
370     // initialize entry[root].index and the subtree rooted there
371     // return winner index
init_winner(unsigned_type root)372     unsigned_type init_winner(unsigned_type root)
373     {
374         if (root >= k || root >= arity_bound)
375         {       // leaf reached
376             return root - k;
377         }
378         else
379         {
380             unsigned_type left = init_winner(2 * root);
381             unsigned_type right = init_winner(2 * root + 1);
382             value_type lk = *(states[left]);
383             value_type rk = *(states[right]);
384             assert(root < arity_bound);
385             if (!(cmp(lk, rk)))
386             {       // right subtree looses
387                 entry[root].index = right;
388                 entry[root].key = rk;
389                 return left;
390             }
391             else
392             {
393                 entry[root].index = left;
394                 entry[root].key = lk;
395                 return right;
396             }
397         }
398     }
399 
400     // first go up the tree all the way to the root
401     // hand down old winner for the respective subtree
402     // based on new value, and old winner and loser
403     // update each node on the path to the root top down.
404     // This is implemented recursively
update_on_insert(unsigned_type node,const value_type & new_key,unsigned_type new_index,value_type * winner_key,unsigned_type * winner_index,unsigned_type * mask)405     void update_on_insert(
406         unsigned_type node,
407         const value_type& new_key,
408         unsigned_type new_index,
409         value_type* winner_key,
410         unsigned_type* winner_index,         // old winner
411         unsigned_type* mask)                 // 1 << (ceil(log KNK) - dist-from-root)
412     {
413         if (node == 0)
414         {                                    // winner part of root
415             *mask = unsigned_type(1) << (log_k - 1);
416             *winner_key = entry[0].key;
417             *winner_index = entry[0].index;
418             if (cmp(entry[node].key, new_key))
419             {
420                 entry[node].key = new_key;
421                 entry[node].index = new_index;
422             }
423         }
424         else
425         {
426             update_on_insert(node >> 1, new_key, new_index, winner_key, winner_index, mask);
427             value_type loserKey = entry[node].key;
428             unsigned_type loserIndex = entry[node].index;
429             if ((*winner_index & *mask) != (new_index & *mask))
430             {                        // different subtrees
431                 if (cmp(loserKey, new_key))
432                 {                    // new_key will have influence here
433                     if (cmp(*winner_key, new_key))
434                     {                // old winner loses here
435                         entry[node].key = *winner_key;
436                         entry[node].index = *winner_index;
437                     }
438                     else
439                     {                                    // new entry looses here
440                         entry[node].key = new_key;
441                         entry[node].index = new_index;
442                     }
443                 }
444                 *winner_key = loserKey;
445                 *winner_index = loserIndex;
446             }
447             // note that nothing needs to be done if
448             // the winner came from the same subtree
449             // a) new_key <= winner_key => even more reason for the other tree to lose
450             // b) new_key >  winner_key => the old winner will beat the new
451             //                           entry further down the tree
452             // also the same old winner is handed down the tree
453 
454             *mask >>= 1; // next level
455         }
456     }
457 #endif //STXXL_PQ_EXTERNAL_LOSER_TREE
458 
459     // make the tree two times as wide
double_k()460     void double_k()
461     {
462         STXXL_VERBOSE1("ext_merger::double_k (before) k=" << k << " log_k=" << log_k << " arity_bound=" << arity_bound << " arity=" << arity << " #free=" << free_segments.size());
463         assert(k > 0);
464         assert(k < arity);
465         assert(free_segments.empty());                 // stack was free (probably not needed)
466 
467         // make all new entries free
468         // and push them on the free stack
469         for (unsigned_type i = 2 * k - 1; i >= k; i--) //backwards
470         {
471             states[i].make_inf();
472             if (i < arity)
473                 free_segments.push(i);
474         }
475 
476         // double the size
477         k *= 2;
478         log_k++;
479 
480         STXXL_VERBOSE1("ext_merger::double_k (after)  k=" << k << " log_k=" << log_k << " arity_bound=" << arity_bound << " arity=" << arity << " #free=" << free_segments.size());
481         assert(!free_segments.empty());
482         assert(k <= arity_bound);
483 
484         // recompute loser tree information
485         rebuild_loser_tree();
486     }
487 
488     // compact nonempty segments in the left half of the tree
compact_tree()489     void compact_tree()
490     {
491         STXXL_VERBOSE1("ext_merger::compact_tree (before) k=" << k << " log_k=" << log_k << " #free=" << free_segments.size());
492         assert(log_k > 0);
493 
494         // compact all nonempty segments to the left
495 
496         unsigned_type target = 0;
497         for (unsigned_type from = 0; from < k; from++)
498         {
499             if (!is_segment_empty(from))
500             {
501                 assert(is_segment_allocated(from));
502                 if (from != target)
503                 {
504                     assert(!is_segment_allocated(target));
505                     states[target].swap(states[from]);
506                 }
507                 ++target;
508             }
509         }
510 
511         // half degree as often as possible
512         while (k > 1 && target <= (k / 2))
513         {
514             k /= 2;
515             log_k--;
516         }
517 
518         // overwrite garbage and compact the stack of free segment indices
519         free_segments.clear(); // none free
520         for ( ; target < k; target++)
521         {
522             assert(!is_segment_allocated(target));
523             states[target].make_inf();
524             if (target < arity)
525                 free_segments.push(target);
526         }
527 
528         STXXL_VERBOSE1("ext_merger::compact_tree (after)  k=" << k << " log_k=" << log_k << " #free=" << free_segments.size());
529         assert(k > 0);
530 
531         // recompute loser tree information
532         rebuild_loser_tree();
533     }
534 
535 #if 0
536     void swap(ext_merger& obj)
537     {
538         std::swap(cmp, obj.cmp);
539         std::swap(free_segments, obj.free_segments);
540         std::swap(size_, obj.size_);
541         std::swap(log_k, obj.log_k);
542         std::swap(k, obj.k);
543         swap_1D_arrays(entry, obj.entry, arity_bound);
544         swap_1D_arrays(states, obj.states, arity_bound);
545 
546         // std::swap(pool,obj.pool);
547     }
548 #endif
549 
550 public:
mem_cons()551     unsigned_type mem_cons() const // only rough estimation
552     {
553         return (STXXL_MIN<unsigned_type>(arity + 1, arity_bound) * block_type::raw_size);
554     }
555 
556     // delete the (length = end-begin) smallest elements and write them to [begin..end)
557     // empty segments are deallocated
558     // requires:
559     // - there are at least length elements
560     // - segments are ended by sentinels
561     template <class OutputIterator>
multi_merge(OutputIterator begin,OutputIterator end)562     void multi_merge(OutputIterator begin, OutputIterator end)
563     {
564         int_type length = end - begin;
565 
566         STXXL_VERBOSE1("ext_merger::multi_merge from " << k << " sequence(s),"
567                        " length = " << length);
568 
569         if (length == 0)
570             return;
571 
572         assert(k > 0);
573         assert(length <= (int_type)size_);
574 
575         //This is the place to make statistics about external multi_merge calls.
576 
577 #if STXXL_PARALLEL && STXXL_PARALLEL_PQ_MULTIWAY_MERGE_EXTERNAL
578         typedef stxxl::int64 diff_type;
579 
580         typedef std::pair<typename block_type::iterator, typename block_type::iterator> sequence;
581 
582         std::vector<sequence> seqs;
583         std::vector<unsigned_type> orig_seq_index;
584 
585         Cmp cmp;
586         priority_queue_local::invert_order<Cmp, value_type, value_type> inv_cmp(cmp);
587 
588         for (unsigned_type i = 0; i < k; ++i) //initialize sequences
589         {
590             if (states[i].current == states[i].block->size || is_sentinel(*states[i]))
591                 continue;
592 
593             seqs.push_back(std::make_pair(states[i].block->begin() + states[i].current, states[i].block->end()));
594             orig_seq_index.push_back(i);
595 
596 #if STXXL_CHECK_ORDER_IN_SORTS
597             if (!is_sentinel(*seqs.back().first) && !stxxl::is_sorted(seqs.back().first, seqs.back().second, inv_cmp))
598             {
599                 STXXL_VERBOSE1("length " << i << " " << (seqs.back().second - seqs.back().first));
600                 for (value_type* v = seqs.back().first + 1; v < seqs.back().second; ++v)
601                 {
602                     if (inv_cmp(*v, *(v - 1)))
603                     {
604                         STXXL_VERBOSE1("Error at position " << i << "/" << (v - seqs.back().first - 1) << "/" << (v - seqs.back().first) << "   " << *(v - 1) << " " << *v);
605                     }
606                     if (is_sentinel(*v))
607                     {
608                         STXXL_VERBOSE1("Wrong sentinel at position " << (v - seqs.back().first));
609                     }
610                 }
611                 assert(false);
612             }
613 #endif
614 
615             //Hint first non-internal (actually second) block of this sequence.
616             if (states[i].bids != NULL && !states[i].bids->empty())
617                 pool->hint(states[i].bids->front());
618         }
619 
620         assert(seqs.size() > 0);
621 
622 #if STXXL_CHECK_ORDER_IN_SORTS
623         value_type last_elem;
624 #endif
625 
626         diff_type rest = length;                     //elements still to merge for this output block
627 
628         while (rest > 0)
629         {
630             value_type min_last = cmp.min_value();   // minimum of the sequences' last elements
631             diff_type total_size = 0;
632 
633             for (unsigned_type i = 0; i < seqs.size(); ++i)
634             {
635                 diff_type seq_i_size = seqs[i].second - seqs[i].first;
636                 if (seq_i_size > 0)
637                 {
638                     total_size += seq_i_size;
639                     if (inv_cmp(*(seqs[i].second - 1), min_last))
640                         min_last = *(seqs[i].second - 1);
641 
642                     STXXL_VERBOSE1("front block of seq " << i << ": front=" << *(seqs[i].first) << " back=" << *(seqs[i].second - 1) << " len=" << seq_i_size);
643                 } else {
644                     STXXL_VERBOSE1("front block of seq " << i << ": empty");
645                 }
646             }
647 
648             assert(total_size > 0);
649             assert(!is_sentinel(min_last));
650 
651             STXXL_VERBOSE1("min_last " << min_last << " total size " << total_size << " num_seq " << seqs.size());
652 
653             diff_type less_equal_than_min_last = 0;
654             //locate this element in all sequences
655             for (unsigned_type i = 0; i < seqs.size(); ++i)
656             {
657                 //assert(seqs[i].first < seqs[i].second);
658 
659                 typename block_type::iterator position =
660                     std::upper_bound(seqs[i].first, seqs[i].second, min_last, inv_cmp);
661 
662                 //no element larger than min_last is merged
663 
664                 STXXL_VERBOSE1("seq " << i << ": " << (position - seqs[i].first) << " greater equal than " << min_last);
665 
666                 less_equal_than_min_last += (position - seqs[i].first);
667             }
668 
669             diff_type output_size = STXXL_MIN(less_equal_than_min_last, rest);  // at most rest elements
670 
671             STXXL_VERBOSE1("output_size=" << output_size << " = min(leq_t_ml=" << less_equal_than_min_last << ", rest=" << rest << ")");
672 
673             assert(output_size > 0);
674 
675             //main call
676 
677             begin = parallel::multiway_merge(seqs.begin(), seqs.end(), begin, inv_cmp, output_size); //sequence iterators are progressed appropriately
678 
679             rest -= output_size;
680             size_ -= output_size;
681 
682             for (unsigned_type i = 0; i < seqs.size(); ++i)
683             {
684                 sequence_state& state = states[orig_seq_index[i]];
685 
686                 state.current = seqs[i].first - state.block->begin();
687 
688                 assert(seqs[i].first <= seqs[i].second);
689 
690                 if (seqs[i].first == seqs[i].second)               //has run empty
691                 {
692                     assert(state.current == state.block->size);
693                     if (state.bids == NULL || state.bids->empty()) // if there is no next block
694                     {
695                         STXXL_VERBOSE1("seq " << i << ": ext_merger::multi_merge(...) it was the last block in the sequence ");
696                         state.make_inf();
697                     }
698                     else
699                     {
700 #if STXXL_CHECK_ORDER_IN_SORTS
701                         last_elem = *(seqs[i].second - 1);
702 #endif
703                         STXXL_VERBOSE1("seq " << i << ": ext_merger::multi_merge(...) there is another block ");
704                         bid_type bid = state.bids->front();
705                         state.bids->pop_front();
706                         pool->hint(bid);
707                         if (!(state.bids->empty()))
708                         {
709                             STXXL_VERBOSE2("seq " << i << ": ext_merger::multi_merge(...) more blocks exist, hinting the next");
710                             pool->hint(state.bids->front());
711                         }
712                         pool->read(state.block, bid)->wait();
713                         STXXL_VERBOSE1("seq " << i << ": first element of read block " << bid << " " << *(state.block->begin()) << " cached in " << state.block);
714                         if (!(state.bids->empty()))
715                             pool->hint(state.bids->front());  // re-hint, reading might have made a block free
716                         state.current = 0;
717                         seqs[i] = std::make_pair(state.block->begin() + state.current, state.block->end());
718                         block_manager::get_instance()->delete_block(bid);
719 
720 #if STXXL_CHECK_ORDER_IN_SORTS
721                         STXXL_VERBOSE1("before " << last_elem << " after " << *seqs[i].first << " newly loaded block " << bid);
722                         if (!stxxl::is_sorted(seqs[i].first, seqs[i].second, inv_cmp))
723                         {
724                             STXXL_VERBOSE1("length " << i << " " << (seqs[i].second - seqs[i].first));
725                             for (value_type* v = seqs[i].first + 1; v < seqs[i].second; ++v)
726                             {
727                                 if (inv_cmp(*v, *(v - 1)))
728                                 {
729                                     STXXL_VERBOSE1("Error at position " << i << "/" << (v - seqs[i].first - 1) << "/" << (v - seqs[i].first) << "   " << *(v - 1) << " " << *v);
730                                 }
731                                 if (is_sentinel(*v))
732                                 {
733                                     STXXL_VERBOSE1("Wrong sentinel at position " << (v - seqs[i].first));
734                                 }
735                             }
736                             assert(false);
737                         }
738 #endif
739                     }
740                 }
741             }
742         }   //while (rest > 1)
743 
744         for (unsigned_type i = 0; i < seqs.size(); ++i)
745         {
746             unsigned_type seg = orig_seq_index[i];
747             if (is_segment_empty(seg))
748             {
749                 STXXL_VERBOSE1("deallocated " << seg);
750                 deallocate_segment(seg);
751             }
752         }
753 
754 #else       // STXXL_PARALLEL && STXXL_PARALLEL_PQ_MULTIWAY_MERGE_EXTERNAL
755 
756         //Hint first non-internal (actually second) block of each sequence.
757         for (unsigned_type i = 0; i < k; ++i)
758         {
759             if (states[i].bids != NULL && !states[i].bids->empty())
760                 pool->hint(states[i].bids->front());
761         }
762 
763         switch (log_k) {
764         case 0:
765             assert(k == 1);
766             assert(entry[0].index == 0);
767             assert(free_segments.empty());
768             //memcpy(target, states[0], length * sizeof(value_type));
769             //std::copy(states[0],states[0]+length,target);
770             for (int_type i = 0; i < length; ++i, ++(states[0]), ++begin)
771                 *begin = *(states[0]);
772 
773             entry[0].key = **states;
774             if (is_segment_empty(0))
775                 deallocate_segment(0);
776 
777             break;
778         case 1:
779             assert(k == 2);
780             merge_iterator(states[0], states[1], begin, length, cmp);
781             rebuild_loser_tree();
782             if (is_segment_empty(0) && is_segment_allocated(0))
783                 deallocate_segment(0);
784 
785             if (is_segment_empty(1) && is_segment_allocated(1))
786                 deallocate_segment(1);
787 
788             break;
789         case 2:
790             assert(k == 4);
791             if (is_segment_empty(3))
792                 merge3_iterator(states[0], states[1], states[2], begin, length, cmp);
793             else
794                 merge4_iterator(states[0], states[1], states[2], states[3], begin, length, cmp);
795             rebuild_loser_tree();
796             if (is_segment_empty(0) && is_segment_allocated(0))
797                 deallocate_segment(0);
798 
799             if (is_segment_empty(1) && is_segment_allocated(1))
800                 deallocate_segment(1);
801 
802             if (is_segment_empty(2) && is_segment_allocated(2))
803                 deallocate_segment(2);
804 
805             if (is_segment_empty(3) && is_segment_allocated(3))
806                 deallocate_segment(3);
807 
808             break;
809         case  3: multi_merge_f<OutputIterator, 3>(begin, end);
810             break;
811         case  4: multi_merge_f<OutputIterator, 4>(begin, end);
812             break;
813         case  5: multi_merge_f<OutputIterator, 5>(begin, end);
814             break;
815         case  6: multi_merge_f<OutputIterator, 6>(begin, end);
816             break;
817         case  7: multi_merge_f<OutputIterator, 7>(begin, end);
818             break;
819         case  8: multi_merge_f<OutputIterator, 8>(begin, end);
820             break;
821         case  9: multi_merge_f<OutputIterator, 9>(begin, end);
822             break;
823         case 10: multi_merge_f<OutputIterator, 10>(begin, end);
824             break;
825         default: multi_merge_k(begin, end);
826             break;
827         }
828 
829         size_ -= length;
830 #endif
831 
832         // compact tree if it got considerably smaller
833         {
834             const unsigned_type num_segments_used = std::min<unsigned_type>(arity, k) - free_segments.size();
835             const unsigned_type num_segments_trigger = k - (3 * k / 5);
836             // using k/2 would be worst case inefficient (for large k)
837             // for k \in {2, 4, 8} the trigger is k/2 which is good
838             // because we have special mergers for k \in {1, 2, 4}
839             // there is also a special 3-way-merger, that will be
840             // triggered if k == 4 && is_segment_empty(3)
841             STXXL_VERBOSE3("ext_merger  compact? k=" << k << " #used=" << num_segments_used
842                                                      << " <= #trigger=" << num_segments_trigger << " ==> "
843                                                      << ((k > 1 && num_segments_used <= num_segments_trigger) ? "yes" : "no ")
844                                                      << " || "
845                                                      << ((k == 4 && !free_segments.empty() && !is_segment_empty(3)) ? "yes" : "no ")
846                                                      << " #free=" << free_segments.size());
847             if (k > 1 && ((num_segments_used <= num_segments_trigger) ||
848                           (k == 4 && !free_segments.empty() && !is_segment_empty(3))))
849             {
850                 compact_tree();
851             }
852         }
853     }
854 
855 private:
856 #if STXXL_PQ_EXTERNAL_LOSER_TREE
857     // multi-merge for arbitrary K
858     template <class OutputIterator>
multi_merge_k(OutputIterator begin,OutputIterator end)859     void multi_merge_k(OutputIterator begin, OutputIterator end)
860     {
861         Entry* current_pos;
862         value_type current_key;
863         unsigned_type current_index; // leaf pointed to by current entry
864         unsigned_type kReg = k;
865         OutputIterator done = end;
866         OutputIterator target = begin;
867         unsigned_type winner_index = entry[0].index;
868         value_type winner_key = entry[0].key;
869 
870         while (target != done)
871         {
872             // write result
873             *target = *(states[winner_index]);
874 
875             // advance winner segment
876             ++(states[winner_index]);
877 
878             winner_key = *(states[winner_index]);
879 
880             // remove winner segment if empty now
881             if (is_sentinel(winner_key)) //
882                 deallocate_segment(winner_index);
883 
884             // go up the entry-tree
885             for (unsigned_type i = (winner_index + kReg) >> 1; i > 0; i >>= 1)
886             {
887                 current_pos = entry + i;
888                 current_key = current_pos->key;
889                 if (cmp(winner_key, current_key))
890                 {
891                     current_index = current_pos->index;
892                     current_pos->key = winner_key;
893                     current_pos->index = winner_index;
894                     winner_key = current_key;
895                     winner_index = current_index;
896                 }
897             }
898 
899             ++target;
900         }
901         entry[0].index = winner_index;
902         entry[0].key = winner_key;
903     }
904 
905     template <class OutputIterator, int LogK>
multi_merge_f(OutputIterator begin,OutputIterator end)906     void multi_merge_f(OutputIterator begin, OutputIterator end)
907     {
908         OutputIterator done = end;
909         OutputIterator target = begin;
910         unsigned_type winner_index = entry[0].index;
911         Entry* reg_entry = entry;
912         sequence_state* reg_states = states;
913         value_type winner_key = entry[0].key;
914 
915         assert(log_k >= LogK);
916         while (target != done)
917         {
918             // write result
919             *target = *(reg_states[winner_index]);
920 
921             // advance winner segment
922             ++(reg_states[winner_index]);
923 
924             winner_key = *(reg_states[winner_index]);
925 
926             // remove winner segment if empty now
927             if (is_sentinel(winner_key))
928                 deallocate_segment(winner_index);
929 
930             ++target;
931 
932             // update loser tree
933 #define TreeStep(L)                                                                                                          \
934     if (1 << LogK >= 1 << L) {                                                                                               \
935         Entry* pos ## L = reg_entry + ((winner_index + (1 << LogK)) >> (((int(LogK - L) + 1) >= 0) ? ((LogK - L) + 1) : 0)); \
936         value_type key ## L = pos ## L->key;                                                                                 \
937         if (cmp(winner_key, key ## L)) {                                                                                     \
938             unsigned_type index ## L = pos ## L->index;                                                                      \
939             pos ## L->key = winner_key;                                                                                      \
940             pos ## L->index = winner_index;                                                                                  \
941             winner_key = key ## L;                                                                                           \
942             winner_index = index ## L;                                                                                       \
943         }                                                                                                                    \
944     }
945             TreeStep(10);
946             TreeStep(9);
947             TreeStep(8);
948             TreeStep(7);
949             TreeStep(6);
950             TreeStep(5);
951             TreeStep(4);
952             TreeStep(3);
953             TreeStep(2);
954             TreeStep(1);
955 #undef TreeStep
956         }
957         reg_entry[0].index = winner_index;
958         reg_entry[0].key = winner_key;
959     }
960 #endif  //STXXL_PQ_EXTERNAL_LOSER_TREE
961 
962 public:
is_space_available()963     bool is_space_available() const // for new segment
964     {
965         return k < arity || !free_segments.empty();
966     }
967 
968     // insert segment beginning at target
969     // require: is_space_available() == 1
970     template <class Merger>
insert_segment(Merger & another_merger,size_type segment_size)971     void insert_segment(Merger& another_merger, size_type segment_size)
972     {
973         STXXL_VERBOSE1("ext_merger::insert_segment(merger,...)" << this);
974 
975         if (segment_size > 0)
976         {
977             // get a free slot
978             if (free_segments.empty())
979             {       // tree is too small
980                 double_k();
981             }
982             assert(!free_segments.empty());
983             unsigned_type free_slot = free_segments.top();
984             free_segments.pop();
985 
986             // link new segment
987             assert(segment_size);
988             unsigned_type nblocks = (unsigned_type)(segment_size / block_type::size);
989             //assert(nblocks); // at least one block
990             STXXL_VERBOSE1("ext_merger::insert_segment nblocks=" << nblocks);
991             if (nblocks == 0)
992             {
993                 STXXL_VERBOSE1("ext_merger::insert_segment(merger,...) WARNING: inserting a segment with " <<
994                                nblocks << " blocks");
995                 STXXL_VERBOSE1("THIS IS INEFFICIENT: TRY TO CHANGE PRIORITY QUEUE PARAMETERS");
996             }
997             unsigned_type first_size = (unsigned_type)(segment_size % block_type::size);
998             if (first_size == 0)
999             {
1000                 first_size = block_type::size;
1001                 --nblocks;
1002             }
1003             block_manager* bm = block_manager::get_instance();
1004             std::list<bid_type>* bids = new std::list<bid_type>(nblocks);
1005             bm->new_blocks(alloc_strategy(), bids->begin(), bids->end());
1006             block_type* first_block = new block_type;
1007 
1008             another_merger.multi_merge(
1009                 first_block->begin() + (block_type::size - first_size),
1010                 first_block->end());
1011 
1012             STXXL_VERBOSE1("last element of first block " << *(first_block->end() - 1));
1013             assert(!cmp(*(first_block->begin() + (block_type::size - first_size)), *(first_block->end() - 1)));
1014 
1015             assert(pool->size_write() > 0);
1016 
1017             for (typename std::list<bid_type>::iterator curbid = bids->begin(); curbid != bids->end(); ++curbid)
1018             {
1019                 block_type* b = pool->steal();
1020                 another_merger.multi_merge(b->begin(), b->end());
1021                 STXXL_VERBOSE1("first element of following block " << *curbid << " " << *(b->begin()));
1022                 STXXL_VERBOSE1("last element of following block " << *curbid << " " << *(b->end() - 1));
1023                 assert(!cmp(*(b->begin()), *(b->end() - 1)));
1024                 pool->write(b, *curbid);
1025                 STXXL_VERBOSE1("written to block " << *curbid << " cached in " << b);
1026             }
1027 
1028             insert_segment(bids, first_block, first_size, free_slot);
1029 
1030             size_ += segment_size;
1031 
1032 #if STXXL_PQ_EXTERNAL_LOSER_TREE
1033             // propagate new information up the tree
1034             value_type dummyKey;
1035             unsigned_type dummyIndex;
1036             unsigned_type dummyMask;
1037             update_on_insert((free_slot + k) >> 1, *(states[free_slot]), free_slot,
1038                              &dummyKey, &dummyIndex, &dummyMask);
1039 #endif          //STXXL_PQ_EXTERNAL_LOSER_TREE
1040         }
1041         else
1042         {
1043             // deallocate memory ?
1044             STXXL_VERBOSE1("Merged segment with zero size.");
1045         }
1046     }
1047 
size()1048     size_type size() const { return size_; }
1049 
1050 protected:
1051     /*!
1052       \param bidlist list of blocks to insert
1053       \param first_block the first block of the sequence, before bidlist
1054       \param first_size number of elements in the first block
1055       \param slot slot to insert into
1056     */
insert_segment(std::list<bid_type> * bidlist,block_type * first_block,unsigned_type first_size,unsigned_type slot)1057     void insert_segment(std::list<bid_type>* bidlist, block_type* first_block,
1058                         unsigned_type first_size, unsigned_type slot)
1059     {
1060         STXXL_VERBOSE1("ext_merger::insert_segment(bidlist,...) " << this << " " << bidlist->size() << " " << slot);
1061         assert(!is_segment_allocated(slot));
1062         assert(first_size > 0);
1063 
1064         sequence_state& new_sequence = states[slot];
1065         new_sequence.current = block_type::size - first_size;
1066         std::swap(new_sequence.block, first_block);
1067         delete first_block;
1068         std::swap(new_sequence.bids, bidlist);
1069         if (bidlist) // the old list
1070         {
1071             assert(bidlist->empty());
1072             delete bidlist;
1073         }
1074         new_sequence.allocated = true;
1075         assert(is_segment_allocated(slot));
1076     }
1077 
1078     // free an empty segment .
deallocate_segment(unsigned_type slot)1079     void deallocate_segment(unsigned_type slot)
1080     {
1081         STXXL_VERBOSE1("ext_merger::deallocate_segment() deleting segment " << slot << " allocated=" << int(is_segment_allocated(slot)));
1082         assert(is_segment_allocated(slot));
1083         states[slot].allocated = false;
1084         states[slot].make_inf();
1085 
1086         // push on the stack of free segment indices
1087         free_segments.push(slot);
1088     }
1089 
1090     // is this segment empty ?
is_segment_empty(unsigned_type slot)1091     bool is_segment_empty(unsigned_type slot) const
1092     {
1093         return is_sentinel(*(states[slot]));
1094     }
1095 
1096     // Is this segment allocated? Otherwise it's empty,
1097     // already on the stack of free segment indices and can be reused.
is_segment_allocated(unsigned_type slot)1098     bool is_segment_allocated(unsigned_type slot) const
1099     {
1100         return states[slot].allocated;
1101     }
1102 };  // class ext_merger
1103 
1104 } // namespace priority_queue_local
1105 
1106 //! \}
1107 
1108 STXXL_END_NAMESPACE
1109 
1110 #endif // !STXXL_CONTAINERS_PQ_EXT_MERGER_HEADER
1111 // vim: et:ts=4:sw=4
1112