1 /***************************************************************************
2  *  include/stxxl/bits/algo/ksort.h
3  *
4  *  Part of the STXXL. See http://stxxl.sourceforge.net
5  *
6  *  Copyright (C) 2002 Roman Dementiev <dementiev@mpi-sb.mpg.de>
7  *  Copyright (C) 2008-2011 Andreas Beckmann <beckmann@cs.uni-frankfurt.de>
8  *  Copyright (C) 2013 Timo Bingmann <tb@panthema.net>
9  *
10  *  Distributed under the Boost Software License, Version 1.0.
11  *  (See accompanying file LICENSE_1_0.txt or copy at
12  *  http://www.boost.org/LICENSE_1_0.txt)
13  **************************************************************************/
14 
15 #ifndef STXXL_ALGO_KSORT_HEADER
16 #define STXXL_ALGO_KSORT_HEADER
17 
18 #include <stxxl/bits/mng/block_manager.h>
19 #include <stxxl/bits/common/rand.h>
20 #include <stxxl/bits/mng/adaptor.h>
21 #include <stxxl/bits/common/simple_vector.h>
22 #include <stxxl/bits/common/onoff_switch.h>
23 #include <stxxl/bits/mng/block_alloc_interleaved.h>
24 #include <stxxl/bits/algo/intksort.h>
25 #include <stxxl/bits/algo/adaptor.h>
26 #include <stxxl/bits/algo/async_schedule.h>
27 #include <stxxl/bits/mng/block_prefetcher.h>
28 #include <stxxl/bits/mng/buf_writer.h>
29 #include <stxxl/bits/algo/run_cursor.h>
30 #include <stxxl/bits/algo/losertree.h>
31 #include <stxxl/bits/algo/inmemsort.h>
32 #include <stxxl/bits/algo/sort_base.h>
33 #include <stxxl/bits/common/is_sorted.h>
34 #include <stxxl/bits/common/utils.h>
35 
36 //#define INTERLEAVED_ALLOC
37 
38 #define OPT_MERGING
39 
40 STXXL_BEGIN_NAMESPACE
41 
42 //! \addtogroup stllayer
43 
44 //! \defgroup stlalgo Algorithms
45 //! \ingroup stllayer
46 //! Algorithms with STL-compatible interface
47 //! \{
48 
49 /*! \internal
50  */
51 namespace ksort_local {
52 
53 template <typename BIDType, typename KeyType>
54 struct trigger_entry
55 {
56     typedef BIDType bid_type;
57     typedef KeyType key_type;
58 
59     bid_type bid;
60     key_type key;
61 
bid_typetrigger_entry62     operator bid_type ()
63     {
64         return bid;
65     }
66 };
67 
68 template <typename BIDType, typename KeyType>
69 inline bool operator < (const trigger_entry<BIDType, KeyType>& a,
70                         const trigger_entry<BIDType, KeyType>& b)
71 {
72     return (a.key < b.key);
73 }
74 
75 template <typename BIDType, typename KeyType>
76 inline bool operator > (const trigger_entry<BIDType, KeyType>& a,
77                         const trigger_entry<BIDType, KeyType>& b)
78 {
79     return (a.key > b.key);
80 }
81 
82 template <typename Type, typename KeyType>
83 struct type_key
84 {
85     typedef KeyType key_type;
86     key_type key;
87     Type* ptr;
88 
type_keytype_key89     type_key() { }
type_keytype_key90     type_key(key_type k, Type* p) : key(k), ptr(p)
91     { }
92 };
93 
94 template <typename Type, typename KeyType>
95 bool operator < (const type_key<Type, KeyType>& a, const type_key<Type, KeyType>& b)
96 {
97     return a.key < b.key;
98 }
99 
100 template <typename Type, typename KeyType>
101 bool operator > (const type_key<Type, KeyType>& a, const type_key<Type, KeyType>& b)
102 {
103     return a.key > b.key;
104 }
105 
106 template <typename BlockType, typename BidType>
107 struct write_completion_handler
108 {
109     BlockType* block;
110     BidType bid;
111     request_ptr* req;
operatorwrite_completion_handler112     void operator () (request* /*completed_req*/)
113     {
114         * req = block->read(bid);
115     }
116 };
117 
118 template <typename TypeKey,
119           typename BlockType,
120           typename RunType,
121           typename InputBidIterator,
122           typename KeyExtractor>
write_out(TypeKey * begin,TypeKey * end,BlockType * & cur_blk,const BlockType * end_blk,int_type & out_block,int_type & out_pos,RunType & run,write_completion_handler<BlockType,typename BlockType::bid_type> * & next_read,typename BlockType::bid_type * & bids,request_ptr * write_reqs,request_ptr * read_reqs,InputBidIterator & it,KeyExtractor keyobj)123 inline void write_out(
124     TypeKey* begin,
125     TypeKey* end,
126     BlockType*& cur_blk,
127     const BlockType* end_blk,
128     int_type& out_block,
129     int_type& out_pos,
130     RunType& run,
131     write_completion_handler<BlockType, typename BlockType::bid_type>*& next_read,
132     typename BlockType::bid_type*& bids,
133     request_ptr* write_reqs,
134     request_ptr* read_reqs,
135     InputBidIterator& it,
136     KeyExtractor keyobj)
137 {
138     typedef typename BlockType::type type;
139 
140     type* elem = cur_blk->elem;
141     for (TypeKey* p = begin; p < end; p++)
142     {
143         elem[out_pos++] = *(p->ptr);
144 
145         if (out_pos >= BlockType::size)
146         {
147             run[out_block].key = keyobj(*(cur_blk->elem));
148 
149             if (cur_blk < end_blk)
150             {
151                 next_read->block = cur_blk;
152                 next_read->req = read_reqs + out_block;
153                 read_reqs[out_block] = NULL;
154                 bids[out_block] = next_read->bid = *(it++);
155 
156                 write_reqs[out_block] = cur_blk->write(
157                     run[out_block].bid,
158                     // postpone read of block from next run
159                     // after write of block from this run
160                     *(next_read++));
161             }
162             else
163             {
164                 write_reqs[out_block] = cur_blk->write(run[out_block].bid);
165             }
166 
167             cur_blk++;
168             elem = cur_blk->elem;
169             out_block++;
170             out_pos = 0;
171         }
172     }
173 }
174 
175 template <
176     typename BlockType,
177     typename RunType,
178     typename InputBidIterator,
179     typename KeyExtractor>
180 void
create_runs(InputBidIterator it,RunType ** runs,const unsigned_type nruns,const unsigned_type m2,KeyExtractor keyobj)181 create_runs(
182     InputBidIterator it,
183     RunType** runs,
184     const unsigned_type nruns,
185     const unsigned_type m2,
186     KeyExtractor keyobj)
187 {
188     typedef typename BlockType::value_type type;
189     typedef typename BlockType::bid_type bid_type;
190     typedef typename KeyExtractor::key_type key_type;
191     typedef type_key<type, key_type> type_key_;
192 
193     block_manager* bm = block_manager::get_instance();
194     BlockType* Blocks1 = new BlockType[m2];
195     BlockType* Blocks2 = new BlockType[m2];
196     bid_type* bids = new bid_type[m2];
197     type_key_* refs1 = new type_key_[m2 * Blocks1->size];
198     type_key_* refs2 = new type_key_[m2 * Blocks1->size];
199     request_ptr* read_reqs = new request_ptr[m2];
200     request_ptr* write_reqs = new request_ptr[m2];
201     write_completion_handler<BlockType, bid_type>* next_run_reads =
202         new write_completion_handler<BlockType, bid_type>[m2];
203 
204     RunType* run;
205     run = *runs;
206     int_type run_size = (*runs)->size();
207     key_type offset = 0;
208     const int log_k1 = ilog2_ceil((m2 * BlockType::size * sizeof(type_key_) / STXXL_L2_SIZE) ?
209                                   (m2 * BlockType::size * sizeof(type_key_) / STXXL_L2_SIZE) : 2);
210     const int log_k2 = ilog2_floor(m2 * Blocks1->size) - log_k1 - 1;
211     STXXL_VERBOSE("log_k1: " << log_k1 << " log_k2:" << log_k2);
212     const int_type k1 = int_type(1) << log_k1;
213     const int_type k2 = int_type(1) << log_k2;
214     int_type* bucket1 = new int_type[k1];
215     int_type* bucket2 = new int_type[k2];
216     int_type i;
217 
218     disk_queues::get_instance()->set_priority_op(request_queue::WRITE);
219 
220     for (i = 0; i < run_size; i++)
221     {
222         bids[i] = *(it++);
223         read_reqs[i] = Blocks1[i].read(bids[i]);
224     }
225 
226     unsigned_type k = 0;
227     const int shift1 = (int)(sizeof(key_type) * 8 - log_k1);
228     const int shift2 = shift1 - log_k2;
229     STXXL_VERBOSE("shift1: " << shift1 << " shift2:" << shift2);
230 
231     for ( ; k < nruns; k++)
232     {
233         run = runs[k];
234         run_size = run->size();
235 
236         std::fill(bucket1, bucket1 + k1, 0);
237 
238         type_key_* ref_ptr = refs1;
239         for (i = 0; i < run_size; i++)
240         {
241             if (k)
242                 write_reqs[i]->wait();
243 
244             read_reqs[i]->wait();
245             bm->delete_block(bids[i]);
246 
247             classify_block(Blocks1[i].begin(), Blocks1[i].end(), ref_ptr, bucket1, offset, shift1, keyobj);
248         }
249 
250         exclusive_prefix_sum(bucket1, k1);
251         classify(refs1, refs1 + run_size * Blocks1->size, refs2, bucket1,
252                  offset, shift1);
253 
254         int_type out_block = 0;
255         int_type out_pos = 0;
256         unsigned_type next_run_size = (k < nruns - 1) ? (runs[k + 1]->size()) : 0;
257 
258         // recurse on each bucket
259         type_key_* c = refs2;
260         type_key_* d = refs1;
261         BlockType* cur_blk = Blocks2;
262         BlockType* end_blk = Blocks2 + next_run_size;
263         write_completion_handler<BlockType, bid_type>* next_read = next_run_reads;
264 
265         for (i = 0; i < k1; i++)
266         {
267             type_key_* cEnd = refs2 + bucket1[i];
268             type_key_* dEnd = refs1 + bucket1[i];
269 
270             l1sort(c, cEnd, d, bucket2, k2,
271                    offset + (key_type(1) << key_type(shift1)) * key_type(i), shift2);             // key_type,key_type,... paranoia
272 
273             write_out(
274                 d, dEnd, cur_blk, end_blk,
275                 out_block, out_pos, *run, next_read, bids,
276                 write_reqs, read_reqs, it, keyobj);
277 
278             c = cEnd;
279             d = dEnd;
280         }
281 
282         std::swap(Blocks1, Blocks2);
283     }
284 
285     wait_all(write_reqs, m2);
286 
287     delete[] bucket1;
288     delete[] bucket2;
289     delete[] refs1;
290     delete[] refs2;
291     delete[] Blocks1;
292     delete[] Blocks2;
293     delete[] bids;
294     delete[] next_run_reads;
295     delete[] read_reqs;
296     delete[] write_reqs;
297 }
298 
299 template <typename BlockType,
300           typename prefetcher_type,
301           typename KeyExtractor>
302 struct run_cursor2_cmp : public std::binary_function<
303                              run_cursor2<BlockType, prefetcher_type>,
304                              run_cursor2<BlockType, prefetcher_type>,
305                              bool
306                              >
307 {
308     typedef run_cursor2<BlockType, prefetcher_type> cursor_type;
309     KeyExtractor keyobj;
run_cursor2_cmprun_cursor2_cmp310     run_cursor2_cmp(KeyExtractor _keyobj)
311         : keyobj(_keyobj)
312     { }
operatorrun_cursor2_cmp313     inline bool operator () (const cursor_type& a, const cursor_type& b) const
314     {
315         if (UNLIKELY(b.empty()))
316             return true;
317         // sentinel emulation
318         if (UNLIKELY(a.empty()))
319             return false;
320         //sentinel emulation
321 
322         return (keyobj(a.current()) < keyobj(b.current()));
323     }
324 
325 private:
run_cursor2_cmprun_cursor2_cmp326     run_cursor2_cmp() { }
327 };
328 
329 template <typename RecordType, typename KeyExtractor>
330 class key_comparison : public std::binary_function<RecordType, RecordType, bool>
331 {
332     KeyExtractor ke;
333 
334 public:
key_comparison()335     key_comparison() { }
key_comparison(KeyExtractor ke_)336     key_comparison(KeyExtractor ke_) : ke(ke_) { }
operator()337     bool operator () (const RecordType& a, const RecordType& b) const
338     {
339         return ke(a) < ke(b);
340     }
341 };
342 
343 template <typename BlockType, typename RunType, typename KeyExtractor>
check_ksorted_runs(RunType ** runs,unsigned_type nruns,unsigned_type m,KeyExtractor keyext)344 bool check_ksorted_runs(RunType** runs,
345                         unsigned_type nruns,
346                         unsigned_type m,
347                         KeyExtractor keyext)
348 {
349     typedef BlockType block_type;
350     typedef typename BlockType::value_type value_type;
351 
352     STXXL_MSG("check_ksorted_runs  Runs: " << nruns);
353     unsigned_type irun = 0;
354     for (irun = 0; irun < nruns; ++irun)
355     {
356         const unsigned_type nblocks_per_run = runs[irun]->size();
357         unsigned_type blocks_left = nblocks_per_run;
358         block_type* blocks = new block_type[m];
359         request_ptr* reqs = new request_ptr[m];
360         value_type last = keyext.min_value();
361 
362         for (unsigned_type off = 0; off < nblocks_per_run; off += m)
363         {
364             const unsigned_type nblocks = STXXL_MIN(blocks_left, m);
365             const unsigned_type nelements = nblocks * block_type::size;
366             blocks_left -= nblocks;
367 
368             for (unsigned_type j = 0; j < nblocks; ++j)
369             {
370                 reqs[j] = blocks[j].read((*runs[irun])[off + j].bid);
371             }
372             wait_all(reqs, reqs + nblocks);
373 
374             if (off && (keyext(blocks[0][0]) < keyext(last)))
375             {
376                 STXXL_MSG("check_sorted_runs  wrong first value in the run " << irun);
377                 STXXL_MSG(" first value: " << blocks[0][0] << " with key" << keyext(blocks[0][0]));
378                 STXXL_MSG(" last  value: " << last << " with key" << keyext(last));
379                 for (unsigned_type k = 0; k < block_type::size; ++k)
380                     STXXL_MSG("Element " << k << " in the block is :" << blocks[0][k] << " key: " << keyext(blocks[0][k]));
381 
382                 delete[] reqs;
383                 delete[] blocks;
384                 return false;
385             }
386 
387             for (unsigned_type j = 0; j < nblocks; ++j)
388             {
389                 if (keyext(blocks[j][0]) != (*runs[irun])[off + j].key)
390                 {
391                     STXXL_MSG("check_sorted_runs  wrong trigger in the run " << irun << " block " << (off + j));
392                     STXXL_MSG("                   trigger value: " << (*runs[irun])[off + j].key);
393                     STXXL_MSG("Data in the block:");
394                     for (unsigned_type k = 0; k < block_type::size; ++k)
395                         STXXL_MSG("Element " << k << " in the block is :" << blocks[j][k] << " with key: " << keyext(blocks[j][k]));
396 
397                     STXXL_MSG("BIDS:");
398                     for (unsigned_type k = 0; k < nblocks; ++k)
399                     {
400                         if (k == j)
401                             STXXL_MSG("Bad one comes next.");
402                         STXXL_MSG("BID " << (off + k) << " is: " << ((*runs[irun])[off + k].bid));
403                     }
404 
405                     delete[] reqs;
406                     delete[] blocks;
407                     return false;
408                 }
409             }
410             if (!stxxl::is_sorted(make_element_iterator(blocks, 0),
411                                   make_element_iterator(blocks, nelements),
412                                   key_comparison<value_type, KeyExtractor>()))
413             {
414                 STXXL_MSG("check_sorted_runs  wrong order in the run " << irun);
415                 STXXL_MSG("Data in blocks:");
416                 for (unsigned_type j = 0; j < nblocks; ++j)
417                 {
418                     for (unsigned_type k = 0; k < block_type::size; ++k)
419                         STXXL_MSG("     Element " << k << " in block " << (off + j) << " is :" << blocks[j][k] << " with key: " << keyext(blocks[j][k]));
420                 }
421                 STXXL_MSG("BIDS:");
422                 for (unsigned_type k = 0; k < nblocks; ++k)
423                 {
424                     STXXL_MSG("BID " << (k + off) << " is: " << ((*runs[irun])[k + off].bid));
425                 }
426 
427                 delete[] reqs;
428                 delete[] blocks;
429                 return false;
430             }
431             last = blocks[nblocks - 1][block_type::size - 1];
432         }
433 
434         assert(blocks_left == 0);
435         delete[] reqs;
436         delete[] blocks;
437     }
438 
439     return true;
440 }
441 
442 template <typename BlockType, typename RunType, typename KeyExtractor>
merge_runs(RunType ** in_runs,unsigned_type nruns,RunType * out_run,unsigned_type _m,KeyExtractor keyobj)443 void merge_runs(RunType** in_runs, unsigned_type nruns, RunType* out_run, unsigned_type _m, KeyExtractor keyobj)
444 {
445     typedef BlockType block_type;
446     typedef block_prefetcher<BlockType, typename RunType::iterator> prefetcher_type;
447     typedef run_cursor2<BlockType, prefetcher_type> run_cursor_type;
448 
449     unsigned_type i;
450     RunType consume_seq(out_run->size());
451 
452     int_type* prefetch_seq = new int_type[out_run->size()];
453 
454     typename RunType::iterator copy_start = consume_seq.begin();
455     for (i = 0; i < nruns; i++)
456     {
457         // TODO: try to avoid copy
458         copy_start = std::copy(
459             in_runs[i]->begin(),
460             in_runs[i]->end(),
461             copy_start);
462     }
463     std::stable_sort(consume_seq.begin(), consume_seq.end() _STXXL_SORT_TRIGGER_FORCE_SEQUENTIAL);
464 
465     size_t disks_number = config::get_instance()->disks_number();
466 
467 #ifdef PLAY_WITH_OPT_PREF
468     const int_type n_write_buffers = 4 * disks_number;
469 #else
470     const int_type n_prefetch_buffers = STXXL_MAX(int_type(2 * disks_number), (3 * (int_type(_m) - int_type(nruns)) / 4));
471     STXXL_VERBOSE("Prefetch buffers " << n_prefetch_buffers);
472     const int_type n_write_buffers = STXXL_MAX(int_type(2 * disks_number), int_type(_m) - int_type(nruns) - int_type(n_prefetch_buffers));
473     STXXL_VERBOSE("Write buffers " << n_write_buffers);
474     // heuristic
475     const int_type n_opt_prefetch_buffers = 2 * int_type(disks_number) + (3 * (int_type(n_prefetch_buffers) - int_type(2 * disks_number))) / 10;
476     STXXL_VERBOSE("Prefetch buffers " << n_opt_prefetch_buffers);
477 #endif
478 
479 #if STXXL_SORT_OPTIMAL_PREFETCHING
480     compute_prefetch_schedule(
481         consume_seq,
482         prefetch_seq,
483         n_opt_prefetch_buffers,
484         config::get_instance()->get_max_device_id());
485 #else
486     for (i = 0; i < out_run->size(); i++)
487         prefetch_seq[i] = i;
488 
489 #endif
490 
491     prefetcher_type prefetcher(consume_seq.begin(),
492                                consume_seq.end(),
493                                prefetch_seq,
494                                nruns + n_prefetch_buffers);
495 
496     buffered_writer<block_type> writer(n_write_buffers, n_write_buffers / 2);
497 
498     unsigned_type out_run_size = out_run->size();
499 
500     run_cursor2_cmp<block_type, prefetcher_type, KeyExtractor> cmp(keyobj);
501     loser_tree<
502         run_cursor_type,
503         run_cursor2_cmp<block_type, prefetcher_type, KeyExtractor> >
504     losers(&prefetcher, nruns, cmp);
505 
506     block_type* out_buffer = writer.get_free_block();
507 
508     for (i = 0; i < out_run_size; i++)
509     {
510         losers.multi_merge(out_buffer->elem, out_buffer->elem + block_type::size);
511         (*out_run)[i].key = keyobj(out_buffer->elem[0]);
512         out_buffer = writer.write(out_buffer, (*out_run)[i].bid);
513     }
514 
515     delete[] prefetch_seq;
516 
517     block_manager* bm = block_manager::get_instance();
518     for (i = 0; i < nruns; i++)
519     {
520         unsigned_type sz = in_runs[i]->size();
521         for (unsigned_type j = 0; j < sz; j++)
522             bm->delete_block((*in_runs[i])[j].bid);
523 
524         delete in_runs[i];
525     }
526 }
527 
528 template <typename BlockType,
529           typename AllocStrategy,
530           typename InputBidIterator,
531           typename KeyExtractor>
532 simple_vector<
533     trigger_entry<typename BlockType::bid_type, typename KeyExtractor::key_type>
534     >*
ksort_blocks(InputBidIterator input_bids,unsigned_type _n,unsigned_type _m,KeyExtractor keyobj)535 ksort_blocks(InputBidIterator input_bids, unsigned_type _n,
536              unsigned_type _m, KeyExtractor keyobj)
537 {
538     typedef BlockType block_type;
539     typedef typename BlockType::value_type type;
540     typedef typename KeyExtractor::key_type key_type;
541     typedef typename BlockType::bid_type bid_type;
542     typedef trigger_entry<bid_type, typename KeyExtractor::key_type> trigger_entry_type;
543     typedef simple_vector<trigger_entry_type> run_type;
544     typedef typename interleaved_alloc_traits<AllocStrategy>::strategy interleaved_alloc_strategy;
545 
546     unsigned_type m2 = div_ceil(_m, 2);
547     const unsigned_type m2_rf = m2 * block_type::raw_size /
548                                 (block_type::raw_size + block_type::size * sizeof(type_key<type, key_type>));
549     STXXL_VERBOSE("Reducing number of blocks in a run from " << m2 << " to " <<
550                   m2_rf << " due to key size: " << sizeof(typename KeyExtractor::key_type) << " bytes");
551     m2 = m2_rf;
552     unsigned_type full_runs = _n / m2;
553     unsigned_type partial_runs = ((_n % m2) ? 1 : 0);
554     unsigned_type nruns = full_runs + partial_runs;
555     unsigned_type i;
556 
557     block_manager* mng = block_manager::get_instance();
558 
559     STXXL_VERBOSE("n=" << _n << " nruns=" << nruns << "=" << full_runs << "+" << partial_runs);
560 
561     double begin = timestamp(), after_runs_creation, end;
562 
563     run_type** runs = new run_type*[nruns];
564 
565     for (i = 0; i < full_runs; i++)
566         runs[i] = new run_type(m2);
567 
568 #ifdef INTERLEAVED_ALLOC
569     if (partial_runs)
570     {
571         unsigned_type last_run_size = _n - full_runs * m2;
572         runs[i] = new run_type(last_run_size);
573 
574         mng->new_blocks(interleaved_alloc_strategy(nruns, AllocStrategy()),
575                         runs2bid_array_adaptor2<block_type::raw_size, run_type>
576                             (runs, 0, nruns, last_run_size),
577                         runs2bid_array_adaptor2<block_type::raw_size, run_type>
578                             (runs, _n, nruns, last_run_size));
579     }
580     else
581         mng->new_blocks(interleaved_alloc_strategy(nruns, AllocStrategy()),
582                         runs2bid_array_adaptor<block_type::raw_size, run_type>
583                             (runs, 0, nruns),
584                         runs2bid_array_adaptor<block_type::raw_size, run_type>
585                             (runs, _n, nruns));
586 
587 #else
588     if (partial_runs)
589         runs[i] = new run_type(_n - full_runs * m2);
590 
591     for (i = 0; i < nruns; i++)
592     {
593         mng->new_blocks(AllocStrategy(), make_bid_iterator(runs[i]->begin()), make_bid_iterator(runs[i]->end()));
594     }
595 #endif
596 
597     create_runs<block_type, run_type, InputBidIterator, KeyExtractor>(
598         input_bids, runs, nruns, m2, keyobj);
599 
600     after_runs_creation = timestamp();
601 
602     double io_wait_after_rf = stats::get_instance()->get_io_wait_time();
603 
604     disk_queues::get_instance()->set_priority_op(request_queue::WRITE);
605 
606     const int_type merge_factor = optimal_merge_factor(nruns, _m);
607     run_type** new_runs;
608 
609     while (nruns > 1)
610     {
611         int_type new_nruns = div_ceil(nruns, merge_factor);
612         STXXL_VERBOSE("Starting new merge phase: nruns: " << nruns <<
613                       " opt_merge_factor: " << merge_factor << " m:" << _m << " new_nruns: " << new_nruns);
614 
615         new_runs = new run_type*[new_nruns];
616 
617         int_type runs_left = nruns;
618         int_type cur_out_run = 0;
619         int_type blocks_in_new_run = 0;
620 
621         while (runs_left > 0)
622         {
623             int_type runs2merge = STXXL_MIN(runs_left, merge_factor);
624             blocks_in_new_run = 0;
625             for (unsigned_type i = nruns - runs_left; i < (nruns - runs_left + runs2merge); i++)
626                 blocks_in_new_run += runs[i]->size();
627 
628             // allocate run
629             new_runs[cur_out_run++] = new run_type(blocks_in_new_run);
630             runs_left -= runs2merge;
631         }
632         // allocate blocks in the new runs
633         if (cur_out_run == 1 && blocks_in_new_run == int_type(_n) && !input_bids->is_managed())
634         {
635             // if we sort a file we can reuse the input bids for the output
636             InputBidIterator cur = input_bids;
637             for (int_type i = 0; cur != (input_bids + _n); ++cur)
638             {
639                 (*new_runs[0])[i++].bid = *cur;
640             }
641 
642             bid_type& firstBID = (*new_runs[0])[0].bid;
643             if (firstBID.is_managed())
644             {
645                 // the first block does not belong to the file
646                 // need to reallocate it
647                 mng->new_block(FR(), firstBID);
648             }
649             bid_type& lastBID = (*new_runs[0])[_n - 1].bid;
650             if (lastBID.is_managed())
651             {
652                 // the first block does not belong to the file
653                 // need to reallocate it
654                 mng->new_block(FR(), lastBID);
655             }
656         }
657         else
658         {
659             mng->new_blocks(interleaved_alloc_strategy(new_nruns, AllocStrategy()),
660                             runs2bid_array_adaptor2<block_type::raw_size, run_type>(new_runs, 0, new_nruns, blocks_in_new_run),
661                             runs2bid_array_adaptor2<block_type::raw_size, run_type>(new_runs, _n, new_nruns, blocks_in_new_run));
662         }
663 
664         // merge all
665         runs_left = nruns;
666         cur_out_run = 0;
667         while (runs_left > 0)
668         {
669             int_type runs2merge = STXXL_MIN(runs_left, merge_factor);
670 #if STXXL_CHECK_ORDER_IN_SORTS
671             assert((check_ksorted_runs<block_type, run_type, KeyExtractor>(runs + nruns - runs_left, runs2merge, m2, keyobj)));
672 #endif
673             STXXL_VERBOSE("Merging " << runs2merge << " runs");
674             merge_runs<block_type, run_type, KeyExtractor>(runs + nruns - runs_left,
675                                                            runs2merge, *(new_runs + (cur_out_run++)), _m, keyobj);
676             runs_left -= runs2merge;
677         }
678 
679         nruns = new_nruns;
680         delete[] runs;
681         runs = new_runs;
682     }
683 
684     run_type* result = *runs;
685     delete[] runs;
686 
687     end = timestamp();
688 
689     STXXL_VERBOSE("Elapsed time        : " << end - begin << " s. Run creation time: " <<
690                   after_runs_creation - begin << " s");
691     STXXL_VERBOSE("Time in I/O wait(rf): " << io_wait_after_rf << " s");
692     STXXL_VERBOSE(*stats::get_instance());
693 
694     return result;
695 }
696 
697 } // namespace ksort_local
698 
699 /*!
700  * Sort records with integer keys, see \ref design_algo_ksort.
701  *
702  * stxxl::ksort sorts the elements in [first, last) into ascending order,
703  * meaning that if \c i and \c j are any two valid iterators in [first, last)
704  * such that \c i precedes \c j, then \c *j is not less than \c *i. Note: as
705  * std::sort and stxxl::sort, stxxl::ksort is not guaranteed to be stable. That
706  * is, suppose that \c *i and \c *j are equivalent: neither one is less than
707  * the other. It is not guaranteed that the relative order of these two
708  * elements will be preserved by stxxl::ksort.
709  *
710  * The two versions of stxxl::ksort differ in how they define whether one
711  * element is less than another. The first version assumes that the elements
712  * have \c key() member function that returns an integral key (32 or 64 bit),
713  * as well as the minimum and the maximum element values. The second version
714  * compares objects extracting the keys using \c keyobj object, that is in turn
715  * provides min and max element values.
716  *
717  * The sorter's internal memory consumption is bounded by \c M bytes.
718  *
719  * \param first object of model of \c ext_random_access_iterator concept
720  * \param last object of model of \c ext_random_access_iterator concept
721  * \param keyobj \link design_algo_ksort_key_extractor key extractor \endlink object
722  * \param M amount of memory for internal use (in bytes)
723  */
724 template <typename ExtIterator, typename KeyExtractor>
ksort(ExtIterator first,ExtIterator last,KeyExtractor keyobj,unsigned_type M)725 void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_type M)
726 {
727     typedef simple_vector<
728             ksort_local::trigger_entry<
729                 typename ExtIterator::bid_type, typename KeyExtractor::key_type
730                 >
731             > run_type;
732     typedef typename ExtIterator::vector_type::value_type value_type;
733     typedef typename ExtIterator::bid_type bid_type;
734     typedef typename ExtIterator::block_type block_type;
735     typedef typename ExtIterator::vector_type::alloc_strategy_type alloc_strategy_type;
736     typedef typename ExtIterator::bids_container_iterator bids_container_iterator;
737 
738     unsigned_type n = 0;
739     block_manager* mng = block_manager::get_instance();
740 
741     first.flush();
742 
743     if ((last - first) * sizeof(value_type) < M)
744     {
745         stl_in_memory_sort(first, last,
746                            ksort_local::key_comparison<value_type, KeyExtractor>(keyobj));
747     }
748     else
749     {
750         assert(2 * block_type::raw_size <= M);
751 
752         if (first.block_offset())
753         {
754             if (last.block_offset())            // first and last element reside
755             // not in the beginning of the block
756             {
757                 block_type* first_block = new block_type;
758                 block_type* last_block = new block_type;
759                 bid_type first_bid, last_bid;
760                 request_ptr req;
761 
762                 req = first_block->read(*first.bid());
763                 mng->new_block(FR(), first_bid);                // try to overlap
764                 mng->new_block(FR(), last_bid);
765                 req->wait();
766 
767                 req = last_block->read(*last.bid());
768 
769                 unsigned_type i = 0;
770                 for ( ; i < first.block_offset(); i++)
771                 {
772                     first_block->elem[i] = keyobj.min_value();
773                 }
774 
775                 req->wait();
776 
777                 req = first_block->write(first_bid);
778                 for (i = last.block_offset(); i < block_type::size; i++)
779                 {
780                     last_block->elem[i] = keyobj.max_value();
781                 }
782 
783                 req->wait();
784 
785                 req = last_block->write(last_bid);
786 
787                 n = last.bid() - first.bid() + 1;
788 
789                 std::swap(first_bid, *first.bid());
790                 std::swap(last_bid, *last.bid());
791 
792                 req->wait();
793 
794                 delete first_block;
795                 delete last_block;
796 
797                 run_type* out =
798                     ksort_local::ksort_blocks<
799                         block_type, alloc_strategy_type,
800                         bids_container_iterator, KeyExtractor
801                         >(first.bid(), n, M / block_type::raw_size, keyobj);
802 
803                 first_block = new block_type;
804                 last_block = new block_type;
805                 block_type* sorted_first_block = new block_type;
806                 block_type* sorted_last_block = new block_type;
807                 request_ptr* reqs = new request_ptr[2];
808 
809                 reqs[0] = first_block->read(first_bid);
810                 reqs[1] = sorted_first_block->read((*(out->begin())).bid);
811                 wait_all(reqs, 2);
812 
813                 reqs[0] = last_block->read(last_bid);
814                 reqs[1] = sorted_last_block->read(((*out)[out->size() - 1]).bid);
815 
816                 for (i = first.block_offset(); i < block_type::size; i++)
817                 {
818                     first_block->elem[i] = sorted_first_block->elem[i];
819                 }
820                 wait_all(reqs, 2);
821 
822                 req = first_block->write(first_bid);
823 
824                 for (i = 0; i < last.block_offset(); i++)
825                 {
826                     last_block->elem[i] = sorted_last_block->elem[i];
827                 }
828 
829                 req->wait();
830 
831                 req = last_block->write(last_bid);
832 
833                 mng->delete_block(out->begin()->bid);
834                 mng->delete_block((*out)[out->size() - 1].bid);
835 
836                 *first.bid() = first_bid;
837                 *last.bid() = last_bid;
838 
839                 typename run_type::iterator it = out->begin();
840                 it++;
841                 bids_container_iterator cur_bid = first.bid();
842                 cur_bid++;
843 
844                 for ( ; cur_bid != last.bid(); cur_bid++, it++)
845                 {
846                     *cur_bid = (*it).bid;
847                 }
848 
849                 delete first_block;
850                 delete sorted_first_block;
851                 delete sorted_last_block;
852                 delete[] reqs;
853                 delete out;
854 
855                 req->wait();
856 
857                 delete last_block;
858             }
859             else
860             {
861                 // first element resides
862                 // not in the beginning of the block
863 
864                 block_type* first_block = new block_type;
865                 bid_type first_bid;
866                 request_ptr req;
867 
868                 req = first_block->read(*first.bid());
869                 mng->new_block(FR(), first_bid);                // try to overlap
870                 req->wait();
871 
872                 unsigned_type i = 0;
873                 for ( ; i < first.block_offset(); i++)
874                 {
875                     first_block->elem[i] = keyobj.min_value();
876                 }
877 
878                 req = first_block->write(first_bid);
879 
880                 n = last.bid() - first.bid();
881 
882                 std::swap(first_bid, *first.bid());
883 
884                 req->wait();
885 
886                 delete first_block;
887 
888                 run_type* out =
889                     ksort_local::ksort_blocks<
890                         block_type, alloc_strategy_type,
891                         bids_container_iterator, KeyExtractor
892                         >(first.bid(), n, M / block_type::raw_size, keyobj);
893 
894                 first_block = new block_type;
895 
896                 block_type* sorted_first_block = new block_type;
897 
898                 request_ptr* reqs = new request_ptr[2];
899 
900                 reqs[0] = first_block->read(first_bid);
901                 reqs[1] = sorted_first_block->read((*(out->begin())).bid);
902                 wait_all(reqs, 2);
903 
904                 for (i = first.block_offset(); i < block_type::size; i++)
905                 {
906                     first_block->elem[i] = sorted_first_block->elem[i];
907                 }
908 
909                 req = first_block->write(first_bid);
910 
911                 mng->delete_block(out->begin()->bid);
912 
913                 *first.bid() = first_bid;
914 
915                 typename run_type::iterator it = out->begin();
916                 it++;
917                 bids_container_iterator cur_bid = first.bid();
918                 cur_bid++;
919 
920                 for ( ; cur_bid != last.bid(); cur_bid++, it++)
921                 {
922                     *cur_bid = (*it).bid;
923                 }
924 
925                 *cur_bid = (*it).bid;
926 
927                 delete sorted_first_block;
928                 delete[] reqs;
929                 delete out;
930 
931                 req->wait();
932 
933                 delete first_block;
934             }
935         }
936         else
937         {
938             if (last.block_offset())            // last element resides
939             // not in the beginning of the block
940             {
941                 block_type* last_block = new block_type;
942                 bid_type last_bid;
943                 request_ptr req;
944                 unsigned_type i;
945 
946                 req = last_block->read(*last.bid());
947                 mng->new_block(FR(), last_bid);
948                 req->wait();
949 
950                 for (i = last.block_offset(); i < block_type::size; i++)
951                 {
952                     last_block->elem[i] = keyobj.max_value();
953                 }
954 
955                 req = last_block->write(last_bid);
956 
957                 n = last.bid() - first.bid() + 1;
958 
959                 std::swap(last_bid, *last.bid());
960 
961                 req->wait();
962 
963                 delete last_block;
964 
965                 run_type* out =
966                     ksort_local::ksort_blocks<
967                         block_type, alloc_strategy_type,
968                         bids_container_iterator, KeyExtractor
969                         >(first.bid(), n, M / block_type::raw_size, keyobj);
970 
971                 last_block = new block_type;
972                 block_type* sorted_last_block = new block_type;
973                 request_ptr* reqs = new request_ptr[2];
974 
975                 reqs[0] = last_block->read(last_bid);
976                 reqs[1] = sorted_last_block->read(((*out)[out->size() - 1]).bid);
977                 wait_all(reqs, 2);
978 
979                 for (i = 0; i < last.block_offset(); i++)
980                 {
981                     last_block->elem[i] = sorted_last_block->elem[i];
982                 }
983 
984                 req = last_block->write(last_bid);
985 
986                 mng->delete_block((*out)[out->size() - 1].bid);
987 
988                 *last.bid() = last_bid;
989 
990                 typename run_type::iterator it = out->begin();
991                 bids_container_iterator cur_bid = first.bid();
992 
993                 for ( ; cur_bid != last.bid(); cur_bid++, it++)
994                 {
995                     *cur_bid = (*it).bid;
996                 }
997 
998                 delete sorted_last_block;
999                 delete[] reqs;
1000                 delete out;
1001 
1002                 req->wait();
1003 
1004                 delete last_block;
1005             }
1006             else
1007             {
1008                 // first and last element reside in the beginning of blocks
1009                 n = last.bid() - first.bid();
1010 
1011                 run_type* out =
1012                     ksort_local::ksort_blocks<
1013                         block_type, alloc_strategy_type,
1014                         bids_container_iterator, KeyExtractor
1015                         >(first.bid(), n, M / block_type::raw_size, keyobj);
1016 
1017                 typename run_type::iterator it = out->begin();
1018                 bids_container_iterator cur_bid = first.bid();
1019 
1020                 for ( ; cur_bid != last.bid(); cur_bid++, it++)
1021                 {
1022                     *cur_bid = (*it).bid;
1023                 }
1024 
1025                 delete out;
1026             }
1027         }
1028     }
1029 
1030 #if STXXL_CHECK_ORDER_IN_SORTS
1031     typedef typename ExtIterator::const_iterator const_iterator;
1032     STXXL_ASSERT(stxxl::is_sorted(const_iterator(first), const_iterator(last),
1033                                   ksort_local::key_comparison<value_type, KeyExtractor>()));
1034 #endif
1035 }
1036 
1037 template <typename RecordType>
1038 struct ksort_defaultkey
1039 {
1040     typedef typename RecordType::key_type key_type;
operatorksort_defaultkey1041     key_type operator () (const RecordType& obj) const
1042     {
1043         return obj.key();
1044     }
max_valueksort_defaultkey1045     RecordType max_value() const
1046     {
1047         return RecordType::max_value();
1048     }
min_valueksort_defaultkey1049     RecordType min_value() const
1050     {
1051         return RecordType::min_value();
1052     }
1053 };
1054 
1055 /*!
1056  * Sort records with integer keys, see \ref design_algo_ksort.
1057  *
1058  * stxxl::ksort sorts the elements in [first, last) into ascending order,
1059  * meaning that if \c i and \c j are any two valid iterators in [first, last)
1060  * such that \c i precedes \c j, then \c *j is not less than \c *i. Note: as
1061  * std::sort and stxxl::sort, stxxl::ksort is not guaranteed to be stable. That
1062  * is, suppose that \c *i and \c *j are equivalent: neither one is less than
1063  * the other. It is not guaranteed that the relative order of these two
1064  * elements will be preserved by stxxl::ksort.
1065  *
1066  * \param first object of model of \c ext_random_access_iterator concept
1067  * \param last object of model of \c ext_random_access_iterator concept
1068  * \param M amount of buffers for internal use
1069  * \remark Order in the result is non-stable
1070  */
1071 template <typename ExtIterator>
ksort(ExtIterator first,ExtIterator last,unsigned_type M)1072 void ksort(ExtIterator first, ExtIterator last, unsigned_type M)
1073 {
1074     ksort(first, last,
1075           ksort_defaultkey<typename ExtIterator::vector_type::value_type>(), M);
1076 }
1077 
1078 //! \}
1079 
1080 STXXL_END_NAMESPACE
1081 
1082 #endif // !STXXL_ALGO_KSORT_HEADER
1083 // vim: et:ts=4:sw=4
1084