/***************************************************************************
 *  include/stxxl/bits/algo/ksort.h
 *
 *  Part of the STXXL. See http://stxxl.sourceforge.net
 *
 *  Copyright (C) 2002 Roman Dementiev <dementiev@mpi-sb.mpg.de>
 *  Copyright (C) 2008-2011 Andreas Beckmann <beckmann@cs.uni-frankfurt.de>
 *  Copyright (C) 2013 Timo Bingmann <tb@panthema.net>
 *
 *  Distributed under the Boost Software License, Version 1.0.
 *  (See accompanying file LICENSE_1_0.txt or copy at
 *  http://www.boost.org/LICENSE_1_0.txt)
 **************************************************************************/
14
15 #ifndef STXXL_ALGO_KSORT_HEADER
16 #define STXXL_ALGO_KSORT_HEADER
17
18 #include <stxxl/bits/mng/block_manager.h>
19 #include <stxxl/bits/common/rand.h>
20 #include <stxxl/bits/mng/adaptor.h>
21 #include <stxxl/bits/common/simple_vector.h>
22 #include <stxxl/bits/common/onoff_switch.h>
23 #include <stxxl/bits/mng/block_alloc_interleaved.h>
24 #include <stxxl/bits/algo/intksort.h>
25 #include <stxxl/bits/algo/adaptor.h>
26 #include <stxxl/bits/algo/async_schedule.h>
27 #include <stxxl/bits/mng/block_prefetcher.h>
28 #include <stxxl/bits/mng/buf_writer.h>
29 #include <stxxl/bits/algo/run_cursor.h>
30 #include <stxxl/bits/algo/losertree.h>
31 #include <stxxl/bits/algo/inmemsort.h>
32 #include <stxxl/bits/algo/sort_base.h>
33 #include <stxxl/bits/common/is_sorted.h>
34 #include <stxxl/bits/common/utils.h>
35
36 //#define INTERLEAVED_ALLOC
37
38 #define OPT_MERGING
39
40 STXXL_BEGIN_NAMESPACE
41
//! \addtogroup stllayer

//! \defgroup stlalgo Algorithms
//! \ingroup stllayer
//! Algorithms with STL-compatible interface
//! \{

/*! \internal
 */
51 namespace ksort_local {
52
//! Entry describing one block of a sorted run: the block's BID together
//! with the key of the block's first (smallest) element. Arrays of these
//! entries form the run descriptors used by run creation and merging.
template <typename BIDType, typename KeyType>
struct trigger_entry
{
    typedef BIDType bid_type;
    typedef KeyType key_type;

    bid_type bid;   //!< identifier of the block on disk
    key_type key;   //!< key of the first element stored in the block

    //! Implicit conversion to the BID, so run descriptors can be used
    //! wherever a plain BID sequence is expected.
    operator bid_type ()
    {
        return bid;
    }
};

//! Compare two trigger entries by their smallest-element key (ascending).
template <typename BIDType, typename KeyType>
inline bool operator < (const trigger_entry<BIDType, KeyType>& a,
                        const trigger_entry<BIDType, KeyType>& b)
{
    return (a.key < b.key);
}

//! Compare two trigger entries by their smallest-element key (descending).
template <typename BIDType, typename KeyType>
inline bool operator > (const trigger_entry<BIDType, KeyType>& a,
                        const trigger_entry<BIDType, KeyType>& b)
{
    return (a.key > b.key);
}
81
//! Pair of an extracted key and a pointer to the element it was extracted
//! from; used as the unit of the in-memory distribution (bucket) sort so
//! that heavy records are moved only once, at write-out time.
template <typename Type, typename KeyType>
struct type_key
{
    typedef KeyType key_type;
    key_type key;   //!< extracted integer key, sorted on directly
    Type* ptr;      //!< pointer to the full record inside a block buffer

    type_key() { }
    type_key(key_type k, Type* p) : key(k), ptr(p)
    { }
};

//! Compare key/pointer pairs by key (ascending).
template <typename Type, typename KeyType>
bool operator < (const type_key<Type, KeyType>& a, const type_key<Type, KeyType>& b)
{
    return a.key < b.key;
}

//! Compare key/pointer pairs by key (descending).
template <typename Type, typename KeyType>
bool operator > (const type_key<Type, KeyType>& a, const type_key<Type, KeyType>& b)
{
    return a.key > b.key;
}
105
106 template <typename BlockType, typename BidType>
107 struct write_completion_handler
108 {
109 BlockType* block;
110 BidType bid;
111 request_ptr* req;
operatorwrite_completion_handler112 void operator () (request* /*completed_req*/)
113 {
114 * req = block->read(bid);
115 }
116 };
117
118 template <typename TypeKey,
119 typename BlockType,
120 typename RunType,
121 typename InputBidIterator,
122 typename KeyExtractor>
write_out(TypeKey * begin,TypeKey * end,BlockType * & cur_blk,const BlockType * end_blk,int_type & out_block,int_type & out_pos,RunType & run,write_completion_handler<BlockType,typename BlockType::bid_type> * & next_read,typename BlockType::bid_type * & bids,request_ptr * write_reqs,request_ptr * read_reqs,InputBidIterator & it,KeyExtractor keyobj)123 inline void write_out(
124 TypeKey* begin,
125 TypeKey* end,
126 BlockType*& cur_blk,
127 const BlockType* end_blk,
128 int_type& out_block,
129 int_type& out_pos,
130 RunType& run,
131 write_completion_handler<BlockType, typename BlockType::bid_type>*& next_read,
132 typename BlockType::bid_type*& bids,
133 request_ptr* write_reqs,
134 request_ptr* read_reqs,
135 InputBidIterator& it,
136 KeyExtractor keyobj)
137 {
138 typedef typename BlockType::type type;
139
140 type* elem = cur_blk->elem;
141 for (TypeKey* p = begin; p < end; p++)
142 {
143 elem[out_pos++] = *(p->ptr);
144
145 if (out_pos >= BlockType::size)
146 {
147 run[out_block].key = keyobj(*(cur_blk->elem));
148
149 if (cur_blk < end_blk)
150 {
151 next_read->block = cur_blk;
152 next_read->req = read_reqs + out_block;
153 read_reqs[out_block] = NULL;
154 bids[out_block] = next_read->bid = *(it++);
155
156 write_reqs[out_block] = cur_blk->write(
157 run[out_block].bid,
158 // postpone read of block from next run
159 // after write of block from this run
160 *(next_read++));
161 }
162 else
163 {
164 write_reqs[out_block] = cur_blk->write(run[out_block].bid);
165 }
166
167 cur_blk++;
168 elem = cur_blk->elem;
169 out_block++;
170 out_pos = 0;
171 }
172 }
173 }
174
175 template <
176 typename BlockType,
177 typename RunType,
178 typename InputBidIterator,
179 typename KeyExtractor>
180 void
create_runs(InputBidIterator it,RunType ** runs,const unsigned_type nruns,const unsigned_type m2,KeyExtractor keyobj)181 create_runs(
182 InputBidIterator it,
183 RunType** runs,
184 const unsigned_type nruns,
185 const unsigned_type m2,
186 KeyExtractor keyobj)
187 {
188 typedef typename BlockType::value_type type;
189 typedef typename BlockType::bid_type bid_type;
190 typedef typename KeyExtractor::key_type key_type;
191 typedef type_key<type, key_type> type_key_;
192
193 block_manager* bm = block_manager::get_instance();
194 BlockType* Blocks1 = new BlockType[m2];
195 BlockType* Blocks2 = new BlockType[m2];
196 bid_type* bids = new bid_type[m2];
197 type_key_* refs1 = new type_key_[m2 * Blocks1->size];
198 type_key_* refs2 = new type_key_[m2 * Blocks1->size];
199 request_ptr* read_reqs = new request_ptr[m2];
200 request_ptr* write_reqs = new request_ptr[m2];
201 write_completion_handler<BlockType, bid_type>* next_run_reads =
202 new write_completion_handler<BlockType, bid_type>[m2];
203
204 RunType* run;
205 run = *runs;
206 int_type run_size = (*runs)->size();
207 key_type offset = 0;
208 const int log_k1 = ilog2_ceil((m2 * BlockType::size * sizeof(type_key_) / STXXL_L2_SIZE) ?
209 (m2 * BlockType::size * sizeof(type_key_) / STXXL_L2_SIZE) : 2);
210 const int log_k2 = ilog2_floor(m2 * Blocks1->size) - log_k1 - 1;
211 STXXL_VERBOSE("log_k1: " << log_k1 << " log_k2:" << log_k2);
212 const int_type k1 = int_type(1) << log_k1;
213 const int_type k2 = int_type(1) << log_k2;
214 int_type* bucket1 = new int_type[k1];
215 int_type* bucket2 = new int_type[k2];
216 int_type i;
217
218 disk_queues::get_instance()->set_priority_op(request_queue::WRITE);
219
220 for (i = 0; i < run_size; i++)
221 {
222 bids[i] = *(it++);
223 read_reqs[i] = Blocks1[i].read(bids[i]);
224 }
225
226 unsigned_type k = 0;
227 const int shift1 = (int)(sizeof(key_type) * 8 - log_k1);
228 const int shift2 = shift1 - log_k2;
229 STXXL_VERBOSE("shift1: " << shift1 << " shift2:" << shift2);
230
231 for ( ; k < nruns; k++)
232 {
233 run = runs[k];
234 run_size = run->size();
235
236 std::fill(bucket1, bucket1 + k1, 0);
237
238 type_key_* ref_ptr = refs1;
239 for (i = 0; i < run_size; i++)
240 {
241 if (k)
242 write_reqs[i]->wait();
243
244 read_reqs[i]->wait();
245 bm->delete_block(bids[i]);
246
247 classify_block(Blocks1[i].begin(), Blocks1[i].end(), ref_ptr, bucket1, offset, shift1, keyobj);
248 }
249
250 exclusive_prefix_sum(bucket1, k1);
251 classify(refs1, refs1 + run_size * Blocks1->size, refs2, bucket1,
252 offset, shift1);
253
254 int_type out_block = 0;
255 int_type out_pos = 0;
256 unsigned_type next_run_size = (k < nruns - 1) ? (runs[k + 1]->size()) : 0;
257
258 // recurse on each bucket
259 type_key_* c = refs2;
260 type_key_* d = refs1;
261 BlockType* cur_blk = Blocks2;
262 BlockType* end_blk = Blocks2 + next_run_size;
263 write_completion_handler<BlockType, bid_type>* next_read = next_run_reads;
264
265 for (i = 0; i < k1; i++)
266 {
267 type_key_* cEnd = refs2 + bucket1[i];
268 type_key_* dEnd = refs1 + bucket1[i];
269
270 l1sort(c, cEnd, d, bucket2, k2,
271 offset + (key_type(1) << key_type(shift1)) * key_type(i), shift2); // key_type,key_type,... paranoia
272
273 write_out(
274 d, dEnd, cur_blk, end_blk,
275 out_block, out_pos, *run, next_read, bids,
276 write_reqs, read_reqs, it, keyobj);
277
278 c = cEnd;
279 d = dEnd;
280 }
281
282 std::swap(Blocks1, Blocks2);
283 }
284
285 wait_all(write_reqs, m2);
286
287 delete[] bucket1;
288 delete[] bucket2;
289 delete[] refs1;
290 delete[] refs2;
291 delete[] Blocks1;
292 delete[] Blocks2;
293 delete[] bids;
294 delete[] next_run_reads;
295 delete[] read_reqs;
296 delete[] write_reqs;
297 }
298
299 template <typename BlockType,
300 typename prefetcher_type,
301 typename KeyExtractor>
302 struct run_cursor2_cmp : public std::binary_function<
303 run_cursor2<BlockType, prefetcher_type>,
304 run_cursor2<BlockType, prefetcher_type>,
305 bool
306 >
307 {
308 typedef run_cursor2<BlockType, prefetcher_type> cursor_type;
309 KeyExtractor keyobj;
run_cursor2_cmprun_cursor2_cmp310 run_cursor2_cmp(KeyExtractor _keyobj)
311 : keyobj(_keyobj)
312 { }
operatorrun_cursor2_cmp313 inline bool operator () (const cursor_type& a, const cursor_type& b) const
314 {
315 if (UNLIKELY(b.empty()))
316 return true;
317 // sentinel emulation
318 if (UNLIKELY(a.empty()))
319 return false;
320 //sentinel emulation
321
322 return (keyobj(a.current()) < keyobj(b.current()));
323 }
324
325 private:
run_cursor2_cmprun_cursor2_cmp326 run_cursor2_cmp() { }
327 };
328
//! Less-than comparison of two records via their extracted keys; used for
//! the in-memory fallback sort and for order checking.
//! NOTE: derivation from std::binary_function was dropped (deprecated in
//! C++11, removed in C++17); the adaptable-functor typedefs are provided
//! directly instead.
template <typename RecordType, typename KeyExtractor>
class key_comparison
{
    KeyExtractor ke;   //!< key extractor applied to both operands

public:
    typedef RecordType first_argument_type;
    typedef RecordType second_argument_type;
    typedef bool result_type;

    key_comparison() { }
    key_comparison(KeyExtractor ke_) : ke(ke_) { }
    bool operator () (const RecordType& a, const RecordType& b) const
    {
        return ke(a) < ke(b);
    }
};
342
343 template <typename BlockType, typename RunType, typename KeyExtractor>
check_ksorted_runs(RunType ** runs,unsigned_type nruns,unsigned_type m,KeyExtractor keyext)344 bool check_ksorted_runs(RunType** runs,
345 unsigned_type nruns,
346 unsigned_type m,
347 KeyExtractor keyext)
348 {
349 typedef BlockType block_type;
350 typedef typename BlockType::value_type value_type;
351
352 STXXL_MSG("check_ksorted_runs Runs: " << nruns);
353 unsigned_type irun = 0;
354 for (irun = 0; irun < nruns; ++irun)
355 {
356 const unsigned_type nblocks_per_run = runs[irun]->size();
357 unsigned_type blocks_left = nblocks_per_run;
358 block_type* blocks = new block_type[m];
359 request_ptr* reqs = new request_ptr[m];
360 value_type last = keyext.min_value();
361
362 for (unsigned_type off = 0; off < nblocks_per_run; off += m)
363 {
364 const unsigned_type nblocks = STXXL_MIN(blocks_left, m);
365 const unsigned_type nelements = nblocks * block_type::size;
366 blocks_left -= nblocks;
367
368 for (unsigned_type j = 0; j < nblocks; ++j)
369 {
370 reqs[j] = blocks[j].read((*runs[irun])[off + j].bid);
371 }
372 wait_all(reqs, reqs + nblocks);
373
374 if (off && (keyext(blocks[0][0]) < keyext(last)))
375 {
376 STXXL_MSG("check_sorted_runs wrong first value in the run " << irun);
377 STXXL_MSG(" first value: " << blocks[0][0] << " with key" << keyext(blocks[0][0]));
378 STXXL_MSG(" last value: " << last << " with key" << keyext(last));
379 for (unsigned_type k = 0; k < block_type::size; ++k)
380 STXXL_MSG("Element " << k << " in the block is :" << blocks[0][k] << " key: " << keyext(blocks[0][k]));
381
382 delete[] reqs;
383 delete[] blocks;
384 return false;
385 }
386
387 for (unsigned_type j = 0; j < nblocks; ++j)
388 {
389 if (keyext(blocks[j][0]) != (*runs[irun])[off + j].key)
390 {
391 STXXL_MSG("check_sorted_runs wrong trigger in the run " << irun << " block " << (off + j));
392 STXXL_MSG(" trigger value: " << (*runs[irun])[off + j].key);
393 STXXL_MSG("Data in the block:");
394 for (unsigned_type k = 0; k < block_type::size; ++k)
395 STXXL_MSG("Element " << k << " in the block is :" << blocks[j][k] << " with key: " << keyext(blocks[j][k]));
396
397 STXXL_MSG("BIDS:");
398 for (unsigned_type k = 0; k < nblocks; ++k)
399 {
400 if (k == j)
401 STXXL_MSG("Bad one comes next.");
402 STXXL_MSG("BID " << (off + k) << " is: " << ((*runs[irun])[off + k].bid));
403 }
404
405 delete[] reqs;
406 delete[] blocks;
407 return false;
408 }
409 }
410 if (!stxxl::is_sorted(make_element_iterator(blocks, 0),
411 make_element_iterator(blocks, nelements),
412 key_comparison<value_type, KeyExtractor>()))
413 {
414 STXXL_MSG("check_sorted_runs wrong order in the run " << irun);
415 STXXL_MSG("Data in blocks:");
416 for (unsigned_type j = 0; j < nblocks; ++j)
417 {
418 for (unsigned_type k = 0; k < block_type::size; ++k)
419 STXXL_MSG(" Element " << k << " in block " << (off + j) << " is :" << blocks[j][k] << " with key: " << keyext(blocks[j][k]));
420 }
421 STXXL_MSG("BIDS:");
422 for (unsigned_type k = 0; k < nblocks; ++k)
423 {
424 STXXL_MSG("BID " << (k + off) << " is: " << ((*runs[irun])[k + off].bid));
425 }
426
427 delete[] reqs;
428 delete[] blocks;
429 return false;
430 }
431 last = blocks[nblocks - 1][block_type::size - 1];
432 }
433
434 assert(blocks_left == 0);
435 delete[] reqs;
436 delete[] blocks;
437 }
438
439 return true;
440 }
441
442 template <typename BlockType, typename RunType, typename KeyExtractor>
merge_runs(RunType ** in_runs,unsigned_type nruns,RunType * out_run,unsigned_type _m,KeyExtractor keyobj)443 void merge_runs(RunType** in_runs, unsigned_type nruns, RunType* out_run, unsigned_type _m, KeyExtractor keyobj)
444 {
445 typedef BlockType block_type;
446 typedef block_prefetcher<BlockType, typename RunType::iterator> prefetcher_type;
447 typedef run_cursor2<BlockType, prefetcher_type> run_cursor_type;
448
449 unsigned_type i;
450 RunType consume_seq(out_run->size());
451
452 int_type* prefetch_seq = new int_type[out_run->size()];
453
454 typename RunType::iterator copy_start = consume_seq.begin();
455 for (i = 0; i < nruns; i++)
456 {
457 // TODO: try to avoid copy
458 copy_start = std::copy(
459 in_runs[i]->begin(),
460 in_runs[i]->end(),
461 copy_start);
462 }
463 std::stable_sort(consume_seq.begin(), consume_seq.end() _STXXL_SORT_TRIGGER_FORCE_SEQUENTIAL);
464
465 size_t disks_number = config::get_instance()->disks_number();
466
467 #ifdef PLAY_WITH_OPT_PREF
468 const int_type n_write_buffers = 4 * disks_number;
469 #else
470 const int_type n_prefetch_buffers = STXXL_MAX(int_type(2 * disks_number), (3 * (int_type(_m) - int_type(nruns)) / 4));
471 STXXL_VERBOSE("Prefetch buffers " << n_prefetch_buffers);
472 const int_type n_write_buffers = STXXL_MAX(int_type(2 * disks_number), int_type(_m) - int_type(nruns) - int_type(n_prefetch_buffers));
473 STXXL_VERBOSE("Write buffers " << n_write_buffers);
474 // heuristic
475 const int_type n_opt_prefetch_buffers = 2 * int_type(disks_number) + (3 * (int_type(n_prefetch_buffers) - int_type(2 * disks_number))) / 10;
476 STXXL_VERBOSE("Prefetch buffers " << n_opt_prefetch_buffers);
477 #endif
478
479 #if STXXL_SORT_OPTIMAL_PREFETCHING
480 compute_prefetch_schedule(
481 consume_seq,
482 prefetch_seq,
483 n_opt_prefetch_buffers,
484 config::get_instance()->get_max_device_id());
485 #else
486 for (i = 0; i < out_run->size(); i++)
487 prefetch_seq[i] = i;
488
489 #endif
490
491 prefetcher_type prefetcher(consume_seq.begin(),
492 consume_seq.end(),
493 prefetch_seq,
494 nruns + n_prefetch_buffers);
495
496 buffered_writer<block_type> writer(n_write_buffers, n_write_buffers / 2);
497
498 unsigned_type out_run_size = out_run->size();
499
500 run_cursor2_cmp<block_type, prefetcher_type, KeyExtractor> cmp(keyobj);
501 loser_tree<
502 run_cursor_type,
503 run_cursor2_cmp<block_type, prefetcher_type, KeyExtractor> >
504 losers(&prefetcher, nruns, cmp);
505
506 block_type* out_buffer = writer.get_free_block();
507
508 for (i = 0; i < out_run_size; i++)
509 {
510 losers.multi_merge(out_buffer->elem, out_buffer->elem + block_type::size);
511 (*out_run)[i].key = keyobj(out_buffer->elem[0]);
512 out_buffer = writer.write(out_buffer, (*out_run)[i].bid);
513 }
514
515 delete[] prefetch_seq;
516
517 block_manager* bm = block_manager::get_instance();
518 for (i = 0; i < nruns; i++)
519 {
520 unsigned_type sz = in_runs[i]->size();
521 for (unsigned_type j = 0; j < sz; j++)
522 bm->delete_block((*in_runs[i])[j].bid);
523
524 delete in_runs[i];
525 }
526 }
527
528 template <typename BlockType,
529 typename AllocStrategy,
530 typename InputBidIterator,
531 typename KeyExtractor>
532 simple_vector<
533 trigger_entry<typename BlockType::bid_type, typename KeyExtractor::key_type>
534 >*
ksort_blocks(InputBidIterator input_bids,unsigned_type _n,unsigned_type _m,KeyExtractor keyobj)535 ksort_blocks(InputBidIterator input_bids, unsigned_type _n,
536 unsigned_type _m, KeyExtractor keyobj)
537 {
538 typedef BlockType block_type;
539 typedef typename BlockType::value_type type;
540 typedef typename KeyExtractor::key_type key_type;
541 typedef typename BlockType::bid_type bid_type;
542 typedef trigger_entry<bid_type, typename KeyExtractor::key_type> trigger_entry_type;
543 typedef simple_vector<trigger_entry_type> run_type;
544 typedef typename interleaved_alloc_traits<AllocStrategy>::strategy interleaved_alloc_strategy;
545
546 unsigned_type m2 = div_ceil(_m, 2);
547 const unsigned_type m2_rf = m2 * block_type::raw_size /
548 (block_type::raw_size + block_type::size * sizeof(type_key<type, key_type>));
549 STXXL_VERBOSE("Reducing number of blocks in a run from " << m2 << " to " <<
550 m2_rf << " due to key size: " << sizeof(typename KeyExtractor::key_type) << " bytes");
551 m2 = m2_rf;
552 unsigned_type full_runs = _n / m2;
553 unsigned_type partial_runs = ((_n % m2) ? 1 : 0);
554 unsigned_type nruns = full_runs + partial_runs;
555 unsigned_type i;
556
557 block_manager* mng = block_manager::get_instance();
558
559 STXXL_VERBOSE("n=" << _n << " nruns=" << nruns << "=" << full_runs << "+" << partial_runs);
560
561 double begin = timestamp(), after_runs_creation, end;
562
563 run_type** runs = new run_type*[nruns];
564
565 for (i = 0; i < full_runs; i++)
566 runs[i] = new run_type(m2);
567
568 #ifdef INTERLEAVED_ALLOC
569 if (partial_runs)
570 {
571 unsigned_type last_run_size = _n - full_runs * m2;
572 runs[i] = new run_type(last_run_size);
573
574 mng->new_blocks(interleaved_alloc_strategy(nruns, AllocStrategy()),
575 runs2bid_array_adaptor2<block_type::raw_size, run_type>
576 (runs, 0, nruns, last_run_size),
577 runs2bid_array_adaptor2<block_type::raw_size, run_type>
578 (runs, _n, nruns, last_run_size));
579 }
580 else
581 mng->new_blocks(interleaved_alloc_strategy(nruns, AllocStrategy()),
582 runs2bid_array_adaptor<block_type::raw_size, run_type>
583 (runs, 0, nruns),
584 runs2bid_array_adaptor<block_type::raw_size, run_type>
585 (runs, _n, nruns));
586
587 #else
588 if (partial_runs)
589 runs[i] = new run_type(_n - full_runs * m2);
590
591 for (i = 0; i < nruns; i++)
592 {
593 mng->new_blocks(AllocStrategy(), make_bid_iterator(runs[i]->begin()), make_bid_iterator(runs[i]->end()));
594 }
595 #endif
596
597 create_runs<block_type, run_type, InputBidIterator, KeyExtractor>(
598 input_bids, runs, nruns, m2, keyobj);
599
600 after_runs_creation = timestamp();
601
602 double io_wait_after_rf = stats::get_instance()->get_io_wait_time();
603
604 disk_queues::get_instance()->set_priority_op(request_queue::WRITE);
605
606 const int_type merge_factor = optimal_merge_factor(nruns, _m);
607 run_type** new_runs;
608
609 while (nruns > 1)
610 {
611 int_type new_nruns = div_ceil(nruns, merge_factor);
612 STXXL_VERBOSE("Starting new merge phase: nruns: " << nruns <<
613 " opt_merge_factor: " << merge_factor << " m:" << _m << " new_nruns: " << new_nruns);
614
615 new_runs = new run_type*[new_nruns];
616
617 int_type runs_left = nruns;
618 int_type cur_out_run = 0;
619 int_type blocks_in_new_run = 0;
620
621 while (runs_left > 0)
622 {
623 int_type runs2merge = STXXL_MIN(runs_left, merge_factor);
624 blocks_in_new_run = 0;
625 for (unsigned_type i = nruns - runs_left; i < (nruns - runs_left + runs2merge); i++)
626 blocks_in_new_run += runs[i]->size();
627
628 // allocate run
629 new_runs[cur_out_run++] = new run_type(blocks_in_new_run);
630 runs_left -= runs2merge;
631 }
632 // allocate blocks in the new runs
633 if (cur_out_run == 1 && blocks_in_new_run == int_type(_n) && !input_bids->is_managed())
634 {
635 // if we sort a file we can reuse the input bids for the output
636 InputBidIterator cur = input_bids;
637 for (int_type i = 0; cur != (input_bids + _n); ++cur)
638 {
639 (*new_runs[0])[i++].bid = *cur;
640 }
641
642 bid_type& firstBID = (*new_runs[0])[0].bid;
643 if (firstBID.is_managed())
644 {
645 // the first block does not belong to the file
646 // need to reallocate it
647 mng->new_block(FR(), firstBID);
648 }
649 bid_type& lastBID = (*new_runs[0])[_n - 1].bid;
650 if (lastBID.is_managed())
651 {
652 // the first block does not belong to the file
653 // need to reallocate it
654 mng->new_block(FR(), lastBID);
655 }
656 }
657 else
658 {
659 mng->new_blocks(interleaved_alloc_strategy(new_nruns, AllocStrategy()),
660 runs2bid_array_adaptor2<block_type::raw_size, run_type>(new_runs, 0, new_nruns, blocks_in_new_run),
661 runs2bid_array_adaptor2<block_type::raw_size, run_type>(new_runs, _n, new_nruns, blocks_in_new_run));
662 }
663
664 // merge all
665 runs_left = nruns;
666 cur_out_run = 0;
667 while (runs_left > 0)
668 {
669 int_type runs2merge = STXXL_MIN(runs_left, merge_factor);
670 #if STXXL_CHECK_ORDER_IN_SORTS
671 assert((check_ksorted_runs<block_type, run_type, KeyExtractor>(runs + nruns - runs_left, runs2merge, m2, keyobj)));
672 #endif
673 STXXL_VERBOSE("Merging " << runs2merge << " runs");
674 merge_runs<block_type, run_type, KeyExtractor>(runs + nruns - runs_left,
675 runs2merge, *(new_runs + (cur_out_run++)), _m, keyobj);
676 runs_left -= runs2merge;
677 }
678
679 nruns = new_nruns;
680 delete[] runs;
681 runs = new_runs;
682 }
683
684 run_type* result = *runs;
685 delete[] runs;
686
687 end = timestamp();
688
689 STXXL_VERBOSE("Elapsed time : " << end - begin << " s. Run creation time: " <<
690 after_runs_creation - begin << " s");
691 STXXL_VERBOSE("Time in I/O wait(rf): " << io_wait_after_rf << " s");
692 STXXL_VERBOSE(*stats::get_instance());
693
694 return result;
695 }
696
697 } // namespace ksort_local
698
699 /*!
700 * Sort records with integer keys, see \ref design_algo_ksort.
701 *
702 * stxxl::ksort sorts the elements in [first, last) into ascending order,
703 * meaning that if \c i and \c j are any two valid iterators in [first, last)
704 * such that \c i precedes \c j, then \c *j is not less than \c *i. Note: as
705 * std::sort and stxxl::sort, stxxl::ksort is not guaranteed to be stable. That
706 * is, suppose that \c *i and \c *j are equivalent: neither one is less than
707 * the other. It is not guaranteed that the relative order of these two
708 * elements will be preserved by stxxl::ksort.
709 *
710 * The two versions of stxxl::ksort differ in how they define whether one
711 * element is less than another. The first version assumes that the elements
712 * have \c key() member function that returns an integral key (32 or 64 bit),
713 * as well as the minimum and the maximum element values. The second version
714 * compares objects extracting the keys using \c keyobj object, that is in turn
715 * provides min and max element values.
716 *
717 * The sorter's internal memory consumption is bounded by \c M bytes.
718 *
719 * \param first object of model of \c ext_random_access_iterator concept
720 * \param last object of model of \c ext_random_access_iterator concept
721 * \param keyobj \link design_algo_ksort_key_extractor key extractor \endlink object
722 * \param M amount of memory for internal use (in bytes)
723 */
724 template <typename ExtIterator, typename KeyExtractor>
ksort(ExtIterator first,ExtIterator last,KeyExtractor keyobj,unsigned_type M)725 void ksort(ExtIterator first, ExtIterator last, KeyExtractor keyobj, unsigned_type M)
726 {
727 typedef simple_vector<
728 ksort_local::trigger_entry<
729 typename ExtIterator::bid_type, typename KeyExtractor::key_type
730 >
731 > run_type;
732 typedef typename ExtIterator::vector_type::value_type value_type;
733 typedef typename ExtIterator::bid_type bid_type;
734 typedef typename ExtIterator::block_type block_type;
735 typedef typename ExtIterator::vector_type::alloc_strategy_type alloc_strategy_type;
736 typedef typename ExtIterator::bids_container_iterator bids_container_iterator;
737
738 unsigned_type n = 0;
739 block_manager* mng = block_manager::get_instance();
740
741 first.flush();
742
743 if ((last - first) * sizeof(value_type) < M)
744 {
745 stl_in_memory_sort(first, last,
746 ksort_local::key_comparison<value_type, KeyExtractor>(keyobj));
747 }
748 else
749 {
750 assert(2 * block_type::raw_size <= M);
751
752 if (first.block_offset())
753 {
754 if (last.block_offset()) // first and last element reside
755 // not in the beginning of the block
756 {
757 block_type* first_block = new block_type;
758 block_type* last_block = new block_type;
759 bid_type first_bid, last_bid;
760 request_ptr req;
761
762 req = first_block->read(*first.bid());
763 mng->new_block(FR(), first_bid); // try to overlap
764 mng->new_block(FR(), last_bid);
765 req->wait();
766
767 req = last_block->read(*last.bid());
768
769 unsigned_type i = 0;
770 for ( ; i < first.block_offset(); i++)
771 {
772 first_block->elem[i] = keyobj.min_value();
773 }
774
775 req->wait();
776
777 req = first_block->write(first_bid);
778 for (i = last.block_offset(); i < block_type::size; i++)
779 {
780 last_block->elem[i] = keyobj.max_value();
781 }
782
783 req->wait();
784
785 req = last_block->write(last_bid);
786
787 n = last.bid() - first.bid() + 1;
788
789 std::swap(first_bid, *first.bid());
790 std::swap(last_bid, *last.bid());
791
792 req->wait();
793
794 delete first_block;
795 delete last_block;
796
797 run_type* out =
798 ksort_local::ksort_blocks<
799 block_type, alloc_strategy_type,
800 bids_container_iterator, KeyExtractor
801 >(first.bid(), n, M / block_type::raw_size, keyobj);
802
803 first_block = new block_type;
804 last_block = new block_type;
805 block_type* sorted_first_block = new block_type;
806 block_type* sorted_last_block = new block_type;
807 request_ptr* reqs = new request_ptr[2];
808
809 reqs[0] = first_block->read(first_bid);
810 reqs[1] = sorted_first_block->read((*(out->begin())).bid);
811 wait_all(reqs, 2);
812
813 reqs[0] = last_block->read(last_bid);
814 reqs[1] = sorted_last_block->read(((*out)[out->size() - 1]).bid);
815
816 for (i = first.block_offset(); i < block_type::size; i++)
817 {
818 first_block->elem[i] = sorted_first_block->elem[i];
819 }
820 wait_all(reqs, 2);
821
822 req = first_block->write(first_bid);
823
824 for (i = 0; i < last.block_offset(); i++)
825 {
826 last_block->elem[i] = sorted_last_block->elem[i];
827 }
828
829 req->wait();
830
831 req = last_block->write(last_bid);
832
833 mng->delete_block(out->begin()->bid);
834 mng->delete_block((*out)[out->size() - 1].bid);
835
836 *first.bid() = first_bid;
837 *last.bid() = last_bid;
838
839 typename run_type::iterator it = out->begin();
840 it++;
841 bids_container_iterator cur_bid = first.bid();
842 cur_bid++;
843
844 for ( ; cur_bid != last.bid(); cur_bid++, it++)
845 {
846 *cur_bid = (*it).bid;
847 }
848
849 delete first_block;
850 delete sorted_first_block;
851 delete sorted_last_block;
852 delete[] reqs;
853 delete out;
854
855 req->wait();
856
857 delete last_block;
858 }
859 else
860 {
861 // first element resides
862 // not in the beginning of the block
863
864 block_type* first_block = new block_type;
865 bid_type first_bid;
866 request_ptr req;
867
868 req = first_block->read(*first.bid());
869 mng->new_block(FR(), first_bid); // try to overlap
870 req->wait();
871
872 unsigned_type i = 0;
873 for ( ; i < first.block_offset(); i++)
874 {
875 first_block->elem[i] = keyobj.min_value();
876 }
877
878 req = first_block->write(first_bid);
879
880 n = last.bid() - first.bid();
881
882 std::swap(first_bid, *first.bid());
883
884 req->wait();
885
886 delete first_block;
887
888 run_type* out =
889 ksort_local::ksort_blocks<
890 block_type, alloc_strategy_type,
891 bids_container_iterator, KeyExtractor
892 >(first.bid(), n, M / block_type::raw_size, keyobj);
893
894 first_block = new block_type;
895
896 block_type* sorted_first_block = new block_type;
897
898 request_ptr* reqs = new request_ptr[2];
899
900 reqs[0] = first_block->read(first_bid);
901 reqs[1] = sorted_first_block->read((*(out->begin())).bid);
902 wait_all(reqs, 2);
903
904 for (i = first.block_offset(); i < block_type::size; i++)
905 {
906 first_block->elem[i] = sorted_first_block->elem[i];
907 }
908
909 req = first_block->write(first_bid);
910
911 mng->delete_block(out->begin()->bid);
912
913 *first.bid() = first_bid;
914
915 typename run_type::iterator it = out->begin();
916 it++;
917 bids_container_iterator cur_bid = first.bid();
918 cur_bid++;
919
920 for ( ; cur_bid != last.bid(); cur_bid++, it++)
921 {
922 *cur_bid = (*it).bid;
923 }
924
925 *cur_bid = (*it).bid;
926
927 delete sorted_first_block;
928 delete[] reqs;
929 delete out;
930
931 req->wait();
932
933 delete first_block;
934 }
935 }
936 else
937 {
938 if (last.block_offset()) // last element resides
939 // not in the beginning of the block
940 {
941 block_type* last_block = new block_type;
942 bid_type last_bid;
943 request_ptr req;
944 unsigned_type i;
945
946 req = last_block->read(*last.bid());
947 mng->new_block(FR(), last_bid);
948 req->wait();
949
950 for (i = last.block_offset(); i < block_type::size; i++)
951 {
952 last_block->elem[i] = keyobj.max_value();
953 }
954
955 req = last_block->write(last_bid);
956
957 n = last.bid() - first.bid() + 1;
958
959 std::swap(last_bid, *last.bid());
960
961 req->wait();
962
963 delete last_block;
964
965 run_type* out =
966 ksort_local::ksort_blocks<
967 block_type, alloc_strategy_type,
968 bids_container_iterator, KeyExtractor
969 >(first.bid(), n, M / block_type::raw_size, keyobj);
970
971 last_block = new block_type;
972 block_type* sorted_last_block = new block_type;
973 request_ptr* reqs = new request_ptr[2];
974
975 reqs[0] = last_block->read(last_bid);
976 reqs[1] = sorted_last_block->read(((*out)[out->size() - 1]).bid);
977 wait_all(reqs, 2);
978
979 for (i = 0; i < last.block_offset(); i++)
980 {
981 last_block->elem[i] = sorted_last_block->elem[i];
982 }
983
984 req = last_block->write(last_bid);
985
986 mng->delete_block((*out)[out->size() - 1].bid);
987
988 *last.bid() = last_bid;
989
990 typename run_type::iterator it = out->begin();
991 bids_container_iterator cur_bid = first.bid();
992
993 for ( ; cur_bid != last.bid(); cur_bid++, it++)
994 {
995 *cur_bid = (*it).bid;
996 }
997
998 delete sorted_last_block;
999 delete[] reqs;
1000 delete out;
1001
1002 req->wait();
1003
1004 delete last_block;
1005 }
1006 else
1007 {
1008 // first and last element reside in the beginning of blocks
1009 n = last.bid() - first.bid();
1010
1011 run_type* out =
1012 ksort_local::ksort_blocks<
1013 block_type, alloc_strategy_type,
1014 bids_container_iterator, KeyExtractor
1015 >(first.bid(), n, M / block_type::raw_size, keyobj);
1016
1017 typename run_type::iterator it = out->begin();
1018 bids_container_iterator cur_bid = first.bid();
1019
1020 for ( ; cur_bid != last.bid(); cur_bid++, it++)
1021 {
1022 *cur_bid = (*it).bid;
1023 }
1024
1025 delete out;
1026 }
1027 }
1028 }
1029
1030 #if STXXL_CHECK_ORDER_IN_SORTS
1031 typedef typename ExtIterator::const_iterator const_iterator;
1032 STXXL_ASSERT(stxxl::is_sorted(const_iterator(first), const_iterator(last),
1033 ksort_local::key_comparison<value_type, KeyExtractor>()));
1034 #endif
1035 }
1036
//! Default key extractor: delegates to the record's own key() member and
//! its static min_value()/max_value() sentinels.
template <typename RecordType>
struct ksort_defaultkey
{
    typedef typename RecordType::key_type key_type;

    //! Extract the key of a record.
    key_type operator () (const RecordType& obj) const
    {
        return obj.key();
    }
    //! Largest possible record (used as padding sentinel).
    RecordType max_value() const
    {
        return RecordType::max_value();
    }
    //! Smallest possible record (used as padding sentinel).
    RecordType min_value() const
    {
        return RecordType::min_value();
    }
};
1054
1055 /*!
1056 * Sort records with integer keys, see \ref design_algo_ksort.
1057 *
1058 * stxxl::ksort sorts the elements in [first, last) into ascending order,
1059 * meaning that if \c i and \c j are any two valid iterators in [first, last)
1060 * such that \c i precedes \c j, then \c *j is not less than \c *i. Note: as
1061 * std::sort and stxxl::sort, stxxl::ksort is not guaranteed to be stable. That
1062 * is, suppose that \c *i and \c *j are equivalent: neither one is less than
1063 * the other. It is not guaranteed that the relative order of these two
1064 * elements will be preserved by stxxl::ksort.
1065 *
1066 * \param first object of model of \c ext_random_access_iterator concept
1067 * \param last object of model of \c ext_random_access_iterator concept
1068 * \param M amount of buffers for internal use
1069 * \remark Order in the result is non-stable
1070 */
1071 template <typename ExtIterator>
ksort(ExtIterator first,ExtIterator last,unsigned_type M)1072 void ksort(ExtIterator first, ExtIterator last, unsigned_type M)
1073 {
1074 ksort(first, last,
1075 ksort_defaultkey<typename ExtIterator::vector_type::value_type>(), M);
1076 }
1077
1078 //! \}
1079
1080 STXXL_END_NAMESPACE
1081
1082 #endif // !STXXL_ALGO_KSORT_HEADER
1083 // vim: et:ts=4:sw=4
1084