1 // The Funnelsort Project - Cache oblivious sorting algorithm implementation
2 // Copyright (C) 2005 Kristoffer Vinther
3 //
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation; either version 2 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program; if not, write to the Free Software
16 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
17
18
19 #include <cassert>
20 #include <algorithm>
21 #include <utility>
22 #include <memory>
23 #include <iterator>
24 #include <stdexcept>
25
26 namespace iosort
27 {
28
29 namespace base_
30 {
31
32 enum tag_RECURSELIMIT { RECURSELIMIT = 24 };
33
34 template<class It, class Pred>
insertion_sort(It begin,It end,Pred comp)35 inline void insertion_sort(It begin, It end, Pred comp)
36 {
37 for( It j=begin+1; j<end; j++ )
38 {
39 typename std::iterator_traits<It>::value_type e = *j;
40 for( It i=j; begin<i; *i=e )
41 if( comp(e,*(i-1)) )
42 *i = *(i-1), --i;
43 else
44 break;
45 }
46 }
47
48 template<typename T, class Pred>
median(const T & a,const T & b,const T & c,Pred comp)49 inline T median(const T& a, const T& b, const T& c, Pred comp)
50 {
51 if( comp(a,b) )
52 if( comp(b,c) )
53 return b;
54 else if( comp(a,c) )
55 return c;
56 else
57 return a;
58 else
59 if( comp(a,c) )
60 return a;
61 else if( comp(b,c) )
62 return c;
63 else
64 return b;
65 }
66
67 template<class BidIt, class T, class Pred>
partition(BidIt begin,BidIt end,T pivot,Pred comp)68 inline BidIt partition(BidIt begin, BidIt end, T pivot, Pred comp)
69 {
70 for( ;; )
71 {
72 for( ; comp(*begin,pivot); ++begin );
73 for( --end; comp(pivot,*end); --end );
74 if( !(begin<end) )
75 return begin;
76 std::iter_swap(begin, end);
77 ++begin;
78 }
79 }
80
81 template<class RanIt, class Pred>
quicksort(RanIt First,RanIt Last,Pred comp)82 inline void quicksort(RanIt First, RanIt Last, Pred comp)
83 {
84 typename std::iterator_traits<RanIt>::difference_type Count = Last-First;
85 while( RECURSELIMIT < Count )
86 {
87 RanIt part = partition(First, Last, median(*First,*(First+(Last-First)/2),*(Last-1),comp),comp);
88 if( part-First < Last-part ) // loop on larger half
89 quicksort(First, part, comp), First = part;
90 else
91 quicksort(part, Last, comp), Last = part;
92 Count = Last-First;
93 }
94 if( Count > 1 )
95 insertion_sort(First, Last, comp);
96 }
97
98 template<class Int>
logc(Int k)99 inline Int logc(Int k)
100 {
101 height_t h = 0;
102 for( order_t i=k-1; i; h++, i/=2 );
103 return h;
104 }
105
106 template<class RanIt, class Pred>
batcher_sort(RanIt begin,RanIt end,Pred comp)107 void batcher_sort(RanIt begin, RanIt end, Pred comp)
108 {
109 typedef typename std::iterator_traits<RanIt>::difference_type ItDiff;
110 ItDiff t = logc(end-begin)-1;
111 ItDiff p = 1<<t;
112 for( ; p!=1; p=p/2 )
113 {
114 RanIt i, j;
115 for( i=begin; i<end-p-p; i+=p )
116 {
117 j = i+p;
118 for( ; i<j; i++ )
119 if( comp(*(i+p),*i) )
120 std::iter_swap(i+p, i);
121 }
122 for( ; i<end-p; i++ )
123 if( comp(*(i+p),*i) )
124 std::iter_swap(i+p, i);
125 ItDiff q = 1<<t;
126 while( q != p )
127 {
128 ItDiff d=q-p;
129 q = q/2;
130 for( i=begin+p; i<end-d-p; i+=p )
131 {
132 j = i+p;
133 for( ; i<j; i++ )
134 if( comp(*(i+d),*i) )
135 std::iter_swap(i+d, i);
136 }
137 for( ; i<end-d; i++ )
138 if( comp(*(i+d),*i) )
139 std::iter_swap(i+d, i);
140 }
141 }
142 if( p )
143 {
144 for( RanIt i=begin; i<end-1; i+=2 )
145 if( comp(*(i+1),*i) )
146 std::iter_swap(i+1, i);
147 ItDiff q = 1<<t;
148 while( q != 1 )
149 {
150 ItDiff d=q-1;
151 q = q/2;
152 for( RanIt i=begin+1; i<end-d; i+=2 )
153 if( comp(*(i+d),*i) )
154 std::iter_swap(i+d, i);
155 }
156 }
157 }
158
159 template<class RanIt, class Pred>
inplace_base_sort(RanIt begin,RanIt end,Pred comp)160 void inplace_base_sort(RanIt begin, RanIt end, Pred comp)
161 {
162 #ifdef __ia64__
163 batcher_sort(begin, end, comp);
164 #else // __ia64__
165 quicksort(begin, end, comp);
166 #endif // __ia64__
167 }
168
169 } // namespace base_
170
171 template<class RanIt, class Splitter>
172 class stream_slices
173 {
174 typedef typename std::iterator_traits<RanIt>::difference_type diff;
175 public:
176 class iterator : public std::iterator<std::bidirectional_iterator_tag, std::pair<RanIt,RanIt>, diff>
177 {
178 friend class stream_slices;
179 public:
180 inline iterator& operator++()
181 {
182 begin = end;
183 end = end+run_size;
184 if( --big_runs == 0 )
185 --run_size;
186 return *this;
187 }
188 inline iterator& operator--()
189 {
190 end = begin;
191 begin = end-run_size;
192 if( ++big_runs == 0 )
193 ++run_size;
194 return *this;
195 }
196 std::pair<RanIt,RanIt> operator*()
197 { return make_pair(begin,end); }
198 bool operator==(const iterator& rhs)
199 { return begin == rhs.begin; }
200 private:
iterator(RanIt begin,diff run_size,size_t big_runs)201 inline iterator(RanIt begin, diff run_size, size_t big_runs) : begin(begin), run_size(run_size), big_runs(big_runs)
202 {
203 if( big_runs )
204 ++run_size;
205 end = begin+run_size;
206 }
207 RanIt begin, end;
208 diff run_size;
209 size_t big_runs;
210 };
stream_slices(RanIt begin,RanIt end)211 stream_slices(RanIt begin, RanIt end) : b(begin), e(end), order_(Splitter::prefered_order_output(end-begin))
212 {
213 if( order_ < 2 )
214 throw std::logic_error("Splitter::prefered_order_output returned less than two");
215 run_size = (end-begin)/order_;
216 size_t rem = static_cast<size_t>((end-begin) % order_);
217 run_size += rem/order_;
218 big_runs = rem % order_;
219 }
order()220 inline diff order() const
221 { return order_; }
begin()222 inline iterator begin()
223 { return iterator(b, run_size, big_runs); }
end()224 inline iterator end()
225 { return iterator(e, run_size, big_runs); }
226 private:
227 RanIt b, e;
228 diff run_size;
229 size_t order_, big_runs;
230 };
231
232 template<class Splitter, class Diff>
run_size_(Diff d)233 inline Diff run_size_(Diff d)
234 {
235 size_t order = Splitter::prefered_order_output(d);
236 Diff run_size = d/order;
237 size_t rem = static_cast<size_t>(d % order);
238 run_size += rem/order;
239 size_t big_runs = rem % order;
240 if( big_runs )
241 run_size++;
242 return run_size;
243 }
244
245 template<class Merger, class Splitter, class Diff, class MAlloc, class TAlloc>
build_cache_(Diff run_size,MAlloc & malloc,TAlloc & talloc)246 inline std::pair<Merger*,Merger*> build_cache_(Diff run_size, MAlloc& malloc, TAlloc& talloc)
247 {
248 Merger *begin_cache, *end_cache;
249 int rec_calls = 0;
250 for( Diff d=run_size; d>static_cast<Diff>(Splitter::switch_to_cache_aware()); d=run_size_<Splitter>(d), ++rec_calls );
251 begin_cache = end_cache = malloc.allocate(rec_calls);
252 for( Diff d=run_size; d>static_cast<Diff>(Splitter::switch_to_cache_aware()); d=run_size_<Splitter>(d), ++end_cache )
253 new(end_cache) Merger(Splitter::prefered_order_output(d), talloc);
254 return make_pair(begin_cache,end_cache);
255 }
256
257 template<class Merger, class Splitter, class It, class OutIt, class Alloc>
258 void merge_sort_(It begin, It end, OutIt dest, Merger* cache, Alloc alloc);
259
260 template<class Merger, class Splitter, class MPtr, class It, class Alloc>
sort_streams_(MPtr cache,It begin,size_t order,size_t run_size,size_t big_runs,Alloc alloc)261 inline void sort_streams_(MPtr cache, It begin, size_t order, size_t run_size, size_t big_runs, Alloc alloc)
262 {
263 typedef typename std::iterator_traits<It>::value_type T;
264 typename Alloc::template rebind<T>::other talloc(alloc);
265 typedef typename Alloc::template rebind<T>::other::pointer TPtr;
266 TPtr tmp = talloc.allocate((size_t)(run_size));
267 It last = begin;
268 size_t i;
269 if( big_runs )
270 {
271 merge_sort_<Merger,Splitter>(last, last+run_size, std::raw_storage_iterator<TPtr,T>(tmp), cache, alloc);
272 for( i=1, last+=run_size; i!=big_runs; i++, last+=run_size )
273 merge_sort_<Merger,Splitter>(last, last+run_size, last-run_size, cache, alloc);
274 run_size--;
275 for( ; i!=order; i++, last+=run_size )
276 merge_sort_<Merger,Splitter>(last, last+run_size, last-run_size-1, cache, alloc);
277 run_size++;
278 }
279 else
280 {
281 merge_sort_<Merger,Splitter>(last, last+run_size, tmp, cache, alloc);
282 for( --order, last+=run_size; order; --order, last+=run_size )
283 merge_sort_<Merger,Splitter>(last, last+run_size, last-run_size, cache, alloc);
284 }
285 std::copy(tmp, tmp+run_size, last-run_size);
286 for( TPtr p=tmp+run_size-1; p>=tmp; --p )
287 talloc.destroy(p);
288 talloc.deallocate(tmp,static_cast<size_t>(run_size));
289 }
290
291 template<class MPtr, class It>
add_sorted_streams_(MPtr merger,It end,size_t order,size_t run_size,size_t big_runs)292 inline void add_sorted_streams_(MPtr merger, It end, size_t order, size_t run_size, size_t big_runs)
293 {
294 if( big_runs )
295 {
296 merger->add_stream(end-run_size, end);
297 end -= run_size;
298 --run_size, --big_runs;
299 for( --order; order!=big_runs; --order, end-=run_size )
300 merger->add_stream(end-run_size, end);
301 ++run_size;
302 for( ; order; --order, end-=run_size )
303 merger->add_stream(end-run_size, end);
304 }
305 else
306 for( ; order; --order, end-=run_size )
307 merger->add_stream(end-run_size, end);
308 }
309
310 template<class MPtr, class It, class Pred>
add_streams_(MPtr merger,It begin,size_t order,size_t run_size,size_t big_runs,Pred comp)311 inline void add_streams_(MPtr merger, It begin, size_t order, size_t run_size, size_t big_runs, Pred comp)
312 {
313 size_t i;
314 for( i=0; i!=order-big_runs; ++i, begin+=run_size )
315 base_::inplace_base_sort(begin, begin+run_size, comp);
316 ++run_size;
317 for( ; i!=order; ++i, begin+=run_size )
318 base_::inplace_base_sort(begin, begin+run_size, comp);
319
320 for( ; i!=order-big_runs; --i, begin-=run_size )
321 merger->add_stream(begin-run_size, begin);
322 --run_size;
323 for( ; i; --i, begin-=run_size )
324 merger->add_stream(begin-run_size, begin);
325 }
326
327 template<class Merger, class Splitter, class It, class OutIt, class Alloc>
merge_sort_(It begin,It end,OutIt dest,Merger * cache,Alloc alloc)328 void merge_sort_(It begin, It end, OutIt dest, Merger* cache, Alloc alloc)
329 {
330 typedef typename std::iterator_traits<It>::value_type T;
331 typedef typename std::iterator_traits<It>::difference_type ItDiff;
332 typedef typename Merger::stream Stream;
333
334 order_t order = Splitter::prefered_order_output(end-begin);
335 if( order < 2 )
336 throw std::logic_error("Splitter::prefered_order_output returned less than two");
337 ItDiff run_size = (end-begin)/order;
338 size_t rem = (size_t)((end-begin) % order);
339 run_size += rem/order;
340 size_t big_runs = rem % order;
341
342 cache->reset();
343 if( run_size > static_cast<ItDiff>(Splitter::switch_to_cache_aware()) )
344 {
345 if( big_runs )
346 run_size++;
347 sort_streams_<Merger,Splitter>(cache+1,begin,order,run_size,big_runs,alloc);
348 add_sorted_streams_(cache,end,order,run_size,big_runs);
349 }
350 else
351 add_streams_(cache,begin,order,run_size,big_runs, typename Merger::predicate());
352 (*cache)(dest);
353 }
354
355 template<class Merger, class Splitter, class It, class OutIt>
merge_sort(It begin,It end,OutIt dest,const typename Merger::allocator & alloc)356 inline void merge_sort(It begin, It end, OutIt dest, const typename Merger::allocator& alloc)
357 {
358 typedef typename std::iterator_traits<It>::difference_type ItDiff;
359 if( end-begin > static_cast<ItDiff>(Splitter::switch_to_cache_aware()) )
360 {
361 order_t order = Splitter::prefered_order_output(end-begin);
362 if( order < 2 )
363 throw std::logic_error("Splitter::prefered_order_output returned less than two");
364 ItDiff run_size = (end-begin)/order;
365 size_t rem = (size_t)((end-begin) % order);
366 run_size += rem/order;
367 size_t big_runs = rem % order;
368 if( run_size > static_cast<ItDiff>(Splitter::switch_to_cache_aware()) )
369 {
370 if( big_runs )
371 run_size++;
372 typename Merger::allocator::template rebind<Merger>::other malloc(alloc);
373 std::pair<Merger*,Merger*> cache = build_cache_<Merger,Splitter>(run_size, malloc, alloc);
374 sort_streams_<Merger,Splitter>(cache.first, begin, order, run_size, big_runs, alloc);
375 for( Merger *pm=cache.second-1; pm>=cache.first; --pm )
376 malloc.destroy(pm);
377 malloc.deallocate(cache.first, cache.second-cache.first);
378 }
379 Merger merger(order,alloc);
380 if( run_size > static_cast<ItDiff>(Splitter::switch_to_cache_aware()) )
381 add_sorted_streams_(&merger, end, order, run_size, big_runs);
382 else
383 add_streams_(&merger, begin, order, run_size, big_runs, typename Merger::predicate());
384 #ifdef _DEBUG
385 OutIt e = merger(dest, dest+(end-begin));
386 merger.empty(dest, dest+(end-begin));
387 size_t N = 0;
388 for( typename Merger::stream_iterator i=merger.begin(); i!=merger.end(); ++i )
389 N += (*i).end()-(*i).begin();
390 assert( e == dest+(end-begin) );
391 #else // _DEBUG
392 merger(dest);
393 #endif // _DEBUG
394 }
395 else
396 {
397 base_::inplace_base_sort(begin, end, typename Merger::predicate());
398 std::copy(begin, end, dest);
399 }
400 }
401 } // namespace iosort
402