1 // The Funnelsort Project  -  Cache oblivious sorting algorithm implementation
2 // Copyright (C) 2005  Kristoffer Vinther
3 //
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation; either version 2 of the License, or
7 // (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with this program; if not, write to the Free Software
16 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
17 
18 
19 #include <cassert>
20 #include <algorithm>
21 #include <utility>
22 #include <memory>
23 #include <iterator>
24 #include <stdexcept>
25 
26 namespace iosort
27 {
28 
29 namespace base_
30 {
31 
32 enum tag_RECURSELIMIT { RECURSELIMIT = 24 };
33 
34 template<class It, class Pred>
insertion_sort(It begin,It end,Pred comp)35 inline void insertion_sort(It begin, It end, Pred comp)
36 {
37 	for( It j=begin+1; j<end; j++ )
38 	{
39 		typename std::iterator_traits<It>::value_type e = *j;
40 		for( It i=j; begin<i; *i=e )
41 			if( comp(e,*(i-1)) )
42 				*i = *(i-1), --i;
43 			else
44 				break;
45 	}
46 }
47 
48 template<typename T, class Pred>
median(const T & a,const T & b,const T & c,Pred comp)49 inline T median(const T& a, const T& b, const T& c, Pred comp)
50 {
51 	if( comp(a,b) )
52 		if( comp(b,c) )
53 			return b;
54 		else if( comp(a,c) )
55 			return c;
56 		else
57 			return a;
58 	else
59 		if( comp(a,c) )
60 			return a;
61 		else if( comp(b,c) )
62 			return c;
63 		else
64 			return b;
65 }
66 
67 template<class BidIt, class T, class Pred>
partition(BidIt begin,BidIt end,T pivot,Pred comp)68 inline BidIt partition(BidIt begin, BidIt end, T pivot, Pred comp)
69 {
70 	for( ;; )
71 	{
72 		for( ; comp(*begin,pivot); ++begin );
73 		for( --end; comp(pivot,*end); --end );
74 		if( !(begin<end) )
75 			return begin;
76 		std::iter_swap(begin, end);
77 		++begin;
78 	}
79 }
80 
81 template<class RanIt, class Pred>
quicksort(RanIt First,RanIt Last,Pred comp)82 inline void quicksort(RanIt First, RanIt Last, Pred comp)
83 {
84 	typename std::iterator_traits<RanIt>::difference_type Count = Last-First;
85 	while( RECURSELIMIT < Count )
86 	{
87 		RanIt part = partition(First, Last, median(*First,*(First+(Last-First)/2),*(Last-1),comp),comp);
88 		if( part-First < Last-part ) // loop on larger half
89 			quicksort(First, part, comp), First = part;
90 		else
91 			quicksort(part, Last, comp), Last = part;
92 		Count = Last-First;
93 	}
94 	if( Count > 1 )
95         insertion_sort(First, Last, comp);
96 }
97 
98 template<class Int>
logc(Int k)99 inline Int logc(Int k)
100 {
101 	height_t h = 0;
102 	for( order_t i=k-1; i; h++, i/=2 );
103 	return h;
104 }
105 
106 template<class RanIt, class Pred>
batcher_sort(RanIt begin,RanIt end,Pred comp)107 void batcher_sort(RanIt begin, RanIt end, Pred comp)
108 {
109 	typedef typename std::iterator_traits<RanIt>::difference_type ItDiff;
110 	ItDiff t = logc(end-begin)-1;
111 	ItDiff p = 1<<t;
112 	for( ; p!=1; p=p/2 )
113 	{
114 		RanIt i, j;
115 		for( i=begin; i<end-p-p; i+=p )
116 		{
117 			j = i+p;
118 			for( ; i<j; i++ )
119 				if( comp(*(i+p),*i) )
120 					std::iter_swap(i+p, i);
121 		}
122 		for( ; i<end-p; i++ )
123 			if( comp(*(i+p),*i) )
124 				std::iter_swap(i+p, i);
125 		ItDiff q = 1<<t;
126 		while( q != p )
127 		{
128 			ItDiff d=q-p;
129 			q = q/2;
130 			for( i=begin+p; i<end-d-p; i+=p )
131 			{
132 				j = i+p;
133 				for( ; i<j; i++ )
134 					if( comp(*(i+d),*i) )
135 						std::iter_swap(i+d, i);
136 			}
137 			for( ; i<end-d; i++ )
138 				if( comp(*(i+d),*i) )
139 					std::iter_swap(i+d, i);
140 		}
141 	}
142 	if( p )
143 	{
144 		for( RanIt i=begin; i<end-1; i+=2 )
145 			if( comp(*(i+1),*i) )
146 				std::iter_swap(i+1, i);
147 		ItDiff q = 1<<t;
148 		while( q != 1 )
149 		{
150 			ItDiff d=q-1;
151 			q = q/2;
152 			for( RanIt i=begin+1; i<end-d; i+=2 )
153 				if( comp(*(i+d),*i) )
154 					std::iter_swap(i+d, i);
155 		}
156 	}
157 }
158 
159 template<class RanIt, class Pred>
inplace_base_sort(RanIt begin,RanIt end,Pred comp)160 void inplace_base_sort(RanIt begin, RanIt end, Pred comp)
161 {
162 #ifdef __ia64__
163 	batcher_sort(begin, end, comp);
164 #else // __ia64__
165 	quicksort(begin, end, comp);
166 #endif // __ia64__
167 }
168 
169 } // namespace base_
170 
171 template<class RanIt, class Splitter>
172 class stream_slices
173 {
174 	typedef typename std::iterator_traits<RanIt>::difference_type diff;
175 public:
176 	class iterator : public std::iterator<std::bidirectional_iterator_tag, std::pair<RanIt,RanIt>, diff>
177 	{
178 		friend class stream_slices;
179 	public:
180 		inline iterator& operator++()
181 		{
182 			begin = end;
183 			end = end+run_size;
184 			if( --big_runs == 0 )
185 				--run_size;
186 			return *this;
187 		}
188 		inline iterator& operator--()
189 		{
190 			end = begin;
191 			begin = end-run_size;
192 			if( ++big_runs == 0 )
193 				++run_size;
194 			return *this;
195 		}
196 		std::pair<RanIt,RanIt> operator*()
197 			{ return make_pair(begin,end); }
198 		bool operator==(const iterator& rhs)
199 			{ return begin == rhs.begin; }
200 	private:
iterator(RanIt begin,diff run_size,size_t big_runs)201 		inline iterator(RanIt begin, diff run_size, size_t big_runs) : begin(begin), run_size(run_size), big_runs(big_runs)
202 		{
203 			if( big_runs )
204 				++run_size;
205 			end = begin+run_size;
206 		}
207 		RanIt begin, end;
208 		diff run_size;
209 		size_t big_runs;
210 	};
stream_slices(RanIt begin,RanIt end)211 	stream_slices(RanIt begin, RanIt end) : b(begin), e(end), order_(Splitter::prefered_order_output(end-begin))
212 	{
213 		if( order_ < 2 )
214 			throw std::logic_error("Splitter::prefered_order_output returned less than two");
215 		run_size = (end-begin)/order_;
216 		size_t rem = static_cast<size_t>((end-begin) % order_);
217 		run_size += rem/order_;
218 		big_runs = rem % order_;
219 	}
order()220 	inline diff order() const
221 		{ return order_; }
begin()222 	inline iterator begin()
223 		{ return iterator(b, run_size, big_runs); }
end()224 	inline iterator end()
225 		{ return iterator(e, run_size, big_runs); }
226 private:
227 	RanIt b, e;
228 	diff run_size;
229 	size_t order_, big_runs;
230 };
231 
232 template<class Splitter, class Diff>
run_size_(Diff d)233 inline Diff run_size_(Diff d)
234 {
235 	size_t order = Splitter::prefered_order_output(d);
236 	Diff run_size = d/order;
237 	size_t rem = static_cast<size_t>(d % order);
238 	run_size += rem/order;
239 	size_t big_runs = rem % order;
240 	if( big_runs )
241 		run_size++;
242 	return run_size;
243 }
244 
245 template<class Merger, class Splitter, class Diff, class MAlloc, class TAlloc>
build_cache_(Diff run_size,MAlloc & malloc,TAlloc & talloc)246 inline std::pair<Merger*,Merger*> build_cache_(Diff run_size, MAlloc& malloc, TAlloc& talloc)
247 {
248 	Merger *begin_cache, *end_cache;
249 	int rec_calls = 0;
250 	for( Diff d=run_size; d>static_cast<Diff>(Splitter::switch_to_cache_aware()); d=run_size_<Splitter>(d), ++rec_calls );
251 	begin_cache = end_cache = malloc.allocate(rec_calls);
252 	for( Diff d=run_size; d>static_cast<Diff>(Splitter::switch_to_cache_aware()); d=run_size_<Splitter>(d), ++end_cache )
253 		new(end_cache) Merger(Splitter::prefered_order_output(d), talloc);
254 	return make_pair(begin_cache,end_cache);
255 }
256 
257 template<class Merger, class Splitter, class It, class OutIt, class Alloc>
258 void merge_sort_(It begin, It end, OutIt dest, Merger* cache, Alloc alloc);
259 
260 template<class Merger, class Splitter, class MPtr, class It, class Alloc>
sort_streams_(MPtr cache,It begin,size_t order,size_t run_size,size_t big_runs,Alloc alloc)261 inline void sort_streams_(MPtr cache, It begin, size_t order, size_t run_size, size_t big_runs, Alloc alloc)
262 {
263 	typedef typename std::iterator_traits<It>::value_type T;
264 	typename Alloc::template rebind<T>::other talloc(alloc);
265 	typedef typename Alloc::template rebind<T>::other::pointer TPtr;
266 	TPtr tmp = talloc.allocate((size_t)(run_size));
267 	It last = begin;
268 	size_t i;
269 	if( big_runs )
270 	{
271 		merge_sort_<Merger,Splitter>(last, last+run_size, std::raw_storage_iterator<TPtr,T>(tmp), cache, alloc);
272 		for( i=1, last+=run_size; i!=big_runs; i++, last+=run_size )
273 			merge_sort_<Merger,Splitter>(last, last+run_size, last-run_size, cache, alloc);
274 		run_size--;
275 		for( ; i!=order; i++, last+=run_size )
276 			merge_sort_<Merger,Splitter>(last, last+run_size, last-run_size-1, cache, alloc);
277 		run_size++;
278 	}
279 	else
280 	{
281 		merge_sort_<Merger,Splitter>(last, last+run_size, tmp, cache, alloc);
282 		for( --order, last+=run_size; order; --order, last+=run_size )
283 			merge_sort_<Merger,Splitter>(last, last+run_size, last-run_size, cache, alloc);
284 	}
285 	std::copy(tmp, tmp+run_size, last-run_size);
286 	for( TPtr p=tmp+run_size-1; p>=tmp; --p )
287 		talloc.destroy(p);
288 	talloc.deallocate(tmp,static_cast<size_t>(run_size));
289 }
290 
291 template<class MPtr, class It>
add_sorted_streams_(MPtr merger,It end,size_t order,size_t run_size,size_t big_runs)292 inline void add_sorted_streams_(MPtr merger, It end, size_t order, size_t run_size, size_t big_runs)
293 {
294 	if( big_runs )
295 	{
296 		merger->add_stream(end-run_size, end);
297 		end -= run_size;
298 		--run_size, --big_runs;
299 		for( --order; order!=big_runs; --order, end-=run_size )
300 			merger->add_stream(end-run_size, end);
301 		++run_size;
302 		for( ; order; --order, end-=run_size )
303 			merger->add_stream(end-run_size, end);
304 	}
305 	else
306 		for( ; order; --order, end-=run_size )
307 			merger->add_stream(end-run_size, end);
308 }
309 
310 template<class MPtr, class It, class Pred>
add_streams_(MPtr merger,It begin,size_t order,size_t run_size,size_t big_runs,Pred comp)311 inline void add_streams_(MPtr merger, It begin, size_t order, size_t run_size, size_t big_runs, Pred comp)
312 {
313 	size_t i;
314 	for( i=0; i!=order-big_runs; ++i, begin+=run_size )
315 		base_::inplace_base_sort(begin, begin+run_size, comp);
316 	++run_size;
317 	for( ; i!=order; ++i, begin+=run_size )
318 		base_::inplace_base_sort(begin, begin+run_size, comp);
319 
320 	for( ; i!=order-big_runs; --i, begin-=run_size )
321 		merger->add_stream(begin-run_size, begin);
322 	--run_size;
323 	for( ; i; --i, begin-=run_size )
324 		merger->add_stream(begin-run_size, begin);
325 }
326 
327 template<class Merger, class Splitter, class It, class OutIt, class Alloc>
merge_sort_(It begin,It end,OutIt dest,Merger * cache,Alloc alloc)328 void merge_sort_(It begin, It end, OutIt dest, Merger* cache, Alloc alloc)
329 {
330 	typedef typename std::iterator_traits<It>::value_type T;
331 	typedef typename std::iterator_traits<It>::difference_type ItDiff;
332 	typedef typename Merger::stream Stream;
333 
334 	order_t order = Splitter::prefered_order_output(end-begin);
335 	if( order < 2 )
336 		throw std::logic_error("Splitter::prefered_order_output returned less than two");
337 	ItDiff run_size = (end-begin)/order;
338 	size_t rem = (size_t)((end-begin) % order);
339 	run_size += rem/order;
340 	size_t big_runs = rem % order;
341 
342 	cache->reset();
343 	if( run_size > static_cast<ItDiff>(Splitter::switch_to_cache_aware()) )
344 	{
345 		if( big_runs )
346 			run_size++;
347 		sort_streams_<Merger,Splitter>(cache+1,begin,order,run_size,big_runs,alloc);
348 		add_sorted_streams_(cache,end,order,run_size,big_runs);
349 	}
350 	else
351 		add_streams_(cache,begin,order,run_size,big_runs, typename Merger::predicate());
352 	(*cache)(dest);
353 }
354 
355 template<class Merger, class Splitter, class It, class OutIt>
merge_sort(It begin,It end,OutIt dest,const typename Merger::allocator & alloc)356 inline void merge_sort(It begin, It end, OutIt dest, const typename Merger::allocator& alloc)
357 {
358 	typedef typename std::iterator_traits<It>::difference_type ItDiff;
359 	if( end-begin > static_cast<ItDiff>(Splitter::switch_to_cache_aware()) )
360 	{
361 		order_t order = Splitter::prefered_order_output(end-begin);
362 		if( order < 2 )
363 			throw std::logic_error("Splitter::prefered_order_output returned less than two");
364 		ItDiff run_size = (end-begin)/order;
365 		size_t rem = (size_t)((end-begin) % order);
366 		run_size += rem/order;
367 		size_t big_runs = rem % order;
368 		if( run_size > static_cast<ItDiff>(Splitter::switch_to_cache_aware()) )
369 		{
370 			if( big_runs )
371 				run_size++;
372 			typename Merger::allocator::template rebind<Merger>::other malloc(alloc);
373 			std::pair<Merger*,Merger*> cache = build_cache_<Merger,Splitter>(run_size, malloc, alloc);
374 			sort_streams_<Merger,Splitter>(cache.first, begin, order, run_size, big_runs, alloc);
375 			for( Merger *pm=cache.second-1; pm>=cache.first; --pm )
376 				malloc.destroy(pm);
377 			malloc.deallocate(cache.first, cache.second-cache.first);
378 		}
379 		Merger merger(order,alloc);
380 		if( run_size > static_cast<ItDiff>(Splitter::switch_to_cache_aware()) )
381 			add_sorted_streams_(&merger, end, order, run_size, big_runs);
382 		else
383 			add_streams_(&merger, begin, order, run_size, big_runs, typename Merger::predicate());
384 #ifdef _DEBUG
385 		OutIt e = merger(dest, dest+(end-begin));
386 		merger.empty(dest, dest+(end-begin));
387 		size_t N = 0;
388 		for( typename Merger::stream_iterator i=merger.begin(); i!=merger.end(); ++i )
389 			N += (*i).end()-(*i).begin();
390 		assert( e == dest+(end-begin) );
391 #else // _DEBUG
392 		merger(dest);
393 #endif // _DEBUG
394 	}
395 	else
396 	{
397 		base_::inplace_base_sort(begin, end, typename Merger::predicate());
398 		std::copy(begin, end, dest);
399 	}
400 }
401 } // namespace iosort
402