1 //////////////////////////////////////////////////////////////////////////////
2 //
3 // (C) Copyright Ion Gaztanaga 2015-2016.
4 // Distributed under the Boost Software License, Version 1.0.
5 // (See accompanying file LICENSE_1_0.txt or copy at
6 // http://www.boost.org/LICENSE_1_0.txt)
7 //
8 // See http://www.boost.org/libs/move for documentation.
9 //
10 //////////////////////////////////////////////////////////////////////////////
11 
12 #include <cstdlib>   //std::srand
13 #include <algorithm> //std::stable_sort, std::make|sort_heap, std::random_shuffle
14 #include <cstdio>    //std::printf
15 #include <iostream>  //std::cout
16 #include <boost/container/vector.hpp>  //boost::container::vector
17 
18 #include <boost/config.hpp>
19 #include <boost/move/unique_ptr.hpp>
20 #include <boost/timer/timer.hpp>
21 
22 using boost::timer::cpu_timer;
23 using boost::timer::cpu_times;
24 using boost::timer::nanosecond_type;
25 
26 #include "order_type.hpp"
27 #include "random_shuffle.hpp"
28 
29 //#define BOOST_MOVE_ADAPTIVE_SORT_STATS
30 //#define BOOST_MOVE_ADAPTIVE_SORT_INVARIANTS
print_stats(const char * str,boost::ulong_long_type element_count)31 void print_stats(const char *str, boost::ulong_long_type element_count)
32 {
33    std::printf("%sCmp:%7.03f Cpy:%8.03f\n", str, double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count );
34 }
35 
36 
37 #include <boost/move/algo/adaptive_sort.hpp>
38 #include <boost/move/algo/detail/merge_sort.hpp>
39 #include <boost/move/algo/detail/pdqsort.hpp>
40 #include <boost/move/algo/detail/heap_sort.hpp>
41 #include <boost/move/core.hpp>
42 
43 template<class T>
generate_elements(boost::container::vector<T> & elements,std::size_t L,std::size_t NK)44 void generate_elements(boost::container::vector<T> &elements, std::size_t L, std::size_t NK)
45 {
46    elements.resize(L);
47    boost::movelib::unique_ptr<std::size_t[]> key_reps(new std::size_t[NK ? NK : L]);
48 
49    std::srand(0);
50    for (std::size_t i = 0; i < (NK ? NK : L); ++i) {
51       key_reps[i] = 0;
52    }
53    for (std::size_t i = 0; i < L; ++i) {
54       std::size_t  key = NK ? (i % NK) : i;
55       elements[i].key = key;
56    }
57    ::random_shuffle(elements.data(), elements.data() + L);
58    ::random_shuffle(elements.data(), elements.data() + L);
59 
60    for (std::size_t i = 0; i < L; ++i) {
61       elements[i].val = key_reps[elements[i].key]++;
62    }
63 }
64 
65 template<class T, class Compare>
adaptive_sort_buffered(T * elements,std::size_t element_count,Compare comp,std::size_t BufLen)66 void adaptive_sort_buffered(T *elements, std::size_t element_count, Compare comp, std::size_t BufLen)
67 {
68    boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]);
69    boost::movelib::adaptive_sort(elements, elements + element_count, comp, reinterpret_cast<T*>(mem.get()), BufLen);
70 }
71 
72 template<class T, class Compare>
std_like_adaptive_stable_sort_buffered(T * elements,std::size_t element_count,Compare comp,std::size_t BufLen)73 void std_like_adaptive_stable_sort_buffered(T *elements, std::size_t element_count, Compare comp, std::size_t BufLen)
74 {
75    boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*BufLen]);
76    boost::movelib::stable_sort_adaptive_ONlogN2(elements, elements + element_count, comp, reinterpret_cast<T*>(mem.get()), BufLen);
77 }
78 
79 template<class T, class Compare>
merge_sort_buffered(T * elements,std::size_t element_count,Compare comp)80 void merge_sort_buffered(T *elements, std::size_t element_count, Compare comp)
81 {
82    boost::movelib::unique_ptr<char[]> mem(new char[sizeof(T)*((element_count+1)/2)]);
83    boost::movelib::merge_sort(elements, elements + element_count, comp, reinterpret_cast<T*>(mem.get()));
84 }
85 
86 enum AlgoType
87 {
88    MergeSort,
89    StableSort,
90    PdQsort,
91    StdSort,
92    AdaptiveSort,
93    SqrtHAdaptiveSort,
94    SqrtAdaptiveSort,
95    Sqrt2AdaptiveSort,
96    QuartAdaptiveSort,
97    InplaceStableSort,
98    StdSqrtHAdpSort,
99    StdSqrtAdpSort,
100    StdSqrt2AdpSort,
101    StdQuartAdpSort,
102    SlowStableSort,
103    HeapSort,
104    MaxSort
105 };
106 
107 const char *AlgoNames [] = { "MergeSort      "
108                            , "StableSort     "
109                            , "PdQsort        "
110                            , "StdSort        "
111                            , "AdaptSort      "
112                            , "SqrtHAdaptSort "
113                            , "SqrtAdaptSort  "
114                            , "Sqrt2AdaptSort "
115                            , "QuartAdaptSort "
116                            , "InplStableSort "
117                            , "StdSqrtHAdpSort"
118                            , "StdSqrtAdpSort "
119                            , "StdSqrt2AdpSort"
120                            , "StdQuartAdpSort"
121                            , "SlowSort       "
122                            , "HeapSort       "
123                            };
124 
125 BOOST_STATIC_ASSERT((sizeof(AlgoNames)/sizeof(*AlgoNames)) == MaxSort);
126 
127 template<class T>
measure_algo(T * elements,std::size_t element_count,std::size_t alg,nanosecond_type & prev_clock)128 bool measure_algo(T *elements, std::size_t element_count, std::size_t alg, nanosecond_type &prev_clock)
129 {
130    std::printf("%s ", AlgoNames[alg]);
131    order_perf_type::num_compare=0;
132    order_perf_type::num_copy=0;
133    order_perf_type::num_elements = element_count;
134    cpu_timer timer;
135    timer.resume();
136    switch(alg)
137    {
138       case MergeSort:
139          merge_sort_buffered(elements, element_count, order_type_less());
140       break;
141       case StableSort:
142          std::stable_sort(elements,elements+element_count,order_type_less());
143       break;
144       case PdQsort:
145          boost::movelib::pdqsort(elements,elements+element_count,order_type_less());
146       break;
147       case StdSort:
148          std::sort(elements,elements+element_count,order_type_less());
149       break;
150       case AdaptiveSort:
151          boost::movelib::adaptive_sort(elements, elements+element_count, order_type_less());
152       break;
153       case SqrtHAdaptiveSort:
154          adaptive_sort_buffered( elements, element_count, order_type_less()
155                             , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1);
156       break;
157       case SqrtAdaptiveSort:
158          adaptive_sort_buffered( elements, element_count, order_type_less()
159                             , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
160       break;
161       case Sqrt2AdaptiveSort:
162          adaptive_sort_buffered( elements, element_count, order_type_less()
163                             , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
164       break;
165       case QuartAdaptiveSort:
166          adaptive_sort_buffered( elements, element_count, order_type_less()
167                             , (element_count-1)/4+1);
168       break;
169       case InplaceStableSort:
170          boost::movelib::inplace_stable_sort(elements, elements+element_count, order_type_less());
171       break;
172       case StdSqrtHAdpSort:
173          std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less()
174                                               , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count)/2+1);
175       break;
176       case StdSqrtAdpSort:
177          std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less()
178                                                , boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
179       break;
180       case StdSqrt2AdpSort:
181          std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less()
182                                                , 2*boost::movelib::detail_adaptive::ceil_sqrt_multiple(element_count));
183       break;
184       case StdQuartAdpSort:
185          std_like_adaptive_stable_sort_buffered( elements, element_count, order_type_less()
186                                                , (element_count-1)/4+1);
187       break;
188       case SlowStableSort:
189          boost::movelib::detail_adaptive::slow_stable_sort(elements, elements+element_count, order_type_less());
190       break;
191       case HeapSort:
192          boost::movelib::heap_sort(elements, elements+element_count, order_type_less());
193          boost::movelib::heap_sort((order_move_type*)0, (order_move_type*)0, order_type_less());
194 
195       break;
196    }
197    timer.stop();
198 
199    if(order_perf_type::num_elements == element_count){
200       std::printf(" Tmp Ok ");
201    } else{
202       std::printf(" Tmp KO ");
203    }
204    nanosecond_type new_clock = timer.elapsed().wall;
205 
206    //std::cout << "Cmp:" << order_perf_type::num_compare << " Cpy:" << order_perf_type::num_copy;   //for old compilers without ll size argument
207    std::printf("Cmp:%7.03f Cpy:%8.03f", double(order_perf_type::num_compare)/element_count, double(order_perf_type::num_copy)/element_count );
208 
209    double time = double(new_clock);
210 
211    const char *units = "ns";
212    if(time >= 1000000000.0){
213       time /= 1000000000.0;
214       units = " s";
215    }
216    else if(time >= 1000000.0){
217       time /= 1000000.0;
218       units = "ms";
219    }
220    else if(time >= 1000.0){
221       time /= 1000.0;
222       units = "us";
223    }
224 
225    std::printf(" %6.02f%s (%6.02f)\n"
226               , time
227               , units
228               , prev_clock ? double(new_clock)/double(prev_clock): 1.0);
229    prev_clock = new_clock;
230    bool res = is_order_type_ordered(elements, element_count, alg != HeapSort && alg != PdQsort && alg != StdSort);
231    return res;
232 }
233 
234 template<class T>
measure_all(std::size_t L,std::size_t NK)235 bool measure_all(std::size_t L, std::size_t NK)
236 {
237    boost::container::vector<T> original_elements, elements;
238    generate_elements(original_elements, L, NK);
239    std::printf("\n - - N: %u, NK: %u - -\n", (unsigned)L, (unsigned)NK);
240 
241    nanosecond_type prev_clock = 0;
242    nanosecond_type back_clock;
243    bool res = true;
244    elements = original_elements;
245    res = res && measure_algo(elements.data(), L,MergeSort, prev_clock);
246    back_clock = prev_clock;
247    //
248    prev_clock = back_clock;
249    elements = original_elements;
250    res = res && measure_algo(elements.data(), L,StableSort, prev_clock);
251    //
252    prev_clock = back_clock;
253    elements = original_elements;
254    res = res && measure_algo(elements.data(), L,PdQsort, prev_clock);
255    //
256    prev_clock = back_clock;
257    elements = original_elements;
258    res = res && measure_algo(elements.data(), L,StdSort, prev_clock);
259    //
260    prev_clock = back_clock;
261    elements = original_elements;
262    res = res && measure_algo(elements.data(), L,HeapSort, prev_clock);
263    //
264    prev_clock = back_clock;
265    elements = original_elements;
266    res = res && measure_algo(elements.data(), L,QuartAdaptiveSort, prev_clock);
267    //
268    prev_clock = back_clock;
269    elements = original_elements;
270    res = res && measure_algo(elements.data(), L, StdQuartAdpSort, prev_clock);
271    //
272    prev_clock = back_clock;
273    elements = original_elements;
274    res = res && measure_algo(elements.data(), L,Sqrt2AdaptiveSort, prev_clock);
275    //
276    prev_clock = back_clock;
277    elements = original_elements;
278    res = res && measure_algo(elements.data(), L, StdSqrt2AdpSort, prev_clock);
279    //
280    prev_clock = back_clock;
281    elements = original_elements;
282    res = res && measure_algo(elements.data(), L,SqrtAdaptiveSort, prev_clock);
283    //
284    prev_clock = back_clock;
285    elements = original_elements;
286    res = res && measure_algo(elements.data(), L, StdSqrtAdpSort, prev_clock);
287    //
288    prev_clock = back_clock;
289    elements = original_elements;
290    res = res && measure_algo(elements.data(), L,SqrtHAdaptiveSort, prev_clock);
291    //
292    prev_clock = back_clock;
293    elements = original_elements;
294    res = res && measure_algo(elements.data(), L, StdSqrtHAdpSort, prev_clock);
295    //
296    prev_clock = back_clock;
297    elements = original_elements;
298    res = res && measure_algo(elements.data(), L,AdaptiveSort, prev_clock);
299    //
300    prev_clock = back_clock;
301    elements = original_elements;
302    res = res && measure_algo(elements.data(), L,InplaceStableSort, prev_clock);
303    //
304    //prev_clock = back_clock;
305    //elements = original_elements;
306    //res = res && measure_algo(elements.data(), L,SlowStableSort, prev_clock);
307    //
308    if(!res)
309       throw int(0);
310    return res;
311 }
312 
313 //Undef it to run the long test
314 #define BENCH_SORT_SHORT
315 #define BENCH_SORT_UNIQUE_VALUES
316 
main()317 int main()
318 {
319    #ifndef BENCH_SORT_UNIQUE_VALUES
320    measure_all<order_perf_type>(101,1);
321    measure_all<order_perf_type>(101,7);
322    measure_all<order_perf_type>(101,31);
323    #endif
324    measure_all<order_perf_type>(101,0);
325 
326    //
327    #ifndef BENCH_SORT_UNIQUE_VALUES
328    measure_all<order_perf_type>(1101,1);
329    measure_all<order_perf_type>(1001,7);
330    measure_all<order_perf_type>(1001,31);
331    measure_all<order_perf_type>(1001,127);
332    measure_all<order_perf_type>(1001,511);
333    #endif
334    measure_all<order_perf_type>(1001,0);
335    //
336    #ifndef BENCH_SORT_UNIQUE_VALUES
337    measure_all<order_perf_type>(10001,65);
338    measure_all<order_perf_type>(10001,255);
339    measure_all<order_perf_type>(10001,1023);
340    measure_all<order_perf_type>(10001,4095);
341    #endif
342    measure_all<order_perf_type>(10001,0);
343 
344    //
345    #ifdef NDEBUG
346    #ifndef BENCH_SORT_UNIQUE_VALUES
347    measure_all<order_perf_type>(100001,511);
348    measure_all<order_perf_type>(100001,2047);
349    measure_all<order_perf_type>(100001,8191);
350    measure_all<order_perf_type>(100001,32767);
351    #endif
352    measure_all<order_perf_type>(100001,0);
353 
354    //
355    #ifndef BENCH_SORT_SHORT
356    #ifndef BENCH_SORT_UNIQUE_VALUES
357    measure_all<order_perf_type>(1000001, 8192);
358    measure_all<order_perf_type>(1000001, 32768);
359    measure_all<order_perf_type>(1000001, 131072);
360    measure_all<order_perf_type>(1000001, 524288);
361    #endif
362    measure_all<order_perf_type>(1000001,0);
363 
364    #ifndef BENCH_SORT_UNIQUE_VALUES
365    measure_all<order_perf_type>(10000001, 65536);
366    measure_all<order_perf_type>(10000001, 262144);
367    measure_all<order_perf_type>(10000001, 1048576);
368    measure_all<order_perf_type>(10000001, 4194304);
369    #endif
370    measure_all<order_perf_type>(1000001,0);
371    #endif   //#ifndef BENCH_SORT_SHORT
372    #endif   //NDEBUG
373 
374    //measure_all<order_perf_type>(100000001,0);
375 
376    return 0;
377 }
378