1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #ifndef _MSC_VER
5 # include <stdint.h>
6 #endif
7 
8 #ifndef SORT_NAME
9 #error "Must declare SORT_NAME"
10 #endif
11 
12 #ifndef SORT_TYPE
13 #error "Must declare SORT_TYPE"
14 #endif
15 
16 #ifndef SORT_CMP
17 #define SORT_CMP(x, y)  ((x) < (y) ? -1 : ((x) == (y) ? 0 : 1))
18 #endif
19 
20 
21 #ifndef CLZ
22 #ifdef __GNUC__
23 #define CLZ __builtin_clzll
24 #else
25 
26 // adapted from Hacker's Delight
clzll(uint64_t x)27 int clzll(uint64_t x) {
28   int n;
29 
30   if (x == 0) return(64);
31   n = 0;
32   if (x <= 0x00000000FFFFFFFFL) {n = n + 32; x = x << 32;}
33   if (x <= 0x0000FFFFFFFFFFFFL) {n = n + 16; x = x << 16;}
34   if (x <= 0x00FFFFFFFFFFFFFFL) {n = n + 8; x = x << 8;}
35   if (x <= 0x0FFFFFFFFFFFFFFFL) {n = n + 4; x = x << 4;}
36   if (x <= 0x3FFFFFFFFFFFFFFFL) {n = n + 2; x = x << 2;}
37   if (x <= 0x7FFFFFFFFFFFFFFFL) {n = n + 1;}
38   return n;
39 }
40 
41 #define CLZ clzll
42 #endif
43 #endif
44 
45 
46 #define SORT_SWAP(x,y) {SORT_TYPE __SORT_SWAP_t = (x); (x) = (y); (y) = __SORT_SWAP_t;}
47 
48 #define SORT_CONCAT(x, y) x ## _ ## y
49 #define SORT_MAKE_STR1(x, y) SORT_CONCAT(x,y)
50 #define SORT_MAKE_STR(x) SORT_MAKE_STR1(SORT_NAME,x)
51 
52 
53 #define SHELL_SORT             SORT_MAKE_STR(shell_sort)
54 #define BINARY_INSERTION_SORT  SORT_MAKE_STR(binary_insertion_sort)
55 #define HEAP_SORT              SORT_MAKE_STR(heap_sort)
56 #define QUICK_SORT             SORT_MAKE_STR(quick_sort)
57 #define MERGE_SORT             SORT_MAKE_STR(merge_sort)
58 #define BUBBLE_SORT            SORT_MAKE_STR(bubble_sort)
59 #define TIM_SORT               SORT_MAKE_STR(tim_sort)
60 
61 #define TIM_SORT_RUN_T         SORT_MAKE_STR(tim_sort_run_t)
62 #define TEMP_STORAGE_T         SORT_MAKE_STR(temp_storage_t)
63 
64 #ifndef MAX
65 #define MAX(x,y) (((x) > (y) ? (x) : (y)))
66 #endif
67 #ifndef MIN
68 #define MIN(x,y) (((x) < (y) ? (x) : (y)))
69 #endif
70 
71 typedef struct {
72   int64_t start;
73   int64_t length;
74 } TIM_SORT_RUN_T;
75 
76 
77 void SHELL_SORT(SORT_TYPE *dst, const size_t size);
78 void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size);
79 void HEAP_SORT(SORT_TYPE *dst, const size_t size);
80 void QUICK_SORT(SORT_TYPE *dst, const size_t size);
81 void MERGE_SORT(SORT_TYPE *dst, const size_t size);
82 void BUBBLE_SORT(SORT_TYPE *dst, const size_t size);
83 void TIM_SORT(SORT_TYPE *dst, const size_t size);
84 
85 
86 /* From http://oeis.org/classic/A102549 */
87 static const uint64_t shell_gaps[48] = {1, 4, 10, 23, 57, 132, 301, 701, 1750, 4376, 10941, 27353, 68383, 170958, 427396, 1068491, 2671228, 6678071, 16695178, 41737946, 104344866, 260862166, 652155416, 1630388541, 4075971353LL, 10189928383LL, 25474820958LL, 63687052396LL, 159217630991LL, 398044077478LL, 995110193696LL, 2487775484241LL, 6219438710603LL, 15548596776508LL, 38871491941271LL, 97178729853178LL, 242946824632946LL, 607367061582366LL, 1518417653955916LL, 3796044134889791LL, 9490110337224478LL, 23725275843061196LL, 59313189607652991LL, 148282974019132478LL, 370707435047831196LL, 926768587619577991LL, 2316921469048944978LL, 5792303672622362446LL};
88 
89 /* Shell sort implementation based on Wikipedia article
90    http://en.wikipedia.org/wiki/Shell_sort
91 */
SHELL_SORT(SORT_TYPE * dst,const size_t size)92 void SHELL_SORT(SORT_TYPE *dst, const size_t size)
93 {
94   // TODO: binary search to find first gap?
95   int inci = 47;
96   int64_t inc = shell_gaps[inci];
97   while (inc > int64_t(size >> 1))
98   {
99     inc = shell_gaps[--inci];
100   }
101   int64_t i;
102   while (1)
103   {
104     for (i = inc; i < int64_t(size); i++)
105     {
106       SORT_TYPE temp = dst[i];
107       int64_t j = i;
108       while ((j >= inc) && (SORT_CMP(dst[j - inc], temp) > 0))
109       {
110         dst[j] = dst[j - inc];
111         j -= inc;
112       }
113       dst[j] = temp;
114     }
115     if (inc == 1) break;
116     inc = shell_gaps[--inci];
117   }
118 }
119 
120 /* Function used to do a binary search for binary insertion sort */
binary_insertion_find(SORT_TYPE * dst,const SORT_TYPE x,const size_t size)121 static inline int64_t binary_insertion_find(SORT_TYPE *dst, const SORT_TYPE x, const size_t size)
122 {
123   int64_t l, c, r;
124   l = 0;
125   r = size - 1;
126   c = r >> 1;
127   SORT_TYPE lx, cx, rx;
128   lx = dst[l];
129 
130   /* check for beginning conditions */
131   if (SORT_CMP(x, lx) < 0)
132     return 0;
133   else if (SORT_CMP(x, lx) == 0)
134   {
135     int64_t i = 1;
136     while (SORT_CMP(x, dst[i]) == 0) i++;
137     return i;
138   }
139 
140   rx = dst[r];
141   // guaranteed not to be >= rx
142   cx = dst[c];
143   while (1)
144   {
145     const int val = SORT_CMP(x, cx);
146     if (val < 0)
147     {
148       if (c - l <= 1) return c;
149       r = c;
150       rx = cx;
151     }
152     else if (val > 0)
153     {
154       if (r - c <= 1) return c + 1;
155       l = c;
156       lx = cx;
157     }
158     else
159     {
160       do
161       {
162         cx = dst[++c];
163       } while (SORT_CMP(x, cx) == 0);
164       return c;
165     }
166     c = l + ((r - l) >> 1);
167     cx = dst[c];
168   }
169 }
170 
171 /* Binary insertion sort, but knowing that the first "start" entries are sorted.  Used in timsort. */
binary_insertion_sort_start(SORT_TYPE * dst,const size_t start,const size_t size)172 static inline void binary_insertion_sort_start(SORT_TYPE *dst, const size_t start, const size_t size)
173 {
174   int64_t i;
175   for (i = start; i < int64_t(size); i++)
176   {
177     int64_t j;
178     /* If this entry is already correct, just move along */
179     if (SORT_CMP(dst[i - 1], dst[i]) <= 0) continue;
180 
181     /* Else we need to find the right place, shift everything over, and squeeze in */
182     SORT_TYPE x = dst[i];
183     int64_t location = binary_insertion_find(dst, x, i);
184     for (j = i - 1; j >= location; j--)
185     {
186       dst[j + 1] = dst[j];
187     }
188     dst[location] = x;
189   }
190 }
191 
192 /* Binary insertion sort */
BINARY_INSERTION_SORT(SORT_TYPE * dst,const size_t size)193 void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size)
194 {
195   binary_insertion_sort_start(dst, 1, size);
196 }
197 
198 
BUBBLE_SORT(SORT_TYPE * dst,const size_t size)199 void BUBBLE_SORT(SORT_TYPE *dst, const size_t size)
200 {
201   int64_t i;
202   int64_t j;
203   for (i = 0; i < int64_t(size); i++)
204   {
205     for (j = i + 1; j < int64_t(size); j++)
206     {
207       if (SORT_CMP(dst[j], dst[i]) < 0)
208         SORT_SWAP(dst[i], dst[j]);
209     }
210   }
211 }
212 
MERGE_SORT(SORT_TYPE * dst,const size_t size)213 void MERGE_SORT(SORT_TYPE *dst, const size_t size)
214 {
215   if (size < 16)
216   {
217     BINARY_INSERTION_SORT(dst, size);
218     return;
219   }
220 
221   const int64_t middle = size / 2;
222 
223   MERGE_SORT(dst, middle);
224   MERGE_SORT(&dst[middle], size - middle);
225 
226 #ifdef _MSC_VER
227   SORT_TYPE* newdst = (SORT_TYPE*)malloc(size * sizeof(SORT_TYPE));
228 #else
229   SORT_TYPE newdst[size];
230 #endif
231   int64_t out = 0;
232   int64_t i = 0;
233   int64_t j = middle;
234   while (out != size)
235   {
236     if (i < middle)
237     {
238       if (j < int64_t(size))
239       {
240         if (SORT_CMP(dst[i], dst[j]) <= 0)
241           newdst[out] = dst[i++];
242         else
243           newdst[out] = dst[j++];
244       }
245       else
246         newdst[out] = dst[i++];
247     }
248     else
249       newdst[out] = dst[j++];
250     out++;
251   }
252   memcpy(dst, newdst, size * sizeof(SORT_TYPE));
253 
254 #ifdef _MSC_VER
255   free(newdst);
256 #endif
257 }
258 
259 
260 /* quick sort: based on wikipedia */
261 
quick_sort_partition(SORT_TYPE * dst,const int64_t left,const int64_t right,const int64_t pivot)262 static inline int64_t quick_sort_partition(SORT_TYPE *dst, const int64_t left, const int64_t right, const int64_t pivot)
263 {
264   SORT_TYPE value = dst[pivot];
265   SORT_SWAP(dst[pivot], dst[right]);
266   int64_t index = left;
267   int64_t i;
268   for (i = left; i < right; i++)
269   {
270     if (SORT_CMP(dst[i], value) <= 0)
271     {
272       SORT_SWAP(dst[i], dst[index]);
273       index++;
274     }
275   }
276   SORT_SWAP(dst[right], dst[index]);
277   return index;
278 }
279 
quick_sort_recursive(SORT_TYPE * dst,const int64_t left,const int64_t right)280 static void quick_sort_recursive(SORT_TYPE *dst, const int64_t left, const int64_t right)
281 {
282   if (right <= left) return;
283   if ((right - left + 1) < 16)
284   {
285     BINARY_INSERTION_SORT(&dst[left], right - left + 1);
286     return;
287   }
288   const int64_t pivot = left + ((right - left) >> 1);
289   const int64_t new_pivot = quick_sort_partition(dst, left, right, pivot);
290   quick_sort_recursive(dst, left, new_pivot - 1);
291   quick_sort_recursive(dst, new_pivot + 1, right);
292 }
293 
QUICK_SORT(SORT_TYPE * dst,const size_t size)294 void QUICK_SORT(SORT_TYPE *dst, const size_t size)
295 {
296   quick_sort_recursive(dst, 0, size - 1);
297 }
298 
299 
300 /* timsort implementation, based on timsort.txt */
301 
reverse_elements(SORT_TYPE * dst,int64_t start,int64_t end)302 static inline void reverse_elements(SORT_TYPE *dst, int64_t start, int64_t end)
303 {
304   while (1)
305   {
306     if (start >= end) return;
307     SORT_SWAP(dst[start], dst[end]);
308     start++;
309     end--;
310   }
311 }
312 
count_run(SORT_TYPE * dst,const int64_t start,const size_t size)313 static inline int64_t count_run(SORT_TYPE *dst, const int64_t start, const size_t size)
314 {
315   if (size - start == 1) return 1;
316   if (start >= int64_t(size) - 2)
317   {
318     if (SORT_CMP(dst[size - 2], dst[size - 1]) > 0)
319       SORT_SWAP(dst[size - 2], dst[size - 1]);
320     return 2;
321   }
322 
323   int64_t curr = start + 2;
324 
325   if (SORT_CMP(dst[start], dst[start + 1]) <= 0)
326   {
327     // increasing run
328     while (1)
329     {
330       if (curr == size - 1) break;
331       if (SORT_CMP(dst[curr - 1], dst[curr]) > 0) break;
332       curr++;
333     }
334     return curr - start;
335   }
336   else
337   {
338     // decreasing run
339     while (1)
340     {
341       if (curr == size - 1) break;
342       if (SORT_CMP(dst[curr - 1], dst[curr]) <= 0) break;
343       curr++;
344     }
345     // reverse in-place
346     reverse_elements(dst, start, curr - 1);
347     return curr - start;
348   }
349 }
350 
compute_minrun(const uint64_t size)351 static inline int compute_minrun(const uint64_t size)
352 {
353   const int top_bit = 64 - CLZ(size);
354   const int shift = MAX(top_bit, 6) - 6;
355   const int minrun = size >> shift;
356   const uint64_t mask = (1ULL << shift) - 1;
357   if (mask & size) return minrun + 1;
358   return minrun;
359 }
360 
361 #define PUSH_NEXT() do {\
362 len = count_run(dst, curr, size);\
363 run = minrun;\
364 if (run < minrun) run = minrun;\
365 if (run > int64_t(size) - curr) run = size - curr;\
366 if (run > len)\
367 {\
368   binary_insertion_sort_start(&dst[curr], len, run);\
369   len = run;\
370 }\
371 run_stack[stack_curr].start = curr;\
372 run_stack[stack_curr++].length = len;\
373 curr += len;\
374 if (curr == size)\
375 {\
376   /* finish up */ \
377   while (stack_curr > 1) \
378   { \
379     tim_sort_merge(dst, run_stack, stack_curr, store); \
380     run_stack[stack_curr - 2].length += run_stack[stack_curr - 1].length; \
381     stack_curr--; \
382   } \
383   if (store->storage != NULL)\
384   {\
385     free(store->storage);\
386     store->storage = NULL;\
387   }\
388   return;\
389 }\
390 }\
391 while (0)
392 
check_invariant(TIM_SORT_RUN_T * stack,const int stack_curr)393 static inline int check_invariant(TIM_SORT_RUN_T *stack, const int stack_curr)
394 {
395   if (stack_curr < 2) return 1;
396   if (stack_curr == 2)
397   {
398     const int64_t A = stack[stack_curr - 2].length;
399     const int64_t B = stack[stack_curr - 1].length;
400     if (A <= B) return 0;
401     return 1;
402   }
403   const int64_t A = stack[stack_curr - 3].length;
404   const int64_t B = stack[stack_curr - 2].length;
405   const int64_t C = stack[stack_curr - 1].length;
406   if ((A <= B + C) || (B <= C)) return 0;
407   return 1;
408 }
409 
410 typedef struct {
411   size_t alloc;
412   SORT_TYPE *storage;
413 } TEMP_STORAGE_T;
414 
415 
tim_sort_resize(TEMP_STORAGE_T * store,const size_t new_size)416 static inline void tim_sort_resize(TEMP_STORAGE_T *store, const size_t new_size)
417 {
418   if (store->alloc < new_size)
419   {
420     SORT_TYPE *tempstore = (SORT_TYPE*)realloc(store->storage, new_size * sizeof(SORT_TYPE));
421     if (tempstore == NULL)
422     {
423       fprintf(stderr, "Error allocating temporary storage for tim sort: need %zu bytes", sizeof(SORT_TYPE) * new_size);
424       exit(1);
425     }
426     store->storage = tempstore;
427     store->alloc = new_size;
428   }
429 }
430 
tim_sort_merge(SORT_TYPE * dst,const TIM_SORT_RUN_T * stack,const int stack_curr,TEMP_STORAGE_T * store)431 static inline void tim_sort_merge(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const int stack_curr, TEMP_STORAGE_T *store)
432 {
433   const int64_t A = stack[stack_curr - 2].length;
434   const int64_t B = stack[stack_curr - 1].length;
435   const int64_t curr = stack[stack_curr - 2].start;
436 
437   tim_sort_resize(store, MIN(A, B));
438   SORT_TYPE *storage = store->storage;
439 
440   int64_t i, j, k;
441 
442   // left merge
443   if (A < B)
444   {
445     memcpy(storage, &dst[curr], A * sizeof(SORT_TYPE));
446     i = 0;
447     j = curr + A;
448 
449     for (k = curr; k < curr + A + B; k++)
450     {
451       if ((i < A) && (j < curr + A + B))
452       {
453         if (SORT_CMP(storage[i], dst[j]) <= 0)
454           dst[k] = storage[i++];
455         else
456           dst[k] = dst[j++];
457       }
458       else if (i < A)
459       {
460         dst[k] = storage[i++];
461       }
462       else
463         dst[k] = dst[j++];
464     }
465   }
466   // right merge
467   else
468   {
469     memcpy(storage, &dst[curr + A], B * sizeof(SORT_TYPE));
470     i = B - 1;
471     j = curr + A - 1;
472 
473     for (k = curr + A + B - 1; k >= curr; k--)
474     {
475       if ((i >= 0) && (j >= curr))
476       {
477           if (SORT_CMP(dst[j], storage[i]) > 0)
478             dst[k] = dst[j--];
479           else
480             dst[k] = storage[i--];
481       }
482       else if (i >= 0)
483         dst[k] = storage[i--];
484       else
485         dst[k] = dst[j--];
486     }
487   }
488 }
489 
tim_sort_collapse(SORT_TYPE * dst,TIM_SORT_RUN_T * stack,int stack_curr,TEMP_STORAGE_T * store,const size_t size)490 static inline int tim_sort_collapse(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr, TEMP_STORAGE_T *store, const size_t size)
491 {
492   while (1)
493   {
494     // if the stack only has one thing on it, we are done with the collapse
495     if (stack_curr <= 1) break;
496     // if this is the last merge, just do it
497     if ((stack_curr == 2) && (stack[0].length + stack[1].length == size))
498     {
499       tim_sort_merge(dst, stack, stack_curr, store);
500       stack[0].length += stack[1].length;
501       stack_curr--;
502       break;
503     }
504     // check if the invariant is off for a stack of 2 elements
505     else if ((stack_curr == 2) && (stack[0].length <= stack[1].length))
506     {
507       tim_sort_merge(dst, stack, stack_curr, store);
508       stack[0].length += stack[1].length;
509       stack_curr--;
510       break;
511     }
512     else if (stack_curr == 2)
513       break;
514 
515     const int64_t A = stack[stack_curr - 3].length;
516     const int64_t B = stack[stack_curr - 2].length;
517     const int64_t C = stack[stack_curr - 1].length;
518 
519     // check first invariant
520     if (A <= B + C)
521     {
522       if (A < C)
523       {
524         tim_sort_merge(dst, stack, stack_curr - 1, store);
525         stack[stack_curr - 3].length += stack[stack_curr - 2].length;
526         stack[stack_curr - 2] = stack[stack_curr - 1];
527         stack_curr--;
528       }
529       else
530       {
531         tim_sort_merge(dst, stack, stack_curr, store);
532         stack[stack_curr - 2].length += stack[stack_curr - 1].length;
533         stack_curr--;
534       }
535     }
536     // check second invariant
537     else if (B <= C)
538     {
539       tim_sort_merge(dst, stack, stack_curr, store);
540       stack[stack_curr - 2].length += stack[stack_curr - 1].length;
541       stack_curr--;
542     }
543     else
544       break;
545   }
546   return stack_curr;
547 }
548 
TIM_SORT(SORT_TYPE * dst,const size_t size)549 void TIM_SORT(SORT_TYPE *dst, const size_t size)
550 {
551   if (size < 64)
552   {
553     BINARY_INSERTION_SORT(dst, size);
554     return;
555   }
556 
557   // compute the minimum run length
558   const int minrun = compute_minrun(size);
559 
560   // temporary storage for merges
561   TEMP_STORAGE_T _store, *store = &_store;
562   store->alloc = 0;
563   store->storage = NULL;
564 
565   TIM_SORT_RUN_T run_stack[128];
566   int stack_curr = 0;
567   int64_t len, run;
568   int64_t curr = 0;
569 
570   PUSH_NEXT();
571   PUSH_NEXT();
572   PUSH_NEXT();
573 
574   while (1)
575   {
576     if (!check_invariant(run_stack, stack_curr))
577     {
578       stack_curr = tim_sort_collapse(dst, run_stack, stack_curr, store, size);
579       continue;
580     }
581     PUSH_NEXT();
582   }
583 }
584 
585 
586 
587 /* heap sort: based on wikipedia */
588 
heap_sift_down(SORT_TYPE * dst,const int64_t start,const int64_t end)589 static inline void heap_sift_down(SORT_TYPE *dst, const int64_t start, const int64_t end)
590 {
591   int64_t root = start;
592 
593   while ((root << 1) <= end)
594   {
595     int64_t child = root << 1;
596     if ((child < end) && (SORT_CMP(dst[child], dst[child + 1]) < 0))
597       child++;
598     if (SORT_CMP(dst[root], dst[child]) < 0)
599     {
600       SORT_SWAP(dst[root], dst[child]);
601       root = child;
602     }
603     else
604       return;
605   }
606 }
607 
heapify(SORT_TYPE * dst,const size_t size)608 static inline void heapify(SORT_TYPE *dst, const size_t size)
609 {
610   int64_t start = size >> 1;
611   while (start >= 0)
612   {
613     heap_sift_down(dst, start, size - 1);
614     start--;
615   }
616 }
617 
HEAP_SORT(SORT_TYPE * dst,const size_t size)618 void HEAP_SORT(SORT_TYPE *dst, const size_t size)
619 {
620   heapify(dst, size);
621   int64_t end = size - 1;
622 
623   while (end > 0)
624   {
625     SORT_SWAP(dst[end], dst[0]);
626     heap_sift_down(dst, 0, end - 1);
627     end--;
628   }
629 }
630 
631 #undef SORT_CONCAT
632 #undef SORT_MAKE_STR1
633 #undef SORT_MAKE_STR
634 #undef SORT_NAME
635 
636 #undef TEMP_STORAGE_T
637 #undef TIM_SORT_RUN_T
638 #undef PUSH_NEXT
639