1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #ifndef _MSC_VER
5 # include <stdint.h>
6 #endif
7
8 #ifndef SORT_NAME
9 #error "Must declare SORT_NAME"
10 #endif
11
12 #ifndef SORT_TYPE
13 #error "Must declare SORT_TYPE"
14 #endif
15
16 #ifndef SORT_CMP
17 #define SORT_CMP(x, y) ((x) < (y) ? -1 : ((x) == (y) ? 0 : 1))
18 #endif
19
20
21 #ifndef CLZ
22 #ifdef __GNUC__
23 #define CLZ __builtin_clzll
24 #else
25
26 // adapted from Hacker's Delight
clzll(uint64_t x)27 int clzll(uint64_t x) {
28 int n;
29
30 if (x == 0) return(64);
31 n = 0;
32 if (x <= 0x00000000FFFFFFFFL) {n = n + 32; x = x << 32;}
33 if (x <= 0x0000FFFFFFFFFFFFL) {n = n + 16; x = x << 16;}
34 if (x <= 0x00FFFFFFFFFFFFFFL) {n = n + 8; x = x << 8;}
35 if (x <= 0x0FFFFFFFFFFFFFFFL) {n = n + 4; x = x << 4;}
36 if (x <= 0x3FFFFFFFFFFFFFFFL) {n = n + 2; x = x << 2;}
37 if (x <= 0x7FFFFFFFFFFFFFFFL) {n = n + 1;}
38 return n;
39 }
40
41 #define CLZ clzll
42 #endif
43 #endif
44
45
46 #define SORT_SWAP(x,y) {SORT_TYPE __SORT_SWAP_t = (x); (x) = (y); (y) = __SORT_SWAP_t;}
47
48 #define SORT_CONCAT(x, y) x ## _ ## y
49 #define SORT_MAKE_STR1(x, y) SORT_CONCAT(x,y)
50 #define SORT_MAKE_STR(x) SORT_MAKE_STR1(SORT_NAME,x)
51
52
53 #define SHELL_SORT SORT_MAKE_STR(shell_sort)
54 #define BINARY_INSERTION_SORT SORT_MAKE_STR(binary_insertion_sort)
55 #define HEAP_SORT SORT_MAKE_STR(heap_sort)
56 #define QUICK_SORT SORT_MAKE_STR(quick_sort)
57 #define MERGE_SORT SORT_MAKE_STR(merge_sort)
58 #define BUBBLE_SORT SORT_MAKE_STR(bubble_sort)
59 #define TIM_SORT SORT_MAKE_STR(tim_sort)
60
61 #define TIM_SORT_RUN_T SORT_MAKE_STR(tim_sort_run_t)
62 #define TEMP_STORAGE_T SORT_MAKE_STR(temp_storage_t)
63
64 #ifndef MAX
65 #define MAX(x,y) (((x) > (y) ? (x) : (y)))
66 #endif
67 #ifndef MIN
68 #define MIN(x,y) (((x) < (y) ? (x) : (y)))
69 #endif
70
71 typedef struct {
72 int64_t start;
73 int64_t length;
74 } TIM_SORT_RUN_T;
75
76
77 void SHELL_SORT(SORT_TYPE *dst, const size_t size);
78 void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size);
79 void HEAP_SORT(SORT_TYPE *dst, const size_t size);
80 void QUICK_SORT(SORT_TYPE *dst, const size_t size);
81 void MERGE_SORT(SORT_TYPE *dst, const size_t size);
82 void BUBBLE_SORT(SORT_TYPE *dst, const size_t size);
83 void TIM_SORT(SORT_TYPE *dst, const size_t size);
84
85
86 /* From http://oeis.org/classic/A102549 */
87 static const uint64_t shell_gaps[48] = {1, 4, 10, 23, 57, 132, 301, 701, 1750, 4376, 10941, 27353, 68383, 170958, 427396, 1068491, 2671228, 6678071, 16695178, 41737946, 104344866, 260862166, 652155416, 1630388541, 4075971353LL, 10189928383LL, 25474820958LL, 63687052396LL, 159217630991LL, 398044077478LL, 995110193696LL, 2487775484241LL, 6219438710603LL, 15548596776508LL, 38871491941271LL, 97178729853178LL, 242946824632946LL, 607367061582366LL, 1518417653955916LL, 3796044134889791LL, 9490110337224478LL, 23725275843061196LL, 59313189607652991LL, 148282974019132478LL, 370707435047831196LL, 926768587619577991LL, 2316921469048944978LL, 5792303672622362446LL};
88
89 /* Shell sort implementation based on Wikipedia article
90 http://en.wikipedia.org/wiki/Shell_sort
91 */
SHELL_SORT(SORT_TYPE * dst,const size_t size)92 void SHELL_SORT(SORT_TYPE *dst, const size_t size)
93 {
94 // TODO: binary search to find first gap?
95 int inci = 47;
96 int64_t inc = shell_gaps[inci];
97 while (inc > int64_t(size >> 1))
98 {
99 inc = shell_gaps[--inci];
100 }
101 int64_t i;
102 while (1)
103 {
104 for (i = inc; i < int64_t(size); i++)
105 {
106 SORT_TYPE temp = dst[i];
107 int64_t j = i;
108 while ((j >= inc) && (SORT_CMP(dst[j - inc], temp) > 0))
109 {
110 dst[j] = dst[j - inc];
111 j -= inc;
112 }
113 dst[j] = temp;
114 }
115 if (inc == 1) break;
116 inc = shell_gaps[--inci];
117 }
118 }
119
120 /* Function used to do a binary search for binary insertion sort */
binary_insertion_find(SORT_TYPE * dst,const SORT_TYPE x,const size_t size)121 static inline int64_t binary_insertion_find(SORT_TYPE *dst, const SORT_TYPE x, const size_t size)
122 {
123 int64_t l, c, r;
124 l = 0;
125 r = size - 1;
126 c = r >> 1;
127 SORT_TYPE lx, cx, rx;
128 lx = dst[l];
129
130 /* check for beginning conditions */
131 if (SORT_CMP(x, lx) < 0)
132 return 0;
133 else if (SORT_CMP(x, lx) == 0)
134 {
135 int64_t i = 1;
136 while (SORT_CMP(x, dst[i]) == 0) i++;
137 return i;
138 }
139
140 rx = dst[r];
141 // guaranteed not to be >= rx
142 cx = dst[c];
143 while (1)
144 {
145 const int val = SORT_CMP(x, cx);
146 if (val < 0)
147 {
148 if (c - l <= 1) return c;
149 r = c;
150 rx = cx;
151 }
152 else if (val > 0)
153 {
154 if (r - c <= 1) return c + 1;
155 l = c;
156 lx = cx;
157 }
158 else
159 {
160 do
161 {
162 cx = dst[++c];
163 } while (SORT_CMP(x, cx) == 0);
164 return c;
165 }
166 c = l + ((r - l) >> 1);
167 cx = dst[c];
168 }
169 }
170
171 /* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */
binary_insertion_sort_start(SORT_TYPE * dst,const size_t start,const size_t size)172 static inline void binary_insertion_sort_start(SORT_TYPE *dst, const size_t start, const size_t size)
173 {
174 int64_t i;
175 for (i = start; i < int64_t(size); i++)
176 {
177 int64_t j;
178 /* If this entry is already correct, just move along */
179 if (SORT_CMP(dst[i - 1], dst[i]) <= 0) continue;
180
181 /* Else we need to find the right place, shift everything over, and squeeze in */
182 SORT_TYPE x = dst[i];
183 int64_t location = binary_insertion_find(dst, x, i);
184 for (j = i - 1; j >= location; j--)
185 {
186 dst[j + 1] = dst[j];
187 }
188 dst[location] = x;
189 }
190 }
191
192 /* Binary insertion sort */
BINARY_INSERTION_SORT(SORT_TYPE * dst,const size_t size)193 void BINARY_INSERTION_SORT(SORT_TYPE *dst, const size_t size)
194 {
195 binary_insertion_sort_start(dst, 1, size);
196 }
197
198
BUBBLE_SORT(SORT_TYPE * dst,const size_t size)199 void BUBBLE_SORT(SORT_TYPE *dst, const size_t size)
200 {
201 int64_t i;
202 int64_t j;
203 for (i = 0; i < int64_t(size); i++)
204 {
205 for (j = i + 1; j < int64_t(size); j++)
206 {
207 if (SORT_CMP(dst[j], dst[i]) < 0)
208 SORT_SWAP(dst[i], dst[j]);
209 }
210 }
211 }
212
MERGE_SORT(SORT_TYPE * dst,const size_t size)213 void MERGE_SORT(SORT_TYPE *dst, const size_t size)
214 {
215 if (size < 16)
216 {
217 BINARY_INSERTION_SORT(dst, size);
218 return;
219 }
220
221 const int64_t middle = size / 2;
222
223 MERGE_SORT(dst, middle);
224 MERGE_SORT(&dst[middle], size - middle);
225
226 #ifdef _MSC_VER
227 SORT_TYPE* newdst = (SORT_TYPE*)malloc(size * sizeof(SORT_TYPE));
228 #else
229 SORT_TYPE newdst[size];
230 #endif
231 int64_t out = 0;
232 int64_t i = 0;
233 int64_t j = middle;
234 while (out != size)
235 {
236 if (i < middle)
237 {
238 if (j < int64_t(size))
239 {
240 if (SORT_CMP(dst[i], dst[j]) <= 0)
241 newdst[out] = dst[i++];
242 else
243 newdst[out] = dst[j++];
244 }
245 else
246 newdst[out] = dst[i++];
247 }
248 else
249 newdst[out] = dst[j++];
250 out++;
251 }
252 memcpy(dst, newdst, size * sizeof(SORT_TYPE));
253
254 #ifdef _MSC_VER
255 free(newdst);
256 #endif
257 }
258
259
260 /* quick sort: based on wikipedia */
261
quick_sort_partition(SORT_TYPE * dst,const int64_t left,const int64_t right,const int64_t pivot)262 static inline int64_t quick_sort_partition(SORT_TYPE *dst, const int64_t left, const int64_t right, const int64_t pivot)
263 {
264 SORT_TYPE value = dst[pivot];
265 SORT_SWAP(dst[pivot], dst[right]);
266 int64_t index = left;
267 int64_t i;
268 for (i = left; i < right; i++)
269 {
270 if (SORT_CMP(dst[i], value) <= 0)
271 {
272 SORT_SWAP(dst[i], dst[index]);
273 index++;
274 }
275 }
276 SORT_SWAP(dst[right], dst[index]);
277 return index;
278 }
279
quick_sort_recursive(SORT_TYPE * dst,const int64_t left,const int64_t right)280 static void quick_sort_recursive(SORT_TYPE *dst, const int64_t left, const int64_t right)
281 {
282 if (right <= left) return;
283 if ((right - left + 1) < 16)
284 {
285 BINARY_INSERTION_SORT(&dst[left], right - left + 1);
286 return;
287 }
288 const int64_t pivot = left + ((right - left) >> 1);
289 const int64_t new_pivot = quick_sort_partition(dst, left, right, pivot);
290 quick_sort_recursive(dst, left, new_pivot - 1);
291 quick_sort_recursive(dst, new_pivot + 1, right);
292 }
293
QUICK_SORT(SORT_TYPE * dst,const size_t size)294 void QUICK_SORT(SORT_TYPE *dst, const size_t size)
295 {
296 quick_sort_recursive(dst, 0, size - 1);
297 }
298
299
300 /* timsort implementation, based on timsort.txt */
301
reverse_elements(SORT_TYPE * dst,int64_t start,int64_t end)302 static inline void reverse_elements(SORT_TYPE *dst, int64_t start, int64_t end)
303 {
304 while (1)
305 {
306 if (start >= end) return;
307 SORT_SWAP(dst[start], dst[end]);
308 start++;
309 end--;
310 }
311 }
312
count_run(SORT_TYPE * dst,const int64_t start,const size_t size)313 static inline int64_t count_run(SORT_TYPE *dst, const int64_t start, const size_t size)
314 {
315 if (size - start == 1) return 1;
316 if (start >= int64_t(size) - 2)
317 {
318 if (SORT_CMP(dst[size - 2], dst[size - 1]) > 0)
319 SORT_SWAP(dst[size - 2], dst[size - 1]);
320 return 2;
321 }
322
323 int64_t curr = start + 2;
324
325 if (SORT_CMP(dst[start], dst[start + 1]) <= 0)
326 {
327 // increasing run
328 while (1)
329 {
330 if (curr == size - 1) break;
331 if (SORT_CMP(dst[curr - 1], dst[curr]) > 0) break;
332 curr++;
333 }
334 return curr - start;
335 }
336 else
337 {
338 // decreasing run
339 while (1)
340 {
341 if (curr == size - 1) break;
342 if (SORT_CMP(dst[curr - 1], dst[curr]) <= 0) break;
343 curr++;
344 }
345 // reverse in-place
346 reverse_elements(dst, start, curr - 1);
347 return curr - start;
348 }
349 }
350
compute_minrun(const uint64_t size)351 static inline int compute_minrun(const uint64_t size)
352 {
353 const int top_bit = 64 - CLZ(size);
354 const int shift = MAX(top_bit, 6) - 6;
355 const int minrun = size >> shift;
356 const uint64_t mask = (1ULL << shift) - 1;
357 if (mask & size) return minrun + 1;
358 return minrun;
359 }
360
361 #define PUSH_NEXT() do {\
362 len = count_run(dst, curr, size);\
363 run = minrun;\
364 if (run < minrun) run = minrun;\
365 if (run > int64_t(size) - curr) run = size - curr;\
366 if (run > len)\
367 {\
368 binary_insertion_sort_start(&dst[curr], len, run);\
369 len = run;\
370 }\
371 run_stack[stack_curr].start = curr;\
372 run_stack[stack_curr++].length = len;\
373 curr += len;\
374 if (curr == size)\
375 {\
376 /* finish up */ \
377 while (stack_curr > 1) \
378 { \
379 tim_sort_merge(dst, run_stack, stack_curr, store); \
380 run_stack[stack_curr - 2].length += run_stack[stack_curr - 1].length; \
381 stack_curr--; \
382 } \
383 if (store->storage != NULL)\
384 {\
385 free(store->storage);\
386 store->storage = NULL;\
387 }\
388 return;\
389 }\
390 }\
391 while (0)
392
check_invariant(TIM_SORT_RUN_T * stack,const int stack_curr)393 static inline int check_invariant(TIM_SORT_RUN_T *stack, const int stack_curr)
394 {
395 if (stack_curr < 2) return 1;
396 if (stack_curr == 2)
397 {
398 const int64_t A = stack[stack_curr - 2].length;
399 const int64_t B = stack[stack_curr - 1].length;
400 if (A <= B) return 0;
401 return 1;
402 }
403 const int64_t A = stack[stack_curr - 3].length;
404 const int64_t B = stack[stack_curr - 2].length;
405 const int64_t C = stack[stack_curr - 1].length;
406 if ((A <= B + C) || (B <= C)) return 0;
407 return 1;
408 }
409
410 typedef struct {
411 size_t alloc;
412 SORT_TYPE *storage;
413 } TEMP_STORAGE_T;
414
415
tim_sort_resize(TEMP_STORAGE_T * store,const size_t new_size)416 static inline void tim_sort_resize(TEMP_STORAGE_T *store, const size_t new_size)
417 {
418 if (store->alloc < new_size)
419 {
420 SORT_TYPE *tempstore = (SORT_TYPE*)realloc(store->storage, new_size * sizeof(SORT_TYPE));
421 if (tempstore == NULL)
422 {
423 fprintf(stderr, "Error allocating temporary storage for tim sort: need %zu bytes", sizeof(SORT_TYPE) * new_size);
424 exit(1);
425 }
426 store->storage = tempstore;
427 store->alloc = new_size;
428 }
429 }
430
tim_sort_merge(SORT_TYPE * dst,const TIM_SORT_RUN_T * stack,const int stack_curr,TEMP_STORAGE_T * store)431 static inline void tim_sort_merge(SORT_TYPE *dst, const TIM_SORT_RUN_T *stack, const int stack_curr, TEMP_STORAGE_T *store)
432 {
433 const int64_t A = stack[stack_curr - 2].length;
434 const int64_t B = stack[stack_curr - 1].length;
435 const int64_t curr = stack[stack_curr - 2].start;
436
437 tim_sort_resize(store, MIN(A, B));
438 SORT_TYPE *storage = store->storage;
439
440 int64_t i, j, k;
441
442 // left merge
443 if (A < B)
444 {
445 memcpy(storage, &dst[curr], A * sizeof(SORT_TYPE));
446 i = 0;
447 j = curr + A;
448
449 for (k = curr; k < curr + A + B; k++)
450 {
451 if ((i < A) && (j < curr + A + B))
452 {
453 if (SORT_CMP(storage[i], dst[j]) <= 0)
454 dst[k] = storage[i++];
455 else
456 dst[k] = dst[j++];
457 }
458 else if (i < A)
459 {
460 dst[k] = storage[i++];
461 }
462 else
463 dst[k] = dst[j++];
464 }
465 }
466 // right merge
467 else
468 {
469 memcpy(storage, &dst[curr + A], B * sizeof(SORT_TYPE));
470 i = B - 1;
471 j = curr + A - 1;
472
473 for (k = curr + A + B - 1; k >= curr; k--)
474 {
475 if ((i >= 0) && (j >= curr))
476 {
477 if (SORT_CMP(dst[j], storage[i]) > 0)
478 dst[k] = dst[j--];
479 else
480 dst[k] = storage[i--];
481 }
482 else if (i >= 0)
483 dst[k] = storage[i--];
484 else
485 dst[k] = dst[j--];
486 }
487 }
488 }
489
tim_sort_collapse(SORT_TYPE * dst,TIM_SORT_RUN_T * stack,int stack_curr,TEMP_STORAGE_T * store,const size_t size)490 static inline int tim_sort_collapse(SORT_TYPE *dst, TIM_SORT_RUN_T *stack, int stack_curr, TEMP_STORAGE_T *store, const size_t size)
491 {
492 while (1)
493 {
494 // if the stack only has one thing on it, we are done with the collapse
495 if (stack_curr <= 1) break;
496 // if this is the last merge, just do it
497 if ((stack_curr == 2) && (stack[0].length + stack[1].length == size))
498 {
499 tim_sort_merge(dst, stack, stack_curr, store);
500 stack[0].length += stack[1].length;
501 stack_curr--;
502 break;
503 }
504 // check if the invariant is off for a stack of 2 elements
505 else if ((stack_curr == 2) && (stack[0].length <= stack[1].length))
506 {
507 tim_sort_merge(dst, stack, stack_curr, store);
508 stack[0].length += stack[1].length;
509 stack_curr--;
510 break;
511 }
512 else if (stack_curr == 2)
513 break;
514
515 const int64_t A = stack[stack_curr - 3].length;
516 const int64_t B = stack[stack_curr - 2].length;
517 const int64_t C = stack[stack_curr - 1].length;
518
519 // check first invariant
520 if (A <= B + C)
521 {
522 if (A < C)
523 {
524 tim_sort_merge(dst, stack, stack_curr - 1, store);
525 stack[stack_curr - 3].length += stack[stack_curr - 2].length;
526 stack[stack_curr - 2] = stack[stack_curr - 1];
527 stack_curr--;
528 }
529 else
530 {
531 tim_sort_merge(dst, stack, stack_curr, store);
532 stack[stack_curr - 2].length += stack[stack_curr - 1].length;
533 stack_curr--;
534 }
535 }
536 // check second invariant
537 else if (B <= C)
538 {
539 tim_sort_merge(dst, stack, stack_curr, store);
540 stack[stack_curr - 2].length += stack[stack_curr - 1].length;
541 stack_curr--;
542 }
543 else
544 break;
545 }
546 return stack_curr;
547 }
548
TIM_SORT(SORT_TYPE * dst,const size_t size)549 void TIM_SORT(SORT_TYPE *dst, const size_t size)
550 {
551 if (size < 64)
552 {
553 BINARY_INSERTION_SORT(dst, size);
554 return;
555 }
556
557 // compute the minimum run length
558 const int minrun = compute_minrun(size);
559
560 // temporary storage for merges
561 TEMP_STORAGE_T _store, *store = &_store;
562 store->alloc = 0;
563 store->storage = NULL;
564
565 TIM_SORT_RUN_T run_stack[128];
566 int stack_curr = 0;
567 int64_t len, run;
568 int64_t curr = 0;
569
570 PUSH_NEXT();
571 PUSH_NEXT();
572 PUSH_NEXT();
573
574 while (1)
575 {
576 if (!check_invariant(run_stack, stack_curr))
577 {
578 stack_curr = tim_sort_collapse(dst, run_stack, stack_curr, store, size);
579 continue;
580 }
581 PUSH_NEXT();
582 }
583 }
584
585
586
587 /* heap sort: based on wikipedia */
588
heap_sift_down(SORT_TYPE * dst,const int64_t start,const int64_t end)589 static inline void heap_sift_down(SORT_TYPE *dst, const int64_t start, const int64_t end)
590 {
591 int64_t root = start;
592
593 while ((root << 1) <= end)
594 {
595 int64_t child = root << 1;
596 if ((child < end) && (SORT_CMP(dst[child], dst[child + 1]) < 0))
597 child++;
598 if (SORT_CMP(dst[root], dst[child]) < 0)
599 {
600 SORT_SWAP(dst[root], dst[child]);
601 root = child;
602 }
603 else
604 return;
605 }
606 }
607
heapify(SORT_TYPE * dst,const size_t size)608 static inline void heapify(SORT_TYPE *dst, const size_t size)
609 {
610 int64_t start = size >> 1;
611 while (start >= 0)
612 {
613 heap_sift_down(dst, start, size - 1);
614 start--;
615 }
616 }
617
HEAP_SORT(SORT_TYPE * dst,const size_t size)618 void HEAP_SORT(SORT_TYPE *dst, const size_t size)
619 {
620 heapify(dst, size);
621 int64_t end = size - 1;
622
623 while (end > 0)
624 {
625 SORT_SWAP(dst[end], dst[0]);
626 heap_sift_down(dst, 0, end - 1);
627 end--;
628 }
629 }
630
631 #undef SORT_CONCAT
632 #undef SORT_MAKE_STR1
633 #undef SORT_MAKE_STR
634 #undef SORT_NAME
635
636 #undef TEMP_STORAGE_T
637 #undef TIM_SORT_RUN_T
638 #undef PUSH_NEXT
639