1 /*
2  * The code of this file was taken from http://jeffreystedfast.blogspot.be,
3  * where it was posted in 2011 by Jeffrey Stedfast under the MIT license.
4  * The MIT license text is as follows:
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #include <errno.h>
26 #include <string.h>
27 #include <stdlib.h>
28 #include <isl_sort.h>
29 
30 #define MID(lo, hi) (lo + ((hi - lo) >> 1))
31 
32 /* The code here is an optimized merge sort. Starting from a generic merge sort
33  * the following optimizations were applied:
34  *
35  * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and
36  *   every element into a temporary buffer, blocks of elements are copied
37  *   at a time.
38  *
39  * o To reduce the number of memcpy() calls further, copying leading
40  *   and trailing elements into our temporary buffer is avoided, in case it is
41  *   not necessary to merge them.
42  *
43  * A further optimization could be to specialize memcpy calls based on the
44  * size of the types we compare. For now, this code does not include the
45  * relevant optimization, as clang e.g. inlines a very efficient memcpy()
46  * implementation. It is not clear, that the specialized version as provided in
47  * the blog post, is really superior to the one that will be inlined by
48  * default. So we decided to keep the code simple until this optimization was
49  * proven to be beneficial.
50  */
51 
52 static void
msort(void * array,void * buf,size_t low,size_t high,size_t size,int (* compare)(const void *,const void *,void *),void * arg)53 msort (void *array, void *buf, size_t low, size_t high, size_t size,
54        int (* compare) (const void *, const void *, void *), void *arg)
55 {
56     char *a1, *al, *am, *ah, *ls, *hs, *lo, *hi, *b;
57     size_t copied = 0;
58     size_t mid;
59 
60     mid = MID (low, high);
61 
62     if (mid + 1 < high)
63         msort (array, buf, mid + 1, high, size, compare, arg);
64 
65     if (mid > low)
66         msort (array, buf, low, mid, size, compare, arg);
67 
68     ah = ((char *) array) + ((high + 1) * size);
69     am = ((char *) array) + ((mid + 1) * size);
70     a1 = al = ((char *) array) + (low * size);
71 
72     b = (char *) buf;
73     lo = al;
74     hi = am;
75 
76     do {
77         ls = lo;
78         hs = hi;
79 
80         if (lo > al || hi > am) {
81             /* our last loop already compared lo & hi and found lo <= hi */
82             lo += size;
83         }
84 
85         while (lo < am && compare (lo, hi, arg) <= 0)
86             lo += size;
87 
88         if (lo < am) {
89             if (copied == 0) {
90                 /* avoid copying the leading items */
91                 a1 = lo;
92                 ls = lo;
93             }
94 
95             /* our last compare tells us hi < lo */
96             hi += size;
97 
98             while (hi < ah && compare (hi, lo, arg) < 0)
99                 hi += size;
100 
101             if (lo > ls) {
102                 memcpy (b, ls, lo - ls);
103                 copied += (lo - ls);
104                 b += (lo - ls);
105             }
106 
107             memcpy (b, hs, hi - hs);
108             copied += (hi - hs);
109             b += (hi - hs);
110         } else if (copied) {
111             memcpy (b, ls, lo - ls);
112             copied += (lo - ls);
113             b += (lo - ls);
114 
115             /* copy everything we needed to re-order back into array */
116             memcpy (a1, buf, copied);
117             return;
118         } else {
119             /* everything already in order */
120             return;
121         }
122     } while (hi < ah);
123 
124     if (lo < am) {
125         memcpy (b, lo, am - lo);
126         copied += (am - lo);
127     }
128 
129     memcpy (a1, buf, copied);
130 }
131 
132 static int
MergeSort(void * base,size_t nmemb,size_t size,int (* compare)(const void *,const void *,void *),void * arg)133 MergeSort (void *base, size_t nmemb, size_t size,
134            int (* compare) (const void *, const void *, void *), void *arg)
135 {
136     void *tmp;
137 
138     if (nmemb < 2)
139         return 0;
140 
141     if (!(tmp = malloc (nmemb * size))) {
142         errno = ENOMEM;
143         return -1;
144     }
145 
146     msort (base, tmp, 0, nmemb - 1, size, compare, arg);
147 
148     free (tmp);
149 
150     return 0;
151 }
152 
isl_sort(void * const pbase,size_t total_elems,size_t size,int (* cmp)(const void *,const void *,void * arg),void * arg)153 int isl_sort(void *const pbase, size_t total_elems, size_t size,
154 	int (*cmp)(const void *, const void *, void *arg), void *arg)
155 {
156     return MergeSort (pbase, total_elems, size, cmp, arg);
157 }
158