1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 
8 #include "common.h"
9 
10 /**
11  * An array-of-pointers implementation of Python's Timsort
12  * Based on code by Christopher Swenson under the MIT license
13  *
14  * Copyright (c) 2010 Christopher Swenson
15  * Copyright (c) 2011 Vicent Marti
16  */
17 
18 #ifndef MAX
19 #	define MAX(x,y) (((x) > (y) ? (x) : (y)))
20 #endif
21 
22 #ifndef MIN
23 #	define MIN(x,y) (((x) < (y) ? (x) : (y)))
24 #endif
25 
binsearch(void ** dst,const void * x,size_t size,git__sort_r_cmp cmp,void * payload)26 static int binsearch(
27 	void **dst, const void *x, size_t size, git__sort_r_cmp cmp, void *payload)
28 {
29 	int l, c, r;
30 	void *lx, *cx;
31 
32 	l = 0;
33 	r = (int)size - 1;
34 	c = r >> 1;
35 	lx = dst[l];
36 
37 	/* check for beginning conditions */
38 	if (cmp(x, lx, payload) < 0)
39 		return 0;
40 
41 	else if (cmp(x, lx, payload) == 0) {
42 		int i = 1;
43 		while (cmp(x, dst[i], payload) == 0)
44 			i++;
45 		return i;
46 	}
47 
48 	/* guaranteed not to be >= rx */
49 	cx = dst[c];
50 	while (1) {
51 		const int val = cmp(x, cx, payload);
52 		if (val < 0) {
53 			if (c - l <= 1) return c;
54 			r = c;
55 		} else if (val > 0) {
56 			if (r - c <= 1) return c + 1;
57 			l = c;
58 			lx = cx;
59 		} else {
60 			do {
61 				cx = dst[++c];
62 			} while (cmp(x, cx, payload) == 0);
63 			return c;
64 		}
65 		c = l + ((r - l) >> 1);
66 		cx = dst[c];
67 	}
68 }
69 
70 /* Binary insertion sort, but knowing that the first "start" entries are sorted. Used in timsort. */
bisort(void ** dst,size_t start,size_t size,git__sort_r_cmp cmp,void * payload)71 static void bisort(
72 	void **dst, size_t start, size_t size, git__sort_r_cmp cmp, void *payload)
73 {
74 	size_t i;
75 	void *x;
76 	int location;
77 
78 	for (i = start; i < size; i++) {
79 		int j;
80 		/* If this entry is already correct, just move along */
81 		if (cmp(dst[i - 1], dst[i], payload) <= 0)
82 			continue;
83 
84 		/* Else we need to find the right place, shift everything over, and squeeze in */
85 		x = dst[i];
86 		location = binsearch(dst, x, i, cmp, payload);
87 		for (j = (int)i - 1; j >= location; j--) {
88 			dst[j + 1] = dst[j];
89 		}
90 		dst[location] = x;
91 	}
92 }
93 
94 
95 /* timsort implementation, based on timsort.txt */
96 struct tsort_run {
97 	ssize_t start;
98 	ssize_t length;
99 };
100 
101 struct tsort_store {
102 	size_t alloc;
103 	git__sort_r_cmp cmp;
104 	void *payload;
105 	void **storage;
106 };
107 
reverse_elements(void ** dst,ssize_t start,ssize_t end)108 static void reverse_elements(void **dst, ssize_t start, ssize_t end)
109 {
110 	while (start < end) {
111 		void *tmp = dst[start];
112 		dst[start] = dst[end];
113 		dst[end] = tmp;
114 
115 		start++;
116 		end--;
117 	}
118 }
119 
count_run(void ** dst,ssize_t start,ssize_t size,struct tsort_store * store)120 static ssize_t count_run(
121 	void **dst, ssize_t start, ssize_t size, struct tsort_store *store)
122 {
123 	ssize_t curr = start + 2;
124 
125 	if (size - start == 1)
126 		return 1;
127 
128 	if (start >= size - 2) {
129 		if (store->cmp(dst[size - 2], dst[size - 1], store->payload) > 0) {
130 			void *tmp = dst[size - 1];
131 			dst[size - 1] = dst[size - 2];
132 			dst[size - 2] = tmp;
133 		}
134 
135 		return 2;
136 	}
137 
138 	if (store->cmp(dst[start], dst[start + 1], store->payload) <= 0) {
139 		while (curr < size - 1 &&
140 				store->cmp(dst[curr - 1], dst[curr], store->payload) <= 0)
141 			curr++;
142 
143 		return curr - start;
144 	} else {
145 		while (curr < size - 1 &&
146 				store->cmp(dst[curr - 1], dst[curr], store->payload) > 0)
147 			curr++;
148 
149 		/* reverse in-place */
150 		reverse_elements(dst, start, curr - 1);
151 		return curr - start;
152 	}
153 }
154 
compute_minrun(size_t n)155 static size_t compute_minrun(size_t n)
156 {
157 	int r = 0;
158 	while (n >= 64) {
159 		r |= n & 1;
160 		n >>= 1;
161 	}
162 	return n + r;
163 }
164 
check_invariant(struct tsort_run * stack,ssize_t stack_curr)165 static int check_invariant(struct tsort_run *stack, ssize_t stack_curr)
166 {
167 	if (stack_curr < 2)
168 		return 1;
169 
170 	else if (stack_curr == 2) {
171 		const ssize_t A = stack[stack_curr - 2].length;
172 		const ssize_t B = stack[stack_curr - 1].length;
173 		return (A > B);
174 	} else {
175 		const ssize_t A = stack[stack_curr - 3].length;
176 		const ssize_t B = stack[stack_curr - 2].length;
177 		const ssize_t C = stack[stack_curr - 1].length;
178 		return !((A <= B + C) || (B <= C));
179 	}
180 }
181 
resize(struct tsort_store * store,size_t new_size)182 static int resize(struct tsort_store *store, size_t new_size)
183 {
184 	if (store->alloc < new_size) {
185 		void **tempstore;
186 
187 		tempstore = git__reallocarray(store->storage, new_size, sizeof(void *));
188 
189 		/**
190 		 * Do not propagate on OOM; this will abort the sort and
191 		 * leave the array unsorted, but no error code will be
192 		 * raised
193 		 */
194 		if (tempstore == NULL)
195 			return -1;
196 
197 		store->storage = tempstore;
198 		store->alloc = new_size;
199 	}
200 
201 	return 0;
202 }
203 
merge(void ** dst,const struct tsort_run * stack,ssize_t stack_curr,struct tsort_store * store)204 static void merge(void **dst, const struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store)
205 {
206 	const ssize_t A = stack[stack_curr - 2].length;
207 	const ssize_t B = stack[stack_curr - 1].length;
208 	const ssize_t curr = stack[stack_curr - 2].start;
209 
210 	void **storage;
211 	ssize_t i, j, k;
212 
213 	if (resize(store, MIN(A, B)) < 0)
214 		return;
215 
216 	storage = store->storage;
217 
218 	/* left merge */
219 	if (A < B) {
220 		memcpy(storage, &dst[curr], A * sizeof(void *));
221 		i = 0;
222 		j = curr + A;
223 
224 		for (k = curr; k < curr + A + B; k++) {
225 			if ((i < A) && (j < curr + A + B)) {
226 				if (store->cmp(storage[i], dst[j], store->payload) <= 0)
227 					dst[k] = storage[i++];
228 				else
229 					dst[k] = dst[j++];
230 			} else if (i < A) {
231 				dst[k] = storage[i++];
232 			} else
233 				dst[k] = dst[j++];
234 		}
235 	} else {
236 		memcpy(storage, &dst[curr + A], B * sizeof(void *));
237 		i = B - 1;
238 		j = curr + A - 1;
239 
240 		for (k = curr + A + B - 1; k >= curr; k--) {
241 			if ((i >= 0) && (j >= curr)) {
242 				if (store->cmp(dst[j], storage[i], store->payload) > 0)
243 					dst[k] = dst[j--];
244 				else
245 					dst[k] = storage[i--];
246 			} else if (i >= 0)
247 				dst[k] = storage[i--];
248 			else
249 				dst[k] = dst[j--];
250 		}
251 	}
252 }
253 
collapse(void ** dst,struct tsort_run * stack,ssize_t stack_curr,struct tsort_store * store,ssize_t size)254 static ssize_t collapse(void **dst, struct tsort_run *stack, ssize_t stack_curr, struct tsort_store *store, ssize_t size)
255 {
256 	ssize_t A, B, C;
257 
258 	while (1) {
259 		/* if the stack only has one thing on it, we are done with the collapse */
260 		if (stack_curr <= 1)
261 			break;
262 
263 		/* if this is the last merge, just do it */
264 		if ((stack_curr == 2) && (stack[0].length + stack[1].length == size)) {
265 			merge(dst, stack, stack_curr, store);
266 			stack[0].length += stack[1].length;
267 			stack_curr--;
268 			break;
269 		}
270 
271 		/* check if the invariant is off for a stack of 2 elements */
272 		else if ((stack_curr == 2) && (stack[0].length <= stack[1].length)) {
273 			merge(dst, stack, stack_curr, store);
274 			stack[0].length += stack[1].length;
275 			stack_curr--;
276 			break;
277 		}
278 		else if (stack_curr == 2)
279 			break;
280 
281 		A = stack[stack_curr - 3].length;
282 		B = stack[stack_curr - 2].length;
283 		C = stack[stack_curr - 1].length;
284 
285 		/* check first invariant */
286 		if (A <= B + C) {
287 			if (A < C) {
288 				merge(dst, stack, stack_curr - 1, store);
289 				stack[stack_curr - 3].length += stack[stack_curr - 2].length;
290 				stack[stack_curr - 2] = stack[stack_curr - 1];
291 				stack_curr--;
292 			} else {
293 				merge(dst, stack, stack_curr, store);
294 				stack[stack_curr - 2].length += stack[stack_curr - 1].length;
295 				stack_curr--;
296 			}
297 		} else if (B <= C) {
298 			merge(dst, stack, stack_curr, store);
299 			stack[stack_curr - 2].length += stack[stack_curr - 1].length;
300 			stack_curr--;
301 		} else
302 			break;
303 	}
304 
305 	return stack_curr;
306 }
307 
308 #define PUSH_NEXT() do {\
309 	len = count_run(dst, curr, size, store);\
310 	run = minrun;\
311 	if (run > (ssize_t)size - curr) run = size - curr;\
312 	if (run > len) {\
313 		bisort(&dst[curr], len, run, cmp, payload);\
314 		len = run;\
315 	}\
316 	run_stack[stack_curr].start = curr;\
317 	run_stack[stack_curr++].length = len;\
318 	curr += len;\
319 	if (curr == (ssize_t)size) {\
320 		/* finish up */ \
321 		while (stack_curr > 1) { \
322 			merge(dst, run_stack, stack_curr, store); \
323 			run_stack[stack_curr - 2].length += run_stack[stack_curr - 1].length; \
324 			stack_curr--; \
325 		} \
326 		if (store->storage != NULL) {\
327 			git__free(store->storage);\
328 			store->storage = NULL;\
329 		}\
330 		return;\
331 	}\
332 }\
333 while (0)
334 
git__tsort_r(void ** dst,size_t size,git__sort_r_cmp cmp,void * payload)335 void git__tsort_r(
336 	void **dst, size_t size, git__sort_r_cmp cmp, void *payload)
337 {
338 	struct tsort_store _store, *store = &_store;
339 	struct tsort_run run_stack[128];
340 
341 	ssize_t stack_curr = 0;
342 	ssize_t len, run;
343 	ssize_t curr = 0;
344 	ssize_t minrun;
345 
346 	if (size < 64) {
347 		bisort(dst, 1, size, cmp, payload);
348 		return;
349 	}
350 
351 	/* compute the minimum run length */
352 	minrun = (ssize_t)compute_minrun(size);
353 
354 	/* temporary storage for merges */
355 	store->alloc = 0;
356 	store->storage = NULL;
357 	store->cmp = cmp;
358 	store->payload = payload;
359 
360 	PUSH_NEXT();
361 	PUSH_NEXT();
362 	PUSH_NEXT();
363 
364 	while (1) {
365 		if (!check_invariant(run_stack, stack_curr)) {
366 			stack_curr = collapse(dst, run_stack, stack_curr, store, size);
367 			continue;
368 		}
369 
370 		PUSH_NEXT();
371 	}
372 }
373 
tsort_r_cmp(const void * a,const void * b,void * payload)374 static int tsort_r_cmp(const void *a, const void *b, void *payload)
375 {
376 	return ((git__tsort_cmp)payload)(a, b);
377 }
378 
git__tsort(void ** dst,size_t size,git__tsort_cmp cmp)379 void git__tsort(void **dst, size_t size, git__tsort_cmp cmp)
380 {
381 	git__tsort_r(dst, size, tsort_r_cmp, cmp);
382 }
383