xref: /openbsd/usr.bin/sort/radixsort.c (revision f0a85c33)
1 /*	$OpenBSD: radixsort.c,v 1.5 2015/04/02 21:00:08 tobias Exp $	*/
2 
3 /*-
4  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
5  * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <errno.h>
31 #include <err.h>
32 #include <langinfo.h>
33 #include <math.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <wchar.h>
37 #include <wctype.h>
38 #include <unistd.h>
39 
40 #include "coll.h"
41 #include "radixsort.h"
42 
43 #define DEFAULT_SORT_FUNC_RADIXSORT mergesort
44 
45 #define TINY_NODE(sl) ((sl)->tosort_num < 65)
46 #define SMALL_NODE(sl) ((sl)->tosort_num < 5)
47 
48 /* are we sorting in reverse order ? */
49 static bool reverse_sort;
50 
51 /* sort sub-levels array size */
52 static const size_t slsz = 256 * sizeof(struct sort_level *);
53 
54 /* one sort level structure */
55 struct sort_level {
56 	struct sort_level	**sublevels;
57 	struct sort_list_item	**leaves;
58 	struct sort_list_item	**sorted;
59 	struct sort_list_item	**tosort;
60 	size_t			  leaves_num;
61 	size_t			  leaves_sz;
62 	size_t			  level;
63 	size_t			  real_sln;
64 	size_t			  start_position;
65 	size_t			  sln;
66 	size_t			  tosort_num;
67 	size_t			  tosort_sz;
68 };
69 
70 /* stack of sort levels ready to be sorted */
71 struct level_stack {
72 	struct level_stack	 *next;
73 	struct sort_level	 *sl;
74 };
75 
76 static struct level_stack *g_ls;
77 
78 /*
79  * Push sort level to the stack
80  */
81 static inline void
push_ls(struct sort_level * sl)82 push_ls(struct sort_level *sl)
83 {
84 	struct level_stack *new_ls;
85 
86 	new_ls = sort_malloc(sizeof(struct level_stack));
87 	new_ls->sl = sl;
88 
89 	new_ls->next = g_ls;
90 	g_ls = new_ls;
91 }
92 
93 /*
94  * Pop sort level from the stack (single-threaded style)
95  */
96 static inline struct sort_level *
pop_ls_st(void)97 pop_ls_st(void)
98 {
99 	struct sort_level *sl;
100 
101 	if (g_ls) {
102 		struct level_stack *saved_ls;
103 
104 		sl = g_ls->sl;
105 		saved_ls = g_ls;
106 		g_ls = g_ls->next;
107 		sort_free(saved_ls);
108 	} else
109 		sl = NULL;
110 
111 	return sl;
112 }
113 
114 static void
add_to_sublevel(struct sort_level * sl,struct sort_list_item * item,size_t indx)115 add_to_sublevel(struct sort_level *sl, struct sort_list_item *item, size_t indx)
116 {
117 	struct sort_level *ssl;
118 
119 	ssl = sl->sublevels[indx];
120 
121 	if (ssl == NULL) {
122 		ssl = sort_calloc(1, sizeof(struct sort_level));
123 		ssl->level = sl->level + 1;
124 		sl->sublevels[indx] = ssl;
125 
126 		++(sl->real_sln);
127 	}
128 
129 	if (++(ssl->tosort_num) > ssl->tosort_sz) {
130 		ssl->tosort_sz = ssl->tosort_num + 128;
131 		ssl->tosort = sort_reallocarray(ssl->tosort, ssl->tosort_sz,
132 		    sizeof(struct sort_list_item *));
133 	}
134 
135 	ssl->tosort[ssl->tosort_num - 1] = item;
136 }
137 
138 static inline void
add_leaf(struct sort_level * sl,struct sort_list_item * item)139 add_leaf(struct sort_level *sl, struct sort_list_item *item)
140 {
141 	if (++(sl->leaves_num) > sl->leaves_sz) {
142 		sl->leaves_sz = sl->leaves_num + 128;
143 		sl->leaves = sort_reallocarray(sl->leaves, sl->leaves_sz,
144 		    sizeof(struct sort_list_item *));
145 	}
146 	sl->leaves[sl->leaves_num - 1] = item;
147 }
148 
149 static inline int
get_wc_index(struct sort_list_item * sli,size_t level)150 get_wc_index(struct sort_list_item *sli, size_t level)
151 {
152 	const struct bwstring *bws;
153 
154 	bws = sli->ka.key[0].k;
155 
156 	if ((BWSLEN(bws) > level))
157 		return (unsigned char)BWS_GET(bws, level);
158 	return -1;
159 }
160 
161 static void
place_item(struct sort_level * sl,size_t item)162 place_item(struct sort_level *sl, size_t item)
163 {
164 	struct sort_list_item *sli;
165 	int c;
166 
167 	sli = sl->tosort[item];
168 	c = get_wc_index(sli, sl->level);
169 
170 	if (c == -1)
171 		add_leaf(sl, sli);
172 	else
173 		add_to_sublevel(sl, sli, c);
174 }
175 
176 static void
free_sort_level(struct sort_level * sl)177 free_sort_level(struct sort_level *sl)
178 {
179 	if (sl) {
180 		sort_free(sl->leaves);
181 
182 		if (sl->level > 0)
183 			sort_free(sl->tosort);
184 
185 		if (sl->sublevels) {
186 			struct sort_level *slc;
187 			size_t i, sln;
188 
189 			sln = sl->sln;
190 
191 			for (i = 0; i < sln; ++i) {
192 				slc = sl->sublevels[i];
193 				free_sort_level(slc);
194 			}
195 
196 			sort_free(sl->sublevels);
197 		}
198 
199 		sort_free(sl);
200 	}
201 }
202 
203 static void
run_sort_level_next(struct sort_level * sl)204 run_sort_level_next(struct sort_level *sl)
205 {
206 	struct sort_level *slc;
207 	size_t i, sln, tosort_num;
208 
209 	sort_free(sl->sublevels);
210 	sl->sublevels = NULL;
211 
212 	switch (sl->tosort_num) {
213 	case 0:
214 		goto end;
215 	case 1:
216 		sl->sorted[sl->start_position] = sl->tosort[0];
217 		goto end;
218 	case 2:
219 		if (list_coll_offset(&(sl->tosort[0]), &(sl->tosort[1]),
220 		    sl->level) > 0) {
221 			sl->sorted[sl->start_position++] = sl->tosort[1];
222 			sl->sorted[sl->start_position] = sl->tosort[0];
223 		} else {
224 			sl->sorted[sl->start_position++] = sl->tosort[0];
225 			sl->sorted[sl->start_position] = sl->tosort[1];
226 		}
227 
228 		goto end;
229 	default:
230 		if (TINY_NODE(sl) || (sl->level > 15)) {
231 			listcoll_t func;
232 
233 			func = get_list_call_func(sl->level);
234 
235 			sl->leaves = sl->tosort;
236 			sl->leaves_num = sl->tosort_num;
237 			sl->leaves_sz = sl->leaves_num;
238 			sl->leaves = sort_reallocarray(sl->leaves,
239 			    sl->leaves_sz, sizeof(struct sort_list_item *));
240 			sl->tosort = NULL;
241 			sl->tosort_num = 0;
242 			sl->tosort_sz = 0;
243 			sl->sln = 0;
244 			sl->real_sln = 0;
245 			if (sort_opts_vals.sflag) {
246 				if (mergesort(sl->leaves, sl->leaves_num,
247 				    sizeof(struct sort_list_item *),
248 				    (int(*)(const void *, const void *)) func) == -1)
249 					/* NOTREACHED */
250 					err(2, "Radix sort error 3");
251 			} else
252 				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
253 				    sizeof(struct sort_list_item *),
254 				    (int(*)(const void *, const void *)) func);
255 
256 			memcpy(sl->sorted + sl->start_position,
257 			    sl->leaves, sl->leaves_num *
258 			    sizeof(struct sort_list_item *));
259 
260 			goto end;
261 		} else {
262 			sl->tosort_sz = sl->tosort_num;
263 			sl->tosort = sort_reallocarray(sl->tosort,
264 			    sl->tosort_sz, sizeof(struct sort_list_item *));
265 		}
266 	}
267 
268 	sl->sln = 256;
269 	sl->sublevels = sort_calloc(1, slsz);
270 	sl->real_sln = 0;
271 
272 	tosort_num = sl->tosort_num;
273 	for (i = 0; i < tosort_num; ++i)
274 		place_item(sl, i);
275 
276 	sort_free(sl->tosort);
277 	sl->tosort = NULL;
278 	sl->tosort_num = 0;
279 	sl->tosort_sz = 0;
280 
281 	if (sl->leaves_num > 1) {
282 		if (keys_num > 1) {
283 			if (sort_opts_vals.sflag) {
284 				mergesort(sl->leaves, sl->leaves_num,
285 				    sizeof(struct sort_list_item *), list_coll);
286 			} else {
287 				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
288 				    sizeof(struct sort_list_item *), list_coll);
289 			}
290 		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
291 			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
292 			    sizeof(struct sort_list_item *), list_coll);
293 		}
294 	}
295 
296 	sl->leaves_sz = sl->leaves_num;
297 	sl->leaves = sort_reallocarray(sl->leaves, sl->leaves_sz,
298 	    sizeof(struct sort_list_item *));
299 
300 	if (!reverse_sort) {
301 		memcpy(sl->sorted + sl->start_position, sl->leaves,
302 		    sl->leaves_num * sizeof(struct sort_list_item *));
303 		sl->start_position += sl->leaves_num;
304 
305 		sort_free(sl->leaves);
306 		sl->leaves = NULL;
307 		sl->leaves_num = 0;
308 		sl->leaves_sz = 0;
309 
310 		sln = sl->sln;
311 
312 		for (i = 0; i < sln; ++i) {
313 			slc = sl->sublevels[i];
314 
315 			if (slc) {
316 				slc->sorted = sl->sorted;
317 				slc->start_position = sl->start_position;
318 				sl->start_position += slc->tosort_num;
319 				if (SMALL_NODE(slc))
320 					run_sort_level_next(slc);
321 				else
322 					push_ls(slc);
323 				sl->sublevels[i] = NULL;
324 			}
325 		}
326 
327 	} else {
328 		size_t n;
329 
330 		sln = sl->sln;
331 
332 		for (i = 0; i < sln; ++i) {
333 			n = sln - i - 1;
334 			slc = sl->sublevels[n];
335 
336 			if (slc) {
337 				slc->sorted = sl->sorted;
338 				slc->start_position = sl->start_position;
339 				sl->start_position += slc->tosort_num;
340 				if (SMALL_NODE(slc))
341 					run_sort_level_next(slc);
342 				else
343 					push_ls(slc);
344 				sl->sublevels[n] = NULL;
345 			}
346 		}
347 
348 		memcpy(sl->sorted + sl->start_position, sl->leaves,
349 		    sl->leaves_num * sizeof(struct sort_list_item *));
350 	}
351 
352 end:
353 	free_sort_level(sl);
354 }
355 
356 /*
357  * Single-threaded sort cycle
358  */
359 static void
run_sort_cycle_st(void)360 run_sort_cycle_st(void)
361 {
362 	struct sort_level *slc;
363 
364 	for (;;) {
365 		slc = pop_ls_st();
366 		if (slc == NULL) {
367 			break;
368 		}
369 		run_sort_level_next(slc);
370 	}
371 }
372 
373 static void
run_top_sort_level(struct sort_level * sl)374 run_top_sort_level(struct sort_level *sl)
375 {
376 	struct sort_level *slc;
377 	size_t i;
378 
379 	reverse_sort = sort_opts_vals.kflag ? keys[0].sm.rflag :
380 	    default_sort_mods->rflag;
381 
382 	sl->start_position = 0;
383 	sl->sln = 256;
384 	sl->sublevels = sort_calloc(1, slsz);
385 
386 	for (i = 0; i < sl->tosort_num; ++i)
387 		place_item(sl, i);
388 
389 	if (sl->leaves_num > 1) {
390 		if (keys_num > 1) {
391 			if (sort_opts_vals.sflag) {
392 				mergesort(sl->leaves, sl->leaves_num,
393 				    sizeof(struct sort_list_item *), list_coll);
394 			} else {
395 				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
396 				    sizeof(struct sort_list_item *), list_coll);
397 			}
398 		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
399 			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
400 			    sizeof(struct sort_list_item *), list_coll);
401 		}
402 	}
403 
404 	if (!reverse_sort) {
405 		size_t i;
406 
407 		memcpy(sl->tosort + sl->start_position, sl->leaves,
408 		    sl->leaves_num * sizeof(struct sort_list_item *));
409 		sl->start_position += sl->leaves_num;
410 
411 		for (i = 0; i < sl->sln; ++i) {
412 			slc = sl->sublevels[i];
413 
414 			if (slc) {
415 				slc->sorted = sl->tosort;
416 				slc->start_position = sl->start_position;
417 				sl->start_position += slc->tosort_num;
418 				push_ls(slc);
419 				sl->sublevels[i] = NULL;
420 			}
421 		}
422 
423 	} else {
424 		size_t i, n;
425 
426 		for (i = 0; i < sl->sln; ++i) {
427 
428 			n = sl->sln - i - 1;
429 			slc = sl->sublevels[n];
430 
431 			if (slc) {
432 				slc->sorted = sl->tosort;
433 				slc->start_position = sl->start_position;
434 				sl->start_position += slc->tosort_num;
435 				push_ls(slc);
436 				sl->sublevels[n] = NULL;
437 			}
438 		}
439 
440 		memcpy(sl->tosort + sl->start_position, sl->leaves,
441 		    sl->leaves_num * sizeof(struct sort_list_item *));
442 	}
443 	run_sort_cycle_st();
444 }
445 
446 void
rxsort(struct sort_list_item ** base,size_t nmemb)447 rxsort(struct sort_list_item **base, size_t nmemb)
448 {
449 	struct sort_level *sl;
450 
451 	sl = sort_calloc(1, sizeof(struct sort_level));
452 	sl->tosort = base;
453 	sl->tosort_num = nmemb;
454 	sl->tosort_sz = nmemb;
455 
456 	run_top_sort_level(sl);
457 
458 	free_sort_level(sl);
459 }
460