xref: /openbsd/usr.bin/sort/radixsort.c (revision f0a85c33)
1*f0a85c33Stobias /*	$OpenBSD: radixsort.c,v 1.5 2015/04/02 21:00:08 tobias Exp $	*/
279428148Smillert 
379428148Smillert /*-
479428148Smillert  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
579428148Smillert  * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
679428148Smillert  * All rights reserved.
779428148Smillert  *
879428148Smillert  * Redistribution and use in source and binary forms, with or without
979428148Smillert  * modification, are permitted provided that the following conditions
1079428148Smillert  * are met:
1179428148Smillert  * 1. Redistributions of source code must retain the above copyright
1279428148Smillert  *    notice, this list of conditions and the following disclaimer.
1379428148Smillert  * 2. Redistributions in binary form must reproduce the above copyright
1479428148Smillert  *    notice, this list of conditions and the following disclaimer in the
1579428148Smillert  *    documentation and/or other materials provided with the distribution.
1679428148Smillert  *
1779428148Smillert  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1879428148Smillert  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1979428148Smillert  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2079428148Smillert  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
2179428148Smillert  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2279428148Smillert  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2379428148Smillert  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2479428148Smillert  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2579428148Smillert  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2679428148Smillert  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2779428148Smillert  * SUCH DAMAGE.
2879428148Smillert  */
2979428148Smillert 
3079428148Smillert #include <errno.h>
3179428148Smillert #include <err.h>
3279428148Smillert #include <langinfo.h>
3379428148Smillert #include <math.h>
3479428148Smillert #include <stdlib.h>
3579428148Smillert #include <string.h>
3679428148Smillert #include <wchar.h>
3779428148Smillert #include <wctype.h>
3879428148Smillert #include <unistd.h>
3979428148Smillert 
4079428148Smillert #include "coll.h"
4179428148Smillert #include "radixsort.h"
4279428148Smillert 
4379428148Smillert #define DEFAULT_SORT_FUNC_RADIXSORT mergesort
4479428148Smillert 
4579428148Smillert #define TINY_NODE(sl) ((sl)->tosort_num < 65)
4679428148Smillert #define SMALL_NODE(sl) ((sl)->tosort_num < 5)
4779428148Smillert 
4879428148Smillert /* are we sorting in reverse order ? */
4979428148Smillert static bool reverse_sort;
5079428148Smillert 
5179428148Smillert /* sort sub-levels array size */
5279428148Smillert static const size_t slsz = 256 * sizeof(struct sort_level *);
5379428148Smillert 
5479428148Smillert /* one sort level structure */
5579428148Smillert struct sort_level {
5679428148Smillert 	struct sort_level	**sublevels;
5779428148Smillert 	struct sort_list_item	**leaves;
5879428148Smillert 	struct sort_list_item	**sorted;
5979428148Smillert 	struct sort_list_item	**tosort;
6079428148Smillert 	size_t			  leaves_num;
6179428148Smillert 	size_t			  leaves_sz;
6279428148Smillert 	size_t			  level;
6379428148Smillert 	size_t			  real_sln;
6479428148Smillert 	size_t			  start_position;
6579428148Smillert 	size_t			  sln;
6679428148Smillert 	size_t			  tosort_num;
6779428148Smillert 	size_t			  tosort_sz;
6879428148Smillert };
6979428148Smillert 
7079428148Smillert /* stack of sort levels ready to be sorted */
7179428148Smillert struct level_stack {
7279428148Smillert 	struct level_stack	 *next;
7379428148Smillert 	struct sort_level	 *sl;
7479428148Smillert };
7579428148Smillert 
7679428148Smillert static struct level_stack *g_ls;
7779428148Smillert 
7879428148Smillert /*
7979428148Smillert  * Push sort level to the stack
8079428148Smillert  */
8179428148Smillert static inline void
push_ls(struct sort_level * sl)8279428148Smillert push_ls(struct sort_level *sl)
8379428148Smillert {
8479428148Smillert 	struct level_stack *new_ls;
8579428148Smillert 
8679428148Smillert 	new_ls = sort_malloc(sizeof(struct level_stack));
8779428148Smillert 	new_ls->sl = sl;
8879428148Smillert 
8979428148Smillert 	new_ls->next = g_ls;
9079428148Smillert 	g_ls = new_ls;
9179428148Smillert }
9279428148Smillert 
9379428148Smillert /*
9479428148Smillert  * Pop sort level from the stack (single-threaded style)
9579428148Smillert  */
9679428148Smillert static inline struct sort_level *
pop_ls_st(void)9779428148Smillert pop_ls_st(void)
9879428148Smillert {
9979428148Smillert 	struct sort_level *sl;
10079428148Smillert 
10179428148Smillert 	if (g_ls) {
10279428148Smillert 		struct level_stack *saved_ls;
10379428148Smillert 
10479428148Smillert 		sl = g_ls->sl;
10579428148Smillert 		saved_ls = g_ls;
10679428148Smillert 		g_ls = g_ls->next;
10779428148Smillert 		sort_free(saved_ls);
10879428148Smillert 	} else
10979428148Smillert 		sl = NULL;
11079428148Smillert 
11179428148Smillert 	return sl;
11279428148Smillert }
11379428148Smillert 
11479428148Smillert static void
add_to_sublevel(struct sort_level * sl,struct sort_list_item * item,size_t indx)11579428148Smillert add_to_sublevel(struct sort_level *sl, struct sort_list_item *item, size_t indx)
11679428148Smillert {
11779428148Smillert 	struct sort_level *ssl;
11879428148Smillert 
11979428148Smillert 	ssl = sl->sublevels[indx];
12079428148Smillert 
12179428148Smillert 	if (ssl == NULL) {
1228fd2d339Smillert 		ssl = sort_calloc(1, sizeof(struct sort_level));
12379428148Smillert 		ssl->level = sl->level + 1;
12479428148Smillert 		sl->sublevels[indx] = ssl;
12579428148Smillert 
12679428148Smillert 		++(sl->real_sln);
12779428148Smillert 	}
12879428148Smillert 
12979428148Smillert 	if (++(ssl->tosort_num) > ssl->tosort_sz) {
13079428148Smillert 		ssl->tosort_sz = ssl->tosort_num + 128;
13179428148Smillert 		ssl->tosort = sort_reallocarray(ssl->tosort, ssl->tosort_sz,
13279428148Smillert 		    sizeof(struct sort_list_item *));
13379428148Smillert 	}
13479428148Smillert 
13579428148Smillert 	ssl->tosort[ssl->tosort_num - 1] = item;
13679428148Smillert }
13779428148Smillert 
13879428148Smillert static inline void
add_leaf(struct sort_level * sl,struct sort_list_item * item)13979428148Smillert add_leaf(struct sort_level *sl, struct sort_list_item *item)
14079428148Smillert {
14179428148Smillert 	if (++(sl->leaves_num) > sl->leaves_sz) {
14279428148Smillert 		sl->leaves_sz = sl->leaves_num + 128;
14379428148Smillert 		sl->leaves = sort_reallocarray(sl->leaves, sl->leaves_sz,
14479428148Smillert 		    sizeof(struct sort_list_item *));
14579428148Smillert 	}
14679428148Smillert 	sl->leaves[sl->leaves_num - 1] = item;
14779428148Smillert }
14879428148Smillert 
14979428148Smillert static inline int
get_wc_index(struct sort_list_item * sli,size_t level)15079428148Smillert get_wc_index(struct sort_list_item *sli, size_t level)
15179428148Smillert {
15279428148Smillert 	const struct bwstring *bws;
15379428148Smillert 
15479428148Smillert 	bws = sli->ka.key[0].k;
15579428148Smillert 
15679428148Smillert 	if ((BWSLEN(bws) > level))
15779428148Smillert 		return (unsigned char)BWS_GET(bws, level);
15879428148Smillert 	return -1;
15979428148Smillert }
16079428148Smillert 
16179428148Smillert static void
place_item(struct sort_level * sl,size_t item)16279428148Smillert place_item(struct sort_level *sl, size_t item)
16379428148Smillert {
16479428148Smillert 	struct sort_list_item *sli;
16579428148Smillert 	int c;
16679428148Smillert 
16779428148Smillert 	sli = sl->tosort[item];
16879428148Smillert 	c = get_wc_index(sli, sl->level);
16979428148Smillert 
17079428148Smillert 	if (c == -1)
17179428148Smillert 		add_leaf(sl, sli);
17279428148Smillert 	else
17379428148Smillert 		add_to_sublevel(sl, sli, c);
17479428148Smillert }
17579428148Smillert 
17679428148Smillert static void
free_sort_level(struct sort_level * sl)17779428148Smillert free_sort_level(struct sort_level *sl)
17879428148Smillert {
17979428148Smillert 	if (sl) {
18079428148Smillert 		sort_free(sl->leaves);
18179428148Smillert 
18279428148Smillert 		if (sl->level > 0)
18379428148Smillert 			sort_free(sl->tosort);
18479428148Smillert 
18579428148Smillert 		if (sl->sublevels) {
18679428148Smillert 			struct sort_level *slc;
18779428148Smillert 			size_t i, sln;
18879428148Smillert 
18979428148Smillert 			sln = sl->sln;
19079428148Smillert 
19179428148Smillert 			for (i = 0; i < sln; ++i) {
19279428148Smillert 				slc = sl->sublevels[i];
19379428148Smillert 				free_sort_level(slc);
19479428148Smillert 			}
19579428148Smillert 
19679428148Smillert 			sort_free(sl->sublevels);
19779428148Smillert 		}
19879428148Smillert 
19979428148Smillert 		sort_free(sl);
20079428148Smillert 	}
20179428148Smillert }
20279428148Smillert 
20379428148Smillert static void
run_sort_level_next(struct sort_level * sl)20479428148Smillert run_sort_level_next(struct sort_level *sl)
20579428148Smillert {
20679428148Smillert 	struct sort_level *slc;
20779428148Smillert 	size_t i, sln, tosort_num;
20879428148Smillert 
20979428148Smillert 	sort_free(sl->sublevels);
21079428148Smillert 	sl->sublevels = NULL;
21179428148Smillert 
21279428148Smillert 	switch (sl->tosort_num) {
21379428148Smillert 	case 0:
21479428148Smillert 		goto end;
21579428148Smillert 	case 1:
21679428148Smillert 		sl->sorted[sl->start_position] = sl->tosort[0];
21779428148Smillert 		goto end;
21879428148Smillert 	case 2:
21979428148Smillert 		if (list_coll_offset(&(sl->tosort[0]), &(sl->tosort[1]),
22079428148Smillert 		    sl->level) > 0) {
22179428148Smillert 			sl->sorted[sl->start_position++] = sl->tosort[1];
22279428148Smillert 			sl->sorted[sl->start_position] = sl->tosort[0];
22379428148Smillert 		} else {
22479428148Smillert 			sl->sorted[sl->start_position++] = sl->tosort[0];
22579428148Smillert 			sl->sorted[sl->start_position] = sl->tosort[1];
22679428148Smillert 		}
22779428148Smillert 
22879428148Smillert 		goto end;
22979428148Smillert 	default:
23079428148Smillert 		if (TINY_NODE(sl) || (sl->level > 15)) {
23179428148Smillert 			listcoll_t func;
23279428148Smillert 
23379428148Smillert 			func = get_list_call_func(sl->level);
23479428148Smillert 
23579428148Smillert 			sl->leaves = sl->tosort;
23679428148Smillert 			sl->leaves_num = sl->tosort_num;
23779428148Smillert 			sl->leaves_sz = sl->leaves_num;
23879428148Smillert 			sl->leaves = sort_reallocarray(sl->leaves,
23979428148Smillert 			    sl->leaves_sz, sizeof(struct sort_list_item *));
24079428148Smillert 			sl->tosort = NULL;
24179428148Smillert 			sl->tosort_num = 0;
24279428148Smillert 			sl->tosort_sz = 0;
24379428148Smillert 			sl->sln = 0;
24479428148Smillert 			sl->real_sln = 0;
24579428148Smillert 			if (sort_opts_vals.sflag) {
24679428148Smillert 				if (mergesort(sl->leaves, sl->leaves_num,
24779428148Smillert 				    sizeof(struct sort_list_item *),
24879428148Smillert 				    (int(*)(const void *, const void *)) func) == -1)
24979428148Smillert 					/* NOTREACHED */
25079428148Smillert 					err(2, "Radix sort error 3");
25179428148Smillert 			} else
25279428148Smillert 				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
25379428148Smillert 				    sizeof(struct sort_list_item *),
25479428148Smillert 				    (int(*)(const void *, const void *)) func);
25579428148Smillert 
25679428148Smillert 			memcpy(sl->sorted + sl->start_position,
25779428148Smillert 			    sl->leaves, sl->leaves_num *
25879428148Smillert 			    sizeof(struct sort_list_item *));
25979428148Smillert 
26079428148Smillert 			goto end;
26179428148Smillert 		} else {
26279428148Smillert 			sl->tosort_sz = sl->tosort_num;
26379428148Smillert 			sl->tosort = sort_reallocarray(sl->tosort,
26479428148Smillert 			    sl->tosort_sz, sizeof(struct sort_list_item *));
26579428148Smillert 		}
26679428148Smillert 	}
26779428148Smillert 
26879428148Smillert 	sl->sln = 256;
2698fd2d339Smillert 	sl->sublevels = sort_calloc(1, slsz);
27079428148Smillert 	sl->real_sln = 0;
27179428148Smillert 
27279428148Smillert 	tosort_num = sl->tosort_num;
27379428148Smillert 	for (i = 0; i < tosort_num; ++i)
27479428148Smillert 		place_item(sl, i);
27579428148Smillert 
27679428148Smillert 	sort_free(sl->tosort);
27779428148Smillert 	sl->tosort = NULL;
27879428148Smillert 	sl->tosort_num = 0;
27979428148Smillert 	sl->tosort_sz = 0;
28079428148Smillert 
28179428148Smillert 	if (sl->leaves_num > 1) {
28279428148Smillert 		if (keys_num > 1) {
28379428148Smillert 			if (sort_opts_vals.sflag) {
28479428148Smillert 				mergesort(sl->leaves, sl->leaves_num,
28579428148Smillert 				    sizeof(struct sort_list_item *), list_coll);
28679428148Smillert 			} else {
28779428148Smillert 				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
28879428148Smillert 				    sizeof(struct sort_list_item *), list_coll);
28979428148Smillert 			}
29079428148Smillert 		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
29179428148Smillert 			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
29279428148Smillert 			    sizeof(struct sort_list_item *), list_coll);
29379428148Smillert 		}
29479428148Smillert 	}
29579428148Smillert 
29679428148Smillert 	sl->leaves_sz = sl->leaves_num;
29779428148Smillert 	sl->leaves = sort_reallocarray(sl->leaves, sl->leaves_sz,
29879428148Smillert 	    sizeof(struct sort_list_item *));
29979428148Smillert 
30079428148Smillert 	if (!reverse_sort) {
30179428148Smillert 		memcpy(sl->sorted + sl->start_position, sl->leaves,
30279428148Smillert 		    sl->leaves_num * sizeof(struct sort_list_item *));
30379428148Smillert 		sl->start_position += sl->leaves_num;
30479428148Smillert 
30579428148Smillert 		sort_free(sl->leaves);
30679428148Smillert 		sl->leaves = NULL;
30779428148Smillert 		sl->leaves_num = 0;
30879428148Smillert 		sl->leaves_sz = 0;
30979428148Smillert 
31079428148Smillert 		sln = sl->sln;
31179428148Smillert 
31279428148Smillert 		for (i = 0; i < sln; ++i) {
31379428148Smillert 			slc = sl->sublevels[i];
31479428148Smillert 
31579428148Smillert 			if (slc) {
31679428148Smillert 				slc->sorted = sl->sorted;
31779428148Smillert 				slc->start_position = sl->start_position;
31879428148Smillert 				sl->start_position += slc->tosort_num;
31979428148Smillert 				if (SMALL_NODE(slc))
32079428148Smillert 					run_sort_level_next(slc);
32179428148Smillert 				else
32279428148Smillert 					push_ls(slc);
32379428148Smillert 				sl->sublevels[i] = NULL;
32479428148Smillert 			}
32579428148Smillert 		}
32679428148Smillert 
32779428148Smillert 	} else {
32879428148Smillert 		size_t n;
32979428148Smillert 
33079428148Smillert 		sln = sl->sln;
33179428148Smillert 
33279428148Smillert 		for (i = 0; i < sln; ++i) {
33379428148Smillert 			n = sln - i - 1;
33479428148Smillert 			slc = sl->sublevels[n];
33579428148Smillert 
33679428148Smillert 			if (slc) {
33779428148Smillert 				slc->sorted = sl->sorted;
33879428148Smillert 				slc->start_position = sl->start_position;
33979428148Smillert 				sl->start_position += slc->tosort_num;
34079428148Smillert 				if (SMALL_NODE(slc))
34179428148Smillert 					run_sort_level_next(slc);
34279428148Smillert 				else
34379428148Smillert 					push_ls(slc);
34479428148Smillert 				sl->sublevels[n] = NULL;
34579428148Smillert 			}
34679428148Smillert 		}
34779428148Smillert 
34879428148Smillert 		memcpy(sl->sorted + sl->start_position, sl->leaves,
34979428148Smillert 		    sl->leaves_num * sizeof(struct sort_list_item *));
35079428148Smillert 	}
35179428148Smillert 
35279428148Smillert end:
35379428148Smillert 	free_sort_level(sl);
35479428148Smillert }
35579428148Smillert 
35679428148Smillert /*
35779428148Smillert  * Single-threaded sort cycle
35879428148Smillert  */
35979428148Smillert static void
run_sort_cycle_st(void)36079428148Smillert run_sort_cycle_st(void)
36179428148Smillert {
36279428148Smillert 	struct sort_level *slc;
36379428148Smillert 
36479428148Smillert 	for (;;) {
36579428148Smillert 		slc = pop_ls_st();
36679428148Smillert 		if (slc == NULL) {
36779428148Smillert 			break;
36879428148Smillert 		}
36979428148Smillert 		run_sort_level_next(slc);
37079428148Smillert 	}
37179428148Smillert }
37279428148Smillert 
37379428148Smillert static void
run_top_sort_level(struct sort_level * sl)37479428148Smillert run_top_sort_level(struct sort_level *sl)
37579428148Smillert {
37679428148Smillert 	struct sort_level *slc;
37779428148Smillert 	size_t i;
37879428148Smillert 
37979428148Smillert 	reverse_sort = sort_opts_vals.kflag ? keys[0].sm.rflag :
38079428148Smillert 	    default_sort_mods->rflag;
38179428148Smillert 
38279428148Smillert 	sl->start_position = 0;
38379428148Smillert 	sl->sln = 256;
3848fd2d339Smillert 	sl->sublevels = sort_calloc(1, slsz);
38579428148Smillert 
38679428148Smillert 	for (i = 0; i < sl->tosort_num; ++i)
38779428148Smillert 		place_item(sl, i);
38879428148Smillert 
38979428148Smillert 	if (sl->leaves_num > 1) {
39079428148Smillert 		if (keys_num > 1) {
39179428148Smillert 			if (sort_opts_vals.sflag) {
39279428148Smillert 				mergesort(sl->leaves, sl->leaves_num,
39379428148Smillert 				    sizeof(struct sort_list_item *), list_coll);
39479428148Smillert 			} else {
39579428148Smillert 				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
39679428148Smillert 				    sizeof(struct sort_list_item *), list_coll);
39779428148Smillert 			}
39879428148Smillert 		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
39979428148Smillert 			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
40079428148Smillert 			    sizeof(struct sort_list_item *), list_coll);
40179428148Smillert 		}
40279428148Smillert 	}
40379428148Smillert 
40479428148Smillert 	if (!reverse_sort) {
40579428148Smillert 		size_t i;
40679428148Smillert 
40779428148Smillert 		memcpy(sl->tosort + sl->start_position, sl->leaves,
40879428148Smillert 		    sl->leaves_num * sizeof(struct sort_list_item *));
40979428148Smillert 		sl->start_position += sl->leaves_num;
41079428148Smillert 
41179428148Smillert 		for (i = 0; i < sl->sln; ++i) {
41279428148Smillert 			slc = sl->sublevels[i];
41379428148Smillert 
41479428148Smillert 			if (slc) {
41579428148Smillert 				slc->sorted = sl->tosort;
41679428148Smillert 				slc->start_position = sl->start_position;
41779428148Smillert 				sl->start_position += slc->tosort_num;
41879428148Smillert 				push_ls(slc);
41979428148Smillert 				sl->sublevels[i] = NULL;
42079428148Smillert 			}
42179428148Smillert 		}
42279428148Smillert 
42379428148Smillert 	} else {
42479428148Smillert 		size_t i, n;
42579428148Smillert 
42679428148Smillert 		for (i = 0; i < sl->sln; ++i) {
42779428148Smillert 
42879428148Smillert 			n = sl->sln - i - 1;
42979428148Smillert 			slc = sl->sublevels[n];
43079428148Smillert 
43179428148Smillert 			if (slc) {
43279428148Smillert 				slc->sorted = sl->tosort;
43379428148Smillert 				slc->start_position = sl->start_position;
43479428148Smillert 				sl->start_position += slc->tosort_num;
43579428148Smillert 				push_ls(slc);
43679428148Smillert 				sl->sublevels[n] = NULL;
43779428148Smillert 			}
43879428148Smillert 		}
43979428148Smillert 
44079428148Smillert 		memcpy(sl->tosort + sl->start_position, sl->leaves,
44179428148Smillert 		    sl->leaves_num * sizeof(struct sort_list_item *));
44279428148Smillert 	}
44379428148Smillert 	run_sort_cycle_st();
44479428148Smillert }
44579428148Smillert 
44679428148Smillert void
rxsort(struct sort_list_item ** base,size_t nmemb)44779428148Smillert rxsort(struct sort_list_item **base, size_t nmemb)
44879428148Smillert {
44979428148Smillert 	struct sort_level *sl;
45079428148Smillert 
4518fd2d339Smillert 	sl = sort_calloc(1, sizeof(struct sort_level));
45279428148Smillert 	sl->tosort = base;
45379428148Smillert 	sl->tosort_num = nmemb;
45479428148Smillert 	sl->tosort_sz = nmemb;
45579428148Smillert 
45679428148Smillert 	run_top_sort_level(sl);
45779428148Smillert 
45879428148Smillert 	free_sort_level(sl);
45979428148Smillert }
460