xref: /freebsd/usr.bin/sort/radixsort.c (revision 535af610)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
5  * Copyright (C) 2012 Gabor Kovesdan <gabor@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <math.h>
37 #if defined(SORT_THREADS)
38 #include <pthread.h>
39 #include <semaphore.h>
40 #endif
41 #include <stdlib.h>
42 #include <string.h>
43 #include <wchar.h>
44 #include <wctype.h>
45 #include <unistd.h>
46 
47 #include "coll.h"
48 #include "radixsort.h"
49 
50 #define DEFAULT_SORT_FUNC_RADIXSORT mergesort
51 
52 #define TINY_NODE(sl) ((sl)->tosort_num < 65)
53 #define SMALL_NODE(sl) ((sl)->tosort_num < 5)
54 
55 /* are we sorting in reverse order ? */
56 static bool reverse_sort;
57 
58 /* sort sub-levels array size */
59 static const size_t slsz = 256 * sizeof(struct sort_level*);
60 
/* one sort level structure */
struct sort_level
{
	struct sort_level	**sublevels;	/* 256 child levels, one per byte value (sln entries) */
	struct sort_list_item	**leaves;	/* items whose key ends at this depth */
	struct sort_list_item	**sorted;	/* shared output array this level writes into */
	struct sort_list_item	**tosort;	/* items still waiting to be partitioned */
	size_t			  leaves_num;	/* used entries in leaves */
	size_t			  leaves_sz;	/* allocated entries in leaves */
	size_t			  level;	/* byte depth of this level (0 at the top) */
	size_t			  real_sln;	/* number of non-NULL sublevels */
	size_t			  start_position; /* offset of this level's output in sorted */
	size_t			  sln;		/* size of the sublevels array */
	size_t			  tosort_num;	/* used entries in tosort */
	size_t			  tosort_sz;	/* allocated entries in tosort */
};
77 
/* stack of sort levels ready to be sorted */
struct level_stack {
	struct level_stack	 *next;	/* next (older) entry on the stack */
	struct sort_level	 *sl;	/* sort level carried by this entry */
};
83 
84 static struct level_stack *g_ls;
85 
86 #if defined(SORT_THREADS)
87 /* stack guarding mutex */
88 static pthread_cond_t g_ls_cond;
89 static pthread_mutex_t g_ls_mutex;
90 
91 /* counter: how many items are left */
92 static size_t sort_left;
/* sort_left is updated under g_ls_mutex (see sort_left_dec()) */
94 
95 /* semaphore to count threads */
96 static sem_t mtsem;
97 
98 /*
99  * Decrement items counter
100  */
101 static inline void
102 sort_left_dec(size_t n)
103 {
104 	pthread_mutex_lock(&g_ls_mutex);
105 	sort_left -= n;
106 	if (sort_left == 0 && nthreads > 1)
107 		pthread_cond_broadcast(&g_ls_cond);
108 	pthread_mutex_unlock(&g_ls_mutex);
109 }
110 
111 /*
112  * Do we have something to sort ?
113  *
114  * This routine does not need to be locked.
115  */
116 static inline bool
117 have_sort_left(void)
118 {
119 	bool ret;
120 
121 	ret = (sort_left > 0);
122 
123 	return (ret);
124 }
125 
126 #else
127 
128 #define sort_left_dec(n)
129 
130 #endif /* SORT_THREADS */
131 
/*
 * Link a new entry onto the top of the global level stack.
 * In multi-threaded mode the caller must hold g_ls_mutex
 * (see push_ls()).
 */
static void
_push_ls(struct level_stack *ls)
{

	ls->next = g_ls;
	g_ls = ls;
}
139 
/*
 * Push sort level to the stack
 */
static inline void
push_ls(struct sort_level *sl)
{
	struct level_stack *new_ls;

	new_ls = sort_malloc(sizeof(struct level_stack));
	new_ls->sl = sl;

#if defined(SORT_THREADS)
	if (nthreads > 1) {
		pthread_mutex_lock(&g_ls_mutex);
		_push_ls(new_ls);
		/* Wake one worker waiting in pop_ls_mt(). */
		pthread_cond_signal(&g_ls_cond);
		pthread_mutex_unlock(&g_ls_mutex);
	} else
#endif
		_push_ls(new_ls);
}
161 
162 /*
163  * Pop sort level from the stack (single-threaded style)
164  */
165 static inline struct sort_level*
166 pop_ls_st(void)
167 {
168 	struct sort_level *sl;
169 
170 	if (g_ls) {
171 		struct level_stack *saved_ls;
172 
173 		sl = g_ls->sl;
174 		saved_ls = g_ls;
175 		g_ls = g_ls->next;
176 		sort_free(saved_ls);
177 	} else
178 		sl = NULL;
179 
180 	return (sl);
181 }
182 
183 #if defined(SORT_THREADS)
184 
/*
 * Pop sort level from the stack (multi-threaded style)
 */
static inline struct sort_level*
pop_ls_mt(void)
{
	struct level_stack *saved_ls;
	struct sort_level *sl;

	pthread_mutex_lock(&g_ls_mutex);

	for (;;) {
		if (g_ls) {
			/* Detach the top entry while still holding the lock. */
			sl = g_ls->sl;
			saved_ls = g_ls;
			g_ls = g_ls->next;
			break;
		}
		sl = NULL;
		saved_ls = NULL;

		/* Everything is sorted: no more work will ever be pushed. */
		if (have_sort_left() == 0)
			break;
		/* Wait for push_ls() or the final sort_left_dec() broadcast. */
		pthread_cond_wait(&g_ls_cond, &g_ls_mutex);
	}

	pthread_mutex_unlock(&g_ls_mutex);

	/*
	 * Free outside the critical section.  saved_ls may be NULL here,
	 * so sort_free() is expected to tolerate NULL (it is called
	 * unconditionally).
	 */
	sort_free(saved_ls);

	return (sl);
}
217 
218 #endif /* defined(SORT_THREADS) */
219 
220 static void
221 add_to_sublevel(struct sort_level *sl, struct sort_list_item *item, size_t indx)
222 {
223 	struct sort_level *ssl;
224 
225 	ssl = sl->sublevels[indx];
226 
227 	if (ssl == NULL) {
228 		ssl = sort_calloc(1, sizeof(struct sort_level));
229 
230 		ssl->level = sl->level + 1;
231 		sl->sublevels[indx] = ssl;
232 
233 		++(sl->real_sln);
234 	}
235 
236 	if (++(ssl->tosort_num) > ssl->tosort_sz) {
237 		ssl->tosort_sz = ssl->tosort_num + 128;
238 		ssl->tosort = sort_realloc(ssl->tosort,
239 		    sizeof(struct sort_list_item*) * (ssl->tosort_sz));
240 	}
241 
242 	ssl->tosort[ssl->tosort_num - 1] = item;
243 }
244 
245 static inline void
246 add_leaf(struct sort_level *sl, struct sort_list_item *item)
247 {
248 
249 	if (++(sl->leaves_num) > sl->leaves_sz) {
250 		sl->leaves_sz = sl->leaves_num + 128;
251 		sl->leaves = sort_realloc(sl->leaves,
252 		    (sizeof(struct sort_list_item*) * (sl->leaves_sz)));
253 	}
254 	sl->leaves[sl->leaves_num - 1] = item;
255 }
256 
/*
 * Return the radix bucket (0..255) for the byte of the item's first
 * key at the given byte depth, or -1 if the key is exhausted at that
 * depth (the item becomes a leaf of the current level).
 */
static inline int
get_wc_index(struct sort_list_item *sli, size_t level)
{
	/* Bytes per sort unit: 1 in single-byte locales, sizeof(wchar_t) otherwise. */
	const size_t wcfact = (mb_cur_max == 1) ? 1 : sizeof(wchar_t);
	const struct key_value *kv;
	const struct bwstring *bws;

	/* Radix operates on key 0 only; later keys are handled by list_coll. */
	kv = get_key_from_keys_array(&sli->ka, 0);
	bws = kv->k;

	if ((BWSLEN(bws) * wcfact > level)) {
		wchar_t res;

		/*
		 * Sort wchar strings a byte at a time, rather than a single
		 * byte from each wchar.
		 */
		res = (wchar_t)BWS_GET(bws, level / wcfact);
		/* Sort most-significant byte first. */
		if (level % wcfact < wcfact - 1)
			res = (res >> (8 * (wcfact - 1 - (level % wcfact))));

		return (res & 0xff);
	}

	return (-1);
}
284 
285 static void
286 place_item(struct sort_level *sl, size_t item)
287 {
288 	struct sort_list_item *sli;
289 	int c;
290 
291 	sli = sl->tosort[item];
292 	c = get_wc_index(sli, sl->level);
293 
294 	if (c == -1)
295 		add_leaf(sl, sli);
296 	else
297 		add_to_sublevel(sl, sli, c);
298 }
299 
300 static void
301 free_sort_level(struct sort_level *sl)
302 {
303 
304 	if (sl) {
305 		if (sl->leaves)
306 			sort_free(sl->leaves);
307 
308 		if (sl->level > 0)
309 			sort_free(sl->tosort);
310 
311 		if (sl->sublevels) {
312 			struct sort_level *slc;
313 			size_t sln;
314 
315 			sln = sl->sln;
316 
317 			for (size_t i = 0; i < sln; ++i) {
318 				slc = sl->sublevels[i];
319 				if (slc)
320 					free_sort_level(slc);
321 			}
322 
323 			sort_free(sl->sublevels);
324 		}
325 
326 		sort_free(sl);
327 	}
328 }
329 
/*
 * Sort one level: finish trivial node sizes directly, fall back to a
 * comparison sort for tiny or very deep nodes, otherwise partition the
 * items into 256 byte buckets and schedule the children.  Consumes
 * (frees) sl before returning.
 */
static void
run_sort_level_next(struct sort_level *sl)
{
	/* Bytes per sort unit: 1 in single-byte locales, sizeof(wchar_t) otherwise. */
	const size_t wcfact = (mb_cur_max == 1) ? 1 : sizeof(wchar_t);
	struct sort_level *slc;
	size_t i, sln, tosort_num;

	/* Any stale sublevels array is rebuilt below for this pass. */
	if (sl->sublevels) {
		sort_free(sl->sublevels);
		sl->sublevels = NULL;
	}

	/* Nodes of 0..2 items are finished without another radix pass. */
	switch (sl->tosort_num) {
	case 0:
		goto end;
	case (1):
		sl->sorted[sl->start_position] = sl->tosort[0];
		sort_left_dec(1);
		goto end;
	case (2):
		/*
		 * Radixsort only processes a single byte at a time.  In wchar
		 * mode, this can be a subset of the length of a character.
		 * list_coll_offset() offset is in units of wchar, not bytes.
		 * So to calculate the offset, we must divide by
		 * sizeof(wchar_t) and round down to the index of the first
		 * character this level references.
		 */
		if (list_coll_offset(&(sl->tosort[0]), &(sl->tosort[1]),
		    sl->level / wcfact) > 0) {
			sl->sorted[sl->start_position++] = sl->tosort[1];
			sl->sorted[sl->start_position] = sl->tosort[0];
		} else {
			sl->sorted[sl->start_position++] = sl->tosort[0];
			sl->sorted[sl->start_position] = sl->tosort[1];
		}
		sort_left_dec(2);

		goto end;
	default:
		/*
		 * Tiny or deep nodes are cheaper to finish with a comparison
		 * sort than with further radix passes.
		 */
		if (TINY_NODE(sl) || (sl->level > 15)) {
			listcoll_t func;

			/*
			 * Collate comparison offset is in units of
			 * character-width, so we must divide the level (bytes)
			 * by operating character width (wchar_t or char).  See
			 * longer comment above.
			 */
			func = get_list_call_func(sl->level / wcfact);

			/* Reuse the tosort array as the leaves array. */
			sl->leaves = sl->tosort;
			sl->leaves_num = sl->tosort_num;
			sl->leaves_sz = sl->leaves_num;
			sl->leaves = sort_realloc(sl->leaves,
			    (sizeof(struct sort_list_item *) *
			    (sl->leaves_sz)));
			sl->tosort = NULL;
			sl->tosort_num = 0;
			sl->tosort_sz = 0;
			sl->sln = 0;
			sl->real_sln = 0;
			/* sflag selects the stable mergesort path. */
			if (sort_opts_vals.sflag) {
				if (mergesort(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) func) == -1)
					/* NOTREACHED */
					err(2, "Radix sort error 3");
			} else
				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) func);

			memcpy(sl->sorted + sl->start_position,
			    sl->leaves, sl->leaves_num *
			    sizeof(struct sort_list_item*));

			sort_left_dec(sl->leaves_num);

			goto end;
		} else {
			/* Trim tosort to its exact size before partitioning. */
			sl->tosort_sz = sl->tosort_num;
			sl->tosort = sort_realloc(sl->tosort,
			    sizeof(struct sort_list_item*) * (sl->tosort_sz));
		}
	}

	/* Partition the items into 256 buckets on the byte at this depth. */
	sl->sln = 256;
	sl->sublevels = sort_calloc(1, slsz);

	sl->real_sln = 0;

	tosort_num = sl->tosort_num;
	for (i = 0; i < tosort_num; ++i)
		place_item(sl, i);

	sort_free(sl->tosort);
	sl->tosort = NULL;
	sl->tosort_num = 0;
	sl->tosort_sz = 0;

	/*
	 * Leaves have exhausted key 0 at this depth; order them by the
	 * remaining keys (list_coll), or by string-only comparison when
	 * there is a single key and complex, non-stable sorting is on.
	 */
	if (sl->leaves_num > 1) {
		if (keys_num > 1) {
			if (sort_opts_vals.sflag) {
				mergesort(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) list_coll);
			} else {
				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
				    sizeof(struct sort_list_item *),
				    (int(*)(const void *, const void *)) list_coll);
			}
		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
			    sizeof(struct sort_list_item *),
			    (int(*)(const void *, const void *)) list_coll_by_str_only);
		}
	}

	sl->leaves_sz = sl->leaves_num;
	sl->leaves = sort_realloc(sl->leaves, (sizeof(struct sort_list_item *) *
	    (sl->leaves_sz)));

	if (!reverse_sort) {
		/* Ascending: leaves (shorter keys) first, then buckets 0..255. */
		memcpy(sl->sorted + sl->start_position, sl->leaves,
		    sl->leaves_num * sizeof(struct sort_list_item*));
		sl->start_position += sl->leaves_num;
		sort_left_dec(sl->leaves_num);

		sort_free(sl->leaves);
		sl->leaves = NULL;
		sl->leaves_num = 0;
		sl->leaves_sz = 0;

		sln = sl->sln;

		for (i = 0; i < sln; ++i) {
			slc = sl->sublevels[i];

			if (slc) {
				/* Children write into the shared output array. */
				slc->sorted = sl->sorted;
				slc->start_position = sl->start_position;
				sl->start_position += slc->tosort_num;
				/* Sort tiny children inline; queue the rest. */
				if (SMALL_NODE(slc))
					run_sort_level_next(slc);
				else
					push_ls(slc);
				sl->sublevels[i] = NULL;
			}
		}

	} else {
		size_t n;

		/* Descending: buckets 255..0 first, then the leaves last. */
		sln = sl->sln;

		for (i = 0; i < sln; ++i) {
			n = sln - i - 1;
			slc = sl->sublevels[n];

			if (slc) {
				slc->sorted = sl->sorted;
				slc->start_position = sl->start_position;
				sl->start_position += slc->tosort_num;
				if (SMALL_NODE(slc))
					run_sort_level_next(slc);
				else
					push_ls(slc);
				sl->sublevels[n] = NULL;
			}
		}

		memcpy(sl->sorted + sl->start_position, sl->leaves,
		    sl->leaves_num * sizeof(struct sort_list_item*));
		sort_left_dec(sl->leaves_num);
	}

end:
	free_sort_level(sl);
}
510 
511 /*
512  * Single-threaded sort cycle
513  */
514 static void
515 run_sort_cycle_st(void)
516 {
517 	struct sort_level *slc;
518 
519 	for (;;) {
520 		slc = pop_ls_st();
521 		if (slc == NULL) {
522 			break;
523 		}
524 		run_sort_level_next(slc);
525 	}
526 }
527 
528 #if defined(SORT_THREADS)
529 
530 /*
531  * Multi-threaded sort cycle
532  */
533 static void
534 run_sort_cycle_mt(void)
535 {
536 	struct sort_level *slc;
537 
538 	for (;;) {
539 		slc = pop_ls_mt();
540 		if (slc == NULL)
541 			break;
542 		run_sort_level_next(slc);
543 	}
544 }
545 
/*
 * Sort cycle thread (in multi-threaded mode)
 */
static void*
sort_thread(void* arg)
{
	/* Work until pop_ls_mt() reports nothing is left. */
	run_sort_cycle_mt();
	/* Tell the coordinator in run_top_sort_level() we are done. */
	sem_post(&mtsem);

	return (arg);
}
557 
558 #endif /* defined(SORT_THREADS) */
559 
560 static void
561 run_top_sort_level(struct sort_level *sl)
562 {
563 	struct sort_level *slc;
564 
565 	reverse_sort = sort_opts_vals.kflag ? keys[0].sm.rflag :
566 	    default_sort_mods->rflag;
567 
568 	sl->start_position = 0;
569 	sl->sln = 256;
570 	sl->sublevels = sort_calloc(1, slsz);
571 
572 	for (size_t i = 0; i < sl->tosort_num; ++i)
573 		place_item(sl, i);
574 
575 	if (sl->leaves_num > 1) {
576 		if (keys_num > 1) {
577 			if (sort_opts_vals.sflag) {
578 				mergesort(sl->leaves, sl->leaves_num,
579 				    sizeof(struct sort_list_item *),
580 				    (int(*)(const void *, const void *)) list_coll);
581 			} else {
582 				DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
583 				    sizeof(struct sort_list_item *),
584 				    (int(*)(const void *, const void *)) list_coll);
585 			}
586 		} else if (!sort_opts_vals.sflag && sort_opts_vals.complex_sort) {
587 			DEFAULT_SORT_FUNC_RADIXSORT(sl->leaves, sl->leaves_num,
588 			    sizeof(struct sort_list_item *),
589 			    (int(*)(const void *, const void *)) list_coll_by_str_only);
590 		}
591 	}
592 
593 	if (!reverse_sort) {
594 		memcpy(sl->tosort + sl->start_position, sl->leaves,
595 		    sl->leaves_num * sizeof(struct sort_list_item*));
596 		sl->start_position += sl->leaves_num;
597 		sort_left_dec(sl->leaves_num);
598 
599 		for (size_t i = 0; i < sl->sln; ++i) {
600 			slc = sl->sublevels[i];
601 
602 			if (slc) {
603 				slc->sorted = sl->tosort;
604 				slc->start_position = sl->start_position;
605 				sl->start_position += slc->tosort_num;
606 				push_ls(slc);
607 				sl->sublevels[i] = NULL;
608 			}
609 		}
610 
611 	} else {
612 		size_t n;
613 
614 		for (size_t i = 0; i < sl->sln; ++i) {
615 
616 			n = sl->sln - i - 1;
617 			slc = sl->sublevels[n];
618 
619 			if (slc) {
620 				slc->sorted = sl->tosort;
621 				slc->start_position = sl->start_position;
622 				sl->start_position += slc->tosort_num;
623 				push_ls(slc);
624 				sl->sublevels[n] = NULL;
625 			}
626 		}
627 
628 		memcpy(sl->tosort + sl->start_position, sl->leaves,
629 		    sl->leaves_num * sizeof(struct sort_list_item*));
630 
631 		sort_left_dec(sl->leaves_num);
632 	}
633 
634 #if defined(SORT_THREADS)
635 	if (nthreads < 2) {
636 #endif
637 		run_sort_cycle_st();
638 #if defined(SORT_THREADS)
639 	} else {
640 		size_t i;
641 
642 		for(i = 0; i < nthreads; ++i) {
643 			pthread_attr_t attr;
644 			pthread_t pth;
645 
646 			pthread_attr_init(&attr);
647 			pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED);
648 
649 			for (;;) {
650 				int res = pthread_create(&pth, &attr,
651 				    sort_thread, NULL);
652 				if (res >= 0)
653 					break;
654 				if (errno == EAGAIN) {
655 					pthread_yield();
656 					continue;
657 				}
658 				err(2, NULL);
659 			}
660 
661 			pthread_attr_destroy(&attr);
662 		}
663 
664 		for (i = 0; i < nthreads; ++i)
665 			sem_wait(&mtsem);
666 	}
667 #endif /* defined(SORT_THREADS) */
668 }
669 
670 static void
671 run_sort(struct sort_list_item **base, size_t nmemb)
672 {
673 	struct sort_level *sl;
674 
675 #if defined(SORT_THREADS)
676 	size_t nthreads_save = nthreads;
677 	if (nmemb < MT_SORT_THRESHOLD)
678 		nthreads = 1;
679 
680 	if (nthreads > 1) {
681 		pthread_mutexattr_t mattr;
682 
683 		pthread_mutexattr_init(&mattr);
684 		pthread_mutexattr_settype(&mattr, PTHREAD_MUTEX_ADAPTIVE_NP);
685 
686 		pthread_mutex_init(&g_ls_mutex, &mattr);
687 		pthread_cond_init(&g_ls_cond, NULL);
688 
689 		pthread_mutexattr_destroy(&mattr);
690 
691 		sem_init(&mtsem, 0, 0);
692 
693 	}
694 #endif
695 
696 	sl = sort_calloc(1, sizeof(struct sort_level));
697 
698 	sl->tosort = base;
699 	sl->tosort_num = nmemb;
700 	sl->tosort_sz = nmemb;
701 
702 #if defined(SORT_THREADS)
703 	sort_left = nmemb;
704 #endif
705 
706 	run_top_sort_level(sl);
707 
708 	free_sort_level(sl);
709 
710 #if defined(SORT_THREADS)
711 	if (nthreads > 1) {
712 		sem_destroy(&mtsem);
713 		pthread_mutex_destroy(&g_ls_mutex);
714 	}
715 	nthreads = nthreads_save;
716 #endif
717 }
718 
/*
 * Public entry point: radix-sort the nmemb items of base in place.
 */
void
rxsort(struct sort_list_item **base, size_t nmemb)
{

	run_sort(base, nmemb);
}
725