xref: /dragonfly/usr.bin/sort/coll.c (revision 3d33658b)
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: head/usr.bin/sort/coll.c 281132 2015-04-06 02:35:55Z pfg $
28  */
29 
30 
31 #include <sys/types.h>
32 
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42 #if defined(SORT_RANDOM)
43 #include <openssl/md5.h>
44 #endif
45 
46 #include "coll.h"
47 #include "vsort.h"
48 
49 struct key_specs *keys;
50 size_t keys_num = 0;
51 
52 wint_t symbol_decimal_point = L'.';
53 /* there is no default thousands separator in collate rules: */
54 wint_t symbol_thousands_sep = 0;
55 wint_t symbol_negative_sign = L'-';
56 wint_t symbol_positive_sign = L'+';
57 
58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
60 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
61 static int numcoll(struct key_value*, struct key_value *, size_t offset);
62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
63 #if defined(SORT_RANDOM)
64 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
65 #endif
66 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
67 
68 /*
69  * Allocate keys array
70  */
71 struct keys_array *
72 keys_array_alloc(void)
73 {
74 	struct keys_array *ka;
75 	size_t sz;
76 
77 	sz = keys_array_size();
78 	ka = sort_malloc(sz);
79 	memset(ka, 0, sz);
80 
81 	return (ka);
82 }
83 
84 /*
85  * Calculate whether we need key hint space
86  */
87 static size_t
88 key_hint_size(void)
89 {
90 
91 	return (need_hint ? sizeof(struct key_hint) : 0);
92 }
93 
94 /*
95  * Calculate keys array size
96  */
97 size_t
98 keys_array_size(void)
99 {
100 
101 	return (keys_num * (sizeof(struct key_value) + key_hint_size()));
102 }
103 
104 /*
105  * Clean data of keys array
106  */
107 void
108 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
109 {
110 
111 	if (ka) {
112 		for (size_t i = 0; i < keys_num; ++i)
113 			if (ka->key[i].k && ka->key[i].k != s)
114 				bwsfree(ka->key[i].k);
115 		memset(ka, 0, keys_array_size());
116 	}
117 }
118 
119 /*
120  * Set value of a key in the keys set
121  */
122 void
123 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
124 {
125 
126 	if (ka && keys_num > ind) {
127 		struct key_value *kv;
128 
129 		kv = &(ka->key[ind]);
130 
131 		if (kv->k && kv->k != s)
132 			bwsfree(kv->k);
133 		kv->k = s;
134 	}
135 }
136 
137 /*
138  * Initialize a sort list item
139  */
140 struct sort_list_item *
141 sort_list_item_alloc(void)
142 {
143 	struct sort_list_item *si;
144 	size_t sz;
145 
146 	sz = sizeof(struct sort_list_item) + keys_array_size();
147 	si = sort_malloc(sz);
148 	memset(si, 0, sz);
149 
150 	return (si);
151 }
152 
153 size_t
154 sort_list_item_size(struct sort_list_item *si)
155 {
156 	size_t ret = 0;
157 
158 	if (si) {
159 		ret = sizeof(struct sort_list_item) + keys_array_size();
160 		if (si->str)
161 			ret += bws_memsize(si->str);
162 		for (size_t i = 0; i < keys_num; ++i) {
163 			struct key_value *kv;
164 
165 			kv = &(si->ka.key[i]);
166 
167 			if (kv->k != si->str)
168 				ret += bws_memsize(kv->k);
169 		}
170 	}
171 	return (ret);
172 }
173 
174 /*
175  * Calculate key for a sort list item
176  */
177 static void
178 sort_list_item_make_key(struct sort_list_item *si)
179 {
180 
181 	preproc(si->str, &(si->ka));
182 }
183 
184 /*
185  * Set value of a sort list item.
186  * Return combined string and keys memory size.
187  */
188 void
189 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
190 {
191 
192 	if (si) {
193 		clean_keys_array(si->str, &(si->ka));
194 		if (si->str) {
195 			if (si->str == str) {
196 				/* we are trying to reset the same string */
197 				return;
198 			} else {
199 				bwsfree(si->str);
200 				si->str = NULL;
201 			}
202 		}
203 		si->str = str;
204 		sort_list_item_make_key(si);
205 	}
206 }
207 
208 /*
209  * De-allocate a sort list item object memory
210  */
211 void
212 sort_list_item_clean(struct sort_list_item *si)
213 {
214 
215 	if (si) {
216 		clean_keys_array(si->str, &(si->ka));
217 		if (si->str) {
218 			bwsfree(si->str);
219 			si->str = NULL;
220 		}
221 	}
222 }
223 
224 /*
225  * Skip columns according to specs
226  */
227 static size_t
228 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
229     bool skip_blanks, bool *empty_key)
230 {
231 	if (cols < 1)
232 		return (BWSLEN(s) + 1);
233 
234 	if (skip_blanks)
235 		while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
236 			++start;
237 
238 	while (start < BWSLEN(s) && cols > 1) {
239 		--cols;
240 		++start;
241 	}
242 
243 	if (start >= BWSLEN(s))
244 		*empty_key = true;
245 
246 	return (start);
247 }
248 
249 /*
250  * Skip fields according to specs
251  */
252 static size_t
253 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
254 {
255 
256 	if (fields < 2) {
257 		if (BWSLEN(s) == 0)
258 			*empty_field = true;
259 		return (0);
260 	} else if (!(sort_opts_vals.tflag)) {
261 		size_t cpos = 0;
262 		bool pb = true;
263 
264 		while (cpos < BWSLEN(s)) {
265 			bool isblank;
266 
267 			isblank = iswblank(BWS_GET(s, cpos));
268 
269 			if (isblank && !pb) {
270 				--fields;
271 				if (fields <= 1)
272 					return (cpos);
273 			}
274 			pb = isblank;
275 			++cpos;
276 		}
277 		if (fields > 1)
278 			*empty_field = true;
279 		return (cpos);
280 	} else {
281 		size_t cpos = 0;
282 
283 		while (cpos < BWSLEN(s)) {
284 			if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
285 				--fields;
286 				if (fields <= 1)
287 					return (cpos + 1);
288 			}
289 			++cpos;
290 		}
291 		if (fields > 1)
292 			*empty_field = true;
293 		return (cpos);
294 	}
295 }
296 
297 /*
298  * Find fields start
299  */
300 static void
301 find_field_start(const struct bwstring *s, struct key_specs *ks,
302     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
303 {
304 
305 	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
306 	if (!*empty_field)
307 		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
308 		    ks->pos1b, empty_key);
309 	else
310 		*empty_key = true;
311 }
312 
313 /*
314  * Find end key position
315  */
316 static size_t
317 find_field_end(const struct bwstring *s, struct key_specs *ks)
318 {
319 	size_t f2, next_field_start, pos_end;
320 	bool empty_field, empty_key;
321 
322 	pos_end = 0;
323 	next_field_start = 0;
324 	empty_field = false;
325 	empty_key = false;
326 	f2 = ks->f2;
327 
328 	if (f2 == 0)
329 		return (BWSLEN(s) + 1);
330 	else {
331 		if (ks->c2 == 0) {
332 			next_field_start = skip_fields_to_start(s, f2 + 1,
333 			    &empty_field);
334 			if ((next_field_start > 0) && sort_opts_vals.tflag &&
335 			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
336 			    next_field_start - 1)))
337 				--next_field_start;
338 		} else
339 			next_field_start = skip_fields_to_start(s, f2,
340 			    &empty_field);
341 	}
342 
343 	if (empty_field || (next_field_start >= BWSLEN(s)))
344 		return (BWSLEN(s) + 1);
345 
346 	if (ks->c2) {
347 		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
348 		    ks->pos2b, &empty_key);
349 		if (pos_end < BWSLEN(s))
350 			++pos_end;
351 	} else
352 		pos_end = next_field_start;
353 
354 	return (pos_end);
355 }
356 
357 /*
358  * Cut a field according to the key specs
359  */
360 static struct bwstring *
361 cut_field(const struct bwstring *s, struct key_specs *ks)
362 {
363 	struct bwstring *ret = NULL;
364 
365 	if (s && ks) {
366 		size_t field_start, key_end, key_start, sz;
367 		bool empty_field, empty_key;
368 
369 		field_start = 0;
370 		key_start = 0;
371 		empty_field = false;
372 		empty_key = false;
373 
374 		find_field_start(s, ks, &field_start, &key_start,
375 		    &empty_field, &empty_key);
376 
377 		if (empty_key)
378 			sz = 0;
379 		else {
380 			key_end = find_field_end(s, ks);
381 			sz = (key_end < key_start) ? 0 : (key_end - key_start);
382 		}
383 
384 		ret = bwsalloc(sz);
385 		if (sz)
386 			bwsnocpy(ret, s, key_start, sz);
387 	} else
388 		ret = bwsalloc(0);
389 
390 	return (ret);
391 }
392 
393 /*
394  * Preprocesses a line applying the necessary transformations
395  * specified by command line options and returns the preprocessed
396  * string, which can be used to compare.
397  */
398 int
399 preproc(struct bwstring *s, struct keys_array *ka)
400 {
401 
402 	if (sort_opts_vals.kflag)
403 		for (size_t i = 0; i < keys_num; i++) {
404 			struct bwstring *key;
405 			struct key_specs *kspecs;
406 			struct sort_mods *sm;
407 
408 			kspecs = &(keys[i]);
409 			key = cut_field(s, kspecs);
410 
411 			sm = &(kspecs->sm);
412 			if (sm->dflag)
413 				key = dictionary_order(key);
414 			else if (sm->iflag)
415 				key = ignore_nonprinting(key);
416 			if (sm->fflag || sm->Mflag)
417 				key = ignore_case(key);
418 
419 			set_key_on_keys_array(ka, key, i);
420 		}
421 	else {
422 		struct bwstring *ret = NULL;
423 		struct sort_mods *sm = default_sort_mods;
424 
425 		if (sm->bflag) {
426 			if (ret == NULL)
427 				ret = bwsdup(s);
428 			ret = ignore_leading_blanks(ret);
429 		}
430 		if (sm->dflag) {
431 			if (ret == NULL)
432 				ret = bwsdup(s);
433 			ret = dictionary_order(ret);
434 		} else if (sm->iflag) {
435 			if (ret == NULL)
436 				ret = bwsdup(s);
437 			ret = ignore_nonprinting(ret);
438 		}
439 		if (sm->fflag || sm->Mflag) {
440 			if (ret == NULL)
441 				ret = bwsdup(s);
442 			ret = ignore_case(ret);
443 		}
444 		if (ret == NULL)
445 			set_key_on_keys_array(ka, s, 0);
446 		else
447 			set_key_on_keys_array(ka, ret, 0);
448 	}
449 
450 	return 0;
451 }
452 
453 cmpcoll_t
454 get_sort_func(struct sort_mods *sm)
455 {
456 
457 	if (sm->nflag)
458 		return (numcoll);
459 	else if (sm->hflag)
460 		return (hnumcoll);
461 	else if (sm->gflag)
462 		return (gnumcoll);
463 	else if (sm->Mflag)
464 		return (monthcoll);
465 #if defined(SORT_RANDOM)
466 	else if (sm->Rflag)
467 		return (randomcoll);
468 #endif
469 	else if (sm->Vflag)
470 		return (versioncoll);
471 	else
472 		return (wstrcoll);
473 }
474 
475 /*
476  * Compares the given strings.  Returns a positive number if
477  * the first precedes the second, a negative number if the second is
478  * the preceding one, and zero if they are equal.  This function calls
479  * the underlying collate functions, which done the actual comparison.
480  */
481 int
482 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
483 {
484 	struct sort_mods *sm;
485 	int res = 0;
486 
487 	for (size_t i = 0; i < keys_num; ++i) {
488 		sm = &(keys[i].sm);
489 
490 		if (sm->rflag)
491 			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
492 		else
493 			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
494 
495 		if (res)
496 			break;
497 
498 		/* offset applies to only the first key */
499 		offset = 0;
500 	}
501 	return (res);
502 }
503 
504 /*
505  * Compare two strings.
506  * Plain symbol-by-symbol comparison.
507  */
508 int
509 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
510 {
511 
512 	if (default_sort_mods->rflag) {
513 		const struct bwstring *tmp;
514 
515 		tmp = s1;
516 		s1 = s2;
517 		s2 = tmp;
518 	}
519 
520 	return (bwscoll(s1, s2, 0));
521 }
522 
523 /*
524  * Compare a string and a sort list item, according to the sort specs.
525  */
526 int
527 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
528 {
529 	struct keys_array *ka1;
530 	int ret = 0;
531 
532 	ka1 = keys_array_alloc();
533 
534 	preproc(str1, ka1);
535 
536 	sort_list_item_make_key(*ss2);
537 
538 	if (debug_sort) {
539 		bwsprintf(stdout, str1, "; s1=<", ">");
540 		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
541 	}
542 
543 	ret = key_coll(ka1, &((*ss2)->ka), 0);
544 
545 	if (debug_sort)
546 		printf("; cmp1=%d", ret);
547 
548 	clean_keys_array(str1, ka1);
549 	sort_free(ka1);
550 
551 	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
552 		ret = top_level_str_coll(str1, ((*ss2)->str));
553 		if (debug_sort)
554 			printf("; cmp2=%d", ret);
555 	}
556 
557 	if (debug_sort)
558 		printf("\n");
559 
560 	return (ret);
561 }
562 
563 /*
564  * Compare two sort list items, according to the sort specs.
565  */
566 int
567 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
568     size_t offset)
569 {
570 	int ret;
571 
572 	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
573 
574 	if (debug_sort) {
575 		if (offset)
576 			printf("; offset=%d", (int) offset);
577 		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
578 		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
579 		printf("; cmp1=%d\n", ret);
580 	}
581 
582 	if (ret)
583 		return (ret);
584 
585 	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
586 		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
587 		if (debug_sort)
588 			printf("; cmp2=%d\n", ret);
589 	}
590 
591 	return (ret);
592 }
593 
594 /*
595  * Compare two sort list items, according to the sort specs.
596  */
597 int
598 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
599 {
600 
601 	return (list_coll_offset(ss1, ss2, 0));
602 }
603 
604 #define	LSCDEF(N)							\
605 static int 								\
606 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)	\
607 {									\
608 									\
609 	return (list_coll_offset(ss1, ss2, N));				\
610 }
611 
612 LSCDEF(1)
613 LSCDEF(2)
614 LSCDEF(3)
615 LSCDEF(4)
616 LSCDEF(5)
617 LSCDEF(6)
618 LSCDEF(7)
619 LSCDEF(8)
620 LSCDEF(9)
621 LSCDEF(10)
622 LSCDEF(11)
623 LSCDEF(12)
624 LSCDEF(13)
625 LSCDEF(14)
626 LSCDEF(15)
627 LSCDEF(16)
628 LSCDEF(17)
629 LSCDEF(18)
630 LSCDEF(19)
631 LSCDEF(20)
632 
633 listcoll_t
634 get_list_call_func(size_t offset)
635 {
636 	static const listcoll_t lsarray[] = { list_coll, list_coll_1,
637 	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
638 	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
639 	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
640 	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
641 	    list_coll_18, list_coll_19, list_coll_20 };
642 
643 	if (offset <= 20)
644 		return (lsarray[offset]);
645 
646 	return (list_coll);
647 }
648 
649 /*
650  * Compare two sort list items, only by their original string.
651  */
652 int
653 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
654 {
655 
656 	return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
657 }
658 
659 /*
660  * Maximum size of a number in the string (before or after decimal point)
661  */
662 #define MAX_NUM_SIZE (128)
663 
664 /*
665  * Set suffix value
666  */
667 static void setsuffix(wchar_t c, unsigned char *si)
668 {
669 	switch (c){
670 	case L'k':
671 	case L'K':
672 		*si = 1;
673 		break;
674 	case L'M':
675 		*si = 2;
676 		break;
677 	case L'G':
678 		*si = 3;
679 		break;
680 	case L'T':
681 		*si = 4;
682 		break;
683 	case L'P':
684 		*si = 5;
685 		break;
686 	case L'E':
687 		*si = 6;
688 		break;
689 	case L'Z':
690 		*si = 7;
691 		break;
692 	case L'Y':
693 		*si = 8;
694 		break;
695 	default:
696 		*si = 0;
697 	};
698 }
699 
700 /*
701  * Read string s and parse the string into a fixed-decimal-point number.
702  * sign equals -1 if the number is negative (explicit plus is not allowed,
703  * according to GNU sort's "info sort".
704  * The number part before decimal point is in the smain, after the decimal
705  * point is in sfrac, tail is the pointer to the remainder of the string.
706  */
707 static int
708 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
709 {
710 	bwstring_iterator s;
711 
712 	s = bws_begin(s0);
713 
714 	/* always end the fraction with zero, even if we have no fraction */
715 	sfrac[0] = 0;
716 
717 	while (iswblank(bws_get_iter_value(s)))
718 		s = bws_iterator_inc(s, 1);
719 
720 	if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
721 		*sign = -1;
722 		s = bws_iterator_inc(s, 1);
723 	}
724 
725 	// This is '0', not '\0', do not change this
726 	while (iswdigit(bws_get_iter_value(s)) &&
727 	    (bws_get_iter_value(s) == L'0'))
728 		s = bws_iterator_inc(s, 1);
729 
730 	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
731 		if (iswdigit(bws_get_iter_value(s))) {
732 			smain[*main_len] = bws_get_iter_value(s);
733 			s = bws_iterator_inc(s, 1);
734 			*main_len += 1;
735 		} else if (symbol_thousands_sep &&
736 		    (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
737 			s = bws_iterator_inc(s, 1);
738 		else
739 			break;
740 	}
741 
742 	smain[*main_len] = 0;
743 
744 	if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
745 		s = bws_iterator_inc(s, 1);
746 		while (iswdigit(bws_get_iter_value(s)) &&
747 		    *frac_len < MAX_NUM_SIZE) {
748 			sfrac[*frac_len] = bws_get_iter_value(s);
749 			s = bws_iterator_inc(s, 1);
750 			*frac_len += 1;
751 		}
752 		sfrac[*frac_len] = 0;
753 
754 		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
755 			--(*frac_len);
756 			sfrac[*frac_len] = L'\0';
757 		}
758 	}
759 
760 	setsuffix(bws_get_iter_value(s),si);
761 
762 	if ((*main_len + *frac_len) == 0)
763 		*sign = 0;
764 
765 	return (0);
766 }
767 
768 /*
769  * Implements string sort.
770  */
771 static int
772 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
773 {
774 
775 	if (debug_sort) {
776 		if (offset)
777 			printf("; offset=%d\n", (int) offset);
778 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
779 		printf("(%zu)", BWSLEN(kv1->k));
780 		bwsprintf(stdout, kv2->k, ", k2=<", ">");
781 		printf("(%zu)", BWSLEN(kv2->k));
782 	}
783 
784 	return (bwscoll(kv1->k, kv2->k, offset));
785 }
786 
787 /*
788  * Compare two suffixes
789  */
790 static inline int
791 cmpsuffix(unsigned char si1, unsigned char si2)
792 {
793 
794 	return ((char)si1 - (char)si2);
795 }
796 
797 /*
798  * Implements numeric sort for -n and -h.
799  */
800 static int
801 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
802     size_t offset __unused, bool use_suffix)
803 {
804 	struct bwstring *s1, *s2;
805 	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
806 	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
807 	int cmp_res, sign1, sign2;
808 	size_t frac1, frac2, main1, main2;
809 	unsigned char SI1, SI2;
810 	bool e1, e2, key1_read, key2_read;
811 
812 	s1 = kv1->k;
813 	s2 = kv2->k;
814 	sign1 = sign2 = 0;
815 	main1 = main2 = 0;
816 	frac1 = frac2 = 0;
817 
818 	cmp_res = 0;
819 	key1_read = key2_read = false;
820 
821 	if (debug_sort) {
822 		bwsprintf(stdout, s1, "; k1=<", ">");
823 		bwsprintf(stdout, s2, ", k2=<", ">");
824 	}
825 
826 	if (s1 == s2)
827 		return (0);
828 
829 	if (kv1->hint->status == HS_UNINITIALIZED) {
830 		/* read the number from the string */
831 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
832 		key1_read = true;
833 		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
834 		if(main1 < 1 && frac1 < 1)
835 			kv1->hint->v.nh.empty=true;
836 		kv1->hint->v.nh.si = SI1;
837 		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
838 		    HS_INITIALIZED : HS_ERROR;
839 		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
840 	}
841 
842 	if (kv2->hint->status == HS_UNINITIALIZED) {
843 		/* read the number from the string */
844 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
845 		key2_read = true;
846 		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
847 		if(main2 < 1 && frac2 < 1)
848 			kv2->hint->v.nh.empty=true;
849 		kv2->hint->v.nh.si = SI2;
850 		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
851 		    HS_INITIALIZED : HS_ERROR;
852 		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
853 	}
854 
855 	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
856 	    HS_INITIALIZED) {
857 		unsigned long long n1, n2;
858 		bool neg1, neg2;
859 
860 		e1 = kv1->hint->v.nh.empty;
861 		e2 = kv2->hint->v.nh.empty;
862 
863 		if (e1 && e2)
864 			return (0);
865 
866 		neg1 = kv1->hint->v.nh.neg;
867 		neg2 = kv2->hint->v.nh.neg;
868 
869 		if (neg1 && !neg2)
870 			return (-1);
871 		if (neg2 && !neg1)
872 			return (+1);
873 
874 		if (e1)
875 			return (neg2 ? +1 : -1);
876 		else if (e2)
877 			return (neg1 ? -1 : +1);
878 
879 
880 		if (use_suffix) {
881 			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
882 			if (cmp_res)
883 				return (neg1 ? -cmp_res : cmp_res);
884 		}
885 
886 		n1 = kv1->hint->v.nh.n1;
887 		n2 = kv2->hint->v.nh.n1;
888 		if (n1 < n2)
889 			return (neg1 ? +1 : -1);
890 		else if (n1 > n2)
891 			return (neg1 ? -1 : +1);
892 	}
893 
894 	/* read the numbers from the strings */
895 	if (!key1_read)
896 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
897 	if (!key2_read)
898 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
899 
900 	e1 = ((main1 + frac1) == 0);
901 	e2 = ((main2 + frac2) == 0);
902 
903 	if (e1 && e2)
904 		return (0);
905 
906 	/* we know the result if the signs are different */
907 	if (sign1 < 0 && sign2 >= 0)
908 		return (-1);
909 	if (sign1 >= 0 && sign2 < 0)
910 		return (+1);
911 
912 	if (e1)
913 		return ((sign2 < 0) ? +1 : -1);
914 	else if (e2)
915 		return ((sign1 < 0) ? -1 : +1);
916 
917 	if (use_suffix) {
918 		cmp_res = cmpsuffix(SI1, SI2);
919 		if (cmp_res)
920 			return ((sign1 < 0) ? -cmp_res : cmp_res);
921 	}
922 
923 	/* if both numbers are empty assume that the strings are equal */
924 	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
925 		return (0);
926 
927 	/*
928 	 * if the main part is of different size, we know the result
929 	 * (because the leading zeros are removed)
930 	 */
931 	if (main1 < main2)
932 		cmp_res = -1;
933 	else if (main1 > main2)
934 		cmp_res = +1;
935 	/* if the sizes are equal then simple non-collate string compare gives the correct result */
936 	else
937 		cmp_res = wcscmp(smain1, smain2);
938 
939 	/* check fraction */
940 	if (!cmp_res)
941 		cmp_res = wcscmp(sfrac1, sfrac2);
942 
943 	if (!cmp_res)
944 		return (0);
945 
946 	/* reverse result if the signs are negative */
947 	if (sign1 < 0 && sign2 < 0)
948 		cmp_res = -cmp_res;
949 
950 	return (cmp_res);
951 }
952 
953 /*
954  * Implements numeric sort (-n).
955  */
956 static int
957 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
958 {
959 
960 	return (numcoll_impl(kv1, kv2, offset, false));
961 }
962 
963 /*
964  * Implements 'human' numeric sort (-h).
965  */
966 static int
967 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
968 {
969 
970 	return (numcoll_impl(kv1, kv2, offset, true));
971 }
972 
973 /*
974  * Implements random sort (-R).
975  */
976 #if defined(SORT_RANDOM)
977 static char *
978 randomcollend(MD5_CTX *ctx)
979 {
980 	unsigned char digest[MD5_DIGEST_LENGTH];
981 	static const char hex[]="0123456789abcdef";
982 	char *buf;
983 	int i;
984 
985 	buf = malloc(MD5_DIGEST_LENGTH * 2 + 1);
986 	if (!buf)
987 		return NULL;
988 	MD5_Final(digest, ctx);
989 	for (i = 0; i < MD5_DIGEST_LENGTH; i++) {
990 		buf[2*i] = hex[digest[i] >> 4];
991 		buf[2*i+1] = hex[digest[i] & 0x0f];
992 	}
993 	buf[MD5_DIGEST_LENGTH * 2] = '\0';
994 	return buf;
995 }
996 
997 static int
998 randomcoll(struct key_value *kv1, struct key_value *kv2,
999     size_t offset __unused)
1000 {
1001 	struct bwstring *s1, *s2;
1002 	MD5_CTX ctx1, ctx2;
1003 	char *b1, *b2;
1004 
1005 	s1 = kv1->k;
1006 	s2 = kv2->k;
1007 
1008 	if (debug_sort) {
1009 		bwsprintf(stdout, s1, "; k1=<", ">");
1010 		bwsprintf(stdout, s2, ", k2=<", ">");
1011 	}
1012 
1013 	if (s1 == s2)
1014 		return (0);
1015 
1016 	memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
1017 	memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
1018 
1019 	MD5_Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1020 	MD5_Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1021 	b1 = randomcollend(&ctx1);
1022 	b2 = randomcollend(&ctx2);
1023 	if (b1 == NULL) {
1024 		if (b2 == NULL)
1025 			return (0);
1026 		else {
1027 			sort_free(b2);
1028 			return (-1);
1029 		}
1030 	} else if (b2 == NULL) {
1031 		sort_free(b1);
1032 		return (+1);
1033 	} else {
1034 		int cmp_res;
1035 
1036 		cmp_res = strcmp(b1,b2);
1037 		sort_free(b1);
1038 		sort_free(b2);
1039 
1040 		if (!cmp_res)
1041 			cmp_res = bwscoll(s1, s2, 0);
1042 
1043 		return (cmp_res);
1044 	}
1045 }
1046 #endif
1047 
1048 /*
1049  * Implements version sort (-V).
1050  */
1051 static int
1052 versioncoll(struct key_value *kv1, struct key_value *kv2,
1053     size_t offset __unused)
1054 {
1055 	struct bwstring *s1, *s2;
1056 
1057 	s1 = kv1->k;
1058 	s2 = kv2->k;
1059 
1060 	if (debug_sort) {
1061 		bwsprintf(stdout, s1, "; k1=<", ">");
1062 		bwsprintf(stdout, s2, ", k2=<", ">");
1063 	}
1064 
1065 	if (s1 == s2)
1066 		return (0);
1067 
1068 	return (vcmp(s1, s2));
1069 }
1070 
1071 /*
1072  * Check for minus infinity
1073  */
1074 static inline bool
1075 huge_minus(double d, int err1)
1076 {
1077 
1078 	if (err1 == ERANGE)
1079 		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1080 			return (+1);
1081 
1082 	return (0);
1083 }
1084 
1085 /*
1086  * Check for plus infinity
1087  */
1088 static inline bool
1089 huge_plus(double d, int err1)
1090 {
1091 
1092 	if (err1 == ERANGE)
1093 		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1094 			return (+1);
1095 
1096 	return (0);
1097 }
1098 
1099 /*
1100  * Check whether a function is a NAN
1101  */
1102 static bool
1103 is_nan(double d)
1104 {
1105 	return ((d == NAN) || (isnan(d)));
1106 }
1107 
1108 /*
1109  * Compare two NANs
1110  */
1111 static int
1112 cmp_nans(double d1, double d2)
1113 {
1114 
1115 	if (d1 < d2)
1116 		return (-1);
1117 	if (d1 > d2)
1118 		return (+1);
1119 	return (0);
1120 }
1121 
1122 /*
1123  * Implements general numeric sort (-g).
1124  */
1125 static int
1126 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1127     size_t offset __unused)
1128 {
1129 	double d1, d2;
1130 	int err1, err2;
1131 	bool empty1, empty2, key1_read, key2_read;
1132 
1133 	d1 = d2 = 0;
1134 	err1 = err2 = 0;
1135 	key1_read = key2_read = false;
1136 
1137 	if (debug_sort) {
1138 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1139 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1140 	}
1141 
1142 	if (kv1->hint->status == HS_UNINITIALIZED) {
1143 		errno = 0;
1144 		d1 = bwstod(kv1->k, &empty1);
1145 		err1 = errno;
1146 
1147 		if (empty1)
1148 			kv1->hint->v.gh.notnum = true;
1149 		else if (err1 == 0) {
1150 			kv1->hint->v.gh.d = d1;
1151 			kv1->hint->v.gh.nan = is_nan(d1);
1152 			kv1->hint->status = HS_INITIALIZED;
1153 		} else
1154 			kv1->hint->status = HS_ERROR;
1155 
1156 		key1_read = true;
1157 	}
1158 
1159 	if (kv2->hint->status == HS_UNINITIALIZED) {
1160 		errno = 0;
1161 		d2 = bwstod(kv2->k, &empty2);
1162 		err2 = errno;
1163 
1164 		if (empty2)
1165 			kv2->hint->v.gh.notnum = true;
1166 		else if (err2 == 0) {
1167 			kv2->hint->v.gh.d = d2;
1168 			kv2->hint->v.gh.nan = is_nan(d2);
1169 			kv2->hint->status = HS_INITIALIZED;
1170 		} else
1171 			kv2->hint->status = HS_ERROR;
1172 
1173 		key2_read = true;
1174 	}
1175 
1176 	if (kv1->hint->status == HS_INITIALIZED &&
1177 	    kv2->hint->status == HS_INITIALIZED) {
1178 		if (kv1->hint->v.gh.notnum)
1179 			return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1180 		else if (kv2->hint->v.gh.notnum)
1181 			return (+1);
1182 
1183 		if (kv1->hint->v.gh.nan)
1184 			return ((kv2->hint->v.gh.nan) ?
1185 			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1186 			    -1);
1187 		else if (kv2->hint->v.gh.nan)
1188 			return (+1);
1189 
1190 		d1 = kv1->hint->v.gh.d;
1191 		d2 = kv2->hint->v.gh.d;
1192 
1193 		if (d1 < d2)
1194 			return (-1);
1195 		else if (d1 > d2)
1196 			return (+1);
1197 		else
1198 			return (0);
1199 	}
1200 
1201 	if (!key1_read) {
1202 		errno = 0;
1203 		d1 = bwstod(kv1->k, &empty1);
1204 		err1 = errno;
1205 	}
1206 
1207 	if (!key2_read) {
1208 		errno = 0;
1209 		d2 = bwstod(kv2->k, &empty2);
1210 		err2 = errno;
1211 	}
1212 
1213 	/* Non-value case: */
1214 	if (empty1)
1215 		return (empty2 ? 0 : -1);
1216 	else if (empty2)
1217 		return (+1);
1218 
1219 	/* NAN case */
1220 	if (is_nan(d1))
1221 		return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1222 	else if (is_nan(d2))
1223 		return (+1);
1224 
1225 	/* Infinities */
1226 	if (err1 == ERANGE || err2 == ERANGE) {
1227 		/* Minus infinity case */
1228 		if (huge_minus(d1, err1)) {
1229 			if (huge_minus(d2, err2)) {
1230 				if (d1 < d2)
1231 					return (-1);
1232 				if (d1 > d2)
1233 					return (+1);
1234 				return (0);
1235 			} else
1236 				return (-1);
1237 
1238 		} else if (huge_minus(d2, err2)) {
1239 			if (huge_minus(d1, err1)) {
1240 				if (d1 < d2)
1241 					return (-1);
1242 				if (d1 > d2)
1243 					return (+1);
1244 				return (0);
1245 			} else
1246 				return (+1);
1247 		}
1248 
1249 		/* Plus infinity case */
1250 		if (huge_plus(d1, err1)) {
1251 			if (huge_plus(d2, err2)) {
1252 				if (d1 < d2)
1253 					return (-1);
1254 				if (d1 > d2)
1255 					return (+1);
1256 				return (0);
1257 			} else
1258 				return (+1);
1259 		} else if (huge_plus(d2, err2)) {
1260 			if (huge_plus(d1, err1)) {
1261 				if (d1 < d2)
1262 					return (-1);
1263 				if (d1 > d2)
1264 					return (+1);
1265 				return (0);
1266 			} else
1267 				return (-1);
1268 		}
1269 	}
1270 
1271 	if (d1 < d2)
1272 		return (-1);
1273 	if (d1 > d2)
1274 		return (+1);
1275 
1276 	return (0);
1277 }
1278 
1279 /*
1280  * Implements month sort (-M).
1281  */
1282 static int
1283 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1284 {
1285 	int val1, val2;
1286 	bool key1_read, key2_read;
1287 
1288 	val1 = val2 = 0;
1289 	key1_read = key2_read = false;
1290 
1291 	if (debug_sort) {
1292 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1293 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1294 	}
1295 
1296 	if (kv1->hint->status == HS_UNINITIALIZED) {
1297 		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1298 		key1_read = true;
1299 		kv1->hint->status = HS_INITIALIZED;
1300 	}
1301 
1302 	if (kv2->hint->status == HS_UNINITIALIZED) {
1303 		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1304 		key2_read = true;
1305 		kv2->hint->status = HS_INITIALIZED;
1306 	}
1307 
1308 	if (kv1->hint->status == HS_INITIALIZED) {
1309 		val1 = kv1->hint->v.Mh.m;
1310 		key1_read = true;
1311 	}
1312 
1313 	if (kv2->hint->status == HS_INITIALIZED) {
1314 		val2 = kv2->hint->v.Mh.m;
1315 		key2_read = true;
1316 	}
1317 
1318 	if (!key1_read)
1319 		val1 = bws_month_score(kv1->k);
1320 	if (!key2_read)
1321 		val2 = bws_month_score(kv2->k);
1322 
1323 	if (val1 == val2) {
1324 		return (0);
1325 	}
1326 	if (val1 < val2)
1327 		return (-1);
1328 	return (+1);
1329 }
1330