xref: /openbsd/usr.bin/sort/coll.c (revision 479c151d)
1 /*	$OpenBSD: coll.c,v 1.13 2024/09/20 02:00:46 jsg Exp $	*/
2 
3 /*-
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/types.h>
31 
32 #include <errno.h>
33 #include <err.h>
34 #include <langinfo.h>
35 #include <limits.h>
36 #include <math.h>
37 #include <md5.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42 
43 #include "coll.h"
44 #include "vsort.h"
45 
46 struct key_specs *keys;
47 size_t keys_num = 0;
48 
49 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
50 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
51 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
52 static int numcoll(struct key_value*, struct key_value *, size_t offset);
53 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
54 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
55 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
56 
57 /*
58  * Allocate keys array
59  */
60 struct keys_array *
keys_array_alloc(void)61 keys_array_alloc(void)
62 {
63 	struct keys_array *ka;
64 	size_t sz;
65 
66 	sz = keys_array_size();
67 	ka = sort_calloc(1, sz);
68 
69 	return ka;
70 }
71 
72 /*
73  * Calculate whether we need key hint space
74  */
75 static size_t
key_hint_size(void)76 key_hint_size(void)
77 {
78 	return need_hint ? sizeof(struct key_hint) : 0;
79 }
80 
81 /*
82  * Calculate keys array size
83  */
84 size_t
keys_array_size(void)85 keys_array_size(void)
86 {
87 	return keys_num * (sizeof(struct key_value) + key_hint_size());
88 }
89 
90 /*
91  * Clean data of keys array
92  */
93 void
clean_keys_array(const struct bwstring * s,struct keys_array * ka)94 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
95 {
96 	if (ka) {
97 		size_t i;
98 
99 		for (i = 0; i < keys_num; ++i)
100 			if (ka->key[i].k && ka->key[i].k != s)
101 				bwsfree(ka->key[i].k);
102 		memset(ka, 0, keys_array_size());
103 	}
104 }
105 
106 /*
107  * Set value of a key in the keys set
108  */
109 void
set_key_on_keys_array(struct keys_array * ka,struct bwstring * s,size_t ind)110 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
111 {
112 	if (ka && keys_num > ind) {
113 		struct key_value *kv;
114 
115 		kv = &(ka->key[ind]);
116 
117 		if (kv->k != s)
118 			bwsfree(kv->k);
119 		kv->k = s;
120 	}
121 }
122 
123 /*
124  * Initialize a sort list item
125  */
126 struct sort_list_item *
sort_list_item_alloc(void)127 sort_list_item_alloc(void)
128 {
129 	struct sort_list_item *si;
130 	size_t sz;
131 
132 	sz = sizeof(struct sort_list_item) + keys_array_size();
133 	si = sort_calloc(1, sz);
134 
135 	return si;
136 }
137 
138 size_t
sort_list_item_size(struct sort_list_item * si)139 sort_list_item_size(struct sort_list_item *si)
140 {
141 	size_t i, ret = 0;
142 
143 	if (si) {
144 		ret = sizeof(struct sort_list_item) + keys_array_size();
145 		if (si->str)
146 			ret += bws_memsize(si->str);
147 		for (i = 0; i < keys_num; ++i) {
148 			struct key_value *kv;
149 
150 			kv = &(si->ka.key[i]);
151 
152 			if (kv->k != si->str)
153 				ret += bws_memsize(kv->k);
154 		}
155 	}
156 	return ret;
157 }
158 
159 /*
160  * Calculate key for a sort list item
161  */
162 static void
sort_list_item_make_key(struct sort_list_item * si)163 sort_list_item_make_key(struct sort_list_item *si)
164 {
165 	preproc(si->str, &(si->ka));
166 }
167 
168 /*
169  * Set value of a sort list item.
170  * Return combined string and keys memory size.
171  */
172 void
sort_list_item_set(struct sort_list_item * si,struct bwstring * str)173 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
174 {
175 	if (si) {
176 		clean_keys_array(si->str, &(si->ka));
177 		if (si->str) {
178 			if (si->str == str) {
179 				/* we are trying to reset the same string */
180 				return;
181 			} else {
182 				bwsfree(si->str);
183 				si->str = NULL;
184 			}
185 		}
186 		si->str = str;
187 		sort_list_item_make_key(si);
188 	}
189 }
190 
191 /*
192  * De-allocate a sort list item object memory
193  */
194 void
sort_list_item_clean(struct sort_list_item * si)195 sort_list_item_clean(struct sort_list_item *si)
196 {
197 	if (si) {
198 		clean_keys_array(si->str, &(si->ka));
199 		if (si->str) {
200 			bwsfree(si->str);
201 			si->str = NULL;
202 		}
203 	}
204 }
205 
206 /*
207  * Skip columns according to specs
208  */
209 static size_t
skip_cols_to_start(const struct bwstring * s,size_t cols,size_t start,bool skip_blanks,bool * empty_key)210 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
211     bool skip_blanks, bool *empty_key)
212 {
213 	if (cols < 1)
214 		return BWSLEN(s) + 1;
215 
216 	if (skip_blanks)
217 		while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
218 			++start;
219 
220 	while (start < BWSLEN(s) && cols > 1) {
221 		--cols;
222 		++start;
223 	}
224 
225 	if (start >= BWSLEN(s))
226 		*empty_key = true;
227 
228 	return start;
229 }
230 
231 /*
232  * Skip fields according to specs
233  */
234 static size_t
skip_fields_to_start(const struct bwstring * s,size_t fields,bool * empty_field)235 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
236 {
237 	if (fields < 2) {
238 		if (BWSLEN(s) == 0)
239 			*empty_field = true;
240 		return 0;
241 	} else if (!(sort_opts_vals.tflag)) {
242 		size_t cpos = 0;
243 		bool pb = true;
244 
245 		while (cpos < BWSLEN(s)) {
246 			bool isblank;
247 
248 			isblank = iswblank(BWS_GET(s, cpos));
249 
250 			if (isblank && !pb) {
251 				--fields;
252 				if (fields <= 1)
253 					return cpos;
254 			}
255 			pb = isblank;
256 			++cpos;
257 		}
258 		if (fields > 1)
259 			*empty_field = true;
260 		return cpos;
261 	} else {
262 		size_t cpos = 0;
263 
264 		while (cpos < BWSLEN(s)) {
265 			if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
266 				--fields;
267 				if (fields <= 1)
268 					return cpos + 1;
269 			}
270 			++cpos;
271 		}
272 		if (fields > 1)
273 			*empty_field = true;
274 		return cpos;
275 	}
276 }
277 
278 /*
279  * Find fields start
280  */
281 static void
find_field_start(const struct bwstring * s,struct key_specs * ks,size_t * field_start,size_t * key_start,bool * empty_field,bool * empty_key)282 find_field_start(const struct bwstring *s, struct key_specs *ks,
283     size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
284 {
285 	*field_start = skip_fields_to_start(s, ks->f1, empty_field);
286 	if (!*empty_field)
287 		*key_start = skip_cols_to_start(s, ks->c1, *field_start,
288 		    ks->pos1b, empty_key);
289 	else
290 		*empty_key = true;
291 }
292 
293 /*
294  * Find end key position
295  */
296 static size_t
find_field_end(const struct bwstring * s,struct key_specs * ks)297 find_field_end(const struct bwstring *s, struct key_specs *ks)
298 {
299 	size_t f2, next_field_start, pos_end;
300 	bool empty_field, empty_key;
301 
302 	empty_field = false;
303 	empty_key = false;
304 	f2 = ks->f2;
305 
306 	if (f2 == 0)
307 		return BWSLEN(s) + 1;
308 	else {
309 		if (ks->c2 == 0) {
310 			next_field_start = skip_fields_to_start(s, f2 + 1,
311 			    &empty_field);
312 			if ((next_field_start > 0) && sort_opts_vals.tflag &&
313 			    ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
314 			    next_field_start - 1)))
315 				--next_field_start;
316 		} else
317 			next_field_start = skip_fields_to_start(s, f2,
318 			    &empty_field);
319 	}
320 
321 	if (empty_field || (next_field_start >= BWSLEN(s)))
322 		return BWSLEN(s) + 1;
323 
324 	if (ks->c2) {
325 		pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
326 		    ks->pos2b, &empty_key);
327 		if (pos_end < BWSLEN(s))
328 			++pos_end;
329 	} else
330 		pos_end = next_field_start;
331 
332 	return pos_end;
333 }
334 
335 /*
336  * Cut a field according to the key specs
337  */
338 static struct bwstring *
cut_field(const struct bwstring * s,struct key_specs * ks)339 cut_field(const struct bwstring *s, struct key_specs *ks)
340 {
341 	struct bwstring *ret = NULL;
342 
343 	if (s && ks) {
344 		size_t field_start, key_end, key_start, sz;
345 		bool empty_field, empty_key;
346 
347 		field_start = 0;
348 		key_start = 0;
349 		empty_field = false;
350 		empty_key = false;
351 
352 		find_field_start(s, ks, &field_start, &key_start,
353 		    &empty_field, &empty_key);
354 
355 		if (empty_key)
356 			sz = 0;
357 		else {
358 			key_end = find_field_end(s, ks);
359 			sz = (key_end < key_start) ? 0 : (key_end - key_start);
360 		}
361 
362 		ret = bwsalloc(sz);
363 		if (sz)
364 			bwsnocpy(ret, s, key_start, sz);
365 	} else
366 		ret = bwsalloc(0);
367 
368 	return ret;
369 }
370 
371 /*
372  * Preprocesses a line applying the necessary transformations
373  * specified by command line options and returns the preprocessed
374  * string, which can be used to compare.
375  */
376 int
preproc(struct bwstring * s,struct keys_array * ka)377 preproc(struct bwstring *s, struct keys_array *ka)
378 {
379 	if (sort_opts_vals.kflag) {
380 		size_t i;
381 		for (i = 0; i < keys_num; i++) {
382 			struct bwstring *key;
383 			struct key_specs *kspecs;
384 			struct sort_mods *sm;
385 
386 			kspecs = &(keys[i]);
387 			key = cut_field(s, kspecs);
388 
389 			sm = &(kspecs->sm);
390 			if (sm->dflag)
391 				key = dictionary_order(key);
392 			else if (sm->iflag)
393 				key = ignore_nonprinting(key);
394 			if (sm->fflag || sm->Mflag)
395 				key = ignore_case(key);
396 
397 			set_key_on_keys_array(ka, key, i);
398 		}
399 	} else {
400 		struct bwstring *ret = NULL;
401 		struct sort_mods *sm = default_sort_mods;
402 
403 #ifdef GNUSORT_COMPATIBILITY
404 		if (sm->bflag) {
405 			if (ret == NULL)
406 				ret = bwsdup(s);
407 			ret = ignore_leading_blanks(ret);
408 		}
409 #endif
410 		if (sm->dflag) {
411 			if (ret == NULL)
412 				ret = bwsdup(s);
413 			ret = dictionary_order(ret);
414 		} else if (sm->iflag) {
415 			if (ret == NULL)
416 				ret = bwsdup(s);
417 			ret = ignore_nonprinting(ret);
418 		}
419 		if (sm->fflag || sm->Mflag) {
420 			if (ret == NULL)
421 				ret = bwsdup(s);
422 			ret = ignore_case(ret);
423 		}
424 		if (ret == NULL)
425 			set_key_on_keys_array(ka, s, 0);
426 		else
427 			set_key_on_keys_array(ka, ret, 0);
428 	}
429 
430 	return 0;
431 }
432 
433 cmpcoll_t
get_sort_func(struct sort_mods * sm)434 get_sort_func(struct sort_mods *sm)
435 {
436 	if (sm->nflag)
437 		return numcoll;
438 	else if (sm->hflag)
439 		return hnumcoll;
440 	else if (sm->gflag)
441 		return gnumcoll;
442 	else if (sm->Mflag)
443 		return monthcoll;
444 	else if (sm->Rflag)
445 		return randomcoll;
446 	else if (sm->Vflag)
447 		return versioncoll;
448 	else
449 		return wstrcoll;
450 }
451 
452 /*
453  * Compares the given strings.  Returns a positive number if
454  * the first precedes the second, a negative number if the second is
455  * the preceding one, and zero if they are equal.  This function calls
456  * the underlying collate functions, which done the actual comparison.
457  */
458 int
key_coll(struct keys_array * ps1,struct keys_array * ps2,size_t offset)459 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
460 {
461 	struct sort_mods *sm;
462 	int res = 0;
463 	size_t i;
464 
465 	for (i = 0; i < keys_num; ++i) {
466 		sm = &(keys[i].sm);
467 
468 		if (sm->rflag)
469 			res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
470 		else
471 			res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
472 
473 		if (res)
474 			break;
475 
476 		/* offset applies to only the first key */
477 		offset = 0;
478 	}
479 	return res;
480 }
481 
482 /*
483  * Compare two strings.
484  * Plain symbol-by-symbol comparison.
485  */
486 int
top_level_str_coll(const struct bwstring * s1,const struct bwstring * s2)487 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
488 {
489 	if (default_sort_mods->rflag) {
490 		const struct bwstring *tmp;
491 
492 		tmp = s1;
493 		s1 = s2;
494 		s2 = tmp;
495 	}
496 
497 	return bwscoll(s1, s2, 0);
498 }
499 
500 /*
501  * Compare a string and a sort list item, according to the sort specs.
502  */
503 int
str_list_coll(struct bwstring * str1,struct sort_list_item ** ss2)504 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
505 {
506 	struct keys_array *ka1;
507 	int ret = 0;
508 
509 	ka1 = keys_array_alloc();
510 
511 	preproc(str1, ka1);
512 
513 	sort_list_item_make_key(*ss2);
514 
515 	if (debug_sort) {
516 		bwsprintf(stdout, str1, "; s1=<", ">");
517 		bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
518 	}
519 
520 	ret = key_coll(ka1, &((*ss2)->ka), 0);
521 
522 	if (debug_sort)
523 		printf("; cmp1=%d", ret);
524 
525 	clean_keys_array(str1, ka1);
526 	sort_free(ka1);
527 
528 	if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
529 		ret = top_level_str_coll(str1, ((*ss2)->str));
530 		if (debug_sort)
531 			printf("; cmp2=%d", ret);
532 	}
533 
534 	if (debug_sort)
535 		putchar('\n');
536 
537 	return ret;
538 }
539 
540 /*
541  * Compare two sort list items, according to the sort specs.
542  */
543 int
list_coll_offset(struct sort_list_item ** ss1,struct sort_list_item ** ss2,size_t offset)544 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
545     size_t offset)
546 {
547 	int ret;
548 
549 	ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
550 
551 	if (debug_sort) {
552 		if (offset)
553 			printf("; offset=%zu", offset);
554 		bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
555 		bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
556 		printf("; cmp1=%d\n", ret);
557 	}
558 
559 	if (ret)
560 		return ret;
561 
562 	if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
563 		ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
564 		if (debug_sort)
565 			printf("; cmp2=%d\n", ret);
566 	}
567 
568 	return ret;
569 }
570 
571 /*
572  * Compare two sort list items, according to the sort specs.
573  */
574 int
list_coll(const void * ss1,const void * ss2)575 list_coll(const void *ss1, const void *ss2)
576 {
577 	return list_coll_offset((struct sort_list_item **)ss1,
578 	    (struct sort_list_item **)ss2, 0);
579 }
580 
581 #define LSCDEF(N)											\
582 static int												\
583 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2)					\
584 {													\
585 													\
586 	return list_coll_offset(ss1, ss2, N);								\
587 }
588 
589 LSCDEF(0)
590 LSCDEF(1)
591 LSCDEF(2)
592 LSCDEF(3)
593 LSCDEF(4)
594 LSCDEF(5)
595 LSCDEF(6)
596 LSCDEF(7)
597 LSCDEF(8)
598 LSCDEF(9)
599 LSCDEF(10)
600 LSCDEF(11)
601 LSCDEF(12)
602 LSCDEF(13)
603 LSCDEF(14)
604 LSCDEF(15)
605 LSCDEF(16)
606 LSCDEF(17)
607 LSCDEF(18)
608 LSCDEF(19)
609 LSCDEF(20)
610 
611 listcoll_t
get_list_call_func(size_t offset)612 get_list_call_func(size_t offset)
613 {
614 	static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
615 	    list_coll_2, list_coll_3, list_coll_4, list_coll_5,
616 	    list_coll_6, list_coll_7, list_coll_8, list_coll_9,
617 	    list_coll_10, list_coll_11, list_coll_12, list_coll_13,
618 	    list_coll_14, list_coll_15, list_coll_16, list_coll_17,
619 	    list_coll_18, list_coll_19, list_coll_20 };
620 
621 	if (offset <= 20)
622 		return lsarray[offset];
623 
624 	return list_coll_0;
625 }
626 
627 /*
628  * Compare two sort list items, only by their original string.
629  */
630 int
list_coll_by_str_only(struct sort_list_item ** ss1,struct sort_list_item ** ss2)631 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
632 {
633 	return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
634 }
635 
636 /*
637  * Maximum size of a number in the string (before or after decimal point)
638  */
639 #define MAX_NUM_SIZE (128)
640 
641 /*
642  * Set suffix value
643  */
644 static void
setsuffix(wchar_t c,unsigned char * si)645 setsuffix(wchar_t c, unsigned char *si)
646 {
647 	switch (c){
648 	case L'k':
649 	case L'K':
650 		*si = 1;
651 		break;
652 	case L'M':
653 		*si = 2;
654 		break;
655 	case L'G':
656 		*si = 3;
657 		break;
658 	case L'T':
659 		*si = 4;
660 		break;
661 	case L'P':
662 		*si = 5;
663 		break;
664 	case L'E':
665 		*si = 6;
666 		break;
667 	case L'Z':
668 		*si = 7;
669 		break;
670 	case L'Y':
671 		*si = 8;
672 		break;
673 	default:
674 		*si = 0;
675 	}
676 }
677 
678 /*
679  * Read string s and parse the string into a fixed-decimal-point number.
680  * sign equals -1 if the number is negative (explicit plus is not allowed,
681  * according to GNU sort's "info sort".
682  * The number part before decimal point is in the smain, after the decimal
683  * point is in sfrac, tail is the pointer to the remainder of the string.
684  */
685 static int
read_number(struct bwstring * s0,int * sign,wchar_t * smain,size_t * main_len,wchar_t * sfrac,size_t * frac_len,unsigned char * si)686 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
687 {
688 	bwstring_iterator s;
689 
690 	s = bws_begin(s0);
691 
692 	/* always end the fraction with zero, even if we have no fraction */
693 	sfrac[0] = 0;
694 
695 	while (iswblank(bws_get_iter_value(s)))
696 		s = bws_iterator_inc(s, 1);
697 
698 	if (bws_get_iter_value(s) == L'-') {
699 		*sign = -1;
700 		s = bws_iterator_inc(s, 1);
701 	}
702 
703 	// This is '0', not '\0', do not change this
704 	while (iswdigit(bws_get_iter_value(s)) &&
705 	    (bws_get_iter_value(s) == L'0'))
706 		s = bws_iterator_inc(s, 1);
707 
708 	while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
709 		if (iswdigit(bws_get_iter_value(s))) {
710 			smain[*main_len] = bws_get_iter_value(s);
711 			s = bws_iterator_inc(s, 1);
712 			*main_len += 1;
713 		} else
714 			break;
715 	}
716 
717 	smain[*main_len] = 0;
718 
719 	if (bws_get_iter_value(s) == L'.') {
720 		s = bws_iterator_inc(s, 1);
721 		while (iswdigit(bws_get_iter_value(s)) &&
722 		    *frac_len < MAX_NUM_SIZE) {
723 			sfrac[*frac_len] = bws_get_iter_value(s);
724 			s = bws_iterator_inc(s, 1);
725 			*frac_len += 1;
726 		}
727 		sfrac[*frac_len] = 0;
728 
729 		while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
730 			--(*frac_len);
731 			sfrac[*frac_len] = L'\0';
732 		}
733 	}
734 
735 	setsuffix(bws_get_iter_value(s), si);
736 
737 	if ((*main_len + *frac_len) == 0)
738 		*sign = 0;
739 
740 	return 0;
741 }
742 
743 /*
744  * Implements string sort.
745  */
746 static int
wstrcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)747 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
748 {
749 
750 	if (debug_sort) {
751 		if (offset)
752 			printf("; offset=%zu\n", offset);
753 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
754 		printf("(%zu)", BWSLEN(kv1->k));
755 		bwsprintf(stdout, kv2->k, ", k2=<", ">");
756 		printf("(%zu)", BWSLEN(kv2->k));
757 	}
758 
759 	return bwscoll(kv1->k, kv2->k, offset);
760 }
761 
762 /*
763  * Compare two suffixes
764  */
765 static inline int
cmpsuffix(unsigned char si1,unsigned char si2)766 cmpsuffix(unsigned char si1, unsigned char si2)
767 {
768 	return (char)si1 - (char)si2;
769 }
770 
771 /*
772  * Implements numeric sort for -n and -h.
773  */
774 static int
numcoll_impl(struct key_value * kv1,struct key_value * kv2,size_t offset __unused,bool use_suffix)775 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
776     size_t offset __unused, bool use_suffix)
777 {
778 	struct bwstring *s1, *s2;
779 	wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
780 	wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
781 	int cmp_res, sign1, sign2;
782 	size_t frac1, frac2, main1, main2;
783 	unsigned char SI1, SI2;
784 	bool e1, e2, key1_read, key2_read;
785 
786 	s1 = kv1->k;
787 	s2 = kv2->k;
788 	sign1 = sign2 = 0;
789 	main1 = main2 = 0;
790 	frac1 = frac2 = 0;
791 
792 	key1_read = key2_read = false;
793 
794 	if (debug_sort) {
795 		bwsprintf(stdout, s1, "; k1=<", ">");
796 		bwsprintf(stdout, s2, ", k2=<", ">");
797 	}
798 
799 	if (s1 == s2)
800 		return 0;
801 
802 	if (kv1->hint->status == HS_UNINITIALIZED) {
803 		/* read the number from the string */
804 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
805 		key1_read = true;
806 		kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
807 		if (main1 < 1 && frac1 < 1)
808 			kv1->hint->v.nh.empty=true;
809 		kv1->hint->v.nh.si = SI1;
810 		kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
811 		    HS_INITIALIZED : HS_ERROR;
812 		kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
813 	}
814 
815 	if (kv2->hint->status == HS_UNINITIALIZED) {
816 		/* read the number from the string */
817 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
818 		key2_read = true;
819 		kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
820 		if (main2 < 1 && frac2 < 1)
821 			kv2->hint->v.nh.empty=true;
822 		kv2->hint->v.nh.si = SI2;
823 		kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
824 		    HS_INITIALIZED : HS_ERROR;
825 		kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
826 	}
827 
828 	if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
829 	    HS_INITIALIZED) {
830 		unsigned long long n1, n2;
831 		bool neg1, neg2;
832 
833 		e1 = kv1->hint->v.nh.empty;
834 		e2 = kv2->hint->v.nh.empty;
835 
836 		if (e1 && e2)
837 			return 0;
838 
839 		neg1 = kv1->hint->v.nh.neg;
840 		neg2 = kv2->hint->v.nh.neg;
841 
842 		if (neg1 && !neg2)
843 			return -1;
844 		if (neg2 && !neg1)
845 			return 1;
846 
847 		if (e1)
848 			return neg2 ? 1 : -1;
849 		else if (e2)
850 			return neg1 ? -1 : 1;
851 
852 
853 		if (use_suffix) {
854 			cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
855 			if (cmp_res)
856 				return neg1 ? -cmp_res : cmp_res;
857 		}
858 
859 		n1 = kv1->hint->v.nh.n1;
860 		n2 = kv2->hint->v.nh.n1;
861 		if (n1 < n2)
862 			return neg1 ? 1 : -1;
863 		else if (n1 > n2)
864 			return neg1 ? -1 : 1;
865 	}
866 
867 	/* read the numbers from the strings */
868 	if (!key1_read)
869 		read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
870 	if (!key2_read)
871 		read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
872 
873 	e1 = ((main1 + frac1) == 0);
874 	e2 = ((main2 + frac2) == 0);
875 
876 	if (e1 && e2)
877 		return 0;
878 
879 	/* we know the result if the signs are different */
880 	if (sign1 < 0 && sign2 >= 0)
881 		return -1;
882 	if (sign1 >= 0 && sign2 < 0)
883 		return 1;
884 
885 	if (e1)
886 		return (sign2 < 0) ? +1 : -1;
887 	else if (e2)
888 		return (sign1 < 0) ? -1 : 1;
889 
890 	if (use_suffix) {
891 		cmp_res = cmpsuffix(SI1, SI2);
892 		if (cmp_res)
893 			return (sign1 < 0) ? -cmp_res : cmp_res;
894 	}
895 
896 	/* if both numbers are empty assume that the strings are equal */
897 	if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
898 		return 0;
899 
900 	/*
901 	 * if the main part is of different size, we know the result
902 	 * (because the leading zeros are removed)
903 	 */
904 	if (main1 < main2)
905 		cmp_res = -1;
906 	else if (main1 > main2)
907 		cmp_res = +1;
908 	/* if the sizes are equal then simple non-collate string compare gives the correct result */
909 	else
910 		cmp_res = wcscmp(smain1, smain2);
911 
912 	/* check fraction */
913 	if (!cmp_res)
914 		cmp_res = wcscmp(sfrac1, sfrac2);
915 
916 	if (!cmp_res)
917 		return 0;
918 
919 	/* reverse result if the signs are negative */
920 	if (sign1 < 0 && sign2 < 0)
921 		cmp_res = -cmp_res;
922 
923 	return cmp_res;
924 }
925 
926 /*
927  * Implements numeric sort (-n).
928  */
929 static int
numcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)930 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
931 {
932 	return numcoll_impl(kv1, kv2, offset, false);
933 }
934 
935 /*
936  * Implements 'human' numeric sort (-h).
937  */
938 static int
hnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)939 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
940 {
941 	return numcoll_impl(kv1, kv2, offset, true);
942 }
943 
944 /*
945  * Implements random sort (-R).
946  */
947 static int
randomcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)948 randomcoll(struct key_value *kv1, struct key_value *kv2,
949     size_t offset __unused)
950 {
951 	struct bwstring *s1, *s2;
952 	MD5_CTX ctx1, ctx2;
953 	char *b1, *b2;
954 
955 	s1 = kv1->k;
956 	s2 = kv2->k;
957 
958 	if (debug_sort) {
959 		bwsprintf(stdout, s1, "; k1=<", ">");
960 		bwsprintf(stdout, s2, ", k2=<", ">");
961 	}
962 
963 	if (s1 == s2)
964 		return 0;
965 
966 	memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
967 	memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
968 
969 	MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
970 	MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
971 	b1 = MD5End(&ctx1, NULL);
972 	b2 = MD5End(&ctx2, NULL);
973 	if (b1 == NULL) {
974 		if (b2 == NULL)
975 			return 0;
976 		else {
977 			sort_free(b2);
978 			return -1;
979 		}
980 	} else if (b2 == NULL) {
981 		sort_free(b1);
982 		return 1;
983 	} else {
984 		int cmp_res;
985 
986 		cmp_res = strcmp(b1, b2);
987 		sort_free(b1);
988 		sort_free(b2);
989 
990 		if (!cmp_res)
991 			cmp_res = bwscoll(s1, s2, 0);
992 
993 		return cmp_res;
994 	}
995 }
996 
997 /*
998  * Implements version sort (-V).
999  */
1000 static int
versioncoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1001 versioncoll(struct key_value *kv1, struct key_value *kv2,
1002     size_t offset __unused)
1003 {
1004 	struct bwstring *s1, *s2;
1005 
1006 	s1 = kv1->k;
1007 	s2 = kv2->k;
1008 
1009 	if (debug_sort) {
1010 		bwsprintf(stdout, s1, "; k1=<", ">");
1011 		bwsprintf(stdout, s2, ", k2=<", ">");
1012 	}
1013 
1014 	if (s1 == s2)
1015 		return 0;
1016 
1017 	return vcmp(s1, s2);
1018 }
1019 
1020 /*
1021  * Check for minus infinity
1022  */
1023 static inline bool
huge_minus(double d,int err1)1024 huge_minus(double d, int err1)
1025 {
1026 	if (err1 == ERANGE)
1027 		if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1028 			return 1;
1029 
1030 	return 0;
1031 }
1032 
1033 /*
1034  * Check for plus infinity
1035  */
1036 static inline bool
huge_plus(double d,int err1)1037 huge_plus(double d, int err1)
1038 {
1039 	if (err1 == ERANGE)
1040 		if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1041 			return 1;
1042 
1043 	return 0;
1044 }
1045 
1046 /*
1047  * Check whether a function is a NAN
1048  */
1049 static bool
is_nan(double d)1050 is_nan(double d)
1051 {
1052 #if defined(NAN)
1053 	return (d == NAN || isnan(d));
1054 #else
1055 	return (isnan(d));
1056 #endif
1057 }
1058 
1059 /*
1060  * Compare two NANs
1061  */
1062 static int
cmp_nans(double d1,double d2)1063 cmp_nans(double d1, double d2)
1064 {
1065 	if (d1 == d2)
1066 		return 0;
1067 	return d1 < d2 ? -1 : 1;
1068 }
1069 
1070 /*
1071  * Implements general numeric sort (-g).
1072  */
1073 static int
gnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1074 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1075     size_t offset __unused)
1076 {
1077 	double d1, d2;
1078 	int err1, err2;
1079 	bool empty1, empty2, key1_read, key2_read;
1080 
1081 	d1 = d2 = 0;
1082 	err1 = err2 = 0;
1083 	key1_read = key2_read = false;
1084 
1085 	if (debug_sort) {
1086 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1087 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1088 	}
1089 
1090 	if (kv1->hint->status == HS_UNINITIALIZED) {
1091 		errno = 0;
1092 		d1 = bwstod(kv1->k, &empty1);
1093 		err1 = errno;
1094 
1095 		if (empty1) {
1096 			kv1->hint->v.gh.notnum = true;
1097 			kv1->hint->status = HS_INITIALIZED;
1098 		} else if (err1 == 0) {
1099 			kv1->hint->v.gh.d = d1;
1100 			kv1->hint->v.gh.nan = is_nan(d1);
1101 			kv1->hint->status = HS_INITIALIZED;
1102 		} else
1103 			kv1->hint->status = HS_ERROR;
1104 
1105 		key1_read = true;
1106 	}
1107 
1108 	if (kv2->hint->status == HS_UNINITIALIZED) {
1109 		errno = 0;
1110 		d2 = bwstod(kv2->k, &empty2);
1111 		err2 = errno;
1112 
1113 		if (empty2) {
1114 			kv2->hint->v.gh.notnum = true;
1115 			kv2->hint->status = HS_INITIALIZED;
1116 		} else if (err2 == 0) {
1117 			kv2->hint->v.gh.d = d2;
1118 			kv2->hint->v.gh.nan = is_nan(d2);
1119 			kv2->hint->status = HS_INITIALIZED;
1120 		} else
1121 			kv2->hint->status = HS_ERROR;
1122 
1123 		key2_read = true;
1124 	}
1125 
1126 	if (kv1->hint->status == HS_INITIALIZED &&
1127 	    kv2->hint->status == HS_INITIALIZED) {
1128 #ifdef GNUSORT_COMPATIBILITY
1129 		if (kv1->hint->v.gh.notnum)
1130 			return kv2->hint->v.gh.notnum ? 0 : -1;
1131 		else if (kv2->hint->v.gh.notnum)
1132 			return 1;
1133 #else
1134 		if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
1135 			return 0;
1136 #endif
1137 
1138 		if (kv1->hint->v.gh.nan)
1139 			return kv2->hint->v.gh.nan ?
1140 			    cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
1141 		else if (kv2->hint->v.gh.nan)
1142 			return 1;
1143 
1144 		d1 = kv1->hint->v.gh.d;
1145 		d2 = kv2->hint->v.gh.d;
1146 
1147 		if (d1 < d2)
1148 			return -1;
1149 		else if (d1 > d2)
1150 			return 1;
1151 		else
1152 			return 0;
1153 	}
1154 
1155 	if (!key1_read) {
1156 		errno = 0;
1157 		d1 = bwstod(kv1->k, &empty1);
1158 		err1 = errno;
1159 	}
1160 
1161 	if (!key2_read) {
1162 		errno = 0;
1163 		d2 = bwstod(kv2->k, &empty2);
1164 		err2 = errno;
1165 	}
1166 
1167 	/* Non-value case */
1168 #ifdef GNUSORT_COMPATIBILITY
1169 	if (empty1)
1170 		return empty2 ? 0 : -1;
1171 	else if (empty2)
1172 		return 1;
1173 #else
1174 	if (empty1 && empty2)
1175 		return 0;
1176 #endif
1177 
1178 	/* NAN case */
1179 	if (is_nan(d1))
1180 		return is_nan(d2) ? cmp_nans(d1, d2) : -1;
1181 	else if (is_nan(d2))
1182 		return 1;
1183 
1184 	/* Infinities */
1185 	if (err1 == ERANGE || err2 == ERANGE) {
1186 		/* Minus infinity case */
1187 		if (huge_minus(d1, err1)) {
1188 			if (huge_minus(d2, err2)) {
1189 				if (d1 == d2)
1190 					return 0;
1191 				return d1 < d2 ? -1 : 1;
1192 			} else
1193 				return -1;
1194 
1195 		} else if (huge_minus(d2, err2)) {
1196 			if (huge_minus(d1, err1)) {
1197 				if (d1 == d2)
1198 					return 0;
1199 				return d1 < d2 ? -1 : 1;
1200 			} else
1201 				return 1;
1202 		}
1203 
1204 		/* Plus infinity case */
1205 		if (huge_plus(d1, err1)) {
1206 			if (huge_plus(d2, err2)) {
1207 				if (d1 == d2)
1208 					return 0;
1209 				return d1 < d2 ? -1 : 1;
1210 			} else
1211 				return 1;
1212 		} else if (huge_plus(d2, err2)) {
1213 			if (huge_plus(d1, err1)) {
1214 				if (d1 == d2)
1215 					return 0;
1216 				return d1 < d2 ? -1 : 1;
1217 			} else
1218 				return -1;
1219 		}
1220 	}
1221 
1222 	if (d1 == d2)
1223 		return 0;
1224 	return d1 < d2 ? -1 : 1;
1225 }
1226 
1227 /*
1228  * Implements month sort (-M).
1229  */
1230 static int
monthcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1231 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1232 {
1233 	int val1, val2;
1234 	bool key1_read, key2_read;
1235 
1236 	val1 = val2 = 0;
1237 	key1_read = key2_read = false;
1238 
1239 	if (debug_sort) {
1240 		bwsprintf(stdout, kv1->k, "; k1=<", ">");
1241 		bwsprintf(stdout, kv2->k, "; k2=<", ">");
1242 	}
1243 
1244 	if (kv1->hint->status == HS_UNINITIALIZED) {
1245 		kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1246 		key1_read = true;
1247 		kv1->hint->status = HS_INITIALIZED;
1248 	}
1249 
1250 	if (kv2->hint->status == HS_UNINITIALIZED) {
1251 		kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1252 		key2_read = true;
1253 		kv2->hint->status = HS_INITIALIZED;
1254 	}
1255 
1256 	if (kv1->hint->status == HS_INITIALIZED) {
1257 		val1 = kv1->hint->v.Mh.m;
1258 		key1_read = true;
1259 	}
1260 
1261 	if (kv2->hint->status == HS_INITIALIZED) {
1262 		val2 = kv2->hint->v.Mh.m;
1263 		key2_read = true;
1264 	}
1265 
1266 	if (!key1_read)
1267 		val1 = bws_month_score(kv1->k);
1268 	if (!key2_read)
1269 		val2 = bws_month_score(kv2->k);
1270 
1271 	if (val1 == val2)
1272 		return 0;
1273 	return val1 < val2 ? -1 : 1;
1274 }
1275