1 /* $OpenBSD: coll.c,v 1.13 2024/09/20 02:00:46 jsg Exp $ */
2
3 /*-
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/types.h>
31
32 #include <errno.h>
33 #include <err.h>
34 #include <langinfo.h>
35 #include <limits.h>
36 #include <math.h>
37 #include <md5.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42
43 #include "coll.h"
44 #include "vsort.h"
45
46 struct key_specs *keys;
47 size_t keys_num = 0;
48
49 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
50 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
51 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
52 static int numcoll(struct key_value*, struct key_value *, size_t offset);
53 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
54 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
55 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
56
57 /*
58 * Allocate keys array
59 */
60 struct keys_array *
keys_array_alloc(void)61 keys_array_alloc(void)
62 {
63 struct keys_array *ka;
64 size_t sz;
65
66 sz = keys_array_size();
67 ka = sort_calloc(1, sz);
68
69 return ka;
70 }
71
72 /*
73 * Calculate whether we need key hint space
74 */
75 static size_t
key_hint_size(void)76 key_hint_size(void)
77 {
78 return need_hint ? sizeof(struct key_hint) : 0;
79 }
80
81 /*
82 * Calculate keys array size
83 */
84 size_t
keys_array_size(void)85 keys_array_size(void)
86 {
87 return keys_num * (sizeof(struct key_value) + key_hint_size());
88 }
89
90 /*
91 * Clean data of keys array
92 */
93 void
clean_keys_array(const struct bwstring * s,struct keys_array * ka)94 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
95 {
96 if (ka) {
97 size_t i;
98
99 for (i = 0; i < keys_num; ++i)
100 if (ka->key[i].k && ka->key[i].k != s)
101 bwsfree(ka->key[i].k);
102 memset(ka, 0, keys_array_size());
103 }
104 }
105
106 /*
107 * Set value of a key in the keys set
108 */
109 void
set_key_on_keys_array(struct keys_array * ka,struct bwstring * s,size_t ind)110 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
111 {
112 if (ka && keys_num > ind) {
113 struct key_value *kv;
114
115 kv = &(ka->key[ind]);
116
117 if (kv->k != s)
118 bwsfree(kv->k);
119 kv->k = s;
120 }
121 }
122
123 /*
124 * Initialize a sort list item
125 */
126 struct sort_list_item *
sort_list_item_alloc(void)127 sort_list_item_alloc(void)
128 {
129 struct sort_list_item *si;
130 size_t sz;
131
132 sz = sizeof(struct sort_list_item) + keys_array_size();
133 si = sort_calloc(1, sz);
134
135 return si;
136 }
137
138 size_t
sort_list_item_size(struct sort_list_item * si)139 sort_list_item_size(struct sort_list_item *si)
140 {
141 size_t i, ret = 0;
142
143 if (si) {
144 ret = sizeof(struct sort_list_item) + keys_array_size();
145 if (si->str)
146 ret += bws_memsize(si->str);
147 for (i = 0; i < keys_num; ++i) {
148 struct key_value *kv;
149
150 kv = &(si->ka.key[i]);
151
152 if (kv->k != si->str)
153 ret += bws_memsize(kv->k);
154 }
155 }
156 return ret;
157 }
158
159 /*
160 * Calculate key for a sort list item
161 */
162 static void
sort_list_item_make_key(struct sort_list_item * si)163 sort_list_item_make_key(struct sort_list_item *si)
164 {
165 preproc(si->str, &(si->ka));
166 }
167
168 /*
169 * Set value of a sort list item.
170 * Return combined string and keys memory size.
171 */
172 void
sort_list_item_set(struct sort_list_item * si,struct bwstring * str)173 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
174 {
175 if (si) {
176 clean_keys_array(si->str, &(si->ka));
177 if (si->str) {
178 if (si->str == str) {
179 /* we are trying to reset the same string */
180 return;
181 } else {
182 bwsfree(si->str);
183 si->str = NULL;
184 }
185 }
186 si->str = str;
187 sort_list_item_make_key(si);
188 }
189 }
190
191 /*
192 * De-allocate a sort list item object memory
193 */
194 void
sort_list_item_clean(struct sort_list_item * si)195 sort_list_item_clean(struct sort_list_item *si)
196 {
197 if (si) {
198 clean_keys_array(si->str, &(si->ka));
199 if (si->str) {
200 bwsfree(si->str);
201 si->str = NULL;
202 }
203 }
204 }
205
206 /*
207 * Skip columns according to specs
208 */
209 static size_t
skip_cols_to_start(const struct bwstring * s,size_t cols,size_t start,bool skip_blanks,bool * empty_key)210 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
211 bool skip_blanks, bool *empty_key)
212 {
213 if (cols < 1)
214 return BWSLEN(s) + 1;
215
216 if (skip_blanks)
217 while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
218 ++start;
219
220 while (start < BWSLEN(s) && cols > 1) {
221 --cols;
222 ++start;
223 }
224
225 if (start >= BWSLEN(s))
226 *empty_key = true;
227
228 return start;
229 }
230
231 /*
232 * Skip fields according to specs
233 */
234 static size_t
skip_fields_to_start(const struct bwstring * s,size_t fields,bool * empty_field)235 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
236 {
237 if (fields < 2) {
238 if (BWSLEN(s) == 0)
239 *empty_field = true;
240 return 0;
241 } else if (!(sort_opts_vals.tflag)) {
242 size_t cpos = 0;
243 bool pb = true;
244
245 while (cpos < BWSLEN(s)) {
246 bool isblank;
247
248 isblank = iswblank(BWS_GET(s, cpos));
249
250 if (isblank && !pb) {
251 --fields;
252 if (fields <= 1)
253 return cpos;
254 }
255 pb = isblank;
256 ++cpos;
257 }
258 if (fields > 1)
259 *empty_field = true;
260 return cpos;
261 } else {
262 size_t cpos = 0;
263
264 while (cpos < BWSLEN(s)) {
265 if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
266 --fields;
267 if (fields <= 1)
268 return cpos + 1;
269 }
270 ++cpos;
271 }
272 if (fields > 1)
273 *empty_field = true;
274 return cpos;
275 }
276 }
277
278 /*
279 * Find fields start
280 */
281 static void
find_field_start(const struct bwstring * s,struct key_specs * ks,size_t * field_start,size_t * key_start,bool * empty_field,bool * empty_key)282 find_field_start(const struct bwstring *s, struct key_specs *ks,
283 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
284 {
285 *field_start = skip_fields_to_start(s, ks->f1, empty_field);
286 if (!*empty_field)
287 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
288 ks->pos1b, empty_key);
289 else
290 *empty_key = true;
291 }
292
293 /*
294 * Find end key position
295 */
296 static size_t
find_field_end(const struct bwstring * s,struct key_specs * ks)297 find_field_end(const struct bwstring *s, struct key_specs *ks)
298 {
299 size_t f2, next_field_start, pos_end;
300 bool empty_field, empty_key;
301
302 empty_field = false;
303 empty_key = false;
304 f2 = ks->f2;
305
306 if (f2 == 0)
307 return BWSLEN(s) + 1;
308 else {
309 if (ks->c2 == 0) {
310 next_field_start = skip_fields_to_start(s, f2 + 1,
311 &empty_field);
312 if ((next_field_start > 0) && sort_opts_vals.tflag &&
313 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
314 next_field_start - 1)))
315 --next_field_start;
316 } else
317 next_field_start = skip_fields_to_start(s, f2,
318 &empty_field);
319 }
320
321 if (empty_field || (next_field_start >= BWSLEN(s)))
322 return BWSLEN(s) + 1;
323
324 if (ks->c2) {
325 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
326 ks->pos2b, &empty_key);
327 if (pos_end < BWSLEN(s))
328 ++pos_end;
329 } else
330 pos_end = next_field_start;
331
332 return pos_end;
333 }
334
335 /*
336 * Cut a field according to the key specs
337 */
338 static struct bwstring *
cut_field(const struct bwstring * s,struct key_specs * ks)339 cut_field(const struct bwstring *s, struct key_specs *ks)
340 {
341 struct bwstring *ret = NULL;
342
343 if (s && ks) {
344 size_t field_start, key_end, key_start, sz;
345 bool empty_field, empty_key;
346
347 field_start = 0;
348 key_start = 0;
349 empty_field = false;
350 empty_key = false;
351
352 find_field_start(s, ks, &field_start, &key_start,
353 &empty_field, &empty_key);
354
355 if (empty_key)
356 sz = 0;
357 else {
358 key_end = find_field_end(s, ks);
359 sz = (key_end < key_start) ? 0 : (key_end - key_start);
360 }
361
362 ret = bwsalloc(sz);
363 if (sz)
364 bwsnocpy(ret, s, key_start, sz);
365 } else
366 ret = bwsalloc(0);
367
368 return ret;
369 }
370
371 /*
372 * Preprocesses a line applying the necessary transformations
373 * specified by command line options and returns the preprocessed
374 * string, which can be used to compare.
375 */
376 int
preproc(struct bwstring * s,struct keys_array * ka)377 preproc(struct bwstring *s, struct keys_array *ka)
378 {
379 if (sort_opts_vals.kflag) {
380 size_t i;
381 for (i = 0; i < keys_num; i++) {
382 struct bwstring *key;
383 struct key_specs *kspecs;
384 struct sort_mods *sm;
385
386 kspecs = &(keys[i]);
387 key = cut_field(s, kspecs);
388
389 sm = &(kspecs->sm);
390 if (sm->dflag)
391 key = dictionary_order(key);
392 else if (sm->iflag)
393 key = ignore_nonprinting(key);
394 if (sm->fflag || sm->Mflag)
395 key = ignore_case(key);
396
397 set_key_on_keys_array(ka, key, i);
398 }
399 } else {
400 struct bwstring *ret = NULL;
401 struct sort_mods *sm = default_sort_mods;
402
403 #ifdef GNUSORT_COMPATIBILITY
404 if (sm->bflag) {
405 if (ret == NULL)
406 ret = bwsdup(s);
407 ret = ignore_leading_blanks(ret);
408 }
409 #endif
410 if (sm->dflag) {
411 if (ret == NULL)
412 ret = bwsdup(s);
413 ret = dictionary_order(ret);
414 } else if (sm->iflag) {
415 if (ret == NULL)
416 ret = bwsdup(s);
417 ret = ignore_nonprinting(ret);
418 }
419 if (sm->fflag || sm->Mflag) {
420 if (ret == NULL)
421 ret = bwsdup(s);
422 ret = ignore_case(ret);
423 }
424 if (ret == NULL)
425 set_key_on_keys_array(ka, s, 0);
426 else
427 set_key_on_keys_array(ka, ret, 0);
428 }
429
430 return 0;
431 }
432
433 cmpcoll_t
get_sort_func(struct sort_mods * sm)434 get_sort_func(struct sort_mods *sm)
435 {
436 if (sm->nflag)
437 return numcoll;
438 else if (sm->hflag)
439 return hnumcoll;
440 else if (sm->gflag)
441 return gnumcoll;
442 else if (sm->Mflag)
443 return monthcoll;
444 else if (sm->Rflag)
445 return randomcoll;
446 else if (sm->Vflag)
447 return versioncoll;
448 else
449 return wstrcoll;
450 }
451
452 /*
453 * Compares the given strings. Returns a positive number if
454 * the first precedes the second, a negative number if the second is
455 * the preceding one, and zero if they are equal. This function calls
456 * the underlying collate functions, which done the actual comparison.
457 */
458 int
key_coll(struct keys_array * ps1,struct keys_array * ps2,size_t offset)459 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
460 {
461 struct sort_mods *sm;
462 int res = 0;
463 size_t i;
464
465 for (i = 0; i < keys_num; ++i) {
466 sm = &(keys[i].sm);
467
468 if (sm->rflag)
469 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
470 else
471 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
472
473 if (res)
474 break;
475
476 /* offset applies to only the first key */
477 offset = 0;
478 }
479 return res;
480 }
481
482 /*
483 * Compare two strings.
484 * Plain symbol-by-symbol comparison.
485 */
486 int
top_level_str_coll(const struct bwstring * s1,const struct bwstring * s2)487 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
488 {
489 if (default_sort_mods->rflag) {
490 const struct bwstring *tmp;
491
492 tmp = s1;
493 s1 = s2;
494 s2 = tmp;
495 }
496
497 return bwscoll(s1, s2, 0);
498 }
499
500 /*
501 * Compare a string and a sort list item, according to the sort specs.
502 */
503 int
str_list_coll(struct bwstring * str1,struct sort_list_item ** ss2)504 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
505 {
506 struct keys_array *ka1;
507 int ret = 0;
508
509 ka1 = keys_array_alloc();
510
511 preproc(str1, ka1);
512
513 sort_list_item_make_key(*ss2);
514
515 if (debug_sort) {
516 bwsprintf(stdout, str1, "; s1=<", ">");
517 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
518 }
519
520 ret = key_coll(ka1, &((*ss2)->ka), 0);
521
522 if (debug_sort)
523 printf("; cmp1=%d", ret);
524
525 clean_keys_array(str1, ka1);
526 sort_free(ka1);
527
528 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
529 ret = top_level_str_coll(str1, ((*ss2)->str));
530 if (debug_sort)
531 printf("; cmp2=%d", ret);
532 }
533
534 if (debug_sort)
535 putchar('\n');
536
537 return ret;
538 }
539
540 /*
541 * Compare two sort list items, according to the sort specs.
542 */
543 int
list_coll_offset(struct sort_list_item ** ss1,struct sort_list_item ** ss2,size_t offset)544 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
545 size_t offset)
546 {
547 int ret;
548
549 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
550
551 if (debug_sort) {
552 if (offset)
553 printf("; offset=%zu", offset);
554 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
555 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
556 printf("; cmp1=%d\n", ret);
557 }
558
559 if (ret)
560 return ret;
561
562 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
563 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
564 if (debug_sort)
565 printf("; cmp2=%d\n", ret);
566 }
567
568 return ret;
569 }
570
571 /*
572 * Compare two sort list items, according to the sort specs.
573 */
574 int
list_coll(const void * ss1,const void * ss2)575 list_coll(const void *ss1, const void *ss2)
576 {
577 return list_coll_offset((struct sort_list_item **)ss1,
578 (struct sort_list_item **)ss2, 0);
579 }
580
581 #define LSCDEF(N) \
582 static int \
583 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
584 { \
585 \
586 return list_coll_offset(ss1, ss2, N); \
587 }
588
589 LSCDEF(0)
590 LSCDEF(1)
591 LSCDEF(2)
592 LSCDEF(3)
593 LSCDEF(4)
594 LSCDEF(5)
595 LSCDEF(6)
596 LSCDEF(7)
597 LSCDEF(8)
598 LSCDEF(9)
599 LSCDEF(10)
600 LSCDEF(11)
601 LSCDEF(12)
602 LSCDEF(13)
603 LSCDEF(14)
604 LSCDEF(15)
605 LSCDEF(16)
606 LSCDEF(17)
607 LSCDEF(18)
608 LSCDEF(19)
609 LSCDEF(20)
610
611 listcoll_t
get_list_call_func(size_t offset)612 get_list_call_func(size_t offset)
613 {
614 static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
615 list_coll_2, list_coll_3, list_coll_4, list_coll_5,
616 list_coll_6, list_coll_7, list_coll_8, list_coll_9,
617 list_coll_10, list_coll_11, list_coll_12, list_coll_13,
618 list_coll_14, list_coll_15, list_coll_16, list_coll_17,
619 list_coll_18, list_coll_19, list_coll_20 };
620
621 if (offset <= 20)
622 return lsarray[offset];
623
624 return list_coll_0;
625 }
626
627 /*
628 * Compare two sort list items, only by their original string.
629 */
630 int
list_coll_by_str_only(struct sort_list_item ** ss1,struct sort_list_item ** ss2)631 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
632 {
633 return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
634 }
635
636 /*
637 * Maximum size of a number in the string (before or after decimal point)
638 */
639 #define MAX_NUM_SIZE (128)
640
641 /*
642 * Set suffix value
643 */
644 static void
setsuffix(wchar_t c,unsigned char * si)645 setsuffix(wchar_t c, unsigned char *si)
646 {
647 switch (c){
648 case L'k':
649 case L'K':
650 *si = 1;
651 break;
652 case L'M':
653 *si = 2;
654 break;
655 case L'G':
656 *si = 3;
657 break;
658 case L'T':
659 *si = 4;
660 break;
661 case L'P':
662 *si = 5;
663 break;
664 case L'E':
665 *si = 6;
666 break;
667 case L'Z':
668 *si = 7;
669 break;
670 case L'Y':
671 *si = 8;
672 break;
673 default:
674 *si = 0;
675 }
676 }
677
678 /*
679 * Read string s and parse the string into a fixed-decimal-point number.
680 * sign equals -1 if the number is negative (explicit plus is not allowed,
681 * according to GNU sort's "info sort".
682 * The number part before decimal point is in the smain, after the decimal
683 * point is in sfrac, tail is the pointer to the remainder of the string.
684 */
685 static int
read_number(struct bwstring * s0,int * sign,wchar_t * smain,size_t * main_len,wchar_t * sfrac,size_t * frac_len,unsigned char * si)686 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
687 {
688 bwstring_iterator s;
689
690 s = bws_begin(s0);
691
692 /* always end the fraction with zero, even if we have no fraction */
693 sfrac[0] = 0;
694
695 while (iswblank(bws_get_iter_value(s)))
696 s = bws_iterator_inc(s, 1);
697
698 if (bws_get_iter_value(s) == L'-') {
699 *sign = -1;
700 s = bws_iterator_inc(s, 1);
701 }
702
703 // This is '0', not '\0', do not change this
704 while (iswdigit(bws_get_iter_value(s)) &&
705 (bws_get_iter_value(s) == L'0'))
706 s = bws_iterator_inc(s, 1);
707
708 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
709 if (iswdigit(bws_get_iter_value(s))) {
710 smain[*main_len] = bws_get_iter_value(s);
711 s = bws_iterator_inc(s, 1);
712 *main_len += 1;
713 } else
714 break;
715 }
716
717 smain[*main_len] = 0;
718
719 if (bws_get_iter_value(s) == L'.') {
720 s = bws_iterator_inc(s, 1);
721 while (iswdigit(bws_get_iter_value(s)) &&
722 *frac_len < MAX_NUM_SIZE) {
723 sfrac[*frac_len] = bws_get_iter_value(s);
724 s = bws_iterator_inc(s, 1);
725 *frac_len += 1;
726 }
727 sfrac[*frac_len] = 0;
728
729 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
730 --(*frac_len);
731 sfrac[*frac_len] = L'\0';
732 }
733 }
734
735 setsuffix(bws_get_iter_value(s), si);
736
737 if ((*main_len + *frac_len) == 0)
738 *sign = 0;
739
740 return 0;
741 }
742
743 /*
744 * Implements string sort.
745 */
746 static int
wstrcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)747 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
748 {
749
750 if (debug_sort) {
751 if (offset)
752 printf("; offset=%zu\n", offset);
753 bwsprintf(stdout, kv1->k, "; k1=<", ">");
754 printf("(%zu)", BWSLEN(kv1->k));
755 bwsprintf(stdout, kv2->k, ", k2=<", ">");
756 printf("(%zu)", BWSLEN(kv2->k));
757 }
758
759 return bwscoll(kv1->k, kv2->k, offset);
760 }
761
762 /*
763 * Compare two suffixes
764 */
765 static inline int
cmpsuffix(unsigned char si1,unsigned char si2)766 cmpsuffix(unsigned char si1, unsigned char si2)
767 {
768 return (char)si1 - (char)si2;
769 }
770
771 /*
772 * Implements numeric sort for -n and -h.
773 */
774 static int
numcoll_impl(struct key_value * kv1,struct key_value * kv2,size_t offset __unused,bool use_suffix)775 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
776 size_t offset __unused, bool use_suffix)
777 {
778 struct bwstring *s1, *s2;
779 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
780 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
781 int cmp_res, sign1, sign2;
782 size_t frac1, frac2, main1, main2;
783 unsigned char SI1, SI2;
784 bool e1, e2, key1_read, key2_read;
785
786 s1 = kv1->k;
787 s2 = kv2->k;
788 sign1 = sign2 = 0;
789 main1 = main2 = 0;
790 frac1 = frac2 = 0;
791
792 key1_read = key2_read = false;
793
794 if (debug_sort) {
795 bwsprintf(stdout, s1, "; k1=<", ">");
796 bwsprintf(stdout, s2, ", k2=<", ">");
797 }
798
799 if (s1 == s2)
800 return 0;
801
802 if (kv1->hint->status == HS_UNINITIALIZED) {
803 /* read the number from the string */
804 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
805 key1_read = true;
806 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
807 if (main1 < 1 && frac1 < 1)
808 kv1->hint->v.nh.empty=true;
809 kv1->hint->v.nh.si = SI1;
810 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
811 HS_INITIALIZED : HS_ERROR;
812 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
813 }
814
815 if (kv2->hint->status == HS_UNINITIALIZED) {
816 /* read the number from the string */
817 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
818 key2_read = true;
819 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
820 if (main2 < 1 && frac2 < 1)
821 kv2->hint->v.nh.empty=true;
822 kv2->hint->v.nh.si = SI2;
823 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
824 HS_INITIALIZED : HS_ERROR;
825 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
826 }
827
828 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
829 HS_INITIALIZED) {
830 unsigned long long n1, n2;
831 bool neg1, neg2;
832
833 e1 = kv1->hint->v.nh.empty;
834 e2 = kv2->hint->v.nh.empty;
835
836 if (e1 && e2)
837 return 0;
838
839 neg1 = kv1->hint->v.nh.neg;
840 neg2 = kv2->hint->v.nh.neg;
841
842 if (neg1 && !neg2)
843 return -1;
844 if (neg2 && !neg1)
845 return 1;
846
847 if (e1)
848 return neg2 ? 1 : -1;
849 else if (e2)
850 return neg1 ? -1 : 1;
851
852
853 if (use_suffix) {
854 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
855 if (cmp_res)
856 return neg1 ? -cmp_res : cmp_res;
857 }
858
859 n1 = kv1->hint->v.nh.n1;
860 n2 = kv2->hint->v.nh.n1;
861 if (n1 < n2)
862 return neg1 ? 1 : -1;
863 else if (n1 > n2)
864 return neg1 ? -1 : 1;
865 }
866
867 /* read the numbers from the strings */
868 if (!key1_read)
869 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
870 if (!key2_read)
871 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
872
873 e1 = ((main1 + frac1) == 0);
874 e2 = ((main2 + frac2) == 0);
875
876 if (e1 && e2)
877 return 0;
878
879 /* we know the result if the signs are different */
880 if (sign1 < 0 && sign2 >= 0)
881 return -1;
882 if (sign1 >= 0 && sign2 < 0)
883 return 1;
884
885 if (e1)
886 return (sign2 < 0) ? +1 : -1;
887 else if (e2)
888 return (sign1 < 0) ? -1 : 1;
889
890 if (use_suffix) {
891 cmp_res = cmpsuffix(SI1, SI2);
892 if (cmp_res)
893 return (sign1 < 0) ? -cmp_res : cmp_res;
894 }
895
896 /* if both numbers are empty assume that the strings are equal */
897 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
898 return 0;
899
900 /*
901 * if the main part is of different size, we know the result
902 * (because the leading zeros are removed)
903 */
904 if (main1 < main2)
905 cmp_res = -1;
906 else if (main1 > main2)
907 cmp_res = +1;
908 /* if the sizes are equal then simple non-collate string compare gives the correct result */
909 else
910 cmp_res = wcscmp(smain1, smain2);
911
912 /* check fraction */
913 if (!cmp_res)
914 cmp_res = wcscmp(sfrac1, sfrac2);
915
916 if (!cmp_res)
917 return 0;
918
919 /* reverse result if the signs are negative */
920 if (sign1 < 0 && sign2 < 0)
921 cmp_res = -cmp_res;
922
923 return cmp_res;
924 }
925
926 /*
927 * Implements numeric sort (-n).
928 */
929 static int
numcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)930 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
931 {
932 return numcoll_impl(kv1, kv2, offset, false);
933 }
934
935 /*
936 * Implements 'human' numeric sort (-h).
937 */
938 static int
hnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)939 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
940 {
941 return numcoll_impl(kv1, kv2, offset, true);
942 }
943
944 /*
945 * Implements random sort (-R).
946 */
947 static int
randomcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)948 randomcoll(struct key_value *kv1, struct key_value *kv2,
949 size_t offset __unused)
950 {
951 struct bwstring *s1, *s2;
952 MD5_CTX ctx1, ctx2;
953 char *b1, *b2;
954
955 s1 = kv1->k;
956 s2 = kv2->k;
957
958 if (debug_sort) {
959 bwsprintf(stdout, s1, "; k1=<", ">");
960 bwsprintf(stdout, s2, ", k2=<", ">");
961 }
962
963 if (s1 == s2)
964 return 0;
965
966 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
967 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
968
969 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
970 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
971 b1 = MD5End(&ctx1, NULL);
972 b2 = MD5End(&ctx2, NULL);
973 if (b1 == NULL) {
974 if (b2 == NULL)
975 return 0;
976 else {
977 sort_free(b2);
978 return -1;
979 }
980 } else if (b2 == NULL) {
981 sort_free(b1);
982 return 1;
983 } else {
984 int cmp_res;
985
986 cmp_res = strcmp(b1, b2);
987 sort_free(b1);
988 sort_free(b2);
989
990 if (!cmp_res)
991 cmp_res = bwscoll(s1, s2, 0);
992
993 return cmp_res;
994 }
995 }
996
997 /*
998 * Implements version sort (-V).
999 */
1000 static int
versioncoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1001 versioncoll(struct key_value *kv1, struct key_value *kv2,
1002 size_t offset __unused)
1003 {
1004 struct bwstring *s1, *s2;
1005
1006 s1 = kv1->k;
1007 s2 = kv2->k;
1008
1009 if (debug_sort) {
1010 bwsprintf(stdout, s1, "; k1=<", ">");
1011 bwsprintf(stdout, s2, ", k2=<", ">");
1012 }
1013
1014 if (s1 == s2)
1015 return 0;
1016
1017 return vcmp(s1, s2);
1018 }
1019
1020 /*
1021 * Check for minus infinity
1022 */
1023 static inline bool
huge_minus(double d,int err1)1024 huge_minus(double d, int err1)
1025 {
1026 if (err1 == ERANGE)
1027 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1028 return 1;
1029
1030 return 0;
1031 }
1032
1033 /*
1034 * Check for plus infinity
1035 */
1036 static inline bool
huge_plus(double d,int err1)1037 huge_plus(double d, int err1)
1038 {
1039 if (err1 == ERANGE)
1040 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1041 return 1;
1042
1043 return 0;
1044 }
1045
1046 /*
1047 * Check whether a function is a NAN
1048 */
1049 static bool
is_nan(double d)1050 is_nan(double d)
1051 {
1052 #if defined(NAN)
1053 return (d == NAN || isnan(d));
1054 #else
1055 return (isnan(d));
1056 #endif
1057 }
1058
1059 /*
1060 * Compare two NANs
1061 */
1062 static int
cmp_nans(double d1,double d2)1063 cmp_nans(double d1, double d2)
1064 {
1065 if (d1 == d2)
1066 return 0;
1067 return d1 < d2 ? -1 : 1;
1068 }
1069
1070 /*
1071 * Implements general numeric sort (-g).
1072 */
1073 static int
gnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1074 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1075 size_t offset __unused)
1076 {
1077 double d1, d2;
1078 int err1, err2;
1079 bool empty1, empty2, key1_read, key2_read;
1080
1081 d1 = d2 = 0;
1082 err1 = err2 = 0;
1083 key1_read = key2_read = false;
1084
1085 if (debug_sort) {
1086 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1087 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1088 }
1089
1090 if (kv1->hint->status == HS_UNINITIALIZED) {
1091 errno = 0;
1092 d1 = bwstod(kv1->k, &empty1);
1093 err1 = errno;
1094
1095 if (empty1) {
1096 kv1->hint->v.gh.notnum = true;
1097 kv1->hint->status = HS_INITIALIZED;
1098 } else if (err1 == 0) {
1099 kv1->hint->v.gh.d = d1;
1100 kv1->hint->v.gh.nan = is_nan(d1);
1101 kv1->hint->status = HS_INITIALIZED;
1102 } else
1103 kv1->hint->status = HS_ERROR;
1104
1105 key1_read = true;
1106 }
1107
1108 if (kv2->hint->status == HS_UNINITIALIZED) {
1109 errno = 0;
1110 d2 = bwstod(kv2->k, &empty2);
1111 err2 = errno;
1112
1113 if (empty2) {
1114 kv2->hint->v.gh.notnum = true;
1115 kv2->hint->status = HS_INITIALIZED;
1116 } else if (err2 == 0) {
1117 kv2->hint->v.gh.d = d2;
1118 kv2->hint->v.gh.nan = is_nan(d2);
1119 kv2->hint->status = HS_INITIALIZED;
1120 } else
1121 kv2->hint->status = HS_ERROR;
1122
1123 key2_read = true;
1124 }
1125
1126 if (kv1->hint->status == HS_INITIALIZED &&
1127 kv2->hint->status == HS_INITIALIZED) {
1128 #ifdef GNUSORT_COMPATIBILITY
1129 if (kv1->hint->v.gh.notnum)
1130 return kv2->hint->v.gh.notnum ? 0 : -1;
1131 else if (kv2->hint->v.gh.notnum)
1132 return 1;
1133 #else
1134 if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
1135 return 0;
1136 #endif
1137
1138 if (kv1->hint->v.gh.nan)
1139 return kv2->hint->v.gh.nan ?
1140 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
1141 else if (kv2->hint->v.gh.nan)
1142 return 1;
1143
1144 d1 = kv1->hint->v.gh.d;
1145 d2 = kv2->hint->v.gh.d;
1146
1147 if (d1 < d2)
1148 return -1;
1149 else if (d1 > d2)
1150 return 1;
1151 else
1152 return 0;
1153 }
1154
1155 if (!key1_read) {
1156 errno = 0;
1157 d1 = bwstod(kv1->k, &empty1);
1158 err1 = errno;
1159 }
1160
1161 if (!key2_read) {
1162 errno = 0;
1163 d2 = bwstod(kv2->k, &empty2);
1164 err2 = errno;
1165 }
1166
1167 /* Non-value case */
1168 #ifdef GNUSORT_COMPATIBILITY
1169 if (empty1)
1170 return empty2 ? 0 : -1;
1171 else if (empty2)
1172 return 1;
1173 #else
1174 if (empty1 && empty2)
1175 return 0;
1176 #endif
1177
1178 /* NAN case */
1179 if (is_nan(d1))
1180 return is_nan(d2) ? cmp_nans(d1, d2) : -1;
1181 else if (is_nan(d2))
1182 return 1;
1183
1184 /* Infinities */
1185 if (err1 == ERANGE || err2 == ERANGE) {
1186 /* Minus infinity case */
1187 if (huge_minus(d1, err1)) {
1188 if (huge_minus(d2, err2)) {
1189 if (d1 == d2)
1190 return 0;
1191 return d1 < d2 ? -1 : 1;
1192 } else
1193 return -1;
1194
1195 } else if (huge_minus(d2, err2)) {
1196 if (huge_minus(d1, err1)) {
1197 if (d1 == d2)
1198 return 0;
1199 return d1 < d2 ? -1 : 1;
1200 } else
1201 return 1;
1202 }
1203
1204 /* Plus infinity case */
1205 if (huge_plus(d1, err1)) {
1206 if (huge_plus(d2, err2)) {
1207 if (d1 == d2)
1208 return 0;
1209 return d1 < d2 ? -1 : 1;
1210 } else
1211 return 1;
1212 } else if (huge_plus(d2, err2)) {
1213 if (huge_plus(d1, err1)) {
1214 if (d1 == d2)
1215 return 0;
1216 return d1 < d2 ? -1 : 1;
1217 } else
1218 return -1;
1219 }
1220 }
1221
1222 if (d1 == d2)
1223 return 0;
1224 return d1 < d2 ? -1 : 1;
1225 }
1226
1227 /*
1228 * Implements month sort (-M).
1229 */
1230 static int
monthcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1231 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1232 {
1233 int val1, val2;
1234 bool key1_read, key2_read;
1235
1236 val1 = val2 = 0;
1237 key1_read = key2_read = false;
1238
1239 if (debug_sort) {
1240 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1241 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1242 }
1243
1244 if (kv1->hint->status == HS_UNINITIALIZED) {
1245 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1246 key1_read = true;
1247 kv1->hint->status = HS_INITIALIZED;
1248 }
1249
1250 if (kv2->hint->status == HS_UNINITIALIZED) {
1251 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1252 key2_read = true;
1253 kv2->hint->status = HS_INITIALIZED;
1254 }
1255
1256 if (kv1->hint->status == HS_INITIALIZED) {
1257 val1 = kv1->hint->v.Mh.m;
1258 key1_read = true;
1259 }
1260
1261 if (kv2->hint->status == HS_INITIALIZED) {
1262 val2 = kv2->hint->v.Mh.m;
1263 key2_read = true;
1264 }
1265
1266 if (!key1_read)
1267 val1 = bws_month_score(kv1->k);
1268 if (!key2_read)
1269 val2 = bws_month_score(kv2->k);
1270
1271 if (val1 == val2)
1272 return 0;
1273 return val1 < val2 ? -1 : 1;
1274 }
1275