1 /*-
2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 *
27 * $FreeBSD: head/usr.bin/sort/coll.c 281132 2015-04-06 02:35:55Z pfg $
28 */
29
30
31 #include <sys/types.h>
32
33 #include <errno.h>
34 #include <err.h>
35 #include <langinfo.h>
36 #include <limits.h>
37 #include <math.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <wctype.h>
42 #if defined(SORT_RANDOM)
43 #include <openssl/md5.h>
44 #endif
45
46 #include "coll.h"
47 #include "vsort.h"
48
49 struct key_specs *keys;
50 size_t keys_num = 0;
51
52 wint_t symbol_decimal_point = L'.';
53 /* there is no default thousands separator in collate rules: */
54 wint_t symbol_thousands_sep = 0;
55 wint_t symbol_negative_sign = L'-';
56 wint_t symbol_positive_sign = L'+';
57
58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
60 static int monthcoll(struct key_value*, struct key_value *, size_t offset);
61 static int numcoll(struct key_value*, struct key_value *, size_t offset);
62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
63 #if defined(SORT_RANDOM)
64 static int randomcoll(struct key_value*, struct key_value *, size_t offset);
65 #endif
66 static int versioncoll(struct key_value*, struct key_value *, size_t offset);
67
68 /*
69 * Allocate keys array
70 */
71 struct keys_array *
keys_array_alloc(void)72 keys_array_alloc(void)
73 {
74 struct keys_array *ka;
75 size_t sz;
76
77 sz = keys_array_size();
78 ka = sort_malloc(sz);
79 memset(ka, 0, sz);
80
81 return (ka);
82 }
83
84 /*
85 * Calculate whether we need key hint space
86 */
87 static size_t
key_hint_size(void)88 key_hint_size(void)
89 {
90
91 return (need_hint ? sizeof(struct key_hint) : 0);
92 }
93
94 /*
95 * Calculate keys array size
96 */
97 size_t
keys_array_size(void)98 keys_array_size(void)
99 {
100
101 return (keys_num * (sizeof(struct key_value) + key_hint_size()));
102 }
103
104 /*
105 * Clean data of keys array
106 */
107 void
clean_keys_array(const struct bwstring * s,struct keys_array * ka)108 clean_keys_array(const struct bwstring *s, struct keys_array *ka)
109 {
110
111 if (ka) {
112 for (size_t i = 0; i < keys_num; ++i)
113 if (ka->key[i].k && ka->key[i].k != s)
114 bwsfree(ka->key[i].k);
115 memset(ka, 0, keys_array_size());
116 }
117 }
118
119 /*
120 * Set value of a key in the keys set
121 */
122 void
set_key_on_keys_array(struct keys_array * ka,struct bwstring * s,size_t ind)123 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
124 {
125
126 if (ka && keys_num > ind) {
127 struct key_value *kv;
128
129 kv = &(ka->key[ind]);
130
131 if (kv->k && kv->k != s)
132 bwsfree(kv->k);
133 kv->k = s;
134 }
135 }
136
137 /*
138 * Initialize a sort list item
139 */
140 struct sort_list_item *
sort_list_item_alloc(void)141 sort_list_item_alloc(void)
142 {
143 struct sort_list_item *si;
144 size_t sz;
145
146 sz = sizeof(struct sort_list_item) + keys_array_size();
147 si = sort_malloc(sz);
148 memset(si, 0, sz);
149
150 return (si);
151 }
152
153 size_t
sort_list_item_size(struct sort_list_item * si)154 sort_list_item_size(struct sort_list_item *si)
155 {
156 size_t ret = 0;
157
158 if (si) {
159 ret = sizeof(struct sort_list_item) + keys_array_size();
160 if (si->str)
161 ret += bws_memsize(si->str);
162 for (size_t i = 0; i < keys_num; ++i) {
163 struct key_value *kv;
164
165 kv = &(si->ka.key[i]);
166
167 if (kv->k != si->str)
168 ret += bws_memsize(kv->k);
169 }
170 }
171 return (ret);
172 }
173
174 /*
175 * Calculate key for a sort list item
176 */
177 static void
sort_list_item_make_key(struct sort_list_item * si)178 sort_list_item_make_key(struct sort_list_item *si)
179 {
180
181 preproc(si->str, &(si->ka));
182 }
183
184 /*
185 * Set value of a sort list item.
186 * Return combined string and keys memory size.
187 */
188 void
sort_list_item_set(struct sort_list_item * si,struct bwstring * str)189 sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
190 {
191
192 if (si) {
193 clean_keys_array(si->str, &(si->ka));
194 if (si->str) {
195 if (si->str == str) {
196 /* we are trying to reset the same string */
197 return;
198 } else {
199 bwsfree(si->str);
200 si->str = NULL;
201 }
202 }
203 si->str = str;
204 sort_list_item_make_key(si);
205 }
206 }
207
208 /*
209 * De-allocate a sort list item object memory
210 */
211 void
sort_list_item_clean(struct sort_list_item * si)212 sort_list_item_clean(struct sort_list_item *si)
213 {
214
215 if (si) {
216 clean_keys_array(si->str, &(si->ka));
217 if (si->str) {
218 bwsfree(si->str);
219 si->str = NULL;
220 }
221 }
222 }
223
224 /*
225 * Skip columns according to specs
226 */
227 static size_t
skip_cols_to_start(const struct bwstring * s,size_t cols,size_t start,bool skip_blanks,bool * empty_key)228 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
229 bool skip_blanks, bool *empty_key)
230 {
231 if (cols < 1)
232 return (BWSLEN(s) + 1);
233
234 if (skip_blanks)
235 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start)))
236 ++start;
237
238 while (start < BWSLEN(s) && cols > 1) {
239 --cols;
240 ++start;
241 }
242
243 if (start >= BWSLEN(s))
244 *empty_key = true;
245
246 return (start);
247 }
248
249 /*
250 * Skip fields according to specs
251 */
252 static size_t
skip_fields_to_start(const struct bwstring * s,size_t fields,bool * empty_field)253 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
254 {
255
256 if (fields < 2) {
257 if (BWSLEN(s) == 0)
258 *empty_field = true;
259 return (0);
260 } else if (!(sort_opts_vals.tflag)) {
261 size_t cpos = 0;
262 bool pb = true;
263
264 while (cpos < BWSLEN(s)) {
265 bool isblank;
266
267 isblank = iswblank(BWS_GET(s, cpos));
268
269 if (isblank && !pb) {
270 --fields;
271 if (fields <= 1)
272 return (cpos);
273 }
274 pb = isblank;
275 ++cpos;
276 }
277 if (fields > 1)
278 *empty_field = true;
279 return (cpos);
280 } else {
281 size_t cpos = 0;
282
283 while (cpos < BWSLEN(s)) {
284 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) {
285 --fields;
286 if (fields <= 1)
287 return (cpos + 1);
288 }
289 ++cpos;
290 }
291 if (fields > 1)
292 *empty_field = true;
293 return (cpos);
294 }
295 }
296
297 /*
298 * Find fields start
299 */
300 static void
find_field_start(const struct bwstring * s,struct key_specs * ks,size_t * field_start,size_t * key_start,bool * empty_field,bool * empty_key)301 find_field_start(const struct bwstring *s, struct key_specs *ks,
302 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
303 {
304
305 *field_start = skip_fields_to_start(s, ks->f1, empty_field);
306 if (!*empty_field)
307 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
308 ks->pos1b, empty_key);
309 else
310 *empty_key = true;
311 }
312
313 /*
314 * Find end key position
315 */
316 static size_t
find_field_end(const struct bwstring * s,struct key_specs * ks)317 find_field_end(const struct bwstring *s, struct key_specs *ks)
318 {
319 size_t f2, next_field_start, pos_end;
320 bool empty_field, empty_key;
321
322 pos_end = 0;
323 next_field_start = 0;
324 empty_field = false;
325 empty_key = false;
326 f2 = ks->f2;
327
328 if (f2 == 0)
329 return (BWSLEN(s) + 1);
330 else {
331 if (ks->c2 == 0) {
332 next_field_start = skip_fields_to_start(s, f2 + 1,
333 &empty_field);
334 if ((next_field_start > 0) && sort_opts_vals.tflag &&
335 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
336 next_field_start - 1)))
337 --next_field_start;
338 } else
339 next_field_start = skip_fields_to_start(s, f2,
340 &empty_field);
341 }
342
343 if (empty_field || (next_field_start >= BWSLEN(s)))
344 return (BWSLEN(s) + 1);
345
346 if (ks->c2) {
347 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
348 ks->pos2b, &empty_key);
349 if (pos_end < BWSLEN(s))
350 ++pos_end;
351 } else
352 pos_end = next_field_start;
353
354 return (pos_end);
355 }
356
357 /*
358 * Cut a field according to the key specs
359 */
360 static struct bwstring *
cut_field(const struct bwstring * s,struct key_specs * ks)361 cut_field(const struct bwstring *s, struct key_specs *ks)
362 {
363 struct bwstring *ret = NULL;
364
365 if (s && ks) {
366 size_t field_start, key_end, key_start, sz;
367 bool empty_field, empty_key;
368
369 field_start = 0;
370 key_start = 0;
371 empty_field = false;
372 empty_key = false;
373
374 find_field_start(s, ks, &field_start, &key_start,
375 &empty_field, &empty_key);
376
377 if (empty_key)
378 sz = 0;
379 else {
380 key_end = find_field_end(s, ks);
381 sz = (key_end < key_start) ? 0 : (key_end - key_start);
382 }
383
384 ret = bwsalloc(sz);
385 if (sz)
386 bwsnocpy(ret, s, key_start, sz);
387 } else
388 ret = bwsalloc(0);
389
390 return (ret);
391 }
392
393 /*
394 * Preprocesses a line applying the necessary transformations
395 * specified by command line options and returns the preprocessed
396 * string, which can be used to compare.
397 */
398 int
preproc(struct bwstring * s,struct keys_array * ka)399 preproc(struct bwstring *s, struct keys_array *ka)
400 {
401
402 if (sort_opts_vals.kflag)
403 for (size_t i = 0; i < keys_num; i++) {
404 struct bwstring *key;
405 struct key_specs *kspecs;
406 struct sort_mods *sm;
407
408 kspecs = &(keys[i]);
409 key = cut_field(s, kspecs);
410
411 sm = &(kspecs->sm);
412 if (sm->dflag)
413 key = dictionary_order(key);
414 else if (sm->iflag)
415 key = ignore_nonprinting(key);
416 if (sm->fflag || sm->Mflag)
417 key = ignore_case(key);
418
419 set_key_on_keys_array(ka, key, i);
420 }
421 else {
422 struct bwstring *ret = NULL;
423 struct sort_mods *sm = default_sort_mods;
424
425 if (sm->bflag) {
426 if (ret == NULL)
427 ret = bwsdup(s);
428 ret = ignore_leading_blanks(ret);
429 }
430 if (sm->dflag) {
431 if (ret == NULL)
432 ret = bwsdup(s);
433 ret = dictionary_order(ret);
434 } else if (sm->iflag) {
435 if (ret == NULL)
436 ret = bwsdup(s);
437 ret = ignore_nonprinting(ret);
438 }
439 if (sm->fflag || sm->Mflag) {
440 if (ret == NULL)
441 ret = bwsdup(s);
442 ret = ignore_case(ret);
443 }
444 if (ret == NULL)
445 set_key_on_keys_array(ka, s, 0);
446 else
447 set_key_on_keys_array(ka, ret, 0);
448 }
449
450 return 0;
451 }
452
453 cmpcoll_t
get_sort_func(struct sort_mods * sm)454 get_sort_func(struct sort_mods *sm)
455 {
456
457 if (sm->nflag)
458 return (numcoll);
459 else if (sm->hflag)
460 return (hnumcoll);
461 else if (sm->gflag)
462 return (gnumcoll);
463 else if (sm->Mflag)
464 return (monthcoll);
465 #if defined(SORT_RANDOM)
466 else if (sm->Rflag)
467 return (randomcoll);
468 #endif
469 else if (sm->Vflag)
470 return (versioncoll);
471 else
472 return (wstrcoll);
473 }
474
475 /*
476 * Compares the given strings. Returns a positive number if
477 * the first precedes the second, a negative number if the second is
478 * the preceding one, and zero if they are equal. This function calls
479 * the underlying collate functions, which done the actual comparison.
480 */
481 int
key_coll(struct keys_array * ps1,struct keys_array * ps2,size_t offset)482 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
483 {
484 struct sort_mods *sm;
485 int res = 0;
486
487 for (size_t i = 0; i < keys_num; ++i) {
488 sm = &(keys[i].sm);
489
490 if (sm->rflag)
491 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
492 else
493 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
494
495 if (res)
496 break;
497
498 /* offset applies to only the first key */
499 offset = 0;
500 }
501 return (res);
502 }
503
504 /*
505 * Compare two strings.
506 * Plain symbol-by-symbol comparison.
507 */
508 int
top_level_str_coll(const struct bwstring * s1,const struct bwstring * s2)509 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
510 {
511
512 if (default_sort_mods->rflag) {
513 const struct bwstring *tmp;
514
515 tmp = s1;
516 s1 = s2;
517 s2 = tmp;
518 }
519
520 return (bwscoll(s1, s2, 0));
521 }
522
523 /*
524 * Compare a string and a sort list item, according to the sort specs.
525 */
526 int
str_list_coll(struct bwstring * str1,struct sort_list_item ** ss2)527 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
528 {
529 struct keys_array *ka1;
530 int ret = 0;
531
532 ka1 = keys_array_alloc();
533
534 preproc(str1, ka1);
535
536 sort_list_item_make_key(*ss2);
537
538 if (debug_sort) {
539 bwsprintf(stdout, str1, "; s1=<", ">");
540 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
541 }
542
543 ret = key_coll(ka1, &((*ss2)->ka), 0);
544
545 if (debug_sort)
546 printf("; cmp1=%d", ret);
547
548 clean_keys_array(str1, ka1);
549 sort_free(ka1);
550
551 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
552 ret = top_level_str_coll(str1, ((*ss2)->str));
553 if (debug_sort)
554 printf("; cmp2=%d", ret);
555 }
556
557 if (debug_sort)
558 printf("\n");
559
560 return (ret);
561 }
562
563 /*
564 * Compare two sort list items, according to the sort specs.
565 */
566 int
list_coll_offset(struct sort_list_item ** ss1,struct sort_list_item ** ss2,size_t offset)567 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
568 size_t offset)
569 {
570 int ret;
571
572 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
573
574 if (debug_sort) {
575 if (offset)
576 printf("; offset=%d", (int) offset);
577 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
578 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
579 printf("; cmp1=%d\n", ret);
580 }
581
582 if (ret)
583 return (ret);
584
585 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
586 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
587 if (debug_sort)
588 printf("; cmp2=%d\n", ret);
589 }
590
591 return (ret);
592 }
593
594 /*
595 * Compare two sort list items, according to the sort specs.
596 */
597 int
list_coll(struct sort_list_item ** ss1,struct sort_list_item ** ss2)598 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2)
599 {
600
601 return (list_coll_offset(ss1, ss2, 0));
602 }
603
604 #define LSCDEF(N) \
605 static int \
606 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
607 { \
608 \
609 return (list_coll_offset(ss1, ss2, N)); \
610 }
611
612 LSCDEF(1)
613 LSCDEF(2)
614 LSCDEF(3)
615 LSCDEF(4)
616 LSCDEF(5)
617 LSCDEF(6)
618 LSCDEF(7)
619 LSCDEF(8)
620 LSCDEF(9)
621 LSCDEF(10)
622 LSCDEF(11)
623 LSCDEF(12)
624 LSCDEF(13)
625 LSCDEF(14)
626 LSCDEF(15)
627 LSCDEF(16)
628 LSCDEF(17)
629 LSCDEF(18)
630 LSCDEF(19)
631 LSCDEF(20)
632
633 listcoll_t
get_list_call_func(size_t offset)634 get_list_call_func(size_t offset)
635 {
636 static const listcoll_t lsarray[] = { list_coll, list_coll_1,
637 list_coll_2, list_coll_3, list_coll_4, list_coll_5,
638 list_coll_6, list_coll_7, list_coll_8, list_coll_9,
639 list_coll_10, list_coll_11, list_coll_12, list_coll_13,
640 list_coll_14, list_coll_15, list_coll_16, list_coll_17,
641 list_coll_18, list_coll_19, list_coll_20 };
642
643 if (offset <= 20)
644 return (lsarray[offset]);
645
646 return (list_coll);
647 }
648
649 /*
650 * Compare two sort list items, only by their original string.
651 */
652 int
list_coll_by_str_only(struct sort_list_item ** ss1,struct sort_list_item ** ss2)653 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
654 {
655
656 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str)));
657 }
658
659 /*
660 * Maximum size of a number in the string (before or after decimal point)
661 */
662 #define MAX_NUM_SIZE (128)
663
664 /*
665 * Set suffix value
666 */
setsuffix(wchar_t c,unsigned char * si)667 static void setsuffix(wchar_t c, unsigned char *si)
668 {
669 switch (c){
670 case L'k':
671 case L'K':
672 *si = 1;
673 break;
674 case L'M':
675 *si = 2;
676 break;
677 case L'G':
678 *si = 3;
679 break;
680 case L'T':
681 *si = 4;
682 break;
683 case L'P':
684 *si = 5;
685 break;
686 case L'E':
687 *si = 6;
688 break;
689 case L'Z':
690 *si = 7;
691 break;
692 case L'Y':
693 *si = 8;
694 break;
695 default:
696 *si = 0;
697 };
698 }
699
700 /*
701 * Read string s and parse the string into a fixed-decimal-point number.
702 * sign equals -1 if the number is negative (explicit plus is not allowed,
703 * according to GNU sort's "info sort".
704 * The number part before decimal point is in the smain, after the decimal
705 * point is in sfrac, tail is the pointer to the remainder of the string.
706 */
707 static int
read_number(struct bwstring * s0,int * sign,wchar_t * smain,size_t * main_len,wchar_t * sfrac,size_t * frac_len,unsigned char * si)708 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
709 {
710 bwstring_iterator s;
711
712 s = bws_begin(s0);
713
714 /* always end the fraction with zero, even if we have no fraction */
715 sfrac[0] = 0;
716
717 while (iswblank(bws_get_iter_value(s)))
718 s = bws_iterator_inc(s, 1);
719
720 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
721 *sign = -1;
722 s = bws_iterator_inc(s, 1);
723 }
724
725 // This is '0', not '\0', do not change this
726 while (iswdigit(bws_get_iter_value(s)) &&
727 (bws_get_iter_value(s) == L'0'))
728 s = bws_iterator_inc(s, 1);
729
730 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
731 if (iswdigit(bws_get_iter_value(s))) {
732 smain[*main_len] = bws_get_iter_value(s);
733 s = bws_iterator_inc(s, 1);
734 *main_len += 1;
735 } else if (symbol_thousands_sep &&
736 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
737 s = bws_iterator_inc(s, 1);
738 else
739 break;
740 }
741
742 smain[*main_len] = 0;
743
744 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
745 s = bws_iterator_inc(s, 1);
746 while (iswdigit(bws_get_iter_value(s)) &&
747 *frac_len < MAX_NUM_SIZE) {
748 sfrac[*frac_len] = bws_get_iter_value(s);
749 s = bws_iterator_inc(s, 1);
750 *frac_len += 1;
751 }
752 sfrac[*frac_len] = 0;
753
754 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
755 --(*frac_len);
756 sfrac[*frac_len] = L'\0';
757 }
758 }
759
760 setsuffix(bws_get_iter_value(s),si);
761
762 if ((*main_len + *frac_len) == 0)
763 *sign = 0;
764
765 return (0);
766 }
767
768 /*
769 * Implements string sort.
770 */
771 static int
wstrcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)772 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
773 {
774
775 if (debug_sort) {
776 if (offset)
777 printf("; offset=%d\n", (int) offset);
778 bwsprintf(stdout, kv1->k, "; k1=<", ">");
779 printf("(%zu)", BWSLEN(kv1->k));
780 bwsprintf(stdout, kv2->k, ", k2=<", ">");
781 printf("(%zu)", BWSLEN(kv2->k));
782 }
783
784 return (bwscoll(kv1->k, kv2->k, offset));
785 }
786
787 /*
788 * Compare two suffixes
789 */
790 static inline int
cmpsuffix(unsigned char si1,unsigned char si2)791 cmpsuffix(unsigned char si1, unsigned char si2)
792 {
793
794 return ((char)si1 - (char)si2);
795 }
796
797 /*
798 * Implements numeric sort for -n and -h.
799 */
800 static int
numcoll_impl(struct key_value * kv1,struct key_value * kv2,size_t offset __unused,bool use_suffix)801 numcoll_impl(struct key_value *kv1, struct key_value *kv2,
802 size_t offset __unused, bool use_suffix)
803 {
804 struct bwstring *s1, *s2;
805 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
806 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
807 int cmp_res, sign1, sign2;
808 size_t frac1, frac2, main1, main2;
809 unsigned char SI1, SI2;
810 bool e1, e2, key1_read, key2_read;
811
812 s1 = kv1->k;
813 s2 = kv2->k;
814 sign1 = sign2 = 0;
815 main1 = main2 = 0;
816 frac1 = frac2 = 0;
817
818 cmp_res = 0;
819 key1_read = key2_read = false;
820
821 if (debug_sort) {
822 bwsprintf(stdout, s1, "; k1=<", ">");
823 bwsprintf(stdout, s2, ", k2=<", ">");
824 }
825
826 if (s1 == s2)
827 return (0);
828
829 if (kv1->hint->status == HS_UNINITIALIZED) {
830 /* read the number from the string */
831 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
832 key1_read = true;
833 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
834 if(main1 < 1 && frac1 < 1)
835 kv1->hint->v.nh.empty=true;
836 kv1->hint->v.nh.si = SI1;
837 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
838 HS_INITIALIZED : HS_ERROR;
839 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
840 }
841
842 if (kv2->hint->status == HS_UNINITIALIZED) {
843 /* read the number from the string */
844 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2);
845 key2_read = true;
846 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
847 if(main2 < 1 && frac2 < 1)
848 kv2->hint->v.nh.empty=true;
849 kv2->hint->v.nh.si = SI2;
850 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
851 HS_INITIALIZED : HS_ERROR;
852 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
853 }
854
855 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
856 HS_INITIALIZED) {
857 unsigned long long n1, n2;
858 bool neg1, neg2;
859
860 e1 = kv1->hint->v.nh.empty;
861 e2 = kv2->hint->v.nh.empty;
862
863 if (e1 && e2)
864 return (0);
865
866 neg1 = kv1->hint->v.nh.neg;
867 neg2 = kv2->hint->v.nh.neg;
868
869 if (neg1 && !neg2)
870 return (-1);
871 if (neg2 && !neg1)
872 return (+1);
873
874 if (e1)
875 return (neg2 ? +1 : -1);
876 else if (e2)
877 return (neg1 ? -1 : +1);
878
879
880 if (use_suffix) {
881 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
882 if (cmp_res)
883 return (neg1 ? -cmp_res : cmp_res);
884 }
885
886 n1 = kv1->hint->v.nh.n1;
887 n2 = kv2->hint->v.nh.n1;
888 if (n1 < n2)
889 return (neg1 ? +1 : -1);
890 else if (n1 > n2)
891 return (neg1 ? -1 : +1);
892 }
893
894 /* read the numbers from the strings */
895 if (!key1_read)
896 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
897 if (!key2_read)
898 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
899
900 e1 = ((main1 + frac1) == 0);
901 e2 = ((main2 + frac2) == 0);
902
903 if (e1 && e2)
904 return (0);
905
906 /* we know the result if the signs are different */
907 if (sign1 < 0 && sign2 >= 0)
908 return (-1);
909 if (sign1 >= 0 && sign2 < 0)
910 return (+1);
911
912 if (e1)
913 return ((sign2 < 0) ? +1 : -1);
914 else if (e2)
915 return ((sign1 < 0) ? -1 : +1);
916
917 if (use_suffix) {
918 cmp_res = cmpsuffix(SI1, SI2);
919 if (cmp_res)
920 return ((sign1 < 0) ? -cmp_res : cmp_res);
921 }
922
923 /* if both numbers are empty assume that the strings are equal */
924 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
925 return (0);
926
927 /*
928 * if the main part is of different size, we know the result
929 * (because the leading zeros are removed)
930 */
931 if (main1 < main2)
932 cmp_res = -1;
933 else if (main1 > main2)
934 cmp_res = +1;
935 /* if the sizes are equal then simple non-collate string compare gives the correct result */
936 else
937 cmp_res = wcscmp(smain1, smain2);
938
939 /* check fraction */
940 if (!cmp_res)
941 cmp_res = wcscmp(sfrac1, sfrac2);
942
943 if (!cmp_res)
944 return (0);
945
946 /* reverse result if the signs are negative */
947 if (sign1 < 0 && sign2 < 0)
948 cmp_res = -cmp_res;
949
950 return (cmp_res);
951 }
952
953 /*
954 * Implements numeric sort (-n).
955 */
956 static int
numcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)957 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
958 {
959
960 return (numcoll_impl(kv1, kv2, offset, false));
961 }
962
963 /*
964 * Implements 'human' numeric sort (-h).
965 */
966 static int
hnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset)967 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
968 {
969
970 return (numcoll_impl(kv1, kv2, offset, true));
971 }
972
973 /*
974 * Implements random sort (-R).
975 */
976 #if defined(SORT_RANDOM)
977 static char *
randomcollend(MD5_CTX * ctx)978 randomcollend(MD5_CTX *ctx)
979 {
980 unsigned char digest[MD5_DIGEST_LENGTH];
981 static const char hex[]="0123456789abcdef";
982 char *buf;
983 int i;
984
985 buf = malloc(MD5_DIGEST_LENGTH * 2 + 1);
986 if (!buf)
987 return NULL;
988 MD5_Final(digest, ctx);
989 for (i = 0; i < MD5_DIGEST_LENGTH; i++) {
990 buf[2*i] = hex[digest[i] >> 4];
991 buf[2*i+1] = hex[digest[i] & 0x0f];
992 }
993 buf[MD5_DIGEST_LENGTH * 2] = '\0';
994 return buf;
995 }
996
997 static int
randomcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)998 randomcoll(struct key_value *kv1, struct key_value *kv2,
999 size_t offset __unused)
1000 {
1001 struct bwstring *s1, *s2;
1002 MD5_CTX ctx1, ctx2;
1003 char *b1, *b2;
1004
1005 s1 = kv1->k;
1006 s2 = kv2->k;
1007
1008 if (debug_sort) {
1009 bwsprintf(stdout, s1, "; k1=<", ">");
1010 bwsprintf(stdout, s2, ", k2=<", ">");
1011 }
1012
1013 if (s1 == s2)
1014 return (0);
1015
1016 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX));
1017 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX));
1018
1019 MD5_Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
1020 MD5_Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
1021 b1 = randomcollend(&ctx1);
1022 b2 = randomcollend(&ctx2);
1023 if (b1 == NULL) {
1024 if (b2 == NULL)
1025 return (0);
1026 else {
1027 sort_free(b2);
1028 return (-1);
1029 }
1030 } else if (b2 == NULL) {
1031 sort_free(b1);
1032 return (+1);
1033 } else {
1034 int cmp_res;
1035
1036 cmp_res = strcmp(b1,b2);
1037 sort_free(b1);
1038 sort_free(b2);
1039
1040 if (!cmp_res)
1041 cmp_res = bwscoll(s1, s2, 0);
1042
1043 return (cmp_res);
1044 }
1045 }
1046 #endif
1047
1048 /*
1049 * Implements version sort (-V).
1050 */
1051 static int
versioncoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1052 versioncoll(struct key_value *kv1, struct key_value *kv2,
1053 size_t offset __unused)
1054 {
1055 struct bwstring *s1, *s2;
1056
1057 s1 = kv1->k;
1058 s2 = kv2->k;
1059
1060 if (debug_sort) {
1061 bwsprintf(stdout, s1, "; k1=<", ">");
1062 bwsprintf(stdout, s2, ", k2=<", ">");
1063 }
1064
1065 if (s1 == s2)
1066 return (0);
1067
1068 return (vcmp(s1, s2));
1069 }
1070
1071 /*
1072 * Check for minus infinity
1073 */
1074 static inline bool
huge_minus(double d,int err1)1075 huge_minus(double d, int err1)
1076 {
1077
1078 if (err1 == ERANGE)
1079 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1080 return (+1);
1081
1082 return (0);
1083 }
1084
1085 /*
1086 * Check for plus infinity
1087 */
1088 static inline bool
huge_plus(double d,int err1)1089 huge_plus(double d, int err1)
1090 {
1091
1092 if (err1 == ERANGE)
1093 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1094 return (+1);
1095
1096 return (0);
1097 }
1098
1099 /*
1100 * Check whether a function is a NAN
1101 */
1102 static bool
is_nan(double d)1103 is_nan(double d)
1104 {
1105 return ((d == NAN) || (isnan(d)));
1106 }
1107
1108 /*
1109 * Compare two NANs
1110 */
1111 static int
cmp_nans(double d1,double d2)1112 cmp_nans(double d1, double d2)
1113 {
1114
1115 if (d1 < d2)
1116 return (-1);
1117 if (d1 > d2)
1118 return (+1);
1119 return (0);
1120 }
1121
1122 /*
1123 * Implements general numeric sort (-g).
1124 */
1125 static int
gnumcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1126 gnumcoll(struct key_value *kv1, struct key_value *kv2,
1127 size_t offset __unused)
1128 {
1129 double d1, d2;
1130 int err1, err2;
1131 bool empty1, empty2, key1_read, key2_read;
1132
1133 d1 = d2 = 0;
1134 err1 = err2 = 0;
1135 key1_read = key2_read = false;
1136
1137 if (debug_sort) {
1138 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1139 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1140 }
1141
1142 if (kv1->hint->status == HS_UNINITIALIZED) {
1143 errno = 0;
1144 d1 = bwstod(kv1->k, &empty1);
1145 err1 = errno;
1146
1147 if (empty1)
1148 kv1->hint->v.gh.notnum = true;
1149 else if (err1 == 0) {
1150 kv1->hint->v.gh.d = d1;
1151 kv1->hint->v.gh.nan = is_nan(d1);
1152 kv1->hint->status = HS_INITIALIZED;
1153 } else
1154 kv1->hint->status = HS_ERROR;
1155
1156 key1_read = true;
1157 }
1158
1159 if (kv2->hint->status == HS_UNINITIALIZED) {
1160 errno = 0;
1161 d2 = bwstod(kv2->k, &empty2);
1162 err2 = errno;
1163
1164 if (empty2)
1165 kv2->hint->v.gh.notnum = true;
1166 else if (err2 == 0) {
1167 kv2->hint->v.gh.d = d2;
1168 kv2->hint->v.gh.nan = is_nan(d2);
1169 kv2->hint->status = HS_INITIALIZED;
1170 } else
1171 kv2->hint->status = HS_ERROR;
1172
1173 key2_read = true;
1174 }
1175
1176 if (kv1->hint->status == HS_INITIALIZED &&
1177 kv2->hint->status == HS_INITIALIZED) {
1178 if (kv1->hint->v.gh.notnum)
1179 return ((kv2->hint->v.gh.notnum) ? 0 : -1);
1180 else if (kv2->hint->v.gh.notnum)
1181 return (+1);
1182
1183 if (kv1->hint->v.gh.nan)
1184 return ((kv2->hint->v.gh.nan) ?
1185 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) :
1186 -1);
1187 else if (kv2->hint->v.gh.nan)
1188 return (+1);
1189
1190 d1 = kv1->hint->v.gh.d;
1191 d2 = kv2->hint->v.gh.d;
1192
1193 if (d1 < d2)
1194 return (-1);
1195 else if (d1 > d2)
1196 return (+1);
1197 else
1198 return (0);
1199 }
1200
1201 if (!key1_read) {
1202 errno = 0;
1203 d1 = bwstod(kv1->k, &empty1);
1204 err1 = errno;
1205 }
1206
1207 if (!key2_read) {
1208 errno = 0;
1209 d2 = bwstod(kv2->k, &empty2);
1210 err2 = errno;
1211 }
1212
1213 /* Non-value case: */
1214 if (empty1)
1215 return (empty2 ? 0 : -1);
1216 else if (empty2)
1217 return (+1);
1218
1219 /* NAN case */
1220 if (is_nan(d1))
1221 return (is_nan(d2) ? cmp_nans(d1, d2) : -1);
1222 else if (is_nan(d2))
1223 return (+1);
1224
1225 /* Infinities */
1226 if (err1 == ERANGE || err2 == ERANGE) {
1227 /* Minus infinity case */
1228 if (huge_minus(d1, err1)) {
1229 if (huge_minus(d2, err2)) {
1230 if (d1 < d2)
1231 return (-1);
1232 if (d1 > d2)
1233 return (+1);
1234 return (0);
1235 } else
1236 return (-1);
1237
1238 } else if (huge_minus(d2, err2)) {
1239 if (huge_minus(d1, err1)) {
1240 if (d1 < d2)
1241 return (-1);
1242 if (d1 > d2)
1243 return (+1);
1244 return (0);
1245 } else
1246 return (+1);
1247 }
1248
1249 /* Plus infinity case */
1250 if (huge_plus(d1, err1)) {
1251 if (huge_plus(d2, err2)) {
1252 if (d1 < d2)
1253 return (-1);
1254 if (d1 > d2)
1255 return (+1);
1256 return (0);
1257 } else
1258 return (+1);
1259 } else if (huge_plus(d2, err2)) {
1260 if (huge_plus(d1, err1)) {
1261 if (d1 < d2)
1262 return (-1);
1263 if (d1 > d2)
1264 return (+1);
1265 return (0);
1266 } else
1267 return (-1);
1268 }
1269 }
1270
1271 if (d1 < d2)
1272 return (-1);
1273 if (d1 > d2)
1274 return (+1);
1275
1276 return (0);
1277 }
1278
1279 /*
1280 * Implements month sort (-M).
1281 */
1282 static int
monthcoll(struct key_value * kv1,struct key_value * kv2,size_t offset __unused)1283 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1284 {
1285 int val1, val2;
1286 bool key1_read, key2_read;
1287
1288 val1 = val2 = 0;
1289 key1_read = key2_read = false;
1290
1291 if (debug_sort) {
1292 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1293 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1294 }
1295
1296 if (kv1->hint->status == HS_UNINITIALIZED) {
1297 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1298 key1_read = true;
1299 kv1->hint->status = HS_INITIALIZED;
1300 }
1301
1302 if (kv2->hint->status == HS_UNINITIALIZED) {
1303 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1304 key2_read = true;
1305 kv2->hint->status = HS_INITIALIZED;
1306 }
1307
1308 if (kv1->hint->status == HS_INITIALIZED) {
1309 val1 = kv1->hint->v.Mh.m;
1310 key1_read = true;
1311 }
1312
1313 if (kv2->hint->status == HS_INITIALIZED) {
1314 val2 = kv2->hint->v.Mh.m;
1315 key2_read = true;
1316 }
1317
1318 if (!key1_read)
1319 val1 = bws_month_score(kv1->k);
1320 if (!key2_read)
1321 val2 = bws_month_score(kv2->k);
1322
1323 if (val1 == val2) {
1324 return (0);
1325 }
1326 if (val1 < val2)
1327 return (-1);
1328 return (+1);
1329 }
1330