1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: head/usr.bin/sort/coll.c 281132 2015-04-06 02:35:55Z pfg $ 28 */ 29 30 31 #include <sys/types.h> 32 33 #include <errno.h> 34 #include <err.h> 35 #include <langinfo.h> 36 #include <limits.h> 37 #include <math.h> 38 #if defined(SORT_RANDOM) 39 #include <md5.h> 40 #endif 41 #include <stdlib.h> 42 #include <string.h> 43 #include <wchar.h> 44 #include <wctype.h> 45 46 #include "coll.h" 47 #include "vsort.h" 48 49 struct key_specs *keys; 50 size_t keys_num = 0; 51 52 wint_t symbol_decimal_point = L'.'; 53 /* there is no default thousands separator in collate rules: */ 54 wint_t symbol_thousands_sep = 0; 55 wint_t symbol_negative_sign = L'-'; 56 wint_t symbol_positive_sign = L'+'; 57 58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 60 static int monthcoll(struct key_value*, struct key_value *, size_t offset); 61 static int numcoll(struct key_value*, struct key_value *, size_t offset); 62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 63 #if defined(SORT_RANDOM) 64 static int randomcoll(struct key_value*, struct key_value *, size_t offset); 65 #endif 66 static int versioncoll(struct key_value*, struct key_value *, size_t offset); 67 68 /* 69 * Allocate keys array 70 */ 71 struct keys_array * 72 keys_array_alloc(void) 73 { 74 struct keys_array *ka; 75 size_t sz; 76 77 sz = keys_array_size(); 78 ka = sort_malloc(sz); 79 memset(ka, 0, sz); 80 81 return (ka); 82 } 83 84 /* 85 * Calculate whether we need key hint space 86 */ 87 static size_t 88 key_hint_size(void) 89 { 90 91 return (need_hint ? sizeof(struct key_hint) : 0); 92 } 93 94 /* 95 * Calculate keys array size 96 */ 97 size_t 98 keys_array_size(void) 99 { 100 101 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 102 } 103 104 /* 105 * Clean data of keys array 106 */ 107 void 108 clean_keys_array(const struct bwstring *s, struct keys_array *ka) 109 { 110 111 if (ka) { 112 for (size_t i = 0; i < keys_num; ++i) 113 if (ka->key[i].k && ka->key[i].k != s) 114 bwsfree(ka->key[i].k); 115 memset(ka, 0, keys_array_size()); 116 } 117 } 118 119 /* 120 * Set value of a key in the keys set 121 */ 122 void 123 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 124 { 125 126 if (ka && keys_num > ind) { 127 struct key_value *kv; 128 129 kv = &(ka->key[ind]); 130 131 if (kv->k && kv->k != s) 132 bwsfree(kv->k); 133 kv->k = s; 134 } 135 } 136 137 /* 138 * Initialize a sort list item 139 */ 140 struct sort_list_item * 141 sort_list_item_alloc(void) 142 { 143 struct sort_list_item *si; 144 size_t sz; 145 146 sz = sizeof(struct sort_list_item) + keys_array_size(); 147 si = sort_malloc(sz); 148 memset(si, 0, sz); 149 150 return (si); 151 } 152 153 size_t 154 sort_list_item_size(struct sort_list_item *si) 155 { 156 size_t ret = 0; 157 158 if (si) { 159 ret = sizeof(struct sort_list_item) + keys_array_size(); 160 if (si->str) 161 ret += bws_memsize(si->str); 162 for (size_t i = 0; i < keys_num; ++i) { 163 struct key_value *kv; 164 165 kv = &(si->ka.key[i]); 166 167 if (kv->k != si->str) 168 ret += bws_memsize(kv->k); 169 } 170 } 171 return (ret); 172 } 173 174 /* 175 * Calculate key for a sort list item 176 */ 177 static void 178 sort_list_item_make_key(struct sort_list_item *si) 179 { 180 181 preproc(si->str, &(si->ka)); 182 } 183 184 /* 185 * Set value of a sort list item. 186 * Return combined string and keys memory size. 187 */ 188 void 189 sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 190 { 191 192 if (si) { 193 clean_keys_array(si->str, &(si->ka)); 194 if (si->str) { 195 if (si->str == str) { 196 /* we are trying to reset the same string */ 197 return; 198 } else { 199 bwsfree(si->str); 200 si->str = NULL; 201 } 202 } 203 si->str = str; 204 sort_list_item_make_key(si); 205 } 206 } 207 208 /* 209 * De-allocate a sort list item object memory 210 */ 211 void 212 sort_list_item_clean(struct sort_list_item *si) 213 { 214 215 if (si) { 216 clean_keys_array(si->str, &(si->ka)); 217 if (si->str) { 218 bwsfree(si->str); 219 si->str = NULL; 220 } 221 } 222 } 223 224 /* 225 * Skip columns according to specs 226 */ 227 static size_t 228 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 229 bool skip_blanks, bool *empty_key) 230 { 231 if (cols < 1) 232 return (BWSLEN(s) + 1); 233 234 if (skip_blanks) 235 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 236 ++start; 237 238 while (start < BWSLEN(s) && cols > 1) { 239 --cols; 240 ++start; 241 } 242 243 if (start >= BWSLEN(s)) 244 *empty_key = true; 245 246 return (start); 247 } 248 249 /* 250 * Skip fields according to specs 251 */ 252 static size_t 253 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 254 { 255 256 if (fields < 2) { 257 if (BWSLEN(s) == 0) 258 *empty_field = true; 259 return (0); 260 } else if (!(sort_opts_vals.tflag)) { 261 size_t cpos = 0; 262 bool pb = true; 263 264 while (cpos < BWSLEN(s)) { 265 bool isblank; 266 267 isblank = iswblank(BWS_GET(s, cpos)); 268 269 if (isblank && !pb) { 270 --fields; 271 if (fields <= 1) 272 return (cpos); 273 } 274 pb = isblank; 275 ++cpos; 276 } 277 if (fields > 1) 278 *empty_field = true; 279 return (cpos); 280 } else { 281 size_t cpos = 0; 282 283 while (cpos < BWSLEN(s)) { 284 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 285 --fields; 286 if (fields <= 1) 287 return (cpos + 1); 288 } 289 ++cpos; 290 } 291 if (fields > 1) 292 *empty_field = true; 293 return (cpos); 294 } 295 } 296 297 /* 298 * Find fields start 299 */ 300 static void 301 find_field_start(const struct bwstring *s, struct key_specs *ks, 302 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 303 { 304 305 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 306 if (!*empty_field) 307 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 308 ks->pos1b, empty_key); 309 else 310 *empty_key = true; 311 } 312 313 /* 314 * Find end key position 315 */ 316 static size_t 317 find_field_end(const struct bwstring *s, struct key_specs *ks) 318 { 319 size_t f2, next_field_start, pos_end; 320 bool empty_field, empty_key; 321 322 pos_end = 0; 323 next_field_start = 0; 324 empty_field = false; 325 empty_key = false; 326 f2 = ks->f2; 327 328 if (f2 == 0) 329 return (BWSLEN(s) + 1); 330 else { 331 if (ks->c2 == 0) { 332 next_field_start = skip_fields_to_start(s, f2 + 1, 333 &empty_field); 334 if ((next_field_start > 0) && sort_opts_vals.tflag && 335 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 336 next_field_start - 1))) 337 --next_field_start; 338 } else 339 next_field_start = skip_fields_to_start(s, f2, 340 &empty_field); 341 } 342 343 if (empty_field || (next_field_start >= BWSLEN(s))) 344 return (BWSLEN(s) + 1); 345 346 if (ks->c2) { 347 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 348 ks->pos2b, &empty_key); 349 if (pos_end < BWSLEN(s)) 350 ++pos_end; 351 } else 352 pos_end = next_field_start; 353 354 return (pos_end); 355 } 356 357 /* 358 * Cut a field according to the key specs 359 */ 360 static struct bwstring * 361 cut_field(const struct bwstring *s, struct key_specs *ks) 362 { 363 struct bwstring *ret = NULL; 364 365 if (s && ks) { 366 size_t field_start, key_end, key_start, sz; 367 bool empty_field, empty_key; 368 369 field_start = 0; 370 key_start = 0; 371 empty_field = false; 372 empty_key = false; 373 374 find_field_start(s, ks, &field_start, &key_start, 375 &empty_field, &empty_key); 376 377 if (empty_key) 378 sz = 0; 379 else { 380 key_end = find_field_end(s, ks); 381 sz = (key_end < key_start) ? 0 : (key_end - key_start); 382 } 383 384 ret = bwsalloc(sz); 385 if (sz) 386 bwsnocpy(ret, s, key_start, sz); 387 } else 388 ret = bwsalloc(0); 389 390 return (ret); 391 } 392 393 /* 394 * Preprocesses a line applying the necessary transformations 395 * specified by command line options and returns the preprocessed 396 * string, which can be used to compare. 397 */ 398 int 399 preproc(struct bwstring *s, struct keys_array *ka) 400 { 401 402 if (sort_opts_vals.kflag) 403 for (size_t i = 0; i < keys_num; i++) { 404 struct bwstring *key; 405 struct key_specs *kspecs; 406 struct sort_mods *sm; 407 408 kspecs = &(keys[i]); 409 key = cut_field(s, kspecs); 410 411 sm = &(kspecs->sm); 412 if (sm->dflag) 413 key = dictionary_order(key); 414 else if (sm->iflag) 415 key = ignore_nonprinting(key); 416 if (sm->fflag || sm->Mflag) 417 key = ignore_case(key); 418 419 set_key_on_keys_array(ka, key, i); 420 } 421 else { 422 struct bwstring *ret = NULL; 423 struct sort_mods *sm = default_sort_mods; 424 425 if (sm->bflag) { 426 if (ret == NULL) 427 ret = bwsdup(s); 428 ret = ignore_leading_blanks(ret); 429 } 430 if (sm->dflag) { 431 if (ret == NULL) 432 ret = bwsdup(s); 433 ret = dictionary_order(ret); 434 } else if (sm->iflag) { 435 if (ret == NULL) 436 ret = bwsdup(s); 437 ret = ignore_nonprinting(ret); 438 } 439 if (sm->fflag || sm->Mflag) { 440 if (ret == NULL) 441 ret = bwsdup(s); 442 ret = ignore_case(ret); 443 } 444 if (ret == NULL) 445 set_key_on_keys_array(ka, s, 0); 446 else 447 set_key_on_keys_array(ka, ret, 0); 448 } 449 450 return 0; 451 } 452 453 cmpcoll_t 454 get_sort_func(struct sort_mods *sm) 455 { 456 457 if (sm->nflag) 458 return (numcoll); 459 else if (sm->hflag) 460 return (hnumcoll); 461 else if (sm->gflag) 462 return (gnumcoll); 463 else if (sm->Mflag) 464 return (monthcoll); 465 #if defined(SORT_RANDOM) 466 else if (sm->Rflag) 467 return (randomcoll); 468 #endif 469 else if (sm->Vflag) 470 return (versioncoll); 471 else 472 return (wstrcoll); 473 } 474 475 /* 476 * Compares the given strings. Returns a positive number if 477 * the first precedes the second, a negative number if the second is 478 * the preceding one, and zero if they are equal. This function calls 479 * the underlying collate functions, which done the actual comparison. 480 */ 481 int 482 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 483 { 484 struct sort_mods *sm; 485 int res = 0; 486 487 for (size_t i = 0; i < keys_num; ++i) { 488 sm = &(keys[i].sm); 489 490 if (sm->rflag) 491 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset); 492 else 493 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset); 494 495 if (res) 496 break; 497 498 /* offset applies to only the first key */ 499 offset = 0; 500 } 501 return (res); 502 } 503 504 /* 505 * Compare two strings. 506 * Plain symbol-by-symbol comparison. 507 */ 508 int 509 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 510 { 511 512 if (default_sort_mods->rflag) { 513 const struct bwstring *tmp; 514 515 tmp = s1; 516 s1 = s2; 517 s2 = tmp; 518 } 519 520 return (bwscoll(s1, s2, 0)); 521 } 522 523 /* 524 * Compare a string and a sort list item, according to the sort specs. 525 */ 526 int 527 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 528 { 529 struct keys_array *ka1; 530 int ret = 0; 531 532 ka1 = keys_array_alloc(); 533 534 preproc(str1, ka1); 535 536 sort_list_item_make_key(*ss2); 537 538 if (debug_sort) { 539 bwsprintf(stdout, str1, "; s1=<", ">"); 540 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 541 } 542 543 ret = key_coll(ka1, &((*ss2)->ka), 0); 544 545 if (debug_sort) 546 printf("; cmp1=%d", ret); 547 548 clean_keys_array(str1, ka1); 549 sort_free(ka1); 550 551 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 552 ret = top_level_str_coll(str1, ((*ss2)->str)); 553 if (debug_sort) 554 printf("; cmp2=%d", ret); 555 } 556 557 if (debug_sort) 558 printf("\n"); 559 560 return (ret); 561 } 562 563 /* 564 * Compare two sort list items, according to the sort specs. 565 */ 566 int 567 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 568 size_t offset) 569 { 570 int ret; 571 572 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 573 574 if (debug_sort) { 575 if (offset) 576 printf("; offset=%d", (int) offset); 577 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 578 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 579 printf("; cmp1=%d\n", ret); 580 } 581 582 if (ret) 583 return (ret); 584 585 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 586 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 587 if (debug_sort) 588 printf("; cmp2=%d\n", ret); 589 } 590 591 return (ret); 592 } 593 594 /* 595 * Compare two sort list items, according to the sort specs. 596 */ 597 int 598 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 599 { 600 601 return (list_coll_offset(ss1, ss2, 0)); 602 } 603 604 #define LSCDEF(N) \ 605 static int \ 606 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 607 { \ 608 \ 609 return (list_coll_offset(ss1, ss2, N)); \ 610 } 611 612 LSCDEF(1) 613 LSCDEF(2) 614 LSCDEF(3) 615 LSCDEF(4) 616 LSCDEF(5) 617 LSCDEF(6) 618 LSCDEF(7) 619 LSCDEF(8) 620 LSCDEF(9) 621 LSCDEF(10) 622 LSCDEF(11) 623 LSCDEF(12) 624 LSCDEF(13) 625 LSCDEF(14) 626 LSCDEF(15) 627 LSCDEF(16) 628 LSCDEF(17) 629 LSCDEF(18) 630 LSCDEF(19) 631 LSCDEF(20) 632 633 listcoll_t 634 get_list_call_func(size_t offset) 635 { 636 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 637 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 638 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 639 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 640 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 641 list_coll_18, list_coll_19, list_coll_20 }; 642 643 if (offset <= 20) 644 return (lsarray[offset]); 645 646 return (list_coll); 647 } 648 649 /* 650 * Compare two sort list items, only by their original string. 651 */ 652 int 653 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 654 { 655 656 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 657 } 658 659 /* 660 * Maximum size of a number in the string (before or after decimal point) 661 */ 662 #define MAX_NUM_SIZE (128) 663 664 /* 665 * Set suffix value 666 */ 667 static void setsuffix(wchar_t c, unsigned char *si) 668 { 669 switch (c){ 670 case L'k': 671 case L'K': 672 *si = 1; 673 break; 674 case L'M': 675 *si = 2; 676 break; 677 case L'G': 678 *si = 3; 679 break; 680 case L'T': 681 *si = 4; 682 break; 683 case L'P': 684 *si = 5; 685 break; 686 case L'E': 687 *si = 6; 688 break; 689 case L'Z': 690 *si = 7; 691 break; 692 case L'Y': 693 *si = 8; 694 break; 695 default: 696 *si = 0; 697 }; 698 } 699 700 /* 701 * Read string s and parse the string into a fixed-decimal-point number. 702 * sign equals -1 if the number is negative (explicit plus is not allowed, 703 * according to GNU sort's "info sort". 704 * The number part before decimal point is in the smain, after the decimal 705 * point is in sfrac, tail is the pointer to the remainder of the string. 706 */ 707 static int 708 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 709 { 710 bwstring_iterator s; 711 712 s = bws_begin(s0); 713 714 /* always end the fraction with zero, even if we have no fraction */ 715 sfrac[0] = 0; 716 717 while (iswblank(bws_get_iter_value(s))) 718 s = bws_iterator_inc(s, 1); 719 720 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 721 *sign = -1; 722 s = bws_iterator_inc(s, 1); 723 } 724 725 // This is '0', not '\0', do not change this 726 while (iswdigit(bws_get_iter_value(s)) && 727 (bws_get_iter_value(s) == L'0')) 728 s = bws_iterator_inc(s, 1); 729 730 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 731 if (iswdigit(bws_get_iter_value(s))) { 732 smain[*main_len] = bws_get_iter_value(s); 733 s = bws_iterator_inc(s, 1); 734 *main_len += 1; 735 } else if (symbol_thousands_sep && 736 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 737 s = bws_iterator_inc(s, 1); 738 else 739 break; 740 } 741 742 smain[*main_len] = 0; 743 744 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 745 s = bws_iterator_inc(s, 1); 746 while (iswdigit(bws_get_iter_value(s)) && 747 *frac_len < MAX_NUM_SIZE) { 748 sfrac[*frac_len] = bws_get_iter_value(s); 749 s = bws_iterator_inc(s, 1); 750 *frac_len += 1; 751 } 752 sfrac[*frac_len] = 0; 753 754 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 755 --(*frac_len); 756 sfrac[*frac_len] = L'\0'; 757 } 758 } 759 760 setsuffix(bws_get_iter_value(s),si); 761 762 if ((*main_len + *frac_len) == 0) 763 *sign = 0; 764 765 return (0); 766 } 767 768 /* 769 * Implements string sort. 770 */ 771 static int 772 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 773 { 774 775 if (debug_sort) { 776 if (offset) 777 printf("; offset=%d\n", (int) offset); 778 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 779 printf("(%zu)", BWSLEN(kv1->k)); 780 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 781 printf("(%zu)", BWSLEN(kv2->k)); 782 } 783 784 return (bwscoll(kv1->k, kv2->k, offset)); 785 } 786 787 /* 788 * Compare two suffixes 789 */ 790 static inline int 791 cmpsuffix(unsigned char si1, unsigned char si2) 792 { 793 794 return ((char)si1 - (char)si2); 795 } 796 797 /* 798 * Implements numeric sort for -n and -h. 799 */ 800 static int 801 numcoll_impl(struct key_value *kv1, struct key_value *kv2, 802 size_t offset __unused, bool use_suffix) 803 { 804 struct bwstring *s1, *s2; 805 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 806 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 807 int cmp_res, sign1, sign2; 808 size_t frac1, frac2, main1, main2; 809 unsigned char SI1, SI2; 810 bool e1, e2, key1_read, key2_read; 811 812 s1 = kv1->k; 813 s2 = kv2->k; 814 sign1 = sign2 = 0; 815 main1 = main2 = 0; 816 frac1 = frac2 = 0; 817 818 cmp_res = 0; 819 key1_read = key2_read = false; 820 821 if (debug_sort) { 822 bwsprintf(stdout, s1, "; k1=<", ">"); 823 bwsprintf(stdout, s2, ", k2=<", ">"); 824 } 825 826 if (s1 == s2) 827 return (0); 828 829 if (kv1->hint->status == HS_UNINITIALIZED) { 830 /* read the number from the string */ 831 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 832 key1_read = true; 833 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 834 if(main1 < 1 && frac1 < 1) 835 kv1->hint->v.nh.empty=true; 836 kv1->hint->v.nh.si = SI1; 837 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 838 HS_INITIALIZED : HS_ERROR; 839 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 840 } 841 842 if (kv2->hint->status == HS_UNINITIALIZED) { 843 /* read the number from the string */ 844 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 845 key2_read = true; 846 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 847 if(main2 < 1 && frac2 < 1) 848 kv2->hint->v.nh.empty=true; 849 kv2->hint->v.nh.si = SI2; 850 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 851 HS_INITIALIZED : HS_ERROR; 852 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 853 } 854 855 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 856 HS_INITIALIZED) { 857 unsigned long long n1, n2; 858 bool neg1, neg2; 859 860 e1 = kv1->hint->v.nh.empty; 861 e2 = kv2->hint->v.nh.empty; 862 863 if (e1 && e2) 864 return (0); 865 866 neg1 = kv1->hint->v.nh.neg; 867 neg2 = kv2->hint->v.nh.neg; 868 869 if (neg1 && !neg2) 870 return (-1); 871 if (neg2 && !neg1) 872 return (+1); 873 874 if (e1) 875 return (neg2 ? +1 : -1); 876 else if (e2) 877 return (neg1 ? -1 : +1); 878 879 880 if (use_suffix) { 881 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 882 if (cmp_res) 883 return (neg1 ? -cmp_res : cmp_res); 884 } 885 886 n1 = kv1->hint->v.nh.n1; 887 n2 = kv2->hint->v.nh.n1; 888 if (n1 < n2) 889 return (neg1 ? +1 : -1); 890 else if (n1 > n2) 891 return (neg1 ? -1 : +1); 892 } 893 894 /* read the numbers from the strings */ 895 if (!key1_read) 896 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 897 if (!key2_read) 898 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 899 900 e1 = ((main1 + frac1) == 0); 901 e2 = ((main2 + frac2) == 0); 902 903 if (e1 && e2) 904 return (0); 905 906 /* we know the result if the signs are different */ 907 if (sign1 < 0 && sign2 >= 0) 908 return (-1); 909 if (sign1 >= 0 && sign2 < 0) 910 return (+1); 911 912 if (e1) 913 return ((sign2 < 0) ? +1 : -1); 914 else if (e2) 915 return ((sign1 < 0) ? -1 : +1); 916 917 if (use_suffix) { 918 cmp_res = cmpsuffix(SI1, SI2); 919 if (cmp_res) 920 return ((sign1 < 0) ? -cmp_res : cmp_res); 921 } 922 923 /* if both numbers are empty assume that the strings are equal */ 924 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 925 return (0); 926 927 /* 928 * if the main part is of different size, we know the result 929 * (because the leading zeros are removed) 930 */ 931 if (main1 < main2) 932 cmp_res = -1; 933 else if (main1 > main2) 934 cmp_res = +1; 935 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 936 else 937 cmp_res = wcscmp(smain1, smain2); 938 939 /* check fraction */ 940 if (!cmp_res) 941 cmp_res = wcscmp(sfrac1, sfrac2); 942 943 if (!cmp_res) 944 return (0); 945 946 /* reverse result if the signs are negative */ 947 if (sign1 < 0 && sign2 < 0) 948 cmp_res = -cmp_res; 949 950 return (cmp_res); 951 } 952 953 /* 954 * Implements numeric sort (-n). 955 */ 956 static int 957 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 958 { 959 960 return (numcoll_impl(kv1, kv2, offset, false)); 961 } 962 963 /* 964 * Implements 'human' numeric sort (-h). 965 */ 966 static int 967 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 968 { 969 970 return (numcoll_impl(kv1, kv2, offset, true)); 971 } 972 973 /* 974 * Implements random sort (-R). 975 */ 976 #if defined(SORT_RANDOM) 977 static int 978 randomcoll(struct key_value *kv1, struct key_value *kv2, 979 size_t offset __unused) 980 { 981 struct bwstring *s1, *s2; 982 MD5_CTX ctx1, ctx2; 983 char *b1, *b2; 984 985 s1 = kv1->k; 986 s2 = kv2->k; 987 988 if (debug_sort) { 989 bwsprintf(stdout, s1, "; k1=<", ">"); 990 bwsprintf(stdout, s2, ", k2=<", ">"); 991 } 992 993 if (s1 == s2) 994 return (0); 995 996 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX)); 997 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX)); 998 999 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 1000 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 1001 b1 = MD5End(&ctx1, NULL); 1002 b2 = MD5End(&ctx2, NULL); 1003 if (b1 == NULL) { 1004 if (b2 == NULL) 1005 return (0); 1006 else { 1007 sort_free(b2); 1008 return (-1); 1009 } 1010 } else if (b2 == NULL) { 1011 sort_free(b1); 1012 return (+1); 1013 } else { 1014 int cmp_res; 1015 1016 cmp_res = strcmp(b1,b2); 1017 sort_free(b1); 1018 sort_free(b2); 1019 1020 if (!cmp_res) 1021 cmp_res = bwscoll(s1, s2, 0); 1022 1023 return (cmp_res); 1024 } 1025 } 1026 #endif 1027 1028 /* 1029 * Implements version sort (-V). 1030 */ 1031 static int 1032 versioncoll(struct key_value *kv1, struct key_value *kv2, 1033 size_t offset __unused) 1034 { 1035 struct bwstring *s1, *s2; 1036 1037 s1 = kv1->k; 1038 s2 = kv2->k; 1039 1040 if (debug_sort) { 1041 bwsprintf(stdout, s1, "; k1=<", ">"); 1042 bwsprintf(stdout, s2, ", k2=<", ">"); 1043 } 1044 1045 if (s1 == s2) 1046 return (0); 1047 1048 return (vcmp(s1, s2)); 1049 } 1050 1051 /* 1052 * Check for minus infinity 1053 */ 1054 static inline bool 1055 huge_minus(double d, int err1) 1056 { 1057 1058 if (err1 == ERANGE) 1059 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1060 return (+1); 1061 1062 return (0); 1063 } 1064 1065 /* 1066 * Check for plus infinity 1067 */ 1068 static inline bool 1069 huge_plus(double d, int err1) 1070 { 1071 1072 if (err1 == ERANGE) 1073 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1074 return (+1); 1075 1076 return (0); 1077 } 1078 1079 /* 1080 * Check whether a function is a NAN 1081 */ 1082 static bool 1083 is_nan(double d) 1084 { 1085 return ((d == NAN) || (isnan(d))); 1086 } 1087 1088 /* 1089 * Compare two NANs 1090 */ 1091 static int 1092 cmp_nans(double d1, double d2) 1093 { 1094 1095 if (d1 < d2) 1096 return (-1); 1097 if (d1 > d2) 1098 return (+1); 1099 return (0); 1100 } 1101 1102 /* 1103 * Implements general numeric sort (-g). 1104 */ 1105 static int 1106 gnumcoll(struct key_value *kv1, struct key_value *kv2, 1107 size_t offset __unused) 1108 { 1109 double d1, d2; 1110 int err1, err2; 1111 bool empty1, empty2, key1_read, key2_read; 1112 1113 d1 = d2 = 0; 1114 err1 = err2 = 0; 1115 key1_read = key2_read = false; 1116 1117 if (debug_sort) { 1118 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1119 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1120 } 1121 1122 if (kv1->hint->status == HS_UNINITIALIZED) { 1123 errno = 0; 1124 d1 = bwstod(kv1->k, &empty1); 1125 err1 = errno; 1126 1127 if (empty1) 1128 kv1->hint->v.gh.notnum = true; 1129 else if (err1 == 0) { 1130 kv1->hint->v.gh.d = d1; 1131 kv1->hint->v.gh.nan = is_nan(d1); 1132 kv1->hint->status = HS_INITIALIZED; 1133 } else 1134 kv1->hint->status = HS_ERROR; 1135 1136 key1_read = true; 1137 } 1138 1139 if (kv2->hint->status == HS_UNINITIALIZED) { 1140 errno = 0; 1141 d2 = bwstod(kv2->k, &empty2); 1142 err2 = errno; 1143 1144 if (empty2) 1145 kv2->hint->v.gh.notnum = true; 1146 else if (err2 == 0) { 1147 kv2->hint->v.gh.d = d2; 1148 kv2->hint->v.gh.nan = is_nan(d2); 1149 kv2->hint->status = HS_INITIALIZED; 1150 } else 1151 kv2->hint->status = HS_ERROR; 1152 1153 key2_read = true; 1154 } 1155 1156 if (kv1->hint->status == HS_INITIALIZED && 1157 kv2->hint->status == HS_INITIALIZED) { 1158 if (kv1->hint->v.gh.notnum) 1159 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1160 else if (kv2->hint->v.gh.notnum) 1161 return (+1); 1162 1163 if (kv1->hint->v.gh.nan) 1164 return ((kv2->hint->v.gh.nan) ? 1165 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1166 -1); 1167 else if (kv2->hint->v.gh.nan) 1168 return (+1); 1169 1170 d1 = kv1->hint->v.gh.d; 1171 d2 = kv2->hint->v.gh.d; 1172 1173 if (d1 < d2) 1174 return (-1); 1175 else if (d1 > d2) 1176 return (+1); 1177 else 1178 return (0); 1179 } 1180 1181 if (!key1_read) { 1182 errno = 0; 1183 d1 = bwstod(kv1->k, &empty1); 1184 err1 = errno; 1185 } 1186 1187 if (!key2_read) { 1188 errno = 0; 1189 d2 = bwstod(kv2->k, &empty2); 1190 err2 = errno; 1191 } 1192 1193 /* Non-value case: */ 1194 if (empty1) 1195 return (empty2 ? 0 : -1); 1196 else if (empty2) 1197 return (+1); 1198 1199 /* NAN case */ 1200 if (is_nan(d1)) 1201 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1202 else if (is_nan(d2)) 1203 return (+1); 1204 1205 /* Infinities */ 1206 if (err1 == ERANGE || err2 == ERANGE) { 1207 /* Minus infinity case */ 1208 if (huge_minus(d1, err1)) { 1209 if (huge_minus(d2, err2)) { 1210 if (d1 < d2) 1211 return (-1); 1212 if (d1 > d2) 1213 return (+1); 1214 return (0); 1215 } else 1216 return (-1); 1217 1218 } else if (huge_minus(d2, err2)) { 1219 if (huge_minus(d1, err1)) { 1220 if (d1 < d2) 1221 return (-1); 1222 if (d1 > d2) 1223 return (+1); 1224 return (0); 1225 } else 1226 return (+1); 1227 } 1228 1229 /* Plus infinity case */ 1230 if (huge_plus(d1, err1)) { 1231 if (huge_plus(d2, err2)) { 1232 if (d1 < d2) 1233 return (-1); 1234 if (d1 > d2) 1235 return (+1); 1236 return (0); 1237 } else 1238 return (+1); 1239 } else if (huge_plus(d2, err2)) { 1240 if (huge_plus(d1, err1)) { 1241 if (d1 < d2) 1242 return (-1); 1243 if (d1 > d2) 1244 return (+1); 1245 return (0); 1246 } else 1247 return (-1); 1248 } 1249 } 1250 1251 if (d1 < d2) 1252 return (-1); 1253 if (d1 > d2) 1254 return (+1); 1255 1256 return (0); 1257 } 1258 1259 /* 1260 * Implements month sort (-M). 1261 */ 1262 static int 1263 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1264 { 1265 int val1, val2; 1266 bool key1_read, key2_read; 1267 1268 val1 = val2 = 0; 1269 key1_read = key2_read = false; 1270 1271 if (debug_sort) { 1272 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1273 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1274 } 1275 1276 if (kv1->hint->status == HS_UNINITIALIZED) { 1277 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1278 key1_read = true; 1279 kv1->hint->status = HS_INITIALIZED; 1280 } 1281 1282 if (kv2->hint->status == HS_UNINITIALIZED) { 1283 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1284 key2_read = true; 1285 kv2->hint->status = HS_INITIALIZED; 1286 } 1287 1288 if (kv1->hint->status == HS_INITIALIZED) { 1289 val1 = kv1->hint->v.Mh.m; 1290 key1_read = true; 1291 } 1292 1293 if (kv2->hint->status == HS_INITIALIZED) { 1294 val2 = kv2->hint->v.Mh.m; 1295 key2_read = true; 1296 } 1297 1298 if (!key1_read) 1299 val1 = bws_month_score(kv1->k); 1300 if (!key2_read) 1301 val2 = bws_month_score(kv2->k); 1302 1303 if (val1 == val2) { 1304 return (0); 1305 } 1306 if (val1 < val2) 1307 return (-1); 1308 return (+1); 1309 } 1310