1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: head/usr.bin/sort/coll.c 281132 2015-04-06 02:35:55Z pfg $ 28 */ 29 30 31 #include <sys/types.h> 32 33 #include <errno.h> 34 #include <err.h> 35 #include <langinfo.h> 36 #include <limits.h> 37 #include <math.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <wchar.h> 41 #include <wctype.h> 42 #if defined(SORT_RANDOM) 43 #include <openssl/md5.h> 44 #endif 45 46 #include "coll.h" 47 #include "vsort.h" 48 49 struct key_specs *keys; 50 size_t keys_num = 0; 51 52 wint_t symbol_decimal_point = L'.'; 53 /* there is no default thousands separator in collate rules: */ 54 wint_t symbol_thousands_sep = 0; 55 wint_t symbol_negative_sign = L'-'; 56 wint_t symbol_positive_sign = L'+'; 57 58 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 59 static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 60 static int monthcoll(struct key_value*, struct key_value *, size_t offset); 61 static int numcoll(struct key_value*, struct key_value *, size_t offset); 62 static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 63 #if defined(SORT_RANDOM) 64 static int randomcoll(struct key_value*, struct key_value *, size_t offset); 65 #endif 66 static int versioncoll(struct key_value*, struct key_value *, size_t offset); 67 68 /* 69 * Allocate keys array 70 */ 71 struct keys_array * 72 keys_array_alloc(void) 73 { 74 struct keys_array *ka; 75 size_t sz; 76 77 sz = keys_array_size(); 78 ka = sort_malloc(sz); 79 memset(ka, 0, sz); 80 81 return (ka); 82 } 83 84 /* 85 * Calculate whether we need key hint space 86 */ 87 static size_t 88 key_hint_size(void) 89 { 90 91 return (need_hint ? sizeof(struct key_hint) : 0); 92 } 93 94 /* 95 * Calculate keys array size 96 */ 97 size_t 98 keys_array_size(void) 99 { 100 101 return (keys_num * (sizeof(struct key_value) + key_hint_size())); 102 } 103 104 /* 105 * Clean data of keys array 106 */ 107 void 108 clean_keys_array(const struct bwstring *s, struct keys_array *ka) 109 { 110 111 if (ka) { 112 for (size_t i = 0; i < keys_num; ++i) 113 if (ka->key[i].k && ka->key[i].k != s) 114 bwsfree(ka->key[i].k); 115 memset(ka, 0, keys_array_size()); 116 } 117 } 118 119 /* 120 * Set value of a key in the keys set 121 */ 122 void 123 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 124 { 125 126 if (ka && keys_num > ind) { 127 struct key_value *kv; 128 129 kv = &(ka->key[ind]); 130 131 if (kv->k && kv->k != s) 132 bwsfree(kv->k); 133 kv->k = s; 134 } 135 } 136 137 /* 138 * Initialize a sort list item 139 */ 140 struct sort_list_item * 141 sort_list_item_alloc(void) 142 { 143 struct sort_list_item *si; 144 size_t sz; 145 146 sz = sizeof(struct sort_list_item) + keys_array_size(); 147 si = sort_malloc(sz); 148 memset(si, 0, sz); 149 150 return (si); 151 } 152 153 size_t 154 sort_list_item_size(struct sort_list_item *si) 155 { 156 size_t ret = 0; 157 158 if (si) { 159 ret = sizeof(struct sort_list_item) + keys_array_size(); 160 if (si->str) 161 ret += bws_memsize(si->str); 162 for (size_t i = 0; i < keys_num; ++i) { 163 struct key_value *kv; 164 165 kv = &(si->ka.key[i]); 166 167 if (kv->k != si->str) 168 ret += bws_memsize(kv->k); 169 } 170 } 171 return (ret); 172 } 173 174 /* 175 * Calculate key for a sort list item 176 */ 177 static void 178 sort_list_item_make_key(struct sort_list_item *si) 179 { 180 181 preproc(si->str, &(si->ka)); 182 } 183 184 /* 185 * Set value of a sort list item. 186 * Return combined string and keys memory size. 187 */ 188 void 189 sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 190 { 191 192 if (si) { 193 clean_keys_array(si->str, &(si->ka)); 194 if (si->str) { 195 if (si->str == str) { 196 /* we are trying to reset the same string */ 197 return; 198 } else { 199 bwsfree(si->str); 200 si->str = NULL; 201 } 202 } 203 si->str = str; 204 sort_list_item_make_key(si); 205 } 206 } 207 208 /* 209 * De-allocate a sort list item object memory 210 */ 211 void 212 sort_list_item_clean(struct sort_list_item *si) 213 { 214 215 if (si) { 216 clean_keys_array(si->str, &(si->ka)); 217 if (si->str) { 218 bwsfree(si->str); 219 si->str = NULL; 220 } 221 } 222 } 223 224 /* 225 * Skip columns according to specs 226 */ 227 static size_t 228 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 229 bool skip_blanks, bool *empty_key) 230 { 231 if (cols < 1) 232 return (BWSLEN(s) + 1); 233 234 if (skip_blanks) 235 while (start < BWSLEN(s) && iswblank(BWS_GET(s,start))) 236 ++start; 237 238 while (start < BWSLEN(s) && cols > 1) { 239 --cols; 240 ++start; 241 } 242 243 if (start >= BWSLEN(s)) 244 *empty_key = true; 245 246 return (start); 247 } 248 249 /* 250 * Skip fields according to specs 251 */ 252 static size_t 253 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 254 { 255 256 if (fields < 2) { 257 if (BWSLEN(s) == 0) 258 *empty_field = true; 259 return (0); 260 } else if (!(sort_opts_vals.tflag)) { 261 size_t cpos = 0; 262 bool pb = true; 263 264 while (cpos < BWSLEN(s)) { 265 bool isblank; 266 267 isblank = iswblank(BWS_GET(s, cpos)); 268 269 if (isblank && !pb) { 270 --fields; 271 if (fields <= 1) 272 return (cpos); 273 } 274 pb = isblank; 275 ++cpos; 276 } 277 if (fields > 1) 278 *empty_field = true; 279 return (cpos); 280 } else { 281 size_t cpos = 0; 282 283 while (cpos < BWSLEN(s)) { 284 if (BWS_GET(s,cpos) == (wchar_t)sort_opts_vals.field_sep) { 285 --fields; 286 if (fields <= 1) 287 return (cpos + 1); 288 } 289 ++cpos; 290 } 291 if (fields > 1) 292 *empty_field = true; 293 return (cpos); 294 } 295 } 296 297 /* 298 * Find fields start 299 */ 300 static void 301 find_field_start(const struct bwstring *s, struct key_specs *ks, 302 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 303 { 304 305 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 306 if (!*empty_field) 307 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 308 ks->pos1b, empty_key); 309 else 310 *empty_key = true; 311 } 312 313 /* 314 * Find end key position 315 */ 316 static size_t 317 find_field_end(const struct bwstring *s, struct key_specs *ks) 318 { 319 size_t f2, next_field_start, pos_end; 320 bool empty_field, empty_key; 321 322 pos_end = 0; 323 next_field_start = 0; 324 empty_field = false; 325 empty_key = false; 326 f2 = ks->f2; 327 328 if (f2 == 0) 329 return (BWSLEN(s) + 1); 330 else { 331 if (ks->c2 == 0) { 332 next_field_start = skip_fields_to_start(s, f2 + 1, 333 &empty_field); 334 if ((next_field_start > 0) && sort_opts_vals.tflag && 335 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 336 next_field_start - 1))) 337 --next_field_start; 338 } else 339 next_field_start = skip_fields_to_start(s, f2, 340 &empty_field); 341 } 342 343 if (empty_field || (next_field_start >= BWSLEN(s))) 344 return (BWSLEN(s) + 1); 345 346 if (ks->c2) { 347 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 348 ks->pos2b, &empty_key); 349 if (pos_end < BWSLEN(s)) 350 ++pos_end; 351 } else 352 pos_end = next_field_start; 353 354 return (pos_end); 355 } 356 357 /* 358 * Cut a field according to the key specs 359 */ 360 static struct bwstring * 361 cut_field(const struct bwstring *s, struct key_specs *ks) 362 { 363 struct bwstring *ret = NULL; 364 365 if (s && ks) { 366 size_t field_start, key_end, key_start, sz; 367 bool empty_field, empty_key; 368 369 field_start = 0; 370 key_start = 0; 371 empty_field = false; 372 empty_key = false; 373 374 find_field_start(s, ks, &field_start, &key_start, 375 &empty_field, &empty_key); 376 377 if (empty_key) 378 sz = 0; 379 else { 380 key_end = find_field_end(s, ks); 381 sz = (key_end < key_start) ? 0 : (key_end - key_start); 382 } 383 384 ret = bwsalloc(sz); 385 if (sz) 386 bwsnocpy(ret, s, key_start, sz); 387 } else 388 ret = bwsalloc(0); 389 390 return (ret); 391 } 392 393 /* 394 * Preprocesses a line applying the necessary transformations 395 * specified by command line options and returns the preprocessed 396 * string, which can be used to compare. 397 */ 398 int 399 preproc(struct bwstring *s, struct keys_array *ka) 400 { 401 402 if (sort_opts_vals.kflag) 403 for (size_t i = 0; i < keys_num; i++) { 404 struct bwstring *key; 405 struct key_specs *kspecs; 406 struct sort_mods *sm; 407 408 kspecs = &(keys[i]); 409 key = cut_field(s, kspecs); 410 411 sm = &(kspecs->sm); 412 if (sm->dflag) 413 key = dictionary_order(key); 414 else if (sm->iflag) 415 key = ignore_nonprinting(key); 416 if (sm->fflag || sm->Mflag) 417 key = ignore_case(key); 418 419 set_key_on_keys_array(ka, key, i); 420 } 421 else { 422 struct bwstring *ret = NULL; 423 struct sort_mods *sm = default_sort_mods; 424 425 if (sm->bflag) { 426 if (ret == NULL) 427 ret = bwsdup(s); 428 ret = ignore_leading_blanks(ret); 429 } 430 if (sm->dflag) { 431 if (ret == NULL) 432 ret = bwsdup(s); 433 ret = dictionary_order(ret); 434 } else if (sm->iflag) { 435 if (ret == NULL) 436 ret = bwsdup(s); 437 ret = ignore_nonprinting(ret); 438 } 439 if (sm->fflag || sm->Mflag) { 440 if (ret == NULL) 441 ret = bwsdup(s); 442 ret = ignore_case(ret); 443 } 444 if (ret == NULL) 445 set_key_on_keys_array(ka, s, 0); 446 else 447 set_key_on_keys_array(ka, ret, 0); 448 } 449 450 return 0; 451 } 452 453 cmpcoll_t 454 get_sort_func(struct sort_mods *sm) 455 { 456 457 if (sm->nflag) 458 return (numcoll); 459 else if (sm->hflag) 460 return (hnumcoll); 461 else if (sm->gflag) 462 return (gnumcoll); 463 else if (sm->Mflag) 464 return (monthcoll); 465 #if defined(SORT_RANDOM) 466 else if (sm->Rflag) 467 return (randomcoll); 468 #endif 469 else if (sm->Vflag) 470 return (versioncoll); 471 else 472 return (wstrcoll); 473 } 474 475 /* 476 * Compares the given strings. Returns a positive number if 477 * the first precedes the second, a negative number if the second is 478 * the preceding one, and zero if they are equal. This function calls 479 * the underlying collate functions, which done the actual comparison. 480 */ 481 int 482 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 483 { 484 struct sort_mods *sm; 485 int res = 0; 486 487 for (size_t i = 0; i < keys_num; ++i) { 488 sm = &(keys[i].sm); 489 490 if (sm->rflag) 491 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset); 492 else 493 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset); 494 495 if (res) 496 break; 497 498 /* offset applies to only the first key */ 499 offset = 0; 500 } 501 return (res); 502 } 503 504 /* 505 * Compare two strings. 506 * Plain symbol-by-symbol comparison. 507 */ 508 int 509 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 510 { 511 512 if (default_sort_mods->rflag) { 513 const struct bwstring *tmp; 514 515 tmp = s1; 516 s1 = s2; 517 s2 = tmp; 518 } 519 520 return (bwscoll(s1, s2, 0)); 521 } 522 523 /* 524 * Compare a string and a sort list item, according to the sort specs. 525 */ 526 int 527 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 528 { 529 struct keys_array *ka1; 530 int ret = 0; 531 532 ka1 = keys_array_alloc(); 533 534 preproc(str1, ka1); 535 536 sort_list_item_make_key(*ss2); 537 538 if (debug_sort) { 539 bwsprintf(stdout, str1, "; s1=<", ">"); 540 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 541 } 542 543 ret = key_coll(ka1, &((*ss2)->ka), 0); 544 545 if (debug_sort) 546 printf("; cmp1=%d", ret); 547 548 clean_keys_array(str1, ka1); 549 sort_free(ka1); 550 551 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 552 ret = top_level_str_coll(str1, ((*ss2)->str)); 553 if (debug_sort) 554 printf("; cmp2=%d", ret); 555 } 556 557 if (debug_sort) 558 printf("\n"); 559 560 return (ret); 561 } 562 563 /* 564 * Compare two sort list items, according to the sort specs. 565 */ 566 int 567 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 568 size_t offset) 569 { 570 int ret; 571 572 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 573 574 if (debug_sort) { 575 if (offset) 576 printf("; offset=%d", (int) offset); 577 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 578 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 579 printf("; cmp1=%d\n", ret); 580 } 581 582 if (ret) 583 return (ret); 584 585 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 586 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 587 if (debug_sort) 588 printf("; cmp2=%d\n", ret); 589 } 590 591 return (ret); 592 } 593 594 /* 595 * Compare two sort list items, according to the sort specs. 596 */ 597 int 598 list_coll(struct sort_list_item **ss1, struct sort_list_item **ss2) 599 { 600 601 return (list_coll_offset(ss1, ss2, 0)); 602 } 603 604 #define LSCDEF(N) \ 605 static int \ 606 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 607 { \ 608 \ 609 return (list_coll_offset(ss1, ss2, N)); \ 610 } 611 612 LSCDEF(1) 613 LSCDEF(2) 614 LSCDEF(3) 615 LSCDEF(4) 616 LSCDEF(5) 617 LSCDEF(6) 618 LSCDEF(7) 619 LSCDEF(8) 620 LSCDEF(9) 621 LSCDEF(10) 622 LSCDEF(11) 623 LSCDEF(12) 624 LSCDEF(13) 625 LSCDEF(14) 626 LSCDEF(15) 627 LSCDEF(16) 628 LSCDEF(17) 629 LSCDEF(18) 630 LSCDEF(19) 631 LSCDEF(20) 632 633 listcoll_t 634 get_list_call_func(size_t offset) 635 { 636 static const listcoll_t lsarray[] = { list_coll, list_coll_1, 637 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 638 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 639 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 640 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 641 list_coll_18, list_coll_19, list_coll_20 }; 642 643 if (offset <= 20) 644 return (lsarray[offset]); 645 646 return (list_coll); 647 } 648 649 /* 650 * Compare two sort list items, only by their original string. 651 */ 652 int 653 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 654 { 655 656 return (top_level_str_coll(((*ss1)->str), ((*ss2)->str))); 657 } 658 659 /* 660 * Maximum size of a number in the string (before or after decimal point) 661 */ 662 #define MAX_NUM_SIZE (128) 663 664 /* 665 * Set suffix value 666 */ 667 static void setsuffix(wchar_t c, unsigned char *si) 668 { 669 switch (c){ 670 case L'k': 671 case L'K': 672 *si = 1; 673 break; 674 case L'M': 675 *si = 2; 676 break; 677 case L'G': 678 *si = 3; 679 break; 680 case L'T': 681 *si = 4; 682 break; 683 case L'P': 684 *si = 5; 685 break; 686 case L'E': 687 *si = 6; 688 break; 689 case L'Z': 690 *si = 7; 691 break; 692 case L'Y': 693 *si = 8; 694 break; 695 default: 696 *si = 0; 697 }; 698 } 699 700 /* 701 * Read string s and parse the string into a fixed-decimal-point number. 702 * sign equals -1 if the number is negative (explicit plus is not allowed, 703 * according to GNU sort's "info sort". 704 * The number part before decimal point is in the smain, after the decimal 705 * point is in sfrac, tail is the pointer to the remainder of the string. 706 */ 707 static int 708 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 709 { 710 bwstring_iterator s; 711 712 s = bws_begin(s0); 713 714 /* always end the fraction with zero, even if we have no fraction */ 715 sfrac[0] = 0; 716 717 while (iswblank(bws_get_iter_value(s))) 718 s = bws_iterator_inc(s, 1); 719 720 if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) { 721 *sign = -1; 722 s = bws_iterator_inc(s, 1); 723 } 724 725 // This is '0', not '\0', do not change this 726 while (iswdigit(bws_get_iter_value(s)) && 727 (bws_get_iter_value(s) == L'0')) 728 s = bws_iterator_inc(s, 1); 729 730 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 731 if (iswdigit(bws_get_iter_value(s))) { 732 smain[*main_len] = bws_get_iter_value(s); 733 s = bws_iterator_inc(s, 1); 734 *main_len += 1; 735 } else if (symbol_thousands_sep && 736 (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep)) 737 s = bws_iterator_inc(s, 1); 738 else 739 break; 740 } 741 742 smain[*main_len] = 0; 743 744 if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) { 745 s = bws_iterator_inc(s, 1); 746 while (iswdigit(bws_get_iter_value(s)) && 747 *frac_len < MAX_NUM_SIZE) { 748 sfrac[*frac_len] = bws_get_iter_value(s); 749 s = bws_iterator_inc(s, 1); 750 *frac_len += 1; 751 } 752 sfrac[*frac_len] = 0; 753 754 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 755 --(*frac_len); 756 sfrac[*frac_len] = L'\0'; 757 } 758 } 759 760 setsuffix(bws_get_iter_value(s),si); 761 762 if ((*main_len + *frac_len) == 0) 763 *sign = 0; 764 765 return (0); 766 } 767 768 /* 769 * Implements string sort. 770 */ 771 static int 772 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 773 { 774 775 if (debug_sort) { 776 if (offset) 777 printf("; offset=%d\n", (int) offset); 778 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 779 printf("(%zu)", BWSLEN(kv1->k)); 780 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 781 printf("(%zu)", BWSLEN(kv2->k)); 782 } 783 784 return (bwscoll(kv1->k, kv2->k, offset)); 785 } 786 787 /* 788 * Compare two suffixes 789 */ 790 static inline int 791 cmpsuffix(unsigned char si1, unsigned char si2) 792 { 793 794 return ((char)si1 - (char)si2); 795 } 796 797 /* 798 * Implements numeric sort for -n and -h. 799 */ 800 static int 801 numcoll_impl(struct key_value *kv1, struct key_value *kv2, 802 size_t offset __unused, bool use_suffix) 803 { 804 struct bwstring *s1, *s2; 805 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 806 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 807 int cmp_res, sign1, sign2; 808 size_t frac1, frac2, main1, main2; 809 unsigned char SI1, SI2; 810 bool e1, e2, key1_read, key2_read; 811 812 s1 = kv1->k; 813 s2 = kv2->k; 814 sign1 = sign2 = 0; 815 main1 = main2 = 0; 816 frac1 = frac2 = 0; 817 818 cmp_res = 0; 819 key1_read = key2_read = false; 820 821 if (debug_sort) { 822 bwsprintf(stdout, s1, "; k1=<", ">"); 823 bwsprintf(stdout, s2, ", k2=<", ">"); 824 } 825 826 if (s1 == s2) 827 return (0); 828 829 if (kv1->hint->status == HS_UNINITIALIZED) { 830 /* read the number from the string */ 831 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 832 key1_read = true; 833 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 834 if(main1 < 1 && frac1 < 1) 835 kv1->hint->v.nh.empty=true; 836 kv1->hint->v.nh.si = SI1; 837 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 838 HS_INITIALIZED : HS_ERROR; 839 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 840 } 841 842 if (kv2->hint->status == HS_UNINITIALIZED) { 843 /* read the number from the string */ 844 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2,&SI2); 845 key2_read = true; 846 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 847 if(main2 < 1 && frac2 < 1) 848 kv2->hint->v.nh.empty=true; 849 kv2->hint->v.nh.si = SI2; 850 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 851 HS_INITIALIZED : HS_ERROR; 852 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 853 } 854 855 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 856 HS_INITIALIZED) { 857 unsigned long long n1, n2; 858 bool neg1, neg2; 859 860 e1 = kv1->hint->v.nh.empty; 861 e2 = kv2->hint->v.nh.empty; 862 863 if (e1 && e2) 864 return (0); 865 866 neg1 = kv1->hint->v.nh.neg; 867 neg2 = kv2->hint->v.nh.neg; 868 869 if (neg1 && !neg2) 870 return (-1); 871 if (neg2 && !neg1) 872 return (+1); 873 874 if (e1) 875 return (neg2 ? +1 : -1); 876 else if (e2) 877 return (neg1 ? -1 : +1); 878 879 880 if (use_suffix) { 881 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 882 if (cmp_res) 883 return (neg1 ? -cmp_res : cmp_res); 884 } 885 886 n1 = kv1->hint->v.nh.n1; 887 n2 = kv2->hint->v.nh.n1; 888 if (n1 < n2) 889 return (neg1 ? +1 : -1); 890 else if (n1 > n2) 891 return (neg1 ? -1 : +1); 892 } 893 894 /* read the numbers from the strings */ 895 if (!key1_read) 896 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 897 if (!key2_read) 898 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 899 900 e1 = ((main1 + frac1) == 0); 901 e2 = ((main2 + frac2) == 0); 902 903 if (e1 && e2) 904 return (0); 905 906 /* we know the result if the signs are different */ 907 if (sign1 < 0 && sign2 >= 0) 908 return (-1); 909 if (sign1 >= 0 && sign2 < 0) 910 return (+1); 911 912 if (e1) 913 return ((sign2 < 0) ? +1 : -1); 914 else if (e2) 915 return ((sign1 < 0) ? -1 : +1); 916 917 if (use_suffix) { 918 cmp_res = cmpsuffix(SI1, SI2); 919 if (cmp_res) 920 return ((sign1 < 0) ? -cmp_res : cmp_res); 921 } 922 923 /* if both numbers are empty assume that the strings are equal */ 924 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 925 return (0); 926 927 /* 928 * if the main part is of different size, we know the result 929 * (because the leading zeros are removed) 930 */ 931 if (main1 < main2) 932 cmp_res = -1; 933 else if (main1 > main2) 934 cmp_res = +1; 935 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 936 else 937 cmp_res = wcscmp(smain1, smain2); 938 939 /* check fraction */ 940 if (!cmp_res) 941 cmp_res = wcscmp(sfrac1, sfrac2); 942 943 if (!cmp_res) 944 return (0); 945 946 /* reverse result if the signs are negative */ 947 if (sign1 < 0 && sign2 < 0) 948 cmp_res = -cmp_res; 949 950 return (cmp_res); 951 } 952 953 /* 954 * Implements numeric sort (-n). 955 */ 956 static int 957 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 958 { 959 960 return (numcoll_impl(kv1, kv2, offset, false)); 961 } 962 963 /* 964 * Implements 'human' numeric sort (-h). 965 */ 966 static int 967 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 968 { 969 970 return (numcoll_impl(kv1, kv2, offset, true)); 971 } 972 973 /* 974 * Implements random sort (-R). 975 */ 976 #if defined(SORT_RANDOM) 977 static char * 978 randomcollend(MD5_CTX *ctx) 979 { 980 unsigned char digest[MD5_DIGEST_LENGTH]; 981 static const char hex[]="0123456789abcdef"; 982 char *buf; 983 int i; 984 985 buf = malloc(MD5_DIGEST_LENGTH * 2 + 1); 986 if (!buf) 987 return NULL; 988 MD5_Final(digest, ctx); 989 for (i = 0; i < MD5_DIGEST_LENGTH; i++) { 990 buf[2*i] = hex[digest[i] >> 4]; 991 buf[2*i+1] = hex[digest[i] & 0x0f]; 992 } 993 buf[MD5_DIGEST_LENGTH * 2] = '\0'; 994 return buf; 995 } 996 997 static int 998 randomcoll(struct key_value *kv1, struct key_value *kv2, 999 size_t offset __unused) 1000 { 1001 struct bwstring *s1, *s2; 1002 MD5_CTX ctx1, ctx2; 1003 char *b1, *b2; 1004 1005 s1 = kv1->k; 1006 s2 = kv2->k; 1007 1008 if (debug_sort) { 1009 bwsprintf(stdout, s1, "; k1=<", ">"); 1010 bwsprintf(stdout, s2, ", k2=<", ">"); 1011 } 1012 1013 if (s1 == s2) 1014 return (0); 1015 1016 memcpy(&ctx1,&md5_ctx,sizeof(MD5_CTX)); 1017 memcpy(&ctx2,&md5_ctx,sizeof(MD5_CTX)); 1018 1019 MD5_Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 1020 MD5_Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 1021 b1 = randomcollend(&ctx1); 1022 b2 = randomcollend(&ctx2); 1023 if (b1 == NULL) { 1024 if (b2 == NULL) 1025 return (0); 1026 else { 1027 sort_free(b2); 1028 return (-1); 1029 } 1030 } else if (b2 == NULL) { 1031 sort_free(b1); 1032 return (+1); 1033 } else { 1034 int cmp_res; 1035 1036 cmp_res = strcmp(b1,b2); 1037 sort_free(b1); 1038 sort_free(b2); 1039 1040 if (!cmp_res) 1041 cmp_res = bwscoll(s1, s2, 0); 1042 1043 return (cmp_res); 1044 } 1045 } 1046 #endif 1047 1048 /* 1049 * Implements version sort (-V). 1050 */ 1051 static int 1052 versioncoll(struct key_value *kv1, struct key_value *kv2, 1053 size_t offset __unused) 1054 { 1055 struct bwstring *s1, *s2; 1056 1057 s1 = kv1->k; 1058 s2 = kv2->k; 1059 1060 if (debug_sort) { 1061 bwsprintf(stdout, s1, "; k1=<", ">"); 1062 bwsprintf(stdout, s2, ", k2=<", ">"); 1063 } 1064 1065 if (s1 == s2) 1066 return (0); 1067 1068 return (vcmp(s1, s2)); 1069 } 1070 1071 /* 1072 * Check for minus infinity 1073 */ 1074 static inline bool 1075 huge_minus(double d, int err1) 1076 { 1077 1078 if (err1 == ERANGE) 1079 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1080 return (+1); 1081 1082 return (0); 1083 } 1084 1085 /* 1086 * Check for plus infinity 1087 */ 1088 static inline bool 1089 huge_plus(double d, int err1) 1090 { 1091 1092 if (err1 == ERANGE) 1093 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1094 return (+1); 1095 1096 return (0); 1097 } 1098 1099 /* 1100 * Check whether a function is a NAN 1101 */ 1102 static bool 1103 is_nan(double d) 1104 { 1105 return ((d == NAN) || (isnan(d))); 1106 } 1107 1108 /* 1109 * Compare two NANs 1110 */ 1111 static int 1112 cmp_nans(double d1, double d2) 1113 { 1114 1115 if (d1 < d2) 1116 return (-1); 1117 if (d1 > d2) 1118 return (+1); 1119 return (0); 1120 } 1121 1122 /* 1123 * Implements general numeric sort (-g). 1124 */ 1125 static int 1126 gnumcoll(struct key_value *kv1, struct key_value *kv2, 1127 size_t offset __unused) 1128 { 1129 double d1, d2; 1130 int err1, err2; 1131 bool empty1, empty2, key1_read, key2_read; 1132 1133 d1 = d2 = 0; 1134 err1 = err2 = 0; 1135 key1_read = key2_read = false; 1136 1137 if (debug_sort) { 1138 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1139 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1140 } 1141 1142 if (kv1->hint->status == HS_UNINITIALIZED) { 1143 errno = 0; 1144 d1 = bwstod(kv1->k, &empty1); 1145 err1 = errno; 1146 1147 if (empty1) 1148 kv1->hint->v.gh.notnum = true; 1149 else if (err1 == 0) { 1150 kv1->hint->v.gh.d = d1; 1151 kv1->hint->v.gh.nan = is_nan(d1); 1152 kv1->hint->status = HS_INITIALIZED; 1153 } else 1154 kv1->hint->status = HS_ERROR; 1155 1156 key1_read = true; 1157 } 1158 1159 if (kv2->hint->status == HS_UNINITIALIZED) { 1160 errno = 0; 1161 d2 = bwstod(kv2->k, &empty2); 1162 err2 = errno; 1163 1164 if (empty2) 1165 kv2->hint->v.gh.notnum = true; 1166 else if (err2 == 0) { 1167 kv2->hint->v.gh.d = d2; 1168 kv2->hint->v.gh.nan = is_nan(d2); 1169 kv2->hint->status = HS_INITIALIZED; 1170 } else 1171 kv2->hint->status = HS_ERROR; 1172 1173 key2_read = true; 1174 } 1175 1176 if (kv1->hint->status == HS_INITIALIZED && 1177 kv2->hint->status == HS_INITIALIZED) { 1178 if (kv1->hint->v.gh.notnum) 1179 return ((kv2->hint->v.gh.notnum) ? 0 : -1); 1180 else if (kv2->hint->v.gh.notnum) 1181 return (+1); 1182 1183 if (kv1->hint->v.gh.nan) 1184 return ((kv2->hint->v.gh.nan) ? 1185 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : 1186 -1); 1187 else if (kv2->hint->v.gh.nan) 1188 return (+1); 1189 1190 d1 = kv1->hint->v.gh.d; 1191 d2 = kv2->hint->v.gh.d; 1192 1193 if (d1 < d2) 1194 return (-1); 1195 else if (d1 > d2) 1196 return (+1); 1197 else 1198 return (0); 1199 } 1200 1201 if (!key1_read) { 1202 errno = 0; 1203 d1 = bwstod(kv1->k, &empty1); 1204 err1 = errno; 1205 } 1206 1207 if (!key2_read) { 1208 errno = 0; 1209 d2 = bwstod(kv2->k, &empty2); 1210 err2 = errno; 1211 } 1212 1213 /* Non-value case: */ 1214 if (empty1) 1215 return (empty2 ? 0 : -1); 1216 else if (empty2) 1217 return (+1); 1218 1219 /* NAN case */ 1220 if (is_nan(d1)) 1221 return (is_nan(d2) ? cmp_nans(d1, d2) : -1); 1222 else if (is_nan(d2)) 1223 return (+1); 1224 1225 /* Infinities */ 1226 if (err1 == ERANGE || err2 == ERANGE) { 1227 /* Minus infinity case */ 1228 if (huge_minus(d1, err1)) { 1229 if (huge_minus(d2, err2)) { 1230 if (d1 < d2) 1231 return (-1); 1232 if (d1 > d2) 1233 return (+1); 1234 return (0); 1235 } else 1236 return (-1); 1237 1238 } else if (huge_minus(d2, err2)) { 1239 if (huge_minus(d1, err1)) { 1240 if (d1 < d2) 1241 return (-1); 1242 if (d1 > d2) 1243 return (+1); 1244 return (0); 1245 } else 1246 return (+1); 1247 } 1248 1249 /* Plus infinity case */ 1250 if (huge_plus(d1, err1)) { 1251 if (huge_plus(d2, err2)) { 1252 if (d1 < d2) 1253 return (-1); 1254 if (d1 > d2) 1255 return (+1); 1256 return (0); 1257 } else 1258 return (+1); 1259 } else if (huge_plus(d2, err2)) { 1260 if (huge_plus(d1, err1)) { 1261 if (d1 < d2) 1262 return (-1); 1263 if (d1 > d2) 1264 return (+1); 1265 return (0); 1266 } else 1267 return (-1); 1268 } 1269 } 1270 1271 if (d1 < d2) 1272 return (-1); 1273 if (d1 > d2) 1274 return (+1); 1275 1276 return (0); 1277 } 1278 1279 /* 1280 * Implements month sort (-M). 1281 */ 1282 static int 1283 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1284 { 1285 int val1, val2; 1286 bool key1_read, key2_read; 1287 1288 val1 = val2 = 0; 1289 key1_read = key2_read = false; 1290 1291 if (debug_sort) { 1292 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1293 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1294 } 1295 1296 if (kv1->hint->status == HS_UNINITIALIZED) { 1297 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1298 key1_read = true; 1299 kv1->hint->status = HS_INITIALIZED; 1300 } 1301 1302 if (kv2->hint->status == HS_UNINITIALIZED) { 1303 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1304 key2_read = true; 1305 kv2->hint->status = HS_INITIALIZED; 1306 } 1307 1308 if (kv1->hint->status == HS_INITIALIZED) { 1309 val1 = kv1->hint->v.Mh.m; 1310 key1_read = true; 1311 } 1312 1313 if (kv2->hint->status == HS_INITIALIZED) { 1314 val2 = kv2->hint->v.Mh.m; 1315 key2_read = true; 1316 } 1317 1318 if (!key1_read) 1319 val1 = bws_month_score(kv1->k); 1320 if (!key2_read) 1321 val2 = bws_month_score(kv2->k); 1322 1323 if (val1 == val2) { 1324 return (0); 1325 } 1326 if (val1 < val2) 1327 return (-1); 1328 return (+1); 1329 } 1330