1 /* $OpenBSD: coll.c,v 1.12 2019/05/13 17:00:12 schwarze Exp $ */ 2 3 /*- 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/types.h> 31 32 #include <errno.h> 33 #include <err.h> 34 #include <langinfo.h> 35 #include <limits.h> 36 #include <math.h> 37 #include <md5.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <wchar.h> 41 #include <wctype.h> 42 43 #include "coll.h" 44 #include "vsort.h" 45 46 struct key_specs *keys; 47 size_t keys_num = 0; 48 49 static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 50 static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 51 static int monthcoll(struct key_value*, struct key_value *, size_t offset); 52 static int numcoll(struct key_value*, struct key_value *, size_t offset); 53 static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 54 static int randomcoll(struct key_value*, struct key_value *, size_t offset); 55 static int versioncoll(struct key_value*, struct key_value *, size_t offset); 56 57 /* 58 * Allocate keys array 59 */ 60 struct keys_array * 61 keys_array_alloc(void) 62 { 63 struct keys_array *ka; 64 size_t sz; 65 66 sz = keys_array_size(); 67 ka = sort_calloc(1, sz); 68 69 return ka; 70 } 71 72 /* 73 * Calculate whether we need key hint space 74 */ 75 static size_t 76 key_hint_size(void) 77 { 78 return need_hint ? sizeof(struct key_hint) : 0; 79 } 80 81 /* 82 * Calculate keys array size 83 */ 84 size_t 85 keys_array_size(void) 86 { 87 return keys_num * (sizeof(struct key_value) + key_hint_size()); 88 } 89 90 /* 91 * Clean data of keys array 92 */ 93 void 94 clean_keys_array(const struct bwstring *s, struct keys_array *ka) 95 { 96 if (ka) { 97 size_t i; 98 99 for (i = 0; i < keys_num; ++i) 100 if (ka->key[i].k && ka->key[i].k != s) 101 bwsfree(ka->key[i].k); 102 memset(ka, 0, keys_array_size()); 103 } 104 } 105 106 /* 107 * Set value of a key in the keys set 108 */ 109 void 110 set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 111 { 112 if (ka && keys_num > ind) { 113 struct key_value *kv; 114 115 kv = &(ka->key[ind]); 116 117 if (kv->k != s) 118 bwsfree(kv->k); 119 kv->k = s; 120 } 121 } 122 123 /* 124 * Initialize a sort list item 125 */ 126 struct sort_list_item * 127 sort_list_item_alloc(void) 128 { 129 struct sort_list_item *si; 130 size_t sz; 131 132 sz = sizeof(struct sort_list_item) + keys_array_size(); 133 si = sort_calloc(1, sz); 134 135 return si; 136 } 137 138 size_t 139 sort_list_item_size(struct sort_list_item *si) 140 { 141 size_t i, ret = 0; 142 143 if (si) { 144 ret = sizeof(struct sort_list_item) + keys_array_size(); 145 if (si->str) 146 ret += bws_memsize(si->str); 147 for (i = 0; i < keys_num; ++i) { 148 struct key_value *kv; 149 150 kv = &(si->ka.key[i]); 151 152 if (kv->k != si->str) 153 ret += bws_memsize(kv->k); 154 } 155 } 156 return ret; 157 } 158 159 /* 160 * Calculate key for a sort list item 161 */ 162 static void 163 sort_list_item_make_key(struct sort_list_item *si) 164 { 165 preproc(si->str, &(si->ka)); 166 } 167 168 /* 169 * Set value of a sort list item. 170 * Return combined string and keys memory size. 171 */ 172 void 173 sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 174 { 175 if (si) { 176 clean_keys_array(si->str, &(si->ka)); 177 if (si->str) { 178 if (si->str == str) { 179 /* we are trying to reset the same string */ 180 return; 181 } else { 182 bwsfree(si->str); 183 si->str = NULL; 184 } 185 } 186 si->str = str; 187 sort_list_item_make_key(si); 188 } 189 } 190 191 /* 192 * De-allocate a sort list item object memory 193 */ 194 void 195 sort_list_item_clean(struct sort_list_item *si) 196 { 197 if (si) { 198 clean_keys_array(si->str, &(si->ka)); 199 if (si->str) { 200 bwsfree(si->str); 201 si->str = NULL; 202 } 203 } 204 } 205 206 /* 207 * Skip columns according to specs 208 */ 209 static size_t 210 skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 211 bool skip_blanks, bool *empty_key) 212 { 213 if (cols < 1) 214 return BWSLEN(s) + 1; 215 216 if (skip_blanks) 217 while (start < BWSLEN(s) && iswblank(BWS_GET(s, start))) 218 ++start; 219 220 while (start < BWSLEN(s) && cols > 1) { 221 --cols; 222 ++start; 223 } 224 225 if (start >= BWSLEN(s)) 226 *empty_key = true; 227 228 return start; 229 } 230 231 /* 232 * Skip fields according to specs 233 */ 234 static size_t 235 skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 236 { 237 if (fields < 2) { 238 if (BWSLEN(s) == 0) 239 *empty_field = true; 240 return 0; 241 } else if (!(sort_opts_vals.tflag)) { 242 size_t cpos = 0; 243 bool pb = true; 244 245 while (cpos < BWSLEN(s)) { 246 bool isblank; 247 248 isblank = iswblank(BWS_GET(s, cpos)); 249 250 if (isblank && !pb) { 251 --fields; 252 if (fields <= 1) 253 return cpos; 254 } 255 pb = isblank; 256 ++cpos; 257 } 258 if (fields > 1) 259 *empty_field = true; 260 return cpos; 261 } else { 262 size_t cpos = 0; 263 264 while (cpos < BWSLEN(s)) { 265 if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) { 266 --fields; 267 if (fields <= 1) 268 return cpos + 1; 269 } 270 ++cpos; 271 } 272 if (fields > 1) 273 *empty_field = true; 274 return cpos; 275 } 276 } 277 278 /* 279 * Find fields start 280 */ 281 static void 282 find_field_start(const struct bwstring *s, struct key_specs *ks, 283 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 284 { 285 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 286 if (!*empty_field) 287 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 288 ks->pos1b, empty_key); 289 else 290 *empty_key = true; 291 } 292 293 /* 294 * Find end key position 295 */ 296 static size_t 297 find_field_end(const struct bwstring *s, struct key_specs *ks) 298 { 299 size_t f2, next_field_start, pos_end; 300 bool empty_field, empty_key; 301 302 empty_field = false; 303 empty_key = false; 304 f2 = ks->f2; 305 306 if (f2 == 0) 307 return BWSLEN(s) + 1; 308 else { 309 if (ks->c2 == 0) { 310 next_field_start = skip_fields_to_start(s, f2 + 1, 311 &empty_field); 312 if ((next_field_start > 0) && sort_opts_vals.tflag && 313 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 314 next_field_start - 1))) 315 --next_field_start; 316 } else 317 next_field_start = skip_fields_to_start(s, f2, 318 &empty_field); 319 } 320 321 if (empty_field || (next_field_start >= BWSLEN(s))) 322 return BWSLEN(s) + 1; 323 324 if (ks->c2) { 325 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 326 ks->pos2b, &empty_key); 327 if (pos_end < BWSLEN(s)) 328 ++pos_end; 329 } else 330 pos_end = next_field_start; 331 332 return pos_end; 333 } 334 335 /* 336 * Cut a field according to the key specs 337 */ 338 static struct bwstring * 339 cut_field(const struct bwstring *s, struct key_specs *ks) 340 { 341 struct bwstring *ret = NULL; 342 343 if (s && ks) { 344 size_t field_start, key_end, key_start, sz; 345 bool empty_field, empty_key; 346 347 field_start = 0; 348 key_start = 0; 349 empty_field = false; 350 empty_key = false; 351 352 find_field_start(s, ks, &field_start, &key_start, 353 &empty_field, &empty_key); 354 355 if (empty_key) 356 sz = 0; 357 else { 358 key_end = find_field_end(s, ks); 359 sz = (key_end < key_start) ? 0 : (key_end - key_start); 360 } 361 362 ret = bwsalloc(sz); 363 if (sz) 364 bwsnocpy(ret, s, key_start, sz); 365 } else 366 ret = bwsalloc(0); 367 368 return ret; 369 } 370 371 /* 372 * Preprocesses a line applying the necessary transformations 373 * specified by command line options and returns the preprocessed 374 * string, which can be used to compare. 375 */ 376 int 377 preproc(struct bwstring *s, struct keys_array *ka) 378 { 379 if (sort_opts_vals.kflag) { 380 size_t i; 381 for (i = 0; i < keys_num; i++) { 382 struct bwstring *key; 383 struct key_specs *kspecs; 384 struct sort_mods *sm; 385 386 kspecs = &(keys[i]); 387 key = cut_field(s, kspecs); 388 389 sm = &(kspecs->sm); 390 if (sm->dflag) 391 key = dictionary_order(key); 392 else if (sm->iflag) 393 key = ignore_nonprinting(key); 394 if (sm->fflag || sm->Mflag) 395 key = ignore_case(key); 396 397 set_key_on_keys_array(ka, key, i); 398 } 399 } else { 400 struct bwstring *ret = NULL; 401 struct sort_mods *sm = default_sort_mods; 402 403 #ifdef GNUSORT_COMPATIBILITY 404 if (sm->bflag) { 405 if (ret == NULL) 406 ret = bwsdup(s); 407 ret = ignore_leading_blanks(ret); 408 } 409 #endif 410 if (sm->dflag) { 411 if (ret == NULL) 412 ret = bwsdup(s); 413 ret = dictionary_order(ret); 414 } else if (sm->iflag) { 415 if (ret == NULL) 416 ret = bwsdup(s); 417 ret = ignore_nonprinting(ret); 418 } 419 if (sm->fflag || sm->Mflag) { 420 if (ret == NULL) 421 ret = bwsdup(s); 422 ret = ignore_case(ret); 423 } 424 if (ret == NULL) 425 set_key_on_keys_array(ka, s, 0); 426 else 427 set_key_on_keys_array(ka, ret, 0); 428 } 429 430 return 0; 431 } 432 433 cmpcoll_t 434 get_sort_func(struct sort_mods *sm) 435 { 436 if (sm->nflag) 437 return numcoll; 438 else if (sm->hflag) 439 return hnumcoll; 440 else if (sm->gflag) 441 return gnumcoll; 442 else if (sm->Mflag) 443 return monthcoll; 444 else if (sm->Rflag) 445 return randomcoll; 446 else if (sm->Vflag) 447 return versioncoll; 448 else 449 return wstrcoll; 450 } 451 452 /* 453 * Compares the given strings. Returns a positive number if 454 * the first precedes the second, a negative number if the second is 455 * the preceding one, and zero if they are equal. This function calls 456 * the underlying collate functions, which done the actual comparison. 457 */ 458 int 459 key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 460 { 461 struct sort_mods *sm; 462 int res = 0; 463 size_t i; 464 465 for (i = 0; i < keys_num; ++i) { 466 sm = &(keys[i].sm); 467 468 if (sm->rflag) 469 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset); 470 else 471 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset); 472 473 if (res) 474 break; 475 476 /* offset applies to only the first key */ 477 offset = 0; 478 } 479 return res; 480 } 481 482 /* 483 * Compare two strings. 484 * Plain symbol-by-symbol comparison. 485 */ 486 int 487 top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 488 { 489 if (default_sort_mods->rflag) { 490 const struct bwstring *tmp; 491 492 tmp = s1; 493 s1 = s2; 494 s2 = tmp; 495 } 496 497 return bwscoll(s1, s2, 0); 498 } 499 500 /* 501 * Compare a string and a sort list item, according to the sort specs. 502 */ 503 int 504 str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 505 { 506 struct keys_array *ka1; 507 int ret = 0; 508 509 ka1 = keys_array_alloc(); 510 511 preproc(str1, ka1); 512 513 sort_list_item_make_key(*ss2); 514 515 if (debug_sort) { 516 bwsprintf(stdout, str1, "; s1=<", ">"); 517 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 518 } 519 520 ret = key_coll(ka1, &((*ss2)->ka), 0); 521 522 if (debug_sort) 523 printf("; cmp1=%d", ret); 524 525 clean_keys_array(str1, ka1); 526 sort_free(ka1); 527 528 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 529 ret = top_level_str_coll(str1, ((*ss2)->str)); 530 if (debug_sort) 531 printf("; cmp2=%d", ret); 532 } 533 534 if (debug_sort) 535 putchar('\n'); 536 537 return ret; 538 } 539 540 /* 541 * Compare two sort list items, according to the sort specs. 542 */ 543 int 544 list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 545 size_t offset) 546 { 547 int ret; 548 549 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 550 551 if (debug_sort) { 552 if (offset) 553 printf("; offset=%zu", offset); 554 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 555 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 556 printf("; cmp1=%d\n", ret); 557 } 558 559 if (ret) 560 return ret; 561 562 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 563 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 564 if (debug_sort) 565 printf("; cmp2=%d\n", ret); 566 } 567 568 return ret; 569 } 570 571 /* 572 * Compare two sort list items, according to the sort specs. 573 */ 574 int 575 list_coll(const void *ss1, const void *ss2) 576 { 577 return list_coll_offset((struct sort_list_item **)ss1, 578 (struct sort_list_item **)ss2, 0); 579 } 580 581 #define LSCDEF(N) \ 582 static int \ 583 list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 584 { \ 585 \ 586 return list_coll_offset(ss1, ss2, N); \ 587 } 588 589 LSCDEF(0) 590 LSCDEF(1) 591 LSCDEF(2) 592 LSCDEF(3) 593 LSCDEF(4) 594 LSCDEF(5) 595 LSCDEF(6) 596 LSCDEF(7) 597 LSCDEF(8) 598 LSCDEF(9) 599 LSCDEF(10) 600 LSCDEF(11) 601 LSCDEF(12) 602 LSCDEF(13) 603 LSCDEF(14) 604 LSCDEF(15) 605 LSCDEF(16) 606 LSCDEF(17) 607 LSCDEF(18) 608 LSCDEF(19) 609 LSCDEF(20) 610 611 listcoll_t 612 get_list_call_func(size_t offset) 613 { 614 static const listcoll_t lsarray[] = { list_coll_0, list_coll_1, 615 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 616 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 617 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 618 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 619 list_coll_18, list_coll_19, list_coll_20 }; 620 621 if (offset <= 20) 622 return lsarray[offset]; 623 624 return list_coll_0; 625 } 626 627 /* 628 * Compare two sort list items, only by their original string. 629 */ 630 int 631 list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 632 { 633 return top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 634 } 635 636 /* 637 * Maximum size of a number in the string (before or after decimal point) 638 */ 639 #define MAX_NUM_SIZE (128) 640 641 /* 642 * Set suffix value 643 */ 644 static void 645 setsuffix(wchar_t c, unsigned char *si) 646 { 647 switch (c){ 648 case L'k': 649 case L'K': 650 *si = 1; 651 break; 652 case L'M': 653 *si = 2; 654 break; 655 case L'G': 656 *si = 3; 657 break; 658 case L'T': 659 *si = 4; 660 break; 661 case L'P': 662 *si = 5; 663 break; 664 case L'E': 665 *si = 6; 666 break; 667 case L'Z': 668 *si = 7; 669 break; 670 case L'Y': 671 *si = 8; 672 break; 673 default: 674 *si = 0; 675 }; 676 } 677 678 /* 679 * Read string s and parse the string into a fixed-decimal-point number. 680 * sign equals -1 if the number is negative (explicit plus is not allowed, 681 * according to GNU sort's "info sort". 682 * The number part before decimal point is in the smain, after the decimal 683 * point is in sfrac, tail is the pointer to the remainder of the string. 684 */ 685 static int 686 read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 687 { 688 bwstring_iterator s; 689 690 s = bws_begin(s0); 691 692 /* always end the fraction with zero, even if we have no fraction */ 693 sfrac[0] = 0; 694 695 while (iswblank(bws_get_iter_value(s))) 696 s = bws_iterator_inc(s, 1); 697 698 if (bws_get_iter_value(s) == L'-') { 699 *sign = -1; 700 s = bws_iterator_inc(s, 1); 701 } 702 703 // This is '0', not '\0', do not change this 704 while (iswdigit(bws_get_iter_value(s)) && 705 (bws_get_iter_value(s) == L'0')) 706 s = bws_iterator_inc(s, 1); 707 708 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 709 if (iswdigit(bws_get_iter_value(s))) { 710 smain[*main_len] = bws_get_iter_value(s); 711 s = bws_iterator_inc(s, 1); 712 *main_len += 1; 713 } else 714 break; 715 } 716 717 smain[*main_len] = 0; 718 719 if (bws_get_iter_value(s) == L'.') { 720 s = bws_iterator_inc(s, 1); 721 while (iswdigit(bws_get_iter_value(s)) && 722 *frac_len < MAX_NUM_SIZE) { 723 sfrac[*frac_len] = bws_get_iter_value(s); 724 s = bws_iterator_inc(s, 1); 725 *frac_len += 1; 726 } 727 sfrac[*frac_len] = 0; 728 729 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 730 --(*frac_len); 731 sfrac[*frac_len] = L'\0'; 732 } 733 } 734 735 setsuffix(bws_get_iter_value(s), si); 736 737 if ((*main_len + *frac_len) == 0) 738 *sign = 0; 739 740 return 0; 741 } 742 743 /* 744 * Implements string sort. 745 */ 746 static int 747 wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 748 { 749 750 if (debug_sort) { 751 if (offset) 752 printf("; offset=%zu\n", offset); 753 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 754 printf("(%zu)", BWSLEN(kv1->k)); 755 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 756 printf("(%zu)", BWSLEN(kv2->k)); 757 } 758 759 return bwscoll(kv1->k, kv2->k, offset); 760 } 761 762 /* 763 * Compare two suffixes 764 */ 765 static inline int 766 cmpsuffix(unsigned char si1, unsigned char si2) 767 { 768 return (char)si1 - (char)si2; 769 } 770 771 /* 772 * Implements numeric sort for -n and -h. 773 */ 774 static int 775 numcoll_impl(struct key_value *kv1, struct key_value *kv2, 776 size_t offset __unused, bool use_suffix) 777 { 778 struct bwstring *s1, *s2; 779 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 780 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 781 int cmp_res, sign1, sign2; 782 size_t frac1, frac2, main1, main2; 783 unsigned char SI1, SI2; 784 bool e1, e2, key1_read, key2_read; 785 786 s1 = kv1->k; 787 s2 = kv2->k; 788 sign1 = sign2 = 0; 789 main1 = main2 = 0; 790 frac1 = frac2 = 0; 791 792 key1_read = key2_read = false; 793 794 if (debug_sort) { 795 bwsprintf(stdout, s1, "; k1=<", ">"); 796 bwsprintf(stdout, s2, ", k2=<", ">"); 797 } 798 799 if (s1 == s2) 800 return 0; 801 802 if (kv1->hint->status == HS_UNINITIALIZED) { 803 /* read the number from the string */ 804 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 805 key1_read = true; 806 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 807 if (main1 < 1 && frac1 < 1) 808 kv1->hint->v.nh.empty=true; 809 kv1->hint->v.nh.si = SI1; 810 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 811 HS_INITIALIZED : HS_ERROR; 812 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 813 } 814 815 if (kv2->hint->status == HS_UNINITIALIZED) { 816 /* read the number from the string */ 817 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 818 key2_read = true; 819 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 820 if (main2 < 1 && frac2 < 1) 821 kv2->hint->v.nh.empty=true; 822 kv2->hint->v.nh.si = SI2; 823 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 824 HS_INITIALIZED : HS_ERROR; 825 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 826 } 827 828 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 829 HS_INITIALIZED) { 830 unsigned long long n1, n2; 831 bool neg1, neg2; 832 833 e1 = kv1->hint->v.nh.empty; 834 e2 = kv2->hint->v.nh.empty; 835 836 if (e1 && e2) 837 return 0; 838 839 neg1 = kv1->hint->v.nh.neg; 840 neg2 = kv2->hint->v.nh.neg; 841 842 if (neg1 && !neg2) 843 return -1; 844 if (neg2 && !neg1) 845 return 1; 846 847 if (e1) 848 return neg2 ? 1 : -1; 849 else if (e2) 850 return neg1 ? -1 : 1; 851 852 853 if (use_suffix) { 854 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 855 if (cmp_res) 856 return neg1 ? -cmp_res : cmp_res; 857 } 858 859 n1 = kv1->hint->v.nh.n1; 860 n2 = kv2->hint->v.nh.n1; 861 if (n1 < n2) 862 return neg1 ? 1 : -1; 863 else if (n1 > n2) 864 return neg1 ? -1 : 1; 865 } 866 867 /* read the numbers from the strings */ 868 if (!key1_read) 869 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 870 if (!key2_read) 871 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 872 873 e1 = ((main1 + frac1) == 0); 874 e2 = ((main2 + frac2) == 0); 875 876 if (e1 && e2) 877 return 0; 878 879 /* we know the result if the signs are different */ 880 if (sign1 < 0 && sign2 >= 0) 881 return -1; 882 if (sign1 >= 0 && sign2 < 0) 883 return 1; 884 885 if (e1) 886 return (sign2 < 0) ? +1 : -1; 887 else if (e2) 888 return (sign1 < 0) ? -1 : 1; 889 890 if (use_suffix) { 891 cmp_res = cmpsuffix(SI1, SI2); 892 if (cmp_res) 893 return (sign1 < 0) ? -cmp_res : cmp_res; 894 } 895 896 /* if both numbers are empty assume that the strings are equal */ 897 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 898 return 0; 899 900 /* 901 * if the main part is of different size, we know the result 902 * (because the leading zeros are removed) 903 */ 904 if (main1 < main2) 905 cmp_res = -1; 906 else if (main1 > main2) 907 cmp_res = +1; 908 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 909 else 910 cmp_res = wcscmp(smain1, smain2); 911 912 /* check fraction */ 913 if (!cmp_res) 914 cmp_res = wcscmp(sfrac1, sfrac2); 915 916 if (!cmp_res) 917 return 0; 918 919 /* reverse result if the signs are negative */ 920 if (sign1 < 0 && sign2 < 0) 921 cmp_res = -cmp_res; 922 923 return cmp_res; 924 } 925 926 /* 927 * Implements numeric sort (-n). 928 */ 929 static int 930 numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 931 { 932 return numcoll_impl(kv1, kv2, offset, false); 933 } 934 935 /* 936 * Implements 'human' numeric sort (-h). 937 */ 938 static int 939 hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 940 { 941 return numcoll_impl(kv1, kv2, offset, true); 942 } 943 944 /* 945 * Implements random sort (-R). 946 */ 947 static int 948 randomcoll(struct key_value *kv1, struct key_value *kv2, 949 size_t offset __unused) 950 { 951 struct bwstring *s1, *s2; 952 MD5_CTX ctx1, ctx2; 953 char *b1, *b2; 954 955 s1 = kv1->k; 956 s2 = kv2->k; 957 958 if (debug_sort) { 959 bwsprintf(stdout, s1, "; k1=<", ">"); 960 bwsprintf(stdout, s2, ", k2=<", ">"); 961 } 962 963 if (s1 == s2) 964 return 0; 965 966 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX)); 967 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX)); 968 969 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 970 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 971 b1 = MD5End(&ctx1, NULL); 972 b2 = MD5End(&ctx2, NULL); 973 if (b1 == NULL) { 974 if (b2 == NULL) 975 return 0; 976 else { 977 sort_free(b2); 978 return -1; 979 } 980 } else if (b2 == NULL) { 981 sort_free(b1); 982 return 1; 983 } else { 984 int cmp_res; 985 986 cmp_res = strcmp(b1, b2); 987 sort_free(b1); 988 sort_free(b2); 989 990 if (!cmp_res) 991 cmp_res = bwscoll(s1, s2, 0); 992 993 return cmp_res; 994 } 995 } 996 997 /* 998 * Implements version sort (-V). 999 */ 1000 static int 1001 versioncoll(struct key_value *kv1, struct key_value *kv2, 1002 size_t offset __unused) 1003 { 1004 struct bwstring *s1, *s2; 1005 1006 s1 = kv1->k; 1007 s2 = kv2->k; 1008 1009 if (debug_sort) { 1010 bwsprintf(stdout, s1, "; k1=<", ">"); 1011 bwsprintf(stdout, s2, ", k2=<", ">"); 1012 } 1013 1014 if (s1 == s2) 1015 return 0; 1016 1017 return vcmp(s1, s2); 1018 } 1019 1020 /* 1021 * Check for minus infinity 1022 */ 1023 static inline bool 1024 huge_minus(double d, int err1) 1025 { 1026 if (err1 == ERANGE) 1027 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1028 return 1; 1029 1030 return 0; 1031 } 1032 1033 /* 1034 * Check for plus infinity 1035 */ 1036 static inline bool 1037 huge_plus(double d, int err1) 1038 { 1039 if (err1 == ERANGE) 1040 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1041 return 1; 1042 1043 return 0; 1044 } 1045 1046 /* 1047 * Check whether a function is a NAN 1048 */ 1049 static bool 1050 is_nan(double d) 1051 { 1052 #if defined(NAN) 1053 return (d == NAN || isnan(d)); 1054 #else 1055 return (isnan(d)); 1056 #endif 1057 } 1058 1059 /* 1060 * Compare two NANs 1061 */ 1062 static int 1063 cmp_nans(double d1, double d2) 1064 { 1065 if (d1 == d2) 1066 return 0; 1067 return d1 < d2 ? -1 : 1; 1068 } 1069 1070 /* 1071 * Implements general numeric sort (-g). 1072 */ 1073 static int 1074 gnumcoll(struct key_value *kv1, struct key_value *kv2, 1075 size_t offset __unused) 1076 { 1077 double d1, d2; 1078 int err1, err2; 1079 bool empty1, empty2, key1_read, key2_read; 1080 1081 d1 = d2 = 0; 1082 err1 = err2 = 0; 1083 key1_read = key2_read = false; 1084 1085 if (debug_sort) { 1086 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1087 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1088 } 1089 1090 if (kv1->hint->status == HS_UNINITIALIZED) { 1091 errno = 0; 1092 d1 = bwstod(kv1->k, &empty1); 1093 err1 = errno; 1094 1095 if (empty1) { 1096 kv1->hint->v.gh.notnum = true; 1097 kv1->hint->status = HS_INITIALIZED; 1098 } else if (err1 == 0) { 1099 kv1->hint->v.gh.d = d1; 1100 kv1->hint->v.gh.nan = is_nan(d1); 1101 kv1->hint->status = HS_INITIALIZED; 1102 } else 1103 kv1->hint->status = HS_ERROR; 1104 1105 key1_read = true; 1106 } 1107 1108 if (kv2->hint->status == HS_UNINITIALIZED) { 1109 errno = 0; 1110 d2 = bwstod(kv2->k, &empty2); 1111 err2 = errno; 1112 1113 if (empty2) { 1114 kv2->hint->v.gh.notnum = true; 1115 kv2->hint->status = HS_INITIALIZED; 1116 } else if (err2 == 0) { 1117 kv2->hint->v.gh.d = d2; 1118 kv2->hint->v.gh.nan = is_nan(d2); 1119 kv2->hint->status = HS_INITIALIZED; 1120 } else 1121 kv2->hint->status = HS_ERROR; 1122 1123 key2_read = true; 1124 } 1125 1126 if (kv1->hint->status == HS_INITIALIZED && 1127 kv2->hint->status == HS_INITIALIZED) { 1128 #ifdef GNUSORT_COMPATIBILITY 1129 if (kv1->hint->v.gh.notnum) 1130 return kv2->hint->v.gh.notnum ? 0 : -1; 1131 else if (kv2->hint->v.gh.notnum) 1132 return 1; 1133 #else 1134 if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum) 1135 return 0; 1136 #endif 1137 1138 if (kv1->hint->v.gh.nan) 1139 return kv2->hint->v.gh.nan ? 1140 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1; 1141 else if (kv2->hint->v.gh.nan) 1142 return 1; 1143 1144 d1 = kv1->hint->v.gh.d; 1145 d2 = kv2->hint->v.gh.d; 1146 1147 if (d1 < d2) 1148 return -1; 1149 else if (d1 > d2) 1150 return 1; 1151 else 1152 return 0; 1153 } 1154 1155 if (!key1_read) { 1156 errno = 0; 1157 d1 = bwstod(kv1->k, &empty1); 1158 err1 = errno; 1159 } 1160 1161 if (!key2_read) { 1162 errno = 0; 1163 d2 = bwstod(kv2->k, &empty2); 1164 err2 = errno; 1165 } 1166 1167 /* Non-value case */ 1168 #ifdef GNUSORT_COMPATIBILITY 1169 if (empty1) 1170 return empty2 ? 0 : -1; 1171 else if (empty2) 1172 return 1; 1173 #else 1174 if (empty1 && empty2) 1175 return 0; 1176 #endif 1177 1178 /* NAN case */ 1179 if (is_nan(d1)) 1180 return is_nan(d2) ? cmp_nans(d1, d2) : -1; 1181 else if (is_nan(d2)) 1182 return 1; 1183 1184 /* Infinities */ 1185 if (err1 == ERANGE || err2 == ERANGE) { 1186 /* Minus infinity case */ 1187 if (huge_minus(d1, err1)) { 1188 if (huge_minus(d2, err2)) { 1189 if (d1 == d2) 1190 return 0; 1191 return d1 < d2 ? -1 : 1; 1192 } else 1193 return -1; 1194 1195 } else if (huge_minus(d2, err2)) { 1196 if (huge_minus(d1, err1)) { 1197 if (d1 == d2) 1198 return 0; 1199 return d1 < d2 ? -1 : 1; 1200 } else 1201 return 1; 1202 } 1203 1204 /* Plus infinity case */ 1205 if (huge_plus(d1, err1)) { 1206 if (huge_plus(d2, err2)) { 1207 if (d1 == d2) 1208 return 0; 1209 return d1 < d2 ? -1 : 1; 1210 } else 1211 return 1; 1212 } else if (huge_plus(d2, err2)) { 1213 if (huge_plus(d1, err1)) { 1214 if (d1 == d2) 1215 return 0; 1216 return d1 < d2 ? -1 : 1; 1217 } else 1218 return -1; 1219 } 1220 } 1221 1222 if (d1 == d2) 1223 return 0; 1224 return d1 < d2 ? -1 : 1; 1225 } 1226 1227 /* 1228 * Implements month sort (-M). 1229 */ 1230 static int 1231 monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1232 { 1233 int val1, val2; 1234 bool key1_read, key2_read; 1235 1236 val1 = val2 = 0; 1237 key1_read = key2_read = false; 1238 1239 if (debug_sort) { 1240 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1241 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1242 } 1243 1244 if (kv1->hint->status == HS_UNINITIALIZED) { 1245 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1246 key1_read = true; 1247 kv1->hint->status = HS_INITIALIZED; 1248 } 1249 1250 if (kv2->hint->status == HS_UNINITIALIZED) { 1251 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1252 key2_read = true; 1253 kv2->hint->status = HS_INITIALIZED; 1254 } 1255 1256 if (kv1->hint->status == HS_INITIALIZED) { 1257 val1 = kv1->hint->v.Mh.m; 1258 key1_read = true; 1259 } 1260 1261 if (kv2->hint->status == HS_INITIALIZED) { 1262 val2 = kv2->hint->v.Mh.m; 1263 key2_read = true; 1264 } 1265 1266 if (!key1_read) 1267 val1 = bws_month_score(kv1->k); 1268 if (!key2_read) 1269 val2 = bws_month_score(kv2->k); 1270 1271 if (val1 == val2) 1272 return 0; 1273 return val1 < val2 ? -1 : 1; 1274 } 1275