1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: head/usr.bin/sort/sort.c 281182 2015-04-07 01:17:49Z pfg $ 28 */ 29 30 31 #include <sys/stat.h> 32 #include <sys/sysctl.h> 33 #include <sys/types.h> 34 35 #include <err.h> 36 #include <errno.h> 37 #include <getopt.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <regex.h> 41 #include <signal.h> 42 #include <stdbool.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <string.h> 46 #include <unistd.h> 47 #include <wchar.h> 48 #include <wctype.h> 49 #if defined(SORT_RANDOM) 50 #include <openssl/md5.h> 51 #endif 52 53 #include "coll.h" 54 #include "file.h" 55 #include "sort.h" 56 57 #ifndef WITHOUT_NLS 58 #include <nl_types.h> 59 nl_catd catalog; 60 #endif 61 62 #if defined(SORT_RANDOM) 63 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" 64 #else 65 #define OPTIONS "bcCdfghik:Mmno:rsS:t:T:uVz" 66 #endif 67 68 #if defined(SORT_RANDOM) 69 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random") 70 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024) 71 72 static bool need_random; 73 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE; 74 static const void *random_seed; 75 static size_t random_seed_size; 76 77 MD5_CTX md5_ctx; 78 #endif 79 80 /* 81 * Default messages to use when NLS is disabled or no catalogue 82 * is found. 83 */ 84 const char *nlsstr[] = { "", 85 /* 1*/"mutually exclusive flags", 86 /* 2*/"extra argument not allowed with -c", 87 /* 3*/"Unknown feature", 88 /* 4*/"Wrong memory buffer specification", 89 /* 5*/"0 field in key specs", 90 /* 6*/"0 column in key specs", 91 /* 7*/"Wrong file mode", 92 /* 8*/"Cannot open file for reading", 93 /* 9*/"Radix sort cannot be used with these sort options", 94 /*10*/"The chosen sort method cannot be used with stable and/or unique sort", 95 /*11*/"Invalid key position", 96 /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] " 97 "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " 98 "[-o outfile] [--batch-size size] [--files0-from file] " 99 "[--heapsort] [--mergesort] [--radixsort] [--qsort] " 100 "[--mmap] " 101 #if defined(SORT_THREADS) 102 "[--parallel thread_no] " 103 #endif 104 "[--human-numeric-sort] " 105 #if defined(SORT_RANDOM) 106 "[--version-sort] [--random-sort [--random-source file]] " 107 #else 108 "[--version-sort] " 109 #endif 110 "[--compress-program program] [file ...]\n" }; 111 112 struct sort_opts sort_opts_vals; 113 114 bool debug_sort; 115 bool need_hint; 116 117 #if defined(SORT_THREADS) 118 unsigned int ncpu = 1; 119 size_t nthreads = 1; 120 #endif 121 122 static bool gnusort_numeric_compatibility; 123 124 static struct sort_mods default_sort_mods_object; 125 struct sort_mods * const default_sort_mods = &default_sort_mods_object; 126 127 static bool print_symbols_on_debug; 128 129 /* 130 * Arguments from file (when file0-from option is used: 131 */ 132 static size_t argc_from_file0 = (size_t)-1; 133 static char **argv_from_file0; 134 135 /* 136 * Placeholder symbols for options which have no single-character equivalent 137 */ 138 enum 139 { 140 SORT_OPT = CHAR_MAX + 1, 141 HELP_OPT, 142 FF_OPT, 143 BS_OPT, 144 VERSION_OPT, 145 DEBUG_OPT, 146 #if defined(SORT_THREADS) 147 PARALLEL_OPT, 148 #endif 149 #if defined(SORT_RANDOM) 150 RANDOMSOURCE_OPT, 151 #endif 152 COMPRESSPROGRAM_OPT, 153 QSORT_OPT, 154 MERGESORT_OPT, 155 HEAPSORT_OPT, 156 RADIXSORT_OPT, 157 MMAP_OPT 158 }; 159 160 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 161 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; 162 163 static struct option long_options[] = { 164 { "batch-size", required_argument, NULL, BS_OPT }, 165 { "buffer-size", required_argument, NULL, 'S' }, 166 { "check", optional_argument, NULL, 'c' }, 167 { "check=silent|quiet", optional_argument, NULL, 'C' }, 168 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, 169 { "debug", no_argument, NULL, DEBUG_OPT }, 170 { "dictionary-order", no_argument, NULL, 'd' }, 171 { "field-separator", required_argument, NULL, 't' }, 172 { "files0-from", required_argument, NULL, FF_OPT }, 173 { "general-numeric-sort", no_argument, NULL, 'g' }, 174 { "heapsort", no_argument, NULL, HEAPSORT_OPT }, 175 { "help",no_argument, NULL, HELP_OPT }, 176 { "human-numeric-sort", no_argument, NULL, 'h' }, 177 { "ignore-leading-blanks", no_argument, NULL, 'b' }, 178 { "ignore-case", no_argument, NULL, 'f' }, 179 { "ignore-nonprinting", no_argument, NULL, 'i' }, 180 { "key", required_argument, NULL, 'k' }, 181 { "merge", no_argument, NULL, 'm' }, 182 { "mergesort", no_argument, NULL, MERGESORT_OPT }, 183 { "mmap", no_argument, NULL, MMAP_OPT }, 184 { "month-sort", no_argument, NULL, 'M' }, 185 { "numeric-sort", no_argument, NULL, 'n' }, 186 { "output", required_argument, NULL, 'o' }, 187 #if defined(SORT_THREADS) 188 { "parallel", required_argument, NULL, PARALLEL_OPT }, 189 #endif 190 { "qsort", no_argument, NULL, QSORT_OPT }, 191 { "radixsort", no_argument, NULL, RADIXSORT_OPT }, 192 #if defined(SORT_RANDOM) 193 { "random-sort", no_argument, NULL, 'R' }, 194 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, 195 #endif 196 { "reverse", no_argument, NULL, 'r' }, 197 { "sort", required_argument, NULL, SORT_OPT }, 198 { "stable", no_argument, NULL, 's' }, 199 { "temporary-directory",required_argument, NULL, 'T' }, 200 { "unique", no_argument, NULL, 'u' }, 201 { "version", no_argument, NULL, VERSION_OPT }, 202 { "version-sort",no_argument, NULL, 'V' }, 203 { "zero-terminated", no_argument, NULL, 'z' }, 204 { NULL, no_argument, NULL, 0 } 205 }; 206 207 static void fix_obsolete_keys(int *argc, char **argv); 208 209 /* 210 * Check where sort modifier is present 211 */ 212 static bool 213 sort_modifier_empty(struct sort_mods *sm) 214 { 215 216 if (sm == NULL) 217 return (true); 218 return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || 219 #ifdef SORT_RANDOM 220 sm->Rflag || 221 #endif 222 sm->rflag || sm->hflag || sm->dflag || sm->fflag)); 223 } 224 225 /* 226 * Print out usage text. 227 */ 228 static void 229 usage(bool opt_err) 230 { 231 FILE *out; 232 233 out = opt_err ? stderr : stdout; 234 235 fprintf(out, getstr(12), getprogname()); 236 if (opt_err) 237 exit(2); 238 exit(0); 239 } 240 241 /* 242 * Read input file names from a file (file0-from option). 243 */ 244 static void 245 read_fns_from_file0(const char *fn) 246 { 247 FILE *f; 248 char *line = NULL; 249 size_t linesize = 0; 250 ssize_t linelen; 251 252 if (fn == NULL) 253 return; 254 255 f = fopen(fn, "r"); 256 if (f == NULL) 257 err(2, "%s", fn); 258 259 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { 260 if (*line != '\0') { 261 if (argc_from_file0 == (size_t) - 1) 262 argc_from_file0 = 0; 263 ++argc_from_file0; 264 argv_from_file0 = sort_realloc(argv_from_file0, 265 argc_from_file0 * sizeof(char *)); 266 if (argv_from_file0 == NULL) 267 err(2, NULL); 268 argv_from_file0[argc_from_file0 - 1] = line; 269 } else { 270 free(line); 271 } 272 line = NULL; 273 linesize = 0; 274 } 275 if (ferror(f)) 276 err(2, "%s: getdelim", fn); 277 278 closefile(f, fn); 279 } 280 281 /* 282 * Check how much RAM is available for the sort. 283 */ 284 static void 285 set_hw_params(void) 286 { 287 long pages, psize; 288 289 pages = psize = 0; 290 291 #if defined(SORT_THREADS) 292 ncpu = 1; 293 #endif 294 295 pages = sysconf(_SC_PHYS_PAGES); 296 if (pages < 1) { 297 perror("sysconf pages"); 298 pages = 1; 299 } 300 psize = sysconf(_SC_PAGESIZE); 301 if (psize < 1) { 302 perror("sysconf psize"); 303 psize = 4096; 304 } 305 #if defined(SORT_THREADS) 306 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); 307 if (ncpu < 1) 308 ncpu = 1; 309 else if(ncpu > 32) 310 ncpu = 32; 311 312 nthreads = ncpu; 313 #endif 314 315 free_memory = (unsigned long long) pages * (unsigned long long) psize; 316 available_free_memory = free_memory / 2; 317 318 if (available_free_memory < 1024) 319 available_free_memory = 1024; 320 } 321 322 /* 323 * Convert "plain" symbol to wide symbol, with default value. 324 */ 325 static void 326 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 327 { 328 329 if (wc && c) { 330 int res; 331 332 res = mbtowc(wc, c, MB_CUR_MAX); 333 if (res < 1) 334 *wc = def; 335 } 336 } 337 338 /* 339 * Set current locale symbols. 340 */ 341 static void 342 set_locale(void) 343 { 344 struct lconv *lc; 345 const char *locale; 346 347 setlocale(LC_ALL, ""); 348 349 lc = localeconv(); 350 351 if (lc) { 352 /* obtain LC_NUMERIC info */ 353 /* Convert to wide char form */ 354 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 355 symbol_decimal_point); 356 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 357 symbol_thousands_sep); 358 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 359 symbol_positive_sign); 360 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 361 symbol_negative_sign); 362 } 363 364 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 365 gnusort_numeric_compatibility = true; 366 367 locale = setlocale(LC_COLLATE, NULL); 368 369 if (locale) { 370 char *tmpl; 371 const char *cclocale; 372 373 tmpl = sort_strdup(locale); 374 cclocale = setlocale(LC_COLLATE, "C"); 375 if (cclocale && !strcmp(cclocale, tmpl)) 376 byte_sort = true; 377 else { 378 const char *pclocale; 379 380 pclocale = setlocale(LC_COLLATE, "POSIX"); 381 if (pclocale && !strcmp(pclocale, tmpl)) 382 byte_sort = true; 383 } 384 setlocale(LC_COLLATE, tmpl); 385 sort_free(tmpl); 386 } 387 } 388 389 /* 390 * Set directory temporary files. 391 */ 392 static void 393 set_tmpdir(void) 394 { 395 char *td; 396 397 td = getenv("TMPDIR"); 398 if (td != NULL) 399 tmpdir = sort_strdup(td); 400 } 401 402 /* 403 * Parse -S option. 404 */ 405 static unsigned long long 406 parse_memory_buffer_value(const char *value) 407 { 408 409 if (value == NULL) 410 return (available_free_memory); 411 else { 412 char *endptr; 413 unsigned long long membuf; 414 415 endptr = NULL; 416 errno = 0; 417 membuf = strtoll(value, &endptr, 10); 418 419 if (errno != 0) { 420 warn("%s",getstr(4)); 421 membuf = available_free_memory; 422 } else { 423 switch (*endptr){ 424 case 'Y': 425 membuf *= 1024; 426 /* FALLTHROUGH */ 427 case 'Z': 428 membuf *= 1024; 429 /* FALLTHROUGH */ 430 case 'E': 431 membuf *= 1024; 432 /* FALLTHROUGH */ 433 case 'P': 434 membuf *= 1024; 435 /* FALLTHROUGH */ 436 case 'T': 437 membuf *= 1024; 438 /* FALLTHROUGH */ 439 case 'G': 440 membuf *= 1024; 441 /* FALLTHROUGH */ 442 case 'M': 443 membuf *= 1024; 444 /* FALLTHROUGH */ 445 case '\0': 446 case 'K': 447 membuf *= 1024; 448 /* FALLTHROUGH */ 449 case 'b': 450 break; 451 case '%': 452 membuf = (available_free_memory * membuf) / 453 100; 454 break; 455 default: 456 warnc(EINVAL, "%s", optarg); 457 membuf = available_free_memory; 458 } 459 } 460 return (membuf); 461 } 462 } 463 464 /* 465 * Signal handler that clears the temporary files. 466 */ 467 static void 468 sig_handler(int sig __unused, siginfo_t *siginfo __unused, 469 void *context __unused) 470 { 471 472 clear_tmp_files(); 473 exit(-1); 474 } 475 476 /* 477 * Set signal handler on panic signals. 478 */ 479 static void 480 set_signal_handler(void) 481 { 482 struct sigaction sa; 483 484 memset(&sa, 0, sizeof(sa)); 485 sa.sa_sigaction = &sig_handler; 486 sa.sa_flags = SA_SIGINFO; 487 488 if (sigaction(SIGTERM, &sa, NULL) < 0) { 489 perror("sigaction"); 490 return; 491 } 492 if (sigaction(SIGHUP, &sa, NULL) < 0) { 493 perror("sigaction"); 494 return; 495 } 496 if (sigaction(SIGINT, &sa, NULL) < 0) { 497 perror("sigaction"); 498 return; 499 } 500 if (sigaction(SIGQUIT, &sa, NULL) < 0) { 501 perror("sigaction"); 502 return; 503 } 504 if (sigaction(SIGABRT, &sa, NULL) < 0) { 505 perror("sigaction"); 506 return; 507 } 508 if (sigaction(SIGBUS, &sa, NULL) < 0) { 509 perror("sigaction"); 510 return; 511 } 512 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 513 perror("sigaction"); 514 return; 515 } 516 if (sigaction(SIGUSR1, &sa, NULL) < 0) { 517 perror("sigaction"); 518 return; 519 } 520 if (sigaction(SIGUSR2, &sa, NULL) < 0) { 521 perror("sigaction"); 522 return; 523 } 524 } 525 526 /* 527 * Print "unknown" message and exit with status 2. 528 */ 529 static void 530 unknown(const char *what) 531 { 532 533 errx(2, "%s: %s", getstr(3), what); 534 } 535 536 /* 537 * Check whether contradictory input options are used. 538 */ 539 static void 540 check_mutually_exclusive_flags(char c, bool *mef_flags) 541 { 542 int fo_index, mec; 543 bool found_others, found_this; 544 545 found_others = found_this = false; 546 fo_index = 0; 547 548 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 549 mec = mutually_exclusive_flags[i]; 550 551 if (mec != c) { 552 if (mef_flags[i]) { 553 if (found_this) 554 errx(1, "%c:%c: %s", c, mec, getstr(1)); 555 found_others = true; 556 fo_index = i; 557 } 558 } else { 559 if (found_others) 560 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); 561 mef_flags[i] = true; 562 found_this = true; 563 } 564 } 565 } 566 567 /* 568 * Initialise sort opts data. 569 */ 570 static void 571 set_sort_opts(void) 572 { 573 574 memset(&default_sort_mods_object, 0, 575 sizeof(default_sort_mods_object)); 576 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 577 default_sort_mods_object.func = 578 get_sort_func(&default_sort_mods_object); 579 } 580 581 /* 582 * Set a sort modifier on a sort modifiers object. 583 */ 584 static bool 585 set_sort_modifier(struct sort_mods *sm, int c) 586 { 587 588 if (sm) { 589 switch (c){ 590 case 'b': 591 sm->bflag = true; 592 break; 593 case 'd': 594 sm->dflag = true; 595 break; 596 case 'f': 597 sm->fflag = true; 598 break; 599 case 'g': 600 sm->gflag = true; 601 need_hint = true; 602 break; 603 case 'i': 604 sm->iflag = true; 605 break; 606 #ifdef SORT_RANDOM 607 case 'R': 608 sm->Rflag = true; 609 need_random = true; 610 break; 611 #endif 612 case 'M': 613 initialise_months(); 614 sm->Mflag = true; 615 need_hint = true; 616 break; 617 case 'n': 618 sm->nflag = true; 619 need_hint = true; 620 print_symbols_on_debug = true; 621 break; 622 case 'r': 623 sm->rflag = true; 624 break; 625 case 'V': 626 sm->Vflag = true; 627 break; 628 case 'h': 629 sm->hflag = true; 630 need_hint = true; 631 print_symbols_on_debug = true; 632 break; 633 default: 634 return false; 635 } 636 sort_opts_vals.complex_sort = true; 637 sm->func = get_sort_func(sm); 638 } 639 return (true); 640 } 641 642 /* 643 * Parse POS in -k option. 644 */ 645 static int 646 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 647 { 648 regmatch_t pmatch[4]; 649 regex_t re; 650 char *c, *f; 651 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 652 size_t len, nmatch; 653 int ret; 654 655 ret = -1; 656 nmatch = 4; 657 c = f = NULL; 658 659 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 660 return (-1); 661 662 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 663 goto end; 664 665 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 666 goto end; 667 668 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 669 goto end; 670 671 len = pmatch[1].rm_eo - pmatch[1].rm_so; 672 f = sort_malloc((len + 1) * sizeof(char)); 673 674 strncpy(f, s + pmatch[1].rm_so, len); 675 f[len] = '\0'; 676 677 if (second) { 678 errno = 0; 679 ks->f2 = (size_t) strtoul(f, NULL, 10); 680 if (errno != 0) 681 err(2, "-k"); 682 if (ks->f2 == 0) { 683 warn("%s",getstr(5)); 684 goto end; 685 } 686 } else { 687 errno = 0; 688 ks->f1 = (size_t) strtoul(f, NULL, 10); 689 if (errno != 0) 690 err(2, "-k"); 691 if (ks->f1 == 0) { 692 warn("%s",getstr(5)); 693 goto end; 694 } 695 } 696 697 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 698 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 699 c = sort_malloc((len + 1) * sizeof(char)); 700 701 strncpy(c, s + pmatch[2].rm_so + 1, len); 702 c[len] = '\0'; 703 704 if (second) { 705 errno = 0; 706 ks->c2 = (size_t) strtoul(c, NULL, 10); 707 if (errno != 0) 708 err(2, "-k"); 709 } else { 710 errno = 0; 711 ks->c1 = (size_t) strtoul(c, NULL, 10); 712 if (errno != 0) 713 err(2, "-k"); 714 if (ks->c1 == 0) { 715 warn("%s",getstr(6)); 716 goto end; 717 } 718 } 719 } else { 720 if (second) 721 ks->c2 = 0; 722 else 723 ks->c1 = 1; 724 } 725 726 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 727 regoff_t i = 0; 728 729 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 730 check_mutually_exclusive_flags(s[i], mef_flags); 731 if (s[i] == 'b') { 732 if (second) 733 ks->pos2b = true; 734 else 735 ks->pos1b = true; 736 } else if (!set_sort_modifier(&(ks->sm), s[i])) 737 goto end; 738 } 739 } 740 741 ret = 0; 742 743 end: 744 745 if (c) 746 sort_free(c); 747 if (f) 748 sort_free(f); 749 regfree(&re); 750 751 return (ret); 752 } 753 754 /* 755 * Parse -k option value. 756 */ 757 static int 758 parse_k(const char *s, struct key_specs *ks) 759 { 760 int ret = -1; 761 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 762 { false, false, false, false, false, false }; 763 764 if (s && *s) { 765 char *sptr; 766 767 sptr = strchr(s, ','); 768 if (sptr) { 769 size_t size1; 770 char *pos1, *pos2; 771 772 size1 = sptr - s; 773 774 if (size1 < 1) 775 return (-1); 776 pos1 = sort_malloc((size1 + 1) * sizeof(char)); 777 778 strncpy(pos1, s, size1); 779 pos1[size1] = '\0'; 780 781 ret = parse_pos(pos1, ks, mef_flags, false); 782 783 sort_free(pos1); 784 if (ret < 0) 785 return (ret); 786 787 pos2 = sort_strdup(sptr + 1); 788 ret = parse_pos(pos2, ks, mef_flags, true); 789 sort_free(pos2); 790 } else 791 ret = parse_pos(s, ks, mef_flags, false); 792 } 793 794 return (ret); 795 } 796 797 /* 798 * Parse POS in +POS -POS option. 799 */ 800 static int 801 parse_pos_obs(const char *s, int *nf, int *nc, char* sopts) 802 { 803 regex_t re; 804 regmatch_t pmatch[4]; 805 char *c, *f; 806 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 807 int ret; 808 size_t len, nmatch; 809 810 ret = -1; 811 nmatch = 4; 812 c = f = NULL; 813 *nc = *nf = 0; 814 815 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 816 return (-1); 817 818 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 819 goto end; 820 821 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 822 goto end; 823 824 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 825 goto end; 826 827 len = pmatch[1].rm_eo - pmatch[1].rm_so; 828 f = sort_malloc((len + 1) * sizeof(char)); 829 830 strncpy(f, s + pmatch[1].rm_so, len); 831 f[len] = '\0'; 832 833 errno = 0; 834 *nf = (size_t) strtoul(f, NULL, 10); 835 if (errno != 0) 836 errx(2, "%s", getstr(11)); 837 838 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 839 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 840 c = sort_malloc((len + 1) * sizeof(char)); 841 842 strncpy(c, s + pmatch[2].rm_so + 1, len); 843 c[len] = '\0'; 844 845 errno = 0; 846 *nc = (size_t) strtoul(c, NULL, 10); 847 if (errno != 0) 848 errx(2, "%s", getstr(11)); 849 } 850 851 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 852 853 len = pmatch[3].rm_eo - pmatch[3].rm_so; 854 855 strncpy(sopts, s + pmatch[3].rm_so, len); 856 sopts[len] = '\0'; 857 } 858 859 ret = 0; 860 861 end: 862 if (c) 863 sort_free(c); 864 if (f) 865 sort_free(f); 866 regfree(&re); 867 868 return (ret); 869 } 870 871 /* 872 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 873 */ 874 static void 875 fix_obsolete_keys(int *argc, char **argv) 876 { 877 char *snew = NULL; 878 879 for (int i = 1; i < *argc; i++) { 880 char *arg1; 881 882 arg1 = argv[i]; 883 884 if (strlen(arg1) > 1 && arg1[0] == '+') { 885 int c1, f1; 886 char sopts1[128]; 887 888 sopts1[0] = 0; 889 c1 = f1 = 0; 890 891 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0) 892 continue; 893 else { 894 f1 += 1; 895 c1 += 1; 896 if (i + 1 < *argc) { 897 char *arg2 = argv[i + 1]; 898 899 if (strlen(arg2) > 1 && 900 arg2[0] == '-') { 901 int c2, f2; 902 char sopts2[128]; 903 904 sopts2[0] = 0; 905 c2 = f2 = 0; 906 907 if (parse_pos_obs(arg2 + 1, 908 &f2, &c2, sopts2) >= 0) { 909 if (c2 > 0) 910 f2 += 1; 911 if (asprintf(&snew, 912 "-k%d.%d%s,%d.%d%s", 913 f1, c1, sopts1, 914 f2, c2, sopts2)== -1) 915 return; 916 argv[i] = snew; 917 for (int j = i + 1; j + 1 < *argc; j++) 918 argv[j] = argv[j + 1]; 919 *argc -= 1; 920 continue; 921 } 922 } 923 } 924 asprintf(&snew, "-k%d.%d%s", f1, c1, sopts1); 925 argv[i] = snew; 926 } 927 } 928 } 929 } 930 931 /* 932 * Set random seed 933 */ 934 #if defined(SORT_RANDOM) 935 static char * 936 random_md5end(MD5_CTX *ctx) 937 { 938 unsigned char digest[MD5_DIGEST_LENGTH]; 939 static const char hex[]="0123456789abcdef"; 940 char *buf; 941 int i; 942 943 buf = malloc(MD5_DIGEST_LENGTH * 2 + 1); 944 if (!buf) 945 return NULL; 946 MD5_Final(digest, ctx); 947 for (i = 0; i < MD5_DIGEST_LENGTH; i++) { 948 buf[2*i] = hex[digest[i] >> 4]; 949 buf[2*i+1] = hex[digest[i] & 0x0f]; 950 } 951 buf[MD5_DIGEST_LENGTH * 2] = '\0'; 952 return buf; 953 } 954 955 static char * 956 random_fromfile(const char *filename) 957 { 958 MD5_CTX ctx; 959 FILE* fp; 960 unsigned char buffer[4096]; 961 struct stat st; 962 off_t size; 963 int bytes; 964 965 fp = openfile(filename, "r"); 966 if (fp == NULL) 967 return NULL; 968 if (fstat(fileno(fp), &st) < 0) { 969 bytes = -1; 970 goto err; 971 } 972 973 MD5_Init(&ctx); 974 size = st.st_size; 975 bytes = 0; 976 while (size > 0 && (bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { 977 MD5_Update(&ctx, buffer, bytes); 978 size -= bytes; 979 } 980 981 err: 982 closefile(fp, NULL); 983 if (bytes < 0) 984 return NULL; 985 986 return (random_md5end(&ctx)); 987 } 988 989 static void 990 set_random_seed(void) 991 { 992 if (need_random) { 993 994 if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) { 995 FILE* fseed; 996 MD5_CTX ctx; 997 char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE]; 998 size_t sz = 0; 999 1000 fseed = openfile(random_source, "r"); 1001 while (!feof(fseed)) { 1002 int cr; 1003 1004 cr = fgetc(fseed); 1005 if (cr == EOF) 1006 break; 1007 1008 rsd[sz++] = (char) cr; 1009 1010 if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE) 1011 break; 1012 } 1013 1014 closefile(fseed, random_source); 1015 1016 MD5_Init(&ctx); 1017 MD5_Update(&ctx, rsd, sz); 1018 1019 random_seed = random_md5end(&ctx); 1020 random_seed_size = strlen(random_seed); 1021 1022 } else { 1023 MD5_CTX ctx; 1024 char *b; 1025 1026 MD5_Init(&ctx); 1027 b = random_fromfile(random_source); 1028 if (b == NULL) 1029 err(2, NULL); 1030 1031 random_seed = b; 1032 random_seed_size = strlen(b); 1033 } 1034 1035 MD5_Init(&md5_ctx); 1036 if(random_seed_size>0) { 1037 MD5_Update(&md5_ctx, random_seed, random_seed_size); 1038 } 1039 } 1040 } 1041 #endif 1042 1043 /* 1044 * Main function. 1045 */ 1046 int 1047 main(int argc, char **argv) 1048 { 1049 char *outfile, *real_outfile; 1050 int c, result; 1051 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 1052 { false, false, false, false, false, false }; 1053 1054 result = 0; 1055 outfile = sort_strdup("-"); 1056 real_outfile = NULL; 1057 1058 struct sort_mods *sm = &default_sort_mods_object; 1059 1060 init_tmp_files(); 1061 1062 set_signal_handler(); 1063 1064 set_hw_params(); 1065 set_locale(); 1066 set_tmpdir(); 1067 set_sort_opts(); 1068 1069 fix_obsolete_keys(&argc, argv); 1070 1071 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 1072 != -1)) { 1073 1074 check_mutually_exclusive_flags(c, mef_flags); 1075 1076 if (!set_sort_modifier(sm, c)) { 1077 1078 switch (c) { 1079 case 'c': 1080 sort_opts_vals.cflag = true; 1081 if (optarg) { 1082 if (!strcmp(optarg, "diagnose-first")) 1083 ; 1084 else if (!strcmp(optarg, "silent") || 1085 !strcmp(optarg, "quiet")) 1086 sort_opts_vals.csilentflag = true; 1087 else if (*optarg) 1088 unknown(optarg); 1089 } 1090 break; 1091 case 'C': 1092 sort_opts_vals.cflag = true; 1093 sort_opts_vals.csilentflag = true; 1094 break; 1095 case 'k': 1096 { 1097 sort_opts_vals.complex_sort = true; 1098 sort_opts_vals.kflag = true; 1099 1100 keys_num++; 1101 keys = sort_realloc(keys, keys_num * 1102 sizeof(struct key_specs)); 1103 memset(&(keys[keys_num - 1]), 0, 1104 sizeof(struct key_specs)); 1105 1106 if (parse_k(optarg, &(keys[keys_num - 1])) 1107 < 0) { 1108 errc(2, EINVAL, "-k %s", optarg); 1109 } 1110 1111 break; 1112 } 1113 case 'm': 1114 sort_opts_vals.mflag = true; 1115 break; 1116 case 'o': 1117 outfile = sort_realloc(outfile, (strlen(optarg) + 1)); 1118 strcpy(outfile, optarg); 1119 break; 1120 case 's': 1121 sort_opts_vals.sflag = true; 1122 break; 1123 case 'S': 1124 available_free_memory = 1125 parse_memory_buffer_value(optarg); 1126 break; 1127 case 'T': 1128 tmpdir = sort_strdup(optarg); 1129 break; 1130 case 't': 1131 while (strlen(optarg) > 1) { 1132 if (optarg[0] != '\\') { 1133 errc(2, EINVAL, "%s", optarg); 1134 } 1135 optarg += 1; 1136 if (*optarg == '0') { 1137 *optarg = 0; 1138 break; 1139 } 1140 } 1141 sort_opts_vals.tflag = true; 1142 sort_opts_vals.field_sep = btowc(optarg[0]); 1143 if (sort_opts_vals.field_sep == WEOF) { 1144 errno = EINVAL; 1145 err(2, NULL); 1146 } 1147 if (!gnusort_numeric_compatibility) { 1148 if (symbol_decimal_point == sort_opts_vals.field_sep) 1149 symbol_decimal_point = WEOF; 1150 if (symbol_thousands_sep == sort_opts_vals.field_sep) 1151 symbol_thousands_sep = WEOF; 1152 if (symbol_negative_sign == sort_opts_vals.field_sep) 1153 symbol_negative_sign = WEOF; 1154 if (symbol_positive_sign == sort_opts_vals.field_sep) 1155 symbol_positive_sign = WEOF; 1156 } 1157 break; 1158 case 'u': 1159 sort_opts_vals.uflag = true; 1160 /* stable sort for the correct unique val */ 1161 sort_opts_vals.sflag = true; 1162 break; 1163 case 'z': 1164 sort_opts_vals.zflag = true; 1165 break; 1166 case SORT_OPT: 1167 if (optarg) { 1168 if (!strcmp(optarg, "general-numeric")) 1169 set_sort_modifier(sm, 'g'); 1170 else if (!strcmp(optarg, "human-numeric")) 1171 set_sort_modifier(sm, 'h'); 1172 else if (!strcmp(optarg, "numeric")) 1173 set_sort_modifier(sm, 'n'); 1174 else if (!strcmp(optarg, "month")) 1175 set_sort_modifier(sm, 'M'); 1176 #if defined(SORT_RANDOM) 1177 else if (!strcmp(optarg, "random")) 1178 set_sort_modifier(sm, 'R'); 1179 #endif 1180 else 1181 unknown(optarg); 1182 } 1183 break; 1184 #if defined(SORT_THREADS) 1185 case PARALLEL_OPT: 1186 nthreads = (size_t)(atoi(optarg)); 1187 if (nthreads < 1) 1188 nthreads = 1; 1189 if (nthreads > 1024) 1190 nthreads = 1024; 1191 break; 1192 #endif 1193 case QSORT_OPT: 1194 sort_opts_vals.sort_method = SORT_QSORT; 1195 break; 1196 case MERGESORT_OPT: 1197 sort_opts_vals.sort_method = SORT_MERGESORT; 1198 break; 1199 case MMAP_OPT: 1200 use_mmap = true; 1201 break; 1202 case HEAPSORT_OPT: 1203 sort_opts_vals.sort_method = SORT_HEAPSORT; 1204 break; 1205 case RADIXSORT_OPT: 1206 sort_opts_vals.sort_method = SORT_RADIXSORT; 1207 break; 1208 #if defined(SORT_RANDOM) 1209 case RANDOMSOURCE_OPT: 1210 random_source = strdup(optarg); 1211 break; 1212 #endif 1213 case COMPRESSPROGRAM_OPT: 1214 compress_program = strdup(optarg); 1215 break; 1216 case FF_OPT: 1217 read_fns_from_file0(optarg); 1218 break; 1219 case BS_OPT: 1220 { 1221 errno = 0; 1222 long mof = strtol(optarg, NULL, 10); 1223 if (errno != 0) 1224 err(2, "--batch-size"); 1225 if (mof >= 2) 1226 max_open_files = (size_t) mof + 1; 1227 } 1228 break; 1229 case VERSION_OPT: 1230 printf("%s\n", VERSION); 1231 exit(EXIT_SUCCESS); 1232 /* NOTREACHED */ 1233 break; 1234 case DEBUG_OPT: 1235 debug_sort = true; 1236 break; 1237 case HELP_OPT: 1238 usage(false); 1239 /* NOTREACHED */ 1240 break; 1241 default: 1242 usage(true); 1243 /* NOTREACHED */ 1244 } 1245 } 1246 } 1247 1248 argc -= optind; 1249 argv += optind; 1250 1251 #ifndef WITHOUT_NLS 1252 catalog = catopen("sort", NL_CAT_LOCALE); 1253 #endif 1254 1255 if (sort_opts_vals.cflag && sort_opts_vals.mflag) 1256 errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); 1257 1258 #ifndef WITHOUT_NLS 1259 catclose(catalog); 1260 #endif 1261 1262 if (keys_num == 0) { 1263 keys_num = 1; 1264 keys = sort_realloc(keys, sizeof(struct key_specs)); 1265 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1266 keys[0].c1 = 1; 1267 keys[0].pos1b = default_sort_mods->bflag; 1268 keys[0].pos2b = default_sort_mods->bflag; 1269 memcpy(&(keys[0].sm), default_sort_mods, 1270 sizeof(struct sort_mods)); 1271 } 1272 1273 for (size_t i = 0; i < keys_num; i++) { 1274 struct key_specs *ks; 1275 1276 ks = &(keys[i]); 1277 1278 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1279 !(ks->pos2b)) { 1280 ks->pos1b = sm->bflag; 1281 ks->pos2b = sm->bflag; 1282 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1283 } 1284 1285 ks->sm.func = get_sort_func(&(ks->sm)); 1286 } 1287 1288 if (argv_from_file0) { 1289 argc = argc_from_file0; 1290 argv = argv_from_file0; 1291 } 1292 1293 if (debug_sort) { 1294 printf("Memory to be used for sorting: %llu\n",available_free_memory); 1295 #if defined(SORT_THREADS) 1296 printf("Number of CPUs: %d\n",(int)ncpu); 1297 nthreads = 1; 1298 #endif 1299 printf("Using collate rules of %s locale\n", 1300 setlocale(LC_COLLATE, NULL)); 1301 if (byte_sort) 1302 printf("Byte sort is used\n"); 1303 if (print_symbols_on_debug) { 1304 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1305 if (symbol_thousands_sep) 1306 printf("Thousands separator: <%lc>\n", 1307 symbol_thousands_sep); 1308 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1309 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1310 } 1311 } 1312 1313 #if defined(SORT_RANDOM) 1314 set_random_seed(); 1315 #endif 1316 1317 /* Case when the outfile equals one of the input files: */ 1318 if (strcmp(outfile, "-")) { 1319 1320 for(int i = 0; i < argc; ++i) { 1321 if (strcmp(argv[i], outfile) == 0) { 1322 real_outfile = sort_strdup(outfile); 1323 for(;;) { 1324 char* tmp = sort_malloc(strlen(outfile) + 1325 strlen(".tmp") + 1); 1326 1327 strcpy(tmp, outfile); 1328 strcpy(tmp + strlen(tmp), ".tmp"); 1329 sort_free(outfile); 1330 outfile = tmp; 1331 if (access(outfile, F_OK) < 0) 1332 break; 1333 } 1334 tmp_file_atexit(outfile); 1335 } 1336 } 1337 } 1338 1339 #if defined(SORT_THREADS) 1340 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) 1341 nthreads = 1; 1342 #endif 1343 1344 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { 1345 struct file_list fl; 1346 struct sort_list list; 1347 1348 sort_list_init(&list); 1349 file_list_init(&fl, true); 1350 1351 if (argc < 1) 1352 procfile("-", &list, &fl); 1353 else { 1354 while (argc > 0) { 1355 procfile(*argv, &list, &fl); 1356 --argc; 1357 ++argv; 1358 } 1359 } 1360 1361 if (fl.count < 1) 1362 sort_list_to_file(&list, outfile); 1363 else { 1364 if (list.count > 0) { 1365 char *flast = new_tmp_file_name(); 1366 1367 sort_list_to_file(&list, flast); 1368 file_list_add(&fl, flast, false); 1369 } 1370 merge_files(&fl, outfile); 1371 } 1372 1373 file_list_clean(&fl); 1374 1375 /* 1376 * We are about to exit the program, so we can ignore 1377 * the clean-up for speed 1378 * 1379 * sort_list_clean(&list); 1380 */ 1381 1382 } else if (sort_opts_vals.cflag) { 1383 result = (argc == 0) ? (check("-")) : (check(*argv)); 1384 } else if (sort_opts_vals.mflag) { 1385 struct file_list fl; 1386 1387 file_list_init(&fl, false); 1388 file_list_populate(&fl, argc, argv, true); 1389 merge_files(&fl, outfile); 1390 file_list_clean(&fl); 1391 } 1392 1393 if (real_outfile) { 1394 unlink(real_outfile); 1395 if (rename(outfile, real_outfile) < 0) 1396 err(2, NULL); 1397 sort_free(real_outfile); 1398 } 1399 1400 sort_free(outfile); 1401 1402 return (result); 1403 } 1404