1 /*- 2 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 3 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 * 27 * $FreeBSD: head/usr.bin/sort/sort.c 281182 2015-04-07 01:17:49Z pfg $ 28 */ 29 30 31 #include <sys/stat.h> 32 #include <sys/sysctl.h> 33 #include <sys/types.h> 34 35 #include <err.h> 36 #include <errno.h> 37 #include <getopt.h> 38 #include <limits.h> 39 #include <locale.h> 40 #if defined(SORT_RANDOM) 41 #include <md5.h> 42 #endif 43 #include <regex.h> 44 #include <signal.h> 45 #include <stdbool.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <unistd.h> 50 #include <wchar.h> 51 #include <wctype.h> 52 53 #include "coll.h" 54 #include "file.h" 55 #include "sort.h" 56 57 #ifndef WITHOUT_NLS 58 #include <nl_types.h> 59 nl_catd catalog; 60 #endif 61 62 #if defined(SORT_RANDOM) 63 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz" 64 #else 65 #define OPTIONS "bcCdfghik:Mmno:rsS:t:T:uVz" 66 #endif 67 68 #if defined(SORT_RANDOM) 69 #define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random") 70 #define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024) 71 72 static bool need_random; 73 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE; 74 static const void *random_seed; 75 static size_t random_seed_size; 76 77 MD5_CTX md5_ctx; 78 #endif 79 80 /* 81 * Default messages to use when NLS is disabled or no catalogue 82 * is found. 83 */ 84 const char *nlsstr[] = { "", 85 /* 1*/"mutually exclusive flags", 86 /* 2*/"extra argument not allowed with -c", 87 /* 3*/"Unknown feature", 88 /* 4*/"Wrong memory buffer specification", 89 /* 5*/"0 field in key specs", 90 /* 6*/"0 column in key specs", 91 /* 7*/"Wrong file mode", 92 /* 8*/"Cannot open file for reading", 93 /* 9*/"Radix sort cannot be used with these sort options", 94 /*10*/"The chosen sort method cannot be used with stable and/or unique sort", 95 /*11*/"Invalid key position", 96 /*12*/"Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... 
] " 97 "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] " 98 "[-o outfile] [--batch-size size] [--files0-from file] " 99 "[--heapsort] [--mergesort] [--radixsort] [--qsort] " 100 "[--mmap] " 101 #if defined(SORT_THREADS) 102 "[--parallel thread_no] " 103 #endif 104 "[--human-numeric-sort] " 105 #if defined(SORT_RANDOM) 106 "[--version-sort] [--random-sort [--random-source file]] " 107 #else 108 "[--version-sort] " 109 #endif 110 "[--compress-program program] [file ...]\n" }; 111 112 struct sort_opts sort_opts_vals; 113 114 bool debug_sort; 115 bool need_hint; 116 117 #if defined(SORT_THREADS) 118 unsigned int ncpu = 1; 119 size_t nthreads = 1; 120 #endif 121 122 static bool gnusort_numeric_compatibility; 123 124 static struct sort_mods default_sort_mods_object; 125 struct sort_mods * const default_sort_mods = &default_sort_mods_object; 126 127 static bool print_symbols_on_debug; 128 129 /* 130 * Arguments from file (when file0-from option is used: 131 */ 132 static size_t argc_from_file0 = (size_t)-1; 133 static char **argv_from_file0; 134 135 /* 136 * Placeholder symbols for options which have no single-character equivalent 137 */ 138 enum 139 { 140 SORT_OPT = CHAR_MAX + 1, 141 HELP_OPT, 142 FF_OPT, 143 BS_OPT, 144 VERSION_OPT, 145 DEBUG_OPT, 146 #if defined(SORT_THREADS) 147 PARALLEL_OPT, 148 #endif 149 #if defined(SORT_RANDOM) 150 RANDOMSOURCE_OPT, 151 #endif 152 COMPRESSPROGRAM_OPT, 153 QSORT_OPT, 154 MERGESORT_OPT, 155 HEAPSORT_OPT, 156 RADIXSORT_OPT, 157 MMAP_OPT 158 }; 159 160 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 161 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; 162 163 static struct option long_options[] = { 164 { "batch-size", required_argument, NULL, BS_OPT }, 165 { "buffer-size", required_argument, NULL, 'S' }, 166 { "check", optional_argument, NULL, 'c' }, 167 { "check=silent|quiet", optional_argument, NULL, 'C' }, 168 { "compress-program", required_argument, NULL, 
COMPRESSPROGRAM_OPT }, 169 { "debug", no_argument, NULL, DEBUG_OPT }, 170 { "dictionary-order", no_argument, NULL, 'd' }, 171 { "field-separator", required_argument, NULL, 't' }, 172 { "files0-from", required_argument, NULL, FF_OPT }, 173 { "general-numeric-sort", no_argument, NULL, 'g' }, 174 { "heapsort", no_argument, NULL, HEAPSORT_OPT }, 175 { "help",no_argument, NULL, HELP_OPT }, 176 { "human-numeric-sort", no_argument, NULL, 'h' }, 177 { "ignore-leading-blanks", no_argument, NULL, 'b' }, 178 { "ignore-case", no_argument, NULL, 'f' }, 179 { "ignore-nonprinting", no_argument, NULL, 'i' }, 180 { "key", required_argument, NULL, 'k' }, 181 { "merge", no_argument, NULL, 'm' }, 182 { "mergesort", no_argument, NULL, MERGESORT_OPT }, 183 { "mmap", no_argument, NULL, MMAP_OPT }, 184 { "month-sort", no_argument, NULL, 'M' }, 185 { "numeric-sort", no_argument, NULL, 'n' }, 186 { "output", required_argument, NULL, 'o' }, 187 #if defined(SORT_THREADS) 188 { "parallel", required_argument, NULL, PARALLEL_OPT }, 189 #endif 190 { "qsort", no_argument, NULL, QSORT_OPT }, 191 { "radixsort", no_argument, NULL, RADIXSORT_OPT }, 192 #if defined(SORT_RANDOM) 193 { "random-sort", no_argument, NULL, 'R' }, 194 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, 195 #endif 196 { "reverse", no_argument, NULL, 'r' }, 197 { "sort", required_argument, NULL, SORT_OPT }, 198 { "stable", no_argument, NULL, 's' }, 199 { "temporary-directory",required_argument, NULL, 'T' }, 200 { "unique", no_argument, NULL, 'u' }, 201 { "version", no_argument, NULL, VERSION_OPT }, 202 { "version-sort",no_argument, NULL, 'V' }, 203 { "zero-terminated", no_argument, NULL, 'z' }, 204 { NULL, no_argument, NULL, 0 } 205 }; 206 207 void fix_obsolete_keys(int *argc, char **argv); 208 209 /* 210 * Check where sort modifier is present 211 */ 212 static bool 213 sort_modifier_empty(struct sort_mods *sm) 214 { 215 216 if (sm == NULL) 217 return (true); 218 return (!(sm->Mflag || sm->Vflag || sm->nflag || 
sm->gflag || 219 #ifdef SORT_RANDOM 220 sm->Rflag || 221 #endif 222 sm->rflag || sm->hflag || sm->dflag || sm->fflag)); 223 } 224 225 /* 226 * Print out usage text. 227 */ 228 static void 229 usage(bool opt_err) 230 { 231 // struct option *o; 232 FILE *out; 233 234 out = stdout; 235 // o = &(long_options[0]); 236 237 if (opt_err) 238 out = stderr; 239 fprintf(out, getstr(12), getprogname()); 240 if (opt_err) 241 exit(2); 242 exit(0); 243 } 244 245 /* 246 * Read input file names from a file (file0-from option). 247 */ 248 static void 249 read_fns_from_file0(const char *fn) 250 { 251 FILE *f; 252 char *line = NULL; 253 size_t linesize = 0; 254 ssize_t linelen; 255 256 if (fn == NULL) 257 return; 258 259 f = fopen(fn, "r"); 260 if (f == NULL) 261 err(2, "%s", fn); 262 263 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { 264 if (*line != '\0') { 265 if (argc_from_file0 == (size_t) - 1) 266 argc_from_file0 = 0; 267 ++argc_from_file0; 268 argv_from_file0 = sort_realloc(argv_from_file0, 269 argc_from_file0 * sizeof(char *)); 270 if (argv_from_file0 == NULL) 271 err(2, NULL); 272 argv_from_file0[argc_from_file0 - 1] = line; 273 } else { 274 free(line); 275 } 276 line = NULL; 277 linesize = 0; 278 } 279 if (ferror(f)) 280 err(2, "%s: getdelim", fn); 281 282 closefile(f, fn); 283 } 284 285 /* 286 * Check how much RAM is available for the sort. 
287 */ 288 static void 289 set_hw_params(void) 290 { 291 long pages, psize; 292 293 pages = psize = 0; 294 295 #if defined(SORT_THREADS) 296 ncpu = 1; 297 #endif 298 299 pages = sysconf(_SC_PHYS_PAGES); 300 if (pages < 1) { 301 perror("sysconf pages"); 302 pages = 1; 303 } 304 psize = sysconf(_SC_PAGESIZE); 305 if (psize < 1) { 306 perror("sysconf psize"); 307 psize = 4096; 308 } 309 #if defined(SORT_THREADS) 310 ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN); 311 if (ncpu < 1) 312 ncpu = 1; 313 else if(ncpu > 32) 314 ncpu = 32; 315 316 nthreads = ncpu; 317 #endif 318 319 free_memory = (unsigned long long) pages * (unsigned long long) psize; 320 available_free_memory = free_memory / 2; 321 322 if (available_free_memory < 1024) 323 available_free_memory = 1024; 324 } 325 326 /* 327 * Convert "plain" symbol to wide symbol, with default value. 328 */ 329 static void 330 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 331 { 332 333 if (wc && c) { 334 int res; 335 336 res = mbtowc(wc, c, MB_CUR_MAX); 337 if (res < 1) 338 *wc = def; 339 } 340 } 341 342 /* 343 * Set current locale symbols. 
344 */ 345 static void 346 set_locale(void) 347 { 348 struct lconv *lc; 349 const char *locale; 350 351 setlocale(LC_ALL, ""); 352 353 lc = localeconv(); 354 355 if (lc) { 356 /* obtain LC_NUMERIC info */ 357 /* Convert to wide char form */ 358 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 359 symbol_decimal_point); 360 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 361 symbol_thousands_sep); 362 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 363 symbol_positive_sign); 364 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 365 symbol_negative_sign); 366 } 367 368 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 369 gnusort_numeric_compatibility = true; 370 371 locale = setlocale(LC_COLLATE, NULL); 372 373 if (locale) { 374 char *tmpl; 375 const char *cclocale; 376 377 tmpl = sort_strdup(locale); 378 cclocale = setlocale(LC_COLLATE, "C"); 379 if (cclocale && !strcmp(cclocale, tmpl)) 380 byte_sort = true; 381 else { 382 const char *pclocale; 383 384 pclocale = setlocale(LC_COLLATE, "POSIX"); 385 if (pclocale && !strcmp(pclocale, tmpl)) 386 byte_sort = true; 387 } 388 setlocale(LC_COLLATE, tmpl); 389 sort_free(tmpl); 390 } 391 } 392 393 /* 394 * Set directory temporary files. 395 */ 396 static void 397 set_tmpdir(void) 398 { 399 char *td; 400 401 td = getenv("TMPDIR"); 402 if (td != NULL) 403 tmpdir = sort_strdup(td); 404 } 405 406 /* 407 * Parse -S option. 
408 */ 409 static unsigned long long 410 parse_memory_buffer_value(const char *value) 411 { 412 413 if (value == NULL) 414 return (available_free_memory); 415 else { 416 char *endptr; 417 unsigned long long membuf; 418 419 endptr = NULL; 420 errno = 0; 421 membuf = strtoll(value, &endptr, 10); 422 423 if (errno != 0) { 424 warn("%s",getstr(4)); 425 membuf = available_free_memory; 426 } else { 427 switch (*endptr){ 428 case 'Y': 429 membuf *= 1024; 430 /* FALLTHROUGH */ 431 case 'Z': 432 membuf *= 1024; 433 /* FALLTHROUGH */ 434 case 'E': 435 membuf *= 1024; 436 /* FALLTHROUGH */ 437 case 'P': 438 membuf *= 1024; 439 /* FALLTHROUGH */ 440 case 'T': 441 membuf *= 1024; 442 /* FALLTHROUGH */ 443 case 'G': 444 membuf *= 1024; 445 /* FALLTHROUGH */ 446 case 'M': 447 membuf *= 1024; 448 /* FALLTHROUGH */ 449 case '\0': 450 case 'K': 451 membuf *= 1024; 452 /* FALLTHROUGH */ 453 case 'b': 454 break; 455 case '%': 456 membuf = (available_free_memory * membuf) / 457 100; 458 break; 459 default: 460 warnc(EINVAL, "%s", optarg); 461 membuf = available_free_memory; 462 } 463 } 464 return (membuf); 465 } 466 } 467 468 /* 469 * Signal handler that clears the temporary files. 470 */ 471 static void 472 sig_handler(int sig __unused, siginfo_t *siginfo __unused, 473 void *context __unused) 474 { 475 476 clear_tmp_files(); 477 exit(-1); 478 } 479 480 /* 481 * Set signal handler on panic signals. 
482 */ 483 static void 484 set_signal_handler(void) 485 { 486 struct sigaction sa; 487 488 memset(&sa, 0, sizeof(sa)); 489 sa.sa_sigaction = &sig_handler; 490 sa.sa_flags = SA_SIGINFO; 491 492 if (sigaction(SIGTERM, &sa, NULL) < 0) { 493 perror("sigaction"); 494 return; 495 } 496 if (sigaction(SIGHUP, &sa, NULL) < 0) { 497 perror("sigaction"); 498 return; 499 } 500 if (sigaction(SIGINT, &sa, NULL) < 0) { 501 perror("sigaction"); 502 return; 503 } 504 if (sigaction(SIGQUIT, &sa, NULL) < 0) { 505 perror("sigaction"); 506 return; 507 } 508 if (sigaction(SIGABRT, &sa, NULL) < 0) { 509 perror("sigaction"); 510 return; 511 } 512 if (sigaction(SIGBUS, &sa, NULL) < 0) { 513 perror("sigaction"); 514 return; 515 } 516 if (sigaction(SIGSEGV, &sa, NULL) < 0) { 517 perror("sigaction"); 518 return; 519 } 520 if (sigaction(SIGUSR1, &sa, NULL) < 0) { 521 perror("sigaction"); 522 return; 523 } 524 if (sigaction(SIGUSR2, &sa, NULL) < 0) { 525 perror("sigaction"); 526 return; 527 } 528 } 529 530 /* 531 * Print "unknown" message and exit with status 2. 532 */ 533 static void 534 unknown(const char *what) 535 { 536 537 errx(2, "%s: %s", getstr(3), what); 538 } 539 540 /* 541 * Check whether contradictory input options are used. 542 */ 543 static void 544 check_mutually_exclusive_flags(char c, bool *mef_flags) 545 { 546 int fo_index, mec; 547 bool found_others, found_this; 548 549 found_others = found_this = false; 550 fo_index = 0; 551 552 for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 553 mec = mutually_exclusive_flags[i]; 554 555 if (mec != c) { 556 if (mef_flags[i]) { 557 if (found_this) 558 errx(1, "%c:%c: %s", c, mec, getstr(1)); 559 found_others = true; 560 fo_index = i; 561 } 562 } else { 563 if (found_others) 564 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1)); 565 mef_flags[i] = true; 566 found_this = true; 567 } 568 } 569 } 570 571 /* 572 * Initialise sort opts data. 
573 */ 574 static void 575 set_sort_opts(void) 576 { 577 578 memset(&default_sort_mods_object, 0, 579 sizeof(default_sort_mods_object)); 580 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 581 default_sort_mods_object.func = 582 get_sort_func(&default_sort_mods_object); 583 } 584 585 /* 586 * Set a sort modifier on a sort modifiers object. 587 */ 588 static bool 589 set_sort_modifier(struct sort_mods *sm, int c) 590 { 591 592 if (sm) { 593 switch (c){ 594 case 'b': 595 sm->bflag = true; 596 break; 597 case 'd': 598 sm->dflag = true; 599 break; 600 case 'f': 601 sm->fflag = true; 602 break; 603 case 'g': 604 sm->gflag = true; 605 need_hint = true; 606 break; 607 case 'i': 608 sm->iflag = true; 609 break; 610 #ifdef SORT_RANDOM 611 case 'R': 612 sm->Rflag = true; 613 need_random = true; 614 break; 615 #endif 616 case 'M': 617 initialise_months(); 618 sm->Mflag = true; 619 need_hint = true; 620 break; 621 case 'n': 622 sm->nflag = true; 623 need_hint = true; 624 print_symbols_on_debug = true; 625 break; 626 case 'r': 627 sm->rflag = true; 628 break; 629 case 'V': 630 sm->Vflag = true; 631 break; 632 case 'h': 633 sm->hflag = true; 634 need_hint = true; 635 print_symbols_on_debug = true; 636 break; 637 default: 638 return false; 639 } 640 sort_opts_vals.complex_sort = true; 641 sm->func = get_sort_func(sm); 642 } 643 return (true); 644 } 645 646 /* 647 * Parse POS in -k option. 
648 */ 649 static int 650 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 651 { 652 regmatch_t pmatch[4]; 653 regex_t re; 654 char *c, *f; 655 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 656 size_t len, nmatch; 657 int ret; 658 659 ret = -1; 660 nmatch = 4; 661 c = f = NULL; 662 663 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 664 return (-1); 665 666 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 667 goto end; 668 669 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 670 goto end; 671 672 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 673 goto end; 674 675 len = pmatch[1].rm_eo - pmatch[1].rm_so; 676 f = sort_malloc((len + 1) * sizeof(char)); 677 678 strncpy(f, s + pmatch[1].rm_so, len); 679 f[len] = '\0'; 680 681 if (second) { 682 errno = 0; 683 ks->f2 = (size_t) strtoul(f, NULL, 10); 684 if (errno != 0) 685 err(2, "-k"); 686 if (ks->f2 == 0) { 687 warn("%s",getstr(5)); 688 goto end; 689 } 690 } else { 691 errno = 0; 692 ks->f1 = (size_t) strtoul(f, NULL, 10); 693 if (errno != 0) 694 err(2, "-k"); 695 if (ks->f1 == 0) { 696 warn("%s",getstr(5)); 697 goto end; 698 } 699 } 700 701 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 702 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 703 c = sort_malloc((len + 1) * sizeof(char)); 704 705 strncpy(c, s + pmatch[2].rm_so + 1, len); 706 c[len] = '\0'; 707 708 if (second) { 709 errno = 0; 710 ks->c2 = (size_t) strtoul(c, NULL, 10); 711 if (errno != 0) 712 err(2, "-k"); 713 } else { 714 errno = 0; 715 ks->c1 = (size_t) strtoul(c, NULL, 10); 716 if (errno != 0) 717 err(2, "-k"); 718 if (ks->c1 == 0) { 719 warn("%s",getstr(6)); 720 goto end; 721 } 722 } 723 } else { 724 if (second) 725 ks->c2 = 0; 726 else 727 ks->c1 = 1; 728 } 729 730 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 731 regoff_t i = 0; 732 733 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 734 check_mutually_exclusive_flags(s[i], mef_flags); 735 if (s[i] == 'b') { 736 if (second) 737 ks->pos2b = true; 738 else 739 ks->pos1b = true; 740 } 
else if (!set_sort_modifier(&(ks->sm), s[i])) 741 goto end; 742 } 743 } 744 745 ret = 0; 746 747 end: 748 749 if (c) 750 sort_free(c); 751 if (f) 752 sort_free(f); 753 regfree(&re); 754 755 return (ret); 756 } 757 758 /* 759 * Parse -k option value. 760 */ 761 static int 762 parse_k(const char *s, struct key_specs *ks) 763 { 764 int ret = -1; 765 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 766 { false, false, false, false, false, false }; 767 768 if (s && *s) { 769 char *sptr; 770 771 sptr = strchr(s, ','); 772 if (sptr) { 773 size_t size1; 774 char *pos1, *pos2; 775 776 size1 = sptr - s; 777 778 if (size1 < 1) 779 return (-1); 780 pos1 = sort_malloc((size1 + 1) * sizeof(char)); 781 782 strncpy(pos1, s, size1); 783 pos1[size1] = '\0'; 784 785 ret = parse_pos(pos1, ks, mef_flags, false); 786 787 sort_free(pos1); 788 if (ret < 0) 789 return (ret); 790 791 pos2 = sort_strdup(sptr + 1); 792 ret = parse_pos(pos2, ks, mef_flags, true); 793 sort_free(pos2); 794 } else 795 ret = parse_pos(s, ks, mef_flags, false); 796 } 797 798 return (ret); 799 } 800 801 /* 802 * Parse POS in +POS -POS option. 
/*
 * Parse POS in +POS -POS option.
 */

/* Size, including the NUL, of the modifier buffers given to parse_pos_obs(). */
enum { OBS_SOPTS_SIZE = 128 };

/*
 * Parse s, of the form FIELD[.COLUMN][LETTERS].
 *
 * On success *nf and *nc hold the field and column (0 when the column
 * is absent) and, when letters are present, they are copied into sopts,
 * which must have room for OBS_SOPTS_SIZE bytes.
 *
 * Returns 0 on success and -1 when s does not match or the letters do
 * not fit into sopts.  A number that cannot be represented (leaving
 * room for the caller's +1 adjustment) terminates the program with
 * status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	int ret;
	size_t len, nmatch;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc(len + 1);
	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	errno = 0;
	val = strtoul(f, NULL, 10);
	/* Must fit in int and still allow the caller to add 1. */
	if (errno != 0 || val >= (unsigned long)INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Group 2 includes the leading '.'. */
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc(len + 1);
		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		val = strtoul(c, NULL, 10);
		if (errno != 0 || val >= (unsigned long)INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Refuse instead of writing past the caller's buffer. */
		if (len >= OBS_SOPTS_SIZE)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	sort_free(c);
	sort_free(f);
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
 *
 * Every argument of the form +POS1 is replaced by a newly allocated
 * "-kF.C[mods]" string (field and column converted from 0-based to
 * 1-based).  If the next argument is a matching -POS2 it is folded
 * into the same -k string and removed from argv, and *argc is reduced
 * by one.  Arguments that do not parse are left untouched.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* "-k" + four ints + ".,." + two modifier strings + NUL fits. */
	char sopt[2 * OBS_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char *arg1;
		char sopts1[OBS_SOPTS_SIZE];
		int c1, f1;

		arg1 = argv[i];

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				int c2, f2;
				char sopts2[OBS_SOPTS_SIZE];

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);
					/* Close the gap left by -POS2. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep the vector NULL-terminated. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}

/*
 * Set random seed
 *
 * Does nothing unless a random sort was requested (need_random).  For
 * the default source, up to MAX_DEFAULT_RANDOM_SEED_DATA_SIZE bytes are
 * read from it and their MD5 hex digest becomes the seed; for any other
 * source the MD5 hex digest of the whole file is used.  The seed is
 * then fed into the global md5_ctx.
 */
#if defined(SORT_RANDOM)
static void
set_random_seed(void)
{
	char *digest;

	if (!need_random)
		return;

	if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
		FILE* fseed;
		MD5_CTX ctx;
		char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
		size_t sz = 0;
		int cr;

		fseed = openfile(random_source, "r");
		while (sz < MAX_DEFAULT_RANDOM_SEED_DATA_SIZE &&
		    (cr = fgetc(fseed)) != EOF)
			rsd[sz++] = (char) cr;

		closefile(fseed, random_source);

		MD5Init(&ctx);
		MD5Update(&ctx, rsd, sz);

		digest = MD5End(&ctx, NULL);
	} else {
		digest = MD5File(random_source, NULL);
	}

	/* Either call can fail; strlen(NULL) below would be undefined. */
	if (digest == NULL)
		err(2, NULL);

	random_seed = digest;
	random_seed_size = strlen(digest);

	MD5Init(&md5_ctx);
	if(random_seed_size>0) {
		MD5Update(&md5_ctx, random_seed, random_seed_size);
	}
}
#endif
992 */ 993 int 994 main(int argc, char **argv) 995 { 996 char *outfile, *real_outfile; 997 int c, result; 998 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 999 { false, false, false, false, false, false }; 1000 1001 result = 0; 1002 outfile = sort_strdup("-"); 1003 real_outfile = NULL; 1004 1005 struct sort_mods *sm = &default_sort_mods_object; 1006 1007 init_tmp_files(); 1008 1009 set_signal_handler(); 1010 1011 set_hw_params(); 1012 set_locale(); 1013 set_tmpdir(); 1014 set_sort_opts(); 1015 1016 fix_obsolete_keys(&argc, argv); 1017 1018 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 1019 != -1)) { 1020 1021 check_mutually_exclusive_flags(c, mef_flags); 1022 1023 if (!set_sort_modifier(sm, c)) { 1024 1025 switch (c) { 1026 case 'c': 1027 sort_opts_vals.cflag = true; 1028 if (optarg) { 1029 if (!strcmp(optarg, "diagnose-first")) 1030 ; 1031 else if (!strcmp(optarg, "silent") || 1032 !strcmp(optarg, "quiet")) 1033 sort_opts_vals.csilentflag = true; 1034 else if (*optarg) 1035 unknown(optarg); 1036 } 1037 break; 1038 case 'C': 1039 sort_opts_vals.cflag = true; 1040 sort_opts_vals.csilentflag = true; 1041 break; 1042 case 'k': 1043 { 1044 sort_opts_vals.complex_sort = true; 1045 sort_opts_vals.kflag = true; 1046 1047 keys_num++; 1048 keys = sort_realloc(keys, keys_num * 1049 sizeof(struct key_specs)); 1050 memset(&(keys[keys_num - 1]), 0, 1051 sizeof(struct key_specs)); 1052 1053 if (parse_k(optarg, &(keys[keys_num - 1])) 1054 < 0) { 1055 errc(2, EINVAL, "-k %s", optarg); 1056 } 1057 1058 break; 1059 } 1060 case 'm': 1061 sort_opts_vals.mflag = true; 1062 break; 1063 case 'o': 1064 outfile = sort_realloc(outfile, (strlen(optarg) + 1)); 1065 strcpy(outfile, optarg); 1066 break; 1067 case 's': 1068 sort_opts_vals.sflag = true; 1069 break; 1070 case 'S': 1071 available_free_memory = 1072 parse_memory_buffer_value(optarg); 1073 break; 1074 case 'T': 1075 tmpdir = sort_strdup(optarg); 1076 break; 1077 case 't': 1078 while (strlen(optarg) > 1) { 
1079 if (optarg[0] != '\\') { 1080 errc(2, EINVAL, "%s", optarg); 1081 } 1082 optarg += 1; 1083 if (*optarg == '0') { 1084 *optarg = 0; 1085 break; 1086 } 1087 } 1088 sort_opts_vals.tflag = true; 1089 sort_opts_vals.field_sep = btowc(optarg[0]); 1090 if (sort_opts_vals.field_sep == WEOF) { 1091 errno = EINVAL; 1092 err(2, NULL); 1093 } 1094 if (!gnusort_numeric_compatibility) { 1095 if (symbol_decimal_point == sort_opts_vals.field_sep) 1096 symbol_decimal_point = WEOF; 1097 if (symbol_thousands_sep == sort_opts_vals.field_sep) 1098 symbol_thousands_sep = WEOF; 1099 if (symbol_negative_sign == sort_opts_vals.field_sep) 1100 symbol_negative_sign = WEOF; 1101 if (symbol_positive_sign == sort_opts_vals.field_sep) 1102 symbol_positive_sign = WEOF; 1103 } 1104 break; 1105 case 'u': 1106 sort_opts_vals.uflag = true; 1107 /* stable sort for the correct unique val */ 1108 sort_opts_vals.sflag = true; 1109 break; 1110 case 'z': 1111 sort_opts_vals.zflag = true; 1112 break; 1113 case SORT_OPT: 1114 if (optarg) { 1115 if (!strcmp(optarg, "general-numeric")) 1116 set_sort_modifier(sm, 'g'); 1117 else if (!strcmp(optarg, "human-numeric")) 1118 set_sort_modifier(sm, 'h'); 1119 else if (!strcmp(optarg, "numeric")) 1120 set_sort_modifier(sm, 'n'); 1121 else if (!strcmp(optarg, "month")) 1122 set_sort_modifier(sm, 'M'); 1123 #if defined(SORT_RANDOM) 1124 else if (!strcmp(optarg, "random")) 1125 set_sort_modifier(sm, 'R'); 1126 #endif 1127 else 1128 unknown(optarg); 1129 } 1130 break; 1131 #if defined(SORT_THREADS) 1132 case PARALLEL_OPT: 1133 nthreads = (size_t)(atoi(optarg)); 1134 if (nthreads < 1) 1135 nthreads = 1; 1136 if (nthreads > 1024) 1137 nthreads = 1024; 1138 break; 1139 #endif 1140 case QSORT_OPT: 1141 sort_opts_vals.sort_method = SORT_QSORT; 1142 break; 1143 case MERGESORT_OPT: 1144 sort_opts_vals.sort_method = SORT_MERGESORT; 1145 break; 1146 case MMAP_OPT: 1147 use_mmap = true; 1148 break; 1149 case HEAPSORT_OPT: 1150 sort_opts_vals.sort_method = SORT_HEAPSORT; 1151 
break; 1152 case RADIXSORT_OPT: 1153 sort_opts_vals.sort_method = SORT_RADIXSORT; 1154 break; 1155 #if defined(SORT_RANDOM) 1156 case RANDOMSOURCE_OPT: 1157 random_source = strdup(optarg); 1158 break; 1159 #endif 1160 case COMPRESSPROGRAM_OPT: 1161 compress_program = strdup(optarg); 1162 break; 1163 case FF_OPT: 1164 read_fns_from_file0(optarg); 1165 break; 1166 case BS_OPT: 1167 { 1168 errno = 0; 1169 long mof = strtol(optarg, NULL, 10); 1170 if (errno != 0) 1171 err(2, "--batch-size"); 1172 if (mof >= 2) 1173 max_open_files = (size_t) mof + 1; 1174 } 1175 break; 1176 case VERSION_OPT: 1177 printf("%s\n", VERSION); 1178 exit(EXIT_SUCCESS); 1179 /* NOTREACHED */ 1180 break; 1181 case DEBUG_OPT: 1182 debug_sort = true; 1183 break; 1184 case HELP_OPT: 1185 usage(false); 1186 /* NOTREACHED */ 1187 break; 1188 default: 1189 usage(true); 1190 /* NOTREACHED */ 1191 } 1192 } 1193 } 1194 1195 argc -= optind; 1196 argv += optind; 1197 1198 #ifndef WITHOUT_NLS 1199 catalog = catopen("sort", NL_CAT_LOCALE); 1200 #endif 1201 1202 if (sort_opts_vals.cflag && sort_opts_vals.mflag) 1203 errx(1, "%c:%c: %s", 'm', 'c', getstr(1)); 1204 1205 #ifndef WITHOUT_NLS 1206 catclose(catalog); 1207 #endif 1208 1209 if (keys_num == 0) { 1210 keys_num = 1; 1211 keys = sort_realloc(keys, sizeof(struct key_specs)); 1212 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1213 keys[0].c1 = 1; 1214 keys[0].pos1b = default_sort_mods->bflag; 1215 keys[0].pos2b = default_sort_mods->bflag; 1216 memcpy(&(keys[0].sm), default_sort_mods, 1217 sizeof(struct sort_mods)); 1218 } 1219 1220 for (size_t i = 0; i < keys_num; i++) { 1221 struct key_specs *ks; 1222 1223 ks = &(keys[i]); 1224 1225 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1226 !(ks->pos2b)) { 1227 ks->pos1b = sm->bflag; 1228 ks->pos2b = sm->bflag; 1229 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1230 } 1231 1232 ks->sm.func = get_sort_func(&(ks->sm)); 1233 } 1234 1235 if (argv_from_file0) { 1236 argc = argc_from_file0; 1237 argv = 
argv_from_file0; 1238 } 1239 1240 if (debug_sort) { 1241 printf("Memory to be used for sorting: %llu\n",available_free_memory); 1242 #if defined(SORT_THREADS) 1243 printf("Number of CPUs: %d\n",(int)ncpu); 1244 nthreads = 1; 1245 #endif 1246 printf("Using collate rules of %s locale\n", 1247 setlocale(LC_COLLATE, NULL)); 1248 if (byte_sort) 1249 printf("Byte sort is used\n"); 1250 if (print_symbols_on_debug) { 1251 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1252 if (symbol_thousands_sep) 1253 printf("Thousands separator: <%lc>\n", 1254 symbol_thousands_sep); 1255 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1256 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1257 } 1258 } 1259 1260 #if defined(SORT_RANDOM) 1261 set_random_seed(); 1262 #endif 1263 1264 /* Case when the outfile equals one of the input files: */ 1265 if (strcmp(outfile, "-")) { 1266 1267 for(int i = 0; i < argc; ++i) { 1268 if (strcmp(argv[i], outfile) == 0) { 1269 real_outfile = sort_strdup(outfile); 1270 for(;;) { 1271 char* tmp = sort_malloc(strlen(outfile) + 1272 strlen(".tmp") + 1); 1273 1274 strcpy(tmp, outfile); 1275 strcpy(tmp + strlen(tmp), ".tmp"); 1276 sort_free(outfile); 1277 outfile = tmp; 1278 if (access(outfile, F_OK) < 0) 1279 break; 1280 } 1281 tmp_file_atexit(outfile); 1282 } 1283 } 1284 } 1285 1286 #if defined(SORT_THREADS) 1287 if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0)) 1288 nthreads = 1; 1289 #endif 1290 1291 if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) { 1292 struct file_list fl; 1293 struct sort_list list; 1294 1295 sort_list_init(&list); 1296 file_list_init(&fl, true); 1297 1298 if (argc < 1) 1299 procfile("-", &list, &fl); 1300 else { 1301 while (argc > 0) { 1302 procfile(*argv, &list, &fl); 1303 --argc; 1304 ++argv; 1305 } 1306 } 1307 1308 if (fl.count < 1) 1309 sort_list_to_file(&list, outfile); 1310 else { 1311 if (list.count > 0) { 1312 char *flast = new_tmp_file_name(); 1313 1314 sort_list_to_file(&list, 
flast); 1315 file_list_add(&fl, flast, false); 1316 } 1317 merge_files(&fl, outfile); 1318 } 1319 1320 file_list_clean(&fl); 1321 1322 /* 1323 * We are about to exit the program, so we can ignore 1324 * the clean-up for speed 1325 * 1326 * sort_list_clean(&list); 1327 */ 1328 1329 } else if (sort_opts_vals.cflag) { 1330 result = (argc == 0) ? (check("-")) : (check(*argv)); 1331 } else if (sort_opts_vals.mflag) { 1332 struct file_list fl; 1333 1334 file_list_init(&fl, false); 1335 file_list_populate(&fl, argc, argv, true); 1336 merge_files(&fl, outfile); 1337 file_list_clean(&fl); 1338 } 1339 1340 if (real_outfile) { 1341 unlink(real_outfile); 1342 if (rename(outfile, real_outfile) < 0) 1343 err(2, NULL); 1344 sort_free(real_outfile); 1345 } 1346 1347 sort_free(outfile); 1348 1349 return (result); 1350 } 1351