1 /* $OpenBSD: sort.c,v 1.79 2015/04/05 13:56:04 millert Exp $ */ 2 3 /*- 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/resource.h> 31 #include <sys/stat.h> 32 #include <sys/sysctl.h> 33 #include <sys/types.h> 34 35 #include <err.h> 36 #include <errno.h> 37 #include <getopt.h> 38 #include <limits.h> 39 #include <locale.h> 40 #include <md5.h> 41 #include <regex.h> 42 #include <signal.h> 43 #include <stdbool.h> 44 #include <stdint.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <unistd.h> 49 #include <wchar.h> 50 #include <wctype.h> 51 52 #include "coll.h" 53 #include "file.h" 54 #include "sort.h" 55 56 #ifdef GNUSORT_COMPATIBILITY 57 # define PERMUTE "" 58 #else 59 # define PERMUTE "+" 60 #endif 61 #define OPTIONS PERMUTE"bCcdfgHhik:Mmno:RrS:st:T:uVz" 62 63 static bool need_random; 64 static const char *random_source; 65 66 MD5_CTX md5_ctx; 67 68 struct sort_opts sort_opts_vals; 69 70 bool debug_sort; 71 bool need_hint; 72 73 static bool gnusort_numeric_compatibility; 74 75 static struct sort_mods default_sort_mods_object; 76 struct sort_mods * const default_sort_mods = &default_sort_mods_object; 77 78 static bool print_symbols_on_debug; 79 80 /* 81 * Arguments from file (when file0-from option is used: 82 */ 83 static size_t argc_from_file0 = (size_t)-1; 84 static char **argv_from_file0; 85 86 /* 87 * Placeholder symbols for options which have no single-character equivalent 88 */ 89 enum { 90 SORT_OPT = CHAR_MAX + 1, 91 HELP_OPT, 92 FF_OPT, 93 BS_OPT, 94 VERSION_OPT, 95 DEBUG_OPT, 96 RANDOMSOURCE_OPT, 97 COMPRESSPROGRAM_OPT, 98 QSORT_OPT, 99 HEAPSORT_OPT, 100 RADIXSORT_OPT, 101 MMAP_OPT 102 }; 103 104 #define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6 105 static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = { 'M', 'n', 'g', 'R', 'h', 'V' }; 106 107 static const struct option long_options[] = { 108 { "batch-size", required_argument, NULL, BS_OPT }, 109 { "buffer-size", required_argument, NULL, 'S' }, 110 { "check", optional_argument, NULL, 'c' }, 111 { "check=silent|quiet", optional_argument, NULL, 'C' }, 112 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT }, 113 { "debug", no_argument, NULL, DEBUG_OPT }, 114 { "dictionary-order", no_argument, NULL, 'd' }, 115 { "field-separator", required_argument, NULL, 't' }, 116 { "files0-from", required_argument, NULL, FF_OPT }, 117 { "general-numeric-sort", no_argument, NULL, 'g' }, 118 { "heapsort", no_argument, NULL, HEAPSORT_OPT }, 119 { "help", no_argument, NULL, HELP_OPT }, 120 { "human-numeric-sort", no_argument, NULL, 'h' }, 121 { "ignore-leading-blanks", no_argument, NULL, 'b' }, 122 { "ignore-case", no_argument, NULL, 'f' }, 123 { "ignore-nonprinting", no_argument, NULL, 'i' }, 124 { "key", required_argument, NULL, 'k' }, 125 { "merge", no_argument, NULL, 'm' }, 126 { "mergesort", no_argument, NULL, 'H' }, 127 { "mmap", no_argument, NULL, MMAP_OPT }, 128 { "month-sort", no_argument, NULL, 'M' }, 129 { "numeric-sort", no_argument, NULL, 'n' }, 130 { "output", required_argument, NULL, 'o' }, 131 { "qsort", no_argument, NULL, QSORT_OPT }, 132 { "radixsort", no_argument, NULL, RADIXSORT_OPT }, 133 { "random-sort", no_argument, NULL, 'R' }, 134 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT }, 135 { "reverse", no_argument, NULL, 'r' }, 136 { "sort", required_argument, NULL, SORT_OPT }, 137 { "stable", no_argument, NULL, 's' }, 138 { "temporary-directory", required_argument, NULL, 'T' }, 139 { "unique", no_argument, NULL, 'u' }, 140 { "version", no_argument, NULL, VERSION_OPT }, 141 { "version-sort", no_argument, NULL, 'V' }, 142 { "zero-terminated", no_argument, NULL, 'z' }, 143 { NULL, no_argument, NULL, 0 } 144 }; 145 146 /* 147 * Check where sort modifier is present 148 */ 149 static bool 150 sort_modifier_empty(struct sort_mods *sm) 151 { 152 return !(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag || 153 sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag); 154 } 155 156 /* 157 * Print out usage text. 158 */ 159 static __dead void 160 usage(int exit_val) 161 { 162 fprintf(exit_val ? stderr : stdout, 163 "usage: %s [-bCcdfgHhiMmnRrsuVz] [-k field1[,field2]] [-o output] " 164 "[-S size]\n\t[-T dir] [-t char] [file ...]\n", getprogname()); 165 exit(exit_val); 166 } 167 168 /* 169 * Read input file names from a file (file0-from option). 170 */ 171 static void 172 read_fns_from_file0(const char *fn) 173 { 174 FILE *f; 175 char *line = NULL; 176 size_t linesize = 0; 177 ssize_t linelen; 178 179 f = fopen(fn, "r"); 180 if (f == NULL) 181 err(2, "%s", fn); 182 183 while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) { 184 if (*line != '\0') { 185 if (argc_from_file0 == (size_t)-1) 186 argc_from_file0 = 0; 187 ++argc_from_file0; 188 argv_from_file0 = sort_reallocarray(argv_from_file0, 189 argc_from_file0, sizeof(char *)); 190 argv_from_file0[argc_from_file0 - 1] = line; 191 } else { 192 free(line); 193 } 194 line = NULL; 195 linesize = 0; 196 } 197 if (ferror(f)) 198 err(2, "%s: getdelim", fn); 199 200 closefile(f, fn); 201 } 202 203 /* 204 * Check how much RAM is available for the sort. 205 */ 206 static void 207 set_hw_params(void) 208 { 209 unsigned long long free_memory; 210 long long user_memory; 211 struct rlimit rl; 212 size_t len; 213 int mib[] = { CTL_HW, HW_USERMEM64 }; 214 215 /* Get total user (non-kernel) memory. */ 216 len = sizeof(user_memory); 217 if (sysctl(mib, 2, &user_memory, &len, NULL, 0) == -1) 218 user_memory = -1; 219 220 /* Increase our data size to the max */ 221 if (getrlimit(RLIMIT_DATA, &rl) == 0) { 222 free_memory = (unsigned long long)rl.rlim_cur; 223 rl.rlim_cur = rl.rlim_max; 224 if (setrlimit(RLIMIT_DATA, &rl) == 0) { 225 free_memory = (unsigned long long)rl.rlim_max; 226 } else { 227 warn("Can't set resource limit to max data size"); 228 } 229 } else { 230 free_memory = 1000000; 231 warn("Can't get resource limit for data size"); 232 } 233 234 /* We prefer to use temp files rather than swap space. */ 235 if (user_memory != -1 && free_memory > user_memory) 236 free_memory = user_memory; 237 238 available_free_memory = free_memory / 2; 239 } 240 241 /* 242 * Convert "plain" symbol to wide symbol, with default value. 243 */ 244 static void 245 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def) 246 { 247 int res; 248 249 res = mbtowc(wc, c, MB_CUR_MAX); 250 if (res < 1) 251 *wc = def; 252 } 253 254 /* 255 * Set current locale symbols. 256 */ 257 static void 258 set_locale(void) 259 { 260 struct lconv *lc; 261 const char *locale; 262 263 setlocale(LC_ALL, ""); 264 265 /* Obtain LC_NUMERIC info */ 266 lc = localeconv(); 267 268 /* Convert to wide char form */ 269 conv_mbtowc(&symbol_decimal_point, lc->decimal_point, 270 symbol_decimal_point); 271 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep, 272 symbol_thousands_sep); 273 conv_mbtowc(&symbol_positive_sign, lc->positive_sign, 274 symbol_positive_sign); 275 conv_mbtowc(&symbol_negative_sign, lc->negative_sign, 276 symbol_negative_sign); 277 278 if (getenv("GNUSORT_NUMERIC_COMPATIBILITY")) 279 gnusort_numeric_compatibility = true; 280 281 locale = setlocale(LC_COLLATE, NULL); 282 if (locale != NULL) { 283 char *tmpl; 284 const char *byteclocale; 285 286 tmpl = sort_strdup(locale); 287 byteclocale = setlocale(LC_COLLATE, "C"); 288 if (byteclocale && strcmp(byteclocale, tmpl) == 0) { 289 byte_sort = true; 290 } else { 291 byteclocale = setlocale(LC_COLLATE, "POSIX"); 292 if (byteclocale && strcmp(byteclocale, tmpl) == 0) 293 byte_sort = true; 294 else 295 setlocale(LC_COLLATE, tmpl); 296 } 297 sort_free(tmpl); 298 } 299 if (!byte_sort) 300 sort_mb_cur_max = MB_CUR_MAX; 301 } 302 303 /* 304 * Set directory temporary files. 305 */ 306 static void 307 set_tmpdir(void) 308 { 309 if (!issetugid()) { 310 char *td; 311 312 td = getenv("TMPDIR"); 313 if (td != NULL) 314 tmpdir = td; 315 } 316 } 317 318 /* 319 * Parse -S option. 320 */ 321 static unsigned long long 322 parse_memory_buffer_value(const char *value) 323 { 324 char *endptr; 325 unsigned long long membuf; 326 327 membuf = strtoll(value, &endptr, 10); 328 if (endptr == value || (long long)membuf < 0 || 329 (errno == ERANGE && membuf == LLONG_MAX)) 330 goto invalid; 331 332 switch (*endptr) { 333 case 'Y': 334 if (membuf > ULLONG_MAX / 1024) 335 goto invalid; 336 membuf *= 1024; 337 /* FALLTHROUGH */ 338 case 'Z': 339 if (membuf > ULLONG_MAX / 1024) 340 goto invalid; 341 membuf *= 1024; 342 /* FALLTHROUGH */ 343 case 'E': 344 if (membuf > ULLONG_MAX / 1024) 345 goto invalid; 346 membuf *= 1024; 347 /* FALLTHROUGH */ 348 case 'P': 349 if (membuf > ULLONG_MAX / 1024) 350 goto invalid; 351 membuf *= 1024; 352 /* FALLTHROUGH */ 353 case 'T': 354 if (membuf > ULLONG_MAX / 1024) 355 goto invalid; 356 membuf *= 1024; 357 /* FALLTHROUGH */ 358 case 'G': 359 if (membuf > ULLONG_MAX / 1024) 360 goto invalid; 361 membuf *= 1024; 362 /* FALLTHROUGH */ 363 case 'M': 364 if (membuf > ULLONG_MAX / 1024) 365 goto invalid; 366 membuf *= 1024; 367 /* FALLTHROUGH */ 368 case '\0': 369 case 'K': 370 if (membuf > ULLONG_MAX / 1024) 371 goto invalid; 372 membuf *= 1024; 373 /* FALLTHROUGH */ 374 case 'b': 375 break; 376 case '%': 377 if (available_free_memory != 0 && 378 membuf > ULLONG_MAX / available_free_memory) 379 goto invalid; 380 membuf = (available_free_memory * membuf) / 381 100; 382 break; 383 default: 384 warnc(EINVAL, "%s", optarg); 385 membuf = available_free_memory; 386 } 387 if (membuf > SIZE_MAX) 388 goto invalid; 389 return membuf; 390 invalid: 391 errx(2, "invalid memory buffer size: %s", value); 392 } 393 394 /* 395 * Signal handler that clears the temporary files. 396 */ 397 static void 398 sig_handler(int sig __unused) 399 { 400 clear_tmp_files(); 401 _exit(2); 402 } 403 404 /* 405 * Set signal handler on panic signals. 406 */ 407 static void 408 set_signal_handler(void) 409 { 410 struct sigaction sa; 411 int i, signals[] = {SIGTERM, SIGHUP, SIGINT, SIGUSR1, SIGUSR2, 412 SIGPIPE, SIGXCPU, SIGXFSZ, 0}; 413 414 memset(&sa, 0, sizeof(sa)); 415 sigfillset(&sa.sa_mask); 416 sa.sa_flags = SA_RESTART; 417 sa.sa_handler = sig_handler; 418 419 for (i = 0; signals[i] != 0; i++) { 420 if (sigaction(signals[i], &sa, NULL) < 0) { 421 warn("sigaction(%s)", strsignal(signals[i])); 422 continue; 423 } 424 } 425 } 426 427 /* 428 * Print "unknown" message and exit with status 2. 429 */ 430 static void 431 unknown(const char *what) 432 { 433 errx(2, "Unknown feature: %s", what); 434 } 435 436 /* 437 * Check whether contradictory input options are used. 438 */ 439 static void 440 check_mutually_exclusive_flags(char c, bool *mef_flags) 441 { 442 int i, fo_index, mec; 443 bool found_others, found_this; 444 445 found_others = found_this = false; 446 fo_index = 0; 447 448 for (i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) { 449 mec = mutually_exclusive_flags[i]; 450 451 if (mec != c) { 452 if (mef_flags[i]) { 453 if (found_this) { 454 errx(2, 455 "%c:%c: mutually exclusive flags", 456 c, mec); 457 } 458 found_others = true; 459 fo_index = i; 460 } 461 } else { 462 if (found_others) { 463 errx(2, 464 "%c:%c: mutually exclusive flags", 465 c, mutually_exclusive_flags[fo_index]); 466 } 467 mef_flags[i] = true; 468 found_this = true; 469 } 470 } 471 } 472 473 /* 474 * Initialise sort opts data. 475 */ 476 static void 477 set_sort_opts(void) 478 { 479 memset(&default_sort_mods_object, 0, 480 sizeof(default_sort_mods_object)); 481 memset(&sort_opts_vals, 0, sizeof(sort_opts_vals)); 482 default_sort_mods_object.func = 483 get_sort_func(&default_sort_mods_object); 484 } 485 486 /* 487 * Set a sort modifier on a sort modifiers object. 488 */ 489 static bool 490 set_sort_modifier(struct sort_mods *sm, int c) 491 { 492 switch (c) { 493 case 'b': 494 sm->bflag = true; 495 break; 496 case 'd': 497 sm->dflag = true; 498 break; 499 case 'f': 500 sm->fflag = true; 501 break; 502 case 'g': 503 sm->gflag = true; 504 need_hint = true; 505 break; 506 case 'i': 507 sm->iflag = true; 508 break; 509 case 'R': 510 sm->Rflag = true; 511 need_random = true; 512 break; 513 case 'M': 514 initialise_months(); 515 sm->Mflag = true; 516 need_hint = true; 517 break; 518 case 'n': 519 sm->nflag = true; 520 need_hint = true; 521 print_symbols_on_debug = true; 522 break; 523 case 'r': 524 sm->rflag = true; 525 break; 526 case 'V': 527 sm->Vflag = true; 528 break; 529 case 'h': 530 sm->hflag = true; 531 need_hint = true; 532 print_symbols_on_debug = true; 533 break; 534 default: 535 return false; 536 } 537 sort_opts_vals.complex_sort = true; 538 sm->func = get_sort_func(sm); 539 540 return true; 541 } 542 543 /* 544 * Parse POS in -k option. 545 */ 546 static int 547 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second) 548 { 549 regmatch_t pmatch[4]; 550 regex_t re; 551 char *c, *f; 552 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$"; 553 size_t len, nmatch; 554 int ret; 555 556 ret = -1; 557 nmatch = 4; 558 c = f = NULL; 559 560 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 561 return -1; 562 563 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 564 goto end; 565 566 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 567 goto end; 568 569 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 570 goto end; 571 572 len = pmatch[1].rm_eo - pmatch[1].rm_so; 573 574 f = sort_malloc(len + 1); 575 memcpy(f, s + pmatch[1].rm_so, len); 576 f[len] = '\0'; 577 578 if (second) { 579 errno = 0; 580 ks->f2 = (size_t)strtoul(f, NULL, 10); 581 if (errno != 0) 582 goto end; 583 if (ks->f2 == 0) { 584 warn("0 field in key specs"); 585 goto end; 586 } 587 } else { 588 errno = 0; 589 ks->f1 = (size_t)strtoul(f, NULL, 10); 590 if (errno != 0) 591 goto end; 592 if (ks->f1 == 0) { 593 warn("0 field in key specs"); 594 goto end; 595 } 596 } 597 598 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 599 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 600 601 c = sort_malloc(len + 1); 602 memcpy(c, s + pmatch[2].rm_so + 1, len); 603 c[len] = '\0'; 604 605 if (second) { 606 errno = 0; 607 ks->c2 = (size_t)strtoul(c, NULL, 10); 608 if (errno != 0) 609 goto end; 610 } else { 611 errno = 0; 612 ks->c1 = (size_t)strtoul(c, NULL, 10); 613 if (errno != 0) 614 goto end; 615 if (ks->c1 == 0) { 616 warn("0 column in key specs"); 617 goto end; 618 } 619 } 620 } else { 621 if (second) 622 ks->c2 = 0; 623 else 624 ks->c1 = 1; 625 } 626 627 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 628 regoff_t i = 0; 629 630 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) { 631 check_mutually_exclusive_flags(s[i], mef_flags); 632 if (s[i] == 'b') { 633 if (second) 634 ks->pos2b = true; 635 else 636 ks->pos1b = true; 637 } else if (!set_sort_modifier(&(ks->sm), s[i])) 638 goto end; 639 } 640 } 641 642 ret = 0; 643 644 end: 645 sort_free(c); 646 sort_free(f); 647 regfree(&re); 648 649 return ret; 650 } 651 652 /* 653 * Parse -k option value. 654 */ 655 static int 656 parse_k(const char *s, struct key_specs *ks) 657 { 658 int ret = -1; 659 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 660 { false, false, false, false, false, false }; 661 662 if (*s != '\0') { 663 char *sptr; 664 665 sptr = strchr(s, ','); 666 if (sptr) { 667 size_t size1; 668 char *pos1, *pos2; 669 670 size1 = sptr - s; 671 672 if (size1 < 1) 673 return -1; 674 675 pos1 = sort_malloc(size1 + 1); 676 memcpy(pos1, s, size1); 677 pos1[size1] = '\0'; 678 679 ret = parse_pos(pos1, ks, mef_flags, false); 680 681 sort_free(pos1); 682 if (ret < 0) 683 return ret; 684 685 pos2 = sort_strdup(sptr + 1); 686 ret = parse_pos(pos2, ks, mef_flags, true); 687 sort_free(pos2); 688 } else 689 ret = parse_pos(s, ks, mef_flags, false); 690 } 691 692 return ret; 693 } 694 695 /* 696 * Parse POS in +POS -POS option. 697 */ 698 static int 699 parse_pos_obs(const char *s, size_t *nf, size_t *nc, char *sopts, size_t sopts_size) 700 { 701 regex_t re; 702 regmatch_t pmatch[4]; 703 char *c, *f; 704 const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$"; 705 int ret; 706 size_t len, nmatch; 707 708 ret = -1; 709 nmatch = 4; 710 c = f = NULL; 711 *nc = *nf = 0; 712 713 if (regcomp(&re, sregexp, REG_EXTENDED) != 0) 714 return -1; 715 716 if (regexec(&re, s, nmatch, pmatch, 0) != 0) 717 goto end; 718 719 if (pmatch[0].rm_eo <= pmatch[0].rm_so) 720 goto end; 721 722 if (pmatch[1].rm_eo <= pmatch[1].rm_so) 723 goto end; 724 725 len = pmatch[1].rm_eo - pmatch[1].rm_so; 726 727 f = sort_malloc(len + 1); 728 memcpy(f, s + pmatch[1].rm_so, len); 729 f[len] = '\0'; 730 731 errno = 0; 732 *nf = (size_t)strtoul(f, NULL, 10); 733 if (errno != 0) 734 errx(2, "Invalid key position"); 735 736 if (pmatch[2].rm_eo > pmatch[2].rm_so) { 737 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1; 738 739 c = sort_malloc(len + 1); 740 memcpy(c, s + pmatch[2].rm_so + 1, len); 741 c[len] = '\0'; 742 743 errno = 0; 744 *nc = (size_t)strtoul(c, NULL, 10); 745 if (errno != 0) 746 errx(2, "Invalid key position"); 747 } 748 749 if (pmatch[3].rm_eo > pmatch[3].rm_so) { 750 751 len = pmatch[3].rm_eo - pmatch[3].rm_so; 752 753 if (len >= sopts_size) 754 errx(2, "Invalid key position"); 755 memcpy(sopts, s + pmatch[3].rm_so, len); 756 sopts[len] = '\0'; 757 } 758 759 ret = 0; 760 761 end: 762 sort_free(c); 763 sort_free(f); 764 regfree(&re); 765 766 return ret; 767 } 768 769 /* 770 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax 771 */ 772 static void 773 fix_obsolete_keys(int *argc, char **argv) 774 { 775 char sopt[129]; 776 int i; 777 778 for (i = 1; i < *argc; i++) { 779 const char *arg1 = argv[i]; 780 781 if (arg1[0] == '+') { 782 size_t c1, f1; 783 char sopts1[128]; 784 785 sopts1[0] = 0; 786 c1 = f1 = 0; 787 788 if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1, 789 sizeof(sopts1)) < 0) 790 continue; 791 792 f1 += 1; 793 c1 += 1; 794 if (i + 1 < *argc) { 795 const char *arg2 = argv[i + 1]; 796 797 if (arg2[0] == '-') { 798 size_t c2, f2; 799 char sopts2[128]; 800 801 sopts2[0] = 0; 802 c2 = f2 = 0; 803 804 if (parse_pos_obs(arg2 + 1, &f2, &c2, 805 sopts2, sizeof(sopts2)) >= 0) { 806 int j; 807 if (c2 > 0) 808 f2 += 1; 809 snprintf(sopt, sizeof(sopt), 810 "-k%zu.%zu%s,%zu.%zu%s", 811 f1, c1, sopts1, f2, 812 c2, sopts2); 813 argv[i] = sort_strdup(sopt); 814 for (j = i + 1; j + 1 < *argc; j++) 815 argv[j] = argv[j + 1]; 816 *argc -= 1; 817 continue; 818 } 819 } 820 } 821 snprintf(sopt, sizeof(sopt), "-k%zu.%zu%s", 822 f1, c1, sopts1); 823 argv[i] = sort_strdup(sopt); 824 } 825 } 826 } 827 828 /* 829 * Set random seed 830 */ 831 static void 832 set_random_seed(void) 833 { 834 if (!need_random) 835 return; 836 837 MD5Init(&md5_ctx); 838 if (random_source != NULL) { 839 unsigned char buf[BUFSIZ]; 840 size_t nr; 841 FILE *fp; 842 843 if ((fp = fopen(random_source, "r")) == NULL) 844 err(2, "%s", random_source); 845 while ((nr = fread(buf, 1, sizeof(buf), fp)) != 0) 846 MD5Update(&md5_ctx, buf, nr); 847 if (ferror(fp)) 848 err(2, "%s", random_source); 849 fclose(fp); 850 } else { 851 unsigned char rsd[1024]; 852 853 arc4random_buf(rsd, sizeof(rsd)); 854 MD5Update(&md5_ctx, rsd, sizeof(rsd)); 855 } 856 } 857 858 /* 859 * Main function. 860 */ 861 int 862 main(int argc, char *argv[]) 863 { 864 char *outfile, *real_outfile, *sflag; 865 int c; 866 size_t i; 867 struct sort_mods *sm = &default_sort_mods_object; 868 bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = 869 { false, false, false, false, false, false }; 870 871 outfile = "-"; 872 real_outfile = NULL; 873 sflag = NULL; 874 875 init_tmp_files(); 876 877 set_signal_handler(); 878 879 atexit(clear_tmp_files); 880 881 set_hw_params(); 882 set_locale(); 883 set_tmpdir(); 884 set_sort_opts(); 885 886 fix_obsolete_keys(&argc, argv); 887 888 while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL)) 889 != -1)) { 890 891 check_mutually_exclusive_flags(c, mef_flags); 892 893 if (!set_sort_modifier(sm, c)) { 894 switch (c) { 895 case 'c': 896 sort_opts_vals.cflag = true; 897 if (optarg) { 898 if (!strcmp(optarg, "diagnose-first")) 899 ; 900 else if (!strcmp(optarg, "silent") || 901 !strcmp(optarg, "quiet")) 902 sort_opts_vals.csilentflag = true; 903 else if (*optarg) 904 unknown(optarg); 905 } 906 break; 907 case 'C': 908 sort_opts_vals.cflag = true; 909 sort_opts_vals.csilentflag = true; 910 break; 911 case 'k': 912 { 913 sort_opts_vals.complex_sort = true; 914 sort_opts_vals.kflag = true; 915 916 keys = sort_reallocarray(keys, keys_num + 1, 917 sizeof(struct key_specs)); 918 memset(&(keys[keys_num]), 0, 919 sizeof(struct key_specs)); 920 #ifndef GNUSORT_COMPATIBILITY 921 keys[keys_num].pos1b = default_sort_mods->bflag; 922 keys[keys_num].pos2b = default_sort_mods->bflag; 923 #endif 924 925 if (parse_k(optarg, &(keys[keys_num++])) < 0) 926 errc(2, EINVAL, "-k %s", optarg); 927 928 break; 929 } 930 case 'm': 931 sort_opts_vals.mflag = true; 932 break; 933 case 'o': 934 outfile = optarg; 935 break; 936 case 's': 937 sort_opts_vals.sflag = true; 938 break; 939 case 'S': 940 sflag = optarg; 941 break; 942 case 'T': 943 tmpdir = optarg; 944 break; 945 case 't': 946 while (strlen(optarg) > 1) { 947 if (optarg[0] != '\\') { 948 errc(2, EINVAL, "%s", optarg); 949 } 950 optarg += 1; 951 if (*optarg == '0') { 952 *optarg = 0; 953 break; 954 } 955 } 956 sort_opts_vals.tflag = true; 957 sort_opts_vals.field_sep = btowc(optarg[0]); 958 if (sort_opts_vals.field_sep == WEOF) { 959 errno = EINVAL; 960 err(2, NULL); 961 } 962 if (!gnusort_numeric_compatibility) { 963 if (symbol_decimal_point == sort_opts_vals.field_sep) 964 symbol_decimal_point = WEOF; 965 if (symbol_thousands_sep == sort_opts_vals.field_sep) 966 symbol_thousands_sep = WEOF; 967 if (symbol_negative_sign == sort_opts_vals.field_sep) 968 symbol_negative_sign = WEOF; 969 if (symbol_positive_sign == sort_opts_vals.field_sep) 970 symbol_positive_sign = WEOF; 971 } 972 break; 973 case 'u': 974 sort_opts_vals.uflag = true; 975 /* stable sort for the correct unique val */ 976 sort_opts_vals.sflag = true; 977 break; 978 case 'z': 979 sort_opts_vals.zflag = true; 980 break; 981 case SORT_OPT: 982 if (!strcmp(optarg, "general-numeric")) 983 set_sort_modifier(sm, 'g'); 984 else if (!strcmp(optarg, "human-numeric")) 985 set_sort_modifier(sm, 'h'); 986 else if (!strcmp(optarg, "numeric")) 987 set_sort_modifier(sm, 'n'); 988 else if (!strcmp(optarg, "month")) 989 set_sort_modifier(sm, 'M'); 990 else if (!strcmp(optarg, "random")) 991 set_sort_modifier(sm, 'R'); 992 else 993 unknown(optarg); 994 break; 995 case QSORT_OPT: 996 sort_opts_vals.sort_method = SORT_QSORT; 997 break; 998 case 'H': 999 sort_opts_vals.sort_method = SORT_MERGESORT; 1000 break; 1001 case MMAP_OPT: 1002 use_mmap = true; 1003 break; 1004 case HEAPSORT_OPT: 1005 sort_opts_vals.sort_method = SORT_HEAPSORT; 1006 break; 1007 case RADIXSORT_OPT: 1008 sort_opts_vals.sort_method = SORT_RADIXSORT; 1009 break; 1010 case RANDOMSOURCE_OPT: 1011 random_source = optarg; 1012 break; 1013 case COMPRESSPROGRAM_OPT: 1014 compress_program = optarg; 1015 break; 1016 case FF_OPT: 1017 read_fns_from_file0(optarg); 1018 break; 1019 case BS_OPT: 1020 { 1021 const char *errstr; 1022 1023 max_open_files = strtonum(optarg, 2, 1024 UINT_MAX - 1, &errstr) + 1; 1025 if (errstr != NULL) 1026 errx(2, "--batch-size argument is %s", 1027 errstr); 1028 break; 1029 } 1030 case VERSION_OPT: 1031 printf("%s\n", VERSION); 1032 exit(EXIT_SUCCESS); 1033 /* NOTREACHED */ 1034 break; 1035 case DEBUG_OPT: 1036 debug_sort = true; 1037 break; 1038 case HELP_OPT: 1039 usage(0); 1040 /* NOTREACHED */ 1041 break; 1042 default: 1043 usage(2); 1044 /* NOTREACHED */ 1045 } 1046 } 1047 } 1048 argc -= optind; 1049 argv += optind; 1050 1051 #ifndef GNUSORT_COMPATIBILITY 1052 if (argc > 2 && strcmp(argv[argc - 2], "-o") == 0) { 1053 outfile = argv[argc - 1]; 1054 argc -= 2; 1055 } 1056 #endif 1057 1058 if (sort_opts_vals.cflag && argc > 1) 1059 errx(2, "only one input file is allowed with the -%c flag", 1060 sort_opts_vals.csilentflag ? 'C' : 'c'); 1061 1062 if (sflag != NULL) 1063 available_free_memory = parse_memory_buffer_value(sflag); 1064 1065 if (keys_num == 0) { 1066 keys_num = 1; 1067 keys = sort_reallocarray(keys, 1, sizeof(struct key_specs)); 1068 memset(&(keys[0]), 0, sizeof(struct key_specs)); 1069 keys[0].c1 = 1; 1070 #ifdef GNUSORT_COMPATIBILITY 1071 keys[0].pos1b = sm->bflag; 1072 keys[0].pos2b = sm->bflag; 1073 #endif 1074 memcpy(&(keys[0].sm), sm, sizeof(struct sort_mods)); 1075 } 1076 1077 for (i = 0; i < keys_num; i++) { 1078 struct key_specs *ks; 1079 1080 ks = &(keys[i]); 1081 1082 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) && 1083 !(ks->pos2b)) { 1084 #ifdef GNUSORT_COMPATIBILITY 1085 ks->pos1b = sm->bflag; 1086 ks->pos2b = sm->bflag; 1087 #endif 1088 memcpy(&(ks->sm), sm, sizeof(struct sort_mods)); 1089 } 1090 1091 ks->sm.func = get_sort_func(&(ks->sm)); 1092 } 1093 1094 if (argv_from_file0) { 1095 argc = argc_from_file0; 1096 argv = argv_from_file0; 1097 } 1098 1099 if (debug_sort) { 1100 printf("Memory to be used for sorting: %llu\n", 1101 available_free_memory); 1102 printf("Using collate rules of %s locale\n", 1103 setlocale(LC_COLLATE, NULL)); 1104 if (byte_sort) 1105 printf("Byte sort is used\n"); 1106 if (print_symbols_on_debug) { 1107 printf("Decimal Point: <%lc>\n", symbol_decimal_point); 1108 if (symbol_thousands_sep) 1109 printf("Thousands separator: <%lc>\n", 1110 symbol_thousands_sep); 1111 printf("Positive sign: <%lc>\n", symbol_positive_sign); 1112 printf("Negative sign: <%lc>\n", symbol_negative_sign); 1113 } 1114 } 1115 1116 if (sort_opts_vals.cflag) 1117 return check(argc ? *argv : "-"); 1118 1119 set_random_seed(); 1120 1121 /* Case when the outfile equals one of the input files: */ 1122 if (strcmp(outfile, "-") != 0) { 1123 struct stat sb; 1124 int fd, i; 1125 1126 for (i = 0; i < argc; ++i) { 1127 if (strcmp(argv[i], outfile) == 0) { 1128 if (stat(outfile, &sb) == -1) 1129 err(2, "%s", outfile); 1130 if (access(outfile, W_OK) == -1) 1131 err(2, "%s", outfile); 1132 real_outfile = outfile; 1133 sort_asprintf(&outfile, "%s.XXXXXXXXXX", 1134 real_outfile); 1135 if ((fd = mkstemp(outfile)) == -1 || 1136 fchmod(fd, sb.st_mode & ALLPERMS) == -1) 1137 err(2, "%s", outfile); 1138 close(fd); 1139 tmp_file_atexit(outfile); 1140 break; 1141 } 1142 } 1143 } 1144 1145 if (!sort_opts_vals.mflag) { 1146 struct file_list fl; 1147 struct sort_list list; 1148 1149 sort_list_init(&list); 1150 file_list_init(&fl, true); 1151 1152 if (argc < 1) 1153 procfile("-", &list, &fl); 1154 else { 1155 while (argc > 0) { 1156 procfile(*argv, &list, &fl); 1157 --argc; 1158 ++argv; 1159 } 1160 } 1161 1162 if (fl.count < 1) 1163 sort_list_to_file(&list, outfile); 1164 else { 1165 if (list.count > 0) { 1166 char *flast = new_tmp_file_name(); 1167 1168 sort_list_to_file(&list, flast); 1169 file_list_add(&fl, flast, false); 1170 } 1171 merge_files(&fl, outfile); 1172 } 1173 1174 file_list_clean(&fl); 1175 1176 /* 1177 * We are about to exit the program, so we can ignore 1178 * the clean-up for speed 1179 * 1180 * sort_list_clean(&list); 1181 */ 1182 1183 } else { 1184 struct file_list fl; 1185 1186 file_list_init(&fl, false); 1187 file_list_populate(&fl, argc, argv, true); 1188 merge_files(&fl, outfile); 1189 file_list_clean(&fl); 1190 } 1191 1192 if (real_outfile) { 1193 if (rename(outfile, real_outfile) < 0) 1194 err(2, "%s", real_outfile); 1195 sort_free(outfile); 1196 } 1197 1198 return 0; 1199 } 1200