1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file args.c 4 /// \brief Argument parsing 5 /// 6 /// \note Filter-specific options parsing is in options.c. 7 // 8 // Author: Lasse Collin 9 // 10 // This file has been put into the public domain. 11 // You can do whatever you want with this file. 12 // 13 /////////////////////////////////////////////////////////////////////////////// 14 15 #include "private.h" 16 17 #include "getopt.h" 18 #include <ctype.h> 19 20 21 bool opt_stdout = false; 22 bool opt_force = false; 23 bool opt_keep_original = false; 24 bool opt_robot = false; 25 bool opt_ignore_check = false; 26 27 // We don't modify or free() this, but we need to assign it in some 28 // non-const pointers. 29 const char stdin_filename[] = "(stdin)"; 30 31 32 /// Parse and set the memory usage limit for compression, decompression, 33 /// and/or multithreaded decompression. 34 static void 35 parse_memlimit(const char *name, const char *name_percentage, const char *str, 36 bool set_compress, bool set_decompress, bool set_mtdec) 37 { 38 bool is_percentage = false; 39 uint64_t value; 40 41 const size_t len = strlen(str); 42 if (len > 0 && str[len - 1] == '%') { 43 // Make a copy so that we can get rid of %. 44 // 45 // In the past str wasn't const and we modified it directly 46 // but that modified argv[] and thus affected what was visible 47 // in "ps auxf" or similar tools which was confusing. For 48 // example, --memlimit=50% would show up as --memlimit=50 49 // since the percent sign was overwritten here. 50 char *s = xstrdup(str); 51 s[len - 1] = '\0'; 52 is_percentage = true; 53 value = str_to_uint64(name_percentage, s, 1, 100); 54 free(s); 55 } else { 56 // On 32-bit systems, SIZE_MAX would make more sense than 57 // UINT64_MAX. But use UINT64_MAX still so that scripts 58 // that assume > 4 GiB values don't break. 59 value = str_to_uint64(name, str, 0, UINT64_MAX); 60 } 61 62 hardware_memlimit_set(value, set_compress, set_decompress, set_mtdec, 63 is_percentage); 64 return; 65 } 66 67 68 static void 69 parse_block_list(const char *str_const) 70 { 71 // We need a modifiable string in the for-loop. 72 char *str_start = xstrdup(str_const); 73 char *str = str_start; 74 75 // It must be non-empty and not begin with a comma. 76 if (str[0] == '\0' || str[0] == ',') 77 message_fatal(_("%s: Invalid argument to --block-list"), str); 78 79 // Count the number of comma-separated strings. 80 size_t count = 1; 81 for (size_t i = 0; str[i] != '\0'; ++i) 82 if (str[i] == ',') 83 ++count; 84 85 // Prevent an unlikely integer overflow. 86 if (count > SIZE_MAX / sizeof(uint64_t) - 1) 87 message_fatal(_("%s: Too many arguments to --block-list"), 88 str); 89 90 // Allocate memory to hold all the sizes specified. 91 // If --block-list was specified already, its value is forgotten. 92 free(opt_block_list); 93 opt_block_list = xmalloc((count + 1) * sizeof(uint64_t)); 94 95 for (size_t i = 0; i < count; ++i) { 96 // Locate the next comma and replace it with \0. 97 char *p = strchr(str, ','); 98 if (p != NULL) 99 *p = '\0'; 100 101 if (str[0] == '\0') { 102 // There is no string, that is, a comma follows 103 // another comma. Use the previous value. 104 // 105 // NOTE: We checked earlier that the first char 106 // of the whole list cannot be a comma. 107 assert(i > 0); 108 opt_block_list[i] = opt_block_list[i - 1]; 109 } else { 110 opt_block_list[i] = str_to_uint64("block-list", str, 111 0, UINT64_MAX); 112 113 // Zero indicates no more new Blocks. 114 if (opt_block_list[i] == 0) { 115 if (i + 1 != count) 116 message_fatal(_("0 can only be used " 117 "as the last element " 118 "in --block-list")); 119 120 opt_block_list[i] = UINT64_MAX; 121 } 122 } 123 124 str = p + 1; 125 } 126 127 // Terminate the array. 128 opt_block_list[count] = 0; 129 130 free(str_start); 131 return; 132 } 133 134 135 static void 136 parse_real(args_info *args, int argc, char **argv) 137 { 138 enum { 139 OPT_X86 = INT_MIN, 140 OPT_POWERPC, 141 OPT_IA64, 142 OPT_ARM, 143 OPT_ARMTHUMB, 144 OPT_ARM64, 145 OPT_SPARC, 146 OPT_DELTA, 147 OPT_LZMA1, 148 OPT_LZMA2, 149 150 OPT_SINGLE_STREAM, 151 OPT_NO_SPARSE, 152 OPT_FILES, 153 OPT_FILES0, 154 OPT_BLOCK_SIZE, 155 OPT_BLOCK_LIST, 156 OPT_MEM_COMPRESS, 157 OPT_MEM_DECOMPRESS, 158 OPT_MEM_MT_DECOMPRESS, 159 OPT_NO_ADJUST, 160 OPT_INFO_MEMORY, 161 OPT_ROBOT, 162 OPT_FLUSH_TIMEOUT, 163 OPT_IGNORE_CHECK, 164 }; 165 166 static const char short_opts[] 167 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; 168 169 static const struct option long_opts[] = { 170 // Operation mode 171 { "compress", no_argument, NULL, 'z' }, 172 { "decompress", no_argument, NULL, 'd' }, 173 { "uncompress", no_argument, NULL, 'd' }, 174 { "test", no_argument, NULL, 't' }, 175 { "list", no_argument, NULL, 'l' }, 176 177 // Operation modifiers 178 { "keep", no_argument, NULL, 'k' }, 179 { "force", no_argument, NULL, 'f' }, 180 { "stdout", no_argument, NULL, 'c' }, 181 { "to-stdout", no_argument, NULL, 'c' }, 182 { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM }, 183 { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, 184 { "suffix", required_argument, NULL, 'S' }, 185 // { "recursive", no_argument, NULL, 'r' }, // TODO 186 { "files", optional_argument, NULL, OPT_FILES }, 187 { "files0", optional_argument, NULL, OPT_FILES0 }, 188 189 // Basic compression settings 190 { "format", required_argument, NULL, 'F' }, 191 { "check", required_argument, NULL, 'C' }, 192 { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK }, 193 { "block-size", required_argument, NULL, OPT_BLOCK_SIZE }, 194 { "block-list", required_argument, NULL, OPT_BLOCK_LIST }, 195 { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS }, 196 { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, 197 { "memlimit-mt-decompress", required_argument, NULL, OPT_MEM_MT_DECOMPRESS }, 198 { "memlimit", required_argument, NULL, 'M' }, 199 { "memory", required_argument, NULL, 'M' }, // Old alias 200 { "no-adjust", no_argument, NULL, OPT_NO_ADJUST }, 201 { "threads", required_argument, NULL, 'T' }, 202 { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT }, 203 204 { "extreme", no_argument, NULL, 'e' }, 205 { "fast", no_argument, NULL, '0' }, 206 { "best", no_argument, NULL, '9' }, 207 208 // Filters 209 { "lzma1", optional_argument, NULL, OPT_LZMA1 }, 210 { "lzma2", optional_argument, NULL, OPT_LZMA2 }, 211 { "x86", optional_argument, NULL, OPT_X86 }, 212 { "powerpc", optional_argument, NULL, OPT_POWERPC }, 213 { "ia64", optional_argument, NULL, OPT_IA64 }, 214 { "arm", optional_argument, NULL, OPT_ARM }, 215 { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, 216 { "arm64", optional_argument, NULL, OPT_ARM64 }, 217 { "sparc", optional_argument, NULL, OPT_SPARC }, 218 { "delta", optional_argument, NULL, OPT_DELTA }, 219 220 // Other options 221 { "quiet", no_argument, NULL, 'q' }, 222 { "verbose", no_argument, NULL, 'v' }, 223 { "no-warn", no_argument, NULL, 'Q' }, 224 { "robot", no_argument, NULL, OPT_ROBOT }, 225 { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, 226 { "help", no_argument, NULL, 'h' }, 227 { "long-help", no_argument, NULL, 'H' }, 228 { "version", no_argument, NULL, 'V' }, 229 230 { NULL, 0, NULL, 0 } 231 }; 232 233 int c; 234 235 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) 236 != -1) { 237 switch (c) { 238 // Compression preset (also for decompression if --format=raw) 239 case '0': case '1': case '2': case '3': case '4': 240 case '5': case '6': case '7': case '8': case '9': 241 coder_set_preset((uint32_t)(c - '0')); 242 break; 243 244 // --memlimit-compress 245 case OPT_MEM_COMPRESS: 246 parse_memlimit("memlimit-compress", 247 "memlimit-compress%", optarg, 248 true, false, false); 249 break; 250 251 // --memlimit-decompress 252 case OPT_MEM_DECOMPRESS: 253 parse_memlimit("memlimit-decompress", 254 "memlimit-decompress%", optarg, 255 false, true, false); 256 break; 257 258 // --memlimit-mt-decompress 259 case OPT_MEM_MT_DECOMPRESS: 260 parse_memlimit("memlimit-mt-decompress", 261 "memlimit-mt-decompress%", optarg, 262 false, false, true); 263 break; 264 265 // --memlimit 266 case 'M': 267 parse_memlimit("memlimit", "memlimit%", optarg, 268 true, true, true); 269 break; 270 271 // --suffix 272 case 'S': 273 suffix_set(optarg); 274 break; 275 276 case 'T': { 277 // Since xz 5.4.0: Ignore leading '+' first. 278 const char *s = optarg; 279 if (optarg[0] == '+') 280 ++s; 281 282 // The max is from src/liblzma/common/common.h. 283 uint32_t t = str_to_uint64("threads", s, 0, 16384); 284 285 // If leading '+' was used then use multi-threaded 286 // mode even if exactly one thread was specified. 287 if (t == 1 && optarg[0] == '+') 288 t = UINT32_MAX; 289 290 hardware_threads_set(t); 291 break; 292 } 293 294 // --version 295 case 'V': 296 // This doesn't return. 297 message_version(); 298 299 // --stdout 300 case 'c': 301 opt_stdout = true; 302 break; 303 304 // --decompress 305 case 'd': 306 opt_mode = MODE_DECOMPRESS; 307 break; 308 309 // --extreme 310 case 'e': 311 coder_set_extreme(); 312 break; 313 314 // --force 315 case 'f': 316 opt_force = true; 317 break; 318 319 // --info-memory 320 case OPT_INFO_MEMORY: 321 // This doesn't return. 322 hardware_memlimit_show(); 323 324 // --help 325 case 'h': 326 // This doesn't return. 327 message_help(false); 328 329 // --long-help 330 case 'H': 331 // This doesn't return. 332 message_help(true); 333 334 // --list 335 case 'l': 336 opt_mode = MODE_LIST; 337 break; 338 339 // --keep 340 case 'k': 341 opt_keep_original = true; 342 break; 343 344 // --quiet 345 case 'q': 346 message_verbosity_decrease(); 347 break; 348 349 case 'Q': 350 set_exit_no_warn(); 351 break; 352 353 case 't': 354 opt_mode = MODE_TEST; 355 break; 356 357 // --verbose 358 case 'v': 359 message_verbosity_increase(); 360 break; 361 362 // --robot 363 case OPT_ROBOT: 364 opt_robot = true; 365 366 // This is to make sure that floating point numbers 367 // always have a dot as decimal separator. 368 setlocale(LC_NUMERIC, "C"); 369 break; 370 371 case 'z': 372 opt_mode = MODE_COMPRESS; 373 break; 374 375 // Filter setup 376 377 case OPT_X86: 378 coder_add_filter(LZMA_FILTER_X86, 379 options_bcj(optarg)); 380 break; 381 382 case OPT_POWERPC: 383 coder_add_filter(LZMA_FILTER_POWERPC, 384 options_bcj(optarg)); 385 break; 386 387 case OPT_IA64: 388 coder_add_filter(LZMA_FILTER_IA64, 389 options_bcj(optarg)); 390 break; 391 392 case OPT_ARM: 393 coder_add_filter(LZMA_FILTER_ARM, 394 options_bcj(optarg)); 395 break; 396 397 case OPT_ARMTHUMB: 398 coder_add_filter(LZMA_FILTER_ARMTHUMB, 399 options_bcj(optarg)); 400 break; 401 402 case OPT_ARM64: 403 coder_add_filter(LZMA_FILTER_ARM64, 404 options_bcj(optarg)); 405 break; 406 407 case OPT_SPARC: 408 coder_add_filter(LZMA_FILTER_SPARC, 409 options_bcj(optarg)); 410 break; 411 412 case OPT_DELTA: 413 coder_add_filter(LZMA_FILTER_DELTA, 414 options_delta(optarg)); 415 break; 416 417 case OPT_LZMA1: 418 coder_add_filter(LZMA_FILTER_LZMA1, 419 options_lzma(optarg)); 420 break; 421 422 case OPT_LZMA2: 423 coder_add_filter(LZMA_FILTER_LZMA2, 424 options_lzma(optarg)); 425 break; 426 427 // Other 428 429 // --format 430 case 'F': { 431 // Just in case, support both "lzma" and "alone" since 432 // the latter was used for forward compatibility in 433 // LZMA Utils 4.32.x. 434 static const struct { 435 char str[8]; 436 enum format_type format; 437 } types[] = { 438 { "auto", FORMAT_AUTO }, 439 { "xz", FORMAT_XZ }, 440 { "lzma", FORMAT_LZMA }, 441 { "alone", FORMAT_LZMA }, 442 #ifdef HAVE_LZIP_DECODER 443 { "lzip", FORMAT_LZIP }, 444 #endif 445 { "raw", FORMAT_RAW }, 446 }; 447 448 size_t i = 0; 449 while (strcmp(types[i].str, optarg) != 0) 450 if (++i == ARRAY_SIZE(types)) 451 message_fatal(_("%s: Unknown file " 452 "format type"), 453 optarg); 454 455 opt_format = types[i].format; 456 break; 457 } 458 459 // --check 460 case 'C': { 461 static const struct { 462 char str[8]; 463 lzma_check check; 464 } types[] = { 465 { "none", LZMA_CHECK_NONE }, 466 { "crc32", LZMA_CHECK_CRC32 }, 467 { "crc64", LZMA_CHECK_CRC64 }, 468 { "sha256", LZMA_CHECK_SHA256 }, 469 }; 470 471 size_t i = 0; 472 while (strcmp(types[i].str, optarg) != 0) { 473 if (++i == ARRAY_SIZE(types)) 474 message_fatal(_("%s: Unsupported " 475 "integrity " 476 "check type"), optarg); 477 } 478 479 // Use a separate check in case we are using different 480 // liblzma than what was used to compile us. 481 if (!lzma_check_is_supported(types[i].check)) 482 message_fatal(_("%s: Unsupported integrity " 483 "check type"), optarg); 484 485 coder_set_check(types[i].check); 486 break; 487 } 488 489 case OPT_IGNORE_CHECK: 490 opt_ignore_check = true; 491 break; 492 493 case OPT_BLOCK_SIZE: 494 opt_block_size = str_to_uint64("block-size", optarg, 495 0, LZMA_VLI_MAX); 496 break; 497 498 case OPT_BLOCK_LIST: { 499 parse_block_list(optarg); 500 break; 501 } 502 503 case OPT_SINGLE_STREAM: 504 opt_single_stream = true; 505 break; 506 507 case OPT_NO_SPARSE: 508 io_no_sparse(); 509 break; 510 511 case OPT_FILES: 512 args->files_delim = '\n'; 513 514 // Fall through 515 516 case OPT_FILES0: 517 if (args->files_name != NULL) 518 message_fatal(_("Only one file can be " 519 "specified with `--files' " 520 "or `--files0'.")); 521 522 if (optarg == NULL) { 523 args->files_name = stdin_filename; 524 args->files_file = stdin; 525 } else { 526 args->files_name = optarg; 527 args->files_file = fopen(optarg, 528 c == OPT_FILES ? "r" : "rb"); 529 if (args->files_file == NULL) 530 // TRANSLATORS: This is a translatable 531 // string because French needs a space 532 // before the colon ("%s : %s"). 533 message_fatal(_("%s: %s"), optarg, 534 strerror(errno)); 535 } 536 537 break; 538 539 case OPT_NO_ADJUST: 540 opt_auto_adjust = false; 541 break; 542 543 case OPT_FLUSH_TIMEOUT: 544 opt_flush_timeout = str_to_uint64("flush-timeout", 545 optarg, 0, UINT64_MAX); 546 break; 547 548 default: 549 message_try_help(); 550 tuklib_exit(E_ERROR, E_ERROR, false); 551 } 552 } 553 554 return; 555 } 556 557 558 static void 559 parse_environment(args_info *args, char *argv0, const char *varname) 560 { 561 char *env = getenv(varname); 562 if (env == NULL) 563 return; 564 565 // We modify the string, so make a copy of it. 566 env = xstrdup(env); 567 568 // Calculate the number of arguments in env. argc stats at one 569 // to include space for the program name. 570 int argc = 1; 571 bool prev_was_space = true; 572 for (size_t i = 0; env[i] != '\0'; ++i) { 573 // NOTE: Cast to unsigned char is needed so that correct 574 // value gets passed to isspace(), which expects 575 // unsigned char cast to int. Casting to int is done 576 // automatically due to integer promotion, but we need to 577 // force char to unsigned char manually. Otherwise 8-bit 578 // characters would get promoted to wrong value if 579 // char is signed. 580 if (isspace((unsigned char)env[i])) { 581 prev_was_space = true; 582 } else if (prev_was_space) { 583 prev_was_space = false; 584 585 // Keep argc small enough to fit into a signed int 586 // and to keep it usable for memory allocation. 587 if (++argc == my_min( 588 INT_MAX, SIZE_MAX / sizeof(char *))) 589 message_fatal(_("The environment variable " 590 "%s contains too many " 591 "arguments"), varname); 592 } 593 } 594 595 // Allocate memory to hold pointers to the arguments. Add one to get 596 // space for the terminating NULL (if some systems happen to need it). 597 char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); 598 argv[0] = argv0; 599 argv[argc] = NULL; 600 601 // Go through the string again. Split the arguments using '\0' 602 // characters and add pointers to the resulting strings to argv. 603 argc = 1; 604 prev_was_space = true; 605 for (size_t i = 0; env[i] != '\0'; ++i) { 606 if (isspace((unsigned char)env[i])) { 607 prev_was_space = true; 608 env[i] = '\0'; 609 } else if (prev_was_space) { 610 prev_was_space = false; 611 argv[argc++] = env + i; 612 } 613 } 614 615 // Parse the argument list we got from the environment. All non-option 616 // arguments i.e. filenames are ignored. 617 parse_real(args, argc, argv); 618 619 // Reset the state of the getopt_long() so that we can parse the 620 // command line options too. There are two incompatible ways to 621 // do it. 622 #ifdef HAVE_OPTRESET 623 // BSD 624 optind = 1; 625 optreset = 1; 626 #else 627 // GNU, Solaris 628 optind = 0; 629 #endif 630 631 // We don't need the argument list from environment anymore. 632 free(argv); 633 free(env); 634 635 return; 636 } 637 638 639 extern void 640 args_parse(args_info *args, int argc, char **argv) 641 { 642 // Initialize those parts of *args that we need later. 643 args->files_name = NULL; 644 args->files_file = NULL; 645 args->files_delim = '\0'; 646 647 // Check how we were called. 648 { 649 // Remove the leading path name, if any. 650 const char *name = strrchr(argv[0], '/'); 651 if (name == NULL) 652 name = argv[0]; 653 else 654 ++name; 655 656 // NOTE: It's possible that name[0] is now '\0' if argv[0] 657 // is weird, but it doesn't matter here. 658 659 // Look for full command names instead of substrings like 660 // "un", "cat", and "lz" to reduce possibility of false 661 // positives when the programs have been renamed. 662 if (strstr(name, "xzcat") != NULL) { 663 opt_mode = MODE_DECOMPRESS; 664 opt_stdout = true; 665 } else if (strstr(name, "unxz") != NULL) { 666 opt_mode = MODE_DECOMPRESS; 667 } else if (strstr(name, "lzcat") != NULL) { 668 opt_format = FORMAT_LZMA; 669 opt_mode = MODE_DECOMPRESS; 670 opt_stdout = true; 671 } else if (strstr(name, "unlzma") != NULL) { 672 opt_format = FORMAT_LZMA; 673 opt_mode = MODE_DECOMPRESS; 674 } else if (strstr(name, "lzma") != NULL) { 675 opt_format = FORMAT_LZMA; 676 } 677 } 678 679 // First the flags from the environment 680 parse_environment(args, argv[0], "XZ_DEFAULTS"); 681 parse_environment(args, argv[0], "XZ_OPT"); 682 683 // Then from the command line 684 parse_real(args, argc, argv); 685 686 // If encoder or decoder support was omitted at build time, 687 // show an error now so that the rest of the code can rely on 688 // that whatever is in opt_mode is also supported. 689 #ifndef HAVE_ENCODERS 690 if (opt_mode == MODE_COMPRESS) 691 message_fatal(_("Compression support was disabled " 692 "at build time")); 693 #endif 694 #ifndef HAVE_DECODERS 695 // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS 696 // is the only valid choice. 697 if (opt_mode != MODE_COMPRESS) 698 message_fatal(_("Decompression support was disabled " 699 "at build time")); 700 #endif 701 702 #ifdef HAVE_LZIP_DECODER 703 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_LZIP) 704 message_fatal(_("Compression of lzip files (.lz) " 705 "is not supported")); 706 #endif 707 708 // Never remove the source file when the destination is not on disk. 709 // In test mode the data is written nowhere, but setting opt_stdout 710 // will make the rest of the code behave well. 711 if (opt_stdout || opt_mode == MODE_TEST) { 712 opt_keep_original = true; 713 opt_stdout = true; 714 } 715 716 // When compressing, if no --format flag was used, or it 717 // was --format=auto, we compress to the .xz format. 718 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) 719 opt_format = FORMAT_XZ; 720 721 // Compression settings need to be validated (options themselves and 722 // their memory usage) when compressing to any file format. It has to 723 // be done also when uncompressing raw data, since for raw decoding 724 // the options given on the command line are used to know what kind 725 // of raw data we are supposed to decode. 726 if (opt_mode == MODE_COMPRESS || (opt_format == FORMAT_RAW 727 && opt_mode != MODE_LIST)) 728 coder_set_compression_settings(); 729 730 // If raw format is used and a custom suffix is not provided, 731 // then only stdout mode can be used when compressing or decompressing. 732 if (opt_format == FORMAT_RAW && !suffix_is_set() && !opt_stdout 733 && (opt_mode == MODE_COMPRESS 734 || opt_mode == MODE_DECOMPRESS)) 735 message_fatal(_("With --format=raw, --suffix=.SUF is " 736 "required unless writing to stdout")); 737 738 // If no filenames are given, use stdin. 739 if (argv[optind] == NULL && args->files_name == NULL) { 740 // We don't modify or free() the "-" constant. The caller 741 // modifies this so don't make the struct itself const. 742 static char *names_stdin[2] = { (char *)"-", NULL }; 743 args->arg_names = names_stdin; 744 args->arg_count = 1; 745 } else { 746 // We got at least one filename from the command line, or 747 // --files or --files0 was specified. 748 args->arg_names = argv + optind; 749 args->arg_count = (unsigned int)(argc - optind); 750 } 751 752 return; 753 } 754 755 756 #ifndef NDEBUG 757 extern void 758 args_free(void) 759 { 760 free(opt_block_list); 761 return; 762 } 763 #endif 764