1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file args.c 4 /// \brief Argument parsing 5 /// 6 /// \note Filter-specific options parsing is in options.c. 7 // 8 // Author: Lasse Collin 9 // 10 // This file has been put into the public domain. 11 // You can do whatever you want with this file. 12 // 13 /////////////////////////////////////////////////////////////////////////////// 14 15 #include "private.h" 16 17 #include "getopt.h" 18 #include <ctype.h> 19 20 21 bool opt_stdout = false; 22 bool opt_force = false; 23 bool opt_keep_original = false; 24 bool opt_robot = false; 25 bool opt_ignore_check = false; 26 27 // We don't modify or free() this, but we need to assign it in some 28 // non-const pointers. 29 const char stdin_filename[] = "(stdin)"; 30 31 32 /// Parse and set the memory usage limit for compression, decompression, 33 /// and/or multithreaded decompression. 34 static void 35 parse_memlimit(const char *name, const char *name_percentage, const char *str, 36 bool set_compress, bool set_decompress, bool set_mtdec) 37 { 38 bool is_percentage = false; 39 uint64_t value; 40 41 const size_t len = strlen(str); 42 if (len > 0 && str[len - 1] == '%') { 43 // Make a copy so that we can get rid of %. 44 // 45 // In the past str wasn't const and we modified it directly 46 // but that modified argv[] and thus affected what was visible 47 // in "ps auxf" or similar tools which was confusing. For 48 // example, --memlimit=50% would show up as --memlimit=50 49 // since the percent sign was overwritten here. 50 char *s = xstrdup(str); 51 s[len - 1] = '\0'; 52 is_percentage = true; 53 value = str_to_uint64(name_percentage, s, 1, 100); 54 free(s); 55 } else { 56 // On 32-bit systems, SIZE_MAX would make more sense than 57 // UINT64_MAX. But use UINT64_MAX still so that scripts 58 // that assume > 4 GiB values don't break. 59 value = str_to_uint64(name, str, 0, UINT64_MAX); 60 } 61 62 hardware_memlimit_set(value, set_compress, set_decompress, set_mtdec, 63 is_percentage); 64 return; 65 } 66 67 68 static void 69 parse_block_list(const char *str_const) 70 { 71 // We need a modifiable string in the for-loop. 72 char *str_start = xstrdup(str_const); 73 char *str = str_start; 74 75 // It must be non-empty and not begin with a comma. 76 if (str[0] == '\0' || str[0] == ',') 77 message_fatal(_("%s: Invalid argument to --block-list"), str); 78 79 // Count the number of comma-separated strings. 80 size_t count = 1; 81 for (size_t i = 0; str[i] != '\0'; ++i) 82 if (str[i] == ',') 83 ++count; 84 85 // Prevent an unlikely integer overflow. 86 if (count > SIZE_MAX / sizeof(uint64_t) - 1) 87 message_fatal(_("%s: Too many arguments to --block-list"), 88 str); 89 90 // Allocate memory to hold all the sizes specified. 91 // If --block-list was specified already, its value is forgotten. 92 free(opt_block_list); 93 opt_block_list = xmalloc((count + 1) * sizeof(uint64_t)); 94 95 for (size_t i = 0; i < count; ++i) { 96 // Locate the next comma and replace it with \0. 97 char *p = strchr(str, ','); 98 if (p != NULL) 99 *p = '\0'; 100 101 if (str[0] == '\0') { 102 // There is no string, that is, a comma follows 103 // another comma. Use the previous value. 104 // 105 // NOTE: We checked earlier that the first char 106 // of the whole list cannot be a comma. 107 assert(i > 0); 108 opt_block_list[i] = opt_block_list[i - 1]; 109 } else { 110 opt_block_list[i] = str_to_uint64("block-list", str, 111 0, UINT64_MAX); 112 113 // Zero indicates no more new Blocks. 114 if (opt_block_list[i] == 0) { 115 if (i + 1 != count) 116 message_fatal(_("0 can only be used " 117 "as the last element " 118 "in --block-list")); 119 120 opt_block_list[i] = UINT64_MAX; 121 } 122 } 123 124 str = p + 1; 125 } 126 127 // Terminate the array. 128 opt_block_list[count] = 0; 129 130 free(str_start); 131 return; 132 } 133 134 135 static void 136 parse_real(args_info *args, int argc, char **argv) 137 { 138 enum { 139 OPT_X86 = INT_MIN, 140 OPT_POWERPC, 141 OPT_IA64, 142 OPT_ARM, 143 OPT_ARMTHUMB, 144 OPT_ARM64, 145 OPT_SPARC, 146 OPT_DELTA, 147 OPT_LZMA1, 148 OPT_LZMA2, 149 150 OPT_SINGLE_STREAM, 151 OPT_NO_SPARSE, 152 OPT_FILES, 153 OPT_FILES0, 154 OPT_BLOCK_SIZE, 155 OPT_BLOCK_LIST, 156 OPT_MEM_COMPRESS, 157 OPT_MEM_DECOMPRESS, 158 OPT_MEM_MT_DECOMPRESS, 159 OPT_NO_ADJUST, 160 OPT_INFO_MEMORY, 161 OPT_ROBOT, 162 OPT_FLUSH_TIMEOUT, 163 OPT_IGNORE_CHECK, 164 }; 165 166 static const char short_opts[] 167 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; 168 169 static const struct option long_opts[] = { 170 // Operation mode 171 { "compress", no_argument, NULL, 'z' }, 172 { "decompress", no_argument, NULL, 'd' }, 173 { "uncompress", no_argument, NULL, 'd' }, 174 { "test", no_argument, NULL, 't' }, 175 { "list", no_argument, NULL, 'l' }, 176 177 // Operation modifiers 178 { "keep", no_argument, NULL, 'k' }, 179 { "force", no_argument, NULL, 'f' }, 180 { "stdout", no_argument, NULL, 'c' }, 181 { "to-stdout", no_argument, NULL, 'c' }, 182 { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM }, 183 { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, 184 { "suffix", required_argument, NULL, 'S' }, 185 // { "recursive", no_argument, NULL, 'r' }, // TODO 186 { "files", optional_argument, NULL, OPT_FILES }, 187 { "files0", optional_argument, NULL, OPT_FILES0 }, 188 189 // Basic compression settings 190 { "format", required_argument, NULL, 'F' }, 191 { "check", required_argument, NULL, 'C' }, 192 { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK }, 193 { "block-size", required_argument, NULL, OPT_BLOCK_SIZE }, 194 { "block-list", required_argument, NULL, OPT_BLOCK_LIST }, 195 { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS }, 196 { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, 197 { "memlimit-mt-decompress", required_argument, NULL, OPT_MEM_MT_DECOMPRESS }, 198 { "memlimit", required_argument, NULL, 'M' }, 199 { "memory", required_argument, NULL, 'M' }, // Old alias 200 { "no-adjust", no_argument, NULL, OPT_NO_ADJUST }, 201 { "threads", required_argument, NULL, 'T' }, 202 { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT }, 203 204 { "extreme", no_argument, NULL, 'e' }, 205 { "fast", no_argument, NULL, '0' }, 206 { "best", no_argument, NULL, '9' }, 207 208 // Filters 209 { "lzma1", optional_argument, NULL, OPT_LZMA1 }, 210 { "lzma2", optional_argument, NULL, OPT_LZMA2 }, 211 { "x86", optional_argument, NULL, OPT_X86 }, 212 { "powerpc", optional_argument, NULL, OPT_POWERPC }, 213 { "ia64", optional_argument, NULL, OPT_IA64 }, 214 { "arm", optional_argument, NULL, OPT_ARM }, 215 { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, 216 { "arm64", optional_argument, NULL, OPT_ARM64 }, 217 { "sparc", optional_argument, NULL, OPT_SPARC }, 218 { "delta", optional_argument, NULL, OPT_DELTA }, 219 220 // Other options 221 { "quiet", no_argument, NULL, 'q' }, 222 { "verbose", no_argument, NULL, 'v' }, 223 { "no-warn", no_argument, NULL, 'Q' }, 224 { "robot", no_argument, NULL, OPT_ROBOT }, 225 { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, 226 { "help", no_argument, NULL, 'h' }, 227 { "long-help", no_argument, NULL, 'H' }, 228 { "version", no_argument, NULL, 'V' }, 229 230 { NULL, 0, NULL, 0 } 231 }; 232 233 int c; 234 235 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) 236 != -1) { 237 switch (c) { 238 // Compression preset (also for decompression if --format=raw) 239 case '0': case '1': case '2': case '3': case '4': 240 case '5': case '6': case '7': case '8': case '9': 241 coder_set_preset((uint32_t)(c - '0')); 242 break; 243 244 // --memlimit-compress 245 case OPT_MEM_COMPRESS: 246 parse_memlimit("memlimit-compress", 247 "memlimit-compress%", optarg, 248 true, false, false); 249 break; 250 251 // --memlimit-decompress 252 case OPT_MEM_DECOMPRESS: 253 parse_memlimit("memlimit-decompress", 254 "memlimit-decompress%", optarg, 255 false, true, false); 256 break; 257 258 // --memlimit-mt-decompress 259 case OPT_MEM_MT_DECOMPRESS: 260 parse_memlimit("memlimit-mt-decompress", 261 "memlimit-mt-decompress%", optarg, 262 false, false, true); 263 break; 264 265 // --memlimit 266 case 'M': 267 parse_memlimit("memlimit", "memlimit%", optarg, 268 true, true, true); 269 break; 270 271 // --suffix 272 case 'S': 273 suffix_set(optarg); 274 break; 275 276 case 'T': { 277 // Since xz 5.4.0: Ignore leading '+' first. 278 const char *s = optarg; 279 if (optarg[0] == '+') 280 ++s; 281 282 // The max is from src/liblzma/common/common.h. 283 uint32_t t = str_to_uint64("threads", s, 0, 16384); 284 285 // If leading '+' was used then use multi-threaded 286 // mode even if exactly one thread was specified. 287 if (t == 1 && optarg[0] == '+') 288 t = UINT32_MAX; 289 290 hardware_threads_set(t); 291 break; 292 } 293 294 // --version 295 case 'V': 296 // This doesn't return. 297 message_version(); 298 299 // --stdout 300 case 'c': 301 opt_stdout = true; 302 break; 303 304 // --decompress 305 case 'd': 306 opt_mode = MODE_DECOMPRESS; 307 break; 308 309 // --extreme 310 case 'e': 311 coder_set_extreme(); 312 break; 313 314 // --force 315 case 'f': 316 opt_force = true; 317 break; 318 319 // --info-memory 320 case OPT_INFO_MEMORY: 321 // This doesn't return. 322 hardware_memlimit_show(); 323 324 // --help 325 case 'h': 326 // This doesn't return. 327 message_help(false); 328 329 // --long-help 330 case 'H': 331 // This doesn't return. 332 message_help(true); 333 334 // --list 335 case 'l': 336 opt_mode = MODE_LIST; 337 break; 338 339 // --keep 340 case 'k': 341 opt_keep_original = true; 342 break; 343 344 // --quiet 345 case 'q': 346 message_verbosity_decrease(); 347 break; 348 349 case 'Q': 350 set_exit_no_warn(); 351 break; 352 353 case 't': 354 opt_mode = MODE_TEST; 355 break; 356 357 // --verbose 358 case 'v': 359 message_verbosity_increase(); 360 break; 361 362 // --robot 363 case OPT_ROBOT: 364 opt_robot = true; 365 366 // This is to make sure that floating point numbers 367 // always have a dot as decimal separator. 368 setlocale(LC_NUMERIC, "C"); 369 break; 370 371 case 'z': 372 opt_mode = MODE_COMPRESS; 373 break; 374 375 // Filter setup 376 377 case OPT_X86: 378 coder_add_filter(LZMA_FILTER_X86, 379 options_bcj(optarg)); 380 break; 381 382 case OPT_POWERPC: 383 coder_add_filter(LZMA_FILTER_POWERPC, 384 options_bcj(optarg)); 385 break; 386 387 case OPT_IA64: 388 coder_add_filter(LZMA_FILTER_IA64, 389 options_bcj(optarg)); 390 break; 391 392 case OPT_ARM: 393 coder_add_filter(LZMA_FILTER_ARM, 394 options_bcj(optarg)); 395 break; 396 397 case OPT_ARMTHUMB: 398 coder_add_filter(LZMA_FILTER_ARMTHUMB, 399 options_bcj(optarg)); 400 break; 401 402 case OPT_ARM64: 403 coder_add_filter(LZMA_FILTER_ARM64, 404 options_bcj(optarg)); 405 break; 406 407 case OPT_SPARC: 408 coder_add_filter(LZMA_FILTER_SPARC, 409 options_bcj(optarg)); 410 break; 411 412 case OPT_DELTA: 413 coder_add_filter(LZMA_FILTER_DELTA, 414 options_delta(optarg)); 415 break; 416 417 case OPT_LZMA1: 418 coder_add_filter(LZMA_FILTER_LZMA1, 419 options_lzma(optarg)); 420 break; 421 422 case OPT_LZMA2: 423 coder_add_filter(LZMA_FILTER_LZMA2, 424 options_lzma(optarg)); 425 break; 426 427 // Other 428 429 // --format 430 case 'F': { 431 // Just in case, support both "lzma" and "alone" since 432 // the latter was used for forward compatibility in 433 // LZMA Utils 4.32.x. 434 static const struct { 435 char str[8]; 436 enum format_type format; 437 } types[] = { 438 { "auto", FORMAT_AUTO }, 439 { "xz", FORMAT_XZ }, 440 { "lzma", FORMAT_LZMA }, 441 { "alone", FORMAT_LZMA }, 442 #ifdef HAVE_LZIP_DECODER 443 { "lzip", FORMAT_LZIP }, 444 #endif 445 { "raw", FORMAT_RAW }, 446 }; 447 448 size_t i = 0; 449 while (strcmp(types[i].str, optarg) != 0) 450 if (++i == ARRAY_SIZE(types)) 451 message_fatal(_("%s: Unknown file " 452 "format type"), 453 optarg); 454 455 opt_format = types[i].format; 456 break; 457 } 458 459 // --check 460 case 'C': { 461 static const struct { 462 char str[8]; 463 lzma_check check; 464 } types[] = { 465 { "none", LZMA_CHECK_NONE }, 466 { "crc32", LZMA_CHECK_CRC32 }, 467 { "crc64", LZMA_CHECK_CRC64 }, 468 { "sha256", LZMA_CHECK_SHA256 }, 469 }; 470 471 size_t i = 0; 472 while (strcmp(types[i].str, optarg) != 0) { 473 if (++i == ARRAY_SIZE(types)) 474 message_fatal(_("%s: Unsupported " 475 "integrity " 476 "check type"), optarg); 477 } 478 479 // Use a separate check in case we are using different 480 // liblzma than what was used to compile us. 481 if (!lzma_check_is_supported(types[i].check)) 482 message_fatal(_("%s: Unsupported integrity " 483 "check type"), optarg); 484 485 coder_set_check(types[i].check); 486 break; 487 } 488 489 case OPT_IGNORE_CHECK: 490 opt_ignore_check = true; 491 break; 492 493 case OPT_BLOCK_SIZE: 494 opt_block_size = str_to_uint64("block-size", optarg, 495 0, LZMA_VLI_MAX); 496 break; 497 498 case OPT_BLOCK_LIST: { 499 parse_block_list(optarg); 500 break; 501 } 502 503 case OPT_SINGLE_STREAM: 504 opt_single_stream = true; 505 break; 506 507 case OPT_NO_SPARSE: 508 io_no_sparse(); 509 break; 510 511 case OPT_FILES: 512 args->files_delim = '\n'; 513 514 // Fall through 515 516 case OPT_FILES0: 517 if (args->files_name != NULL) 518 message_fatal(_("Only one file can be " 519 "specified with `--files' " 520 "or `--files0'.")); 521 522 if (optarg == NULL) { 523 args->files_name = stdin_filename; 524 args->files_file = stdin; 525 } else { 526 args->files_name = optarg; 527 args->files_file = fopen(optarg, 528 c == OPT_FILES ? "r" : "rb"); 529 if (args->files_file == NULL) 530 message_fatal("%s: %s", optarg, 531 strerror(errno)); 532 } 533 534 break; 535 536 case OPT_NO_ADJUST: 537 opt_auto_adjust = false; 538 break; 539 540 case OPT_FLUSH_TIMEOUT: 541 opt_flush_timeout = str_to_uint64("flush-timeout", 542 optarg, 0, UINT64_MAX); 543 break; 544 545 default: 546 message_try_help(); 547 tuklib_exit(E_ERROR, E_ERROR, false); 548 } 549 } 550 551 return; 552 } 553 554 555 static void 556 parse_environment(args_info *args, char *argv0, const char *varname) 557 { 558 char *env = getenv(varname); 559 if (env == NULL) 560 return; 561 562 // We modify the string, so make a copy of it. 563 env = xstrdup(env); 564 565 // Calculate the number of arguments in env. argc stats at one 566 // to include space for the program name. 567 int argc = 1; 568 bool prev_was_space = true; 569 for (size_t i = 0; env[i] != '\0'; ++i) { 570 // NOTE: Cast to unsigned char is needed so that correct 571 // value gets passed to isspace(), which expects 572 // unsigned char cast to int. Casting to int is done 573 // automatically due to integer promotion, but we need to 574 // force char to unsigned char manually. Otherwise 8-bit 575 // characters would get promoted to wrong value if 576 // char is signed. 577 if (isspace((unsigned char)env[i])) { 578 prev_was_space = true; 579 } else if (prev_was_space) { 580 prev_was_space = false; 581 582 // Keep argc small enough to fit into a signed int 583 // and to keep it usable for memory allocation. 584 if (++argc == my_min( 585 INT_MAX, SIZE_MAX / sizeof(char *))) 586 message_fatal(_("The environment variable " 587 "%s contains too many " 588 "arguments"), varname); 589 } 590 } 591 592 // Allocate memory to hold pointers to the arguments. Add one to get 593 // space for the terminating NULL (if some systems happen to need it). 594 char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); 595 argv[0] = argv0; 596 argv[argc] = NULL; 597 598 // Go through the string again. Split the arguments using '\0' 599 // characters and add pointers to the resulting strings to argv. 600 argc = 1; 601 prev_was_space = true; 602 for (size_t i = 0; env[i] != '\0'; ++i) { 603 if (isspace((unsigned char)env[i])) { 604 prev_was_space = true; 605 env[i] = '\0'; 606 } else if (prev_was_space) { 607 prev_was_space = false; 608 argv[argc++] = env + i; 609 } 610 } 611 612 // Parse the argument list we got from the environment. All non-option 613 // arguments i.e. filenames are ignored. 614 parse_real(args, argc, argv); 615 616 // Reset the state of the getopt_long() so that we can parse the 617 // command line options too. There are two incompatible ways to 618 // do it. 619 #ifdef HAVE_OPTRESET 620 // BSD 621 optind = 1; 622 optreset = 1; 623 #else 624 // GNU, Solaris 625 optind = 0; 626 #endif 627 628 // We don't need the argument list from environment anymore. 629 free(argv); 630 free(env); 631 632 return; 633 } 634 635 636 extern void 637 args_parse(args_info *args, int argc, char **argv) 638 { 639 // Initialize those parts of *args that we need later. 640 args->files_name = NULL; 641 args->files_file = NULL; 642 args->files_delim = '\0'; 643 644 // Check how we were called. 645 { 646 // Remove the leading path name, if any. 647 const char *name = strrchr(argv[0], '/'); 648 if (name == NULL) 649 name = argv[0]; 650 else 651 ++name; 652 653 // NOTE: It's possible that name[0] is now '\0' if argv[0] 654 // is weird, but it doesn't matter here. 655 656 // Look for full command names instead of substrings like 657 // "un", "cat", and "lz" to reduce possibility of false 658 // positives when the programs have been renamed. 659 if (strstr(name, "xzcat") != NULL) { 660 opt_mode = MODE_DECOMPRESS; 661 opt_stdout = true; 662 } else if (strstr(name, "unxz") != NULL) { 663 opt_mode = MODE_DECOMPRESS; 664 } else if (strstr(name, "lzcat") != NULL) { 665 opt_format = FORMAT_LZMA; 666 opt_mode = MODE_DECOMPRESS; 667 opt_stdout = true; 668 } else if (strstr(name, "unlzma") != NULL) { 669 opt_format = FORMAT_LZMA; 670 opt_mode = MODE_DECOMPRESS; 671 } else if (strstr(name, "lzma") != NULL) { 672 opt_format = FORMAT_LZMA; 673 } 674 } 675 676 // First the flags from the environment 677 parse_environment(args, argv[0], "XZ_DEFAULTS"); 678 parse_environment(args, argv[0], "XZ_OPT"); 679 680 // Then from the command line 681 parse_real(args, argc, argv); 682 683 // If encoder or decoder support was omitted at build time, 684 // show an error now so that the rest of the code can rely on 685 // that whatever is in opt_mode is also supported. 686 #ifndef HAVE_ENCODERS 687 if (opt_mode == MODE_COMPRESS) 688 message_fatal(_("Compression support was disabled " 689 "at build time")); 690 #endif 691 #ifndef HAVE_DECODERS 692 // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS 693 // is the only valid choice. 694 if (opt_mode != MODE_COMPRESS) 695 message_fatal(_("Decompression support was disabled " 696 "at build time")); 697 #endif 698 699 #ifdef HAVE_LZIP_DECODER 700 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_LZIP) 701 message_fatal(_("Compression of lzip files (.lz) " 702 "is not supported")); 703 #endif 704 705 // Never remove the source file when the destination is not on disk. 706 // In test mode the data is written nowhere, but setting opt_stdout 707 // will make the rest of the code behave well. 708 if (opt_stdout || opt_mode == MODE_TEST) { 709 opt_keep_original = true; 710 opt_stdout = true; 711 } 712 713 // When compressing, if no --format flag was used, or it 714 // was --format=auto, we compress to the .xz format. 715 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) 716 opt_format = FORMAT_XZ; 717 718 // Compression settings need to be validated (options themselves and 719 // their memory usage) when compressing to any file format. It has to 720 // be done also when uncompressing raw data, since for raw decoding 721 // the options given on the command line are used to know what kind 722 // of raw data we are supposed to decode. 723 if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) 724 coder_set_compression_settings(); 725 726 // If no filenames are given, use stdin. 727 if (argv[optind] == NULL && args->files_name == NULL) { 728 // We don't modify or free() the "-" constant. The caller 729 // modifies this so don't make the struct itself const. 730 static char *names_stdin[2] = { (char *)"-", NULL }; 731 args->arg_names = names_stdin; 732 args->arg_count = 1; 733 } else { 734 // We got at least one filename from the command line, or 735 // --files or --files0 was specified. 736 args->arg_names = argv + optind; 737 args->arg_count = (unsigned int)(argc - optind); 738 } 739 740 return; 741 } 742 743 744 #ifndef NDEBUG 745 extern void 746 args_free(void) 747 { 748 free(opt_block_list); 749 return; 750 } 751 #endif 752