1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file args.c 4 /// \brief Argument parsing 5 /// 6 /// \note Filter-specific options parsing is in options.c. 7 // 8 // Author: Lasse Collin 9 // 10 // This file has been put into the public domain. 11 // You can do whatever you want with this file. 12 // 13 /////////////////////////////////////////////////////////////////////////////// 14 15 #include "private.h" 16 17 #include "getopt.h" 18 #include <ctype.h> 19 20 21 bool opt_stdout = false; 22 bool opt_force = false; 23 bool opt_keep_original = false; 24 bool opt_robot = false; 25 bool opt_ignore_check = false; 26 27 // We don't modify or free() this, but we need to assign it in some 28 // non-const pointers. 29 const char stdin_filename[] = "(stdin)"; 30 31 32 /// Parse and set the memory usage limit for compression and/or decompression. 33 static void 34 parse_memlimit(const char *name, const char *name_percentage, char *str, 35 bool set_compress, bool set_decompress) 36 { 37 bool is_percentage = false; 38 uint64_t value; 39 40 const size_t len = strlen(str); 41 if (len > 0 && str[len - 1] == '%') { 42 str[len - 1] = '\0'; 43 is_percentage = true; 44 value = str_to_uint64(name_percentage, str, 1, 100); 45 } else { 46 // On 32-bit systems, SIZE_MAX would make more sense than 47 // UINT64_MAX. But use UINT64_MAX still so that scripts 48 // that assume > 4 GiB values don't break. 49 value = str_to_uint64(name, str, 0, UINT64_MAX); 50 } 51 52 hardware_memlimit_set( 53 value, set_compress, set_decompress, is_percentage); 54 return; 55 } 56 57 58 static void 59 parse_block_list(char *str) 60 { 61 // It must be non-empty and not begin with a comma. 62 if (str[0] == '\0' || str[0] == ',') 63 message_fatal(_("%s: Invalid argument to --block-list"), str); 64 65 // Count the number of comma-separated strings. 66 size_t count = 1; 67 for (size_t i = 0; str[i] != '\0'; ++i) 68 if (str[i] == ',') 69 ++count; 70 71 // Prevent an unlikely integer overflow. 72 if (count > SIZE_MAX / sizeof(uint64_t) - 1) 73 message_fatal(_("%s: Too many arguments to --block-list"), 74 str); 75 76 // Allocate memory to hold all the sizes specified. 77 // If --block-list was specified already, its value is forgotten. 78 free(opt_block_list); 79 opt_block_list = xmalloc((count + 1) * sizeof(uint64_t)); 80 81 for (size_t i = 0; i < count; ++i) { 82 // Locate the next comma and replace it with \0. 83 char *p = strchr(str, ','); 84 if (p != NULL) 85 *p = '\0'; 86 87 if (str[0] == '\0') { 88 // There is no string, that is, a comma follows 89 // another comma. Use the previous value. 90 // 91 // NOTE: We checked earler that the first char 92 // of the whole list cannot be a comma. 93 assert(i > 0); 94 opt_block_list[i] = opt_block_list[i - 1]; 95 } else { 96 opt_block_list[i] = str_to_uint64("block-list", str, 97 0, UINT64_MAX); 98 99 // Zero indicates no more new Blocks. 100 if (opt_block_list[i] == 0) { 101 if (i + 1 != count) 102 message_fatal(_("0 can only be used " 103 "as the last element " 104 "in --block-list")); 105 106 opt_block_list[i] = UINT64_MAX; 107 } 108 } 109 110 str = p + 1; 111 } 112 113 // Terminate the array. 114 opt_block_list[count] = 0; 115 return; 116 } 117 118 119 static void 120 parse_real(args_info *args, int argc, char **argv) 121 { 122 enum { 123 OPT_X86 = INT_MIN, 124 OPT_POWERPC, 125 OPT_IA64, 126 OPT_ARM, 127 OPT_ARMTHUMB, 128 OPT_SPARC, 129 OPT_DELTA, 130 OPT_LZMA1, 131 OPT_LZMA2, 132 133 OPT_SINGLE_STREAM, 134 OPT_NO_SPARSE, 135 OPT_FILES, 136 OPT_FILES0, 137 OPT_BLOCK_SIZE, 138 OPT_BLOCK_LIST, 139 OPT_MEM_COMPRESS, 140 OPT_MEM_DECOMPRESS, 141 OPT_NO_ADJUST, 142 OPT_INFO_MEMORY, 143 OPT_ROBOT, 144 OPT_FLUSH_TIMEOUT, 145 OPT_IGNORE_CHECK, 146 }; 147 148 static const char short_opts[] 149 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; 150 151 static const struct option long_opts[] = { 152 // Operation mode 153 { "compress", no_argument, NULL, 'z' }, 154 { "decompress", no_argument, NULL, 'd' }, 155 { "uncompress", no_argument, NULL, 'd' }, 156 { "test", no_argument, NULL, 't' }, 157 { "list", no_argument, NULL, 'l' }, 158 159 // Operation modifiers 160 { "keep", no_argument, NULL, 'k' }, 161 { "force", no_argument, NULL, 'f' }, 162 { "stdout", no_argument, NULL, 'c' }, 163 { "to-stdout", no_argument, NULL, 'c' }, 164 { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM }, 165 { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, 166 { "suffix", required_argument, NULL, 'S' }, 167 // { "recursive", no_argument, NULL, 'r' }, // TODO 168 { "files", optional_argument, NULL, OPT_FILES }, 169 { "files0", optional_argument, NULL, OPT_FILES0 }, 170 171 // Basic compression settings 172 { "format", required_argument, NULL, 'F' }, 173 { "check", required_argument, NULL, 'C' }, 174 { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK }, 175 { "block-size", required_argument, NULL, OPT_BLOCK_SIZE }, 176 { "block-list", required_argument, NULL, OPT_BLOCK_LIST }, 177 { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS }, 178 { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, 179 { "memlimit", required_argument, NULL, 'M' }, 180 { "memory", required_argument, NULL, 'M' }, // Old alias 181 { "no-adjust", no_argument, NULL, OPT_NO_ADJUST }, 182 { "threads", required_argument, NULL, 'T' }, 183 { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT }, 184 185 { "extreme", no_argument, NULL, 'e' }, 186 { "fast", no_argument, NULL, '0' }, 187 { "best", no_argument, NULL, '9' }, 188 189 // Filters 190 { "lzma1", optional_argument, NULL, OPT_LZMA1 }, 191 { "lzma2", optional_argument, NULL, OPT_LZMA2 }, 192 { "x86", optional_argument, NULL, OPT_X86 }, 193 { "powerpc", optional_argument, NULL, OPT_POWERPC }, 194 { "ia64", optional_argument, NULL, OPT_IA64 }, 195 { "arm", optional_argument, NULL, OPT_ARM }, 196 { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, 197 { "sparc", optional_argument, NULL, OPT_SPARC }, 198 { "delta", optional_argument, NULL, OPT_DELTA }, 199 200 // Other options 201 { "quiet", no_argument, NULL, 'q' }, 202 { "verbose", no_argument, NULL, 'v' }, 203 { "no-warn", no_argument, NULL, 'Q' }, 204 { "robot", no_argument, NULL, OPT_ROBOT }, 205 { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, 206 { "help", no_argument, NULL, 'h' }, 207 { "long-help", no_argument, NULL, 'H' }, 208 { "version", no_argument, NULL, 'V' }, 209 210 { NULL, 0, NULL, 0 } 211 }; 212 213 int c; 214 215 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) 216 != -1) { 217 switch (c) { 218 // Compression preset (also for decompression if --format=raw) 219 case '0': case '1': case '2': case '3': case '4': 220 case '5': case '6': case '7': case '8': case '9': 221 coder_set_preset(c - '0'); 222 break; 223 224 // --memlimit-compress 225 case OPT_MEM_COMPRESS: 226 parse_memlimit("memlimit-compress", 227 "memlimit-compress%", optarg, 228 true, false); 229 break; 230 231 // --memlimit-decompress 232 case OPT_MEM_DECOMPRESS: 233 parse_memlimit("memlimit-decompress", 234 "memlimit-decompress%", optarg, 235 false, true); 236 break; 237 238 // --memlimit 239 case 'M': 240 parse_memlimit("memlimit", "memlimit%", optarg, 241 true, true); 242 break; 243 244 // --suffix 245 case 'S': 246 suffix_set(optarg); 247 break; 248 249 case 'T': 250 // The max is from src/liblzma/common/common.h. 251 hardware_threads_set(str_to_uint64("threads", 252 optarg, 0, 16384)); 253 break; 254 255 // --version 256 case 'V': 257 // This doesn't return. 258 message_version(); 259 260 // --stdout 261 case 'c': 262 opt_stdout = true; 263 break; 264 265 // --decompress 266 case 'd': 267 opt_mode = MODE_DECOMPRESS; 268 break; 269 270 // --extreme 271 case 'e': 272 coder_set_extreme(); 273 break; 274 275 // --force 276 case 'f': 277 opt_force = true; 278 break; 279 280 // --info-memory 281 case OPT_INFO_MEMORY: 282 // This doesn't return. 283 hardware_memlimit_show(); 284 285 // --help 286 case 'h': 287 // This doesn't return. 288 message_help(false); 289 290 // --long-help 291 case 'H': 292 // This doesn't return. 293 message_help(true); 294 295 // --list 296 case 'l': 297 opt_mode = MODE_LIST; 298 break; 299 300 // --keep 301 case 'k': 302 opt_keep_original = true; 303 break; 304 305 // --quiet 306 case 'q': 307 message_verbosity_decrease(); 308 break; 309 310 case 'Q': 311 set_exit_no_warn(); 312 break; 313 314 case 't': 315 opt_mode = MODE_TEST; 316 break; 317 318 // --verbose 319 case 'v': 320 message_verbosity_increase(); 321 break; 322 323 // --robot 324 case OPT_ROBOT: 325 opt_robot = true; 326 327 // This is to make sure that floating point numbers 328 // always have a dot as decimal separator. 329 setlocale(LC_NUMERIC, "C"); 330 break; 331 332 case 'z': 333 opt_mode = MODE_COMPRESS; 334 break; 335 336 // Filter setup 337 338 case OPT_X86: 339 coder_add_filter(LZMA_FILTER_X86, 340 options_bcj(optarg)); 341 break; 342 343 case OPT_POWERPC: 344 coder_add_filter(LZMA_FILTER_POWERPC, 345 options_bcj(optarg)); 346 break; 347 348 case OPT_IA64: 349 coder_add_filter(LZMA_FILTER_IA64, 350 options_bcj(optarg)); 351 break; 352 353 case OPT_ARM: 354 coder_add_filter(LZMA_FILTER_ARM, 355 options_bcj(optarg)); 356 break; 357 358 case OPT_ARMTHUMB: 359 coder_add_filter(LZMA_FILTER_ARMTHUMB, 360 options_bcj(optarg)); 361 break; 362 363 case OPT_SPARC: 364 coder_add_filter(LZMA_FILTER_SPARC, 365 options_bcj(optarg)); 366 break; 367 368 case OPT_DELTA: 369 coder_add_filter(LZMA_FILTER_DELTA, 370 options_delta(optarg)); 371 break; 372 373 case OPT_LZMA1: 374 coder_add_filter(LZMA_FILTER_LZMA1, 375 options_lzma(optarg)); 376 break; 377 378 case OPT_LZMA2: 379 coder_add_filter(LZMA_FILTER_LZMA2, 380 options_lzma(optarg)); 381 break; 382 383 // Other 384 385 // --format 386 case 'F': { 387 // Just in case, support both "lzma" and "alone" since 388 // the latter was used for forward compatibility in 389 // LZMA Utils 4.32.x. 390 static const struct { 391 char str[8]; 392 enum format_type format; 393 } types[] = { 394 { "auto", FORMAT_AUTO }, 395 { "xz", FORMAT_XZ }, 396 { "lzma", FORMAT_LZMA }, 397 { "alone", FORMAT_LZMA }, 398 // { "gzip", FORMAT_GZIP }, 399 // { "gz", FORMAT_GZIP }, 400 { "raw", FORMAT_RAW }, 401 }; 402 403 size_t i = 0; 404 while (strcmp(types[i].str, optarg) != 0) 405 if (++i == ARRAY_SIZE(types)) 406 message_fatal(_("%s: Unknown file " 407 "format type"), 408 optarg); 409 410 opt_format = types[i].format; 411 break; 412 } 413 414 // --check 415 case 'C': { 416 static const struct { 417 char str[8]; 418 lzma_check check; 419 } types[] = { 420 { "none", LZMA_CHECK_NONE }, 421 { "crc32", LZMA_CHECK_CRC32 }, 422 { "crc64", LZMA_CHECK_CRC64 }, 423 { "sha256", LZMA_CHECK_SHA256 }, 424 }; 425 426 size_t i = 0; 427 while (strcmp(types[i].str, optarg) != 0) { 428 if (++i == ARRAY_SIZE(types)) 429 message_fatal(_("%s: Unsupported " 430 "integrity " 431 "check type"), optarg); 432 } 433 434 // Use a separate check in case we are using different 435 // liblzma than what was used to compile us. 436 if (!lzma_check_is_supported(types[i].check)) 437 message_fatal(_("%s: Unsupported integrity " 438 "check type"), optarg); 439 440 coder_set_check(types[i].check); 441 break; 442 } 443 444 case OPT_IGNORE_CHECK: 445 opt_ignore_check = true; 446 break; 447 448 case OPT_BLOCK_SIZE: 449 opt_block_size = str_to_uint64("block-size", optarg, 450 0, LZMA_VLI_MAX); 451 break; 452 453 case OPT_BLOCK_LIST: { 454 parse_block_list(optarg); 455 break; 456 } 457 458 case OPT_SINGLE_STREAM: 459 opt_single_stream = true; 460 break; 461 462 case OPT_NO_SPARSE: 463 io_no_sparse(); 464 break; 465 466 case OPT_FILES: 467 args->files_delim = '\n'; 468 469 // Fall through 470 471 case OPT_FILES0: 472 if (args->files_name != NULL) 473 message_fatal(_("Only one file can be " 474 "specified with `--files' " 475 "or `--files0'.")); 476 477 if (optarg == NULL) { 478 args->files_name = (char *)stdin_filename; 479 args->files_file = stdin; 480 } else { 481 args->files_name = optarg; 482 args->files_file = fopen(optarg, 483 c == OPT_FILES ? "r" : "rb"); 484 if (args->files_file == NULL) 485 message_fatal("%s: %s", optarg, 486 strerror(errno)); 487 } 488 489 break; 490 491 case OPT_NO_ADJUST: 492 opt_auto_adjust = false; 493 break; 494 495 case OPT_FLUSH_TIMEOUT: 496 opt_flush_timeout = str_to_uint64("flush-timeout", 497 optarg, 0, UINT64_MAX); 498 break; 499 500 default: 501 message_try_help(); 502 tuklib_exit(E_ERROR, E_ERROR, false); 503 } 504 } 505 506 return; 507 } 508 509 510 static void 511 parse_environment(args_info *args, char *argv0, const char *varname) 512 { 513 char *env = getenv(varname); 514 if (env == NULL) 515 return; 516 517 // We modify the string, so make a copy of it. 518 env = xstrdup(env); 519 520 // Calculate the number of arguments in env. argc stats at one 521 // to include space for the program name. 522 int argc = 1; 523 bool prev_was_space = true; 524 for (size_t i = 0; env[i] != '\0'; ++i) { 525 // NOTE: Cast to unsigned char is needed so that correct 526 // value gets passed to isspace(), which expects 527 // unsigned char cast to int. Casting to int is done 528 // automatically due to integer promotion, but we need to 529 // force char to unsigned char manually. Otherwise 8-bit 530 // characters would get promoted to wrong value if 531 // char is signed. 532 if (isspace((unsigned char)env[i])) { 533 prev_was_space = true; 534 } else if (prev_was_space) { 535 prev_was_space = false; 536 537 // Keep argc small enough to fit into a signed int 538 // and to keep it usable for memory allocation. 539 if (++argc == my_min( 540 INT_MAX, SIZE_MAX / sizeof(char *))) 541 message_fatal(_("The environment variable " 542 "%s contains too many " 543 "arguments"), varname); 544 } 545 } 546 547 // Allocate memory to hold pointers to the arguments. Add one to get 548 // space for the terminating NULL (if some systems happen to need it). 549 char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); 550 argv[0] = argv0; 551 argv[argc] = NULL; 552 553 // Go through the string again. Split the arguments using '\0' 554 // characters and add pointers to the resulting strings to argv. 555 argc = 1; 556 prev_was_space = true; 557 for (size_t i = 0; env[i] != '\0'; ++i) { 558 if (isspace((unsigned char)env[i])) { 559 prev_was_space = true; 560 env[i] = '\0'; 561 } else if (prev_was_space) { 562 prev_was_space = false; 563 argv[argc++] = env + i; 564 } 565 } 566 567 // Parse the argument list we got from the environment. All non-option 568 // arguments i.e. filenames are ignored. 569 parse_real(args, argc, argv); 570 571 // Reset the state of the getopt_long() so that we can parse the 572 // command line options too. There are two incompatible ways to 573 // do it. 574 #ifdef HAVE_OPTRESET 575 // BSD 576 optind = 1; 577 optreset = 1; 578 #else 579 // GNU, Solaris 580 optind = 0; 581 #endif 582 583 // We don't need the argument list from environment anymore. 584 free(argv); 585 free(env); 586 587 return; 588 } 589 590 591 extern void 592 args_parse(args_info *args, int argc, char **argv) 593 { 594 // Initialize those parts of *args that we need later. 595 args->files_name = NULL; 596 args->files_file = NULL; 597 args->files_delim = '\0'; 598 599 // Check how we were called. 600 { 601 // Remove the leading path name, if any. 602 const char *name = strrchr(argv[0], '/'); 603 if (name == NULL) 604 name = argv[0]; 605 else 606 ++name; 607 608 // NOTE: It's possible that name[0] is now '\0' if argv[0] 609 // is weird, but it doesn't matter here. 610 611 // Look for full command names instead of substrings like 612 // "un", "cat", and "lz" to reduce possibility of false 613 // positives when the programs have been renamed. 614 if (strstr(name, "xzcat") != NULL) { 615 opt_mode = MODE_DECOMPRESS; 616 opt_stdout = true; 617 } else if (strstr(name, "unxz") != NULL) { 618 opt_mode = MODE_DECOMPRESS; 619 } else if (strstr(name, "lzcat") != NULL) { 620 opt_format = FORMAT_LZMA; 621 opt_mode = MODE_DECOMPRESS; 622 opt_stdout = true; 623 } else if (strstr(name, "unlzma") != NULL) { 624 opt_format = FORMAT_LZMA; 625 opt_mode = MODE_DECOMPRESS; 626 } else if (strstr(name, "lzma") != NULL) { 627 opt_format = FORMAT_LZMA; 628 } 629 } 630 631 // First the flags from the environment 632 parse_environment(args, argv[0], "XZ_DEFAULTS"); 633 parse_environment(args, argv[0], "XZ_OPT"); 634 635 // Then from the command line 636 parse_real(args, argc, argv); 637 638 // If encoder or decoder support was omitted at build time, 639 // show an error now so that the rest of the code can rely on 640 // that whatever is in opt_mode is also supported. 641 #ifndef HAVE_ENCODERS 642 if (opt_mode == MODE_COMPRESS) 643 message_fatal(_("Compression support was disabled " 644 "at build time")); 645 #endif 646 #ifndef HAVE_DECODERS 647 // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS 648 // is the only valid choice. 649 if (opt_mode != MODE_COMPRESS) 650 message_fatal(_("Decompression support was disabled " 651 "at build time")); 652 #endif 653 654 // Never remove the source file when the destination is not on disk. 655 // In test mode the data is written nowhere, but setting opt_stdout 656 // will make the rest of the code behave well. 657 if (opt_stdout || opt_mode == MODE_TEST) { 658 opt_keep_original = true; 659 opt_stdout = true; 660 } 661 662 // When compressing, if no --format flag was used, or it 663 // was --format=auto, we compress to the .xz format. 664 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) 665 opt_format = FORMAT_XZ; 666 667 // Compression settings need to be validated (options themselves and 668 // their memory usage) when compressing to any file format. It has to 669 // be done also when uncompressing raw data, since for raw decoding 670 // the options given on the command line are used to know what kind 671 // of raw data we are supposed to decode. 672 if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) 673 coder_set_compression_settings(); 674 675 // If no filenames are given, use stdin. 676 if (argv[optind] == NULL && args->files_name == NULL) { 677 // We don't modify or free() the "-" constant. The caller 678 // modifies this so don't make the struct itself const. 679 static char *names_stdin[2] = { (char *)"-", NULL }; 680 args->arg_names = names_stdin; 681 args->arg_count = 1; 682 } else { 683 // We got at least one filename from the command line, or 684 // --files or --files0 was specified. 685 args->arg_names = argv + optind; 686 args->arg_count = argc - optind; 687 } 688 689 return; 690 } 691 692 693 #ifndef NDEBUG 694 extern void 695 args_free(void) 696 { 697 free(opt_block_list); 698 return; 699 } 700 #endif 701