1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file args.c 4 /// \brief Argument parsing 5 /// 6 /// \note Filter-specific options parsing is in options.c. 7 // 8 // Author: Lasse Collin 9 // 10 // This file has been put into the public domain. 11 // You can do whatever you want with this file. 12 // 13 /////////////////////////////////////////////////////////////////////////////// 14 15 #include "private.h" 16 17 #include "getopt.h" 18 #include <ctype.h> 19 20 21 bool opt_stdout = false; 22 bool opt_force = false; 23 bool opt_keep_original = false; 24 bool opt_robot = false; 25 26 // We don't modify or free() this, but we need to assign it in some 27 // non-const pointers. 28 const char stdin_filename[] = "(stdin)"; 29 30 31 /// Parse and set the memory usage limit for compression and/or decompression. 32 static void 33 parse_memlimit(const char *name, const char *name_percentage, char *str, 34 bool set_compress, bool set_decompress) 35 { 36 bool is_percentage = false; 37 uint64_t value; 38 39 const size_t len = strlen(str); 40 if (len > 0 && str[len - 1] == '%') { 41 str[len - 1] = '\0'; 42 is_percentage = true; 43 value = str_to_uint64(name_percentage, str, 1, 100); 44 } else { 45 // On 32-bit systems, SIZE_MAX would make more sense than 46 // UINT64_MAX. But use UINT64_MAX still so that scripts 47 // that assume > 4 GiB values don't break. 48 value = str_to_uint64(name, str, 0, UINT64_MAX); 49 } 50 51 hardware_memlimit_set( 52 value, set_compress, set_decompress, is_percentage); 53 return; 54 } 55 56 57 static void 58 parse_real(args_info *args, int argc, char **argv) 59 { 60 enum { 61 OPT_X86 = INT_MIN, 62 OPT_POWERPC, 63 OPT_IA64, 64 OPT_ARM, 65 OPT_ARMTHUMB, 66 OPT_SPARC, 67 OPT_DELTA, 68 OPT_LZMA1, 69 OPT_LZMA2, 70 71 OPT_NO_SPARSE, 72 OPT_FILES, 73 OPT_FILES0, 74 OPT_MEM_COMPRESS, 75 OPT_MEM_DECOMPRESS, 76 OPT_NO_ADJUST, 77 OPT_INFO_MEMORY, 78 OPT_ROBOT, 79 }; 80 81 static const char short_opts[] 82 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789"; 83 84 static const struct option long_opts[] = { 85 // Operation mode 86 { "compress", no_argument, NULL, 'z' }, 87 { "decompress", no_argument, NULL, 'd' }, 88 { "uncompress", no_argument, NULL, 'd' }, 89 { "test", no_argument, NULL, 't' }, 90 { "list", no_argument, NULL, 'l' }, 91 92 // Operation modifiers 93 { "keep", no_argument, NULL, 'k' }, 94 { "force", no_argument, NULL, 'f' }, 95 { "stdout", no_argument, NULL, 'c' }, 96 { "to-stdout", no_argument, NULL, 'c' }, 97 { "no-sparse", no_argument, NULL, OPT_NO_SPARSE }, 98 { "suffix", required_argument, NULL, 'S' }, 99 // { "recursive", no_argument, NULL, 'r' }, // TODO 100 { "files", optional_argument, NULL, OPT_FILES }, 101 { "files0", optional_argument, NULL, OPT_FILES0 }, 102 103 // Basic compression settings 104 { "format", required_argument, NULL, 'F' }, 105 { "check", required_argument, NULL, 'C' }, 106 { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS }, 107 { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS }, 108 { "memlimit", required_argument, NULL, 'M' }, 109 { "memory", required_argument, NULL, 'M' }, // Old alias 110 { "no-adjust", no_argument, NULL, OPT_NO_ADJUST }, 111 { "threads", required_argument, NULL, 'T' }, 112 113 { "extreme", no_argument, NULL, 'e' }, 114 { "fast", no_argument, NULL, '0' }, 115 { "best", no_argument, NULL, '9' }, 116 117 // Filters 118 { "lzma1", optional_argument, NULL, OPT_LZMA1 }, 119 { "lzma2", optional_argument, NULL, OPT_LZMA2 }, 120 { "x86", optional_argument, NULL, OPT_X86 }, 121 { "powerpc", optional_argument, NULL, OPT_POWERPC }, 122 { "ia64", optional_argument, NULL, OPT_IA64 }, 123 { "arm", optional_argument, NULL, OPT_ARM }, 124 { "armthumb", optional_argument, NULL, OPT_ARMTHUMB }, 125 { "sparc", optional_argument, NULL, OPT_SPARC }, 126 { "delta", optional_argument, NULL, OPT_DELTA }, 127 128 // Other options 129 { "quiet", no_argument, NULL, 'q' }, 130 { "verbose", no_argument, NULL, 'v' }, 131 { "no-warn", no_argument, NULL, 'Q' }, 132 { "robot", no_argument, NULL, OPT_ROBOT }, 133 { "info-memory", no_argument, NULL, OPT_INFO_MEMORY }, 134 { "help", no_argument, NULL, 'h' }, 135 { "long-help", no_argument, NULL, 'H' }, 136 { "version", no_argument, NULL, 'V' }, 137 138 { NULL, 0, NULL, 0 } 139 }; 140 141 int c; 142 143 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL)) 144 != -1) { 145 switch (c) { 146 // Compression preset (also for decompression if --format=raw) 147 case '0': case '1': case '2': case '3': case '4': 148 case '5': case '6': case '7': case '8': case '9': 149 coder_set_preset(c - '0'); 150 break; 151 152 // --memlimit-compress 153 case OPT_MEM_COMPRESS: 154 parse_memlimit("memlimit-compress", 155 "memlimit-compress%", optarg, 156 true, false); 157 break; 158 159 // --memlimit-decompress 160 case OPT_MEM_DECOMPRESS: 161 parse_memlimit("memlimit-decompress", 162 "memlimit-decompress%", optarg, 163 false, true); 164 break; 165 166 // --memlimit 167 case 'M': 168 parse_memlimit("memlimit", "memlimit%", optarg, 169 true, true); 170 break; 171 172 // --suffix 173 case 'S': 174 suffix_set(optarg); 175 break; 176 177 case 'T': 178 hardware_threadlimit_set(str_to_uint64( 179 "threads", optarg, 0, UINT32_MAX)); 180 break; 181 182 // --version 183 case 'V': 184 // This doesn't return. 185 message_version(); 186 187 // --stdout 188 case 'c': 189 opt_stdout = true; 190 break; 191 192 // --decompress 193 case 'd': 194 opt_mode = MODE_DECOMPRESS; 195 break; 196 197 // --extreme 198 case 'e': 199 coder_set_extreme(); 200 break; 201 202 // --force 203 case 'f': 204 opt_force = true; 205 break; 206 207 // --info-memory 208 case OPT_INFO_MEMORY: 209 // This doesn't return. 210 hardware_memlimit_show(); 211 212 // --help 213 case 'h': 214 // This doesn't return. 215 message_help(false); 216 217 // --long-help 218 case 'H': 219 // This doesn't return. 220 message_help(true); 221 222 // --list 223 case 'l': 224 opt_mode = MODE_LIST; 225 break; 226 227 // --keep 228 case 'k': 229 opt_keep_original = true; 230 break; 231 232 // --quiet 233 case 'q': 234 message_verbosity_decrease(); 235 break; 236 237 case 'Q': 238 set_exit_no_warn(); 239 break; 240 241 case 't': 242 opt_mode = MODE_TEST; 243 break; 244 245 // --verbose 246 case 'v': 247 message_verbosity_increase(); 248 break; 249 250 // --robot 251 case OPT_ROBOT: 252 opt_robot = true; 253 254 // This is to make sure that floating point numbers 255 // always have a dot as decimal separator. 256 setlocale(LC_NUMERIC, "C"); 257 break; 258 259 case 'z': 260 opt_mode = MODE_COMPRESS; 261 break; 262 263 // Filter setup 264 265 case OPT_X86: 266 coder_add_filter(LZMA_FILTER_X86, 267 options_bcj(optarg)); 268 break; 269 270 case OPT_POWERPC: 271 coder_add_filter(LZMA_FILTER_POWERPC, 272 options_bcj(optarg)); 273 break; 274 275 case OPT_IA64: 276 coder_add_filter(LZMA_FILTER_IA64, 277 options_bcj(optarg)); 278 break; 279 280 case OPT_ARM: 281 coder_add_filter(LZMA_FILTER_ARM, 282 options_bcj(optarg)); 283 break; 284 285 case OPT_ARMTHUMB: 286 coder_add_filter(LZMA_FILTER_ARMTHUMB, 287 options_bcj(optarg)); 288 break; 289 290 case OPT_SPARC: 291 coder_add_filter(LZMA_FILTER_SPARC, 292 options_bcj(optarg)); 293 break; 294 295 case OPT_DELTA: 296 coder_add_filter(LZMA_FILTER_DELTA, 297 options_delta(optarg)); 298 break; 299 300 case OPT_LZMA1: 301 coder_add_filter(LZMA_FILTER_LZMA1, 302 options_lzma(optarg)); 303 break; 304 305 case OPT_LZMA2: 306 coder_add_filter(LZMA_FILTER_LZMA2, 307 options_lzma(optarg)); 308 break; 309 310 // Other 311 312 // --format 313 case 'F': { 314 // Just in case, support both "lzma" and "alone" since 315 // the latter was used for forward compatibility in 316 // LZMA Utils 4.32.x. 317 static const struct { 318 char str[8]; 319 enum format_type format; 320 } types[] = { 321 { "auto", FORMAT_AUTO }, 322 { "xz", FORMAT_XZ }, 323 { "lzma", FORMAT_LZMA }, 324 { "alone", FORMAT_LZMA }, 325 // { "gzip", FORMAT_GZIP }, 326 // { "gz", FORMAT_GZIP }, 327 { "raw", FORMAT_RAW }, 328 }; 329 330 size_t i = 0; 331 while (strcmp(types[i].str, optarg) != 0) 332 if (++i == ARRAY_SIZE(types)) 333 message_fatal(_("%s: Unknown file " 334 "format type"), 335 optarg); 336 337 opt_format = types[i].format; 338 break; 339 } 340 341 // --check 342 case 'C': { 343 static const struct { 344 char str[8]; 345 lzma_check check; 346 } types[] = { 347 { "none", LZMA_CHECK_NONE }, 348 { "crc32", LZMA_CHECK_CRC32 }, 349 { "crc64", LZMA_CHECK_CRC64 }, 350 { "sha256", LZMA_CHECK_SHA256 }, 351 }; 352 353 size_t i = 0; 354 while (strcmp(types[i].str, optarg) != 0) { 355 if (++i == ARRAY_SIZE(types)) 356 message_fatal(_("%s: Unsupported " 357 "integrity " 358 "check type"), optarg); 359 } 360 361 // Use a separate check in case we are using different 362 // liblzma than what was used to compile us. 363 if (!lzma_check_is_supported(types[i].check)) 364 message_fatal(_("%s: Unsupported integrity " 365 "check type"), optarg); 366 367 coder_set_check(types[i].check); 368 break; 369 } 370 371 case OPT_NO_SPARSE: 372 io_no_sparse(); 373 break; 374 375 case OPT_FILES: 376 args->files_delim = '\n'; 377 378 // Fall through 379 380 case OPT_FILES0: 381 if (args->files_name != NULL) 382 message_fatal(_("Only one file can be " 383 "specified with `--files' " 384 "or `--files0'.")); 385 386 if (optarg == NULL) { 387 args->files_name = (char *)stdin_filename; 388 args->files_file = stdin; 389 } else { 390 args->files_name = optarg; 391 args->files_file = fopen(optarg, 392 c == OPT_FILES ? "r" : "rb"); 393 if (args->files_file == NULL) 394 message_fatal("%s: %s", optarg, 395 strerror(errno)); 396 } 397 398 break; 399 400 case OPT_NO_ADJUST: 401 opt_auto_adjust = false; 402 break; 403 404 default: 405 message_try_help(); 406 tuklib_exit(E_ERROR, E_ERROR, false); 407 } 408 } 409 410 return; 411 } 412 413 414 static void 415 parse_environment(args_info *args, char *argv0, const char *varname) 416 { 417 char *env = getenv(varname); 418 if (env == NULL) 419 return; 420 421 // We modify the string, so make a copy of it. 422 env = xstrdup(env); 423 424 // Calculate the number of arguments in env. argc stats at one 425 // to include space for the program name. 426 int argc = 1; 427 bool prev_was_space = true; 428 for (size_t i = 0; env[i] != '\0'; ++i) { 429 // NOTE: Cast to unsigned char is needed so that correct 430 // value gets passed to isspace(), which expects 431 // unsigned char cast to int. Casting to int is done 432 // automatically due to integer promotion, but we need to 433 // force char to unsigned char manually. Otherwise 8-bit 434 // characters would get promoted to wrong value if 435 // char is signed. 436 if (isspace((unsigned char)env[i])) { 437 prev_was_space = true; 438 } else if (prev_was_space) { 439 prev_was_space = false; 440 441 // Keep argc small enough to fit into a singed int 442 // and to keep it usable for memory allocation. 443 if (++argc == my_min( 444 INT_MAX, SIZE_MAX / sizeof(char *))) 445 message_fatal(_("The environment variable " 446 "%s contains too many " 447 "arguments"), varname); 448 } 449 } 450 451 // Allocate memory to hold pointers to the arguments. Add one to get 452 // space for the terminating NULL (if some systems happen to need it). 453 char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *)); 454 argv[0] = argv0; 455 argv[argc] = NULL; 456 457 // Go through the string again. Split the arguments using '\0' 458 // characters and add pointers to the resulting strings to argv. 459 argc = 1; 460 prev_was_space = true; 461 for (size_t i = 0; env[i] != '\0'; ++i) { 462 if (isspace((unsigned char)env[i])) { 463 prev_was_space = true; 464 env[i] = '\0'; 465 } else if (prev_was_space) { 466 prev_was_space = false; 467 argv[argc++] = env + i; 468 } 469 } 470 471 // Parse the argument list we got from the environment. All non-option 472 // arguments i.e. filenames are ignored. 473 parse_real(args, argc, argv); 474 475 // Reset the state of the getopt_long() so that we can parse the 476 // command line options too. There are two incompatible ways to 477 // do it. 478 #ifdef HAVE_OPTRESET 479 // BSD 480 optind = 1; 481 optreset = 1; 482 #else 483 // GNU, Solaris 484 optind = 0; 485 #endif 486 487 // We don't need the argument list from environment anymore. 488 free(argv); 489 free(env); 490 491 return; 492 } 493 494 495 extern void 496 args_parse(args_info *args, int argc, char **argv) 497 { 498 // Initialize those parts of *args that we need later. 499 args->files_name = NULL; 500 args->files_file = NULL; 501 args->files_delim = '\0'; 502 503 // Check how we were called. 504 { 505 // Remove the leading path name, if any. 506 const char *name = strrchr(argv[0], '/'); 507 if (name == NULL) 508 name = argv[0]; 509 else 510 ++name; 511 512 // NOTE: It's possible that name[0] is now '\0' if argv[0] 513 // is weird, but it doesn't matter here. 514 515 // Look for full command names instead of substrings like 516 // "un", "cat", and "lz" to reduce possibility of false 517 // positives when the programs have been renamed. 518 if (strstr(name, "xzcat") != NULL) { 519 opt_mode = MODE_DECOMPRESS; 520 opt_stdout = true; 521 } else if (strstr(name, "unxz") != NULL) { 522 opt_mode = MODE_DECOMPRESS; 523 } else if (strstr(name, "lzcat") != NULL) { 524 opt_format = FORMAT_LZMA; 525 opt_mode = MODE_DECOMPRESS; 526 opt_stdout = true; 527 } else if (strstr(name, "unlzma") != NULL) { 528 opt_format = FORMAT_LZMA; 529 opt_mode = MODE_DECOMPRESS; 530 } else if (strstr(name, "lzma") != NULL) { 531 opt_format = FORMAT_LZMA; 532 } 533 } 534 535 // First the flags from the environment 536 parse_environment(args, argv[0], "XZ_DEFAULTS"); 537 parse_environment(args, argv[0], "XZ_OPT"); 538 539 // Then from the command line 540 parse_real(args, argc, argv); 541 542 // Never remove the source file when the destination is not on disk. 543 // In test mode the data is written nowhere, but setting opt_stdout 544 // will make the rest of the code behave well. 545 if (opt_stdout || opt_mode == MODE_TEST) { 546 opt_keep_original = true; 547 opt_stdout = true; 548 } 549 550 // When compressing, if no --format flag was used, or it 551 // was --format=auto, we compress to the .xz format. 552 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO) 553 opt_format = FORMAT_XZ; 554 555 // Compression settings need to be validated (options themselves and 556 // their memory usage) when compressing to any file format. It has to 557 // be done also when uncompressing raw data, since for raw decoding 558 // the options given on the command line are used to know what kind 559 // of raw data we are supposed to decode. 560 if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW) 561 coder_set_compression_settings(); 562 563 // If no filenames are given, use stdin. 564 if (argv[optind] == NULL && args->files_name == NULL) { 565 // We don't modify or free() the "-" constant. The caller 566 // modifies this so don't make the struct itself const. 567 static char *names_stdin[2] = { (char *)"-", NULL }; 568 args->arg_names = names_stdin; 569 args->arg_count = 1; 570 } else { 571 // We got at least one filename from the command line, or 572 // --files or --files0 was specified. 573 args->arg_names = argv + optind; 574 args->arg_count = argc - optind; 575 } 576 577 return; 578 } 579