1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file args.c
4 /// \brief Argument parsing
5 ///
6 /// \note Filter-specific options parsing is in options.c.
7 //
8 // Author: Lasse Collin
9 //
10 // This file has been put into the public domain.
11 // You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include "getopt.h"
18 #include <ctype.h>
19
20
// Global option flags. All of them start as false and are only ever
// switched to true by the argument parsing code in this file.

// Set by -c/--stdout/--to-stdout, when invoked under a name containing
// "xzcat" or "lzcat", and forced on in test mode by args_parse().
bool opt_stdout = false;

// Set by -f/--force.
bool opt_force = false;

// Set by -k/--keep. args_parse() also forces this on whenever opt_stdout
// is set or the operation mode is MODE_TEST.
bool opt_keep_original = false;

// Set by --robot.
bool opt_robot = false;

// Set by --ignore-check.
bool opt_ignore_check = false;

// We don't modify or free() this, but we need to assign it in some
// non-const pointers. It is stored in args->files_name when --files or
// --files0 is given without an argument.
const char stdin_filename[] = "(stdin)";
30
31
/// \brief      Parse a memory usage limit and pass it to hardware_memlimit_set()
///
/// The argument is either a plain integer or an integer followed by '%'.
/// A percentage must be in the range [1, 100]; a plain value may be anything
/// that fits into uint64_t.
///
/// \param      name            Option name given to str_to_uint64() when
///                             the argument is a plain value
/// \param      name_percentage Option name given to str_to_uint64() when
///                             the argument is a percentage
/// \param      str             Option argument. A trailing '%' is
///                             overwritten with '\0' in place.
/// \param      set_compress    Forwarded to hardware_memlimit_set()
/// \param      set_decompress  Forwarded to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, char *str,
		bool set_compress, bool set_decompress)
{
	const size_t length = strlen(str);
	const bool percent = length > 0 && str[length - 1] == '%';

	uint64_t limit;
	if (percent) {
		// Cut off the '%' so that only the digits remain.
		str[length - 1] = '\0';
		limit = str_to_uint64(name_percentage, str, 1, 100);
	} else {
		// SIZE_MAX would be the natural upper bound on 32-bit
		// systems, but UINT64_MAX is accepted everywhere so that
		// scripts passing values above 4 GiB keep working.
		limit = str_to_uint64(name, str, 0, UINT64_MAX);
	}

	hardware_memlimit_set(limit, set_compress, set_decompress, percent);
}
56
57
58 static void
parse_block_list(char * str)59 parse_block_list(char *str)
60 {
61 // It must be non-empty and not begin with a comma.
62 if (str[0] == '\0' || str[0] == ',')
63 message_fatal(_("%s: Invalid argument to --block-list"), str);
64
65 // Count the number of comma-separated strings.
66 size_t count = 1;
67 for (size_t i = 0; str[i] != '\0'; ++i)
68 if (str[i] == ',')
69 ++count;
70
71 // Prevent an unlikely integer overflow.
72 if (count > SIZE_MAX / sizeof(uint64_t) - 1)
73 message_fatal(_("%s: Too many arguments to --block-list"),
74 str);
75
76 // Allocate memory to hold all the sizes specified.
77 // If --block-list was specified already, its value is forgotten.
78 free(opt_block_list);
79 opt_block_list = xmalloc((count + 1) * sizeof(uint64_t));
80
81 for (size_t i = 0; i < count; ++i) {
82 // Locate the next comma and replace it with \0.
83 char *p = strchr(str, ',');
84 if (p != NULL)
85 *p = '\0';
86
87 if (str[0] == '\0') {
88 // There is no string, that is, a comma follows
89 // another comma. Use the previous value.
90 //
91 // NOTE: We checked earlier that the first char
92 // of the whole list cannot be a comma.
93 assert(i > 0);
94 opt_block_list[i] = opt_block_list[i - 1];
95 } else {
96 opt_block_list[i] = str_to_uint64("block-list", str,
97 0, UINT64_MAX);
98
99 // Zero indicates no more new Blocks.
100 if (opt_block_list[i] == 0) {
101 if (i + 1 != count)
102 message_fatal(_("0 can only be used "
103 "as the last element "
104 "in --block-list"));
105
106 opt_block_list[i] = UINT64_MAX;
107 }
108 }
109
110 str = p + 1;
111 }
112
113 // Terminate the array.
114 opt_block_list[count] = 0;
115 return;
116 }
117
118
119 static void
parse_real(args_info * args,int argc,char ** argv)120 parse_real(args_info *args, int argc, char **argv)
121 {
122 enum {
123 OPT_X86 = INT_MIN,
124 OPT_POWERPC,
125 OPT_IA64,
126 OPT_ARM,
127 OPT_ARMTHUMB,
128 OPT_SPARC,
129 OPT_DELTA,
130 OPT_LZMA1,
131 OPT_LZMA2,
132
133 OPT_SINGLE_STREAM,
134 OPT_NO_SPARSE,
135 OPT_FILES,
136 OPT_FILES0,
137 OPT_BLOCK_SIZE,
138 OPT_BLOCK_LIST,
139 OPT_MEM_COMPRESS,
140 OPT_MEM_DECOMPRESS,
141 OPT_NO_ADJUST,
142 OPT_INFO_MEMORY,
143 OPT_ROBOT,
144 OPT_FLUSH_TIMEOUT,
145 OPT_IGNORE_CHECK,
146 };
147
148 static const char short_opts[]
149 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
150
151 static const struct option long_opts[] = {
152 // Operation mode
153 { "compress", no_argument, NULL, 'z' },
154 { "decompress", no_argument, NULL, 'd' },
155 { "uncompress", no_argument, NULL, 'd' },
156 { "test", no_argument, NULL, 't' },
157 { "list", no_argument, NULL, 'l' },
158
159 // Operation modifiers
160 { "keep", no_argument, NULL, 'k' },
161 { "force", no_argument, NULL, 'f' },
162 { "stdout", no_argument, NULL, 'c' },
163 { "to-stdout", no_argument, NULL, 'c' },
164 { "single-stream", no_argument, NULL, OPT_SINGLE_STREAM },
165 { "no-sparse", no_argument, NULL, OPT_NO_SPARSE },
166 { "suffix", required_argument, NULL, 'S' },
167 // { "recursive", no_argument, NULL, 'r' }, // TODO
168 { "files", optional_argument, NULL, OPT_FILES },
169 { "files0", optional_argument, NULL, OPT_FILES0 },
170
171 // Basic compression settings
172 { "format", required_argument, NULL, 'F' },
173 { "check", required_argument, NULL, 'C' },
174 { "ignore-check", no_argument, NULL, OPT_IGNORE_CHECK },
175 { "block-size", required_argument, NULL, OPT_BLOCK_SIZE },
176 { "block-list", required_argument, NULL, OPT_BLOCK_LIST },
177 { "memlimit-compress", required_argument, NULL, OPT_MEM_COMPRESS },
178 { "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
179 { "memlimit", required_argument, NULL, 'M' },
180 { "memory", required_argument, NULL, 'M' }, // Old alias
181 { "no-adjust", no_argument, NULL, OPT_NO_ADJUST },
182 { "threads", required_argument, NULL, 'T' },
183 { "flush-timeout", required_argument, NULL, OPT_FLUSH_TIMEOUT },
184
185 { "extreme", no_argument, NULL, 'e' },
186 { "fast", no_argument, NULL, '0' },
187 { "best", no_argument, NULL, '9' },
188
189 // Filters
190 { "lzma1", optional_argument, NULL, OPT_LZMA1 },
191 { "lzma2", optional_argument, NULL, OPT_LZMA2 },
192 { "x86", optional_argument, NULL, OPT_X86 },
193 { "powerpc", optional_argument, NULL, OPT_POWERPC },
194 { "ia64", optional_argument, NULL, OPT_IA64 },
195 { "arm", optional_argument, NULL, OPT_ARM },
196 { "armthumb", optional_argument, NULL, OPT_ARMTHUMB },
197 { "sparc", optional_argument, NULL, OPT_SPARC },
198 { "delta", optional_argument, NULL, OPT_DELTA },
199
200 // Other options
201 { "quiet", no_argument, NULL, 'q' },
202 { "verbose", no_argument, NULL, 'v' },
203 { "no-warn", no_argument, NULL, 'Q' },
204 { "robot", no_argument, NULL, OPT_ROBOT },
205 { "info-memory", no_argument, NULL, OPT_INFO_MEMORY },
206 { "help", no_argument, NULL, 'h' },
207 { "long-help", no_argument, NULL, 'H' },
208 { "version", no_argument, NULL, 'V' },
209
210 { NULL, 0, NULL, 0 }
211 };
212
213 int c;
214
215 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
216 != -1) {
217 switch (c) {
218 // Compression preset (also for decompression if --format=raw)
219 case '0': case '1': case '2': case '3': case '4':
220 case '5': case '6': case '7': case '8': case '9':
221 coder_set_preset((uint32_t)(c - '0'));
222 break;
223
224 // --memlimit-compress
225 case OPT_MEM_COMPRESS:
226 parse_memlimit("memlimit-compress",
227 "memlimit-compress%", optarg,
228 true, false);
229 break;
230
231 // --memlimit-decompress
232 case OPT_MEM_DECOMPRESS:
233 parse_memlimit("memlimit-decompress",
234 "memlimit-decompress%", optarg,
235 false, true);
236 break;
237
238 // --memlimit
239 case 'M':
240 parse_memlimit("memlimit", "memlimit%", optarg,
241 true, true);
242 break;
243
244 // --suffix
245 case 'S':
246 suffix_set(optarg);
247 break;
248
249 case 'T':
250 // The max is from src/liblzma/common/common.h.
251 hardware_threads_set(str_to_uint64("threads",
252 optarg, 0, 16384));
253 break;
254
255 // --version
256 case 'V':
257 // This doesn't return.
258 message_version();
259
260 // --stdout
261 case 'c':
262 opt_stdout = true;
263 break;
264
265 // --decompress
266 case 'd':
267 opt_mode = MODE_DECOMPRESS;
268 break;
269
270 // --extreme
271 case 'e':
272 coder_set_extreme();
273 break;
274
275 // --force
276 case 'f':
277 opt_force = true;
278 break;
279
280 // --info-memory
281 case OPT_INFO_MEMORY:
282 // This doesn't return.
283 hardware_memlimit_show();
284
285 // --help
286 case 'h':
287 // This doesn't return.
288 message_help(false);
289
290 // --long-help
291 case 'H':
292 // This doesn't return.
293 message_help(true);
294
295 // --list
296 case 'l':
297 opt_mode = MODE_LIST;
298 break;
299
300 // --keep
301 case 'k':
302 opt_keep_original = true;
303 break;
304
305 // --quiet
306 case 'q':
307 message_verbosity_decrease();
308 break;
309
310 case 'Q':
311 set_exit_no_warn();
312 break;
313
314 case 't':
315 opt_mode = MODE_TEST;
316 break;
317
318 // --verbose
319 case 'v':
320 message_verbosity_increase();
321 break;
322
323 // --robot
324 case OPT_ROBOT:
325 opt_robot = true;
326
327 // This is to make sure that floating point numbers
328 // always have a dot as decimal separator.
329 setlocale(LC_NUMERIC, "C");
330 break;
331
332 case 'z':
333 opt_mode = MODE_COMPRESS;
334 break;
335
336 // Filter setup
337
338 case OPT_X86:
339 coder_add_filter(LZMA_FILTER_X86,
340 options_bcj(optarg));
341 break;
342
343 case OPT_POWERPC:
344 coder_add_filter(LZMA_FILTER_POWERPC,
345 options_bcj(optarg));
346 break;
347
348 case OPT_IA64:
349 coder_add_filter(LZMA_FILTER_IA64,
350 options_bcj(optarg));
351 break;
352
353 case OPT_ARM:
354 coder_add_filter(LZMA_FILTER_ARM,
355 options_bcj(optarg));
356 break;
357
358 case OPT_ARMTHUMB:
359 coder_add_filter(LZMA_FILTER_ARMTHUMB,
360 options_bcj(optarg));
361 break;
362
363 case OPT_SPARC:
364 coder_add_filter(LZMA_FILTER_SPARC,
365 options_bcj(optarg));
366 break;
367
368 case OPT_DELTA:
369 coder_add_filter(LZMA_FILTER_DELTA,
370 options_delta(optarg));
371 break;
372
373 case OPT_LZMA1:
374 coder_add_filter(LZMA_FILTER_LZMA1,
375 options_lzma(optarg));
376 break;
377
378 case OPT_LZMA2:
379 coder_add_filter(LZMA_FILTER_LZMA2,
380 options_lzma(optarg));
381 break;
382
383 // Other
384
385 // --format
386 case 'F': {
387 // Just in case, support both "lzma" and "alone" since
388 // the latter was used for forward compatibility in
389 // LZMA Utils 4.32.x.
390 static const struct {
391 char str[8];
392 enum format_type format;
393 } types[] = {
394 { "auto", FORMAT_AUTO },
395 { "xz", FORMAT_XZ },
396 { "lzma", FORMAT_LZMA },
397 { "alone", FORMAT_LZMA },
398 // { "gzip", FORMAT_GZIP },
399 // { "gz", FORMAT_GZIP },
400 { "raw", FORMAT_RAW },
401 };
402
403 size_t i = 0;
404 while (strcmp(types[i].str, optarg) != 0)
405 if (++i == ARRAY_SIZE(types))
406 message_fatal(_("%s: Unknown file "
407 "format type"),
408 optarg);
409
410 opt_format = types[i].format;
411 break;
412 }
413
414 // --check
415 case 'C': {
416 static const struct {
417 char str[8];
418 lzma_check check;
419 } types[] = {
420 { "none", LZMA_CHECK_NONE },
421 { "crc32", LZMA_CHECK_CRC32 },
422 { "crc64", LZMA_CHECK_CRC64 },
423 { "sha256", LZMA_CHECK_SHA256 },
424 };
425
426 size_t i = 0;
427 while (strcmp(types[i].str, optarg) != 0) {
428 if (++i == ARRAY_SIZE(types))
429 message_fatal(_("%s: Unsupported "
430 "integrity "
431 "check type"), optarg);
432 }
433
434 // Use a separate check in case we are using different
435 // liblzma than what was used to compile us.
436 if (!lzma_check_is_supported(types[i].check))
437 message_fatal(_("%s: Unsupported integrity "
438 "check type"), optarg);
439
440 coder_set_check(types[i].check);
441 break;
442 }
443
444 case OPT_IGNORE_CHECK:
445 opt_ignore_check = true;
446 break;
447
448 case OPT_BLOCK_SIZE:
449 opt_block_size = str_to_uint64("block-size", optarg,
450 0, LZMA_VLI_MAX);
451 break;
452
453 case OPT_BLOCK_LIST: {
454 parse_block_list(optarg);
455 break;
456 }
457
458 case OPT_SINGLE_STREAM:
459 opt_single_stream = true;
460 break;
461
462 case OPT_NO_SPARSE:
463 io_no_sparse();
464 break;
465
466 case OPT_FILES:
467 args->files_delim = '\n';
468
469 // Fall through
470
471 case OPT_FILES0:
472 if (args->files_name != NULL)
473 message_fatal(_("Only one file can be "
474 "specified with `--files' "
475 "or `--files0'."));
476
477 if (optarg == NULL) {
478 args->files_name = (char *)stdin_filename;
479 args->files_file = stdin;
480 } else {
481 args->files_name = optarg;
482 args->files_file = fopen(optarg,
483 c == OPT_FILES ? "r" : "rb");
484 if (args->files_file == NULL)
485 message_fatal("%s: %s", optarg,
486 strerror(errno));
487 }
488
489 break;
490
491 case OPT_NO_ADJUST:
492 opt_auto_adjust = false;
493 break;
494
495 case OPT_FLUSH_TIMEOUT:
496 opt_flush_timeout = str_to_uint64("flush-timeout",
497 optarg, 0, UINT64_MAX);
498 break;
499
500 default:
501 message_try_help();
502 tuklib_exit(E_ERROR, E_ERROR, false);
503 }
504 }
505
506 return;
507 }
508
509
510 static void
parse_environment(args_info * args,char * argv0,const char * varname)511 parse_environment(args_info *args, char *argv0, const char *varname)
512 {
513 char *env = getenv(varname);
514 if (env == NULL)
515 return;
516
517 // We modify the string, so make a copy of it.
518 env = xstrdup(env);
519
520 // Calculate the number of arguments in env. argc stats at one
521 // to include space for the program name.
522 int argc = 1;
523 bool prev_was_space = true;
524 for (size_t i = 0; env[i] != '\0'; ++i) {
525 // NOTE: Cast to unsigned char is needed so that correct
526 // value gets passed to isspace(), which expects
527 // unsigned char cast to int. Casting to int is done
528 // automatically due to integer promotion, but we need to
529 // force char to unsigned char manually. Otherwise 8-bit
530 // characters would get promoted to wrong value if
531 // char is signed.
532 if (isspace((unsigned char)env[i])) {
533 prev_was_space = true;
534 } else if (prev_was_space) {
535 prev_was_space = false;
536
537 // Keep argc small enough to fit into a signed int
538 // and to keep it usable for memory allocation.
539 if (++argc == my_min(
540 INT_MAX, SIZE_MAX / sizeof(char *)))
541 message_fatal(_("The environment variable "
542 "%s contains too many "
543 "arguments"), varname);
544 }
545 }
546
547 // Allocate memory to hold pointers to the arguments. Add one to get
548 // space for the terminating NULL (if some systems happen to need it).
549 char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
550 argv[0] = argv0;
551 argv[argc] = NULL;
552
553 // Go through the string again. Split the arguments using '\0'
554 // characters and add pointers to the resulting strings to argv.
555 argc = 1;
556 prev_was_space = true;
557 for (size_t i = 0; env[i] != '\0'; ++i) {
558 if (isspace((unsigned char)env[i])) {
559 prev_was_space = true;
560 env[i] = '\0';
561 } else if (prev_was_space) {
562 prev_was_space = false;
563 argv[argc++] = env + i;
564 }
565 }
566
567 // Parse the argument list we got from the environment. All non-option
568 // arguments i.e. filenames are ignored.
569 parse_real(args, argc, argv);
570
571 // Reset the state of the getopt_long() so that we can parse the
572 // command line options too. There are two incompatible ways to
573 // do it.
574 #ifdef HAVE_OPTRESET
575 // BSD
576 optind = 1;
577 optreset = 1;
578 #else
579 // GNU, Solaris
580 optind = 0;
581 #endif
582
583 // We don't need the argument list from environment anymore.
584 free(argv);
585 free(env);
586
587 return;
588 }
589
590
591 extern void
args_parse(args_info * args,int argc,char ** argv)592 args_parse(args_info *args, int argc, char **argv)
593 {
594 // Initialize those parts of *args that we need later.
595 args->files_name = NULL;
596 args->files_file = NULL;
597 args->files_delim = '\0';
598
599 // Check how we were called.
600 {
601 // Remove the leading path name, if any.
602 const char *name = strrchr(argv[0], '/');
603 if (name == NULL)
604 name = argv[0];
605 else
606 ++name;
607
608 // NOTE: It's possible that name[0] is now '\0' if argv[0]
609 // is weird, but it doesn't matter here.
610
611 // Look for full command names instead of substrings like
612 // "un", "cat", and "lz" to reduce possibility of false
613 // positives when the programs have been renamed.
614 if (strstr(name, "xzcat") != NULL) {
615 opt_mode = MODE_DECOMPRESS;
616 opt_stdout = true;
617 } else if (strstr(name, "unxz") != NULL) {
618 opt_mode = MODE_DECOMPRESS;
619 } else if (strstr(name, "lzcat") != NULL) {
620 opt_format = FORMAT_LZMA;
621 opt_mode = MODE_DECOMPRESS;
622 opt_stdout = true;
623 } else if (strstr(name, "unlzma") != NULL) {
624 opt_format = FORMAT_LZMA;
625 opt_mode = MODE_DECOMPRESS;
626 } else if (strstr(name, "lzma") != NULL) {
627 opt_format = FORMAT_LZMA;
628 }
629 }
630
631 // First the flags from the environment
632 parse_environment(args, argv[0], "XZ_DEFAULTS");
633 parse_environment(args, argv[0], "XZ_OPT");
634
635 // Then from the command line
636 parse_real(args, argc, argv);
637
638 // If encoder or decoder support was omitted at build time,
639 // show an error now so that the rest of the code can rely on
640 // that whatever is in opt_mode is also supported.
641 #ifndef HAVE_ENCODERS
642 if (opt_mode == MODE_COMPRESS)
643 message_fatal(_("Compression support was disabled "
644 "at build time"));
645 #endif
646 #ifndef HAVE_DECODERS
647 // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS
648 // is the only valid choice.
649 if (opt_mode != MODE_COMPRESS)
650 message_fatal(_("Decompression support was disabled "
651 "at build time"));
652 #endif
653
654 // Never remove the source file when the destination is not on disk.
655 // In test mode the data is written nowhere, but setting opt_stdout
656 // will make the rest of the code behave well.
657 if (opt_stdout || opt_mode == MODE_TEST) {
658 opt_keep_original = true;
659 opt_stdout = true;
660 }
661
662 // When compressing, if no --format flag was used, or it
663 // was --format=auto, we compress to the .xz format.
664 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
665 opt_format = FORMAT_XZ;
666
667 // Compression settings need to be validated (options themselves and
668 // their memory usage) when compressing to any file format. It has to
669 // be done also when uncompressing raw data, since for raw decoding
670 // the options given on the command line are used to know what kind
671 // of raw data we are supposed to decode.
672 if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
673 coder_set_compression_settings();
674
675 // If no filenames are given, use stdin.
676 if (argv[optind] == NULL && args->files_name == NULL) {
677 // We don't modify or free() the "-" constant. The caller
678 // modifies this so don't make the struct itself const.
679 static char *names_stdin[2] = { (char *)"-", NULL };
680 args->arg_names = names_stdin;
681 args->arg_count = 1;
682 } else {
683 // We got at least one filename from the command line, or
684 // --files or --files0 was specified.
685 args->arg_names = argv + optind;
686 args->arg_count = (unsigned int)(argc - optind);
687 }
688
689 return;
690 }
691
692
693 #ifndef NDEBUG
694 extern void
args_free(void)695 args_free(void)
696 {
697 free(opt_block_list);
698 return;
699 }
700 #endif
701