1 /* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: expandtab:ts=8:sw=4:softtabstop=4:
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file args.c
6 /// \brief Argument parsing
7 ///
8 /// \note Filter-specific options parsing is in options.c.
9 //
10 // Author: Lasse Collin
11 //
12 // This file has been put into the public domain.
13 // You can do whatever you want with this file.
14 //
15 ///////////////////////////////////////////////////////////////////////////////
16
17 #include "private.h"
18
19 #include "getopt.h"
20 #include <ctype.h>
21
22
// Name used in place of a real filename when input comes from standard
// input. This string is never modified or free()d, but it has to be
// assignable to some non-const pointers, hence the casts at the use sites.
const char *stdin_filename = "(stdin)";

// Operation modifiers. All default to off and are switched on by the
// command line parser: --force, --keep, and --stdout respectively.
bool opt_force = false;
bool opt_keep_original = false;
bool opt_stdout = false;
30
31
32 static void
parse_real(args_info * args,int argc,char ** argv)33 parse_real(args_info *args, int argc, char **argv)
34 {
35 enum {
36 OPT_SUBBLOCK = INT_MIN,
37 OPT_X86,
38 OPT_POWERPC,
39 OPT_IA64,
40 OPT_ARM,
41 OPT_ARMTHUMB,
42 OPT_SPARC,
43 OPT_DELTA,
44 OPT_LZMA1,
45 OPT_LZMA2,
46
47 OPT_FILES,
48 OPT_FILES0,
49 };
50
51 static const char short_opts[]
52 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
53
54 static const struct option long_opts[] = {
55 // Operation mode
56 { "compress", no_argument, NULL, 'z' },
57 { "decompress", no_argument, NULL, 'd' },
58 { "uncompress", no_argument, NULL, 'd' },
59 { "test", no_argument, NULL, 't' },
60 { "list", no_argument, NULL, 'l' },
61
62 // Operation modifiers
63 { "keep", no_argument, NULL, 'k' },
64 { "force", no_argument, NULL, 'f' },
65 { "stdout", no_argument, NULL, 'c' },
66 { "to-stdout", no_argument, NULL, 'c' },
67 { "suffix", required_argument, NULL, 'S' },
68 // { "recursive", no_argument, NULL, 'r' }, // TODO
69 { "files", optional_argument, NULL, OPT_FILES },
70 { "files0", optional_argument, NULL, OPT_FILES0 },
71
72 // Basic compression settings
73 { "format", required_argument, NULL, 'F' },
74 { "check", required_argument, NULL, 'C' },
75 { "memory", required_argument, NULL, 'M' },
76 { "threads", required_argument, NULL, 'T' },
77
78 { "extreme", no_argument, NULL, 'e' },
79 { "fast", no_argument, NULL, '0' },
80 { "best", no_argument, NULL, '9' },
81
82 // Filters
83 { "lzma1", optional_argument, NULL, OPT_LZMA1 },
84 { "lzma2", optional_argument, NULL, OPT_LZMA2 },
85 { "x86", optional_argument, NULL, OPT_X86 },
86 { "powerpc", optional_argument, NULL, OPT_POWERPC },
87 { "ia64", optional_argument, NULL, OPT_IA64 },
88 { "arm", optional_argument, NULL, OPT_ARM },
89 { "armthumb", optional_argument, NULL, OPT_ARMTHUMB },
90 { "sparc", optional_argument, NULL, OPT_SPARC },
91 { "delta", optional_argument, NULL, OPT_DELTA },
92 { "subblock", optional_argument, NULL, OPT_SUBBLOCK },
93
94 // Other options
95 { "quiet", no_argument, NULL, 'q' },
96 { "verbose", no_argument, NULL, 'v' },
97 { "no-warn", no_argument, NULL, 'Q' },
98 { "help", no_argument, NULL, 'h' },
99 { "long-help", no_argument, NULL, 'H' },
100 { "version", no_argument, NULL, 'V' },
101
102 { NULL, 0, NULL, 0 }
103 };
104
105 int c;
106
107 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
108 != -1) {
109 switch (c) {
110 // Compression preset (also for decompression if --format=raw)
111 case '0': case '1': case '2': case '3': case '4':
112 case '5': case '6': case '7': case '8': case '9':
113 coder_set_preset(c - '0');
114 break;
115
116 // --memory
117 case 'M': {
118 // Support specifying the limit as a percentage of
119 // installed physical RAM.
120 size_t len = strlen(optarg);
121 if (len > 0 && optarg[len - 1] == '%') {
122 optarg[len - 1] = '\0';
123 hardware_memlimit_set_percentage(
124 str_to_uint64(
125 "memory%", optarg, 1, 100));
126 } else {
127 // On 32-bit systems, SIZE_MAX would make more
128 // sense than UINT64_MAX. But use UINT64_MAX
129 // still so that scripts that assume > 4 GiB
130 // values don't break.
131 hardware_memlimit_set(str_to_uint64(
132 "memory", optarg,
133 0, UINT64_MAX));
134 }
135
136 break;
137 }
138
139 // --suffix
140 case 'S':
141 suffix_set(optarg);
142 break;
143
144 case 'T':
145 hardware_threadlimit_set(str_to_uint64(
146 "threads", optarg, 0, UINT32_MAX));
147 break;
148
149 // --version
150 case 'V':
151 // This doesn't return.
152 message_version();
153
154 // --stdout
155 case 'c':
156 opt_stdout = true;
157 break;
158
159 // --decompress
160 case 'd':
161 opt_mode = MODE_DECOMPRESS;
162 break;
163
164 // --extreme
165 case 'e':
166 coder_set_extreme();
167 break;
168
169 // --force
170 case 'f':
171 opt_force = true;
172 break;
173
174 // --help
175 case 'h':
176 // This doesn't return.
177 message_help(false);
178
179 // --long-help
180 case 'H':
181 // This doesn't return.
182 message_help(true);
183
184 // --list
185 case 'l':
186 opt_mode = MODE_LIST;
187 break;
188
189 // --keep
190 case 'k':
191 opt_keep_original = true;
192 break;
193
194 // --quiet
195 case 'q':
196 message_verbosity_decrease();
197 break;
198
199 case 'Q':
200 set_exit_no_warn();
201 break;
202
203 case 't':
204 opt_mode = MODE_TEST;
205 break;
206
207 // --verbose
208 case 'v':
209 message_verbosity_increase();
210 break;
211
212 case 'z':
213 opt_mode = MODE_COMPRESS;
214 break;
215
216 // Filter setup
217
218 case OPT_SUBBLOCK:
219 coder_add_filter(LZMA_FILTER_SUBBLOCK,
220 options_subblock(optarg));
221 break;
222
223 case OPT_X86:
224 coder_add_filter(LZMA_FILTER_X86,
225 options_bcj(optarg));
226 break;
227
228 case OPT_POWERPC:
229 coder_add_filter(LZMA_FILTER_POWERPC,
230 options_bcj(optarg));
231 break;
232
233 case OPT_IA64:
234 coder_add_filter(LZMA_FILTER_IA64,
235 options_bcj(optarg));
236 break;
237
238 case OPT_ARM:
239 coder_add_filter(LZMA_FILTER_ARM,
240 options_bcj(optarg));
241 break;
242
243 case OPT_ARMTHUMB:
244 coder_add_filter(LZMA_FILTER_ARMTHUMB,
245 options_bcj(optarg));
246 break;
247
248 case OPT_SPARC:
249 coder_add_filter(LZMA_FILTER_SPARC,
250 options_bcj(optarg));
251 break;
252
253 case OPT_DELTA:
254 coder_add_filter(LZMA_FILTER_DELTA,
255 options_delta(optarg));
256 break;
257
258 case OPT_LZMA1:
259 coder_add_filter(LZMA_FILTER_LZMA1,
260 options_lzma(optarg));
261 break;
262
263 case OPT_LZMA2:
264 coder_add_filter(LZMA_FILTER_LZMA2,
265 options_lzma(optarg));
266 break;
267
268 // Other
269
270 // --format
271 case 'F': {
272 // Just in case, support both "lzma" and "alone" since
273 // the latter was used for forward compatibility in
274 // LZMA Utils 4.32.x.
275 static const struct {
276 char str[8];
277 enum format_type format;
278 } types[] = {
279 { "auto", FORMAT_AUTO },
280 { "xz", FORMAT_XZ },
281 { "lzma", FORMAT_LZMA },
282 { "alone", FORMAT_LZMA },
283 // { "gzip", FORMAT_GZIP },
284 // { "gz", FORMAT_GZIP },
285 { "raw", FORMAT_RAW },
286 };
287
288 size_t i = 0;
289 while (strcmp(types[i].str, optarg) != 0)
290 if (++i == ARRAY_SIZE(types))
291 message_fatal(_("%s: Unknown file "
292 "format type"),
293 optarg);
294
295 opt_format = types[i].format;
296 break;
297 }
298
299 // --check
300 case 'C': {
301 static const struct {
302 char str[8];
303 lzma_check check;
304 } types[] = {
305 { "none", LZMA_CHECK_NONE },
306 { "crc32", LZMA_CHECK_CRC32 },
307 { "crc64", LZMA_CHECK_CRC64 },
308 { "sha256", LZMA_CHECK_SHA256 },
309 };
310
311 size_t i = 0;
312 while (strcmp(types[i].str, optarg) != 0) {
313 if (++i == ARRAY_SIZE(types))
314 message_fatal(_("%s: Unsupported "
315 "integrity "
316 "check type"), optarg);
317 }
318
319 // Use a separate check in case we are using different
320 // liblzma than what was used to compile us.
321 if (!lzma_check_is_supported(types[i].check))
322 message_fatal(_("%s: Unsupported integrity "
323 "check type"), optarg);
324
325 coder_set_check(types[i].check);
326 break;
327 }
328
329 case OPT_FILES:
330 args->files_delim = '\n';
331
332 // Fall through
333
334 case OPT_FILES0:
335 if (args->files_name != NULL)
336 message_fatal(_("Only one file can be "
337 "specified with `--files'"
338 "or `--files0'."));
339
340 if (optarg == NULL) {
341 args->files_name = (char *)stdin_filename;
342 args->files_file = stdin;
343 } else {
344 args->files_name = optarg;
345 args->files_file = fopen(optarg,
346 c == OPT_FILES ? "r" : "rb");
347 if (args->files_file == NULL)
348 message_fatal("%s: %s", optarg,
349 strerror(errno));
350 }
351
352 break;
353
354 default:
355 message_try_help();
356 my_exit(E_ERROR);
357 }
358 }
359
360 return;
361 }
362
363
364 static void
parse_environment(args_info * args,char * argv0)365 parse_environment(args_info *args, char *argv0)
366 {
367 char *env = getenv("XZ_OPT");
368 if (env == NULL)
369 return;
370
371 // We modify the string, so make a copy of it.
372 env = xstrdup(env);
373
374 // Calculate the number of arguments in env. argc stats at one
375 // to include space for the program name.
376 int argc = 1;
377 bool prev_was_space = true;
378 for (size_t i = 0; env[i] != '\0'; ++i) {
379 // NOTE: Cast to unsigned char is needed so that correct
380 // value gets passed to isspace(), which expects
381 // unsigned char cast to int. Casting to int is done
382 // automatically due to integer promotion, but we need to
383 // force char to unsigned char manually. Otherwise 8-bit
384 // characters would get promoted to wrong value if
385 // char is signed.
386 if (isspace((unsigned char)env[i])) {
387 prev_was_space = true;
388 } else if (prev_was_space) {
389 prev_was_space = false;
390
391 // Keep argc small enough to fit into a singed int
392 // and to keep it usable for memory allocation.
393 if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
394 message_fatal(_("The environment variable "
395 "XZ_OPT contains too many "
396 "arguments"));
397 }
398 }
399
400 // Allocate memory to hold pointers to the arguments. Add one to get
401 // space for the terminating NULL (if some systems happen to need it).
402 char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
403 argv[0] = argv0;
404 argv[argc] = NULL;
405
406 // Go through the string again. Split the arguments using '\0'
407 // characters and add pointers to the resulting strings to argv.
408 argc = 1;
409 prev_was_space = true;
410 for (size_t i = 0; env[i] != '\0'; ++i) {
411 if (isspace((unsigned char)env[i])) {
412 prev_was_space = true;
413 env[i] = '\0';
414 } else if (prev_was_space) {
415 prev_was_space = false;
416 argv[argc++] = env + i;
417 }
418 }
419
420 // Parse the argument list we got from the environment. All non-option
421 // arguments i.e. filenames are ignored.
422 parse_real(args, argc, argv);
423
424 // Reset the state of the getopt_long() so that we can parse the
425 // command line options too. There are two incompatible ways to
426 // do it.
427 #ifdef HAVE_OPTRESET
428 // BSD
429 optind = 1;
430 optreset = 1;
431 #else
432 // GNU, Solaris
433 optind = 0;
434 #endif
435
436 // We don't need the argument list from environment anymore.
437 free(argv);
438 free(env);
439
440 return;
441 }
442
443
444 extern void
args_parse(args_info * args,int argc,char ** argv)445 args_parse(args_info *args, int argc, char **argv)
446 {
447 // Initialize those parts of *args that we need later.
448 args->files_name = NULL;
449 args->files_file = NULL;
450 args->files_delim = '\0';
451
452 // Check how we were called.
453 {
454 #ifdef DOSLIKE
455 // We adjusted argv[0] in the beginning of main() so we don't
456 // need to do anything here.
457 const char *name = argv[0];
458 #else
459 // Remove the leading path name, if any.
460 const char *name = strrchr(argv[0], '/');
461 if (name == NULL)
462 name = argv[0];
463 else
464 ++name;
465 #endif
466
467 // NOTE: It's possible that name[0] is now '\0' if argv[0]
468 // is weird, but it doesn't matter here.
469
470 // Look for full command names instead of substrings like
471 // "un", "cat", and "lz" to reduce possibility of false
472 // positives when the programs have been renamed.
473 if (strstr(name, "xzcat") != NULL) {
474 opt_mode = MODE_DECOMPRESS;
475 opt_stdout = true;
476 } else if (strstr(name, "unxz") != NULL) {
477 opt_mode = MODE_DECOMPRESS;
478 } else if (strstr(name, "lzcat") != NULL) {
479 opt_format = FORMAT_LZMA;
480 opt_mode = MODE_DECOMPRESS;
481 opt_stdout = true;
482 } else if (strstr(name, "unlzma") != NULL) {
483 opt_format = FORMAT_LZMA;
484 opt_mode = MODE_DECOMPRESS;
485 } else if (strstr(name, "lzma") != NULL) {
486 opt_format = FORMAT_LZMA;
487 }
488 }
489
490 // First the flags from environment
491 parse_environment(args, argv[0]);
492
493 // Then from the command line
494 parse_real(args, argc, argv);
495
496 // Never remove the source file when the destination is not on disk.
497 // In test mode the data is written nowhere, but setting opt_stdout
498 // will make the rest of the code behave well.
499 if (opt_stdout || opt_mode == MODE_TEST) {
500 opt_keep_original = true;
501 opt_stdout = true;
502 }
503
504 // When compressing, if no --format flag was used, or it
505 // was --format=auto, we compress to the .xz format.
506 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
507 opt_format = FORMAT_XZ;
508
509 // Compression settings need to be validated (options themselves and
510 // their memory usage) when compressing to any file format. It has to
511 // be done also when uncompressing raw data, since for raw decoding
512 // the options given on the command line are used to know what kind
513 // of raw data we are supposed to decode.
514 if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
515 coder_set_compression_settings();
516
517 // If no filenames are given, use stdin.
518 if (argv[optind] == NULL && args->files_name == NULL) {
519 // We don't modify or free() the "-" constant. The caller
520 // modifies this so don't make the struct itself const.
521 static char *names_stdin[2] = { (char *)"-", NULL };
522 args->arg_names = names_stdin;
523 args->arg_count = 1;
524 } else {
525 // We got at least one filename from the command line, or
526 // --files or --files0 was specified.
527 args->arg_names = argv + optind;
528 args->arg_count = argc - optind;
529 }
530
531 return;
532 }
533