xref: /dragonfly/contrib/xz/src/xz/args.c (revision 9f7604d7)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Author:     Lasse Collin
9 //
10 //  This file has been put into the public domain.
11 //  You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14 
15 #include "private.h"
16 
17 #include "getopt.h"
18 #include <ctype.h>
19 
20 
/// Set to true by -c / --stdout / --to-stdout in parse_real(). args_parse()
/// also sets it for the xzcat and lzcat program names and in test mode.
21 bool opt_stdout = false;
/// Set to true by -f / --force in parse_real().
22 bool opt_force = false;
/// Set to true by -k / --keep in parse_real(). args_parse() forces it to
/// true whenever opt_stdout is set or the mode is MODE_TEST.
23 bool opt_keep_original = false;
/// Set to true by --robot in parse_real(), which at the same time switches
/// LC_NUMERIC to the "C" locale.
24 bool opt_robot = false;
25 
/// Name stored in args->files_name when --files or --files0 is given
/// without an argument and the list is read from stdin.
26 // We don't modify or free() this, but we need to assign it in some
27 // non-const pointers.
28 const char stdin_filename[] = "(stdin)";
29 
30 
/// Parse a memory usage limit string and apply it to compression and/or
/// decompression.
///
/// \param      name            Name passed to str_to_uint64() when the value
///                             is a plain number (presumably used in its
///                             error messages)
/// \param      name_percentage Name passed to str_to_uint64() when the value
///                             ends with '%'
/// \param      str             Value to parse; a trailing '%' is
///                             overwritten with '\0' in place
/// \param      set_compress    Forwarded to hardware_memlimit_set()
/// \param      set_decompress  Forwarded to hardware_memlimit_set()
static void
parse_memlimit(const char *name, const char *name_percentage, char *str,
		bool set_compress, bool set_decompress)
{
	const size_t length = strlen(str);
	const bool is_percentage = length > 0 && str[length - 1] == '%';
	uint64_t limit;

	if (!is_percentage) {
		// Plain number. SIZE_MAX would be the natural upper bound on
		// 32-bit systems, but UINT64_MAX is accepted everywhere so
		// that scripts passing values above 4 GiB keep working.
		limit = str_to_uint64(name, str, 0, UINT64_MAX);
	} else {
		// Drop the '%' so that only the digits get parsed, and
		// accept 1-100 only.
		str[length - 1] = '\0';
		limit = str_to_uint64(name_percentage, str, 1, 100);
	}

	hardware_memlimit_set(limit, set_compress, set_decompress,
			is_percentage);
}
55 
56 
57 static void
58 parse_real(args_info *args, int argc, char **argv)
59 {
60 	enum {
61 		OPT_X86 = INT_MIN,
62 		OPT_POWERPC,
63 		OPT_IA64,
64 		OPT_ARM,
65 		OPT_ARMTHUMB,
66 		OPT_SPARC,
67 		OPT_DELTA,
68 		OPT_LZMA1,
69 		OPT_LZMA2,
70 
71 		OPT_NO_SPARSE,
72 		OPT_FILES,
73 		OPT_FILES0,
74 		OPT_MEM_COMPRESS,
75 		OPT_MEM_DECOMPRESS,
76 		OPT_NO_ADJUST,
77 		OPT_INFO_MEMORY,
78 		OPT_ROBOT,
79 	};
80 
81 	static const char short_opts[]
82 			= "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
83 
84 	static const struct option long_opts[] = {
85 		// Operation mode
86 		{ "compress",     no_argument,       NULL,  'z' },
87 		{ "decompress",   no_argument,       NULL,  'd' },
88 		{ "uncompress",   no_argument,       NULL,  'd' },
89 		{ "test",         no_argument,       NULL,  't' },
90 		{ "list",         no_argument,       NULL,  'l' },
91 
92 		// Operation modifiers
93 		{ "keep",         no_argument,       NULL,  'k' },
94 		{ "force",        no_argument,       NULL,  'f' },
95 		{ "stdout",       no_argument,       NULL,  'c' },
96 		{ "to-stdout",    no_argument,       NULL,  'c' },
97 		{ "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
98 		{ "suffix",       required_argument, NULL,  'S' },
99 		// { "recursive",      no_argument,       NULL,  'r' }, // TODO
100 		{ "files",        optional_argument, NULL,  OPT_FILES },
101 		{ "files0",       optional_argument, NULL,  OPT_FILES0 },
102 
103 		// Basic compression settings
104 		{ "format",       required_argument, NULL,  'F' },
105 		{ "check",        required_argument, NULL,  'C' },
106 		{ "memlimit-compress",   required_argument, NULL, OPT_MEM_COMPRESS },
107 		{ "memlimit-decompress", required_argument, NULL, OPT_MEM_DECOMPRESS },
108 		{ "memlimit",     required_argument, NULL,  'M' },
109 		{ "memory",       required_argument, NULL,  'M' }, // Old alias
110 		{ "no-adjust",    no_argument,       NULL,  OPT_NO_ADJUST },
111 		{ "threads",      required_argument, NULL,  'T' },
112 
113 		{ "extreme",      no_argument,       NULL,  'e' },
114 		{ "fast",         no_argument,       NULL,  '0' },
115 		{ "best",         no_argument,       NULL,  '9' },
116 
117 		// Filters
118 		{ "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
119 		{ "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
120 		{ "x86",          optional_argument, NULL,  OPT_X86 },
121 		{ "powerpc",      optional_argument, NULL,  OPT_POWERPC },
122 		{ "ia64",         optional_argument, NULL,  OPT_IA64 },
123 		{ "arm",          optional_argument, NULL,  OPT_ARM },
124 		{ "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
125 		{ "sparc",        optional_argument, NULL,  OPT_SPARC },
126 		{ "delta",        optional_argument, NULL,  OPT_DELTA },
127 
128 		// Other options
129 		{ "quiet",        no_argument,       NULL,  'q' },
130 		{ "verbose",      no_argument,       NULL,  'v' },
131 		{ "no-warn",      no_argument,       NULL,  'Q' },
132 		{ "robot",        no_argument,       NULL,  OPT_ROBOT },
133 		{ "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
134 		{ "help",         no_argument,       NULL,  'h' },
135 		{ "long-help",    no_argument,       NULL,  'H' },
136 		{ "version",      no_argument,       NULL,  'V' },
137 
138 		{ NULL,           0,                 NULL,   0 }
139 	};
140 
141 	int c;
142 
143 	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
144 			!= -1) {
145 		switch (c) {
146 		// Compression preset (also for decompression if --format=raw)
147 		case '0': case '1': case '2': case '3': case '4':
148 		case '5': case '6': case '7': case '8': case '9':
149 			coder_set_preset(c - '0');
150 			break;
151 
152 		// --memlimit-compress
153 		case OPT_MEM_COMPRESS:
154 			parse_memlimit("memlimit-compress",
155 					"memlimit-compress%", optarg,
156 					true, false);
157 			break;
158 
159 		// --memlimit-decompress
160 		case OPT_MEM_DECOMPRESS:
161 			parse_memlimit("memlimit-decompress",
162 					"memlimit-decompress%", optarg,
163 					false, true);
164 			break;
165 
166 		// --memlimit
167 		case 'M':
168 			parse_memlimit("memlimit", "memlimit%", optarg,
169 					true, true);
170 			break;
171 
172 		// --suffix
173 		case 'S':
174 			suffix_set(optarg);
175 			break;
176 
177 		case 'T':
178 			hardware_threadlimit_set(str_to_uint64(
179 					"threads", optarg, 0, UINT32_MAX));
180 			break;
181 
182 		// --version
183 		case 'V':
184 			// This doesn't return.
185 			message_version();
186 
187 		// --stdout
188 		case 'c':
189 			opt_stdout = true;
190 			break;
191 
192 		// --decompress
193 		case 'd':
194 			opt_mode = MODE_DECOMPRESS;
195 			break;
196 
197 		// --extreme
198 		case 'e':
199 			coder_set_extreme();
200 			break;
201 
202 		// --force
203 		case 'f':
204 			opt_force = true;
205 			break;
206 
207 		// --info-memory
208 		case OPT_INFO_MEMORY:
209 			// This doesn't return.
210 			hardware_memlimit_show();
211 
212 		// --help
213 		case 'h':
214 			// This doesn't return.
215 			message_help(false);
216 
217 		// --long-help
218 		case 'H':
219 			// This doesn't return.
220 			message_help(true);
221 
222 		// --list
223 		case 'l':
224 			opt_mode = MODE_LIST;
225 			break;
226 
227 		// --keep
228 		case 'k':
229 			opt_keep_original = true;
230 			break;
231 
232 		// --quiet
233 		case 'q':
234 			message_verbosity_decrease();
235 			break;
236 
237 		case 'Q':
238 			set_exit_no_warn();
239 			break;
240 
241 		case 't':
242 			opt_mode = MODE_TEST;
243 			break;
244 
245 		// --verbose
246 		case 'v':
247 			message_verbosity_increase();
248 			break;
249 
250 		// --robot
251 		case OPT_ROBOT:
252 			opt_robot = true;
253 
254 			// This is to make sure that floating point numbers
255 			// always have a dot as decimal separator.
256 			setlocale(LC_NUMERIC, "C");
257 			break;
258 
259 		case 'z':
260 			opt_mode = MODE_COMPRESS;
261 			break;
262 
263 		// Filter setup
264 
265 		case OPT_X86:
266 			coder_add_filter(LZMA_FILTER_X86,
267 					options_bcj(optarg));
268 			break;
269 
270 		case OPT_POWERPC:
271 			coder_add_filter(LZMA_FILTER_POWERPC,
272 					options_bcj(optarg));
273 			break;
274 
275 		case OPT_IA64:
276 			coder_add_filter(LZMA_FILTER_IA64,
277 					options_bcj(optarg));
278 			break;
279 
280 		case OPT_ARM:
281 			coder_add_filter(LZMA_FILTER_ARM,
282 					options_bcj(optarg));
283 			break;
284 
285 		case OPT_ARMTHUMB:
286 			coder_add_filter(LZMA_FILTER_ARMTHUMB,
287 					options_bcj(optarg));
288 			break;
289 
290 		case OPT_SPARC:
291 			coder_add_filter(LZMA_FILTER_SPARC,
292 					options_bcj(optarg));
293 			break;
294 
295 		case OPT_DELTA:
296 			coder_add_filter(LZMA_FILTER_DELTA,
297 					options_delta(optarg));
298 			break;
299 
300 		case OPT_LZMA1:
301 			coder_add_filter(LZMA_FILTER_LZMA1,
302 					options_lzma(optarg));
303 			break;
304 
305 		case OPT_LZMA2:
306 			coder_add_filter(LZMA_FILTER_LZMA2,
307 					options_lzma(optarg));
308 			break;
309 
310 		// Other
311 
312 		// --format
313 		case 'F': {
314 			// Just in case, support both "lzma" and "alone" since
315 			// the latter was used for forward compatibility in
316 			// LZMA Utils 4.32.x.
317 			static const struct {
318 				char str[8];
319 				enum format_type format;
320 			} types[] = {
321 				{ "auto",   FORMAT_AUTO },
322 				{ "xz",     FORMAT_XZ },
323 				{ "lzma",   FORMAT_LZMA },
324 				{ "alone",  FORMAT_LZMA },
325 				// { "gzip",   FORMAT_GZIP },
326 				// { "gz",     FORMAT_GZIP },
327 				{ "raw",    FORMAT_RAW },
328 			};
329 
330 			size_t i = 0;
331 			while (strcmp(types[i].str, optarg) != 0)
332 				if (++i == ARRAY_SIZE(types))
333 					message_fatal(_("%s: Unknown file "
334 							"format type"),
335 							optarg);
336 
337 			opt_format = types[i].format;
338 			break;
339 		}
340 
341 		// --check
342 		case 'C': {
343 			static const struct {
344 				char str[8];
345 				lzma_check check;
346 			} types[] = {
347 				{ "none",   LZMA_CHECK_NONE },
348 				{ "crc32",  LZMA_CHECK_CRC32 },
349 				{ "crc64",  LZMA_CHECK_CRC64 },
350 				{ "sha256", LZMA_CHECK_SHA256 },
351 			};
352 
353 			size_t i = 0;
354 			while (strcmp(types[i].str, optarg) != 0) {
355 				if (++i == ARRAY_SIZE(types))
356 					message_fatal(_("%s: Unsupported "
357 							"integrity "
358 							"check type"), optarg);
359 			}
360 
361 			// Use a separate check in case we are using different
362 			// liblzma than what was used to compile us.
363 			if (!lzma_check_is_supported(types[i].check))
364 				message_fatal(_("%s: Unsupported integrity "
365 						"check type"), optarg);
366 
367 			coder_set_check(types[i].check);
368 			break;
369 		}
370 
371 		case OPT_NO_SPARSE:
372 			io_no_sparse();
373 			break;
374 
375 		case OPT_FILES:
376 			args->files_delim = '\n';
377 
378 		// Fall through
379 
380 		case OPT_FILES0:
381 			if (args->files_name != NULL)
382 				message_fatal(_("Only one file can be "
383 						"specified with `--files' "
384 						"or `--files0'."));
385 
386 			if (optarg == NULL) {
387 				args->files_name = (char *)stdin_filename;
388 				args->files_file = stdin;
389 			} else {
390 				args->files_name = optarg;
391 				args->files_file = fopen(optarg,
392 						c == OPT_FILES ? "r" : "rb");
393 				if (args->files_file == NULL)
394 					message_fatal("%s: %s", optarg,
395 							strerror(errno));
396 			}
397 
398 			break;
399 
400 		case OPT_NO_ADJUST:
401 			opt_auto_adjust = false;
402 			break;
403 
404 		default:
405 			message_try_help();
406 			tuklib_exit(E_ERROR, E_ERROR, false);
407 		}
408 	}
409 
410 	return;
411 }
412 
413 
414 static void
415 parse_environment(args_info *args, char *argv0, const char *varname)
416 {
417 	char *env = getenv(varname);
418 	if (env == NULL)
419 		return;
420 
421 	// We modify the string, so make a copy of it.
422 	env = xstrdup(env);
423 
424 	// Calculate the number of arguments in env. argc stats at one
425 	// to include space for the program name.
426 	int argc = 1;
427 	bool prev_was_space = true;
428 	for (size_t i = 0; env[i] != '\0'; ++i) {
429 		// NOTE: Cast to unsigned char is needed so that correct
430 		// value gets passed to isspace(), which expects
431 		// unsigned char cast to int. Casting to int is done
432 		// automatically due to integer promotion, but we need to
433 		// force char to unsigned char manually. Otherwise 8-bit
434 		// characters would get promoted to wrong value if
435 		// char is signed.
436 		if (isspace((unsigned char)env[i])) {
437 			prev_was_space = true;
438 		} else if (prev_was_space) {
439 			prev_was_space = false;
440 
441 			// Keep argc small enough to fit into a signed int
442 			// and to keep it usable for memory allocation.
443 			if (++argc == my_min(
444 					INT_MAX, SIZE_MAX / sizeof(char *)))
445 				message_fatal(_("The environment variable "
446 						"%s contains too many "
447 						"arguments"), varname);
448 		}
449 	}
450 
451 	// Allocate memory to hold pointers to the arguments. Add one to get
452 	// space for the terminating NULL (if some systems happen to need it).
453 	char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
454 	argv[0] = argv0;
455 	argv[argc] = NULL;
456 
457 	// Go through the string again. Split the arguments using '\0'
458 	// characters and add pointers to the resulting strings to argv.
459 	argc = 1;
460 	prev_was_space = true;
461 	for (size_t i = 0; env[i] != '\0'; ++i) {
462 		if (isspace((unsigned char)env[i])) {
463 			prev_was_space = true;
464 			env[i] = '\0';
465 		} else if (prev_was_space) {
466 			prev_was_space = false;
467 			argv[argc++] = env + i;
468 		}
469 	}
470 
471 	// Parse the argument list we got from the environment. All non-option
472 	// arguments i.e. filenames are ignored.
473 	parse_real(args, argc, argv);
474 
475 	// Reset the state of the getopt_long() so that we can parse the
476 	// command line options too. There are two incompatible ways to
477 	// do it.
478 #ifdef HAVE_OPTRESET
479 	// BSD
480 	optind = 1;
481 	optreset = 1;
482 #else
483 	// GNU, Solaris
484 	optind = 0;
485 #endif
486 
487 	// We don't need the argument list from environment anymore.
488 	free(argv);
489 	free(env);
490 
491 	return;
492 }
493 
494 
495 extern void
496 args_parse(args_info *args, int argc, char **argv)
497 {
498 	// Initialize those parts of *args that we need later.
499 	args->files_name = NULL;
500 	args->files_file = NULL;
501 	args->files_delim = '\0';
502 
503 	// Check how we were called.
504 	{
505 		// Remove the leading path name, if any.
506 		const char *name = strrchr(argv[0], '/');
507 		if (name == NULL)
508 			name = argv[0];
509 		else
510 			++name;
511 
512 		// NOTE: It's possible that name[0] is now '\0' if argv[0]
513 		// is weird, but it doesn't matter here.
514 
515 		// Look for full command names instead of substrings like
516 		// "un", "cat", and "lz" to reduce possibility of false
517 		// positives when the programs have been renamed.
518 		if (strstr(name, "xzcat") != NULL) {
519 			opt_mode = MODE_DECOMPRESS;
520 			opt_stdout = true;
521 		} else if (strstr(name, "unxz") != NULL) {
522 			opt_mode = MODE_DECOMPRESS;
523 		} else if (strstr(name, "lzcat") != NULL) {
524 			opt_format = FORMAT_LZMA;
525 			opt_mode = MODE_DECOMPRESS;
526 			opt_stdout = true;
527 		} else if (strstr(name, "unlzma") != NULL) {
528 			opt_format = FORMAT_LZMA;
529 			opt_mode = MODE_DECOMPRESS;
530 		} else if (strstr(name, "lzma") != NULL) {
531 			opt_format = FORMAT_LZMA;
532 		}
533 	}
534 
535 	// First the flags from the environment
536 	parse_environment(args, argv[0], "XZ_DEFAULTS");
537 	parse_environment(args, argv[0], "XZ_OPT");
538 
539 	// Then from the command line
540 	parse_real(args, argc, argv);
541 
542 	// Never remove the source file when the destination is not on disk.
543 	// In test mode the data is written nowhere, but setting opt_stdout
544 	// will make the rest of the code behave well.
545 	if (opt_stdout || opt_mode == MODE_TEST) {
546 		opt_keep_original = true;
547 		opt_stdout = true;
548 	}
549 
550 	// When compressing, if no --format flag was used, or it
551 	// was --format=auto, we compress to the .xz format.
552 	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
553 		opt_format = FORMAT_XZ;
554 
555 	// Compression settings need to be validated (options themselves and
556 	// their memory usage) when compressing to any file format. It has to
557 	// be done also when uncompressing raw data, since for raw decoding
558 	// the options given on the command line are used to know what kind
559 	// of raw data we are supposed to decode.
560 	if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
561 		coder_set_compression_settings();
562 
563 	// If no filenames are given, use stdin.
564 	if (argv[optind] == NULL && args->files_name == NULL) {
565 		// We don't modify or free() the "-" constant. The caller
566 		// modifies this so don't make the struct itself const.
567 		static char *names_stdin[2] = { (char *)"-", NULL };
568 		args->arg_names = names_stdin;
569 		args->arg_count = 1;
570 	} else {
571 		// We got at least one filename from the command line, or
572 		// --files or --files0 was specified.
573 		args->arg_names = argv + optind;
574 		args->arg_count = argc - optind;
575 	}
576 
577 	return;
578 }
579