1 /* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 // vim: expandtab:ts=8:sw=4:softtabstop=4:
3 ///////////////////////////////////////////////////////////////////////////////
4 //
5 /// \file       args.c
6 /// \brief      Argument parsing
7 ///
8 /// \note       Filter-specific options parsing is in options.c.
9 //
10 //  Author:     Lasse Collin
11 //
12 //  This file has been put into the public domain.
13 //  You can do whatever you want with this file.
14 //
15 ///////////////////////////////////////////////////////////////////////////////
16 
17 #include "private.h"
18 
19 #include "getopt.h"
20 #include <ctype.h>
21 
22 
// Write the output to standard output. Set by --stdout and when the
// program is invoked as xzcat or lzcat; args_parse() also forces this
// to true in test mode.
bool opt_stdout = false;

// Set by --force.
bool opt_force = false;

// Keep the source file. Set by --keep; args_parse() forces this to true
// whenever opt_stdout ends up true or the operation mode is MODE_TEST.
bool opt_keep_original = false;

// Name stored in args->files_name when --files or --files0 is given
// without a filename, in which case the list is read from stdin.
//
// We don't modify or free() this, but we need to assign it in some
// non-const pointers.
const char *stdin_filename = "(stdin)";
30 
31 
32 static void
parse_real(args_info * args,int argc,char ** argv)33 parse_real(args_info *args, int argc, char **argv)
34 {
35 	enum {
36 		OPT_SUBBLOCK = INT_MIN,
37 		OPT_X86,
38 		OPT_POWERPC,
39 		OPT_IA64,
40 		OPT_ARM,
41 		OPT_ARMTHUMB,
42 		OPT_SPARC,
43 		OPT_DELTA,
44 		OPT_LZMA1,
45 		OPT_LZMA2,
46 
47 		OPT_FILES,
48 		OPT_FILES0,
49 	};
50 
51 	static const char short_opts[]
52 			= "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
53 
54 	static const struct option long_opts[] = {
55 		// Operation mode
56 		{ "compress",       no_argument,       NULL,  'z' },
57 		{ "decompress",     no_argument,       NULL,  'd' },
58 		{ "uncompress",     no_argument,       NULL,  'd' },
59 		{ "test",           no_argument,       NULL,  't' },
60 		{ "list",           no_argument,       NULL,  'l' },
61 
62 		// Operation modifiers
63 		{ "keep",           no_argument,       NULL,  'k' },
64 		{ "force",          no_argument,       NULL,  'f' },
65 		{ "stdout",         no_argument,       NULL,  'c' },
66 		{ "to-stdout",      no_argument,       NULL,  'c' },
67 		{ "suffix",         required_argument, NULL,  'S' },
68 		// { "recursive",      no_argument,       NULL,  'r' }, // TODO
69 		{ "files",          optional_argument, NULL,  OPT_FILES },
70 		{ "files0",         optional_argument, NULL,  OPT_FILES0 },
71 
72 		// Basic compression settings
73 		{ "format",         required_argument, NULL,  'F' },
74 		{ "check",          required_argument, NULL,  'C' },
75 		{ "memory",         required_argument, NULL,  'M' },
76 		{ "threads",        required_argument, NULL,  'T' },
77 
78 		{ "extreme",        no_argument,       NULL,  'e' },
79 		{ "fast",           no_argument,       NULL,  '0' },
80 		{ "best",           no_argument,       NULL,  '9' },
81 
82 		// Filters
83 		{ "lzma1",          optional_argument, NULL,  OPT_LZMA1 },
84 		{ "lzma2",          optional_argument, NULL,  OPT_LZMA2 },
85 		{ "x86",            optional_argument, NULL,  OPT_X86 },
86 		{ "powerpc",        optional_argument, NULL,  OPT_POWERPC },
87 		{ "ia64",           optional_argument, NULL,  OPT_IA64 },
88 		{ "arm",            optional_argument, NULL,  OPT_ARM },
89 		{ "armthumb",       optional_argument, NULL,  OPT_ARMTHUMB },
90 		{ "sparc",          optional_argument, NULL,  OPT_SPARC },
91 		{ "delta",          optional_argument, NULL,  OPT_DELTA },
92 		{ "subblock",       optional_argument, NULL,  OPT_SUBBLOCK },
93 
94 		// Other options
95 		{ "quiet",          no_argument,       NULL,  'q' },
96 		{ "verbose",        no_argument,       NULL,  'v' },
97 		{ "no-warn",        no_argument,       NULL,  'Q' },
98 		{ "help",           no_argument,       NULL,  'h' },
99 		{ "long-help",      no_argument,       NULL,  'H' },
100 		{ "version",        no_argument,       NULL,  'V' },
101 
102 		{ NULL,                 0,                 NULL,   0 }
103 	};
104 
105 	int c;
106 
107 	while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
108 			!= -1) {
109 		switch (c) {
110 		// Compression preset (also for decompression if --format=raw)
111 		case '0': case '1': case '2': case '3': case '4':
112 		case '5': case '6': case '7': case '8': case '9':
113 			coder_set_preset(c - '0');
114 			break;
115 
116 		// --memory
117 		case 'M': {
118 			// Support specifying the limit as a percentage of
119 			// installed physical RAM.
120 			size_t len = strlen(optarg);
121 			if (len > 0 && optarg[len - 1] == '%') {
122 				optarg[len - 1] = '\0';
123 				hardware_memlimit_set_percentage(
124 						str_to_uint64(
125 						"memory%", optarg, 1, 100));
126 			} else {
127 				// On 32-bit systems, SIZE_MAX would make more
128 				// sense than UINT64_MAX. But use UINT64_MAX
129 				// still so that scripts that assume > 4 GiB
130 				// values don't break.
131 				hardware_memlimit_set(str_to_uint64(
132 						"memory", optarg,
133 						0, UINT64_MAX));
134 			}
135 
136 			break;
137 		}
138 
139 		// --suffix
140 		case 'S':
141 			suffix_set(optarg);
142 			break;
143 
144 		case 'T':
145 			hardware_threadlimit_set(str_to_uint64(
146 					"threads", optarg, 0, UINT32_MAX));
147 			break;
148 
149 		// --version
150 		case 'V':
151 			// This doesn't return.
152 			message_version();
153 
154 		// --stdout
155 		case 'c':
156 			opt_stdout = true;
157 			break;
158 
159 		// --decompress
160 		case 'd':
161 			opt_mode = MODE_DECOMPRESS;
162 			break;
163 
164 		// --extreme
165 		case 'e':
166 			coder_set_extreme();
167 			break;
168 
169 		// --force
170 		case 'f':
171 			opt_force = true;
172 			break;
173 
174 		// --help
175 		case 'h':
176 			// This doesn't return.
177 			message_help(false);
178 
179 		// --long-help
180 		case 'H':
181 			// This doesn't return.
182 			message_help(true);
183 
184 		// --list
185 		case 'l':
186 			opt_mode = MODE_LIST;
187 			break;
188 
189 		// --keep
190 		case 'k':
191 			opt_keep_original = true;
192 			break;
193 
194 		// --quiet
195 		case 'q':
196 			message_verbosity_decrease();
197 			break;
198 
199 		case 'Q':
200 			set_exit_no_warn();
201 			break;
202 
203 		case 't':
204 			opt_mode = MODE_TEST;
205 			break;
206 
207 		// --verbose
208 		case 'v':
209 			message_verbosity_increase();
210 			break;
211 
212 		case 'z':
213 			opt_mode = MODE_COMPRESS;
214 			break;
215 
216 		// Filter setup
217 
218 		case OPT_SUBBLOCK:
219 			coder_add_filter(LZMA_FILTER_SUBBLOCK,
220 					options_subblock(optarg));
221 			break;
222 
223 		case OPT_X86:
224 			coder_add_filter(LZMA_FILTER_X86,
225 					options_bcj(optarg));
226 			break;
227 
228 		case OPT_POWERPC:
229 			coder_add_filter(LZMA_FILTER_POWERPC,
230 					options_bcj(optarg));
231 			break;
232 
233 		case OPT_IA64:
234 			coder_add_filter(LZMA_FILTER_IA64,
235 					options_bcj(optarg));
236 			break;
237 
238 		case OPT_ARM:
239 			coder_add_filter(LZMA_FILTER_ARM,
240 					options_bcj(optarg));
241 			break;
242 
243 		case OPT_ARMTHUMB:
244 			coder_add_filter(LZMA_FILTER_ARMTHUMB,
245 					options_bcj(optarg));
246 			break;
247 
248 		case OPT_SPARC:
249 			coder_add_filter(LZMA_FILTER_SPARC,
250 					options_bcj(optarg));
251 			break;
252 
253 		case OPT_DELTA:
254 			coder_add_filter(LZMA_FILTER_DELTA,
255 					options_delta(optarg));
256 			break;
257 
258 		case OPT_LZMA1:
259 			coder_add_filter(LZMA_FILTER_LZMA1,
260 					options_lzma(optarg));
261 			break;
262 
263 		case OPT_LZMA2:
264 			coder_add_filter(LZMA_FILTER_LZMA2,
265 					options_lzma(optarg));
266 			break;
267 
268 		// Other
269 
270 		// --format
271 		case 'F': {
272 			// Just in case, support both "lzma" and "alone" since
273 			// the latter was used for forward compatibility in
274 			// LZMA Utils 4.32.x.
275 			static const struct {
276 				char str[8];
277 				enum format_type format;
278 			} types[] = {
279 				{ "auto",   FORMAT_AUTO },
280 				{ "xz",     FORMAT_XZ },
281 				{ "lzma",   FORMAT_LZMA },
282 				{ "alone",  FORMAT_LZMA },
283 				// { "gzip",   FORMAT_GZIP },
284 				// { "gz",     FORMAT_GZIP },
285 				{ "raw",    FORMAT_RAW },
286 			};
287 
288 			size_t i = 0;
289 			while (strcmp(types[i].str, optarg) != 0)
290 				if (++i == ARRAY_SIZE(types))
291 					message_fatal(_("%s: Unknown file "
292 							"format type"),
293 							optarg);
294 
295 			opt_format = types[i].format;
296 			break;
297 		}
298 
299 		// --check
300 		case 'C': {
301 			static const struct {
302 				char str[8];
303 				lzma_check check;
304 			} types[] = {
305 				{ "none",   LZMA_CHECK_NONE },
306 				{ "crc32",  LZMA_CHECK_CRC32 },
307 				{ "crc64",  LZMA_CHECK_CRC64 },
308 				{ "sha256", LZMA_CHECK_SHA256 },
309 			};
310 
311 			size_t i = 0;
312 			while (strcmp(types[i].str, optarg) != 0) {
313 				if (++i == ARRAY_SIZE(types))
314 					message_fatal(_("%s: Unsupported "
315 							"integrity "
316 							"check type"), optarg);
317 			}
318 
319 			// Use a separate check in case we are using different
320 			// liblzma than what was used to compile us.
321 			if (!lzma_check_is_supported(types[i].check))
322 				message_fatal(_("%s: Unsupported integrity "
323 						"check type"), optarg);
324 
325 			coder_set_check(types[i].check);
326 			break;
327 		}
328 
329 		case OPT_FILES:
330 			args->files_delim = '\n';
331 
332 		// Fall through
333 
334 		case OPT_FILES0:
335 			if (args->files_name != NULL)
336 				message_fatal(_("Only one file can be "
337 						"specified with `--files'"
338 						"or `--files0'."));
339 
340 			if (optarg == NULL) {
341 				args->files_name = (char *)stdin_filename;
342 				args->files_file = stdin;
343 			} else {
344 				args->files_name = optarg;
345 				args->files_file = fopen(optarg,
346 						c == OPT_FILES ? "r" : "rb");
347 				if (args->files_file == NULL)
348 					message_fatal("%s: %s", optarg,
349 							strerror(errno));
350 			}
351 
352 			break;
353 
354 		default:
355 			message_try_help();
356 			my_exit(E_ERROR);
357 		}
358 	}
359 
360 	return;
361 }
362 
363 
364 static void
parse_environment(args_info * args,char * argv0)365 parse_environment(args_info *args, char *argv0)
366 {
367 	char *env = getenv("XZ_OPT");
368 	if (env == NULL)
369 		return;
370 
371 	// We modify the string, so make a copy of it.
372 	env = xstrdup(env);
373 
374 	// Calculate the number of arguments in env. argc stats at one
375 	// to include space for the program name.
376 	int argc = 1;
377 	bool prev_was_space = true;
378 	for (size_t i = 0; env[i] != '\0'; ++i) {
379 		// NOTE: Cast to unsigned char is needed so that correct
380 		// value gets passed to isspace(), which expects
381 		// unsigned char cast to int. Casting to int is done
382 		// automatically due to integer promotion, but we need to
383 		// force char to unsigned char manually. Otherwise 8-bit
384 		// characters would get promoted to wrong value if
385 		// char is signed.
386 		if (isspace((unsigned char)env[i])) {
387 			prev_was_space = true;
388 		} else if (prev_was_space) {
389 			prev_was_space = false;
390 
391 			// Keep argc small enough to fit into a singed int
392 			// and to keep it usable for memory allocation.
393 			if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
394 				message_fatal(_("The environment variable "
395 						"XZ_OPT contains too many "
396 						"arguments"));
397 		}
398 	}
399 
400 	// Allocate memory to hold pointers to the arguments. Add one to get
401 	// space for the terminating NULL (if some systems happen to need it).
402 	char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
403 	argv[0] = argv0;
404 	argv[argc] = NULL;
405 
406 	// Go through the string again. Split the arguments using '\0'
407 	// characters and add pointers to the resulting strings to argv.
408 	argc = 1;
409 	prev_was_space = true;
410 	for (size_t i = 0; env[i] != '\0'; ++i) {
411 		if (isspace((unsigned char)env[i])) {
412 			prev_was_space = true;
413 			env[i] = '\0';
414 		} else if (prev_was_space) {
415 			prev_was_space = false;
416 			argv[argc++] = env + i;
417 		}
418 	}
419 
420 	// Parse the argument list we got from the environment. All non-option
421 	// arguments i.e. filenames are ignored.
422 	parse_real(args, argc, argv);
423 
424 	// Reset the state of the getopt_long() so that we can parse the
425 	// command line options too. There are two incompatible ways to
426 	// do it.
427 #ifdef HAVE_OPTRESET
428 	// BSD
429 	optind = 1;
430 	optreset = 1;
431 #else
432 	// GNU, Solaris
433 	optind = 0;
434 #endif
435 
436 	// We don't need the argument list from environment anymore.
437 	free(argv);
438 	free(env);
439 
440 	return;
441 }
442 
443 
444 extern void
args_parse(args_info * args,int argc,char ** argv)445 args_parse(args_info *args, int argc, char **argv)
446 {
447 	// Initialize those parts of *args that we need later.
448 	args->files_name = NULL;
449 	args->files_file = NULL;
450 	args->files_delim = '\0';
451 
452 	// Check how we were called.
453 	{
454 #ifdef DOSLIKE
455 		// We adjusted argv[0] in the beginning of main() so we don't
456 		// need to do anything here.
457 		const char *name = argv[0];
458 #else
459 		// Remove the leading path name, if any.
460 		const char *name = strrchr(argv[0], '/');
461 		if (name == NULL)
462 			name = argv[0];
463 		else
464 			++name;
465 #endif
466 
467 		// NOTE: It's possible that name[0] is now '\0' if argv[0]
468 		// is weird, but it doesn't matter here.
469 
470 		// Look for full command names instead of substrings like
471 		// "un", "cat", and "lz" to reduce possibility of false
472 		// positives when the programs have been renamed.
473 		if (strstr(name, "xzcat") != NULL) {
474 			opt_mode = MODE_DECOMPRESS;
475 			opt_stdout = true;
476 		} else if (strstr(name, "unxz") != NULL) {
477 			opt_mode = MODE_DECOMPRESS;
478 		} else if (strstr(name, "lzcat") != NULL) {
479 			opt_format = FORMAT_LZMA;
480 			opt_mode = MODE_DECOMPRESS;
481 			opt_stdout = true;
482 		} else if (strstr(name, "unlzma") != NULL) {
483 			opt_format = FORMAT_LZMA;
484 			opt_mode = MODE_DECOMPRESS;
485 		} else if (strstr(name, "lzma") != NULL) {
486 			opt_format = FORMAT_LZMA;
487 		}
488 	}
489 
490 	// First the flags from environment
491 	parse_environment(args, argv[0]);
492 
493 	// Then from the command line
494 	parse_real(args, argc, argv);
495 
496 	// Never remove the source file when the destination is not on disk.
497 	// In test mode the data is written nowhere, but setting opt_stdout
498 	// will make the rest of the code behave well.
499 	if (opt_stdout || opt_mode == MODE_TEST) {
500 		opt_keep_original = true;
501 		opt_stdout = true;
502 	}
503 
504 	// When compressing, if no --format flag was used, or it
505 	// was --format=auto, we compress to the .xz format.
506 	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
507 		opt_format = FORMAT_XZ;
508 
509 	// Compression settings need to be validated (options themselves and
510 	// their memory usage) when compressing to any file format. It has to
511 	// be done also when uncompressing raw data, since for raw decoding
512 	// the options given on the command line are used to know what kind
513 	// of raw data we are supposed to decode.
514 	if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
515 		coder_set_compression_settings();
516 
517 	// If no filenames are given, use stdin.
518 	if (argv[optind] == NULL && args->files_name == NULL) {
519 		// We don't modify or free() the "-" constant. The caller
520 		// modifies this so don't make the struct itself const.
521 		static char *names_stdin[2] = { (char *)"-", NULL };
522 		args->arg_names = names_stdin;
523 		args->arg_count = 1;
524 	} else {
525 		// We got at least one filename from the command line, or
526 		// --files or --files0 was specified.
527 		args->arg_names = argv + optind;
528 		args->arg_count = argc - optind;
529 	}
530 
531 	return;
532 }
533