xref: /freebsd/contrib/xz/src/xz/coder.c (revision 4d846d26)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       coder.c
4 /// \brief      Compresses or uncompresses a file
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "private.h"
14 
15 
/// Result of coder_init(). It tells coder_run() which coding loop to use.
enum coder_init_ret {
	/// A liblzma coder was initialized; coder_normal() does the work.
	CODER_INIT_NORMAL = 0,

	/// Unrecognized input that is to be copied as is by coder_passthru().
	CODER_INIT_PASSTHRU,

	/// Initialization failed and an error message has been shown.
	CODER_INIT_ERROR
};
22 
23 
24 enum operation_mode opt_mode = MODE_COMPRESS;
25 enum format_type opt_format = FORMAT_AUTO;
26 bool opt_auto_adjust = true;
27 bool opt_single_stream = false;
28 uint64_t opt_block_size = 0;
29 uint64_t *opt_block_list = NULL;
30 
31 
32 /// Stream used to communicate with liblzma
33 static lzma_stream strm = LZMA_STREAM_INIT;
34 
35 /// Filters needed for all encoding all formats, and also decoding in raw data
36 static lzma_filter filters[LZMA_FILTERS_MAX + 1];
37 
38 /// Input and output buffers
39 static io_buf in_buf;
40 static io_buf out_buf;
41 
42 /// Number of filters. Zero indicates that we are using a preset.
43 static uint32_t filters_count = 0;
44 
45 /// Number of the preset (0-9)
46 static uint32_t preset_number = LZMA_PRESET_DEFAULT;
47 
48 /// Integrity check type
49 static lzma_check check;
50 
51 /// This becomes false if the --check=CHECK option is used.
52 static bool check_default = true;
53 
54 /// Indicates if unconsumed input is allowed to remain after
55 /// decoding has successfully finished. This is set for each file
56 /// in coder_init().
57 static bool allow_trailing_input;
58 
59 #ifdef MYTHREAD_ENABLED
60 static lzma_mt mt_options = {
61 	.flags = 0,
62 	.timeout = 300,
63 	.filters = filters,
64 };
65 #endif
66 
67 
68 extern void
69 coder_set_check(lzma_check new_check)
70 {
71 	check = new_check;
72 	check_default = false;
73 	return;
74 }
75 
76 
77 static void
78 forget_filter_chain(void)
79 {
80 	// Setting a preset makes us forget a possibly defined custom
81 	// filter chain.
82 	while (filters_count > 0) {
83 		--filters_count;
84 		free(filters[filters_count].options);
85 		filters[filters_count].options = NULL;
86 	}
87 
88 	return;
89 }
90 
91 
92 extern void
93 coder_set_preset(uint32_t new_preset)
94 {
95 	preset_number &= ~LZMA_PRESET_LEVEL_MASK;
96 	preset_number |= new_preset;
97 	forget_filter_chain();
98 	return;
99 }
100 
101 
102 extern void
103 coder_set_extreme(void)
104 {
105 	preset_number |= LZMA_PRESET_EXTREME;
106 	forget_filter_chain();
107 	return;
108 }
109 
110 
111 extern void
112 coder_add_filter(lzma_vli id, void *options)
113 {
114 	if (filters_count == LZMA_FILTERS_MAX)
115 		message_fatal(_("Maximum number of filters is four"));
116 
117 	filters[filters_count].id = id;
118 	filters[filters_count].options = options;
119 	++filters_count;
120 
121 	// Setting a custom filter chain makes us forget the preset options.
122 	// This makes a difference if one specifies e.g. "xz -9 --lzma2 -e"
123 	// where the custom filter chain resets the preset level back to
124 	// the default 6, making the example equivalent to "xz -6e".
125 	preset_number = LZMA_PRESET_DEFAULT;
126 
127 	return;
128 }
129 
130 
131 static void lzma_attribute((__noreturn__))
132 memlimit_too_small(uint64_t memory_usage)
133 {
134 	message(V_ERROR, _("Memory usage limit is too low for the given "
135 			"filter setup."));
136 	message_mem_needed(V_ERROR, memory_usage);
137 	tuklib_exit(E_ERROR, E_ERROR, false);
138 }
139 
140 
141 extern void
142 coder_set_compression_settings(void)
143 {
144 #ifdef HAVE_LZIP_DECODER
145 	// .lz compression isn't supported.
146 	assert(opt_format != FORMAT_LZIP);
147 #endif
148 
149 	// The default check type is CRC64, but fallback to CRC32
150 	// if CRC64 isn't supported by the copy of liblzma we are
151 	// using. CRC32 is always supported.
152 	if (check_default) {
153 		check = LZMA_CHECK_CRC64;
154 		if (!lzma_check_is_supported(check))
155 			check = LZMA_CHECK_CRC32;
156 	}
157 
158 	// Options for LZMA1 or LZMA2 in case we are using a preset.
159 	static lzma_options_lzma opt_lzma;
160 
161 	if (filters_count == 0) {
162 		// We are using a preset. This is not a good idea in raw mode
163 		// except when playing around with things. Different versions
164 		// of this software may use different options in presets, and
165 		// thus make uncompressing the raw data difficult.
166 		if (opt_format == FORMAT_RAW) {
167 			// The message is shown only if warnings are allowed
168 			// but the exit status isn't changed.
169 			message(V_WARNING, _("Using a preset in raw mode "
170 					"is discouraged."));
171 			message(V_WARNING, _("The exact options of the "
172 					"presets may vary between software "
173 					"versions."));
174 		}
175 
176 		// Get the preset for LZMA1 or LZMA2.
177 		if (lzma_lzma_preset(&opt_lzma, preset_number))
178 			message_bug();
179 
180 		// Use LZMA2 except with --format=lzma we use LZMA1.
181 		filters[0].id = opt_format == FORMAT_LZMA
182 				? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
183 		filters[0].options = &opt_lzma;
184 		filters_count = 1;
185 	}
186 
187 	// Terminate the filter options array.
188 	filters[filters_count].id = LZMA_VLI_UNKNOWN;
189 
190 	// If we are using the .lzma format, allow exactly one filter
191 	// which has to be LZMA1.
192 	if (opt_format == FORMAT_LZMA && (filters_count != 1
193 			|| filters[0].id != LZMA_FILTER_LZMA1))
194 		message_fatal(_("The .lzma format supports only "
195 				"the LZMA1 filter"));
196 
197 	// If we are using the .xz format, make sure that there is no LZMA1
198 	// filter to prevent LZMA_PROG_ERROR.
199 	if (opt_format == FORMAT_XZ)
200 		for (size_t i = 0; i < filters_count; ++i)
201 			if (filters[i].id == LZMA_FILTER_LZMA1)
202 				message_fatal(_("LZMA1 cannot be used "
203 						"with the .xz format"));
204 
205 	// Print the selected filter chain.
206 	message_filters_show(V_DEBUG, filters);
207 
208 	// The --flush-timeout option requires LZMA_SYNC_FLUSH support
209 	// from the filter chain. Currently threaded encoder doesn't support
210 	// LZMA_SYNC_FLUSH so single-threaded mode must be used.
211 	if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) {
212 		for (size_t i = 0; i < filters_count; ++i) {
213 			switch (filters[i].id) {
214 			case LZMA_FILTER_LZMA2:
215 			case LZMA_FILTER_DELTA:
216 				break;
217 
218 			default:
219 				message_fatal(_("The filter chain is "
220 					"incompatible with --flush-timeout"));
221 			}
222 		}
223 
224 		if (hardware_threads_is_mt()) {
225 			message(V_WARNING, _("Switching to single-threaded "
226 					"mode due to --flush-timeout"));
227 			hardware_threads_set(1);
228 		}
229 	}
230 
231 	// Get the memory usage. Note that if --format=raw was used,
232 	// we can be decompressing.
233 	//
234 	// If multithreaded .xz compression is done, this value will be
235 	// replaced.
236 	uint64_t memory_limit = hardware_memlimit_get(opt_mode);
237 	uint64_t memory_usage = UINT64_MAX;
238 	if (opt_mode == MODE_COMPRESS) {
239 #ifdef HAVE_ENCODERS
240 #	ifdef MYTHREAD_ENABLED
241 		if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
242 			memory_limit = hardware_memlimit_mtenc_get();
243 			mt_options.threads = hardware_threads_get();
244 			mt_options.block_size = opt_block_size;
245 			mt_options.check = check;
246 			memory_usage = lzma_stream_encoder_mt_memusage(
247 					&mt_options);
248 			if (memory_usage != UINT64_MAX)
249 				message(V_DEBUG, _("Using up to %" PRIu32
250 						" threads."),
251 						mt_options.threads);
252 		} else
253 #	endif
254 		{
255 			memory_usage = lzma_raw_encoder_memusage(filters);
256 		}
257 #endif
258 	} else {
259 #ifdef HAVE_DECODERS
260 		memory_usage = lzma_raw_decoder_memusage(filters);
261 #endif
262 	}
263 
264 	if (memory_usage == UINT64_MAX)
265 		message_fatal(_("Unsupported filter chain or filter options"));
266 
267 	// Print memory usage info before possible dictionary
268 	// size auto-adjusting.
269 	//
270 	// NOTE: If only encoder support was built, we cannot show the
271 	// what the decoder memory usage will be.
272 	message_mem_needed(V_DEBUG, memory_usage);
273 #ifdef HAVE_DECODERS
274 	if (opt_mode == MODE_COMPRESS) {
275 		const uint64_t decmem = lzma_raw_decoder_memusage(filters);
276 		if (decmem != UINT64_MAX)
277 			message(V_DEBUG, _("Decompression will need "
278 					"%s MiB of memory."), uint64_to_str(
279 						round_up_to_mib(decmem), 0));
280 	}
281 #endif
282 
283 	if (memory_usage <= memory_limit)
284 		return;
285 
286 	// With --format=raw settings are never adjusted to meet
287 	// the memory usage limit.
288 	if (opt_format == FORMAT_RAW)
289 		memlimit_too_small(memory_usage);
290 
291 	assert(opt_mode == MODE_COMPRESS);
292 
293 #ifdef HAVE_ENCODERS
294 #	ifdef MYTHREAD_ENABLED
295 	if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
296 		// Try to reduce the number of threads before
297 		// adjusting the compression settings down.
298 		while (mt_options.threads > 1) {
299 			// Reduce the number of threads by one and check
300 			// the memory usage.
301 			--mt_options.threads;
302 			memory_usage = lzma_stream_encoder_mt_memusage(
303 					&mt_options);
304 			if (memory_usage == UINT64_MAX)
305 				message_bug();
306 
307 			if (memory_usage <= memory_limit) {
308 				// The memory usage is now low enough.
309 				message(V_WARNING, _("Reduced the number of "
310 					"threads from %s to %s to not exceed "
311 					"the memory usage limit of %s MiB"),
312 					uint64_to_str(
313 						hardware_threads_get(), 0),
314 					uint64_to_str(mt_options.threads, 1),
315 					uint64_to_str(round_up_to_mib(
316 						memory_limit), 2));
317 				return;
318 			}
319 		}
320 
321 		// If the memory usage limit is only a soft limit (automatic
322 		// number of threads and no --memlimit-compress), the limit
323 		// is only used to reduce the number of threads and once at
324 		// just one thread, the limit is completely ignored. This
325 		// way -T0 won't use insane amount of memory but at the same
326 		// time the soft limit will never make xz fail and never make
327 		// xz change settings that would affect the compressed output.
328 		if (hardware_memlimit_mtenc_is_default()) {
329 			message(V_WARNING, _("Reduced the number of threads "
330 				"from %s to one. The automatic memory usage "
331 				"limit of %s MiB is still being exceeded. "
332 				"%s MiB of memory is required. "
333 				"Continuing anyway."),
334 				uint64_to_str(hardware_threads_get(), 0),
335 				uint64_to_str(
336 					round_up_to_mib(memory_limit), 1),
337 				uint64_to_str(
338 					round_up_to_mib(memory_usage), 2));
339 			return;
340 		}
341 
342 		// If --no-adjust was used, we cannot drop to single-threaded
343 		// mode since it produces different compressed output.
344 		//
345 		// NOTE: In xz 5.2.x, --no-adjust also prevented reducing
346 		// the number of threads. This changed in 5.3.3alpha.
347 		if (!opt_auto_adjust)
348 			memlimit_too_small(memory_usage);
349 
350 		// Switch to single-threaded mode. It uses
351 		// less memory than using one thread in
352 		// the multithreaded mode but the output
353 		// is also different.
354 		hardware_threads_set(1);
355 		memory_usage = lzma_raw_encoder_memusage(filters);
356 		message(V_WARNING, _("Switching to single-threaded mode "
357 			"to not exceed the memory usage limit of %s MiB"),
358 			uint64_to_str(round_up_to_mib(memory_limit), 0));
359 	}
360 #	endif
361 
362 	if (memory_usage <= memory_limit)
363 		return;
364 
365 	// Don't adjust LZMA2 or LZMA1 dictionary size if --no-adjust
366 	// was specified as that would change the compressed output.
367 	if (!opt_auto_adjust)
368 		memlimit_too_small(memory_usage);
369 
370 	// Look for the last filter if it is LZMA2 or LZMA1, so we can make
371 	// it use less RAM. With other filters we don't know what to do.
372 	size_t i = 0;
373 	while (filters[i].id != LZMA_FILTER_LZMA2
374 			&& filters[i].id != LZMA_FILTER_LZMA1) {
375 		if (filters[i].id == LZMA_VLI_UNKNOWN)
376 			memlimit_too_small(memory_usage);
377 
378 		++i;
379 	}
380 
381 	// Decrease the dictionary size until we meet the memory
382 	// usage limit. First round down to full mebibytes.
383 	lzma_options_lzma *opt = filters[i].options;
384 	const uint32_t orig_dict_size = opt->dict_size;
385 	opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
386 	while (true) {
387 		// If it is below 1 MiB, auto-adjusting failed. We could be
388 		// more sophisticated and scale it down even more, but let's
389 		// see if many complain about this version.
390 		//
391 		// FIXME: Displays the scaled memory usage instead
392 		// of the original.
393 		if (opt->dict_size < (UINT32_C(1) << 20))
394 			memlimit_too_small(memory_usage);
395 
396 		memory_usage = lzma_raw_encoder_memusage(filters);
397 		if (memory_usage == UINT64_MAX)
398 			message_bug();
399 
400 		// Accept it if it is low enough.
401 		if (memory_usage <= memory_limit)
402 			break;
403 
404 		// Otherwise 1 MiB down and try again. I hope this
405 		// isn't too slow method for cases where the original
406 		// dict_size is very big.
407 		opt->dict_size -= UINT32_C(1) << 20;
408 	}
409 
410 	// Tell the user that we decreased the dictionary size.
411 	message(V_WARNING, _("Adjusted LZMA%c dictionary size "
412 			"from %s MiB to %s MiB to not exceed "
413 			"the memory usage limit of %s MiB"),
414 			filters[i].id == LZMA_FILTER_LZMA2
415 				? '2' : '1',
416 			uint64_to_str(orig_dict_size >> 20, 0),
417 			uint64_to_str(opt->dict_size >> 20, 1),
418 			uint64_to_str(round_up_to_mib(memory_limit), 2));
419 #endif
420 
421 	return;
422 }
423 
424 
425 #ifdef HAVE_DECODERS
426 /// Return true if the data in in_buf seems to be in the .xz format.
427 static bool
428 is_format_xz(void)
429 {
430 	// Specify the magic as hex to be compatible with EBCDIC systems.
431 	static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 };
432 	return strm.avail_in >= sizeof(magic)
433 			&& memcmp(in_buf.u8, magic, sizeof(magic)) == 0;
434 }
435 
436 
437 /// Return true if the data in in_buf seems to be in the .lzma format.
438 static bool
439 is_format_lzma(void)
440 {
441 	// The .lzma header is 13 bytes.
442 	if (strm.avail_in < 13)
443 		return false;
444 
445 	// Decode the LZMA1 properties.
446 	lzma_filter filter = { .id = LZMA_FILTER_LZMA1 };
447 	if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK)
448 		return false;
449 
450 	// A hack to ditch tons of false positives: We allow only dictionary
451 	// sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone
452 	// created only files with 2^n, but accepts any dictionary size.
453 	// If someone complains, this will be reconsidered.
454 	lzma_options_lzma *opt = filter.options;
455 	const uint32_t dict_size = opt->dict_size;
456 	free(opt);
457 
458 	if (dict_size != UINT32_MAX) {
459 		uint32_t d = dict_size - 1;
460 		d |= d >> 2;
461 		d |= d >> 3;
462 		d |= d >> 4;
463 		d |= d >> 8;
464 		d |= d >> 16;
465 		++d;
466 		if (d != dict_size || dict_size == 0)
467 			return false;
468 	}
469 
470 	// Another hack to ditch false positives: Assume that if the
471 	// uncompressed size is known, it must be less than 256 GiB.
472 	// Again, if someone complains, this will be reconsidered.
473 	uint64_t uncompressed_size = 0;
474 	for (size_t i = 0; i < 8; ++i)
475 		uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8);
476 
477 	if (uncompressed_size != UINT64_MAX
478 			&& uncompressed_size > (UINT64_C(1) << 38))
479 		return false;
480 
481 	return true;
482 }
483 
484 
485 #ifdef HAVE_LZIP_DECODER
486 /// Return true if the data in in_buf seems to be in the .lz format.
487 static bool
488 is_format_lzip(void)
489 {
490 	static const uint8_t magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };
491 	return strm.avail_in >= sizeof(magic)
492 			&& memcmp(in_buf.u8, magic, sizeof(magic)) == 0;
493 }
494 #endif
495 #endif
496 
497 
498 /// Detect the input file type (for now, this done only when decompressing),
499 /// and initialize an appropriate coder. Return value indicates if a normal
500 /// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru
501 /// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred
502 /// (CODER_INIT_ERROR).
503 static enum coder_init_ret
504 coder_init(file_pair *pair)
505 {
506 	lzma_ret ret = LZMA_PROG_ERROR;
507 
508 	// In most cases if there is input left when coding finishes,
509 	// something has gone wrong. Exceptions are --single-stream
510 	// and decoding .lz files which can contain trailing non-.lz data.
511 	// These will be handled later in this function.
512 	allow_trailing_input = false;
513 
514 	if (opt_mode == MODE_COMPRESS) {
515 #ifdef HAVE_ENCODERS
516 		switch (opt_format) {
517 		case FORMAT_AUTO:
518 			// args.c ensures this.
519 			assert(0);
520 			break;
521 
522 		case FORMAT_XZ:
523 #	ifdef MYTHREAD_ENABLED
524 			if (hardware_threads_is_mt())
525 				ret = lzma_stream_encoder_mt(
526 						&strm, &mt_options);
527 			else
528 #	endif
529 				ret = lzma_stream_encoder(
530 						&strm, filters, check);
531 			break;
532 
533 		case FORMAT_LZMA:
534 			ret = lzma_alone_encoder(&strm, filters[0].options);
535 			break;
536 
537 #	ifdef HAVE_LZIP_DECODER
538 		case FORMAT_LZIP:
539 			// args.c should disallow this.
540 			assert(0);
541 			ret = LZMA_PROG_ERROR;
542 			break;
543 #	endif
544 
545 		case FORMAT_RAW:
546 			ret = lzma_raw_encoder(&strm, filters);
547 			break;
548 		}
549 #endif
550 	} else {
551 #ifdef HAVE_DECODERS
552 		uint32_t flags = 0;
553 
554 		// It seems silly to warn about unsupported check if the
555 		// check won't be verified anyway due to --ignore-check.
556 		if (opt_ignore_check)
557 			flags |= LZMA_IGNORE_CHECK;
558 		else
559 			flags |= LZMA_TELL_UNSUPPORTED_CHECK;
560 
561 		if (opt_single_stream)
562 			allow_trailing_input = true;
563 		else
564 			flags |= LZMA_CONCATENATED;
565 
566 		// We abuse FORMAT_AUTO to indicate unknown file format,
567 		// for which we may consider passthru mode.
568 		enum format_type init_format = FORMAT_AUTO;
569 
570 		switch (opt_format) {
571 		case FORMAT_AUTO:
572 			// .lz is checked before .lzma since .lzma detection
573 			// is more complicated (no magic bytes).
574 			if (is_format_xz())
575 				init_format = FORMAT_XZ;
576 #	ifdef HAVE_LZIP_DECODER
577 			else if (is_format_lzip())
578 				init_format = FORMAT_LZIP;
579 #	endif
580 			else if (is_format_lzma())
581 				init_format = FORMAT_LZMA;
582 			break;
583 
584 		case FORMAT_XZ:
585 			if (is_format_xz())
586 				init_format = FORMAT_XZ;
587 			break;
588 
589 		case FORMAT_LZMA:
590 			if (is_format_lzma())
591 				init_format = FORMAT_LZMA;
592 			break;
593 
594 #	ifdef HAVE_LZIP_DECODER
595 		case FORMAT_LZIP:
596 			if (is_format_lzip())
597 				init_format = FORMAT_LZIP;
598 			break;
599 #	endif
600 
601 		case FORMAT_RAW:
602 			init_format = FORMAT_RAW;
603 			break;
604 		}
605 
606 		switch (init_format) {
607 		case FORMAT_AUTO:
608 			// Unknown file format. If --decompress --stdout
609 			// --force have been given, then we copy the input
610 			// as is to stdout. Checking for MODE_DECOMPRESS
611 			// is needed, because we don't want to do use
612 			// passthru mode with --test.
613 			if (opt_mode == MODE_DECOMPRESS
614 					&& opt_stdout && opt_force) {
615 				// These are needed for progress info.
616 				strm.total_in = 0;
617 				strm.total_out = 0;
618 				return CODER_INIT_PASSTHRU;
619 			}
620 
621 			ret = LZMA_FORMAT_ERROR;
622 			break;
623 
624 		case FORMAT_XZ:
625 #	ifdef MYTHREAD_ENABLED
626 			mt_options.flags = flags;
627 
628 			mt_options.threads = hardware_threads_get();
629 			mt_options.memlimit_stop
630 				= hardware_memlimit_get(MODE_DECOMPRESS);
631 
632 			// If single-threaded mode was requested, set the
633 			// memlimit for threading to zero. This forces the
634 			// decoder to use single-threaded mode which matches
635 			// the behavior of lzma_stream_decoder().
636 			//
637 			// Otherwise use the limit for threaded decompression
638 			// which has a sane default (users are still free to
639 			// make it insanely high though).
640 			mt_options.memlimit_threading
641 					= mt_options.threads == 1
642 					? 0 : hardware_memlimit_mtdec_get();
643 
644 			ret = lzma_stream_decoder_mt(&strm, &mt_options);
645 #	else
646 			ret = lzma_stream_decoder(&strm,
647 					hardware_memlimit_get(
648 						MODE_DECOMPRESS), flags);
649 #	endif
650 			break;
651 
652 		case FORMAT_LZMA:
653 			ret = lzma_alone_decoder(&strm,
654 					hardware_memlimit_get(
655 						MODE_DECOMPRESS));
656 			break;
657 
658 #	ifdef HAVE_LZIP_DECODER
659 		case FORMAT_LZIP:
660 			allow_trailing_input = true;
661 			ret = lzma_lzip_decoder(&strm,
662 					hardware_memlimit_get(
663 						MODE_DECOMPRESS), flags);
664 			break;
665 #	endif
666 
667 		case FORMAT_RAW:
668 			// Memory usage has already been checked in
669 			// coder_set_compression_settings().
670 			ret = lzma_raw_decoder(&strm, filters);
671 			break;
672 		}
673 
674 		// Try to decode the headers. This will catch too low
675 		// memory usage limit in case it happens in the first
676 		// Block of the first Stream, which is where it very
677 		// probably will happen if it is going to happen.
678 		//
679 		// This will also catch unsupported check type which
680 		// we treat as a warning only. If there are empty
681 		// concatenated Streams with unsupported check type then
682 		// the message can be shown more than once here. The loop
683 		// is used in case there is first a warning about
684 		// unsupported check type and then the first Block
685 		// would exceed the memlimit.
686 		if (ret == LZMA_OK && init_format != FORMAT_RAW) {
687 			strm.next_out = NULL;
688 			strm.avail_out = 0;
689 			while ((ret = lzma_code(&strm, LZMA_RUN))
690 					== LZMA_UNSUPPORTED_CHECK)
691 				message_warning("%s: %s", pair->src_name,
692 						message_strm(ret));
693 
694 			// With --single-stream lzma_code won't wait for
695 			// LZMA_FINISH and thus it can return LZMA_STREAM_END
696 			// if the file has no uncompressed data inside.
697 			// So treat LZMA_STREAM_END as LZMA_OK here.
698 			// When lzma_code() is called again in coder_normal()
699 			// it will return LZMA_STREAM_END again.
700 			if (ret == LZMA_STREAM_END)
701 				ret = LZMA_OK;
702 		}
703 #endif
704 	}
705 
706 	if (ret != LZMA_OK) {
707 		message_error("%s: %s", pair->src_name, message_strm(ret));
708 		if (ret == LZMA_MEMLIMIT_ERROR)
709 			message_mem_needed(V_ERROR, lzma_memusage(&strm));
710 
711 		return CODER_INIT_ERROR;
712 	}
713 
714 	return CODER_INIT_NORMAL;
715 }
716 
717 
718 /// Resolve conflicts between opt_block_size and opt_block_list in single
719 /// threaded mode. We want to default to opt_block_list, except when it is
720 /// larger than opt_block_size. If this is the case for the current Block
721 /// at *list_pos, then we break into smaller Blocks. Otherwise advance
722 /// to the next Block in opt_block_list, and break apart if needed.
723 static void
724 split_block(uint64_t *block_remaining,
725 	    uint64_t *next_block_remaining,
726 	    size_t *list_pos)
727 {
728 	if (*next_block_remaining > 0) {
729 		// The Block at *list_pos has previously been split up.
730 		assert(!hardware_threads_is_mt());
731 		assert(opt_block_size > 0);
732 		assert(opt_block_list != NULL);
733 
734 		if (*next_block_remaining > opt_block_size) {
735 			// We have to split the current Block at *list_pos
736 			// into another opt_block_size length Block.
737 			*block_remaining = opt_block_size;
738 		} else {
739 			// This is the last remaining split Block for the
740 			// Block at *list_pos.
741 			*block_remaining = *next_block_remaining;
742 		}
743 
744 		*next_block_remaining -= *block_remaining;
745 
746 	} else {
747 		// The Block at *list_pos has been finished. Go to the next
748 		// entry in the list. If the end of the list has been reached,
749 		// reuse the size of the last Block.
750 		if (opt_block_list[*list_pos + 1] != 0)
751 			++*list_pos;
752 
753 		*block_remaining = opt_block_list[*list_pos];
754 
755 		// If in single-threaded mode, split up the Block if needed.
756 		// This is not needed in multi-threaded mode because liblzma
757 		// will do this due to how threaded encoding works.
758 		if (!hardware_threads_is_mt() && opt_block_size > 0
759 				&& *block_remaining > opt_block_size) {
760 			*next_block_remaining
761 					= *block_remaining - opt_block_size;
762 			*block_remaining = opt_block_size;
763 		}
764 	}
765 }
766 
767 
768 static bool
769 coder_write_output(file_pair *pair)
770 {
771 	if (opt_mode != MODE_TEST) {
772 		if (io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out))
773 			return true;
774 	}
775 
776 	strm.next_out = out_buf.u8;
777 	strm.avail_out = IO_BUFFER_SIZE;
778 	return false;
779 }
780 
781 
782 /// Compress or decompress using liblzma.
783 static bool
784 coder_normal(file_pair *pair)
785 {
786 	// Encoder needs to know when we have given all the input to it.
787 	// The decoders need to know it too when we are using
788 	// LZMA_CONCATENATED. We need to check for src_eof here, because
789 	// the first input chunk has been already read if decompressing,
790 	// and that may have been the only chunk we will read.
791 	lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN;
792 
793 	lzma_ret ret;
794 
795 	// Assume that something goes wrong.
796 	bool success = false;
797 
798 	// block_remaining indicates how many input bytes to encode before
799 	// finishing the current .xz Block. The Block size is set with
800 	// --block-size=SIZE and --block-list. They have an effect only when
801 	// compressing to the .xz format. If block_remaining == UINT64_MAX,
802 	// only a single block is created.
803 	uint64_t block_remaining = UINT64_MAX;
804 
805 	// next_block_remaining for when we are in single-threaded mode and
806 	// the Block in --block-list is larger than the --block-size=SIZE.
807 	uint64_t next_block_remaining = 0;
808 
809 	// Position in opt_block_list. Unused if --block-list wasn't used.
810 	size_t list_pos = 0;
811 
812 	// Handle --block-size for single-threaded mode and the first step
813 	// of --block-list.
814 	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
815 		// --block-size doesn't do anything here in threaded mode,
816 		// because the threaded encoder will take care of splitting
817 		// to fixed-sized Blocks.
818 		if (!hardware_threads_is_mt() && opt_block_size > 0)
819 			block_remaining = opt_block_size;
820 
821 		// If --block-list was used, start with the first size.
822 		//
823 		// For threaded case, --block-size specifies how big Blocks
824 		// the encoder needs to be prepared to create at maximum
825 		// and --block-list will simultaneously cause new Blocks
826 		// to be started at specified intervals. To keep things
827 		// logical, the same is done in single-threaded mode. The
828 		// output is still not identical because in single-threaded
829 		// mode the size info isn't written into Block Headers.
830 		if (opt_block_list != NULL) {
831 			if (block_remaining < opt_block_list[list_pos]) {
832 				assert(!hardware_threads_is_mt());
833 				next_block_remaining = opt_block_list[list_pos]
834 						- block_remaining;
835 			} else {
836 				block_remaining = opt_block_list[list_pos];
837 			}
838 		}
839 	}
840 
841 	strm.next_out = out_buf.u8;
842 	strm.avail_out = IO_BUFFER_SIZE;
843 
844 	while (!user_abort) {
845 		// Fill the input buffer if it is empty and we aren't
846 		// flushing or finishing.
847 		if (strm.avail_in == 0 && action == LZMA_RUN) {
848 			strm.next_in = in_buf.u8;
849 			strm.avail_in = io_read(pair, &in_buf,
850 					my_min(block_remaining,
851 						IO_BUFFER_SIZE));
852 
853 			if (strm.avail_in == SIZE_MAX)
854 				break;
855 
856 			if (pair->src_eof) {
857 				action = LZMA_FINISH;
858 
859 			} else if (block_remaining != UINT64_MAX) {
860 				// Start a new Block after every
861 				// opt_block_size bytes of input.
862 				block_remaining -= strm.avail_in;
863 				if (block_remaining == 0)
864 					action = LZMA_FULL_BARRIER;
865 			}
866 
867 			if (action == LZMA_RUN && pair->flush_needed)
868 				action = LZMA_SYNC_FLUSH;
869 		}
870 
871 		// Let liblzma do the actual work.
872 		ret = lzma_code(&strm, action);
873 
874 		// Write out if the output buffer became full.
875 		if (strm.avail_out == 0) {
876 			if (coder_write_output(pair))
877 				break;
878 		}
879 
880 		if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH
881 				|| action == LZMA_FULL_BARRIER)) {
882 			if (action == LZMA_SYNC_FLUSH) {
883 				// Flushing completed. Write the pending data
884 				// out immediately so that the reading side
885 				// can decompress everything compressed so far.
886 				if (coder_write_output(pair))
887 					break;
888 
889 				// Mark that we haven't seen any new input
890 				// since the previous flush.
891 				pair->src_has_seen_input = false;
892 				pair->flush_needed = false;
893 			} else {
894 				// Start a new Block after LZMA_FULL_BARRIER.
895 				if (opt_block_list == NULL) {
896 					assert(!hardware_threads_is_mt());
897 					assert(opt_block_size > 0);
898 					block_remaining = opt_block_size;
899 				} else {
900 					split_block(&block_remaining,
901 							&next_block_remaining,
902 							&list_pos);
903 				}
904 			}
905 
906 			// Start a new Block after LZMA_FULL_FLUSH or continue
907 			// the same block after LZMA_SYNC_FLUSH.
908 			action = LZMA_RUN;
909 
910 		} else if (ret != LZMA_OK) {
911 			// Determine if the return value indicates that we
912 			// won't continue coding. LZMA_NO_CHECK would be
913 			// here too if LZMA_TELL_ANY_CHECK was used.
914 			const bool stop = ret != LZMA_UNSUPPORTED_CHECK;
915 
916 			if (stop) {
917 				// Write the remaining bytes even if something
918 				// went wrong, because that way the user gets
919 				// as much data as possible, which can be good
920 				// when trying to get at least some useful
921 				// data out of damaged files.
922 				if (coder_write_output(pair))
923 					break;
924 			}
925 
926 			if (ret == LZMA_STREAM_END) {
927 				if (allow_trailing_input) {
928 					io_fix_src_pos(pair, strm.avail_in);
929 					success = true;
930 					break;
931 				}
932 
933 				// Check that there is no trailing garbage.
934 				// This is needed for LZMA_Alone and raw
935 				// streams. This is *not* done with .lz files
936 				// as that format specifically requires
937 				// allowing trailing garbage.
938 				if (strm.avail_in == 0 && !pair->src_eof) {
939 					// Try reading one more byte.
940 					// Hopefully we don't get any more
941 					// input, and thus pair->src_eof
942 					// becomes true.
943 					strm.avail_in = io_read(
944 							pair, &in_buf, 1);
945 					if (strm.avail_in == SIZE_MAX)
946 						break;
947 
948 					assert(strm.avail_in == 0
949 							|| strm.avail_in == 1);
950 				}
951 
952 				if (strm.avail_in == 0) {
953 					assert(pair->src_eof);
954 					success = true;
955 					break;
956 				}
957 
958 				// We hadn't reached the end of the file.
959 				ret = LZMA_DATA_ERROR;
960 				assert(stop);
961 			}
962 
963 			// If we get here and stop is true, something went
964 			// wrong and we print an error. Otherwise it's just
965 			// a warning and coding can continue.
966 			if (stop) {
967 				message_error("%s: %s", pair->src_name,
968 						message_strm(ret));
969 			} else {
970 				message_warning("%s: %s", pair->src_name,
971 						message_strm(ret));
972 
973 				// When compressing, all possible errors set
974 				// stop to true.
975 				assert(opt_mode != MODE_COMPRESS);
976 			}
977 
978 			if (ret == LZMA_MEMLIMIT_ERROR) {
979 				// Display how much memory it would have
980 				// actually needed.
981 				message_mem_needed(V_ERROR,
982 						lzma_memusage(&strm));
983 			}
984 
985 			if (stop)
986 				break;
987 		}
988 
989 		// Show progress information under certain conditions.
990 		message_progress_update();
991 	}
992 
993 	return success;
994 }
995 
996 
997 /// Copy from input file to output file without processing the data in any
998 /// way. This is used only when trying to decompress unrecognized files
999 /// with --decompress --stdout --force, so the output is always stdout.
1000 static bool
1001 coder_passthru(file_pair *pair)
1002 {
1003 	while (strm.avail_in != 0) {
1004 		if (user_abort)
1005 			return false;
1006 
1007 		if (io_write(pair, &in_buf, strm.avail_in))
1008 			return false;
1009 
1010 		strm.total_in += strm.avail_in;
1011 		strm.total_out = strm.total_in;
1012 		message_progress_update();
1013 
1014 		strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
1015 		if (strm.avail_in == SIZE_MAX)
1016 			return false;
1017 	}
1018 
1019 	return true;
1020 }
1021 
1022 
1023 extern void
1024 coder_run(const char *filename)
1025 {
1026 	// Set and possibly print the filename for the progress message.
1027 	message_filename(filename);
1028 
1029 	// Try to open the input file.
1030 	file_pair *pair = io_open_src(filename);
1031 	if (pair == NULL)
1032 		return;
1033 
1034 	// Assume that something goes wrong.
1035 	bool success = false;
1036 
1037 	if (opt_mode == MODE_COMPRESS) {
1038 		strm.next_in = NULL;
1039 		strm.avail_in = 0;
1040 	} else {
1041 		// Read the first chunk of input data. This is needed
1042 		// to detect the input file type.
1043 		strm.next_in = in_buf.u8;
1044 		strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
1045 	}
1046 
1047 	if (strm.avail_in != SIZE_MAX) {
1048 		// Initialize the coder. This will detect the file format
1049 		// and, in decompression or testing mode, check the memory
1050 		// usage of the first Block too. This way we don't try to
1051 		// open the destination file if we see that coding wouldn't
1052 		// work at all anyway. This also avoids deleting the old
1053 		// "target" file if --force was used.
1054 		const enum coder_init_ret init_ret = coder_init(pair);
1055 
1056 		if (init_ret != CODER_INIT_ERROR && !user_abort) {
1057 			// Don't open the destination file when --test
1058 			// is used.
1059 			if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
1060 				// Remember the current time. It is needed
1061 				// for progress indicator.
1062 				mytime_set_start_time();
1063 
1064 				// Initialize the progress indicator.
1065 				//
1066 				// NOTE: When reading from stdin, fstat()
1067 				// isn't called on it and thus src_st.st_size
1068 				// is zero. If stdin pointed to a regular
1069 				// file, it would still be possible to know
1070 				// the file size but then we would also need
1071 				// to take into account the current reading
1072 				// position since with stdin it isn't
1073 				// necessarily at the beginning of the file.
1074 				const bool is_passthru = init_ret
1075 						== CODER_INIT_PASSTHRU;
1076 				const uint64_t in_size
1077 					= pair->src_st.st_size <= 0
1078 					? 0 : (uint64_t)(pair->src_st.st_size);
1079 				message_progress_start(&strm,
1080 						is_passthru, in_size);
1081 
1082 				// Do the actual coding or passthru.
1083 				if (is_passthru)
1084 					success = coder_passthru(pair);
1085 				else
1086 					success = coder_normal(pair);
1087 
1088 				message_progress_end(success);
1089 			}
1090 		}
1091 	}
1092 
1093 	// Close the file pair. It needs to know if coding was successful to
1094 	// know if the source or target file should be unlinked.
1095 	io_close(pair, success);
1096 
1097 	return;
1098 }
1099 
1100 
1101 #ifndef NDEBUG
1102 extern void
1103 coder_free(void)
1104 {
1105 	lzma_end(&strm);
1106 	return;
1107 }
1108 #endif
1109