xref: /freebsd/contrib/xz/src/xz/coder.c (revision aa1a8ff2)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       coder.c
4 /// \brief      Compresses or uncompresses a file
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "private.h"
14 
15 
/// Return value type for coder_init().
enum coder_init_ret {
	/// A normal liblzma-based coder was initialized.
	CODER_INIT_NORMAL,

	/// The input should be copied as is (passthru mode).
	CODER_INIT_PASSTHRU,

	/// Initialization failed. coder_init() has shown the error.
	CODER_INIT_ERROR,
};
22 
23 
/// Operation mode. The default is to compress.
enum operation_mode opt_mode = MODE_COMPRESS;

/// File format. When decompressing, FORMAT_AUTO makes coder_init()
/// detect the format from the beginning of the input.
enum format_type opt_format = FORMAT_AUTO;

/// If true, coder_set_compression_settings() is allowed to adjust the
/// compression settings to meet the memory usage limit (see --no-adjust).
bool opt_auto_adjust = true;

/// If true, the decoders are initialized without LZMA_CONCATENATED and
/// input is allowed to remain after decoding has finished.
bool opt_single_stream = false;

/// Block size for --block-size=SIZE. Zero means that it isn't set.
uint64_t opt_block_size = 0;

/// Block sizes for --block-list, or NULL if the option wasn't used.
/// The code in this file treats a zero entry as the end of the list.
uint64_t *opt_block_list = NULL;
30 
31 
/// Stream used to communicate with liblzma
static lzma_stream strm = LZMA_STREAM_INIT;

/// Filters needed for encoding in all formats, and also for decoding
/// raw data. The extra element holds the LZMA_VLI_UNKNOWN terminator.
static lzma_filter filters[LZMA_FILTERS_MAX + 1];

/// Input and output buffers
static io_buf in_buf;
static io_buf out_buf;

/// Number of filters. Zero indicates that we are using a preset.
static uint32_t filters_count = 0;

/// Number of the preset (0-9), possibly combined with
/// the LZMA_PRESET_EXTREME flag
static uint32_t preset_number = LZMA_PRESET_DEFAULT;

/// Integrity check type
static lzma_check check;

/// This becomes false if the --check=CHECK option is used.
static bool check_default = true;

/// Indicates if unconsumed input is allowed to remain after
/// decoding has successfully finished. This is set for each file
/// in coder_init().
static bool allow_trailing_input;
58 
#ifdef MYTHREAD_ENABLED
/// Options for the multithreaded .xz encoder and decoder. The members that
/// aren't initialized here are set in coder_set_compression_settings()
/// (threads, block_size, check) and in coder_init() (flags, threads,
/// memlimit_stop, memlimit_threading).
static lzma_mt mt_options = {
	.flags = 0,
	// NOTE(review): liblzma documents lzma_mt.timeout as being in
	// milliseconds - confirm against lzma/container.h.
	.timeout = 300,
	// The threaded coder uses the same filter chain as the other coders.
	.filters = filters,
};
#endif
66 
67 
68 extern void
69 coder_set_check(lzma_check new_check)
70 {
71 	check = new_check;
72 	check_default = false;
73 	return;
74 }
75 
76 
77 static void
78 forget_filter_chain(void)
79 {
80 	// Setting a preset makes us forget a possibly defined custom
81 	// filter chain.
82 	while (filters_count > 0) {
83 		--filters_count;
84 		free(filters[filters_count].options);
85 		filters[filters_count].options = NULL;
86 	}
87 
88 	return;
89 }
90 
91 
92 extern void
93 coder_set_preset(uint32_t new_preset)
94 {
95 	preset_number &= ~LZMA_PRESET_LEVEL_MASK;
96 	preset_number |= new_preset;
97 	forget_filter_chain();
98 	return;
99 }
100 
101 
102 extern void
103 coder_set_extreme(void)
104 {
105 	preset_number |= LZMA_PRESET_EXTREME;
106 	forget_filter_chain();
107 	return;
108 }
109 
110 
111 extern void
112 coder_add_filter(lzma_vli id, void *options)
113 {
114 	if (filters_count == LZMA_FILTERS_MAX)
115 		message_fatal(_("Maximum number of filters is four"));
116 
117 	filters[filters_count].id = id;
118 	filters[filters_count].options = options;
119 	++filters_count;
120 
121 	// Setting a custom filter chain makes us forget the preset options.
122 	// This makes a difference if one specifies e.g. "xz -9 --lzma2 -e"
123 	// where the custom filter chain resets the preset level back to
124 	// the default 6, making the example equivalent to "xz -6e".
125 	preset_number = LZMA_PRESET_DEFAULT;
126 
127 	return;
128 }
129 
130 
131 tuklib_attr_noreturn
132 static void
133 memlimit_too_small(uint64_t memory_usage)
134 {
135 	message(V_ERROR, _("Memory usage limit is too low for the given "
136 			"filter setup."));
137 	message_mem_needed(V_ERROR, memory_usage);
138 	tuklib_exit(E_ERROR, E_ERROR, false);
139 }
140 
141 
142 extern void
143 coder_set_compression_settings(void)
144 {
145 #ifdef HAVE_LZIP_DECODER
146 	// .lz compression isn't supported.
147 	assert(opt_format != FORMAT_LZIP);
148 #endif
149 
150 	// The default check type is CRC64, but fallback to CRC32
151 	// if CRC64 isn't supported by the copy of liblzma we are
152 	// using. CRC32 is always supported.
153 	if (check_default) {
154 		check = LZMA_CHECK_CRC64;
155 		if (!lzma_check_is_supported(check))
156 			check = LZMA_CHECK_CRC32;
157 	}
158 
159 	// Options for LZMA1 or LZMA2 in case we are using a preset.
160 	static lzma_options_lzma opt_lzma;
161 
162 	if (filters_count == 0) {
163 		// We are using a preset. This is not a good idea in raw mode
164 		// except when playing around with things. Different versions
165 		// of this software may use different options in presets, and
166 		// thus make uncompressing the raw data difficult.
167 		if (opt_format == FORMAT_RAW) {
168 			// The message is shown only if warnings are allowed
169 			// but the exit status isn't changed.
170 			message(V_WARNING, _("Using a preset in raw mode "
171 					"is discouraged."));
172 			message(V_WARNING, _("The exact options of the "
173 					"presets may vary between software "
174 					"versions."));
175 		}
176 
177 		// Get the preset for LZMA1 or LZMA2.
178 		if (lzma_lzma_preset(&opt_lzma, preset_number))
179 			message_bug();
180 
181 		// Use LZMA2 except with --format=lzma we use LZMA1.
182 		filters[0].id = opt_format == FORMAT_LZMA
183 				? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
184 		filters[0].options = &opt_lzma;
185 		filters_count = 1;
186 	}
187 
188 	// Terminate the filter options array.
189 	filters[filters_count].id = LZMA_VLI_UNKNOWN;
190 
191 	// If we are using the .lzma format, allow exactly one filter
192 	// which has to be LZMA1.
193 	if (opt_format == FORMAT_LZMA && (filters_count != 1
194 			|| filters[0].id != LZMA_FILTER_LZMA1))
195 		message_fatal(_("The .lzma format supports only "
196 				"the LZMA1 filter"));
197 
198 	// If we are using the .xz format, make sure that there is no LZMA1
199 	// filter to prevent LZMA_PROG_ERROR.
200 	if (opt_format == FORMAT_XZ)
201 		for (size_t i = 0; i < filters_count; ++i)
202 			if (filters[i].id == LZMA_FILTER_LZMA1)
203 				message_fatal(_("LZMA1 cannot be used "
204 						"with the .xz format"));
205 
206 	// Print the selected filter chain.
207 	message_filters_show(V_DEBUG, filters);
208 
209 	// The --flush-timeout option requires LZMA_SYNC_FLUSH support
210 	// from the filter chain. Currently threaded encoder doesn't support
211 	// LZMA_SYNC_FLUSH so single-threaded mode must be used.
212 	if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) {
213 		for (size_t i = 0; i < filters_count; ++i) {
214 			switch (filters[i].id) {
215 			case LZMA_FILTER_LZMA2:
216 			case LZMA_FILTER_DELTA:
217 				break;
218 
219 			default:
220 				message_fatal(_("The filter chain is "
221 					"incompatible with --flush-timeout"));
222 			}
223 		}
224 
225 		if (hardware_threads_is_mt()) {
226 			message(V_WARNING, _("Switching to single-threaded "
227 					"mode due to --flush-timeout"));
228 			hardware_threads_set(1);
229 		}
230 	}
231 
232 	// Get the memory usage. Note that if --format=raw was used,
233 	// we can be decompressing.
234 	//
235 	// If multithreaded .xz compression is done, this value will be
236 	// replaced.
237 	uint64_t memory_limit = hardware_memlimit_get(opt_mode);
238 	uint64_t memory_usage = UINT64_MAX;
239 	if (opt_mode == MODE_COMPRESS) {
240 #ifdef HAVE_ENCODERS
241 #	ifdef MYTHREAD_ENABLED
242 		if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
243 			memory_limit = hardware_memlimit_mtenc_get();
244 			mt_options.threads = hardware_threads_get();
245 			mt_options.block_size = opt_block_size;
246 			mt_options.check = check;
247 			memory_usage = lzma_stream_encoder_mt_memusage(
248 					&mt_options);
249 			if (memory_usage != UINT64_MAX)
250 				message(V_DEBUG, _("Using up to %" PRIu32
251 						" threads."),
252 						mt_options.threads);
253 		} else
254 #	endif
255 		{
256 			memory_usage = lzma_raw_encoder_memusage(filters);
257 		}
258 #endif
259 	} else {
260 #ifdef HAVE_DECODERS
261 		memory_usage = lzma_raw_decoder_memusage(filters);
262 #endif
263 	}
264 
265 	if (memory_usage == UINT64_MAX)
266 		message_fatal(_("Unsupported filter chain or filter options"));
267 
268 	// Print memory usage info before possible dictionary
269 	// size auto-adjusting.
270 	//
271 	// NOTE: If only encoder support was built, we cannot show the
272 	// what the decoder memory usage will be.
273 	message_mem_needed(V_DEBUG, memory_usage);
274 #ifdef HAVE_DECODERS
275 	if (opt_mode == MODE_COMPRESS) {
276 		const uint64_t decmem = lzma_raw_decoder_memusage(filters);
277 		if (decmem != UINT64_MAX)
278 			message(V_DEBUG, _("Decompression will need "
279 					"%s MiB of memory."), uint64_to_str(
280 						round_up_to_mib(decmem), 0));
281 	}
282 #endif
283 
284 	if (memory_usage <= memory_limit)
285 		return;
286 
287 	// With --format=raw settings are never adjusted to meet
288 	// the memory usage limit.
289 	if (opt_format == FORMAT_RAW)
290 		memlimit_too_small(memory_usage);
291 
292 	assert(opt_mode == MODE_COMPRESS);
293 
294 #ifdef HAVE_ENCODERS
295 #	ifdef MYTHREAD_ENABLED
296 	if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
297 		// Try to reduce the number of threads before
298 		// adjusting the compression settings down.
299 		while (mt_options.threads > 1) {
300 			// Reduce the number of threads by one and check
301 			// the memory usage.
302 			--mt_options.threads;
303 			memory_usage = lzma_stream_encoder_mt_memusage(
304 					&mt_options);
305 			if (memory_usage == UINT64_MAX)
306 				message_bug();
307 
308 			if (memory_usage <= memory_limit) {
309 				// The memory usage is now low enough.
310 				message(V_WARNING, _("Reduced the number of "
311 					"threads from %s to %s to not exceed "
312 					"the memory usage limit of %s MiB"),
313 					uint64_to_str(
314 						hardware_threads_get(), 0),
315 					uint64_to_str(mt_options.threads, 1),
316 					uint64_to_str(round_up_to_mib(
317 						memory_limit), 2));
318 				return;
319 			}
320 		}
321 
322 		// If the memory usage limit is only a soft limit (automatic
323 		// number of threads and no --memlimit-compress), the limit
324 		// is only used to reduce the number of threads and once at
325 		// just one thread, the limit is completely ignored. This
326 		// way -T0 won't use insane amount of memory but at the same
327 		// time the soft limit will never make xz fail and never make
328 		// xz change settings that would affect the compressed output.
329 		if (hardware_memlimit_mtenc_is_default()) {
330 			message(V_WARNING, _("Reduced the number of threads "
331 				"from %s to one. The automatic memory usage "
332 				"limit of %s MiB is still being exceeded. "
333 				"%s MiB of memory is required. "
334 				"Continuing anyway."),
335 				uint64_to_str(hardware_threads_get(), 0),
336 				uint64_to_str(
337 					round_up_to_mib(memory_limit), 1),
338 				uint64_to_str(
339 					round_up_to_mib(memory_usage), 2));
340 			return;
341 		}
342 
343 		// If --no-adjust was used, we cannot drop to single-threaded
344 		// mode since it produces different compressed output.
345 		//
346 		// NOTE: In xz 5.2.x, --no-adjust also prevented reducing
347 		// the number of threads. This changed in 5.3.3alpha.
348 		if (!opt_auto_adjust)
349 			memlimit_too_small(memory_usage);
350 
351 		// Switch to single-threaded mode. It uses
352 		// less memory than using one thread in
353 		// the multithreaded mode but the output
354 		// is also different.
355 		hardware_threads_set(1);
356 		memory_usage = lzma_raw_encoder_memusage(filters);
357 		message(V_WARNING, _("Switching to single-threaded mode "
358 			"to not exceed the memory usage limit of %s MiB"),
359 			uint64_to_str(round_up_to_mib(memory_limit), 0));
360 	}
361 #	endif
362 
363 	if (memory_usage <= memory_limit)
364 		return;
365 
366 	// Don't adjust LZMA2 or LZMA1 dictionary size if --no-adjust
367 	// was specified as that would change the compressed output.
368 	if (!opt_auto_adjust)
369 		memlimit_too_small(memory_usage);
370 
371 	// Look for the last filter if it is LZMA2 or LZMA1, so we can make
372 	// it use less RAM. With other filters we don't know what to do.
373 	size_t i = 0;
374 	while (filters[i].id != LZMA_FILTER_LZMA2
375 			&& filters[i].id != LZMA_FILTER_LZMA1) {
376 		if (filters[i].id == LZMA_VLI_UNKNOWN)
377 			memlimit_too_small(memory_usage);
378 
379 		++i;
380 	}
381 
382 	// Decrease the dictionary size until we meet the memory
383 	// usage limit. First round down to full mebibytes.
384 	lzma_options_lzma *opt = filters[i].options;
385 	const uint32_t orig_dict_size = opt->dict_size;
386 	opt->dict_size &= ~((UINT32_C(1) << 20) - 1);
387 	while (true) {
388 		// If it is below 1 MiB, auto-adjusting failed. We could be
389 		// more sophisticated and scale it down even more, but let's
390 		// see if many complain about this version.
391 		//
392 		// FIXME: Displays the scaled memory usage instead
393 		// of the original.
394 		if (opt->dict_size < (UINT32_C(1) << 20))
395 			memlimit_too_small(memory_usage);
396 
397 		memory_usage = lzma_raw_encoder_memusage(filters);
398 		if (memory_usage == UINT64_MAX)
399 			message_bug();
400 
401 		// Accept it if it is low enough.
402 		if (memory_usage <= memory_limit)
403 			break;
404 
405 		// Otherwise 1 MiB down and try again. I hope this
406 		// isn't too slow method for cases where the original
407 		// dict_size is very big.
408 		opt->dict_size -= UINT32_C(1) << 20;
409 	}
410 
411 	// Tell the user that we decreased the dictionary size.
412 	message(V_WARNING, _("Adjusted LZMA%c dictionary size "
413 			"from %s MiB to %s MiB to not exceed "
414 			"the memory usage limit of %s MiB"),
415 			filters[i].id == LZMA_FILTER_LZMA2
416 				? '2' : '1',
417 			uint64_to_str(orig_dict_size >> 20, 0),
418 			uint64_to_str(opt->dict_size >> 20, 1),
419 			uint64_to_str(round_up_to_mib(memory_limit), 2));
420 #endif
421 
422 	return;
423 }
424 
425 
426 #ifdef HAVE_DECODERS
427 /// Return true if the data in in_buf seems to be in the .xz format.
428 static bool
429 is_format_xz(void)
430 {
431 	// Specify the magic as hex to be compatible with EBCDIC systems.
432 	static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 };
433 	return strm.avail_in >= sizeof(magic)
434 			&& memcmp(in_buf.u8, magic, sizeof(magic)) == 0;
435 }
436 
437 
438 /// Return true if the data in in_buf seems to be in the .lzma format.
439 static bool
440 is_format_lzma(void)
441 {
442 	// The .lzma header is 13 bytes.
443 	if (strm.avail_in < 13)
444 		return false;
445 
446 	// Decode the LZMA1 properties.
447 	lzma_filter filter = { .id = LZMA_FILTER_LZMA1 };
448 	if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK)
449 		return false;
450 
451 	// A hack to ditch tons of false positives: We allow only dictionary
452 	// sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone
453 	// created only files with 2^n, but accepts any dictionary size.
454 	// If someone complains, this will be reconsidered.
455 	lzma_options_lzma *opt = filter.options;
456 	const uint32_t dict_size = opt->dict_size;
457 	free(opt);
458 
459 	if (dict_size != UINT32_MAX) {
460 		uint32_t d = dict_size - 1;
461 		d |= d >> 2;
462 		d |= d >> 3;
463 		d |= d >> 4;
464 		d |= d >> 8;
465 		d |= d >> 16;
466 		++d;
467 		if (d != dict_size || dict_size == 0)
468 			return false;
469 	}
470 
471 	// Another hack to ditch false positives: Assume that if the
472 	// uncompressed size is known, it must be less than 256 GiB.
473 	// Again, if someone complains, this will be reconsidered.
474 	uint64_t uncompressed_size = 0;
475 	for (size_t i = 0; i < 8; ++i)
476 		uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8);
477 
478 	if (uncompressed_size != UINT64_MAX
479 			&& uncompressed_size > (UINT64_C(1) << 38))
480 		return false;
481 
482 	return true;
483 }
484 
485 
#ifdef HAVE_LZIP_DECODER
/// Check if the beginning of in_buf looks like the .lz format, that is,
/// if at least four bytes are available and they match the .lz magic bytes.
static bool
is_format_lzip(void)
{
	static const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };

	if (strm.avail_in < sizeof(lzip_magic))
		return false;

	return memcmp(in_buf.u8, lzip_magic, sizeof(lzip_magic)) == 0;
}
#endif
496 #endif
497 
498 
/// Detect the input file type (for now, this is done only when
/// decompressing), and initialize an appropriate coder. Return value
/// indicates if a normal liblzma-based coder was initialized
/// (CODER_INIT_NORMAL), if passthru mode should be used
/// (CODER_INIT_PASSTHRU), or if an error occurred (CODER_INIT_ERROR).
///
/// When decompressing, the first chunk of input must already be in in_buf
/// with strm.next_in and strm.avail_in set because the format detection
/// and the header decoding done here read it.
///
/// \param      pair    File pair being processed. Only pair->src_name is
///                     used here and only for messages.
static enum coder_init_ret
coder_init(file_pair *pair)
{
	// This stays as the result if none of the cases below sets it, for
	// example, if support for the requested direction wasn't built.
	lzma_ret ret = LZMA_PROG_ERROR;

	// In most cases if there is input left when coding finishes,
	// something has gone wrong. Exceptions are --single-stream
	// and decoding .lz files which can contain trailing non-.lz data.
	// These will be handled later in this function.
	allow_trailing_input = false;

	if (opt_mode == MODE_COMPRESS) {
#ifdef HAVE_ENCODERS
		switch (opt_format) {
		case FORMAT_AUTO:
			// args.c ensures this.
			assert(0);
			break;

		case FORMAT_XZ:
#	ifdef MYTHREAD_ENABLED
			if (hardware_threads_is_mt())
				ret = lzma_stream_encoder_mt(
						&strm, &mt_options);
			else
#	endif
				ret = lzma_stream_encoder(
						&strm, filters, check);
			break;

		case FORMAT_LZMA:
			// coder_set_compression_settings() allows only
			// a single LZMA1 filter with this format.
			ret = lzma_alone_encoder(&strm, filters[0].options);
			break;

#	ifdef HAVE_LZIP_DECODER
		case FORMAT_LZIP:
			// args.c should disallow this.
			assert(0);
			ret = LZMA_PROG_ERROR;
			break;
#	endif

		case FORMAT_RAW:
			ret = lzma_raw_encoder(&strm, filters);
			break;
		}
#endif
	} else {
#ifdef HAVE_DECODERS
		uint32_t flags = 0;

		// It seems silly to warn about unsupported check if the
		// check won't be verified anyway due to --ignore-check.
		if (opt_ignore_check)
			flags |= LZMA_IGNORE_CHECK;
		else
			flags |= LZMA_TELL_UNSUPPORTED_CHECK;

		if (opt_single_stream)
			allow_trailing_input = true;
		else
			flags |= LZMA_CONCATENATED;

		// We abuse FORMAT_AUTO to indicate unknown file format,
		// for which we may consider passthru mode.
		enum format_type init_format = FORMAT_AUTO;

		// If a specific format was requested, the input must still
		// look like that format. Otherwise init_format stays as
		// FORMAT_AUTO. FORMAT_RAW is the exception as raw data
		// has nothing that could be detected.
		switch (opt_format) {
		case FORMAT_AUTO:
			// .lz is checked before .lzma since .lzma detection
			// is more complicated (no magic bytes).
			if (is_format_xz())
				init_format = FORMAT_XZ;
#	ifdef HAVE_LZIP_DECODER
			else if (is_format_lzip())
				init_format = FORMAT_LZIP;
#	endif
			else if (is_format_lzma())
				init_format = FORMAT_LZMA;
			break;

		case FORMAT_XZ:
			if (is_format_xz())
				init_format = FORMAT_XZ;
			break;

		case FORMAT_LZMA:
			if (is_format_lzma())
				init_format = FORMAT_LZMA;
			break;

#	ifdef HAVE_LZIP_DECODER
		case FORMAT_LZIP:
			if (is_format_lzip())
				init_format = FORMAT_LZIP;
			break;
#	endif

		case FORMAT_RAW:
			init_format = FORMAT_RAW;
			break;
		}

		switch (init_format) {
		case FORMAT_AUTO:
			// Unknown file format. If --decompress --stdout
			// --force have been given, then we copy the input
			// as is to stdout. Checking for MODE_DECOMPRESS
			// is needed, because we don't want to use
			// passthru mode with --test.
			if (opt_mode == MODE_DECOMPRESS
					&& opt_stdout && opt_force) {
				// These are needed for progress info.
				strm.total_in = 0;
				strm.total_out = 0;
				return CODER_INIT_PASSTHRU;
			}

			ret = LZMA_FORMAT_ERROR;
			break;

		case FORMAT_XZ:
#	ifdef MYTHREAD_ENABLED
			mt_options.flags = flags;

			mt_options.threads = hardware_threads_get();
			mt_options.memlimit_stop
				= hardware_memlimit_get(MODE_DECOMPRESS);

			// If single-threaded mode was requested, set the
			// memlimit for threading to zero. This forces the
			// decoder to use single-threaded mode which matches
			// the behavior of lzma_stream_decoder().
			//
			// Otherwise use the limit for threaded decompression
			// which has a sane default (users are still free to
			// make it insanely high though).
			mt_options.memlimit_threading
					= mt_options.threads == 1
					? 0 : hardware_memlimit_mtdec_get();

			ret = lzma_stream_decoder_mt(&strm, &mt_options);
#	else
			ret = lzma_stream_decoder(&strm,
					hardware_memlimit_get(
						MODE_DECOMPRESS), flags);
#	endif
			break;

		case FORMAT_LZMA:
			ret = lzma_alone_decoder(&strm,
					hardware_memlimit_get(
						MODE_DECOMPRESS));
			break;

#	ifdef HAVE_LZIP_DECODER
		case FORMAT_LZIP:
			// .lz files can contain trailing non-.lz data.
			allow_trailing_input = true;
			ret = lzma_lzip_decoder(&strm,
					hardware_memlimit_get(
						MODE_DECOMPRESS), flags);
			break;
#	endif

		case FORMAT_RAW:
			// Memory usage has already been checked in
			// coder_set_compression_settings().
			ret = lzma_raw_decoder(&strm, filters);
			break;
		}

		// Try to decode the headers. This will catch too low
		// memory usage limit in case it happens in the first
		// Block of the first Stream, which is where it very
		// probably will happen if it is going to happen.
		//
		// This will also catch unsupported check type which
		// we treat as a warning only. If there are empty
		// concatenated Streams with unsupported check type then
		// the message can be shown more than once here. The loop
		// is used in case there is first a warning about
		// unsupported check type and then the first Block
		// would exceed the memlimit.
		if (ret == LZMA_OK && init_format != FORMAT_RAW) {
			// No output space is given so that only
			// the headers can get decoded here.
			strm.next_out = NULL;
			strm.avail_out = 0;
			while ((ret = lzma_code(&strm, LZMA_RUN))
					== LZMA_UNSUPPORTED_CHECK)
				message_warning(_("%s: %s"), pair->src_name,
						message_strm(ret));

			// With --single-stream lzma_code won't wait for
			// LZMA_FINISH and thus it can return LZMA_STREAM_END
			// if the file has no uncompressed data inside.
			// So treat LZMA_STREAM_END as LZMA_OK here.
			// When lzma_code() is called again in coder_normal()
			// it will return LZMA_STREAM_END again.
			if (ret == LZMA_STREAM_END)
				ret = LZMA_OK;
		}
#endif
	}

	if (ret != LZMA_OK) {
		message_error(_("%s: %s"), pair->src_name, message_strm(ret));
		if (ret == LZMA_MEMLIMIT_ERROR)
			message_mem_needed(V_ERROR, lzma_memusage(&strm));

		return CODER_INIT_ERROR;
	}

	return CODER_INIT_NORMAL;
}
717 
718 
719 /// Resolve conflicts between opt_block_size and opt_block_list in single
720 /// threaded mode. We want to default to opt_block_list, except when it is
721 /// larger than opt_block_size. If this is the case for the current Block
722 /// at *list_pos, then we break into smaller Blocks. Otherwise advance
723 /// to the next Block in opt_block_list, and break apart if needed.
724 static void
725 split_block(uint64_t *block_remaining,
726 	    uint64_t *next_block_remaining,
727 	    size_t *list_pos)
728 {
729 	if (*next_block_remaining > 0) {
730 		// The Block at *list_pos has previously been split up.
731 		assert(!hardware_threads_is_mt());
732 		assert(opt_block_size > 0);
733 		assert(opt_block_list != NULL);
734 
735 		if (*next_block_remaining > opt_block_size) {
736 			// We have to split the current Block at *list_pos
737 			// into another opt_block_size length Block.
738 			*block_remaining = opt_block_size;
739 		} else {
740 			// This is the last remaining split Block for the
741 			// Block at *list_pos.
742 			*block_remaining = *next_block_remaining;
743 		}
744 
745 		*next_block_remaining -= *block_remaining;
746 
747 	} else {
748 		// The Block at *list_pos has been finished. Go to the next
749 		// entry in the list. If the end of the list has been reached,
750 		// reuse the size of the last Block.
751 		if (opt_block_list[*list_pos + 1] != 0)
752 			++*list_pos;
753 
754 		*block_remaining = opt_block_list[*list_pos];
755 
756 		// If in single-threaded mode, split up the Block if needed.
757 		// This is not needed in multi-threaded mode because liblzma
758 		// will do this due to how threaded encoding works.
759 		if (!hardware_threads_is_mt() && opt_block_size > 0
760 				&& *block_remaining > opt_block_size) {
761 			*next_block_remaining
762 					= *block_remaining - opt_block_size;
763 			*block_remaining = opt_block_size;
764 		}
765 	}
766 }
767 
768 
769 static bool
770 coder_write_output(file_pair *pair)
771 {
772 	if (opt_mode != MODE_TEST) {
773 		if (io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out))
774 			return true;
775 	}
776 
777 	strm.next_out = out_buf.u8;
778 	strm.avail_out = IO_BUFFER_SIZE;
779 	return false;
780 }
781 
782 
/// Compress or decompress using liblzma.
///
/// The coder in strm must have been initialized with coder_init(). Input
/// is read into in_buf and output is written from out_buf until the end
/// of the stream, an error, or until user_abort becomes true.
///
/// \param      pair    File pair to read from and to write to
///
/// \return     true if coding finished successfully; false if an error
///             occurred or if the loop ended because of user_abort
static bool
coder_normal(file_pair *pair)
{
	// Encoder needs to know when we have given all the input to it.
	// The decoders need to know it too when we are using
	// LZMA_CONCATENATED. We need to check for src_eof here, because
	// the first input chunk has been already read if decompressing,
	// and that may have been the only chunk we will read.
	lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN;

	lzma_ret ret;

	// Assume that something goes wrong.
	bool success = false;

	// block_remaining indicates how many input bytes to encode before
	// finishing the current .xz Block. The Block size is set with
	// --block-size=SIZE and --block-list. They have an effect only when
	// compressing to the .xz format. If block_remaining == UINT64_MAX,
	// only a single block is created.
	uint64_t block_remaining = UINT64_MAX;

	// next_block_remaining for when we are in single-threaded mode and
	// the Block in --block-list is larger than the --block-size=SIZE.
	uint64_t next_block_remaining = 0;

	// Position in opt_block_list. Unused if --block-list wasn't used.
	size_t list_pos = 0;

	// Handle --block-size for single-threaded mode and the first step
	// of --block-list.
	if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
		// --block-size doesn't do anything here in threaded mode,
		// because the threaded encoder will take care of splitting
		// to fixed-sized Blocks.
		if (!hardware_threads_is_mt() && opt_block_size > 0)
			block_remaining = opt_block_size;

		// If --block-list was used, start with the first size.
		//
		// For threaded case, --block-size specifies how big Blocks
		// the encoder needs to be prepared to create at maximum
		// and --block-list will simultaneously cause new Blocks
		// to be started at specified intervals. To keep things
		// logical, the same is done in single-threaded mode. The
		// output is still not identical because in single-threaded
		// mode the size info isn't written into Block Headers.
		if (opt_block_list != NULL) {
			if (block_remaining < opt_block_list[list_pos]) {
				// The first entry is bigger than --block-size
				// so it needs to be split. The part that
				// doesn't fit into the first Block is
				// remembered for split_block().
				assert(!hardware_threads_is_mt());
				next_block_remaining = opt_block_list[list_pos]
						- block_remaining;
			} else {
				block_remaining = opt_block_list[list_pos];
			}
		}
	}

	strm.next_out = out_buf.u8;
	strm.avail_out = IO_BUFFER_SIZE;

	while (!user_abort) {
		// Fill the input buffer if it is empty and we aren't
		// flushing or finishing.
		if (strm.avail_in == 0 && action == LZMA_RUN) {
			// At most block_remaining bytes are read so that
			// a Block boundary never ends up in the middle of
			// the input buffer.
			strm.next_in = in_buf.u8;
			strm.avail_in = io_read(pair, &in_buf,
					my_min(block_remaining,
						IO_BUFFER_SIZE));

			// SIZE_MAX indicates a read error.
			if (strm.avail_in == SIZE_MAX)
				break;

			if (pair->src_eof) {
				action = LZMA_FINISH;

			} else if (block_remaining != UINT64_MAX) {
				// Start a new Block after every
				// opt_block_size bytes of input.
				block_remaining -= strm.avail_in;
				if (block_remaining == 0)
					action = LZMA_FULL_BARRIER;
			}

			if (action == LZMA_RUN && pair->flush_needed)
				action = LZMA_SYNC_FLUSH;
		}

		// Let liblzma do the actual work.
		ret = lzma_code(&strm, action);

		// Write out if the output buffer became full.
		if (strm.avail_out == 0) {
			if (coder_write_output(pair))
				break;
		}

		// With LZMA_SYNC_FLUSH and LZMA_FULL_BARRIER, LZMA_STREAM_END
		// means that the action was completed, not that the whole
		// stream would have ended.
		if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH
				|| action == LZMA_FULL_BARRIER)) {
			if (action == LZMA_SYNC_FLUSH) {
				// Flushing completed. Write the pending data
				// out immediately so that the reading side
				// can decompress everything compressed so far.
				if (coder_write_output(pair))
					break;

				// Mark that we haven't seen any new input
				// since the previous flush.
				pair->src_has_seen_input = false;
				pair->flush_needed = false;
			} else {
				// Start a new Block after LZMA_FULL_BARRIER.
				if (opt_block_list == NULL) {
					assert(!hardware_threads_is_mt());
					assert(opt_block_size > 0);
					block_remaining = opt_block_size;
				} else {
					split_block(&block_remaining,
							&next_block_remaining,
							&list_pos);
				}
			}

			// Start a new Block after LZMA_FULL_BARRIER or
			// continue the same Block after LZMA_SYNC_FLUSH.
			action = LZMA_RUN;

		} else if (ret != LZMA_OK) {
			// Determine if the return value indicates that we
			// won't continue coding. LZMA_NO_CHECK would be
			// here too if LZMA_TELL_ANY_CHECK was used.
			const bool stop = ret != LZMA_UNSUPPORTED_CHECK;

			if (stop) {
				// Write the remaining bytes even if something
				// went wrong, because that way the user gets
				// as much data as possible, which can be good
				// when trying to get at least some useful
				// data out of damaged files.
				if (coder_write_output(pair))
					break;
			}

			if (ret == LZMA_STREAM_END) {
				if (allow_trailing_input) {
					// Let io_fix_src_pos() know how much
					// of the input that was read wasn't
					// used by the decoder.
					io_fix_src_pos(pair, strm.avail_in);
					success = true;
					break;
				}

				// Check that there is no trailing garbage.
				// This is needed for LZMA_Alone and raw
				// streams. This is *not* done with .lz files
				// as that format specifically requires
				// allowing trailing garbage.
				if (strm.avail_in == 0 && !pair->src_eof) {
					// Try reading one more byte.
					// Hopefully we don't get any more
					// input, and thus pair->src_eof
					// becomes true.
					strm.avail_in = io_read(
							pair, &in_buf, 1);
					if (strm.avail_in == SIZE_MAX)
						break;

					assert(strm.avail_in == 0
							|| strm.avail_in == 1);
				}

				if (strm.avail_in == 0) {
					assert(pair->src_eof);
					success = true;
					break;
				}

				// We hadn't reached the end of the file.
				ret = LZMA_DATA_ERROR;
				assert(stop);
			}

			// If we get here and stop is true, something went
			// wrong and we print an error. Otherwise it's just
			// a warning and coding can continue.
			if (stop) {
				message_error(_("%s: %s"), pair->src_name,
						message_strm(ret));
			} else {
				message_warning(_("%s: %s"), pair->src_name,
						message_strm(ret));

				// When compressing, all possible errors set
				// stop to true.
				assert(opt_mode != MODE_COMPRESS);
			}

			if (ret == LZMA_MEMLIMIT_ERROR) {
				// Display how much memory it would have
				// actually needed.
				message_mem_needed(V_ERROR,
						lzma_memusage(&strm));
			}

			if (stop)
				break;
		}

		// Show progress information under certain conditions.
		message_progress_update();
	}

	return success;
}
996 
997 
998 /// Copy from input file to output file without processing the data in any
999 /// way. This is used only when trying to decompress unrecognized files
1000 /// with --decompress --stdout --force, so the output is always stdout.
1001 static bool
1002 coder_passthru(file_pair *pair)
1003 {
1004 	while (strm.avail_in != 0) {
1005 		if (user_abort)
1006 			return false;
1007 
1008 		if (io_write(pair, &in_buf, strm.avail_in))
1009 			return false;
1010 
1011 		strm.total_in += strm.avail_in;
1012 		strm.total_out = strm.total_in;
1013 		message_progress_update();
1014 
1015 		strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
1016 		if (strm.avail_in == SIZE_MAX)
1017 			return false;
1018 	}
1019 
1020 	return true;
1021 }
1022 
1023 
1024 extern void
1025 coder_run(const char *filename)
1026 {
1027 	// Set and possibly print the filename for the progress message.
1028 	message_filename(filename);
1029 
1030 	// Try to open the input file.
1031 	file_pair *pair = io_open_src(filename);
1032 	if (pair == NULL)
1033 		return;
1034 
1035 	// Assume that something goes wrong.
1036 	bool success = false;
1037 
1038 	if (opt_mode == MODE_COMPRESS) {
1039 		strm.next_in = NULL;
1040 		strm.avail_in = 0;
1041 	} else {
1042 		// Read the first chunk of input data. This is needed
1043 		// to detect the input file type.
1044 		strm.next_in = in_buf.u8;
1045 		strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
1046 	}
1047 
1048 	if (strm.avail_in != SIZE_MAX) {
1049 		// Initialize the coder. This will detect the file format
1050 		// and, in decompression or testing mode, check the memory
1051 		// usage of the first Block too. This way we don't try to
1052 		// open the destination file if we see that coding wouldn't
1053 		// work at all anyway. This also avoids deleting the old
1054 		// "target" file if --force was used.
1055 		const enum coder_init_ret init_ret = coder_init(pair);
1056 
1057 		if (init_ret != CODER_INIT_ERROR && !user_abort) {
1058 			// Don't open the destination file when --test
1059 			// is used.
1060 			if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
1061 				// Remember the current time. It is needed
1062 				// for progress indicator.
1063 				mytime_set_start_time();
1064 
1065 				// Initialize the progress indicator.
1066 				//
1067 				// NOTE: When reading from stdin, fstat()
1068 				// isn't called on it and thus src_st.st_size
1069 				// is zero. If stdin pointed to a regular
1070 				// file, it would still be possible to know
1071 				// the file size but then we would also need
1072 				// to take into account the current reading
1073 				// position since with stdin it isn't
1074 				// necessarily at the beginning of the file.
1075 				const bool is_passthru = init_ret
1076 						== CODER_INIT_PASSTHRU;
1077 				const uint64_t in_size
1078 					= pair->src_st.st_size <= 0
1079 					? 0 : (uint64_t)(pair->src_st.st_size);
1080 				message_progress_start(&strm,
1081 						is_passthru, in_size);
1082 
1083 				// Do the actual coding or passthru.
1084 				if (is_passthru)
1085 					success = coder_passthru(pair);
1086 				else
1087 					success = coder_normal(pair);
1088 
1089 				message_progress_end(success);
1090 			}
1091 		}
1092 	}
1093 
1094 	// Close the file pair. It needs to know if coding was successful to
1095 	// know if the source or target file should be unlinked.
1096 	io_close(pair, success);
1097 
1098 	return;
1099 }
1100 
1101 
1102 #ifndef NDEBUG
1103 extern void
1104 coder_free(void)
1105 {
1106 	lzma_end(&strm);
1107 	return;
1108 }
1109 #endif
1110