1 /////////////////////////////////////////////////////////////////////////////// 2 // 3 /// \file coder.c 4 /// \brief Compresses or uncompresses a file 5 // 6 // Author: Lasse Collin 7 // 8 // This file has been put into the public domain. 9 // You can do whatever you want with this file. 10 // 11 /////////////////////////////////////////////////////////////////////////////// 12 13 #include "private.h" 14 15 16 /// Return value type for coder_init(). 17 enum coder_init_ret { 18 CODER_INIT_NORMAL, 19 CODER_INIT_PASSTHRU, 20 CODER_INIT_ERROR, 21 }; 22 23 24 enum operation_mode opt_mode = MODE_COMPRESS; 25 enum format_type opt_format = FORMAT_AUTO; 26 bool opt_auto_adjust = true; 27 bool opt_single_stream = false; 28 uint64_t opt_block_size = 0; 29 uint64_t *opt_block_list = NULL; 30 31 32 /// Stream used to communicate with liblzma 33 static lzma_stream strm = LZMA_STREAM_INIT; 34 35 /// Filters needed for all encoding all formats, and also decoding in raw data 36 static lzma_filter filters[LZMA_FILTERS_MAX + 1]; 37 38 /// Input and output buffers 39 static io_buf in_buf; 40 static io_buf out_buf; 41 42 /// Number of filters. Zero indicates that we are using a preset. 43 static uint32_t filters_count = 0; 44 45 /// Number of the preset (0-9) 46 static uint32_t preset_number = LZMA_PRESET_DEFAULT; 47 48 /// Integrity check type 49 static lzma_check check; 50 51 /// This becomes false if the --check=CHECK option is used. 52 static bool check_default = true; 53 54 #ifdef MYTHREAD_ENABLED 55 static lzma_mt mt_options = { 56 .flags = 0, 57 .timeout = 300, 58 .filters = filters, 59 }; 60 #endif 61 62 63 extern void 64 coder_set_check(lzma_check new_check) 65 { 66 check = new_check; 67 check_default = false; 68 return; 69 } 70 71 72 static void 73 forget_filter_chain(void) 74 { 75 // Setting a preset makes us forget a possibly defined custom 76 // filter chain. 77 while (filters_count > 0) { 78 --filters_count; 79 free(filters[filters_count].options); 80 filters[filters_count].options = NULL; 81 } 82 83 return; 84 } 85 86 87 extern void 88 coder_set_preset(uint32_t new_preset) 89 { 90 preset_number &= ~LZMA_PRESET_LEVEL_MASK; 91 preset_number |= new_preset; 92 forget_filter_chain(); 93 return; 94 } 95 96 97 extern void 98 coder_set_extreme(void) 99 { 100 preset_number |= LZMA_PRESET_EXTREME; 101 forget_filter_chain(); 102 return; 103 } 104 105 106 extern void 107 coder_add_filter(lzma_vli id, void *options) 108 { 109 if (filters_count == LZMA_FILTERS_MAX) 110 message_fatal(_("Maximum number of filters is four")); 111 112 filters[filters_count].id = id; 113 filters[filters_count].options = options; 114 ++filters_count; 115 116 // Setting a custom filter chain makes us forget the preset options. 117 // This makes a difference if one specifies e.g. "xz -9 --lzma2 -e" 118 // where the custom filter chain resets the preset level back to 119 // the default 6, making the example equivalent to "xz -6e". 120 preset_number = LZMA_PRESET_DEFAULT; 121 122 return; 123 } 124 125 126 static void lzma_attribute((__noreturn__)) 127 memlimit_too_small(uint64_t memory_usage) 128 { 129 message(V_ERROR, _("Memory usage limit is too low for the given " 130 "filter setup.")); 131 message_mem_needed(V_ERROR, memory_usage); 132 tuklib_exit(E_ERROR, E_ERROR, false); 133 } 134 135 136 extern void 137 coder_set_compression_settings(void) 138 { 139 // The default check type is CRC64, but fallback to CRC32 140 // if CRC64 isn't supported by the copy of liblzma we are 141 // using. CRC32 is always supported. 142 if (check_default) { 143 check = LZMA_CHECK_CRC64; 144 if (!lzma_check_is_supported(check)) 145 check = LZMA_CHECK_CRC32; 146 } 147 148 // Options for LZMA1 or LZMA2 in case we are using a preset. 149 static lzma_options_lzma opt_lzma; 150 151 if (filters_count == 0) { 152 // We are using a preset. This is not a good idea in raw mode 153 // except when playing around with things. Different versions 154 // of this software may use different options in presets, and 155 // thus make uncompressing the raw data difficult. 156 if (opt_format == FORMAT_RAW) { 157 // The message is shown only if warnings are allowed 158 // but the exit status isn't changed. 159 message(V_WARNING, _("Using a preset in raw mode " 160 "is discouraged.")); 161 message(V_WARNING, _("The exact options of the " 162 "presets may vary between software " 163 "versions.")); 164 } 165 166 // Get the preset for LZMA1 or LZMA2. 167 if (lzma_lzma_preset(&opt_lzma, preset_number)) 168 message_bug(); 169 170 // Use LZMA2 except with --format=lzma we use LZMA1. 171 filters[0].id = opt_format == FORMAT_LZMA 172 ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2; 173 filters[0].options = &opt_lzma; 174 filters_count = 1; 175 } 176 177 // Terminate the filter options array. 178 filters[filters_count].id = LZMA_VLI_UNKNOWN; 179 180 // If we are using the .lzma format, allow exactly one filter 181 // which has to be LZMA1. 182 if (opt_format == FORMAT_LZMA && (filters_count != 1 183 || filters[0].id != LZMA_FILTER_LZMA1)) 184 message_fatal(_("The .lzma format supports only " 185 "the LZMA1 filter")); 186 187 // If we are using the .xz format, make sure that there is no LZMA1 188 // filter to prevent LZMA_PROG_ERROR. 189 if (opt_format == FORMAT_XZ) 190 for (size_t i = 0; i < filters_count; ++i) 191 if (filters[i].id == LZMA_FILTER_LZMA1) 192 message_fatal(_("LZMA1 cannot be used " 193 "with the .xz format")); 194 195 // Print the selected filter chain. 196 message_filters_show(V_DEBUG, filters); 197 198 // The --flush-timeout option requires LZMA_SYNC_FLUSH support 199 // from the filter chain. Currently threaded encoder doesn't support 200 // LZMA_SYNC_FLUSH so single-threaded mode must be used. 201 if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) { 202 for (size_t i = 0; i < filters_count; ++i) { 203 switch (filters[i].id) { 204 case LZMA_FILTER_LZMA2: 205 case LZMA_FILTER_DELTA: 206 break; 207 208 default: 209 message_fatal(_("The filter chain is " 210 "incompatible with --flush-timeout")); 211 } 212 } 213 214 if (hardware_threads_get() > 1) { 215 message(V_WARNING, _("Switching to single-threaded " 216 "mode due to --flush-timeout")); 217 hardware_threads_set(1); 218 } 219 } 220 221 // Get the memory usage. Note that if --format=raw was used, 222 // we can be decompressing. 223 const uint64_t memory_limit = hardware_memlimit_get(opt_mode); 224 uint64_t memory_usage; 225 if (opt_mode == MODE_COMPRESS) { 226 #ifdef MYTHREAD_ENABLED 227 if (opt_format == FORMAT_XZ && hardware_threads_get() > 1) { 228 mt_options.threads = hardware_threads_get(); 229 mt_options.block_size = opt_block_size; 230 mt_options.check = check; 231 memory_usage = lzma_stream_encoder_mt_memusage( 232 &mt_options); 233 if (memory_usage != UINT64_MAX) 234 message(V_DEBUG, _("Using up to %" PRIu32 235 " threads."), 236 mt_options.threads); 237 } else 238 #endif 239 { 240 memory_usage = lzma_raw_encoder_memusage(filters); 241 } 242 } else { 243 memory_usage = lzma_raw_decoder_memusage(filters); 244 } 245 246 if (memory_usage == UINT64_MAX) 247 message_fatal(_("Unsupported filter chain or filter options")); 248 249 // Print memory usage info before possible dictionary 250 // size auto-adjusting. 251 message_mem_needed(V_DEBUG, memory_usage); 252 if (opt_mode == MODE_COMPRESS) { 253 const uint64_t decmem = lzma_raw_decoder_memusage(filters); 254 if (decmem != UINT64_MAX) 255 message(V_DEBUG, _("Decompression will need " 256 "%s MiB of memory."), uint64_to_str( 257 round_up_to_mib(decmem), 0)); 258 } 259 260 if (memory_usage <= memory_limit) 261 return; 262 263 // If --no-adjust was used or we didn't find LZMA1 or 264 // LZMA2 as the last filter, give an error immediately. 265 // --format=raw implies --no-adjust. 266 if (!opt_auto_adjust || opt_format == FORMAT_RAW) 267 memlimit_too_small(memory_usage); 268 269 assert(opt_mode == MODE_COMPRESS); 270 271 #ifdef MYTHREAD_ENABLED 272 if (opt_format == FORMAT_XZ && mt_options.threads > 1) { 273 // Try to reduce the number of threads before 274 // adjusting the compression settings down. 275 do { 276 // FIXME? The real single-threaded mode has 277 // lower memory usage, but it's not comparable 278 // because it doesn't write the size info 279 // into Block Headers. 280 if (--mt_options.threads == 0) 281 memlimit_too_small(memory_usage); 282 283 memory_usage = lzma_stream_encoder_mt_memusage( 284 &mt_options); 285 if (memory_usage == UINT64_MAX) 286 message_bug(); 287 288 } while (memory_usage > memory_limit); 289 290 message(V_WARNING, _("Adjusted the number of threads " 291 "from %s to %s to not exceed " 292 "the memory usage limit of %s MiB"), 293 uint64_to_str(hardware_threads_get(), 0), 294 uint64_to_str(mt_options.threads, 1), 295 uint64_to_str(round_up_to_mib( 296 memory_limit), 2)); 297 } 298 #endif 299 300 if (memory_usage <= memory_limit) 301 return; 302 303 // Look for the last filter if it is LZMA2 or LZMA1, so we can make 304 // it use less RAM. With other filters we don't know what to do. 305 size_t i = 0; 306 while (filters[i].id != LZMA_FILTER_LZMA2 307 && filters[i].id != LZMA_FILTER_LZMA1) { 308 if (filters[i].id == LZMA_VLI_UNKNOWN) 309 memlimit_too_small(memory_usage); 310 311 ++i; 312 } 313 314 // Decrease the dictionary size until we meet the memory 315 // usage limit. First round down to full mebibytes. 316 lzma_options_lzma *opt = filters[i].options; 317 const uint32_t orig_dict_size = opt->dict_size; 318 opt->dict_size &= ~((UINT32_C(1) << 20) - 1); 319 while (true) { 320 // If it is below 1 MiB, auto-adjusting failed. We could be 321 // more sophisticated and scale it down even more, but let's 322 // see if many complain about this version. 323 // 324 // FIXME: Displays the scaled memory usage instead 325 // of the original. 326 if (opt->dict_size < (UINT32_C(1) << 20)) 327 memlimit_too_small(memory_usage); 328 329 memory_usage = lzma_raw_encoder_memusage(filters); 330 if (memory_usage == UINT64_MAX) 331 message_bug(); 332 333 // Accept it if it is low enough. 334 if (memory_usage <= memory_limit) 335 break; 336 337 // Otherwise 1 MiB down and try again. I hope this 338 // isn't too slow method for cases where the original 339 // dict_size is very big. 340 opt->dict_size -= UINT32_C(1) << 20; 341 } 342 343 // Tell the user that we decreased the dictionary size. 344 message(V_WARNING, _("Adjusted LZMA%c dictionary size " 345 "from %s MiB to %s MiB to not exceed " 346 "the memory usage limit of %s MiB"), 347 filters[i].id == LZMA_FILTER_LZMA2 348 ? '2' : '1', 349 uint64_to_str(orig_dict_size >> 20, 0), 350 uint64_to_str(opt->dict_size >> 20, 1), 351 uint64_to_str(round_up_to_mib(memory_limit), 2)); 352 353 return; 354 } 355 356 357 /// Return true if the data in in_buf seems to be in the .xz format. 358 static bool 359 is_format_xz(void) 360 { 361 // Specify the magic as hex to be compatible with EBCDIC systems. 362 static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 }; 363 return strm.avail_in >= sizeof(magic) 364 && memcmp(in_buf.u8, magic, sizeof(magic)) == 0; 365 } 366 367 368 /// Return true if the data in in_buf seems to be in the .lzma format. 369 static bool 370 is_format_lzma(void) 371 { 372 // The .lzma header is 13 bytes. 373 if (strm.avail_in < 13) 374 return false; 375 376 // Decode the LZMA1 properties. 377 lzma_filter filter = { .id = LZMA_FILTER_LZMA1 }; 378 if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK) 379 return false; 380 381 // A hack to ditch tons of false positives: We allow only dictionary 382 // sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone 383 // created only files with 2^n, but accepts any dictionary size. 384 // If someone complains, this will be reconsidered. 385 lzma_options_lzma *opt = filter.options; 386 const uint32_t dict_size = opt->dict_size; 387 free(opt); 388 389 if (dict_size != UINT32_MAX) { 390 uint32_t d = dict_size - 1; 391 d |= d >> 2; 392 d |= d >> 3; 393 d |= d >> 4; 394 d |= d >> 8; 395 d |= d >> 16; 396 ++d; 397 if (d != dict_size || dict_size == 0) 398 return false; 399 } 400 401 // Another hack to ditch false positives: Assume that if the 402 // uncompressed size is known, it must be less than 256 GiB. 403 // Again, if someone complains, this will be reconsidered. 404 uint64_t uncompressed_size = 0; 405 for (size_t i = 0; i < 8; ++i) 406 uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8); 407 408 if (uncompressed_size != UINT64_MAX 409 && uncompressed_size > (UINT64_C(1) << 38)) 410 return false; 411 412 return true; 413 } 414 415 416 /// Detect the input file type (for now, this done only when decompressing), 417 /// and initialize an appropriate coder. Return value indicates if a normal 418 /// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru 419 /// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred 420 /// (CODER_INIT_ERROR). 421 static enum coder_init_ret 422 coder_init(file_pair *pair) 423 { 424 lzma_ret ret = LZMA_PROG_ERROR; 425 426 if (opt_mode == MODE_COMPRESS) { 427 switch (opt_format) { 428 case FORMAT_AUTO: 429 // args.c ensures this. 430 assert(0); 431 break; 432 433 case FORMAT_XZ: 434 #ifdef MYTHREAD_ENABLED 435 if (hardware_threads_get() > 1) 436 ret = lzma_stream_encoder_mt( 437 &strm, &mt_options); 438 else 439 #endif 440 ret = lzma_stream_encoder( 441 &strm, filters, check); 442 break; 443 444 case FORMAT_LZMA: 445 ret = lzma_alone_encoder(&strm, filters[0].options); 446 break; 447 448 case FORMAT_RAW: 449 ret = lzma_raw_encoder(&strm, filters); 450 break; 451 } 452 } else { 453 uint32_t flags = 0; 454 455 // It seems silly to warn about unsupported check if the 456 // check won't be verified anyway due to --ignore-check. 457 if (opt_ignore_check) 458 flags |= LZMA_IGNORE_CHECK; 459 else 460 flags |= LZMA_TELL_UNSUPPORTED_CHECK; 461 462 if (!opt_single_stream) 463 flags |= LZMA_CONCATENATED; 464 465 // We abuse FORMAT_AUTO to indicate unknown file format, 466 // for which we may consider passthru mode. 467 enum format_type init_format = FORMAT_AUTO; 468 469 switch (opt_format) { 470 case FORMAT_AUTO: 471 if (is_format_xz()) 472 init_format = FORMAT_XZ; 473 else if (is_format_lzma()) 474 init_format = FORMAT_LZMA; 475 break; 476 477 case FORMAT_XZ: 478 if (is_format_xz()) 479 init_format = FORMAT_XZ; 480 break; 481 482 case FORMAT_LZMA: 483 if (is_format_lzma()) 484 init_format = FORMAT_LZMA; 485 break; 486 487 case FORMAT_RAW: 488 init_format = FORMAT_RAW; 489 break; 490 } 491 492 switch (init_format) { 493 case FORMAT_AUTO: 494 // Unknown file format. If --decompress --stdout 495 // --force have been given, then we copy the input 496 // as is to stdout. Checking for MODE_DECOMPRESS 497 // is needed, because we don't want to do use 498 // passthru mode with --test. 499 if (opt_mode == MODE_DECOMPRESS 500 && opt_stdout && opt_force) 501 return CODER_INIT_PASSTHRU; 502 503 ret = LZMA_FORMAT_ERROR; 504 break; 505 506 case FORMAT_XZ: 507 ret = lzma_stream_decoder(&strm, 508 hardware_memlimit_get( 509 MODE_DECOMPRESS), flags); 510 break; 511 512 case FORMAT_LZMA: 513 ret = lzma_alone_decoder(&strm, 514 hardware_memlimit_get( 515 MODE_DECOMPRESS)); 516 break; 517 518 case FORMAT_RAW: 519 // Memory usage has already been checked in 520 // coder_set_compression_settings(). 521 ret = lzma_raw_decoder(&strm, filters); 522 break; 523 } 524 525 // Try to decode the headers. This will catch too low 526 // memory usage limit in case it happens in the first 527 // Block of the first Stream, which is where it very 528 // probably will happen if it is going to happen. 529 if (ret == LZMA_OK && init_format != FORMAT_RAW) { 530 strm.next_out = NULL; 531 strm.avail_out = 0; 532 ret = lzma_code(&strm, LZMA_RUN); 533 } 534 } 535 536 if (ret != LZMA_OK) { 537 message_error("%s: %s", pair->src_name, message_strm(ret)); 538 if (ret == LZMA_MEMLIMIT_ERROR) 539 message_mem_needed(V_ERROR, lzma_memusage(&strm)); 540 541 return CODER_INIT_ERROR; 542 } 543 544 return CODER_INIT_NORMAL; 545 } 546 547 548 /// Resolve conflicts between opt_block_size and opt_block_list in single 549 /// threaded mode. We want to default to opt_block_list, except when it is 550 /// larger than opt_block_size. If this is the case for the current Block 551 /// at *list_pos, then we break into smaller Blocks. Otherwise advance 552 /// to the next Block in opt_block_list, and break apart if needed. 553 static void 554 split_block(uint64_t *block_remaining, 555 uint64_t *next_block_remaining, 556 size_t *list_pos) 557 { 558 if (*next_block_remaining > 0) { 559 // The Block at *list_pos has previously been split up. 560 assert(hardware_threads_get() == 1); 561 assert(opt_block_size > 0); 562 assert(opt_block_list != NULL); 563 564 if (*next_block_remaining > opt_block_size) { 565 // We have to split the current Block at *list_pos 566 // into another opt_block_size length Block. 567 *block_remaining = opt_block_size; 568 } else { 569 // This is the last remaining split Block for the 570 // Block at *list_pos. 571 *block_remaining = *next_block_remaining; 572 } 573 574 *next_block_remaining -= *block_remaining; 575 576 } else { 577 // The Block at *list_pos has been finished. Go to the next 578 // entry in the list. If the end of the list has been reached, 579 // reuse the size of the last Block. 580 if (opt_block_list[*list_pos + 1] != 0) 581 ++*list_pos; 582 583 *block_remaining = opt_block_list[*list_pos]; 584 585 // If in single-threaded mode, split up the Block if needed. 586 // This is not needed in multi-threaded mode because liblzma 587 // will do this due to how threaded encoding works. 588 if (hardware_threads_get() == 1 && opt_block_size > 0 589 && *block_remaining > opt_block_size) { 590 *next_block_remaining 591 = *block_remaining - opt_block_size; 592 *block_remaining = opt_block_size; 593 } 594 } 595 } 596 597 598 /// Compress or decompress using liblzma. 599 static bool 600 coder_normal(file_pair *pair) 601 { 602 // Encoder needs to know when we have given all the input to it. 603 // The decoders need to know it too when we are using 604 // LZMA_CONCATENATED. We need to check for src_eof here, because 605 // the first input chunk has been already read if decompressing, 606 // and that may have been the only chunk we will read. 607 lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN; 608 609 lzma_ret ret; 610 611 // Assume that something goes wrong. 612 bool success = false; 613 614 // block_remaining indicates how many input bytes to encode before 615 // finishing the current .xz Block. The Block size is set with 616 // --block-size=SIZE and --block-list. They have an effect only when 617 // compressing to the .xz format. If block_remaining == UINT64_MAX, 618 // only a single block is created. 619 uint64_t block_remaining = UINT64_MAX; 620 621 // next_block_remining for when we are in single-threaded mode and 622 // the Block in --block-list is larger than the --block-size=SIZE. 623 uint64_t next_block_remaining = 0; 624 625 // Position in opt_block_list. Unused if --block-list wasn't used. 626 size_t list_pos = 0; 627 628 // Handle --block-size for single-threaded mode and the first step 629 // of --block-list. 630 if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) { 631 // --block-size doesn't do anything here in threaded mode, 632 // because the threaded encoder will take care of splitting 633 // to fixed-sized Blocks. 634 if (hardware_threads_get() == 1 && opt_block_size > 0) 635 block_remaining = opt_block_size; 636 637 // If --block-list was used, start with the first size. 638 // 639 // For threaded case, --block-size specifies how big Blocks 640 // the encoder needs to be prepared to create at maximum 641 // and --block-list will simultaneously cause new Blocks 642 // to be started at specified intervals. To keep things 643 // logical, the same is done in single-threaded mode. The 644 // output is still not identical because in single-threaded 645 // mode the size info isn't written into Block Headers. 646 if (opt_block_list != NULL) { 647 if (block_remaining < opt_block_list[list_pos]) { 648 assert(hardware_threads_get() == 1); 649 next_block_remaining = opt_block_list[list_pos] 650 - block_remaining; 651 } else { 652 block_remaining = opt_block_list[list_pos]; 653 } 654 } 655 } 656 657 strm.next_out = out_buf.u8; 658 strm.avail_out = IO_BUFFER_SIZE; 659 660 while (!user_abort) { 661 // Fill the input buffer if it is empty and we aren't 662 // flushing or finishing. 663 if (strm.avail_in == 0 && action == LZMA_RUN) { 664 strm.next_in = in_buf.u8; 665 strm.avail_in = io_read(pair, &in_buf, 666 my_min(block_remaining, 667 IO_BUFFER_SIZE)); 668 669 if (strm.avail_in == SIZE_MAX) 670 break; 671 672 if (pair->src_eof) { 673 action = LZMA_FINISH; 674 675 } else if (block_remaining != UINT64_MAX) { 676 // Start a new Block after every 677 // opt_block_size bytes of input. 678 block_remaining -= strm.avail_in; 679 if (block_remaining == 0) 680 action = LZMA_FULL_BARRIER; 681 } 682 683 if (action == LZMA_RUN && flush_needed) 684 action = LZMA_SYNC_FLUSH; 685 } 686 687 // Let liblzma do the actual work. 688 ret = lzma_code(&strm, action); 689 690 // Write out if the output buffer became full. 691 if (strm.avail_out == 0) { 692 if (opt_mode != MODE_TEST && io_write(pair, &out_buf, 693 IO_BUFFER_SIZE - strm.avail_out)) 694 break; 695 696 strm.next_out = out_buf.u8; 697 strm.avail_out = IO_BUFFER_SIZE; 698 } 699 700 if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH 701 || action == LZMA_FULL_BARRIER)) { 702 if (action == LZMA_SYNC_FLUSH) { 703 // Flushing completed. Write the pending data 704 // out immediatelly so that the reading side 705 // can decompress everything compressed so far. 706 if (io_write(pair, &out_buf, IO_BUFFER_SIZE 707 - strm.avail_out)) 708 break; 709 710 strm.next_out = out_buf.u8; 711 strm.avail_out = IO_BUFFER_SIZE; 712 713 // Set the time of the most recent flushing. 714 mytime_set_flush_time(); 715 } else { 716 // Start a new Block after LZMA_FULL_BARRIER. 717 if (opt_block_list == NULL) { 718 assert(hardware_threads_get() == 1); 719 assert(opt_block_size > 0); 720 block_remaining = opt_block_size; 721 } else { 722 split_block(&block_remaining, 723 &next_block_remaining, 724 &list_pos); 725 } 726 } 727 728 // Start a new Block after LZMA_FULL_FLUSH or continue 729 // the same block after LZMA_SYNC_FLUSH. 730 action = LZMA_RUN; 731 732 } else if (ret != LZMA_OK) { 733 // Determine if the return value indicates that we 734 // won't continue coding. 735 const bool stop = ret != LZMA_NO_CHECK 736 && ret != LZMA_UNSUPPORTED_CHECK; 737 738 if (stop) { 739 // Write the remaining bytes even if something 740 // went wrong, because that way the user gets 741 // as much data as possible, which can be good 742 // when trying to get at least some useful 743 // data out of damaged files. 744 if (opt_mode != MODE_TEST && io_write(pair, 745 &out_buf, IO_BUFFER_SIZE 746 - strm.avail_out)) 747 break; 748 } 749 750 if (ret == LZMA_STREAM_END) { 751 if (opt_single_stream) { 752 io_fix_src_pos(pair, strm.avail_in); 753 success = true; 754 break; 755 } 756 757 // Check that there is no trailing garbage. 758 // This is needed for LZMA_Alone and raw 759 // streams. 760 if (strm.avail_in == 0 && !pair->src_eof) { 761 // Try reading one more byte. 762 // Hopefully we don't get any more 763 // input, and thus pair->src_eof 764 // becomes true. 765 strm.avail_in = io_read( 766 pair, &in_buf, 1); 767 if (strm.avail_in == SIZE_MAX) 768 break; 769 770 assert(strm.avail_in == 0 771 || strm.avail_in == 1); 772 } 773 774 if (strm.avail_in == 0) { 775 assert(pair->src_eof); 776 success = true; 777 break; 778 } 779 780 // We hadn't reached the end of the file. 781 ret = LZMA_DATA_ERROR; 782 assert(stop); 783 } 784 785 // If we get here and stop is true, something went 786 // wrong and we print an error. Otherwise it's just 787 // a warning and coding can continue. 788 if (stop) { 789 message_error("%s: %s", pair->src_name, 790 message_strm(ret)); 791 } else { 792 message_warning("%s: %s", pair->src_name, 793 message_strm(ret)); 794 795 // When compressing, all possible errors set 796 // stop to true. 797 assert(opt_mode != MODE_COMPRESS); 798 } 799 800 if (ret == LZMA_MEMLIMIT_ERROR) { 801 // Display how much memory it would have 802 // actually needed. 803 message_mem_needed(V_ERROR, 804 lzma_memusage(&strm)); 805 } 806 807 if (stop) 808 break; 809 } 810 811 // Show progress information under certain conditions. 812 message_progress_update(); 813 } 814 815 return success; 816 } 817 818 819 /// Copy from input file to output file without processing the data in any 820 /// way. This is used only when trying to decompress unrecognized files 821 /// with --decompress --stdout --force, so the output is always stdout. 822 static bool 823 coder_passthru(file_pair *pair) 824 { 825 while (strm.avail_in != 0) { 826 if (user_abort) 827 return false; 828 829 if (io_write(pair, &in_buf, strm.avail_in)) 830 return false; 831 832 strm.total_in += strm.avail_in; 833 strm.total_out = strm.total_in; 834 message_progress_update(); 835 836 strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); 837 if (strm.avail_in == SIZE_MAX) 838 return false; 839 } 840 841 return true; 842 } 843 844 845 extern void 846 coder_run(const char *filename) 847 { 848 // Set and possibly print the filename for the progress message. 849 message_filename(filename); 850 851 // Try to open the input file. 852 file_pair *pair = io_open_src(filename); 853 if (pair == NULL) 854 return; 855 856 // Assume that something goes wrong. 857 bool success = false; 858 859 if (opt_mode == MODE_COMPRESS) { 860 strm.next_in = NULL; 861 strm.avail_in = 0; 862 } else { 863 // Read the first chunk of input data. This is needed 864 // to detect the input file type. 865 strm.next_in = in_buf.u8; 866 strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE); 867 } 868 869 if (strm.avail_in != SIZE_MAX) { 870 // Initialize the coder. This will detect the file format 871 // and, in decompression or testing mode, check the memory 872 // usage of the first Block too. This way we don't try to 873 // open the destination file if we see that coding wouldn't 874 // work at all anyway. This also avoids deleting the old 875 // "target" file if --force was used. 876 const enum coder_init_ret init_ret = coder_init(pair); 877 878 if (init_ret != CODER_INIT_ERROR && !user_abort) { 879 // Don't open the destination file when --test 880 // is used. 881 if (opt_mode == MODE_TEST || !io_open_dest(pair)) { 882 // Remember the current time. It is needed 883 // for progress indicator and for timed 884 // flushing. 885 mytime_set_start_time(); 886 887 // Initialize the progress indicator. 888 const uint64_t in_size 889 = pair->src_st.st_size <= 0 890 ? 0 : pair->src_st.st_size; 891 message_progress_start(&strm, in_size); 892 893 // Do the actual coding or passthru. 894 if (init_ret == CODER_INIT_NORMAL) 895 success = coder_normal(pair); 896 else 897 success = coder_passthru(pair); 898 899 message_progress_end(success); 900 } 901 } 902 } 903 904 // Close the file pair. It needs to know if coding was successful to 905 // know if the source or target file should be unlinked. 906 io_close(pair, success); 907 908 return; 909 } 910 911 912 #ifndef NDEBUG 913 extern void 914 coder_free(void) 915 { 916 lzma_end(&strm); 917 return; 918 } 919 #endif 920