1 /*- 2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "archive_platform.h" 27 28 #ifdef HAVE_ERRNO_H 29 #include <errno.h> 30 #endif 31 #ifdef HAVE_LIMITS_H 32 #include <limits.h> 33 #endif 34 #ifdef HAVE_STDLIB_H 35 #include <stdlib.h> 36 #endif 37 #ifdef HAVE_STRING_H 38 #include <string.h> 39 #endif 40 41 #include "archive.h" 42 #include "archive_entry.h" 43 #include "archive_entry_locale.h" 44 #include "archive_private.h" 45 #include "archive_read_private.h" 46 #include "archive_endian.h" 47 48 49 #define MAXMATCH 256 /* Maximum match length. */ 50 #define MINMATCH 3 /* Minimum match length. */ 51 /* 52 * Literal table format: 53 * +0 +256 +510 54 * +---------------+-------------------------+ 55 * | literal code | match length | 56 * | 0 ... 255 | MINMATCH ... MAXMATCH | 57 * +---------------+-------------------------+ 58 * <--- LT_BITLEN_SIZE ---> 59 */ 60 /* Literal table size. */ 61 #define LT_BITLEN_SIZE (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1) 62 /* Position table size. 63 * Note: this used for both position table and pre literal table.*/ 64 #define PT_BITLEN_SIZE (3 + 16) 65 66 struct lzh_dec { 67 /* Decoding status. */ 68 int state; 69 70 /* 71 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded 72 * data. 73 */ 74 int w_size; 75 int w_mask; 76 /* Window buffer, which is a loop buffer. */ 77 unsigned char *w_buff; 78 /* The insert position to the window. */ 79 int w_pos; 80 /* The position where we can copy decoded code from the window. */ 81 int copy_pos; 82 /* The length how many bytes we can copy decoded code from 83 * the window. */ 84 int copy_len; 85 86 /* 87 * Bit stream reader. 88 */ 89 struct lzh_br { 90 #define CACHE_TYPE uint64_t 91 #define CACHE_BITS (8 * sizeof(CACHE_TYPE)) 92 /* Cache buffer. */ 93 CACHE_TYPE cache_buffer; 94 /* Indicates how many bits avail in cache_buffer. */ 95 int cache_avail; 96 } br; 97 98 /* 99 * Huffman coding. 100 */ 101 struct huffman { 102 int len_size; 103 int len_avail; 104 int len_bits; 105 int freq[17]; 106 unsigned char *bitlen; 107 108 /* 109 * Use a index table. It's faster than searching a huffman 110 * coding tree, which is a binary tree. But a use of a large 111 * index table causes L1 cache read miss many times. 112 */ 113 #define HTBL_BITS 10 114 int max_bits; 115 int shift_bits; 116 int tbl_bits; 117 int tree_used; 118 int tree_avail; 119 /* Direct access table. */ 120 uint16_t *tbl; 121 /* Binary tree table for extra bits over the direct access. */ 122 struct htree_t { 123 uint16_t left; 124 uint16_t right; 125 } *tree; 126 } lt, pt; 127 128 int blocks_avail; 129 int pos_pt_len_size; 130 int pos_pt_len_bits; 131 int literal_pt_len_size; 132 int literal_pt_len_bits; 133 int reading_position; 134 int loop; 135 int error; 136 }; 137 138 struct lzh_stream { 139 const unsigned char *next_in; 140 int avail_in; 141 int64_t total_in; 142 const unsigned char *ref_ptr; 143 int avail_out; 144 int64_t total_out; 145 struct lzh_dec *ds; 146 }; 147 148 struct lha { 149 /* entry_bytes_remaining is the number of bytes we expect. */ 150 int64_t entry_offset; 151 int64_t entry_bytes_remaining; 152 int64_t entry_unconsumed; 153 uint16_t entry_crc_calculated; 154 155 size_t header_size; /* header size */ 156 unsigned char level; /* header level */ 157 char method[3]; /* compress type */ 158 int64_t compsize; /* compressed data size */ 159 int64_t origsize; /* original file size */ 160 int setflag; 161 #define BIRTHTIME_IS_SET 1 162 #define ATIME_IS_SET 2 163 #define UNIX_MODE_IS_SET 4 164 #define CRC_IS_SET 8 165 time_t birthtime; 166 long birthtime_tv_nsec; 167 time_t mtime; 168 long mtime_tv_nsec; 169 time_t atime; 170 long atime_tv_nsec; 171 mode_t mode; 172 int64_t uid; 173 int64_t gid; 174 struct archive_string uname; 175 struct archive_string gname; 176 uint16_t header_crc; 177 uint16_t crc; 178 /* dirname and filename could be in different codepages */ 179 struct archive_string_conv *sconv_dir; 180 struct archive_string_conv *sconv_fname; 181 struct archive_string_conv *opt_sconv; 182 183 struct archive_string dirname; 184 struct archive_string filename; 185 struct archive_wstring ws; 186 187 unsigned char dos_attr; 188 189 /* Flag to mark progress that an archive was read their first header.*/ 190 char found_first_header; 191 /* Flag to mark that indicates an empty directory. */ 192 char directory; 193 194 /* Flags to mark progress of decompression. */ 195 char decompress_init; 196 char end_of_entry; 197 char end_of_entry_cleanup; 198 char entry_is_compressed; 199 200 char format_name[64]; 201 202 struct lzh_stream strm; 203 }; 204 205 /* 206 * LHA header common member offset. 207 */ 208 #define H_METHOD_OFFSET 2 /* Compress type. */ 209 #define H_ATTR_OFFSET 19 /* DOS attribute. */ 210 #define H_LEVEL_OFFSET 20 /* Header Level. */ 211 #define H_SIZE 22 /* Minimum header size. */ 212 213 static int archive_read_format_lha_bid(struct archive_read *, int); 214 static int archive_read_format_lha_options(struct archive_read *, 215 const char *, const char *); 216 static int archive_read_format_lha_read_header(struct archive_read *, 217 struct archive_entry *); 218 static int archive_read_format_lha_read_data(struct archive_read *, 219 const void **, size_t *, int64_t *); 220 static int archive_read_format_lha_read_data_skip(struct archive_read *); 221 static int archive_read_format_lha_cleanup(struct archive_read *); 222 223 static void lha_replace_path_separator(struct lha *, 224 struct archive_entry *); 225 static int lha_read_file_header_0(struct archive_read *, struct lha *); 226 static int lha_read_file_header_1(struct archive_read *, struct lha *); 227 static int lha_read_file_header_2(struct archive_read *, struct lha *); 228 static int lha_read_file_header_3(struct archive_read *, struct lha *); 229 static int lha_read_file_extended_header(struct archive_read *, 230 struct lha *, uint16_t *, int, size_t, size_t *); 231 static size_t lha_check_header_format(const void *); 232 static int lha_skip_sfx(struct archive_read *); 233 static time_t lha_dos_time(const unsigned char *); 234 static time_t lha_win_time(uint64_t, long *); 235 static unsigned char lha_calcsum(unsigned char, const void *, 236 int, size_t); 237 static int lha_parse_linkname(struct archive_wstring *, 238 struct archive_wstring *); 239 static int lha_read_data_none(struct archive_read *, const void **, 240 size_t *, int64_t *); 241 static int lha_read_data_lzh(struct archive_read *, const void **, 242 size_t *, int64_t *); 243 static void lha_crc16_init(void); 244 static uint16_t lha_crc16(uint16_t, const void *, size_t); 245 static int lzh_decode_init(struct lzh_stream *, const char *); 246 static void lzh_decode_free(struct lzh_stream *); 247 static int lzh_decode(struct lzh_stream *, int); 248 static int lzh_br_fillup(struct lzh_stream *, struct lzh_br *); 249 static int lzh_huffman_init(struct huffman *, size_t, int); 250 static void lzh_huffman_free(struct huffman *); 251 static int lzh_read_pt_bitlen(struct lzh_stream *, int start, int end); 252 static int lzh_make_fake_table(struct huffman *, uint16_t); 253 static int lzh_make_huffman_table(struct huffman *); 254 static inline int lzh_decode_huffman(struct huffman *, unsigned); 255 static int lzh_decode_huffman_tree(struct huffman *, unsigned, int); 256 257 258 int 259 archive_read_support_format_lha(struct archive *_a) 260 { 261 struct archive_read *a = (struct archive_read *)_a; 262 struct lha *lha; 263 int r; 264 265 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 266 ARCHIVE_STATE_NEW, "archive_read_support_format_lha"); 267 268 lha = (struct lha *)calloc(1, sizeof(*lha)); 269 if (lha == NULL) { 270 archive_set_error(&a->archive, ENOMEM, 271 "Can't allocate lha data"); 272 return (ARCHIVE_FATAL); 273 } 274 archive_string_init(&lha->ws); 275 276 r = __archive_read_register_format(a, 277 lha, 278 "lha", 279 archive_read_format_lha_bid, 280 archive_read_format_lha_options, 281 archive_read_format_lha_read_header, 282 archive_read_format_lha_read_data, 283 archive_read_format_lha_read_data_skip, 284 NULL, 285 archive_read_format_lha_cleanup, 286 NULL, 287 NULL); 288 289 if (r != ARCHIVE_OK) 290 free(lha); 291 return (ARCHIVE_OK); 292 } 293 294 static size_t 295 lha_check_header_format(const void *h) 296 { 297 const unsigned char *p = h; 298 size_t next_skip_bytes; 299 300 switch (p[H_METHOD_OFFSET+3]) { 301 /* 302 * "-lh0-" ... "-lh7-" "-lhd-" 303 * "-lzs-" "-lz5-" 304 */ 305 case '0': case '1': case '2': case '3': 306 case '4': case '5': case '6': case '7': 307 case 'd': 308 case 's': 309 next_skip_bytes = 4; 310 311 /* b0 == 0 means the end of an LHa archive file. */ 312 if (p[0] == 0) 313 break; 314 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l' 315 || p[H_METHOD_OFFSET+4] != '-') 316 break; 317 318 if (p[H_METHOD_OFFSET+2] == 'h') { 319 /* "-lh?-" */ 320 if (p[H_METHOD_OFFSET+3] == 's') 321 break; 322 if (p[H_LEVEL_OFFSET] == 0) 323 return (0); 324 if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20) 325 return (0); 326 } 327 if (p[H_METHOD_OFFSET+2] == 'z') { 328 /* LArc extensions: -lzs-,-lz4- and -lz5- */ 329 if (p[H_LEVEL_OFFSET] != 0) 330 break; 331 if (p[H_METHOD_OFFSET+3] == 's' 332 || p[H_METHOD_OFFSET+3] == '4' 333 || p[H_METHOD_OFFSET+3] == '5') 334 return (0); 335 } 336 break; 337 case 'h': next_skip_bytes = 1; break; 338 case 'z': next_skip_bytes = 1; break; 339 case 'l': next_skip_bytes = 2; break; 340 case '-': next_skip_bytes = 3; break; 341 default : next_skip_bytes = 4; break; 342 } 343 344 return (next_skip_bytes); 345 } 346 347 static int 348 archive_read_format_lha_bid(struct archive_read *a, int best_bid) 349 { 350 const char *p; 351 const void *buff; 352 ssize_t bytes_avail, offset, window; 353 size_t next; 354 355 /* If there's already a better bid than we can ever 356 make, don't bother testing. */ 357 if (best_bid > 30) 358 return (-1); 359 360 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) 361 return (-1); 362 363 if (lha_check_header_format(p) == 0) 364 return (30); 365 366 if (p[0] == 'M' && p[1] == 'Z') { 367 /* PE file */ 368 offset = 0; 369 window = 4096; 370 while (offset < (1024 * 20)) { 371 buff = __archive_read_ahead(a, offset + window, 372 &bytes_avail); 373 if (buff == NULL) { 374 /* Remaining bytes are less than window. */ 375 window >>= 1; 376 if (window < (H_SIZE + 3)) 377 return (0); 378 continue; 379 } 380 p = (const char *)buff + offset; 381 while (p + H_SIZE < (const char *)buff + bytes_avail) { 382 if ((next = lha_check_header_format(p)) == 0) 383 return (30); 384 p += next; 385 } 386 offset = p - (const char *)buff; 387 } 388 } 389 return (0); 390 } 391 392 static int 393 archive_read_format_lha_options(struct archive_read *a, 394 const char *key, const char *val) 395 { 396 struct lha *lha; 397 int ret = ARCHIVE_FAILED; 398 399 lha = (struct lha *)(a->format->data); 400 if (strcmp(key, "hdrcharset") == 0) { 401 if (val == NULL || val[0] == 0) 402 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 403 "lha: hdrcharset option needs a character-set name"); 404 else { 405 lha->opt_sconv = 406 archive_string_conversion_from_charset( 407 &a->archive, val, 0); 408 if (lha->opt_sconv != NULL) 409 ret = ARCHIVE_OK; 410 else 411 ret = ARCHIVE_FATAL; 412 } 413 return (ret); 414 } 415 416 /* Note: The "warn" return is just to inform the options 417 * supervisor that we didn't handle it. It will generate 418 * a suitable error if no one used this option. */ 419 return (ARCHIVE_WARN); 420 } 421 422 static int 423 lha_skip_sfx(struct archive_read *a) 424 { 425 const void *h; 426 const char *p, *q; 427 size_t next, skip; 428 ssize_t bytes, window; 429 430 window = 4096; 431 for (;;) { 432 h = __archive_read_ahead(a, window, &bytes); 433 if (h == NULL) { 434 /* Remaining bytes are less than window. */ 435 window >>= 1; 436 if (window < (H_SIZE + 3)) 437 goto fatal; 438 continue; 439 } 440 if (bytes < H_SIZE) 441 goto fatal; 442 p = h; 443 q = p + bytes; 444 445 /* 446 * Scan ahead until we find something that looks 447 * like the lha header. 448 */ 449 while (p + H_SIZE < q) { 450 if ((next = lha_check_header_format(p)) == 0) { 451 skip = p - (const char *)h; 452 __archive_read_consume(a, skip); 453 return (ARCHIVE_OK); 454 } 455 p += next; 456 } 457 skip = p - (const char *)h; 458 __archive_read_consume(a, skip); 459 } 460 fatal: 461 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 462 "Couldn't find out LHa header"); 463 return (ARCHIVE_FATAL); 464 } 465 466 static int 467 truncated_error(struct archive_read *a) 468 { 469 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 470 "Truncated LHa header"); 471 return (ARCHIVE_FATAL); 472 } 473 474 static int 475 archive_read_format_lha_read_header(struct archive_read *a, 476 struct archive_entry *entry) 477 { 478 struct archive_wstring linkname; 479 struct archive_wstring pathname; 480 struct lha *lha; 481 const unsigned char *p; 482 const char *signature; 483 int err; 484 struct archive_mstring conv_buffer; 485 const wchar_t *conv_buffer_p; 486 487 lha_crc16_init(); 488 489 a->archive.archive_format = ARCHIVE_FORMAT_LHA; 490 if (a->archive.archive_format_name == NULL) 491 a->archive.archive_format_name = "lha"; 492 493 lha = (struct lha *)(a->format->data); 494 lha->decompress_init = 0; 495 lha->end_of_entry = 0; 496 lha->end_of_entry_cleanup = 0; 497 lha->entry_unconsumed = 0; 498 499 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) { 500 /* 501 * LHa archiver added 0 to the tail of its archive file as 502 * the mark of the end of the archive. 503 */ 504 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL); 505 if (signature == NULL || signature[0] == 0) 506 return (ARCHIVE_EOF); 507 return (truncated_error(a)); 508 } 509 510 signature = (const char *)p; 511 if (lha->found_first_header == 0 && 512 signature[0] == 'M' && signature[1] == 'Z') { 513 /* This is an executable? Must be self-extracting... */ 514 err = lha_skip_sfx(a); 515 if (err < ARCHIVE_WARN) 516 return (err); 517 518 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL) 519 return (truncated_error(a)); 520 signature = (const char *)p; 521 } 522 /* signature[0] == 0 means the end of an LHa archive file. */ 523 if (signature[0] == 0) 524 return (ARCHIVE_EOF); 525 526 /* 527 * Check the header format and method type. 528 */ 529 if (lha_check_header_format(p) != 0) { 530 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 531 "Bad LHa file"); 532 return (ARCHIVE_FATAL); 533 } 534 535 /* We've found the first header. */ 536 lha->found_first_header = 1; 537 /* Set a default value and common data */ 538 lha->header_size = 0; 539 lha->level = p[H_LEVEL_OFFSET]; 540 lha->method[0] = p[H_METHOD_OFFSET+1]; 541 lha->method[1] = p[H_METHOD_OFFSET+2]; 542 lha->method[2] = p[H_METHOD_OFFSET+3]; 543 if (memcmp(lha->method, "lhd", 3) == 0) 544 lha->directory = 1; 545 else 546 lha->directory = 0; 547 if (memcmp(lha->method, "lh0", 3) == 0 || 548 memcmp(lha->method, "lz4", 3) == 0) 549 lha->entry_is_compressed = 0; 550 else 551 lha->entry_is_compressed = 1; 552 553 lha->compsize = 0; 554 lha->origsize = 0; 555 lha->setflag = 0; 556 lha->birthtime = 0; 557 lha->birthtime_tv_nsec = 0; 558 lha->mtime = 0; 559 lha->mtime_tv_nsec = 0; 560 lha->atime = 0; 561 lha->atime_tv_nsec = 0; 562 lha->mode = (lha->directory)? 0777 : 0666; 563 lha->uid = 0; 564 lha->gid = 0; 565 archive_string_empty(&lha->dirname); 566 archive_string_empty(&lha->filename); 567 lha->dos_attr = 0; 568 if (lha->opt_sconv != NULL) { 569 lha->sconv_dir = lha->opt_sconv; 570 lha->sconv_fname = lha->opt_sconv; 571 } else { 572 lha->sconv_dir = NULL; 573 lha->sconv_fname = NULL; 574 } 575 576 switch (p[H_LEVEL_OFFSET]) { 577 case 0: 578 err = lha_read_file_header_0(a, lha); 579 break; 580 case 1: 581 err = lha_read_file_header_1(a, lha); 582 break; 583 case 2: 584 err = lha_read_file_header_2(a, lha); 585 break; 586 case 3: 587 err = lha_read_file_header_3(a, lha); 588 break; 589 default: 590 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 591 "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]); 592 err = ARCHIVE_FATAL; 593 break; 594 } 595 if (err < ARCHIVE_WARN) 596 return (err); 597 598 599 if (!lha->directory && archive_strlen(&lha->filename) == 0) 600 /* The filename has not been set */ 601 return (truncated_error(a)); 602 603 /* 604 * Make a pathname from a dirname and a filename, after converting to Unicode. 605 * This is because codepages might differ between dirname and filename. 606 */ 607 archive_string_init(&pathname); 608 archive_string_init(&linkname); 609 archive_string_init(&conv_buffer.aes_mbs); 610 archive_string_init(&conv_buffer.aes_mbs_in_locale); 611 archive_string_init(&conv_buffer.aes_utf8); 612 archive_string_init(&conv_buffer.aes_wcs); 613 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->dirname.s, lha->dirname.length, lha->sconv_dir)) { 614 archive_set_error(&a->archive, 615 ARCHIVE_ERRNO_FILE_FORMAT, 616 "Pathname cannot be converted " 617 "from %s to Unicode.", 618 archive_string_conversion_charset_name(lha->sconv_dir)); 619 err = ARCHIVE_FATAL; 620 } else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p)) 621 err = ARCHIVE_FATAL; 622 if (err == ARCHIVE_FATAL) { 623 archive_mstring_clean(&conv_buffer); 624 archive_wstring_free(&pathname); 625 archive_wstring_free(&linkname); 626 return (err); 627 } 628 archive_wstring_copy(&pathname, &conv_buffer.aes_wcs); 629 630 archive_string_empty(&conv_buffer.aes_mbs); 631 archive_string_empty(&conv_buffer.aes_mbs_in_locale); 632 archive_string_empty(&conv_buffer.aes_utf8); 633 archive_wstring_empty(&conv_buffer.aes_wcs); 634 if (0 != archive_mstring_copy_mbs_len_l(&conv_buffer, lha->filename.s, lha->filename.length, lha->sconv_fname)) { 635 archive_set_error(&a->archive, 636 ARCHIVE_ERRNO_FILE_FORMAT, 637 "Pathname cannot be converted " 638 "from %s to Unicode.", 639 archive_string_conversion_charset_name(lha->sconv_fname)); 640 err = ARCHIVE_FATAL; 641 } 642 else if (0 != archive_mstring_get_wcs(&a->archive, &conv_buffer, &conv_buffer_p)) 643 err = ARCHIVE_FATAL; 644 if (err == ARCHIVE_FATAL) { 645 archive_mstring_clean(&conv_buffer); 646 archive_wstring_free(&pathname); 647 archive_wstring_free(&linkname); 648 return (err); 649 } 650 archive_wstring_concat(&pathname, &conv_buffer.aes_wcs); 651 archive_mstring_clean(&conv_buffer); 652 653 if ((lha->mode & AE_IFMT) == AE_IFLNK) { 654 /* 655 * Extract the symlink-name if it's included in the pathname. 656 */ 657 if (!lha_parse_linkname(&linkname, &pathname)) { 658 /* We couldn't get the symlink-name. */ 659 archive_set_error(&a->archive, 660 ARCHIVE_ERRNO_FILE_FORMAT, 661 "Unknown symlink-name"); 662 archive_wstring_free(&pathname); 663 archive_wstring_free(&linkname); 664 return (ARCHIVE_FAILED); 665 } 666 } else { 667 /* 668 * Make sure a file-type is set. 669 * The mode has been overridden if it is in the extended data. 670 */ 671 lha->mode = (lha->mode & ~AE_IFMT) | 672 ((lha->directory)? AE_IFDIR: AE_IFREG); 673 } 674 if ((lha->setflag & UNIX_MODE_IS_SET) == 0 && 675 (lha->dos_attr & 1) != 0) 676 lha->mode &= ~(0222);/* read only. */ 677 678 /* 679 * Set basic file parameters. 680 */ 681 archive_entry_copy_pathname_w(entry, pathname.s); 682 archive_wstring_free(&pathname); 683 if (archive_strlen(&linkname) > 0) { 684 archive_entry_copy_symlink_w(entry, linkname.s); 685 } else 686 archive_entry_set_symlink(entry, NULL); 687 archive_wstring_free(&linkname); 688 /* 689 * When a header level is 0, there is a possibility that 690 * a pathname and a symlink has '\' character, a directory 691 * separator in DOS/Windows. So we should convert it to '/'. 692 */ 693 if (p[H_LEVEL_OFFSET] == 0) 694 lha_replace_path_separator(lha, entry); 695 696 archive_entry_set_mode(entry, lha->mode); 697 archive_entry_set_uid(entry, lha->uid); 698 archive_entry_set_gid(entry, lha->gid); 699 if (archive_strlen(&lha->uname) > 0) 700 archive_entry_set_uname(entry, lha->uname.s); 701 if (archive_strlen(&lha->gname) > 0) 702 archive_entry_set_gname(entry, lha->gname.s); 703 if (lha->setflag & BIRTHTIME_IS_SET) { 704 archive_entry_set_birthtime(entry, lha->birthtime, 705 lha->birthtime_tv_nsec); 706 archive_entry_set_ctime(entry, lha->birthtime, 707 lha->birthtime_tv_nsec); 708 } else { 709 archive_entry_unset_birthtime(entry); 710 archive_entry_unset_ctime(entry); 711 } 712 archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec); 713 if (lha->setflag & ATIME_IS_SET) 714 archive_entry_set_atime(entry, lha->atime, 715 lha->atime_tv_nsec); 716 else 717 archive_entry_unset_atime(entry); 718 if (lha->directory || archive_entry_symlink(entry) != NULL) 719 archive_entry_unset_size(entry); 720 else 721 archive_entry_set_size(entry, lha->origsize); 722 723 /* 724 * Prepare variables used to read a file content. 725 */ 726 lha->entry_bytes_remaining = lha->compsize; 727 if (lha->entry_bytes_remaining < 0) { 728 archive_set_error(&a->archive, 729 ARCHIVE_ERRNO_FILE_FORMAT, 730 "Invalid LHa entry size"); 731 return (ARCHIVE_FATAL); 732 } 733 lha->entry_offset = 0; 734 lha->entry_crc_calculated = 0; 735 736 /* 737 * This file does not have a content. 738 */ 739 if (lha->directory || lha->compsize == 0) 740 lha->end_of_entry = 1; 741 742 snprintf(lha->format_name, sizeof(lha->format_name), "lha -%c%c%c-", 743 lha->method[0], lha->method[1], lha->method[2]); 744 a->archive.archive_format_name = lha->format_name; 745 746 return (err); 747 } 748 749 /* 750 * Replace a DOS path separator '\' by a character '/'. 751 * Some multi-byte character set have a character '\' in its second byte. 752 */ 753 static void 754 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry) 755 { 756 const wchar_t *wp; 757 size_t i; 758 759 if ((wp = archive_entry_pathname_w(entry)) != NULL) { 760 archive_wstrcpy(&(lha->ws), wp); 761 for (i = 0; i < archive_strlen(&(lha->ws)); i++) { 762 if (lha->ws.s[i] == L'\\') 763 lha->ws.s[i] = L'/'; 764 } 765 archive_entry_copy_pathname_w(entry, lha->ws.s); 766 } 767 768 if ((wp = archive_entry_symlink_w(entry)) != NULL) { 769 archive_wstrcpy(&(lha->ws), wp); 770 for (i = 0; i < archive_strlen(&(lha->ws)); i++) { 771 if (lha->ws.s[i] == L'\\') 772 lha->ws.s[i] = L'/'; 773 } 774 archive_entry_copy_symlink_w(entry, lha->ws.s); 775 } 776 } 777 778 /* 779 * Header 0 format 780 * 781 * +0 +1 +2 +7 +11 782 * +---------------+----------+----------------+-------------------+ 783 * |header size(*1)|header sum|compression type|compressed size(*2)| 784 * +---------------+----------+----------------+-------------------+ 785 * <---------------------(*1)----------* 786 * 787 * +11 +15 +17 +19 +20 +21 788 * +-----------------+---------+---------+--------------+----------------+ 789 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)| 790 * +-----------------+---------+---------+--------------+----------------+ 791 * *--------------------------------(*1)---------------------------------* 792 * 793 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4) 794 * +---------------+---------+----------+----------------+------------------+ 795 * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data | 796 * +---------------+---------+----------+----------------+------------------+ 797 * <--(*3)-> <------(*2)------> 798 * *----------------------(*1)--------------------------> 799 * 800 */ 801 #define H0_HEADER_SIZE_OFFSET 0 802 #define H0_HEADER_SUM_OFFSET 1 803 #define H0_COMP_SIZE_OFFSET 7 804 #define H0_ORIG_SIZE_OFFSET 11 805 #define H0_DOS_TIME_OFFSET 15 806 #define H0_NAME_LEN_OFFSET 21 807 #define H0_FILE_NAME_OFFSET 22 808 #define H0_FIXED_SIZE 24 809 static int 810 lha_read_file_header_0(struct archive_read *a, struct lha *lha) 811 { 812 const unsigned char *p; 813 int extdsize, namelen; 814 unsigned char headersum, sum_calculated; 815 816 if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL) 817 return (truncated_error(a)); 818 lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2; 819 headersum = p[H0_HEADER_SUM_OFFSET]; 820 lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET); 821 lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET); 822 lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET); 823 namelen = p[H0_NAME_LEN_OFFSET]; 824 extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen; 825 if ((namelen > 221 || extdsize < 0) && extdsize != -2) { 826 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 827 "Invalid LHa header"); 828 return (ARCHIVE_FATAL); 829 } 830 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL) 831 return (truncated_error(a)); 832 833 archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen); 834 /* When extdsize == -2, A CRC16 value is not present in the header. */ 835 if (extdsize >= 0) { 836 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen); 837 lha->setflag |= CRC_IS_SET; 838 } 839 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2); 840 841 /* Read an extended header */ 842 if (extdsize > 0) { 843 /* This extended data is set by 'LHa for UNIX' only. 844 * Maybe fixed size. 845 */ 846 p += H0_FILE_NAME_OFFSET + namelen + 2; 847 if (p[0] == 'U' && extdsize == 12) { 848 /* p[1] is a minor version. */ 849 lha->mtime = archive_le32dec(&p[2]); 850 lha->mode = archive_le16dec(&p[6]); 851 lha->uid = archive_le16dec(&p[8]); 852 lha->gid = archive_le16dec(&p[10]); 853 lha->setflag |= UNIX_MODE_IS_SET; 854 } 855 } 856 __archive_read_consume(a, lha->header_size); 857 858 if (sum_calculated != headersum) { 859 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 860 "LHa header sum error"); 861 return (ARCHIVE_FATAL); 862 } 863 864 return (ARCHIVE_OK); 865 } 866 867 /* 868 * Header 1 format 869 * 870 * +0 +1 +2 +7 +11 871 * +---------------+----------+----------------+-------------+ 872 * |header size(*1)|header sum|compression type|skip size(*2)| 873 * +---------------+----------+----------------+-------------+ 874 * <---------------(*1)----------* 875 * 876 * +11 +15 +17 +19 +20 +21 877 * +-----------------+---------+---------+--------------+----------------+ 878 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)| 879 * +-----------------+---------+---------+--------------+----------------+ 880 * *-------------------------------(*1)----------------------------------* 881 * 882 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4) 883 * +---------------+---------+----------+-----------+-----------+ 884 * |name length(*3)|file name|file CRC16| creator |padding(*4)| 885 * +---------------+---------+----------+-----------+-----------+ 886 * <--(*3)-> 887 * *----------------------------(*1)----------------------------* 888 * 889 * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5) 890 * +----------------+---------------------+------------------------+ 891 * |next header size| extended header(*5) | compressed data | 892 * +----------------+---------------------+------------------------+ 893 * *------(*1)-----> <--------------------(*2)--------------------> 894 */ 895 #define H1_HEADER_SIZE_OFFSET 0 896 #define H1_HEADER_SUM_OFFSET 1 897 #define H1_COMP_SIZE_OFFSET 7 898 #define H1_ORIG_SIZE_OFFSET 11 899 #define H1_DOS_TIME_OFFSET 15 900 #define H1_NAME_LEN_OFFSET 21 901 #define H1_FILE_NAME_OFFSET 22 902 #define H1_FIXED_SIZE 27 903 static int 904 lha_read_file_header_1(struct archive_read *a, struct lha *lha) 905 { 906 const unsigned char *p; 907 size_t extdsize; 908 int i, err, err2; 909 int namelen, padding; 910 unsigned char headersum, sum_calculated; 911 912 err = ARCHIVE_OK; 913 914 if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL) 915 return (truncated_error(a)); 916 917 lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2; 918 headersum = p[H1_HEADER_SUM_OFFSET]; 919 /* Note: An extended header size is included in a compsize. */ 920 lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET); 921 lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET); 922 lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET); 923 namelen = p[H1_NAME_LEN_OFFSET]; 924 /* Calculate a padding size. The result will be normally 0 only(?) */ 925 padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen; 926 927 if (namelen > 230 || padding < 0) 928 goto invalid; 929 930 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL) 931 return (truncated_error(a)); 932 933 for (i = 0; i < namelen; i++) { 934 if (p[i + H1_FILE_NAME_OFFSET] == 0xff) 935 goto invalid;/* Invalid filename. */ 936 } 937 archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen); 938 lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen); 939 lha->setflag |= CRC_IS_SET; 940 941 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2); 942 /* Consume used bytes but not include `next header size' data 943 * since it will be consumed in lha_read_file_extended_header(). */ 944 __archive_read_consume(a, lha->header_size - 2); 945 946 /* Read extended headers */ 947 err2 = lha_read_file_extended_header(a, lha, NULL, 2, 948 (size_t)(lha->compsize + 2), &extdsize); 949 if (err2 < ARCHIVE_WARN) 950 return (err2); 951 if (err2 < err) 952 err = err2; 953 /* Get a real compressed file size. */ 954 lha->compsize -= extdsize - 2; 955 956 if (lha->compsize < 0) 957 goto invalid; /* Invalid compressed file size */ 958 959 if (sum_calculated != headersum) { 960 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 961 "LHa header sum error"); 962 return (ARCHIVE_FATAL); 963 } 964 return (err); 965 invalid: 966 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 967 "Invalid LHa header"); 968 return (ARCHIVE_FATAL); 969 } 970 971 /* 972 * Header 2 format 973 * 974 * +0 +2 +7 +11 +15 975 * +---------------+----------------+-------------------+-----------------+ 976 * |header size(*1)|compression type|compressed size(*2)|uncompressed size| 977 * +---------------+----------------+-------------------+-----------------+ 978 * <--------------------------------(*1)---------------------------------* 979 * 980 * +15 +19 +20 +21 +23 +24 981 * +-----------------+------------+----------------+----------+-----------+ 982 * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16| creator | 983 * +-----------------+------------+----------------+----------+-----------+ 984 * *---------------------------------(*1)---------------------------------* 985 * 986 * +24 +26 +26+(*3) +26+(*3)+(*4) 987 * +----------------+-------------------+-------------+-------------------+ 988 * |next header size|extended header(*3)| padding(*4) | compressed data | 989 * +----------------+-------------------+-------------+-------------------+ 990 * *--------------------------(*1)-------------------> <------(*2)-------> 991 * 992 */ 993 #define H2_HEADER_SIZE_OFFSET 0 994 #define H2_COMP_SIZE_OFFSET 7 995 #define H2_ORIG_SIZE_OFFSET 11 996 #define H2_TIME_OFFSET 15 997 #define H2_CRC_OFFSET 21 998 #define H2_FIXED_SIZE 24 999 static int 1000 lha_read_file_header_2(struct archive_read *a, struct lha *lha) 1001 { 1002 const unsigned char *p; 1003 size_t extdsize; 1004 int err, padding; 1005 uint16_t header_crc; 1006 1007 if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL) 1008 return (truncated_error(a)); 1009 1010 lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET); 1011 lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET); 1012 lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET); 1013 lha->mtime = archive_le32dec(p + H2_TIME_OFFSET); 1014 lha->crc = archive_le16dec(p + H2_CRC_OFFSET); 1015 lha->setflag |= CRC_IS_SET; 1016 1017 if (lha->header_size < H2_FIXED_SIZE) { 1018 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1019 "Invalid LHa header size"); 1020 return (ARCHIVE_FATAL); 1021 } 1022 1023 header_crc = lha_crc16(0, p, H2_FIXED_SIZE); 1024 __archive_read_consume(a, H2_FIXED_SIZE); 1025 1026 /* Read extended headers */ 1027 err = lha_read_file_extended_header(a, lha, &header_crc, 2, 1028 lha->header_size - H2_FIXED_SIZE, &extdsize); 1029 if (err < ARCHIVE_WARN) 1030 return (err); 1031 1032 /* Calculate a padding size. The result will be normally 0 or 1. */ 1033 padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize); 1034 if (padding > 0) { 1035 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL) 1036 return (truncated_error(a)); 1037 header_crc = lha_crc16(header_crc, p, padding); 1038 __archive_read_consume(a, padding); 1039 } 1040 1041 if (header_crc != lha->header_crc) { 1042 #ifndef DONT_FAIL_ON_CRC_ERROR 1043 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1044 "LHa header CRC error"); 1045 return (ARCHIVE_FATAL); 1046 #endif 1047 } 1048 return (err); 1049 } 1050 1051 /* 1052 * Header 3 format 1053 * 1054 * +0 +2 +7 +11 +15 1055 * +------------+----------------+-------------------+-----------------+ 1056 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size| 1057 * +------------+----------------+-------------------+-----------------+ 1058 * <-------------------------------(*1)-------------------------------* 1059 * 1060 * +15 +19 +20 +21 +23 +24 1061 * +-----------------+------------+----------------+----------+-----------+ 1062 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16| creator | 1063 * +-----------------+------------+----------------+----------+-----------+ 1064 * *--------------------------------(*1)----------------------------------* 1065 * 1066 * +24 +28 +32 +32+(*3) 1067 * +---------------+----------------+-------------------+-----------------+ 1068 * |header size(*1)|next header size|extended header(*3)| compressed data | 1069 * +---------------+----------------+-------------------+-----------------+ 1070 * *------------------------(*1)-----------------------> <------(*2)-----> 1071 * 1072 */ 1073 #define H3_FIELD_LEN_OFFSET 0 1074 #define H3_COMP_SIZE_OFFSET 7 1075 #define H3_ORIG_SIZE_OFFSET 11 1076 #define H3_TIME_OFFSET 15 1077 #define H3_CRC_OFFSET 21 1078 #define H3_HEADER_SIZE_OFFSET 24 1079 #define H3_FIXED_SIZE 28 1080 static int 1081 lha_read_file_header_3(struct archive_read *a, struct lha *lha) 1082 { 1083 const unsigned char *p; 1084 size_t extdsize; 1085 int err; 1086 uint16_t header_crc; 1087 1088 if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL) 1089 return (truncated_error(a)); 1090 1091 if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4) 1092 goto invalid; 1093 lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET); 1094 lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET); 1095 lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET); 1096 lha->mtime = archive_le32dec(p + H3_TIME_OFFSET); 1097 lha->crc = archive_le16dec(p + H3_CRC_OFFSET); 1098 lha->setflag |= CRC_IS_SET; 1099 1100 if (lha->header_size < H3_FIXED_SIZE + 4) 1101 goto invalid; 1102 header_crc = lha_crc16(0, p, H3_FIXED_SIZE); 1103 __archive_read_consume(a, H3_FIXED_SIZE); 1104 1105 /* Read extended headers */ 1106 err = lha_read_file_extended_header(a, lha, &header_crc, 4, 1107 lha->header_size - H3_FIXED_SIZE, &extdsize); 1108 if (err < ARCHIVE_WARN) 1109 return (err); 1110 1111 if (header_crc != lha->header_crc) { 1112 #ifndef DONT_FAIL_ON_CRC_ERROR 1113 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1114 "LHa header CRC error"); 1115 return (ARCHIVE_FATAL); 1116 #endif 1117 } 1118 return (err); 1119 invalid: 1120 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1121 "Invalid LHa header"); 1122 return (ARCHIVE_FATAL); 1123 } 1124 1125 /* 1126 * Extended header format 1127 * 1128 * +0 +2 +3 -- used in header 1 and 2 1129 * +0 +4 +5 -- used in header 3 1130 * +--------------+---------+-------------------+--------------+-- 1131 * |ex-header size|header id| data |ex-header size| ....... 1132 * +--------------+---------+-------------------+--------------+-- 1133 * <-------------( ex-header size)------------> <-- next extended header --* 1134 * 1135 * If the ex-header size is zero, it is the make of the end of extended 1136 * headers. 1137 * 1138 */ 1139 static int 1140 lha_read_file_extended_header(struct archive_read *a, struct lha *lha, 1141 uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size) 1142 { 1143 const void *h; 1144 const unsigned char *extdheader; 1145 size_t extdsize; 1146 size_t datasize; 1147 unsigned int i; 1148 unsigned char extdtype; 1149 1150 #define EXT_HEADER_CRC 0x00 /* Header CRC and information*/ 1151 #define EXT_FILENAME 0x01 /* Filename */ 1152 #define EXT_DIRECTORY 0x02 /* Directory name */ 1153 #define EXT_DOS_ATTR 0x40 /* MS-DOS attribute */ 1154 #define EXT_TIMESTAMP 0x41 /* Windows time stamp */ 1155 #define EXT_FILESIZE 0x42 /* Large file size */ 1156 #define EXT_TIMEZONE 0x43 /* Time zone */ 1157 #define EXT_UTF16_FILENAME 0x44 /* UTF-16 filename */ 1158 #define EXT_UTF16_DIRECTORY 0x45 /* UTF-16 directory name */ 1159 #define EXT_CODEPAGE 0x46 /* Codepage */ 1160 #define EXT_UNIX_MODE 0x50 /* File permission */ 1161 #define EXT_UNIX_GID_UID 0x51 /* gid,uid */ 1162 #define EXT_UNIX_GNAME 0x52 /* Group name */ 1163 #define EXT_UNIX_UNAME 0x53 /* User name */ 1164 #define EXT_UNIX_MTIME 0x54 /* Modified time */ 1165 #define EXT_OS2_NEW_ATTR 0x7f /* new attribute(OS/2 only) */ 1166 #define EXT_NEW_ATTR 0xff /* new attribute */ 1167 1168 *total_size = sizefield_length; 1169 1170 for (;;) { 1171 /* Read an extended header size. */ 1172 if ((h = 1173 __archive_read_ahead(a, sizefield_length, NULL)) == NULL) 1174 return (truncated_error(a)); 1175 /* Check if the size is the zero indicates the end of the 1176 * extended header. */ 1177 if (sizefield_length == sizeof(uint16_t)) 1178 extdsize = archive_le16dec(h); 1179 else 1180 extdsize = archive_le32dec(h); 1181 if (extdsize == 0) { 1182 /* End of extended header */ 1183 if (crc != NULL) 1184 *crc = lha_crc16(*crc, h, sizefield_length); 1185 __archive_read_consume(a, sizefield_length); 1186 return (ARCHIVE_OK); 1187 } 1188 1189 /* Sanity check to the extended header size. */ 1190 if (((uint64_t)*total_size + extdsize) > 1191 (uint64_t)limitsize || 1192 extdsize <= (size_t)sizefield_length) 1193 goto invalid; 1194 1195 /* Read the extended header. */ 1196 if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL) 1197 return (truncated_error(a)); 1198 *total_size += extdsize; 1199 1200 extdheader = (const unsigned char *)h; 1201 /* Get the extended header type. */ 1202 extdtype = extdheader[sizefield_length]; 1203 /* Calculate an extended data size. */ 1204 datasize = extdsize - (1 + sizefield_length); 1205 /* Skip an extended header size field and type field. */ 1206 extdheader += sizefield_length + 1; 1207 1208 if (crc != NULL && extdtype != EXT_HEADER_CRC) 1209 *crc = lha_crc16(*crc, h, extdsize); 1210 switch (extdtype) { 1211 case EXT_HEADER_CRC: 1212 /* We only use a header CRC. Following data will not 1213 * be used. */ 1214 if (datasize >= 2) { 1215 lha->header_crc = archive_le16dec(extdheader); 1216 if (crc != NULL) { 1217 static const char zeros[2] = {0, 0}; 1218 *crc = lha_crc16(*crc, h, 1219 extdsize - datasize); 1220 /* CRC value itself as zero */ 1221 *crc = lha_crc16(*crc, zeros, 2); 1222 *crc = lha_crc16(*crc, 1223 extdheader+2, datasize - 2); 1224 } 1225 } 1226 break; 1227 case EXT_FILENAME: 1228 if (datasize == 0) { 1229 /* maybe directory header */ 1230 archive_string_empty(&lha->filename); 1231 break; 1232 } 1233 if (extdheader[0] == '\0') 1234 goto invalid; 1235 archive_strncpy(&lha->filename, 1236 (const char *)extdheader, datasize); 1237 break; 1238 case EXT_UTF16_FILENAME: 1239 if (datasize == 0) { 1240 /* maybe directory header */ 1241 archive_string_empty(&lha->filename); 1242 break; 1243 } else if (datasize & 1) { 1244 /* UTF-16 characters take always 2 or 4 bytes */ 1245 goto invalid; 1246 } 1247 if (extdheader[0] == '\0') 1248 goto invalid; 1249 archive_string_empty(&lha->filename); 1250 archive_array_append(&lha->filename, 1251 (const char *)extdheader, datasize); 1252 /* Setup a string conversion for a filename. */ 1253 lha->sconv_fname = 1254 archive_string_conversion_from_charset(&a->archive, 1255 "UTF-16LE", 1); 1256 if (lha->sconv_fname == NULL) 1257 return (ARCHIVE_FATAL); 1258 break; 1259 case EXT_DIRECTORY: 1260 if (datasize == 0 || extdheader[0] == '\0') 1261 /* no directory name data. exit this case. */ 1262 goto invalid; 1263 1264 archive_strncpy(&lha->dirname, 1265 (const char *)extdheader, datasize); 1266 /* 1267 * Convert directory delimiter from 0xFF 1268 * to '/' for local system. 1269 */ 1270 for (i = 0; i < lha->dirname.length; i++) { 1271 if ((unsigned char)lha->dirname.s[i] == 0xFF) 1272 lha->dirname.s[i] = '/'; 1273 } 1274 /* Is last character directory separator? */ 1275 if (lha->dirname.s[lha->dirname.length-1] != '/') 1276 /* invalid directory data */ 1277 goto invalid; 1278 break; 1279 case EXT_UTF16_DIRECTORY: 1280 /* UTF-16 characters take always 2 or 4 bytes */ 1281 if (datasize == 0 || (datasize & 1) || 1282 extdheader[0] == '\0') { 1283 /* no directory name data. exit this case. */ 1284 goto invalid; 1285 } 1286 1287 archive_string_empty(&lha->dirname); 1288 archive_array_append(&lha->dirname, 1289 (const char *)extdheader, datasize); 1290 lha->sconv_dir = 1291 archive_string_conversion_from_charset(&a->archive, 1292 "UTF-16LE", 1); 1293 if (lha->sconv_dir == NULL) 1294 return (ARCHIVE_FATAL); 1295 else { 1296 /* 1297 * Convert directory delimiter from 0xFFFF 1298 * to '/' for local system. 1299 */ 1300 uint16_t dirSep; 1301 uint16_t d = 1; 1302 if (archive_be16dec(&d) == 1) 1303 dirSep = 0x2F00; 1304 else 1305 dirSep = 0x002F; 1306 1307 /* UTF-16LE character */ 1308 uint16_t *utf16name = 1309 (uint16_t *)lha->dirname.s; 1310 for (i = 0; i < lha->dirname.length / 2; i++) { 1311 if (utf16name[i] == 0xFFFF) { 1312 utf16name[i] = dirSep; 1313 } 1314 } 1315 /* Is last character directory separator? */ 1316 if (utf16name[lha->dirname.length / 2 - 1] != 1317 dirSep) { 1318 /* invalid directory data */ 1319 goto invalid; 1320 } 1321 } 1322 break; 1323 case EXT_DOS_ATTR: 1324 if (datasize == 2) 1325 lha->dos_attr = (unsigned char) 1326 (archive_le16dec(extdheader) & 0xff); 1327 break; 1328 case EXT_TIMESTAMP: 1329 if (datasize == (sizeof(uint64_t) * 3)) { 1330 lha->birthtime = lha_win_time( 1331 archive_le64dec(extdheader), 1332 &lha->birthtime_tv_nsec); 1333 extdheader += sizeof(uint64_t); 1334 lha->mtime = lha_win_time( 1335 archive_le64dec(extdheader), 1336 &lha->mtime_tv_nsec); 1337 extdheader += sizeof(uint64_t); 1338 lha->atime = lha_win_time( 1339 archive_le64dec(extdheader), 1340 &lha->atime_tv_nsec); 1341 lha->setflag |= BIRTHTIME_IS_SET | 1342 ATIME_IS_SET; 1343 } 1344 break; 1345 case EXT_FILESIZE: 1346 if (datasize == sizeof(uint64_t) * 2) { 1347 lha->compsize = archive_le64dec(extdheader); 1348 extdheader += sizeof(uint64_t); 1349 lha->origsize = archive_le64dec(extdheader); 1350 } 1351 break; 1352 case EXT_CODEPAGE: 1353 /* Get an archived filename charset from codepage. 1354 * This overwrites the charset specified by 1355 * hdrcharset option. */ 1356 if (datasize == sizeof(uint32_t)) { 1357 struct archive_string cp; 1358 const char *charset; 1359 1360 archive_string_init(&cp); 1361 switch (archive_le32dec(extdheader)) { 1362 case 65001: /* UTF-8 */ 1363 charset = "UTF-8"; 1364 break; 1365 default: 1366 archive_string_sprintf(&cp, "CP%d", 1367 (int)archive_le32dec(extdheader)); 1368 charset = cp.s; 1369 break; 1370 } 1371 lha->sconv_dir = 1372 archive_string_conversion_from_charset( 1373 &(a->archive), charset, 1); 1374 lha->sconv_fname = 1375 archive_string_conversion_from_charset( 1376 &(a->archive), charset, 1); 1377 archive_string_free(&cp); 1378 if (lha->sconv_dir == NULL) 1379 return (ARCHIVE_FATAL); 1380 if (lha->sconv_fname == NULL) 1381 return (ARCHIVE_FATAL); 1382 } 1383 break; 1384 case EXT_UNIX_MODE: 1385 if (datasize == sizeof(uint16_t)) { 1386 lha->mode = archive_le16dec(extdheader); 1387 lha->setflag |= UNIX_MODE_IS_SET; 1388 } 1389 break; 1390 case EXT_UNIX_GID_UID: 1391 if (datasize == (sizeof(uint16_t) * 2)) { 1392 lha->gid = archive_le16dec(extdheader); 1393 lha->uid = archive_le16dec(extdheader+2); 1394 } 1395 break; 1396 case EXT_UNIX_GNAME: 1397 if (datasize > 0) 1398 archive_strncpy(&lha->gname, 1399 (const char *)extdheader, datasize); 1400 break; 1401 case EXT_UNIX_UNAME: 1402 if (datasize > 0) 1403 archive_strncpy(&lha->uname, 1404 (const char *)extdheader, datasize); 1405 break; 1406 case EXT_UNIX_MTIME: 1407 if (datasize == sizeof(uint32_t)) 1408 lha->mtime = archive_le32dec(extdheader); 1409 break; 1410 case EXT_OS2_NEW_ATTR: 1411 /* This extended header is OS/2 depend. */ 1412 if (datasize == 16) { 1413 lha->dos_attr = (unsigned char) 1414 (archive_le16dec(extdheader) & 0xff); 1415 lha->mode = archive_le16dec(extdheader+2); 1416 lha->gid = archive_le16dec(extdheader+4); 1417 lha->uid = archive_le16dec(extdheader+6); 1418 lha->birthtime = archive_le32dec(extdheader+8); 1419 lha->atime = archive_le32dec(extdheader+12); 1420 lha->setflag |= UNIX_MODE_IS_SET 1421 | BIRTHTIME_IS_SET | ATIME_IS_SET; 1422 } 1423 break; 1424 case EXT_NEW_ATTR: 1425 if (datasize == 20) { 1426 lha->mode = (mode_t)archive_le32dec(extdheader); 1427 lha->gid = archive_le32dec(extdheader+4); 1428 lha->uid = archive_le32dec(extdheader+8); 1429 lha->birthtime = archive_le32dec(extdheader+12); 1430 lha->atime = archive_le32dec(extdheader+16); 1431 lha->setflag |= UNIX_MODE_IS_SET 1432 | BIRTHTIME_IS_SET | ATIME_IS_SET; 1433 } 1434 break; 1435 case EXT_TIMEZONE: /* Not supported */ 1436 break; 1437 default: 1438 break; 1439 } 1440 1441 __archive_read_consume(a, extdsize); 1442 } 1443 invalid: 1444 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1445 "Invalid extended LHa header"); 1446 return (ARCHIVE_FATAL); 1447 } 1448 1449 static int 1450 lha_end_of_entry(struct archive_read *a) 1451 { 1452 struct lha *lha = (struct lha *)(a->format->data); 1453 int r = ARCHIVE_EOF; 1454 1455 if (!lha->end_of_entry_cleanup) { 1456 if ((lha->setflag & CRC_IS_SET) && 1457 lha->crc != lha->entry_crc_calculated) { 1458 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1459 "LHa data CRC error"); 1460 r = ARCHIVE_WARN; 1461 } 1462 1463 /* End-of-entry cleanup done. */ 1464 lha->end_of_entry_cleanup = 1; 1465 } 1466 return (r); 1467 } 1468 1469 static int 1470 archive_read_format_lha_read_data(struct archive_read *a, 1471 const void **buff, size_t *size, int64_t *offset) 1472 { 1473 struct lha *lha = (struct lha *)(a->format->data); 1474 int r; 1475 1476 if (lha->entry_unconsumed) { 1477 /* Consume as much as the decompressor actually used. */ 1478 __archive_read_consume(a, lha->entry_unconsumed); 1479 lha->entry_unconsumed = 0; 1480 } 1481 if (lha->end_of_entry) { 1482 *offset = lha->entry_offset; 1483 *size = 0; 1484 *buff = NULL; 1485 return (lha_end_of_entry(a)); 1486 } 1487 1488 if (lha->entry_is_compressed) 1489 r = lha_read_data_lzh(a, buff, size, offset); 1490 else 1491 /* No compression. */ 1492 r = lha_read_data_none(a, buff, size, offset); 1493 return (r); 1494 } 1495 1496 /* 1497 * Read a file content in no compression. 1498 * 1499 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets 1500 * lha->end_of_entry if it consumes all of the data. 1501 */ 1502 static int 1503 lha_read_data_none(struct archive_read *a, const void **buff, 1504 size_t *size, int64_t *offset) 1505 { 1506 struct lha *lha = (struct lha *)(a->format->data); 1507 ssize_t bytes_avail; 1508 1509 if (lha->entry_bytes_remaining == 0) { 1510 *buff = NULL; 1511 *size = 0; 1512 *offset = lha->entry_offset; 1513 lha->end_of_entry = 1; 1514 return (ARCHIVE_OK); 1515 } 1516 /* 1517 * Note: '1' here is a performance optimization. 1518 * Recall that the decompression layer returns a count of 1519 * available bytes; asking for more than that forces the 1520 * decompressor to combine reads by copying data. 1521 */ 1522 *buff = __archive_read_ahead(a, 1, &bytes_avail); 1523 if (bytes_avail <= 0) { 1524 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1525 "Truncated LHa file data"); 1526 return (ARCHIVE_FATAL); 1527 } 1528 if (bytes_avail > lha->entry_bytes_remaining) 1529 bytes_avail = (ssize_t)lha->entry_bytes_remaining; 1530 lha->entry_crc_calculated = 1531 lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail); 1532 *size = bytes_avail; 1533 *offset = lha->entry_offset; 1534 lha->entry_offset += bytes_avail; 1535 lha->entry_bytes_remaining -= bytes_avail; 1536 if (lha->entry_bytes_remaining == 0) 1537 lha->end_of_entry = 1; 1538 lha->entry_unconsumed = bytes_avail; 1539 return (ARCHIVE_OK); 1540 } 1541 1542 /* 1543 * Read a file content in LZHUFF encoding. 1544 * 1545 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is 1546 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes 1547 * all of the data. 1548 */ 1549 static int 1550 lha_read_data_lzh(struct archive_read *a, const void **buff, 1551 size_t *size, int64_t *offset) 1552 { 1553 struct lha *lha = (struct lha *)(a->format->data); 1554 ssize_t bytes_avail; 1555 int r; 1556 1557 /* If we haven't yet read any data, initialize the decompressor. */ 1558 if (!lha->decompress_init) { 1559 r = lzh_decode_init(&(lha->strm), lha->method); 1560 switch (r) { 1561 case ARCHIVE_OK: 1562 break; 1563 case ARCHIVE_FAILED: 1564 /* Unsupported compression. */ 1565 *buff = NULL; 1566 *size = 0; 1567 *offset = 0; 1568 archive_set_error(&a->archive, 1569 ARCHIVE_ERRNO_FILE_FORMAT, 1570 "Unsupported lzh compression method -%c%c%c-", 1571 lha->method[0], lha->method[1], lha->method[2]); 1572 /* We know compressed size; just skip it. */ 1573 archive_read_format_lha_read_data_skip(a); 1574 return (ARCHIVE_WARN); 1575 default: 1576 archive_set_error(&a->archive, ENOMEM, 1577 "Couldn't allocate memory " 1578 "for lzh decompression"); 1579 return (ARCHIVE_FATAL); 1580 } 1581 /* We've initialized decompression for this stream. */ 1582 lha->decompress_init = 1; 1583 lha->strm.avail_out = 0; 1584 lha->strm.total_out = 0; 1585 } 1586 1587 /* 1588 * Note: '1' here is a performance optimization. 1589 * Recall that the decompression layer returns a count of 1590 * available bytes; asking for more than that forces the 1591 * decompressor to combine reads by copying data. 1592 */ 1593 lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail); 1594 if (bytes_avail <= 0) { 1595 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1596 "Truncated LHa file body"); 1597 return (ARCHIVE_FATAL); 1598 } 1599 if (bytes_avail > lha->entry_bytes_remaining) 1600 bytes_avail = (ssize_t)lha->entry_bytes_remaining; 1601 1602 lha->strm.avail_in = (int)bytes_avail; 1603 lha->strm.total_in = 0; 1604 lha->strm.avail_out = 0; 1605 1606 r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining); 1607 switch (r) { 1608 case ARCHIVE_OK: 1609 break; 1610 case ARCHIVE_EOF: 1611 lha->end_of_entry = 1; 1612 break; 1613 default: 1614 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1615 "Bad lzh data"); 1616 return (ARCHIVE_FAILED); 1617 } 1618 lha->entry_unconsumed = lha->strm.total_in; 1619 lha->entry_bytes_remaining -= lha->strm.total_in; 1620 1621 if (lha->strm.avail_out) { 1622 *offset = lha->entry_offset; 1623 *size = lha->strm.avail_out; 1624 *buff = lha->strm.ref_ptr; 1625 lha->entry_crc_calculated = 1626 lha_crc16(lha->entry_crc_calculated, *buff, *size); 1627 lha->entry_offset += *size; 1628 } else { 1629 *offset = lha->entry_offset; 1630 *size = 0; 1631 *buff = NULL; 1632 if (lha->end_of_entry) 1633 return (lha_end_of_entry(a)); 1634 } 1635 return (ARCHIVE_OK); 1636 } 1637 1638 /* 1639 * Skip a file content. 1640 */ 1641 static int 1642 archive_read_format_lha_read_data_skip(struct archive_read *a) 1643 { 1644 struct lha *lha; 1645 int64_t bytes_skipped; 1646 1647 lha = (struct lha *)(a->format->data); 1648 1649 if (lha->entry_unconsumed) { 1650 /* Consume as much as the decompressor actually used. */ 1651 __archive_read_consume(a, lha->entry_unconsumed); 1652 lha->entry_unconsumed = 0; 1653 } 1654 1655 /* if we've already read to end of data, we're done. */ 1656 if (lha->end_of_entry_cleanup) 1657 return (ARCHIVE_OK); 1658 1659 /* 1660 * If the length is at the beginning, we can skip the 1661 * compressed data much more quickly. 1662 */ 1663 bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining); 1664 if (bytes_skipped < 0) 1665 return (ARCHIVE_FATAL); 1666 1667 /* This entry is finished and done. */ 1668 lha->end_of_entry_cleanup = lha->end_of_entry = 1; 1669 return (ARCHIVE_OK); 1670 } 1671 1672 static int 1673 archive_read_format_lha_cleanup(struct archive_read *a) 1674 { 1675 struct lha *lha = (struct lha *)(a->format->data); 1676 1677 lzh_decode_free(&(lha->strm)); 1678 archive_string_free(&(lha->dirname)); 1679 archive_string_free(&(lha->filename)); 1680 archive_string_free(&(lha->uname)); 1681 archive_string_free(&(lha->gname)); 1682 archive_wstring_free(&(lha->ws)); 1683 free(lha); 1684 (a->format->data) = NULL; 1685 return (ARCHIVE_OK); 1686 } 1687 1688 /* 1689 * 'LHa for UNIX' utility has archived a symbolic-link name after 1690 * a pathname with '|' character. 1691 * This function extracts the symbolic-link name from the pathname. 1692 * 1693 * example. 1694 * 1. a symbolic-name is 'aaa/bb/cc' 1695 * 2. a filename is 'xxx/bbb' 1696 * then a archived pathname is 'xxx/bbb|aaa/bb/cc' 1697 */ 1698 static int 1699 lha_parse_linkname(struct archive_wstring *linkname, 1700 struct archive_wstring *pathname) 1701 { 1702 wchar_t * linkptr; 1703 size_t symlen; 1704 1705 linkptr = wcschr(pathname->s, L'|'); 1706 if (linkptr != NULL) { 1707 symlen = wcslen(linkptr + 1); 1708 archive_wstrncpy(linkname, linkptr+1, symlen); 1709 1710 *linkptr = 0; 1711 pathname->length = wcslen(pathname->s); 1712 1713 return (1); 1714 } 1715 return (0); 1716 } 1717 1718 /* Convert an MSDOS-style date/time into Unix-style time. */ 1719 static time_t 1720 lha_dos_time(const unsigned char *p) 1721 { 1722 int msTime, msDate; 1723 struct tm ts; 1724 1725 msTime = archive_le16dec(p); 1726 msDate = archive_le16dec(p+2); 1727 1728 memset(&ts, 0, sizeof(ts)); 1729 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ 1730 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ 1731 ts.tm_mday = msDate & 0x1f; /* Day of month. */ 1732 ts.tm_hour = (msTime >> 11) & 0x1f; 1733 ts.tm_min = (msTime >> 5) & 0x3f; 1734 ts.tm_sec = (msTime << 1) & 0x3e; 1735 ts.tm_isdst = -1; 1736 return (mktime(&ts)); 1737 } 1738 1739 /* Convert an MS-Windows-style date/time into Unix-style time. */ 1740 static time_t 1741 lha_win_time(uint64_t wintime, long *ns) 1742 { 1743 #define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000) 1744 1745 if (wintime >= EPOC_TIME) { 1746 wintime -= EPOC_TIME; /* 1970-01-01 00:00:00 (UTC) */ 1747 if (ns != NULL) 1748 *ns = (long)(wintime % 10000000) * 100; 1749 return (wintime / 10000000); 1750 } else { 1751 if (ns != NULL) 1752 *ns = 0; 1753 return (0); 1754 } 1755 } 1756 1757 static unsigned char 1758 lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size) 1759 { 1760 unsigned char const *p = (unsigned char const *)pp; 1761 1762 p += offset; 1763 for (;size > 0; --size) 1764 sum += *p++; 1765 return (sum); 1766 } 1767 1768 static uint16_t crc16tbl[2][256]; 1769 static void 1770 lha_crc16_init(void) 1771 { 1772 unsigned int i; 1773 static int crc16init = 0; 1774 1775 if (crc16init) 1776 return; 1777 crc16init = 1; 1778 1779 for (i = 0; i < 256; i++) { 1780 unsigned int j; 1781 uint16_t crc = (uint16_t)i; 1782 for (j = 8; j; j--) 1783 crc = (crc >> 1) ^ ((crc & 1) * 0xA001); 1784 crc16tbl[0][i] = crc; 1785 } 1786 1787 for (i = 0; i < 256; i++) { 1788 crc16tbl[1][i] = (crc16tbl[0][i] >> 8) 1789 ^ crc16tbl[0][crc16tbl[0][i] & 0xff]; 1790 } 1791 } 1792 1793 static uint16_t 1794 lha_crc16(uint16_t crc, const void *pp, size_t len) 1795 { 1796 const unsigned char *p = (const unsigned char *)pp; 1797 const uint16_t *buff; 1798 const union { 1799 uint32_t i; 1800 char c[4]; 1801 } u = { 0x01020304 }; 1802 1803 if (len == 0) 1804 return crc; 1805 1806 /* Process unaligned address. */ 1807 if (((uintptr_t)p) & (uintptr_t)0x1) { 1808 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff]; 1809 len--; 1810 } 1811 buff = (const uint16_t *)p; 1812 /* 1813 * Modern C compiler such as GCC does not unroll automatically yet 1814 * without unrolling pragma, and Clang is so. So we should 1815 * unroll this loop for its performance. 1816 */ 1817 for (;len >= 8; len -= 8) { 1818 /* This if statement expects compiler optimization will 1819 * remove the statement which will not be executed. */ 1820 #undef bswap16 1821 #ifndef __has_builtin 1822 #define __has_builtin(x) 0 1823 #endif 1824 #if defined(_MSC_VER) && _MSC_VER >= 1400 /* Visual Studio */ 1825 # define bswap16(x) _byteswap_ushort(x) 1826 #elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4) 1827 /* GCC 4.8 and later has __builtin_bswap16() */ 1828 # define bswap16(x) __builtin_bswap16(x) 1829 #elif defined(__clang__) && __has_builtin(__builtin_bswap16) 1830 /* Newer clang versions have __builtin_bswap16() */ 1831 # define bswap16(x) __builtin_bswap16(x) 1832 #else 1833 # define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8)) 1834 #endif 1835 #define CRC16W do { \ 1836 if(u.c[0] == 1) { /* Big endian */ \ 1837 crc ^= bswap16(*buff); buff++; \ 1838 } else \ 1839 crc ^= *buff++; \ 1840 crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\ 1841 } while (0) 1842 CRC16W; 1843 CRC16W; 1844 CRC16W; 1845 CRC16W; 1846 #undef CRC16W 1847 #undef bswap16 1848 } 1849 1850 p = (const unsigned char *)buff; 1851 for (;len; len--) { 1852 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff]; 1853 } 1854 return crc; 1855 } 1856 1857 /* 1858 * Initialize LZHUF decoder. 1859 * 1860 * Returns ARCHIVE_OK if initialization was successful. 1861 * Returns ARCHIVE_FAILED if method is unsupported. 1862 * Returns ARCHIVE_FATAL if initialization failed; memory allocation 1863 * error occurred. 1864 */ 1865 static int 1866 lzh_decode_init(struct lzh_stream *strm, const char *method) 1867 { 1868 struct lzh_dec *ds; 1869 int w_bits, w_size; 1870 1871 if (strm->ds == NULL) { 1872 strm->ds = calloc(1, sizeof(*strm->ds)); 1873 if (strm->ds == NULL) 1874 return (ARCHIVE_FATAL); 1875 } 1876 ds = strm->ds; 1877 ds->error = ARCHIVE_FAILED; 1878 if (method == NULL || method[0] != 'l' || method[1] != 'h') 1879 return (ARCHIVE_FAILED); 1880 switch (method[2]) { 1881 case '5': 1882 w_bits = 13;/* 8KiB for window */ 1883 break; 1884 case '6': 1885 w_bits = 15;/* 32KiB for window */ 1886 break; 1887 case '7': 1888 w_bits = 16;/* 64KiB for window */ 1889 break; 1890 default: 1891 return (ARCHIVE_FAILED);/* Not supported. */ 1892 } 1893 ds->error = ARCHIVE_FATAL; 1894 /* Expand a window size up to 128 KiB for decompressing process 1895 * performance whatever its original window size is. */ 1896 ds->w_size = 1U << 17; 1897 ds->w_mask = ds->w_size -1; 1898 if (ds->w_buff == NULL) { 1899 ds->w_buff = malloc(ds->w_size); 1900 if (ds->w_buff == NULL) 1901 return (ARCHIVE_FATAL); 1902 } 1903 w_size = 1U << w_bits; 1904 memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size); 1905 ds->w_pos = 0; 1906 ds->state = 0; 1907 ds->pos_pt_len_size = w_bits + 1; 1908 ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4; 1909 ds->literal_pt_len_size = PT_BITLEN_SIZE; 1910 ds->literal_pt_len_bits = 5; 1911 ds->br.cache_buffer = 0; 1912 ds->br.cache_avail = 0; 1913 1914 if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16) 1915 != ARCHIVE_OK) 1916 return (ARCHIVE_FATAL); 1917 ds->lt.len_bits = 9; 1918 if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16) 1919 != ARCHIVE_OK) 1920 return (ARCHIVE_FATAL); 1921 ds->error = 0; 1922 1923 return (ARCHIVE_OK); 1924 } 1925 1926 /* 1927 * Release LZHUF decoder. 1928 */ 1929 static void 1930 lzh_decode_free(struct lzh_stream *strm) 1931 { 1932 1933 if (strm->ds == NULL) 1934 return; 1935 free(strm->ds->w_buff); 1936 lzh_huffman_free(&(strm->ds->lt)); 1937 lzh_huffman_free(&(strm->ds->pt)); 1938 free(strm->ds); 1939 strm->ds = NULL; 1940 } 1941 1942 /* 1943 * Bit stream reader. 1944 */ 1945 /* Check that the cache buffer has enough bits. */ 1946 #define lzh_br_has(br, n) ((br)->cache_avail >= n) 1947 /* Get compressed data by bit. */ 1948 #define lzh_br_bits(br, n) \ 1949 (((uint16_t)((br)->cache_buffer >> \ 1950 ((br)->cache_avail - (n)))) & cache_masks[n]) 1951 #define lzh_br_bits_forced(br, n) \ 1952 (((uint16_t)((br)->cache_buffer << \ 1953 ((n) - (br)->cache_avail))) & cache_masks[n]) 1954 /* Read ahead to make sure the cache buffer has enough compressed data we 1955 * will use. 1956 * True : completed, there is enough data in the cache buffer. 1957 * False : we met that strm->next_in is empty, we have to get following 1958 * bytes. */ 1959 #define lzh_br_read_ahead_0(strm, br, n) \ 1960 (lzh_br_has(br, (n)) || lzh_br_fillup(strm, br)) 1961 /* True : the cache buffer has some bits as much as we need. 1962 * False : there are no enough bits in the cache buffer to be used, 1963 * we have to get following bytes if we could. */ 1964 #define lzh_br_read_ahead(strm, br, n) \ 1965 (lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n))) 1966 1967 /* Notify how many bits we consumed. */ 1968 #define lzh_br_consume(br, n) ((br)->cache_avail -= (n)) 1969 #define lzh_br_unconsume(br, n) ((br)->cache_avail += (n)) 1970 1971 static const uint16_t cache_masks[] = { 1972 0x0000, 0x0001, 0x0003, 0x0007, 1973 0x000F, 0x001F, 0x003F, 0x007F, 1974 0x00FF, 0x01FF, 0x03FF, 0x07FF, 1975 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 1976 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF 1977 }; 1978 1979 /* 1980 * Shift away used bits in the cache data and fill it up with following bits. 1981 * Call this when cache buffer does not have enough bits you need. 1982 * 1983 * Returns 1 if the cache buffer is full. 1984 * Returns 0 if the cache buffer is not full; input buffer is empty. 1985 */ 1986 static int 1987 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br) 1988 { 1989 int n = CACHE_BITS - br->cache_avail; 1990 1991 for (;;) { 1992 const int x = n >> 3; 1993 if (strm->avail_in >= x) { 1994 switch (x) { 1995 case 8: 1996 br->cache_buffer = 1997 ((uint64_t)strm->next_in[0]) << 56 | 1998 ((uint64_t)strm->next_in[1]) << 48 | 1999 ((uint64_t)strm->next_in[2]) << 40 | 2000 ((uint64_t)strm->next_in[3]) << 32 | 2001 ((uint32_t)strm->next_in[4]) << 24 | 2002 ((uint32_t)strm->next_in[5]) << 16 | 2003 ((uint32_t)strm->next_in[6]) << 8 | 2004 (uint32_t)strm->next_in[7]; 2005 strm->next_in += 8; 2006 strm->avail_in -= 8; 2007 br->cache_avail += 8 * 8; 2008 return (1); 2009 case 7: 2010 br->cache_buffer = 2011 (br->cache_buffer << 56) | 2012 ((uint64_t)strm->next_in[0]) << 48 | 2013 ((uint64_t)strm->next_in[1]) << 40 | 2014 ((uint64_t)strm->next_in[2]) << 32 | 2015 ((uint64_t)strm->next_in[3]) << 24 | 2016 ((uint64_t)strm->next_in[4]) << 16 | 2017 ((uint64_t)strm->next_in[5]) << 8 | 2018 (uint64_t)strm->next_in[6]; 2019 strm->next_in += 7; 2020 strm->avail_in -= 7; 2021 br->cache_avail += 7 * 8; 2022 return (1); 2023 case 6: 2024 br->cache_buffer = 2025 (br->cache_buffer << 48) | 2026 ((uint64_t)strm->next_in[0]) << 40 | 2027 ((uint64_t)strm->next_in[1]) << 32 | 2028 ((uint64_t)strm->next_in[2]) << 24 | 2029 ((uint64_t)strm->next_in[3]) << 16 | 2030 ((uint64_t)strm->next_in[4]) << 8 | 2031 (uint64_t)strm->next_in[5]; 2032 strm->next_in += 6; 2033 strm->avail_in -= 6; 2034 br->cache_avail += 6 * 8; 2035 return (1); 2036 case 0: 2037 /* We have enough compressed data in 2038 * the cache buffer.*/ 2039 return (1); 2040 default: 2041 break; 2042 } 2043 } 2044 if (strm->avail_in == 0) { 2045 /* There is not enough compressed data to fill up the 2046 * cache buffer. */ 2047 return (0); 2048 } 2049 br->cache_buffer = 2050 (br->cache_buffer << 8) | *strm->next_in++; 2051 strm->avail_in--; 2052 br->cache_avail += 8; 2053 n -= 8; 2054 } 2055 } 2056 2057 /* 2058 * Decode LZHUF. 2059 * 2060 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty. 2061 * Please set available buffer and call this function again. 2062 * 2. Returns ARCHIVE_EOF if decompression has been completed. 2063 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data 2064 * is broken or you do not set 'last' flag properly. 2065 * 4. 'last' flag is very important, you must set 1 to the flag if there 2066 * is no input data. The lha compressed data format does not provide how 2067 * to know the compressed data is really finished. 2068 * Note: lha command utility check if the total size of output bytes is 2069 * reached the uncompressed size recorded in its header. it does not mind 2070 * that the decoding process is properly finished. 2071 * GNU ZIP can decompress another compressed file made by SCO LZH compress. 2072 * it handles EOF as null to fill read buffer with zero until the decoding 2073 * process meet 2 bytes of zeros at reading a size of a next chunk, so the 2074 * zeros are treated as the mark of the end of the data although the zeros 2075 * is dummy, not the file data. 2076 */ 2077 static int lzh_read_blocks(struct lzh_stream *, int); 2078 static int lzh_decode_blocks(struct lzh_stream *, int); 2079 #define ST_RD_BLOCK 0 2080 #define ST_RD_PT_1 1 2081 #define ST_RD_PT_2 2 2082 #define ST_RD_PT_3 3 2083 #define ST_RD_PT_4 4 2084 #define ST_RD_LITERAL_1 5 2085 #define ST_RD_LITERAL_2 6 2086 #define ST_RD_LITERAL_3 7 2087 #define ST_RD_POS_DATA_1 8 2088 #define ST_GET_LITERAL 9 2089 #define ST_GET_POS_1 10 2090 #define ST_GET_POS_2 11 2091 #define ST_COPY_DATA 12 2092 2093 static int 2094 lzh_decode(struct lzh_stream *strm, int last) 2095 { 2096 struct lzh_dec *ds = strm->ds; 2097 int avail_in; 2098 int r; 2099 2100 if (ds->error) 2101 return (ds->error); 2102 2103 avail_in = strm->avail_in; 2104 do { 2105 if (ds->state < ST_GET_LITERAL) 2106 r = lzh_read_blocks(strm, last); 2107 else 2108 r = lzh_decode_blocks(strm, last); 2109 } while (r == 100); 2110 strm->total_in += avail_in - strm->avail_in; 2111 return (r); 2112 } 2113 2114 static void 2115 lzh_emit_window(struct lzh_stream *strm, size_t s) 2116 { 2117 strm->ref_ptr = strm->ds->w_buff; 2118 strm->avail_out = (int)s; 2119 strm->total_out += s; 2120 } 2121 2122 static int 2123 lzh_read_blocks(struct lzh_stream *strm, int last) 2124 { 2125 struct lzh_dec *ds = strm->ds; 2126 struct lzh_br *br = &(ds->br); 2127 int c = 0, i; 2128 unsigned rbits; 2129 2130 for (;;) { 2131 switch (ds->state) { 2132 case ST_RD_BLOCK: 2133 /* 2134 * Read a block number indicates how many blocks 2135 * we will handle. The block is composed of a 2136 * literal and a match, sometimes a literal only 2137 * in particular, there are no reference data at 2138 * the beginning of the decompression. 2139 */ 2140 if (!lzh_br_read_ahead_0(strm, br, 16)) { 2141 if (!last) 2142 /* We need following data. */ 2143 return (ARCHIVE_OK); 2144 if (lzh_br_has(br, 8)) { 2145 /* 2146 * It seems there are extra bits. 2147 * 1. Compressed data is broken. 2148 * 2. `last' flag does not properly 2149 * set. 2150 */ 2151 goto failed; 2152 } 2153 if (ds->w_pos > 0) { 2154 lzh_emit_window(strm, ds->w_pos); 2155 ds->w_pos = 0; 2156 return (ARCHIVE_OK); 2157 } 2158 /* End of compressed data; we have completely 2159 * handled all compressed data. */ 2160 return (ARCHIVE_EOF); 2161 } 2162 ds->blocks_avail = lzh_br_bits(br, 16); 2163 if (ds->blocks_avail == 0) 2164 goto failed; 2165 lzh_br_consume(br, 16); 2166 /* 2167 * Read a literal table compressed in huffman 2168 * coding. 2169 */ 2170 ds->pt.len_size = ds->literal_pt_len_size; 2171 ds->pt.len_bits = ds->literal_pt_len_bits; 2172 ds->reading_position = 0; 2173 /* FALL THROUGH */ 2174 case ST_RD_PT_1: 2175 /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are 2176 * used in reading both a literal table and a 2177 * position table. */ 2178 if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) { 2179 if (last) 2180 goto failed;/* Truncated data. */ 2181 ds->state = ST_RD_PT_1; 2182 return (ARCHIVE_OK); 2183 } 2184 ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits); 2185 lzh_br_consume(br, ds->pt.len_bits); 2186 /* FALL THROUGH */ 2187 case ST_RD_PT_2: 2188 if (ds->pt.len_avail == 0) { 2189 /* There is no bitlen. */ 2190 if (!lzh_br_read_ahead(strm, br, 2191 ds->pt.len_bits)) { 2192 if (last) 2193 goto failed;/* Truncated data.*/ 2194 ds->state = ST_RD_PT_2; 2195 return (ARCHIVE_OK); 2196 } 2197 if (!lzh_make_fake_table(&(ds->pt), 2198 lzh_br_bits(br, ds->pt.len_bits))) 2199 goto failed;/* Invalid data. */ 2200 lzh_br_consume(br, ds->pt.len_bits); 2201 if (ds->reading_position) 2202 ds->state = ST_GET_LITERAL; 2203 else 2204 ds->state = ST_RD_LITERAL_1; 2205 break; 2206 } else if (ds->pt.len_avail > ds->pt.len_size) 2207 goto failed;/* Invalid data. */ 2208 ds->loop = 0; 2209 memset(ds->pt.freq, 0, sizeof(ds->pt.freq)); 2210 if (ds->pt.len_avail < 3 || 2211 ds->pt.len_size == ds->pos_pt_len_size) { 2212 ds->state = ST_RD_PT_4; 2213 break; 2214 } 2215 /* FALL THROUGH */ 2216 case ST_RD_PT_3: 2217 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3); 2218 if (ds->loop < 3) { 2219 if (ds->loop < 0 || last) 2220 goto failed;/* Invalid data. */ 2221 /* Not completed, get following data. */ 2222 ds->state = ST_RD_PT_3; 2223 return (ARCHIVE_OK); 2224 } 2225 /* There are some null in bitlen of the literal. */ 2226 if (!lzh_br_read_ahead(strm, br, 2)) { 2227 if (last) 2228 goto failed;/* Truncated data. */ 2229 ds->state = ST_RD_PT_3; 2230 return (ARCHIVE_OK); 2231 } 2232 c = lzh_br_bits(br, 2); 2233 lzh_br_consume(br, 2); 2234 if (c > ds->pt.len_avail - 3) 2235 goto failed;/* Invalid data. */ 2236 for (i = 3; c-- > 0 ;) 2237 ds->pt.bitlen[i++] = 0; 2238 ds->loop = i; 2239 /* FALL THROUGH */ 2240 case ST_RD_PT_4: 2241 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 2242 ds->pt.len_avail); 2243 if (ds->loop < ds->pt.len_avail) { 2244 if (ds->loop < 0 || last) 2245 goto failed;/* Invalid data. */ 2246 /* Not completed, get following data. */ 2247 ds->state = ST_RD_PT_4; 2248 return (ARCHIVE_OK); 2249 } 2250 if (!lzh_make_huffman_table(&(ds->pt))) 2251 goto failed;/* Invalid data */ 2252 if (ds->reading_position) { 2253 ds->state = ST_GET_LITERAL; 2254 break; 2255 } 2256 /* FALL THROUGH */ 2257 case ST_RD_LITERAL_1: 2258 if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) { 2259 if (last) 2260 goto failed;/* Truncated data. */ 2261 ds->state = ST_RD_LITERAL_1; 2262 return (ARCHIVE_OK); 2263 } 2264 ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits); 2265 lzh_br_consume(br, ds->lt.len_bits); 2266 /* FALL THROUGH */ 2267 case ST_RD_LITERAL_2: 2268 if (ds->lt.len_avail == 0) { 2269 /* There is no bitlen. */ 2270 if (!lzh_br_read_ahead(strm, br, 2271 ds->lt.len_bits)) { 2272 if (last) 2273 goto failed;/* Truncated data.*/ 2274 ds->state = ST_RD_LITERAL_2; 2275 return (ARCHIVE_OK); 2276 } 2277 if (!lzh_make_fake_table(&(ds->lt), 2278 lzh_br_bits(br, ds->lt.len_bits))) 2279 goto failed;/* Invalid data */ 2280 lzh_br_consume(br, ds->lt.len_bits); 2281 ds->state = ST_RD_POS_DATA_1; 2282 break; 2283 } else if (ds->lt.len_avail > ds->lt.len_size) 2284 goto failed;/* Invalid data */ 2285 ds->loop = 0; 2286 memset(ds->lt.freq, 0, sizeof(ds->lt.freq)); 2287 /* FALL THROUGH */ 2288 case ST_RD_LITERAL_3: 2289 i = ds->loop; 2290 while (i < ds->lt.len_avail) { 2291 if (!lzh_br_read_ahead(strm, br, 2292 ds->pt.max_bits)) { 2293 if (last) 2294 goto failed;/* Truncated data.*/ 2295 ds->loop = i; 2296 ds->state = ST_RD_LITERAL_3; 2297 return (ARCHIVE_OK); 2298 } 2299 rbits = lzh_br_bits(br, ds->pt.max_bits); 2300 c = lzh_decode_huffman(&(ds->pt), rbits); 2301 if (c > 2) { 2302 /* Note: 'c' will never be more than 2303 * eighteen since it's limited by 2304 * PT_BITLEN_SIZE, which is being set 2305 * to ds->pt.len_size through 2306 * ds->literal_pt_len_size. */ 2307 lzh_br_consume(br, ds->pt.bitlen[c]); 2308 c -= 2; 2309 ds->lt.freq[c]++; 2310 ds->lt.bitlen[i++] = c; 2311 } else if (c == 0) { 2312 lzh_br_consume(br, ds->pt.bitlen[c]); 2313 ds->lt.bitlen[i++] = 0; 2314 } else { 2315 /* c == 1 or c == 2 */ 2316 int n = (c == 1)?4:9; 2317 if (!lzh_br_read_ahead(strm, br, 2318 ds->pt.bitlen[c] + n)) { 2319 if (last) /* Truncated data. */ 2320 goto failed; 2321 ds->loop = i; 2322 ds->state = ST_RD_LITERAL_3; 2323 return (ARCHIVE_OK); 2324 } 2325 lzh_br_consume(br, ds->pt.bitlen[c]); 2326 c = lzh_br_bits(br, n); 2327 lzh_br_consume(br, n); 2328 c += (n == 4)?3:20; 2329 if (i + c > ds->lt.len_avail) 2330 goto failed;/* Invalid data */ 2331 memset(&(ds->lt.bitlen[i]), 0, c); 2332 i += c; 2333 } 2334 } 2335 if (i > ds->lt.len_avail || 2336 !lzh_make_huffman_table(&(ds->lt))) 2337 goto failed;/* Invalid data */ 2338 /* FALL THROUGH */ 2339 case ST_RD_POS_DATA_1: 2340 /* 2341 * Read a position table compressed in huffman 2342 * coding. 2343 */ 2344 ds->pt.len_size = ds->pos_pt_len_size; 2345 ds->pt.len_bits = ds->pos_pt_len_bits; 2346 ds->reading_position = 1; 2347 ds->state = ST_RD_PT_1; 2348 break; 2349 case ST_GET_LITERAL: 2350 return (100); 2351 } 2352 } 2353 failed: 2354 return (ds->error = ARCHIVE_FAILED); 2355 } 2356 2357 static int 2358 lzh_decode_blocks(struct lzh_stream *strm, int last) 2359 { 2360 struct lzh_dec *ds = strm->ds; 2361 struct lzh_br bre = ds->br; 2362 struct huffman *lt = &(ds->lt); 2363 struct huffman *pt = &(ds->pt); 2364 unsigned char *w_buff = ds->w_buff; 2365 unsigned char *lt_bitlen = lt->bitlen; 2366 unsigned char *pt_bitlen = pt->bitlen; 2367 int blocks_avail = ds->blocks_avail, c = 0; 2368 int copy_len = ds->copy_len, copy_pos = ds->copy_pos; 2369 int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size; 2370 int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits; 2371 int state = ds->state; 2372 2373 for (;;) { 2374 switch (state) { 2375 case ST_GET_LITERAL: 2376 for (;;) { 2377 if (blocks_avail == 0) { 2378 /* We have decoded all blocks. 2379 * Let's handle next blocks. */ 2380 ds->state = ST_RD_BLOCK; 2381 ds->br = bre; 2382 ds->blocks_avail = 0; 2383 ds->w_pos = w_pos; 2384 ds->copy_pos = 0; 2385 return (100); 2386 } 2387 2388 /* lzh_br_read_ahead() always try to fill the 2389 * cache buffer up. In specific situation we 2390 * are close to the end of the data, the cache 2391 * buffer will not be full and thus we have to 2392 * determine if the cache buffer has some bits 2393 * as much as we need after lzh_br_read_ahead() 2394 * failed. */ 2395 if (!lzh_br_read_ahead(strm, &bre, 2396 lt_max_bits)) { 2397 if (!last) 2398 goto next_data; 2399 /* Remaining bits are less than 2400 * maximum bits(lt.max_bits) but maybe 2401 * it still remains as much as we need, 2402 * so we should try to use it with 2403 * dummy bits. */ 2404 c = lzh_decode_huffman(lt, 2405 lzh_br_bits_forced(&bre, 2406 lt_max_bits)); 2407 lzh_br_consume(&bre, lt_bitlen[c]); 2408 if (!lzh_br_has(&bre, 0)) 2409 goto failed;/* Over read. */ 2410 } else { 2411 c = lzh_decode_huffman(lt, 2412 lzh_br_bits(&bre, lt_max_bits)); 2413 lzh_br_consume(&bre, lt_bitlen[c]); 2414 } 2415 blocks_avail--; 2416 if (c > UCHAR_MAX) 2417 /* Current block is a match data. */ 2418 break; 2419 /* 2420 * 'c' is exactly a literal code. 2421 */ 2422 /* Save a decoded code to reference it 2423 * afterward. */ 2424 w_buff[w_pos] = c; 2425 if (++w_pos >= w_size) { 2426 w_pos = 0; 2427 lzh_emit_window(strm, w_size); 2428 goto next_data; 2429 } 2430 } 2431 /* 'c' is the length of a match pattern we have 2432 * already extracted, which has be stored in 2433 * window(ds->w_buff). */ 2434 copy_len = c - (UCHAR_MAX + 1) + MINMATCH; 2435 /* FALL THROUGH */ 2436 case ST_GET_POS_1: 2437 /* 2438 * Get a reference position. 2439 */ 2440 if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) { 2441 if (!last) { 2442 state = ST_GET_POS_1; 2443 ds->copy_len = copy_len; 2444 goto next_data; 2445 } 2446 copy_pos = lzh_decode_huffman(pt, 2447 lzh_br_bits_forced(&bre, pt_max_bits)); 2448 lzh_br_consume(&bre, pt_bitlen[copy_pos]); 2449 if (!lzh_br_has(&bre, 0)) 2450 goto failed;/* Over read. */ 2451 } else { 2452 copy_pos = lzh_decode_huffman(pt, 2453 lzh_br_bits(&bre, pt_max_bits)); 2454 lzh_br_consume(&bre, pt_bitlen[copy_pos]); 2455 } 2456 /* FALL THROUGH */ 2457 case ST_GET_POS_2: 2458 if (copy_pos > 1) { 2459 /* We need an additional adjustment number to 2460 * the position. */ 2461 int p = copy_pos - 1; 2462 if (!lzh_br_read_ahead(strm, &bre, p)) { 2463 if (last) 2464 goto failed;/* Truncated data.*/ 2465 state = ST_GET_POS_2; 2466 ds->copy_len = copy_len; 2467 ds->copy_pos = copy_pos; 2468 goto next_data; 2469 } 2470 copy_pos = (1 << p) + lzh_br_bits(&bre, p); 2471 lzh_br_consume(&bre, p); 2472 } 2473 /* The position is actually a distance from the last 2474 * code we had extracted and thus we have to convert 2475 * it to a position of the window. */ 2476 copy_pos = (w_pos - copy_pos - 1) & w_mask; 2477 /* FALL THROUGH */ 2478 case ST_COPY_DATA: 2479 /* 2480 * Copy `copy_len' bytes as extracted data from 2481 * the window into the output buffer. 2482 */ 2483 for (;;) { 2484 int l; 2485 2486 l = copy_len; 2487 if (copy_pos > w_pos) { 2488 if (l > w_size - copy_pos) 2489 l = w_size - copy_pos; 2490 } else { 2491 if (l > w_size - w_pos) 2492 l = w_size - w_pos; 2493 } 2494 if ((copy_pos + l < w_pos) 2495 || (w_pos + l < copy_pos)) { 2496 /* No overlap. */ 2497 memcpy(w_buff + w_pos, 2498 w_buff + copy_pos, l); 2499 } else { 2500 const unsigned char *s; 2501 unsigned char *d; 2502 int li; 2503 2504 d = w_buff + w_pos; 2505 s = w_buff + copy_pos; 2506 for (li = 0; li < l-1;) { 2507 d[li] = s[li];li++; 2508 d[li] = s[li];li++; 2509 } 2510 if (li < l) 2511 d[li] = s[li]; 2512 } 2513 w_pos += l; 2514 if (w_pos == w_size) { 2515 w_pos = 0; 2516 lzh_emit_window(strm, w_size); 2517 if (copy_len <= l) 2518 state = ST_GET_LITERAL; 2519 else { 2520 state = ST_COPY_DATA; 2521 ds->copy_len = copy_len - l; 2522 ds->copy_pos = 2523 (copy_pos + l) & w_mask; 2524 } 2525 goto next_data; 2526 } 2527 if (copy_len <= l) 2528 /* A copy of current pattern ended. */ 2529 break; 2530 copy_len -= l; 2531 copy_pos = (copy_pos + l) & w_mask; 2532 } 2533 state = ST_GET_LITERAL; 2534 break; 2535 } 2536 } 2537 failed: 2538 return (ds->error = ARCHIVE_FAILED); 2539 next_data: 2540 ds->br = bre; 2541 ds->blocks_avail = blocks_avail; 2542 ds->state = state; 2543 ds->w_pos = w_pos; 2544 return (ARCHIVE_OK); 2545 } 2546 2547 static int 2548 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits) 2549 { 2550 int bits; 2551 2552 if (hf->bitlen == NULL) { 2553 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0])); 2554 if (hf->bitlen == NULL) 2555 return (ARCHIVE_FATAL); 2556 } 2557 if (hf->tbl == NULL) { 2558 if (tbl_bits < HTBL_BITS) 2559 bits = tbl_bits; 2560 else 2561 bits = HTBL_BITS; 2562 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0])); 2563 if (hf->tbl == NULL) 2564 return (ARCHIVE_FATAL); 2565 } 2566 if (hf->tree == NULL && tbl_bits > HTBL_BITS) { 2567 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4); 2568 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0])); 2569 if (hf->tree == NULL) 2570 return (ARCHIVE_FATAL); 2571 } 2572 hf->len_size = (int)len_size; 2573 hf->tbl_bits = tbl_bits; 2574 return (ARCHIVE_OK); 2575 } 2576 2577 static void 2578 lzh_huffman_free(struct huffman *hf) 2579 { 2580 free(hf->bitlen); 2581 free(hf->tbl); 2582 free(hf->tree); 2583 } 2584 2585 static const char bitlen_tbl[0x400] = { 2586 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2587 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2588 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2589 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2590 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2591 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2592 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2593 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2594 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2595 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2596 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2597 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2598 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2599 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2600 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2601 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2602 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2603 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2604 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2605 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2606 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2607 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2608 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2609 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2610 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2611 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2612 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2613 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2614 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2615 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2616 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2617 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2618 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2619 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2620 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2621 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2622 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2623 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2624 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2625 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2626 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2627 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2628 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2629 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2630 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2631 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2632 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2633 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2634 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2635 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2636 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2637 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2638 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2639 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2640 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2641 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2642 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2643 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2644 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2645 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2646 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2647 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2648 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2649 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16, 0 2650 }; 2651 static int 2652 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end) 2653 { 2654 struct lzh_dec *ds = strm->ds; 2655 struct lzh_br *br = &(ds->br); 2656 int c, i; 2657 2658 for (i = start; i < end; ) { 2659 /* 2660 * bit pattern the number we need 2661 * 000 -> 0 2662 * 001 -> 1 2663 * 010 -> 2 2664 * ... 2665 * 110 -> 6 2666 * 1110 -> 7 2667 * 11110 -> 8 2668 * ... 2669 * 1111111111110 -> 16 2670 */ 2671 if (!lzh_br_read_ahead(strm, br, 3)) 2672 return (i); 2673 if ((c = lzh_br_bits(br, 3)) == 7) { 2674 if (!lzh_br_read_ahead(strm, br, 13)) 2675 return (i); 2676 c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF]; 2677 if (c) 2678 lzh_br_consume(br, c - 3); 2679 else 2680 return (-1);/* Invalid data. */ 2681 } else 2682 lzh_br_consume(br, 3); 2683 ds->pt.bitlen[i++] = c; 2684 ds->pt.freq[c]++; 2685 } 2686 return (i); 2687 } 2688 2689 static int 2690 lzh_make_fake_table(struct huffman *hf, uint16_t c) 2691 { 2692 if (c >= hf->len_size) 2693 return (0); 2694 hf->tbl[0] = c; 2695 hf->max_bits = 0; 2696 hf->shift_bits = 0; 2697 hf->bitlen[hf->tbl[0]] = 0; 2698 return (1); 2699 } 2700 2701 /* 2702 * Make a huffman coding table. 2703 */ 2704 static int 2705 lzh_make_huffman_table(struct huffman *hf) 2706 { 2707 uint16_t *tbl; 2708 const unsigned char *bitlen; 2709 int bitptn[17], weight[17]; 2710 int i, maxbits = 0, ptn, tbl_size, w; 2711 int diffbits, len_avail; 2712 2713 /* 2714 * Initialize bit patterns. 2715 */ 2716 ptn = 0; 2717 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) { 2718 bitptn[i] = ptn; 2719 weight[i] = w; 2720 if (hf->freq[i]) { 2721 ptn += hf->freq[i] * w; 2722 maxbits = i; 2723 } 2724 } 2725 if (ptn != 0x10000 || maxbits > hf->tbl_bits) 2726 return (0);/* Invalid */ 2727 2728 hf->max_bits = maxbits; 2729 2730 /* 2731 * Cut out extra bits which we won't house in the table. 2732 * This preparation reduces the same calculation in the for-loop 2733 * making the table. 2734 */ 2735 if (maxbits < 16) { 2736 int ebits = 16 - maxbits; 2737 for (i = 1; i <= maxbits; i++) { 2738 bitptn[i] >>= ebits; 2739 weight[i] >>= ebits; 2740 } 2741 } 2742 if (maxbits > HTBL_BITS) { 2743 unsigned htbl_max; 2744 uint16_t *p; 2745 2746 diffbits = maxbits - HTBL_BITS; 2747 for (i = 1; i <= HTBL_BITS; i++) { 2748 bitptn[i] >>= diffbits; 2749 weight[i] >>= diffbits; 2750 } 2751 htbl_max = bitptn[HTBL_BITS] + 2752 weight[HTBL_BITS] * hf->freq[HTBL_BITS]; 2753 p = &(hf->tbl[htbl_max]); 2754 while (p < &hf->tbl[1U<<HTBL_BITS]) 2755 *p++ = 0; 2756 } else 2757 diffbits = 0; 2758 hf->shift_bits = diffbits; 2759 2760 /* 2761 * Make the table. 2762 */ 2763 tbl_size = 1 << HTBL_BITS; 2764 tbl = hf->tbl; 2765 bitlen = hf->bitlen; 2766 len_avail = hf->len_avail; 2767 hf->tree_used = 0; 2768 for (i = 0; i < len_avail; i++) { 2769 uint16_t *p; 2770 int len, cnt; 2771 uint16_t bit; 2772 int extlen; 2773 struct htree_t *ht; 2774 2775 if (bitlen[i] == 0) 2776 continue; 2777 /* Get a bit pattern */ 2778 len = bitlen[i]; 2779 ptn = bitptn[len]; 2780 cnt = weight[len]; 2781 if (len <= HTBL_BITS) { 2782 /* Calculate next bit pattern */ 2783 if ((bitptn[len] = ptn + cnt) > tbl_size) 2784 return (0);/* Invalid */ 2785 /* Update the table */ 2786 p = &(tbl[ptn]); 2787 if (cnt > 7) { 2788 uint16_t *pc; 2789 2790 cnt -= 8; 2791 pc = &p[cnt]; 2792 pc[0] = (uint16_t)i; 2793 pc[1] = (uint16_t)i; 2794 pc[2] = (uint16_t)i; 2795 pc[3] = (uint16_t)i; 2796 pc[4] = (uint16_t)i; 2797 pc[5] = (uint16_t)i; 2798 pc[6] = (uint16_t)i; 2799 pc[7] = (uint16_t)i; 2800 if (cnt > 7) { 2801 cnt -= 8; 2802 memcpy(&p[cnt], pc, 2803 8 * sizeof(uint16_t)); 2804 pc = &p[cnt]; 2805 while (cnt > 15) { 2806 cnt -= 16; 2807 memcpy(&p[cnt], pc, 2808 16 * sizeof(uint16_t)); 2809 } 2810 } 2811 if (cnt) 2812 memcpy(p, pc, cnt * sizeof(uint16_t)); 2813 } else { 2814 while (cnt > 1) { 2815 p[--cnt] = (uint16_t)i; 2816 p[--cnt] = (uint16_t)i; 2817 } 2818 if (cnt) 2819 p[--cnt] = (uint16_t)i; 2820 } 2821 continue; 2822 } 2823 2824 /* 2825 * A bit length is too big to be housed to a direct table, 2826 * so we use a tree model for its extra bits. 2827 */ 2828 bitptn[len] = ptn + cnt; 2829 bit = 1U << (diffbits -1); 2830 extlen = len - HTBL_BITS; 2831 2832 p = &(tbl[ptn >> diffbits]); 2833 if (*p == 0) { 2834 *p = len_avail + hf->tree_used; 2835 ht = &(hf->tree[hf->tree_used++]); 2836 if (hf->tree_used > hf->tree_avail) 2837 return (0);/* Invalid */ 2838 ht->left = 0; 2839 ht->right = 0; 2840 } else { 2841 if (*p < len_avail || 2842 *p >= (len_avail + hf->tree_used)) 2843 return (0);/* Invalid */ 2844 ht = &(hf->tree[*p - len_avail]); 2845 } 2846 while (--extlen > 0) { 2847 if (ptn & bit) { 2848 if (ht->left < len_avail) { 2849 ht->left = len_avail + hf->tree_used; 2850 ht = &(hf->tree[hf->tree_used++]); 2851 if (hf->tree_used > hf->tree_avail) 2852 return (0);/* Invalid */ 2853 ht->left = 0; 2854 ht->right = 0; 2855 } else { 2856 ht = &(hf->tree[ht->left - len_avail]); 2857 } 2858 } else { 2859 if (ht->right < len_avail) { 2860 ht->right = len_avail + hf->tree_used; 2861 ht = &(hf->tree[hf->tree_used++]); 2862 if (hf->tree_used > hf->tree_avail) 2863 return (0);/* Invalid */ 2864 ht->left = 0; 2865 ht->right = 0; 2866 } else { 2867 ht = &(hf->tree[ht->right - len_avail]); 2868 } 2869 } 2870 bit >>= 1; 2871 } 2872 if (ptn & bit) { 2873 if (ht->left != 0) 2874 return (0);/* Invalid */ 2875 ht->left = (uint16_t)i; 2876 } else { 2877 if (ht->right != 0) 2878 return (0);/* Invalid */ 2879 ht->right = (uint16_t)i; 2880 } 2881 } 2882 return (1); 2883 } 2884 2885 static int 2886 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c) 2887 { 2888 struct htree_t *ht; 2889 int extlen; 2890 2891 ht = hf->tree; 2892 extlen = hf->shift_bits; 2893 while (c >= hf->len_avail) { 2894 c -= hf->len_avail; 2895 if (extlen-- <= 0 || c >= hf->tree_used) 2896 return (0); 2897 if (rbits & (1U << extlen)) 2898 c = ht[c].left; 2899 else 2900 c = ht[c].right; 2901 } 2902 return (c); 2903 } 2904 2905 static inline int 2906 lzh_decode_huffman(struct huffman *hf, unsigned rbits) 2907 { 2908 int c; 2909 /* 2910 * At first search an index table for a bit pattern. 2911 * If it fails, search a huffman tree for. 2912 */ 2913 c = hf->tbl[rbits >> hf->shift_bits]; 2914 if (c < hf->len_avail || hf->len_avail == 0) 2915 return (c); 2916 /* This bit pattern needs to be found out at a huffman tree. */ 2917 return (lzh_decode_huffman_tree(hf, rbits, c)); 2918 } 2919 2920