1 /*- 2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #include "archive_platform.h" 27 28 #ifdef HAVE_ERRNO_H 29 #include <errno.h> 30 #endif 31 #ifdef HAVE_LIMITS_H 32 #include <limits.h> 33 #endif 34 #ifdef HAVE_STDLIB_H 35 #include <stdlib.h> 36 #endif 37 #ifdef HAVE_STRING_H 38 #include <string.h> 39 #endif 40 41 #include "archive.h" 42 #include "archive_entry.h" 43 #include "archive_entry_locale.h" 44 #include "archive_private.h" 45 #include "archive_read_private.h" 46 #include "archive_endian.h" 47 48 49 #define MAXMATCH 256 /* Maximum match length. */ 50 #define MINMATCH 3 /* Minimum match length. */ 51 /* 52 * Literal table format: 53 * +0 +256 +510 54 * +---------------+-------------------------+ 55 * | literal code | match length | 56 * | 0 ... 255 | MINMATCH ... MAXMATCH | 57 * +---------------+-------------------------+ 58 * <--- LT_BITLEN_SIZE ---> 59 */ 60 /* Literal table size. */ 61 #define LT_BITLEN_SIZE (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1) 62 /* Position table size. 63 * Note: this used for both position table and pre literal table.*/ 64 #define PT_BITLEN_SIZE (3 + 16) 65 66 struct lzh_dec { 67 /* Decoding status. */ 68 int state; 69 70 /* 71 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded 72 * data. 73 */ 74 int w_size; 75 int w_mask; 76 /* Window buffer, which is a loop buffer. */ 77 unsigned char *w_buff; 78 /* The insert position to the window. */ 79 int w_pos; 80 /* The position where we can copy decoded code from the window. */ 81 int copy_pos; 82 /* The length how many bytes we can copy decoded code from 83 * the window. */ 84 int copy_len; 85 86 /* 87 * Bit stream reader. 88 */ 89 struct lzh_br { 90 #define CACHE_TYPE uint64_t 91 #define CACHE_BITS (8 * sizeof(CACHE_TYPE)) 92 /* Cache buffer. */ 93 CACHE_TYPE cache_buffer; 94 /* Indicates how many bits avail in cache_buffer. */ 95 int cache_avail; 96 } br; 97 98 /* 99 * Huffman coding. 100 */ 101 struct huffman { 102 int len_size; 103 int len_avail; 104 int len_bits; 105 int freq[17]; 106 unsigned char *bitlen; 107 108 /* 109 * Use a index table. It's faster than searching a huffman 110 * coding tree, which is a binary tree. But a use of a large 111 * index table causes L1 cache read miss many times. 112 */ 113 #define HTBL_BITS 10 114 int max_bits; 115 int shift_bits; 116 int tbl_bits; 117 int tree_used; 118 int tree_avail; 119 /* Direct access table. */ 120 uint16_t *tbl; 121 /* Binary tree table for extra bits over the direct access. */ 122 struct htree_t { 123 uint16_t left; 124 uint16_t right; 125 } *tree; 126 } lt, pt; 127 128 int blocks_avail; 129 int pos_pt_len_size; 130 int pos_pt_len_bits; 131 int literal_pt_len_size; 132 int literal_pt_len_bits; 133 int reading_position; 134 int loop; 135 int error; 136 }; 137 138 struct lzh_stream { 139 const unsigned char *next_in; 140 int avail_in; 141 int64_t total_in; 142 const unsigned char *ref_ptr; 143 int avail_out; 144 int64_t total_out; 145 struct lzh_dec *ds; 146 }; 147 148 struct lha { 149 /* entry_bytes_remaining is the number of bytes we expect. */ 150 int64_t entry_offset; 151 int64_t entry_bytes_remaining; 152 int64_t entry_unconsumed; 153 uint16_t entry_crc_calculated; 154 155 size_t header_size; /* header size */ 156 unsigned char level; /* header level */ 157 char method[3]; /* compress type */ 158 int64_t compsize; /* compressed data size */ 159 int64_t origsize; /* original file size */ 160 int setflag; 161 #define BIRTHTIME_IS_SET 1 162 #define ATIME_IS_SET 2 163 #define UNIX_MODE_IS_SET 4 164 #define CRC_IS_SET 8 165 time_t birthtime; 166 long birthtime_tv_nsec; 167 time_t mtime; 168 long mtime_tv_nsec; 169 time_t atime; 170 long atime_tv_nsec; 171 mode_t mode; 172 int64_t uid; 173 int64_t gid; 174 struct archive_string uname; 175 struct archive_string gname; 176 uint16_t header_crc; 177 uint16_t crc; 178 struct archive_string_conv *sconv; 179 struct archive_string_conv *opt_sconv; 180 181 struct archive_string dirname; 182 struct archive_string filename; 183 struct archive_wstring ws; 184 185 unsigned char dos_attr; 186 187 /* Flag to mark progress that an archive was read their first header.*/ 188 char found_first_header; 189 /* Flag to mark that indicates an empty directory. */ 190 char directory; 191 192 /* Flags to mark progress of decompression. */ 193 char decompress_init; 194 char end_of_entry; 195 char end_of_entry_cleanup; 196 char entry_is_compressed; 197 198 char format_name[64]; 199 200 struct lzh_stream strm; 201 }; 202 203 /* 204 * LHA header common member offset. 205 */ 206 #define H_METHOD_OFFSET 2 /* Compress type. */ 207 #define H_ATTR_OFFSET 19 /* DOS attribute. */ 208 #define H_LEVEL_OFFSET 20 /* Header Level. */ 209 #define H_SIZE 22 /* Minimum header size. */ 210 211 static int archive_read_format_lha_bid(struct archive_read *, int); 212 static int archive_read_format_lha_options(struct archive_read *, 213 const char *, const char *); 214 static int archive_read_format_lha_read_header(struct archive_read *, 215 struct archive_entry *); 216 static int archive_read_format_lha_read_data(struct archive_read *, 217 const void **, size_t *, int64_t *); 218 static int archive_read_format_lha_read_data_skip(struct archive_read *); 219 static int archive_read_format_lha_cleanup(struct archive_read *); 220 221 static void lha_replace_path_separator(struct lha *, 222 struct archive_entry *); 223 static int lha_read_file_header_0(struct archive_read *, struct lha *); 224 static int lha_read_file_header_1(struct archive_read *, struct lha *); 225 static int lha_read_file_header_2(struct archive_read *, struct lha *); 226 static int lha_read_file_header_3(struct archive_read *, struct lha *); 227 static int lha_read_file_extended_header(struct archive_read *, 228 struct lha *, uint16_t *, int, size_t, size_t *); 229 static size_t lha_check_header_format(const void *); 230 static int lha_skip_sfx(struct archive_read *); 231 static time_t lha_dos_time(const unsigned char *); 232 static time_t lha_win_time(uint64_t, long *); 233 static unsigned char lha_calcsum(unsigned char, const void *, 234 int, size_t); 235 static int lha_parse_linkname(struct archive_string *, 236 struct archive_string *); 237 static int lha_read_data_none(struct archive_read *, const void **, 238 size_t *, int64_t *); 239 static int lha_read_data_lzh(struct archive_read *, const void **, 240 size_t *, int64_t *); 241 static void lha_crc16_init(void); 242 static uint16_t lha_crc16(uint16_t, const void *, size_t); 243 static int lzh_decode_init(struct lzh_stream *, const char *); 244 static void lzh_decode_free(struct lzh_stream *); 245 static int lzh_decode(struct lzh_stream *, int); 246 static int lzh_br_fillup(struct lzh_stream *, struct lzh_br *); 247 static int lzh_huffman_init(struct huffman *, size_t, int); 248 static void lzh_huffman_free(struct huffman *); 249 static int lzh_read_pt_bitlen(struct lzh_stream *, int start, int end); 250 static int lzh_make_fake_table(struct huffman *, uint16_t); 251 static int lzh_make_huffman_table(struct huffman *); 252 static inline int lzh_decode_huffman(struct huffman *, unsigned); 253 static int lzh_decode_huffman_tree(struct huffman *, unsigned, int); 254 255 256 int 257 archive_read_support_format_lha(struct archive *_a) 258 { 259 struct archive_read *a = (struct archive_read *)_a; 260 struct lha *lha; 261 int r; 262 263 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 264 ARCHIVE_STATE_NEW, "archive_read_support_format_lha"); 265 266 lha = (struct lha *)calloc(1, sizeof(*lha)); 267 if (lha == NULL) { 268 archive_set_error(&a->archive, ENOMEM, 269 "Can't allocate lha data"); 270 return (ARCHIVE_FATAL); 271 } 272 archive_string_init(&lha->ws); 273 274 r = __archive_read_register_format(a, 275 lha, 276 "lha", 277 archive_read_format_lha_bid, 278 archive_read_format_lha_options, 279 archive_read_format_lha_read_header, 280 archive_read_format_lha_read_data, 281 archive_read_format_lha_read_data_skip, 282 NULL, 283 archive_read_format_lha_cleanup, 284 NULL, 285 NULL); 286 287 if (r != ARCHIVE_OK) 288 free(lha); 289 return (ARCHIVE_OK); 290 } 291 292 static size_t 293 lha_check_header_format(const void *h) 294 { 295 const unsigned char *p = h; 296 size_t next_skip_bytes; 297 298 switch (p[H_METHOD_OFFSET+3]) { 299 /* 300 * "-lh0-" ... "-lh7-" "-lhd-" 301 * "-lzs-" "-lz5-" 302 */ 303 case '0': case '1': case '2': case '3': 304 case '4': case '5': case '6': case '7': 305 case 'd': 306 case 's': 307 next_skip_bytes = 4; 308 309 /* b0 == 0 means the end of an LHa archive file. */ 310 if (p[0] == 0) 311 break; 312 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l' 313 || p[H_METHOD_OFFSET+4] != '-') 314 break; 315 316 if (p[H_METHOD_OFFSET+2] == 'h') { 317 /* "-lh?-" */ 318 if (p[H_METHOD_OFFSET+3] == 's') 319 break; 320 if (p[H_LEVEL_OFFSET] == 0) 321 return (0); 322 if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20) 323 return (0); 324 } 325 if (p[H_METHOD_OFFSET+2] == 'z') { 326 /* LArc extensions: -lzs-,-lz4- and -lz5- */ 327 if (p[H_LEVEL_OFFSET] != 0) 328 break; 329 if (p[H_METHOD_OFFSET+3] == 's' 330 || p[H_METHOD_OFFSET+3] == '4' 331 || p[H_METHOD_OFFSET+3] == '5') 332 return (0); 333 } 334 break; 335 case 'h': next_skip_bytes = 1; break; 336 case 'z': next_skip_bytes = 1; break; 337 case 'l': next_skip_bytes = 2; break; 338 case '-': next_skip_bytes = 3; break; 339 default : next_skip_bytes = 4; break; 340 } 341 342 return (next_skip_bytes); 343 } 344 345 static int 346 archive_read_format_lha_bid(struct archive_read *a, int best_bid) 347 { 348 const char *p; 349 const void *buff; 350 ssize_t bytes_avail, offset, window; 351 size_t next; 352 353 /* If there's already a better bid than we can ever 354 make, don't bother testing. */ 355 if (best_bid > 30) 356 return (-1); 357 358 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) 359 return (-1); 360 361 if (lha_check_header_format(p) == 0) 362 return (30); 363 364 if (p[0] == 'M' && p[1] == 'Z') { 365 /* PE file */ 366 offset = 0; 367 window = 4096; 368 while (offset < (1024 * 20)) { 369 buff = __archive_read_ahead(a, offset + window, 370 &bytes_avail); 371 if (buff == NULL) { 372 /* Remaining bytes are less than window. */ 373 window >>= 1; 374 if (window < (H_SIZE + 3)) 375 return (0); 376 continue; 377 } 378 p = (const char *)buff + offset; 379 while (p + H_SIZE < (const char *)buff + bytes_avail) { 380 if ((next = lha_check_header_format(p)) == 0) 381 return (30); 382 p += next; 383 } 384 offset = p - (const char *)buff; 385 } 386 } 387 return (0); 388 } 389 390 static int 391 archive_read_format_lha_options(struct archive_read *a, 392 const char *key, const char *val) 393 { 394 struct lha *lha; 395 int ret = ARCHIVE_FAILED; 396 397 lha = (struct lha *)(a->format->data); 398 if (strcmp(key, "hdrcharset") == 0) { 399 if (val == NULL || val[0] == 0) 400 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 401 "lha: hdrcharset option needs a character-set name"); 402 else { 403 lha->opt_sconv = 404 archive_string_conversion_from_charset( 405 &a->archive, val, 0); 406 if (lha->opt_sconv != NULL) 407 ret = ARCHIVE_OK; 408 else 409 ret = ARCHIVE_FATAL; 410 } 411 return (ret); 412 } 413 414 /* Note: The "warn" return is just to inform the options 415 * supervisor that we didn't handle it. It will generate 416 * a suitable error if no one used this option. */ 417 return (ARCHIVE_WARN); 418 } 419 420 static int 421 lha_skip_sfx(struct archive_read *a) 422 { 423 const void *h; 424 const char *p, *q; 425 size_t next, skip; 426 ssize_t bytes, window; 427 428 window = 4096; 429 for (;;) { 430 h = __archive_read_ahead(a, window, &bytes); 431 if (h == NULL) { 432 /* Remaining bytes are less than window. */ 433 window >>= 1; 434 if (window < (H_SIZE + 3)) 435 goto fatal; 436 continue; 437 } 438 if (bytes < H_SIZE) 439 goto fatal; 440 p = h; 441 q = p + bytes; 442 443 /* 444 * Scan ahead until we find something that looks 445 * like the lha header. 446 */ 447 while (p + H_SIZE < q) { 448 if ((next = lha_check_header_format(p)) == 0) { 449 skip = p - (const char *)h; 450 __archive_read_consume(a, skip); 451 return (ARCHIVE_OK); 452 } 453 p += next; 454 } 455 skip = p - (const char *)h; 456 __archive_read_consume(a, skip); 457 } 458 fatal: 459 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 460 "Couldn't find out LHa header"); 461 return (ARCHIVE_FATAL); 462 } 463 464 static int 465 truncated_error(struct archive_read *a) 466 { 467 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 468 "Truncated LHa header"); 469 return (ARCHIVE_FATAL); 470 } 471 472 static int 473 archive_read_format_lha_read_header(struct archive_read *a, 474 struct archive_entry *entry) 475 { 476 struct archive_string linkname; 477 struct archive_string pathname; 478 struct lha *lha; 479 const unsigned char *p; 480 const char *signature; 481 int err; 482 483 lha_crc16_init(); 484 485 a->archive.archive_format = ARCHIVE_FORMAT_LHA; 486 if (a->archive.archive_format_name == NULL) 487 a->archive.archive_format_name = "lha"; 488 489 lha = (struct lha *)(a->format->data); 490 lha->decompress_init = 0; 491 lha->end_of_entry = 0; 492 lha->end_of_entry_cleanup = 0; 493 lha->entry_unconsumed = 0; 494 495 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) { 496 /* 497 * LHa archiver added 0 to the tail of its archive file as 498 * the mark of the end of the archive. 499 */ 500 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL); 501 if (signature == NULL || signature[0] == 0) 502 return (ARCHIVE_EOF); 503 return (truncated_error(a)); 504 } 505 506 signature = (const char *)p; 507 if (lha->found_first_header == 0 && 508 signature[0] == 'M' && signature[1] == 'Z') { 509 /* This is an executable? Must be self-extracting... */ 510 err = lha_skip_sfx(a); 511 if (err < ARCHIVE_WARN) 512 return (err); 513 514 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL) 515 return (truncated_error(a)); 516 signature = (const char *)p; 517 } 518 /* signature[0] == 0 means the end of an LHa archive file. */ 519 if (signature[0] == 0) 520 return (ARCHIVE_EOF); 521 522 /* 523 * Check the header format and method type. 524 */ 525 if (lha_check_header_format(p) != 0) { 526 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 527 "Bad LHa file"); 528 return (ARCHIVE_FATAL); 529 } 530 531 /* We've found the first header. */ 532 lha->found_first_header = 1; 533 /* Set a default value and common data */ 534 lha->header_size = 0; 535 lha->level = p[H_LEVEL_OFFSET]; 536 lha->method[0] = p[H_METHOD_OFFSET+1]; 537 lha->method[1] = p[H_METHOD_OFFSET+2]; 538 lha->method[2] = p[H_METHOD_OFFSET+3]; 539 if (memcmp(lha->method, "lhd", 3) == 0) 540 lha->directory = 1; 541 else 542 lha->directory = 0; 543 if (memcmp(lha->method, "lh0", 3) == 0 || 544 memcmp(lha->method, "lz4", 3) == 0) 545 lha->entry_is_compressed = 0; 546 else 547 lha->entry_is_compressed = 1; 548 549 lha->compsize = 0; 550 lha->origsize = 0; 551 lha->setflag = 0; 552 lha->birthtime = 0; 553 lha->birthtime_tv_nsec = 0; 554 lha->mtime = 0; 555 lha->mtime_tv_nsec = 0; 556 lha->atime = 0; 557 lha->atime_tv_nsec = 0; 558 lha->mode = (lha->directory)? 0777 : 0666; 559 lha->uid = 0; 560 lha->gid = 0; 561 archive_string_empty(&lha->dirname); 562 archive_string_empty(&lha->filename); 563 lha->dos_attr = 0; 564 if (lha->opt_sconv != NULL) 565 lha->sconv = lha->opt_sconv; 566 else 567 lha->sconv = NULL; 568 569 switch (p[H_LEVEL_OFFSET]) { 570 case 0: 571 err = lha_read_file_header_0(a, lha); 572 break; 573 case 1: 574 err = lha_read_file_header_1(a, lha); 575 break; 576 case 2: 577 err = lha_read_file_header_2(a, lha); 578 break; 579 case 3: 580 err = lha_read_file_header_3(a, lha); 581 break; 582 default: 583 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 584 "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]); 585 err = ARCHIVE_FATAL; 586 break; 587 } 588 if (err < ARCHIVE_WARN) 589 return (err); 590 591 592 if (!lha->directory && archive_strlen(&lha->filename) == 0) 593 /* The filename has not been set */ 594 return (truncated_error(a)); 595 596 /* 597 * Make a pathname from a dirname and a filename. 598 */ 599 archive_string_concat(&lha->dirname, &lha->filename); 600 archive_string_init(&pathname); 601 archive_string_init(&linkname); 602 archive_string_copy(&pathname, &lha->dirname); 603 604 if ((lha->mode & AE_IFMT) == AE_IFLNK) { 605 /* 606 * Extract the symlink-name if it's included in the pathname. 607 */ 608 if (!lha_parse_linkname(&linkname, &pathname)) { 609 /* We couldn't get the symlink-name. */ 610 archive_set_error(&a->archive, 611 ARCHIVE_ERRNO_FILE_FORMAT, 612 "Unknown symlink-name"); 613 archive_string_free(&pathname); 614 archive_string_free(&linkname); 615 return (ARCHIVE_FAILED); 616 } 617 } else { 618 /* 619 * Make sure a file-type is set. 620 * The mode has been overridden if it is in the extended data. 621 */ 622 lha->mode = (lha->mode & ~AE_IFMT) | 623 ((lha->directory)? AE_IFDIR: AE_IFREG); 624 } 625 if ((lha->setflag & UNIX_MODE_IS_SET) == 0 && 626 (lha->dos_attr & 1) != 0) 627 lha->mode &= ~(0222);/* read only. */ 628 629 /* 630 * Set basic file parameters. 631 */ 632 if (archive_entry_copy_pathname_l(entry, pathname.s, 633 pathname.length, lha->sconv) != 0) { 634 if (errno == ENOMEM) { 635 archive_set_error(&a->archive, ENOMEM, 636 "Can't allocate memory for Pathname"); 637 return (ARCHIVE_FATAL); 638 } 639 archive_set_error(&a->archive, 640 ARCHIVE_ERRNO_FILE_FORMAT, 641 "Pathname cannot be converted " 642 "from %s to current locale.", 643 archive_string_conversion_charset_name(lha->sconv)); 644 err = ARCHIVE_WARN; 645 } 646 archive_string_free(&pathname); 647 if (archive_strlen(&linkname) > 0) { 648 if (archive_entry_copy_symlink_l(entry, linkname.s, 649 linkname.length, lha->sconv) != 0) { 650 if (errno == ENOMEM) { 651 archive_set_error(&a->archive, ENOMEM, 652 "Can't allocate memory for Linkname"); 653 return (ARCHIVE_FATAL); 654 } 655 archive_set_error(&a->archive, 656 ARCHIVE_ERRNO_FILE_FORMAT, 657 "Linkname cannot be converted " 658 "from %s to current locale.", 659 archive_string_conversion_charset_name(lha->sconv)); 660 err = ARCHIVE_WARN; 661 } 662 } else 663 archive_entry_set_symlink(entry, NULL); 664 archive_string_free(&linkname); 665 /* 666 * When a header level is 0, there is a possibility that 667 * a pathname and a symlink has '\' character, a directory 668 * separator in DOS/Windows. So we should convert it to '/'. 669 */ 670 if (p[H_LEVEL_OFFSET] == 0) 671 lha_replace_path_separator(lha, entry); 672 673 archive_entry_set_mode(entry, lha->mode); 674 archive_entry_set_uid(entry, lha->uid); 675 archive_entry_set_gid(entry, lha->gid); 676 if (archive_strlen(&lha->uname) > 0) 677 archive_entry_set_uname(entry, lha->uname.s); 678 if (archive_strlen(&lha->gname) > 0) 679 archive_entry_set_gname(entry, lha->gname.s); 680 if (lha->setflag & BIRTHTIME_IS_SET) { 681 archive_entry_set_birthtime(entry, lha->birthtime, 682 lha->birthtime_tv_nsec); 683 archive_entry_set_ctime(entry, lha->birthtime, 684 lha->birthtime_tv_nsec); 685 } else { 686 archive_entry_unset_birthtime(entry); 687 archive_entry_unset_ctime(entry); 688 } 689 archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec); 690 if (lha->setflag & ATIME_IS_SET) 691 archive_entry_set_atime(entry, lha->atime, 692 lha->atime_tv_nsec); 693 else 694 archive_entry_unset_atime(entry); 695 if (lha->directory || archive_entry_symlink(entry) != NULL) 696 archive_entry_unset_size(entry); 697 else 698 archive_entry_set_size(entry, lha->origsize); 699 700 /* 701 * Prepare variables used to read a file content. 702 */ 703 lha->entry_bytes_remaining = lha->compsize; 704 lha->entry_offset = 0; 705 lha->entry_crc_calculated = 0; 706 707 /* 708 * This file does not have a content. 709 */ 710 if (lha->directory || lha->compsize == 0) 711 lha->end_of_entry = 1; 712 713 sprintf(lha->format_name, "lha -%c%c%c-", 714 lha->method[0], lha->method[1], lha->method[2]); 715 a->archive.archive_format_name = lha->format_name; 716 717 return (err); 718 } 719 720 /* 721 * Replace a DOS path separator '\' by a character '/'. 722 * Some multi-byte character set have a character '\' in its second byte. 723 */ 724 static void 725 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry) 726 { 727 const wchar_t *wp; 728 size_t i; 729 730 if ((wp = archive_entry_pathname_w(entry)) != NULL) { 731 archive_wstrcpy(&(lha->ws), wp); 732 for (i = 0; i < archive_strlen(&(lha->ws)); i++) { 733 if (lha->ws.s[i] == L'\\') 734 lha->ws.s[i] = L'/'; 735 } 736 archive_entry_copy_pathname_w(entry, lha->ws.s); 737 } 738 739 if ((wp = archive_entry_symlink_w(entry)) != NULL) { 740 archive_wstrcpy(&(lha->ws), wp); 741 for (i = 0; i < archive_strlen(&(lha->ws)); i++) { 742 if (lha->ws.s[i] == L'\\') 743 lha->ws.s[i] = L'/'; 744 } 745 archive_entry_copy_symlink_w(entry, lha->ws.s); 746 } 747 } 748 749 /* 750 * Header 0 format 751 * 752 * +0 +1 +2 +7 +11 753 * +---------------+----------+----------------+-------------------+ 754 * |header size(*1)|header sum|compression type|compressed size(*2)| 755 * +---------------+----------+----------------+-------------------+ 756 * <---------------------(*1)----------* 757 * 758 * +11 +15 +17 +19 +20 +21 759 * +-----------------+---------+---------+--------------+----------------+ 760 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)| 761 * +-----------------+---------+---------+--------------+----------------+ 762 * *--------------------------------(*1)---------------------------------* 763 * 764 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4) 765 * +---------------+---------+----------+----------------+------------------+ 766 * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data | 767 * +---------------+---------+----------+----------------+------------------+ 768 * <--(*3)-> <------(*2)------> 769 * *----------------------(*1)--------------------------> 770 * 771 */ 772 #define H0_HEADER_SIZE_OFFSET 0 773 #define H0_HEADER_SUM_OFFSET 1 774 #define H0_COMP_SIZE_OFFSET 7 775 #define H0_ORIG_SIZE_OFFSET 11 776 #define H0_DOS_TIME_OFFSET 15 777 #define H0_NAME_LEN_OFFSET 21 778 #define H0_FILE_NAME_OFFSET 22 779 #define H0_FIXED_SIZE 24 780 static int 781 lha_read_file_header_0(struct archive_read *a, struct lha *lha) 782 { 783 const unsigned char *p; 784 int extdsize, namelen; 785 unsigned char headersum, sum_calculated; 786 787 if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL) 788 return (truncated_error(a)); 789 lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2; 790 headersum = p[H0_HEADER_SUM_OFFSET]; 791 lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET); 792 lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET); 793 lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET); 794 namelen = p[H0_NAME_LEN_OFFSET]; 795 extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen; 796 if ((namelen > 221 || extdsize < 0) && extdsize != -2) { 797 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 798 "Invalid LHa header"); 799 return (ARCHIVE_FATAL); 800 } 801 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL) 802 return (truncated_error(a)); 803 804 archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen); 805 /* When extdsize == -2, A CRC16 value is not present in the header. */ 806 if (extdsize >= 0) { 807 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen); 808 lha->setflag |= CRC_IS_SET; 809 } 810 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2); 811 812 /* Read an extended header */ 813 if (extdsize > 0) { 814 /* This extended data is set by 'LHa for UNIX' only. 815 * Maybe fixed size. 816 */ 817 p += H0_FILE_NAME_OFFSET + namelen + 2; 818 if (p[0] == 'U' && extdsize == 12) { 819 /* p[1] is a minor version. */ 820 lha->mtime = archive_le32dec(&p[2]); 821 lha->mode = archive_le16dec(&p[6]); 822 lha->uid = archive_le16dec(&p[8]); 823 lha->gid = archive_le16dec(&p[10]); 824 lha->setflag |= UNIX_MODE_IS_SET; 825 } 826 } 827 __archive_read_consume(a, lha->header_size); 828 829 if (sum_calculated != headersum) { 830 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 831 "LHa header sum error"); 832 return (ARCHIVE_FATAL); 833 } 834 835 return (ARCHIVE_OK); 836 } 837 838 /* 839 * Header 1 format 840 * 841 * +0 +1 +2 +7 +11 842 * +---------------+----------+----------------+-------------+ 843 * |header size(*1)|header sum|compression type|skip size(*2)| 844 * +---------------+----------+----------------+-------------+ 845 * <---------------(*1)----------* 846 * 847 * +11 +15 +17 +19 +20 +21 848 * +-----------------+---------+---------+--------------+----------------+ 849 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)| 850 * +-----------------+---------+---------+--------------+----------------+ 851 * *-------------------------------(*1)----------------------------------* 852 * 853 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4) 854 * +---------------+---------+----------+-----------+-----------+ 855 * |name length(*3)|file name|file CRC16| creator |padding(*4)| 856 * +---------------+---------+----------+-----------+-----------+ 857 * <--(*3)-> 858 * *----------------------------(*1)----------------------------* 859 * 860 * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5) 861 * +----------------+---------------------+------------------------+ 862 * |next header size| extended header(*5) | compressed data | 863 * +----------------+---------------------+------------------------+ 864 * *------(*1)-----> <--------------------(*2)--------------------> 865 */ 866 #define H1_HEADER_SIZE_OFFSET 0 867 #define H1_HEADER_SUM_OFFSET 1 868 #define H1_COMP_SIZE_OFFSET 7 869 #define H1_ORIG_SIZE_OFFSET 11 870 #define H1_DOS_TIME_OFFSET 15 871 #define H1_NAME_LEN_OFFSET 21 872 #define H1_FILE_NAME_OFFSET 22 873 #define H1_FIXED_SIZE 27 874 static int 875 lha_read_file_header_1(struct archive_read *a, struct lha *lha) 876 { 877 const unsigned char *p; 878 size_t extdsize; 879 int i, err, err2; 880 int namelen, padding; 881 unsigned char headersum, sum_calculated; 882 883 err = ARCHIVE_OK; 884 885 if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL) 886 return (truncated_error(a)); 887 888 lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2; 889 headersum = p[H1_HEADER_SUM_OFFSET]; 890 /* Note: An extended header size is included in a compsize. */ 891 lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET); 892 lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET); 893 lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET); 894 namelen = p[H1_NAME_LEN_OFFSET]; 895 /* Calculate a padding size. The result will be normally 0 only(?) */ 896 padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen; 897 898 if (namelen > 230 || padding < 0) 899 goto invalid; 900 901 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL) 902 return (truncated_error(a)); 903 904 for (i = 0; i < namelen; i++) { 905 if (p[i + H1_FILE_NAME_OFFSET] == 0xff) 906 goto invalid;/* Invalid filename. */ 907 } 908 archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen); 909 lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen); 910 lha->setflag |= CRC_IS_SET; 911 912 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2); 913 /* Consume used bytes but not include `next header size' data 914 * since it will be consumed in lha_read_file_extended_header(). */ 915 __archive_read_consume(a, lha->header_size - 2); 916 917 /* Read extended headers */ 918 err2 = lha_read_file_extended_header(a, lha, NULL, 2, 919 (size_t)(lha->compsize + 2), &extdsize); 920 if (err2 < ARCHIVE_WARN) 921 return (err2); 922 if (err2 < err) 923 err = err2; 924 /* Get a real compressed file size. */ 925 lha->compsize -= extdsize - 2; 926 927 if (sum_calculated != headersum) { 928 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 929 "LHa header sum error"); 930 return (ARCHIVE_FATAL); 931 } 932 return (err); 933 invalid: 934 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 935 "Invalid LHa header"); 936 return (ARCHIVE_FATAL); 937 } 938 939 /* 940 * Header 2 format 941 * 942 * +0 +2 +7 +11 +15 943 * +---------------+----------------+-------------------+-----------------+ 944 * |header size(*1)|compression type|compressed size(*2)|uncompressed size| 945 * +---------------+----------------+-------------------+-----------------+ 946 * <--------------------------------(*1)---------------------------------* 947 * 948 * +15 +19 +20 +21 +23 +24 949 * +-----------------+------------+----------------+----------+-----------+ 950 * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16| creator | 951 * +-----------------+------------+----------------+----------+-----------+ 952 * *---------------------------------(*1)---------------------------------* 953 * 954 * +24 +26 +26+(*3) +26+(*3)+(*4) 955 * +----------------+-------------------+-------------+-------------------+ 956 * |next header size|extended header(*3)| padding(*4) | compressed data | 957 * +----------------+-------------------+-------------+-------------------+ 958 * *--------------------------(*1)-------------------> <------(*2)-------> 959 * 960 */ 961 #define H2_HEADER_SIZE_OFFSET 0 962 #define H2_COMP_SIZE_OFFSET 7 963 #define H2_ORIG_SIZE_OFFSET 11 964 #define H2_TIME_OFFSET 15 965 #define H2_CRC_OFFSET 21 966 #define H2_FIXED_SIZE 24 967 static int 968 lha_read_file_header_2(struct archive_read *a, struct lha *lha) 969 { 970 const unsigned char *p; 971 size_t extdsize; 972 int err, padding; 973 uint16_t header_crc; 974 975 if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL) 976 return (truncated_error(a)); 977 978 lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET); 979 lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET); 980 lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET); 981 lha->mtime = archive_le32dec(p + H2_TIME_OFFSET); 982 lha->crc = archive_le16dec(p + H2_CRC_OFFSET); 983 lha->setflag |= CRC_IS_SET; 984 985 if (lha->header_size < H2_FIXED_SIZE) { 986 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 987 "Invalid LHa header size"); 988 return (ARCHIVE_FATAL); 989 } 990 991 header_crc = lha_crc16(0, p, H2_FIXED_SIZE); 992 __archive_read_consume(a, H2_FIXED_SIZE); 993 994 /* Read extended headers */ 995 err = lha_read_file_extended_header(a, lha, &header_crc, 2, 996 lha->header_size - H2_FIXED_SIZE, &extdsize); 997 if (err < ARCHIVE_WARN) 998 return (err); 999 1000 /* Calculate a padding size. The result will be normally 0 or 1. */ 1001 padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize); 1002 if (padding > 0) { 1003 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL) 1004 return (truncated_error(a)); 1005 header_crc = lha_crc16(header_crc, p, padding); 1006 __archive_read_consume(a, padding); 1007 } 1008 1009 if (header_crc != lha->header_crc) { 1010 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1011 "LHa header CRC error"); 1012 return (ARCHIVE_FATAL); 1013 } 1014 return (err); 1015 } 1016 1017 /* 1018 * Header 3 format 1019 * 1020 * +0 +2 +7 +11 +15 1021 * +------------+----------------+-------------------+-----------------+ 1022 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size| 1023 * +------------+----------------+-------------------+-----------------+ 1024 * <-------------------------------(*1)-------------------------------* 1025 * 1026 * +15 +19 +20 +21 +23 +24 1027 * +-----------------+------------+----------------+----------+-----------+ 1028 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16| creator | 1029 * +-----------------+------------+----------------+----------+-----------+ 1030 * *--------------------------------(*1)----------------------------------* 1031 * 1032 * +24 +28 +32 +32+(*3) 1033 * +---------------+----------------+-------------------+-----------------+ 1034 * |header size(*1)|next header size|extended header(*3)| compressed data | 1035 * +---------------+----------------+-------------------+-----------------+ 1036 * *------------------------(*1)-----------------------> <------(*2)-----> 1037 * 1038 */ 1039 #define H3_FIELD_LEN_OFFSET 0 1040 #define H3_COMP_SIZE_OFFSET 7 1041 #define H3_ORIG_SIZE_OFFSET 11 1042 #define H3_TIME_OFFSET 15 1043 #define H3_CRC_OFFSET 21 1044 #define H3_HEADER_SIZE_OFFSET 24 1045 #define H3_FIXED_SIZE 28 1046 static int 1047 lha_read_file_header_3(struct archive_read *a, struct lha *lha) 1048 { 1049 const unsigned char *p; 1050 size_t extdsize; 1051 int err; 1052 uint16_t header_crc; 1053 1054 if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL) 1055 return (truncated_error(a)); 1056 1057 if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4) 1058 goto invalid; 1059 lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET); 1060 lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET); 1061 lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET); 1062 lha->mtime = archive_le32dec(p + H3_TIME_OFFSET); 1063 lha->crc = archive_le16dec(p + H3_CRC_OFFSET); 1064 lha->setflag |= CRC_IS_SET; 1065 1066 if (lha->header_size < H3_FIXED_SIZE + 4) 1067 goto invalid; 1068 header_crc = lha_crc16(0, p, H3_FIXED_SIZE); 1069 __archive_read_consume(a, H3_FIXED_SIZE); 1070 1071 /* Read extended headers */ 1072 err = lha_read_file_extended_header(a, lha, &header_crc, 4, 1073 lha->header_size - H3_FIXED_SIZE, &extdsize); 1074 if (err < ARCHIVE_WARN) 1075 return (err); 1076 1077 if (header_crc != lha->header_crc) { 1078 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1079 "LHa header CRC error"); 1080 return (ARCHIVE_FATAL); 1081 } 1082 return (err); 1083 invalid: 1084 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1085 "Invalid LHa header"); 1086 return (ARCHIVE_FATAL); 1087 } 1088 1089 /* 1090 * Extended header format 1091 * 1092 * +0 +2 +3 -- used in header 1 and 2 1093 * +0 +4 +5 -- used in header 3 1094 * +--------------+---------+-------------------+--------------+-- 1095 * |ex-header size|header id| data |ex-header size| ....... 1096 * +--------------+---------+-------------------+--------------+-- 1097 * <-------------( ex-header size)------------> <-- next extended header --* 1098 * 1099 * If the ex-header size is zero, it is the make of the end of extended 1100 * headers. 1101 * 1102 */ 1103 static int 1104 lha_read_file_extended_header(struct archive_read *a, struct lha *lha, 1105 uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size) 1106 { 1107 const void *h; 1108 const unsigned char *extdheader; 1109 size_t extdsize; 1110 size_t datasize; 1111 unsigned int i; 1112 unsigned char extdtype; 1113 1114 #define EXT_HEADER_CRC 0x00 /* Header CRC and information*/ 1115 #define EXT_FILENAME 0x01 /* Filename */ 1116 #define EXT_DIRECTORY 0x02 /* Directory name */ 1117 #define EXT_DOS_ATTR 0x40 /* MS-DOS attribute */ 1118 #define EXT_TIMESTAMP 0x41 /* Windows time stamp */ 1119 #define EXT_FILESIZE 0x42 /* Large file size */ 1120 #define EXT_TIMEZONE 0x43 /* Time zone */ 1121 #define EXT_UTF16_FILENAME 0x44 /* UTF-16 filename */ 1122 #define EXT_UTF16_DIRECTORY 0x45 /* UTF-16 directory name */ 1123 #define EXT_CODEPAGE 0x46 /* Codepage */ 1124 #define EXT_UNIX_MODE 0x50 /* File permission */ 1125 #define EXT_UNIX_GID_UID 0x51 /* gid,uid */ 1126 #define EXT_UNIX_GNAME 0x52 /* Group name */ 1127 #define EXT_UNIX_UNAME 0x53 /* User name */ 1128 #define EXT_UNIX_MTIME 0x54 /* Modified time */ 1129 #define EXT_OS2_NEW_ATTR 0x7f /* new attribute(OS/2 only) */ 1130 #define EXT_NEW_ATTR 0xff /* new attribute */ 1131 1132 *total_size = sizefield_length; 1133 1134 for (;;) { 1135 /* Read an extended header size. */ 1136 if ((h = 1137 __archive_read_ahead(a, sizefield_length, NULL)) == NULL) 1138 return (truncated_error(a)); 1139 /* Check if the size is the zero indicates the end of the 1140 * extended header. */ 1141 if (sizefield_length == sizeof(uint16_t)) 1142 extdsize = archive_le16dec(h); 1143 else 1144 extdsize = archive_le32dec(h); 1145 if (extdsize == 0) { 1146 /* End of extended header */ 1147 if (crc != NULL) 1148 *crc = lha_crc16(*crc, h, sizefield_length); 1149 __archive_read_consume(a, sizefield_length); 1150 return (ARCHIVE_OK); 1151 } 1152 1153 /* Sanity check to the extended header size. */ 1154 if (((uint64_t)*total_size + extdsize) > 1155 (uint64_t)limitsize || 1156 extdsize <= (size_t)sizefield_length) 1157 goto invalid; 1158 1159 /* Read the extended header. */ 1160 if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL) 1161 return (truncated_error(a)); 1162 *total_size += extdsize; 1163 1164 extdheader = (const unsigned char *)h; 1165 /* Get the extended header type. */ 1166 extdtype = extdheader[sizefield_length]; 1167 /* Calculate an extended data size. */ 1168 datasize = extdsize - (1 + sizefield_length); 1169 /* Skip an extended header size field and type field. */ 1170 extdheader += sizefield_length + 1; 1171 1172 if (crc != NULL && extdtype != EXT_HEADER_CRC) 1173 *crc = lha_crc16(*crc, h, extdsize); 1174 switch (extdtype) { 1175 case EXT_HEADER_CRC: 1176 /* We only use a header CRC. Following data will not 1177 * be used. */ 1178 if (datasize >= 2) { 1179 lha->header_crc = archive_le16dec(extdheader); 1180 if (crc != NULL) { 1181 static const char zeros[2] = {0, 0}; 1182 *crc = lha_crc16(*crc, h, 1183 extdsize - datasize); 1184 /* CRC value itself as zero */ 1185 *crc = lha_crc16(*crc, zeros, 2); 1186 *crc = lha_crc16(*crc, 1187 extdheader+2, datasize - 2); 1188 } 1189 } 1190 break; 1191 case EXT_FILENAME: 1192 if (datasize == 0) { 1193 /* maybe directory header */ 1194 archive_string_empty(&lha->filename); 1195 break; 1196 } 1197 if (extdheader[0] == '\0') 1198 goto invalid; 1199 archive_strncpy(&lha->filename, 1200 (const char *)extdheader, datasize); 1201 break; 1202 case EXT_DIRECTORY: 1203 if (datasize == 0 || extdheader[0] == '\0') 1204 /* no directory name data. exit this case. */ 1205 goto invalid; 1206 1207 archive_strncpy(&lha->dirname, 1208 (const char *)extdheader, datasize); 1209 /* 1210 * Convert directory delimiter from 0xFF 1211 * to '/' for local system. 1212 */ 1213 for (i = 0; i < lha->dirname.length; i++) { 1214 if ((unsigned char)lha->dirname.s[i] == 0xFF) 1215 lha->dirname.s[i] = '/'; 1216 } 1217 /* Is last character directory separator? */ 1218 if (lha->dirname.s[lha->dirname.length-1] != '/') 1219 /* invalid directory data */ 1220 goto invalid; 1221 break; 1222 case EXT_DOS_ATTR: 1223 if (datasize == 2) 1224 lha->dos_attr = (unsigned char) 1225 (archive_le16dec(extdheader) & 0xff); 1226 break; 1227 case EXT_TIMESTAMP: 1228 if (datasize == (sizeof(uint64_t) * 3)) { 1229 lha->birthtime = lha_win_time( 1230 archive_le64dec(extdheader), 1231 &lha->birthtime_tv_nsec); 1232 extdheader += sizeof(uint64_t); 1233 lha->mtime = lha_win_time( 1234 archive_le64dec(extdheader), 1235 &lha->mtime_tv_nsec); 1236 extdheader += sizeof(uint64_t); 1237 lha->atime = lha_win_time( 1238 archive_le64dec(extdheader), 1239 &lha->atime_tv_nsec); 1240 lha->setflag |= BIRTHTIME_IS_SET | 1241 ATIME_IS_SET; 1242 } 1243 break; 1244 case EXT_FILESIZE: 1245 if (datasize == sizeof(uint64_t) * 2) { 1246 lha->compsize = archive_le64dec(extdheader); 1247 extdheader += sizeof(uint64_t); 1248 lha->origsize = archive_le64dec(extdheader); 1249 } 1250 break; 1251 case EXT_CODEPAGE: 1252 /* Get an archived filename charset from codepage. 1253 * This overwrites the charset specified by 1254 * hdrcharset option. */ 1255 if (datasize == sizeof(uint32_t)) { 1256 struct archive_string cp; 1257 const char *charset; 1258 1259 archive_string_init(&cp); 1260 switch (archive_le32dec(extdheader)) { 1261 case 65001: /* UTF-8 */ 1262 charset = "UTF-8"; 1263 break; 1264 default: 1265 archive_string_sprintf(&cp, "CP%d", 1266 (int)archive_le32dec(extdheader)); 1267 charset = cp.s; 1268 break; 1269 } 1270 lha->sconv = 1271 archive_string_conversion_from_charset( 1272 &(a->archive), charset, 1); 1273 archive_string_free(&cp); 1274 if (lha->sconv == NULL) 1275 return (ARCHIVE_FATAL); 1276 } 1277 break; 1278 case EXT_UNIX_MODE: 1279 if (datasize == sizeof(uint16_t)) { 1280 lha->mode = archive_le16dec(extdheader); 1281 lha->setflag |= UNIX_MODE_IS_SET; 1282 } 1283 break; 1284 case EXT_UNIX_GID_UID: 1285 if (datasize == (sizeof(uint16_t) * 2)) { 1286 lha->gid = archive_le16dec(extdheader); 1287 lha->uid = archive_le16dec(extdheader+2); 1288 } 1289 break; 1290 case EXT_UNIX_GNAME: 1291 if (datasize > 0) 1292 archive_strncpy(&lha->gname, 1293 (const char *)extdheader, datasize); 1294 break; 1295 case EXT_UNIX_UNAME: 1296 if (datasize > 0) 1297 archive_strncpy(&lha->uname, 1298 (const char *)extdheader, datasize); 1299 break; 1300 case EXT_UNIX_MTIME: 1301 if (datasize == sizeof(uint32_t)) 1302 lha->mtime = archive_le32dec(extdheader); 1303 break; 1304 case EXT_OS2_NEW_ATTR: 1305 /* This extended header is OS/2 depend. */ 1306 if (datasize == 16) { 1307 lha->dos_attr = (unsigned char) 1308 (archive_le16dec(extdheader) & 0xff); 1309 lha->mode = archive_le16dec(extdheader+2); 1310 lha->gid = archive_le16dec(extdheader+4); 1311 lha->uid = archive_le16dec(extdheader+6); 1312 lha->birthtime = archive_le32dec(extdheader+8); 1313 lha->atime = archive_le32dec(extdheader+12); 1314 lha->setflag |= UNIX_MODE_IS_SET 1315 | BIRTHTIME_IS_SET | ATIME_IS_SET; 1316 } 1317 break; 1318 case EXT_NEW_ATTR: 1319 if (datasize == 20) { 1320 lha->mode = (mode_t)archive_le32dec(extdheader); 1321 lha->gid = archive_le32dec(extdheader+4); 1322 lha->uid = archive_le32dec(extdheader+8); 1323 lha->birthtime = archive_le32dec(extdheader+12); 1324 lha->atime = archive_le32dec(extdheader+16); 1325 lha->setflag |= UNIX_MODE_IS_SET 1326 | BIRTHTIME_IS_SET | ATIME_IS_SET; 1327 } 1328 break; 1329 case EXT_TIMEZONE: /* Not supported */ 1330 case EXT_UTF16_FILENAME: /* Not supported */ 1331 case EXT_UTF16_DIRECTORY: /* Not supported */ 1332 default: 1333 break; 1334 } 1335 1336 __archive_read_consume(a, extdsize); 1337 } 1338 invalid: 1339 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1340 "Invalid extended LHa header"); 1341 return (ARCHIVE_FATAL); 1342 } 1343 1344 static int 1345 lha_end_of_entry(struct archive_read *a) 1346 { 1347 struct lha *lha = (struct lha *)(a->format->data); 1348 int r = ARCHIVE_EOF; 1349 1350 if (!lha->end_of_entry_cleanup) { 1351 if ((lha->setflag & CRC_IS_SET) && 1352 lha->crc != lha->entry_crc_calculated) { 1353 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1354 "LHa data CRC error"); 1355 r = ARCHIVE_WARN; 1356 } 1357 1358 /* End-of-entry cleanup done. */ 1359 lha->end_of_entry_cleanup = 1; 1360 } 1361 return (r); 1362 } 1363 1364 static int 1365 archive_read_format_lha_read_data(struct archive_read *a, 1366 const void **buff, size_t *size, int64_t *offset) 1367 { 1368 struct lha *lha = (struct lha *)(a->format->data); 1369 int r; 1370 1371 if (lha->entry_unconsumed) { 1372 /* Consume as much as the decompressor actually used. */ 1373 __archive_read_consume(a, lha->entry_unconsumed); 1374 lha->entry_unconsumed = 0; 1375 } 1376 if (lha->end_of_entry) { 1377 *offset = lha->entry_offset; 1378 *size = 0; 1379 *buff = NULL; 1380 return (lha_end_of_entry(a)); 1381 } 1382 1383 if (lha->entry_is_compressed) 1384 r = lha_read_data_lzh(a, buff, size, offset); 1385 else 1386 /* No compression. */ 1387 r = lha_read_data_none(a, buff, size, offset); 1388 return (r); 1389 } 1390 1391 /* 1392 * Read a file content in no compression. 1393 * 1394 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets 1395 * lha->end_of_entry if it consumes all of the data. 1396 */ 1397 static int 1398 lha_read_data_none(struct archive_read *a, const void **buff, 1399 size_t *size, int64_t *offset) 1400 { 1401 struct lha *lha = (struct lha *)(a->format->data); 1402 ssize_t bytes_avail; 1403 1404 if (lha->entry_bytes_remaining == 0) { 1405 *buff = NULL; 1406 *size = 0; 1407 *offset = lha->entry_offset; 1408 lha->end_of_entry = 1; 1409 return (ARCHIVE_OK); 1410 } 1411 /* 1412 * Note: '1' here is a performance optimization. 1413 * Recall that the decompression layer returns a count of 1414 * available bytes; asking for more than that forces the 1415 * decompressor to combine reads by copying data. 1416 */ 1417 *buff = __archive_read_ahead(a, 1, &bytes_avail); 1418 if (bytes_avail <= 0) { 1419 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1420 "Truncated LHa file data"); 1421 return (ARCHIVE_FATAL); 1422 } 1423 if (bytes_avail > lha->entry_bytes_remaining) 1424 bytes_avail = (ssize_t)lha->entry_bytes_remaining; 1425 lha->entry_crc_calculated = 1426 lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail); 1427 *size = bytes_avail; 1428 *offset = lha->entry_offset; 1429 lha->entry_offset += bytes_avail; 1430 lha->entry_bytes_remaining -= bytes_avail; 1431 if (lha->entry_bytes_remaining == 0) 1432 lha->end_of_entry = 1; 1433 lha->entry_unconsumed = bytes_avail; 1434 return (ARCHIVE_OK); 1435 } 1436 1437 /* 1438 * Read a file content in LZHUFF encoding. 1439 * 1440 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is 1441 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes 1442 * all of the data. 1443 */ 1444 static int 1445 lha_read_data_lzh(struct archive_read *a, const void **buff, 1446 size_t *size, int64_t *offset) 1447 { 1448 struct lha *lha = (struct lha *)(a->format->data); 1449 ssize_t bytes_avail; 1450 int r; 1451 1452 /* If we haven't yet read any data, initialize the decompressor. */ 1453 if (!lha->decompress_init) { 1454 r = lzh_decode_init(&(lha->strm), lha->method); 1455 switch (r) { 1456 case ARCHIVE_OK: 1457 break; 1458 case ARCHIVE_FAILED: 1459 /* Unsupported compression. */ 1460 *buff = NULL; 1461 *size = 0; 1462 *offset = 0; 1463 archive_set_error(&a->archive, 1464 ARCHIVE_ERRNO_FILE_FORMAT, 1465 "Unsupported lzh compression method -%c%c%c-", 1466 lha->method[0], lha->method[1], lha->method[2]); 1467 /* We know compressed size; just skip it. */ 1468 archive_read_format_lha_read_data_skip(a); 1469 return (ARCHIVE_WARN); 1470 default: 1471 archive_set_error(&a->archive, ENOMEM, 1472 "Couldn't allocate memory " 1473 "for lzh decompression"); 1474 return (ARCHIVE_FATAL); 1475 } 1476 /* We've initialized decompression for this stream. */ 1477 lha->decompress_init = 1; 1478 lha->strm.avail_out = 0; 1479 lha->strm.total_out = 0; 1480 } 1481 1482 /* 1483 * Note: '1' here is a performance optimization. 1484 * Recall that the decompression layer returns a count of 1485 * available bytes; asking for more than that forces the 1486 * decompressor to combine reads by copying data. 1487 */ 1488 lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail); 1489 if (bytes_avail <= 0) { 1490 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1491 "Truncated LHa file body"); 1492 return (ARCHIVE_FATAL); 1493 } 1494 if (bytes_avail > lha->entry_bytes_remaining) 1495 bytes_avail = (ssize_t)lha->entry_bytes_remaining; 1496 1497 lha->strm.avail_in = (int)bytes_avail; 1498 lha->strm.total_in = 0; 1499 lha->strm.avail_out = 0; 1500 1501 r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining); 1502 switch (r) { 1503 case ARCHIVE_OK: 1504 break; 1505 case ARCHIVE_EOF: 1506 lha->end_of_entry = 1; 1507 break; 1508 default: 1509 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1510 "Bad lzh data"); 1511 return (ARCHIVE_FAILED); 1512 } 1513 lha->entry_unconsumed = lha->strm.total_in; 1514 lha->entry_bytes_remaining -= lha->strm.total_in; 1515 1516 if (lha->strm.avail_out) { 1517 *offset = lha->entry_offset; 1518 *size = lha->strm.avail_out; 1519 *buff = lha->strm.ref_ptr; 1520 lha->entry_crc_calculated = 1521 lha_crc16(lha->entry_crc_calculated, *buff, *size); 1522 lha->entry_offset += *size; 1523 } else { 1524 *offset = lha->entry_offset; 1525 *size = 0; 1526 *buff = NULL; 1527 if (lha->end_of_entry) 1528 return (lha_end_of_entry(a)); 1529 } 1530 return (ARCHIVE_OK); 1531 } 1532 1533 /* 1534 * Skip a file content. 1535 */ 1536 static int 1537 archive_read_format_lha_read_data_skip(struct archive_read *a) 1538 { 1539 struct lha *lha; 1540 int64_t bytes_skipped; 1541 1542 lha = (struct lha *)(a->format->data); 1543 1544 if (lha->entry_unconsumed) { 1545 /* Consume as much as the decompressor actually used. */ 1546 __archive_read_consume(a, lha->entry_unconsumed); 1547 lha->entry_unconsumed = 0; 1548 } 1549 1550 /* if we've already read to end of data, we're done. */ 1551 if (lha->end_of_entry_cleanup) 1552 return (ARCHIVE_OK); 1553 1554 /* 1555 * If the length is at the beginning, we can skip the 1556 * compressed data much more quickly. 1557 */ 1558 bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining); 1559 if (bytes_skipped < 0) 1560 return (ARCHIVE_FATAL); 1561 1562 /* This entry is finished and done. */ 1563 lha->end_of_entry_cleanup = lha->end_of_entry = 1; 1564 return (ARCHIVE_OK); 1565 } 1566 1567 static int 1568 archive_read_format_lha_cleanup(struct archive_read *a) 1569 { 1570 struct lha *lha = (struct lha *)(a->format->data); 1571 1572 lzh_decode_free(&(lha->strm)); 1573 archive_string_free(&(lha->dirname)); 1574 archive_string_free(&(lha->filename)); 1575 archive_string_free(&(lha->uname)); 1576 archive_string_free(&(lha->gname)); 1577 archive_wstring_free(&(lha->ws)); 1578 free(lha); 1579 (a->format->data) = NULL; 1580 return (ARCHIVE_OK); 1581 } 1582 1583 /* 1584 * 'LHa for UNIX' utility has archived a symbolic-link name after 1585 * a pathname with '|' character. 1586 * This function extracts the symbolic-link name from the pathname. 1587 * 1588 * example. 1589 * 1. a symbolic-name is 'aaa/bb/cc' 1590 * 2. a filename is 'xxx/bbb' 1591 * then a archived pathname is 'xxx/bbb|aaa/bb/cc' 1592 */ 1593 static int 1594 lha_parse_linkname(struct archive_string *linkname, 1595 struct archive_string *pathname) 1596 { 1597 char * linkptr; 1598 size_t symlen; 1599 1600 linkptr = strchr(pathname->s, '|'); 1601 if (linkptr != NULL) { 1602 symlen = strlen(linkptr + 1); 1603 archive_strncpy(linkname, linkptr+1, symlen); 1604 1605 *linkptr = 0; 1606 pathname->length = strlen(pathname->s); 1607 1608 return (1); 1609 } 1610 return (0); 1611 } 1612 1613 /* Convert an MSDOS-style date/time into Unix-style time. */ 1614 static time_t 1615 lha_dos_time(const unsigned char *p) 1616 { 1617 int msTime, msDate; 1618 struct tm ts; 1619 1620 msTime = archive_le16dec(p); 1621 msDate = archive_le16dec(p+2); 1622 1623 memset(&ts, 0, sizeof(ts)); 1624 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ 1625 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ 1626 ts.tm_mday = msDate & 0x1f; /* Day of month. */ 1627 ts.tm_hour = (msTime >> 11) & 0x1f; 1628 ts.tm_min = (msTime >> 5) & 0x3f; 1629 ts.tm_sec = (msTime << 1) & 0x3e; 1630 ts.tm_isdst = -1; 1631 return (mktime(&ts)); 1632 } 1633 1634 /* Convert an MS-Windows-style date/time into Unix-style time. */ 1635 static time_t 1636 lha_win_time(uint64_t wintime, long *ns) 1637 { 1638 #define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000) 1639 1640 if (wintime >= EPOC_TIME) { 1641 wintime -= EPOC_TIME; /* 1970-01-01 00:00:00 (UTC) */ 1642 if (ns != NULL) 1643 *ns = (long)(wintime % 10000000) * 100; 1644 return (wintime / 10000000); 1645 } else { 1646 if (ns != NULL) 1647 *ns = 0; 1648 return (0); 1649 } 1650 } 1651 1652 static unsigned char 1653 lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size) 1654 { 1655 unsigned char const *p = (unsigned char const *)pp; 1656 1657 p += offset; 1658 for (;size > 0; --size) 1659 sum += *p++; 1660 return (sum); 1661 } 1662 1663 static uint16_t crc16tbl[2][256]; 1664 static void 1665 lha_crc16_init(void) 1666 { 1667 unsigned int i; 1668 static int crc16init = 0; 1669 1670 if (crc16init) 1671 return; 1672 crc16init = 1; 1673 1674 for (i = 0; i < 256; i++) { 1675 unsigned int j; 1676 uint16_t crc = (uint16_t)i; 1677 for (j = 8; j; j--) 1678 crc = (crc >> 1) ^ ((crc & 1) * 0xA001); 1679 crc16tbl[0][i] = crc; 1680 } 1681 1682 for (i = 0; i < 256; i++) { 1683 crc16tbl[1][i] = (crc16tbl[0][i] >> 8) 1684 ^ crc16tbl[0][crc16tbl[0][i] & 0xff]; 1685 } 1686 } 1687 1688 static uint16_t 1689 lha_crc16(uint16_t crc, const void *pp, size_t len) 1690 { 1691 const unsigned char *p = (const unsigned char *)pp; 1692 const uint16_t *buff; 1693 const union { 1694 uint32_t i; 1695 char c[4]; 1696 } u = { 0x01020304 }; 1697 1698 if (len == 0) 1699 return crc; 1700 1701 /* Process unaligned address. */ 1702 if (((uintptr_t)p) & (uintptr_t)0x1) { 1703 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff]; 1704 len--; 1705 } 1706 buff = (const uint16_t *)p; 1707 /* 1708 * Modern C compiler such as GCC does not unroll automatically yet 1709 * without unrolling pragma, and Clang is so. So we should 1710 * unroll this loop for its performance. 1711 */ 1712 for (;len >= 8; len -= 8) { 1713 /* This if statement expects compiler optimization will 1714 * remove the stament which will not be executed. */ 1715 #if defined(_MSC_VER) && _MSC_VER >= 1400 /* Visual Studio */ 1716 # define bswap16(x) _byteswap_ushort(x) 1717 #elif (defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8) \ 1718 || defined(__clang__) 1719 # define bswap16(x) __builtin_bswap16(x) 1720 #else 1721 # define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8)) 1722 #endif 1723 #define CRC16W do { \ 1724 if(u.c[0] == 1) { /* Big endian */ \ 1725 crc ^= bswap16(*buff); buff++; \ 1726 } else \ 1727 crc ^= *buff++; \ 1728 crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\ 1729 } while (0) 1730 CRC16W; 1731 CRC16W; 1732 CRC16W; 1733 CRC16W; 1734 #undef CRC16W 1735 #undef bswap16 1736 } 1737 1738 p = (const unsigned char *)buff; 1739 for (;len; len--) { 1740 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff]; 1741 } 1742 return crc; 1743 } 1744 1745 /* 1746 * Initialize LZHUF decoder. 1747 * 1748 * Returns ARCHIVE_OK if initialization was successful. 1749 * Returns ARCHIVE_FAILED if method is unsupported. 1750 * Returns ARCHIVE_FATAL if initialization failed; memory allocation 1751 * error occurred. 1752 */ 1753 static int 1754 lzh_decode_init(struct lzh_stream *strm, const char *method) 1755 { 1756 struct lzh_dec *ds; 1757 int w_bits, w_size; 1758 1759 if (strm->ds == NULL) { 1760 strm->ds = calloc(1, sizeof(*strm->ds)); 1761 if (strm->ds == NULL) 1762 return (ARCHIVE_FATAL); 1763 } 1764 ds = strm->ds; 1765 ds->error = ARCHIVE_FAILED; 1766 if (method == NULL || method[0] != 'l' || method[1] != 'h') 1767 return (ARCHIVE_FAILED); 1768 switch (method[2]) { 1769 case '5': 1770 w_bits = 13;/* 8KiB for window */ 1771 break; 1772 case '6': 1773 w_bits = 15;/* 32KiB for window */ 1774 break; 1775 case '7': 1776 w_bits = 16;/* 64KiB for window */ 1777 break; 1778 default: 1779 return (ARCHIVE_FAILED);/* Not supported. */ 1780 } 1781 ds->error = ARCHIVE_FATAL; 1782 /* Expand a window size up to 128 KiB for decompressing process 1783 * performance whatever its original window size is. */ 1784 ds->w_size = 1U << 17; 1785 ds->w_mask = ds->w_size -1; 1786 if (ds->w_buff == NULL) { 1787 ds->w_buff = malloc(ds->w_size); 1788 if (ds->w_buff == NULL) 1789 return (ARCHIVE_FATAL); 1790 } 1791 w_size = 1U << w_bits; 1792 memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size); 1793 ds->w_pos = 0; 1794 ds->state = 0; 1795 ds->pos_pt_len_size = w_bits + 1; 1796 ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4; 1797 ds->literal_pt_len_size = PT_BITLEN_SIZE; 1798 ds->literal_pt_len_bits = 5; 1799 ds->br.cache_buffer = 0; 1800 ds->br.cache_avail = 0; 1801 1802 if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16) 1803 != ARCHIVE_OK) 1804 return (ARCHIVE_FATAL); 1805 ds->lt.len_bits = 9; 1806 if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16) 1807 != ARCHIVE_OK) 1808 return (ARCHIVE_FATAL); 1809 ds->error = 0; 1810 1811 return (ARCHIVE_OK); 1812 } 1813 1814 /* 1815 * Release LZHUF decoder. 1816 */ 1817 static void 1818 lzh_decode_free(struct lzh_stream *strm) 1819 { 1820 1821 if (strm->ds == NULL) 1822 return; 1823 free(strm->ds->w_buff); 1824 lzh_huffman_free(&(strm->ds->lt)); 1825 lzh_huffman_free(&(strm->ds->pt)); 1826 free(strm->ds); 1827 strm->ds = NULL; 1828 } 1829 1830 /* 1831 * Bit stream reader. 1832 */ 1833 /* Check that the cache buffer has enough bits. */ 1834 #define lzh_br_has(br, n) ((br)->cache_avail >= n) 1835 /* Get compressed data by bit. */ 1836 #define lzh_br_bits(br, n) \ 1837 (((uint16_t)((br)->cache_buffer >> \ 1838 ((br)->cache_avail - (n)))) & cache_masks[n]) 1839 #define lzh_br_bits_forced(br, n) \ 1840 (((uint16_t)((br)->cache_buffer << \ 1841 ((n) - (br)->cache_avail))) & cache_masks[n]) 1842 /* Read ahead to make sure the cache buffer has enough compressed data we 1843 * will use. 1844 * True : completed, there is enough data in the cache buffer. 1845 * False : we met that strm->next_in is empty, we have to get following 1846 * bytes. */ 1847 #define lzh_br_read_ahead_0(strm, br, n) \ 1848 (lzh_br_has(br, (n)) || lzh_br_fillup(strm, br)) 1849 /* True : the cache buffer has some bits as much as we need. 1850 * False : there are no enough bits in the cache buffer to be used, 1851 * we have to get following bytes if we could. */ 1852 #define lzh_br_read_ahead(strm, br, n) \ 1853 (lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n))) 1854 1855 /* Notify how many bits we consumed. */ 1856 #define lzh_br_consume(br, n) ((br)->cache_avail -= (n)) 1857 #define lzh_br_unconsume(br, n) ((br)->cache_avail += (n)) 1858 1859 static const uint16_t cache_masks[] = { 1860 0x0000, 0x0001, 0x0003, 0x0007, 1861 0x000F, 0x001F, 0x003F, 0x007F, 1862 0x00FF, 0x01FF, 0x03FF, 0x07FF, 1863 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 1864 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF 1865 }; 1866 1867 /* 1868 * Shift away used bits in the cache data and fill it up with following bits. 1869 * Call this when cache buffer does not have enough bits you need. 1870 * 1871 * Returns 1 if the cache buffer is full. 1872 * Returns 0 if the cache buffer is not full; input buffer is empty. 1873 */ 1874 static int 1875 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br) 1876 { 1877 int n = CACHE_BITS - br->cache_avail; 1878 1879 for (;;) { 1880 const int x = n >> 3; 1881 if (strm->avail_in >= x) { 1882 switch (x) { 1883 case 8: 1884 br->cache_buffer = 1885 ((uint64_t)strm->next_in[0]) << 56 | 1886 ((uint64_t)strm->next_in[1]) << 48 | 1887 ((uint64_t)strm->next_in[2]) << 40 | 1888 ((uint64_t)strm->next_in[3]) << 32 | 1889 ((uint32_t)strm->next_in[4]) << 24 | 1890 ((uint32_t)strm->next_in[5]) << 16 | 1891 ((uint32_t)strm->next_in[6]) << 8 | 1892 (uint32_t)strm->next_in[7]; 1893 strm->next_in += 8; 1894 strm->avail_in -= 8; 1895 br->cache_avail += 8 * 8; 1896 return (1); 1897 case 7: 1898 br->cache_buffer = 1899 (br->cache_buffer << 56) | 1900 ((uint64_t)strm->next_in[0]) << 48 | 1901 ((uint64_t)strm->next_in[1]) << 40 | 1902 ((uint64_t)strm->next_in[2]) << 32 | 1903 ((uint32_t)strm->next_in[3]) << 24 | 1904 ((uint32_t)strm->next_in[4]) << 16 | 1905 ((uint32_t)strm->next_in[5]) << 8 | 1906 (uint32_t)strm->next_in[6]; 1907 strm->next_in += 7; 1908 strm->avail_in -= 7; 1909 br->cache_avail += 7 * 8; 1910 return (1); 1911 case 6: 1912 br->cache_buffer = 1913 (br->cache_buffer << 48) | 1914 ((uint64_t)strm->next_in[0]) << 40 | 1915 ((uint64_t)strm->next_in[1]) << 32 | 1916 ((uint32_t)strm->next_in[2]) << 24 | 1917 ((uint32_t)strm->next_in[3]) << 16 | 1918 ((uint32_t)strm->next_in[4]) << 8 | 1919 (uint32_t)strm->next_in[5]; 1920 strm->next_in += 6; 1921 strm->avail_in -= 6; 1922 br->cache_avail += 6 * 8; 1923 return (1); 1924 case 0: 1925 /* We have enough compressed data in 1926 * the cache buffer.*/ 1927 return (1); 1928 default: 1929 break; 1930 } 1931 } 1932 if (strm->avail_in == 0) { 1933 /* There is not enough compressed data to fill up the 1934 * cache buffer. */ 1935 return (0); 1936 } 1937 br->cache_buffer = 1938 (br->cache_buffer << 8) | *strm->next_in++; 1939 strm->avail_in--; 1940 br->cache_avail += 8; 1941 n -= 8; 1942 } 1943 } 1944 1945 /* 1946 * Decode LZHUF. 1947 * 1948 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty. 1949 * Please set available buffer and call this function again. 1950 * 2. Returns ARCHIVE_EOF if decompression has been completed. 1951 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data 1952 * is broken or you do not set 'last' flag properly. 1953 * 4. 'last' flag is very important, you must set 1 to the flag if there 1954 * is no input data. The lha compressed data format does not provide how 1955 * to know the compressed data is really finished. 1956 * Note: lha command utility check if the total size of output bytes is 1957 * reached the uncompressed size recorded in its header. it does not mind 1958 * that the decoding process is properly finished. 1959 * GNU ZIP can decompress another compressed file made by SCO LZH compress. 1960 * it handles EOF as null to fill read buffer with zero until the decoding 1961 * process meet 2 bytes of zeros at reading a size of a next chunk, so the 1962 * zeros are treated as the mark of the end of the data although the zeros 1963 * is dummy, not the file data. 1964 */ 1965 static int lzh_read_blocks(struct lzh_stream *, int); 1966 static int lzh_decode_blocks(struct lzh_stream *, int); 1967 #define ST_RD_BLOCK 0 1968 #define ST_RD_PT_1 1 1969 #define ST_RD_PT_2 2 1970 #define ST_RD_PT_3 3 1971 #define ST_RD_PT_4 4 1972 #define ST_RD_LITERAL_1 5 1973 #define ST_RD_LITERAL_2 6 1974 #define ST_RD_LITERAL_3 7 1975 #define ST_RD_POS_DATA_1 8 1976 #define ST_GET_LITERAL 9 1977 #define ST_GET_POS_1 10 1978 #define ST_GET_POS_2 11 1979 #define ST_COPY_DATA 12 1980 1981 static int 1982 lzh_decode(struct lzh_stream *strm, int last) 1983 { 1984 struct lzh_dec *ds = strm->ds; 1985 int avail_in; 1986 int r; 1987 1988 if (ds->error) 1989 return (ds->error); 1990 1991 avail_in = strm->avail_in; 1992 do { 1993 if (ds->state < ST_GET_LITERAL) 1994 r = lzh_read_blocks(strm, last); 1995 else 1996 r = lzh_decode_blocks(strm, last); 1997 } while (r == 100); 1998 strm->total_in += avail_in - strm->avail_in; 1999 return (r); 2000 } 2001 2002 static void 2003 lzh_emit_window(struct lzh_stream *strm, size_t s) 2004 { 2005 strm->ref_ptr = strm->ds->w_buff; 2006 strm->avail_out = (int)s; 2007 strm->total_out += s; 2008 } 2009 2010 static int 2011 lzh_read_blocks(struct lzh_stream *strm, int last) 2012 { 2013 struct lzh_dec *ds = strm->ds; 2014 struct lzh_br *br = &(ds->br); 2015 int c = 0, i; 2016 unsigned rbits; 2017 2018 for (;;) { 2019 switch (ds->state) { 2020 case ST_RD_BLOCK: 2021 /* 2022 * Read a block number indicates how many blocks 2023 * we will handle. The block is composed of a 2024 * literal and a match, sometimes a literal only 2025 * in particular, there are no reference data at 2026 * the beginning of the decompression. 2027 */ 2028 if (!lzh_br_read_ahead_0(strm, br, 16)) { 2029 if (!last) 2030 /* We need following data. */ 2031 return (ARCHIVE_OK); 2032 if (lzh_br_has(br, 8)) { 2033 /* 2034 * It seems there are extra bits. 2035 * 1. Compressed data is broken. 2036 * 2. `last' flag does not properly 2037 * set. 2038 */ 2039 goto failed; 2040 } 2041 if (ds->w_pos > 0) { 2042 lzh_emit_window(strm, ds->w_pos); 2043 ds->w_pos = 0; 2044 return (ARCHIVE_OK); 2045 } 2046 /* End of compressed data; we have completely 2047 * handled all compressed data. */ 2048 return (ARCHIVE_EOF); 2049 } 2050 ds->blocks_avail = lzh_br_bits(br, 16); 2051 if (ds->blocks_avail == 0) 2052 goto failed; 2053 lzh_br_consume(br, 16); 2054 /* 2055 * Read a literal table compressed in huffman 2056 * coding. 2057 */ 2058 ds->pt.len_size = ds->literal_pt_len_size; 2059 ds->pt.len_bits = ds->literal_pt_len_bits; 2060 ds->reading_position = 0; 2061 /* FALL THROUGH */ 2062 case ST_RD_PT_1: 2063 /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are 2064 * used in reading both a literal table and a 2065 * position table. */ 2066 if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) { 2067 if (last) 2068 goto failed;/* Truncated data. */ 2069 ds->state = ST_RD_PT_1; 2070 return (ARCHIVE_OK); 2071 } 2072 ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits); 2073 lzh_br_consume(br, ds->pt.len_bits); 2074 /* FALL THROUGH */ 2075 case ST_RD_PT_2: 2076 if (ds->pt.len_avail == 0) { 2077 /* There is no bitlen. */ 2078 if (!lzh_br_read_ahead(strm, br, 2079 ds->pt.len_bits)) { 2080 if (last) 2081 goto failed;/* Truncated data.*/ 2082 ds->state = ST_RD_PT_2; 2083 return (ARCHIVE_OK); 2084 } 2085 if (!lzh_make_fake_table(&(ds->pt), 2086 lzh_br_bits(br, ds->pt.len_bits))) 2087 goto failed;/* Invalid data. */ 2088 lzh_br_consume(br, ds->pt.len_bits); 2089 if (ds->reading_position) 2090 ds->state = ST_GET_LITERAL; 2091 else 2092 ds->state = ST_RD_LITERAL_1; 2093 break; 2094 } else if (ds->pt.len_avail > ds->pt.len_size) 2095 goto failed;/* Invalid data. */ 2096 ds->loop = 0; 2097 memset(ds->pt.freq, 0, sizeof(ds->pt.freq)); 2098 if (ds->pt.len_avail < 3 || 2099 ds->pt.len_size == ds->pos_pt_len_size) { 2100 ds->state = ST_RD_PT_4; 2101 break; 2102 } 2103 /* FALL THROUGH */ 2104 case ST_RD_PT_3: 2105 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3); 2106 if (ds->loop < 3) { 2107 if (ds->loop < 0 || last) 2108 goto failed;/* Invalid data. */ 2109 /* Not completed, get following data. */ 2110 ds->state = ST_RD_PT_3; 2111 return (ARCHIVE_OK); 2112 } 2113 /* There are some null in bitlen of the literal. */ 2114 if (!lzh_br_read_ahead(strm, br, 2)) { 2115 if (last) 2116 goto failed;/* Truncated data. */ 2117 ds->state = ST_RD_PT_3; 2118 return (ARCHIVE_OK); 2119 } 2120 c = lzh_br_bits(br, 2); 2121 lzh_br_consume(br, 2); 2122 if (c > ds->pt.len_avail - 3) 2123 goto failed;/* Invalid data. */ 2124 for (i = 3; c-- > 0 ;) 2125 ds->pt.bitlen[i++] = 0; 2126 ds->loop = i; 2127 /* FALL THROUGH */ 2128 case ST_RD_PT_4: 2129 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 2130 ds->pt.len_avail); 2131 if (ds->loop < ds->pt.len_avail) { 2132 if (ds->loop < 0 || last) 2133 goto failed;/* Invalid data. */ 2134 /* Not completed, get following data. */ 2135 ds->state = ST_RD_PT_4; 2136 return (ARCHIVE_OK); 2137 } 2138 if (!lzh_make_huffman_table(&(ds->pt))) 2139 goto failed;/* Invalid data */ 2140 if (ds->reading_position) { 2141 ds->state = ST_GET_LITERAL; 2142 break; 2143 } 2144 /* FALL THROUGH */ 2145 case ST_RD_LITERAL_1: 2146 if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) { 2147 if (last) 2148 goto failed;/* Truncated data. */ 2149 ds->state = ST_RD_LITERAL_1; 2150 return (ARCHIVE_OK); 2151 } 2152 ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits); 2153 lzh_br_consume(br, ds->lt.len_bits); 2154 /* FALL THROUGH */ 2155 case ST_RD_LITERAL_2: 2156 if (ds->lt.len_avail == 0) { 2157 /* There is no bitlen. */ 2158 if (!lzh_br_read_ahead(strm, br, 2159 ds->lt.len_bits)) { 2160 if (last) 2161 goto failed;/* Truncated data.*/ 2162 ds->state = ST_RD_LITERAL_2; 2163 return (ARCHIVE_OK); 2164 } 2165 if (!lzh_make_fake_table(&(ds->lt), 2166 lzh_br_bits(br, ds->lt.len_bits))) 2167 goto failed;/* Invalid data */ 2168 lzh_br_consume(br, ds->lt.len_bits); 2169 ds->state = ST_RD_POS_DATA_1; 2170 break; 2171 } else if (ds->lt.len_avail > ds->lt.len_size) 2172 goto failed;/* Invalid data */ 2173 ds->loop = 0; 2174 memset(ds->lt.freq, 0, sizeof(ds->lt.freq)); 2175 /* FALL THROUGH */ 2176 case ST_RD_LITERAL_3: 2177 i = ds->loop; 2178 while (i < ds->lt.len_avail) { 2179 if (!lzh_br_read_ahead(strm, br, 2180 ds->pt.max_bits)) { 2181 if (last) 2182 goto failed;/* Truncated data.*/ 2183 ds->loop = i; 2184 ds->state = ST_RD_LITERAL_3; 2185 return (ARCHIVE_OK); 2186 } 2187 rbits = lzh_br_bits(br, ds->pt.max_bits); 2188 c = lzh_decode_huffman(&(ds->pt), rbits); 2189 if (c > 2) { 2190 /* Note: 'c' will never be more than 2191 * eighteen since it's limited by 2192 * PT_BITLEN_SIZE, which is being set 2193 * to ds->pt.len_size through 2194 * ds->literal_pt_len_size. */ 2195 lzh_br_consume(br, ds->pt.bitlen[c]); 2196 c -= 2; 2197 ds->lt.freq[c]++; 2198 ds->lt.bitlen[i++] = c; 2199 } else if (c == 0) { 2200 lzh_br_consume(br, ds->pt.bitlen[c]); 2201 ds->lt.bitlen[i++] = 0; 2202 } else { 2203 /* c == 1 or c == 2 */ 2204 int n = (c == 1)?4:9; 2205 if (!lzh_br_read_ahead(strm, br, 2206 ds->pt.bitlen[c] + n)) { 2207 if (last) /* Truncated data. */ 2208 goto failed; 2209 ds->loop = i; 2210 ds->state = ST_RD_LITERAL_3; 2211 return (ARCHIVE_OK); 2212 } 2213 lzh_br_consume(br, ds->pt.bitlen[c]); 2214 c = lzh_br_bits(br, n); 2215 lzh_br_consume(br, n); 2216 c += (n == 4)?3:20; 2217 if (i + c > ds->lt.len_avail) 2218 goto failed;/* Invalid data */ 2219 memset(&(ds->lt.bitlen[i]), 0, c); 2220 i += c; 2221 } 2222 } 2223 if (i > ds->lt.len_avail || 2224 !lzh_make_huffman_table(&(ds->lt))) 2225 goto failed;/* Invalid data */ 2226 /* FALL THROUGH */ 2227 case ST_RD_POS_DATA_1: 2228 /* 2229 * Read a position table compressed in huffman 2230 * coding. 2231 */ 2232 ds->pt.len_size = ds->pos_pt_len_size; 2233 ds->pt.len_bits = ds->pos_pt_len_bits; 2234 ds->reading_position = 1; 2235 ds->state = ST_RD_PT_1; 2236 break; 2237 case ST_GET_LITERAL: 2238 return (100); 2239 } 2240 } 2241 failed: 2242 return (ds->error = ARCHIVE_FAILED); 2243 } 2244 2245 static int 2246 lzh_decode_blocks(struct lzh_stream *strm, int last) 2247 { 2248 struct lzh_dec *ds = strm->ds; 2249 struct lzh_br bre = ds->br; 2250 struct huffman *lt = &(ds->lt); 2251 struct huffman *pt = &(ds->pt); 2252 unsigned char *w_buff = ds->w_buff; 2253 unsigned char *lt_bitlen = lt->bitlen; 2254 unsigned char *pt_bitlen = pt->bitlen; 2255 int blocks_avail = ds->blocks_avail, c = 0; 2256 int copy_len = ds->copy_len, copy_pos = ds->copy_pos; 2257 int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size; 2258 int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits; 2259 int state = ds->state; 2260 2261 for (;;) { 2262 switch (state) { 2263 case ST_GET_LITERAL: 2264 for (;;) { 2265 if (blocks_avail == 0) { 2266 /* We have decoded all blocks. 2267 * Let's handle next blocks. */ 2268 ds->state = ST_RD_BLOCK; 2269 ds->br = bre; 2270 ds->blocks_avail = 0; 2271 ds->w_pos = w_pos; 2272 ds->copy_pos = 0; 2273 return (100); 2274 } 2275 2276 /* lzh_br_read_ahead() always try to fill the 2277 * cache buffer up. In specific situation we 2278 * are close to the end of the data, the cache 2279 * buffer will not be full and thus we have to 2280 * determine if the cache buffer has some bits 2281 * as much as we need after lzh_br_read_ahead() 2282 * failed. */ 2283 if (!lzh_br_read_ahead(strm, &bre, 2284 lt_max_bits)) { 2285 if (!last) 2286 goto next_data; 2287 /* Remaining bits are less than 2288 * maximum bits(lt.max_bits) but maybe 2289 * it still remains as much as we need, 2290 * so we should try to use it with 2291 * dummy bits. */ 2292 c = lzh_decode_huffman(lt, 2293 lzh_br_bits_forced(&bre, 2294 lt_max_bits)); 2295 lzh_br_consume(&bre, lt_bitlen[c]); 2296 if (!lzh_br_has(&bre, 0)) 2297 goto failed;/* Over read. */ 2298 } else { 2299 c = lzh_decode_huffman(lt, 2300 lzh_br_bits(&bre, lt_max_bits)); 2301 lzh_br_consume(&bre, lt_bitlen[c]); 2302 } 2303 blocks_avail--; 2304 if (c > UCHAR_MAX) 2305 /* Current block is a match data. */ 2306 break; 2307 /* 2308 * 'c' is exactly a literal code. 2309 */ 2310 /* Save a decoded code to reference it 2311 * afterward. */ 2312 w_buff[w_pos] = c; 2313 if (++w_pos >= w_size) { 2314 w_pos = 0; 2315 lzh_emit_window(strm, w_size); 2316 goto next_data; 2317 } 2318 } 2319 /* 'c' is the length of a match pattern we have 2320 * already extracted, which has be stored in 2321 * window(ds->w_buff). */ 2322 copy_len = c - (UCHAR_MAX + 1) + MINMATCH; 2323 /* FALL THROUGH */ 2324 case ST_GET_POS_1: 2325 /* 2326 * Get a reference position. 2327 */ 2328 if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) { 2329 if (!last) { 2330 state = ST_GET_POS_1; 2331 ds->copy_len = copy_len; 2332 goto next_data; 2333 } 2334 copy_pos = lzh_decode_huffman(pt, 2335 lzh_br_bits_forced(&bre, pt_max_bits)); 2336 lzh_br_consume(&bre, pt_bitlen[copy_pos]); 2337 if (!lzh_br_has(&bre, 0)) 2338 goto failed;/* Over read. */ 2339 } else { 2340 copy_pos = lzh_decode_huffman(pt, 2341 lzh_br_bits(&bre, pt_max_bits)); 2342 lzh_br_consume(&bre, pt_bitlen[copy_pos]); 2343 } 2344 /* FALL THROUGH */ 2345 case ST_GET_POS_2: 2346 if (copy_pos > 1) { 2347 /* We need an additional adjustment number to 2348 * the position. */ 2349 int p = copy_pos - 1; 2350 if (!lzh_br_read_ahead(strm, &bre, p)) { 2351 if (last) 2352 goto failed;/* Truncated data.*/ 2353 state = ST_GET_POS_2; 2354 ds->copy_len = copy_len; 2355 ds->copy_pos = copy_pos; 2356 goto next_data; 2357 } 2358 copy_pos = (1 << p) + lzh_br_bits(&bre, p); 2359 lzh_br_consume(&bre, p); 2360 } 2361 /* The position is actually a distance from the last 2362 * code we had extracted and thus we have to convert 2363 * it to a position of the window. */ 2364 copy_pos = (w_pos - copy_pos - 1) & w_mask; 2365 /* FALL THROUGH */ 2366 case ST_COPY_DATA: 2367 /* 2368 * Copy `copy_len' bytes as extracted data from 2369 * the window into the output buffer. 2370 */ 2371 for (;;) { 2372 int l; 2373 2374 l = copy_len; 2375 if (copy_pos > w_pos) { 2376 if (l > w_size - copy_pos) 2377 l = w_size - copy_pos; 2378 } else { 2379 if (l > w_size - w_pos) 2380 l = w_size - w_pos; 2381 } 2382 if ((copy_pos + l < w_pos) 2383 || (w_pos + l < copy_pos)) { 2384 /* No overlap. */ 2385 memcpy(w_buff + w_pos, 2386 w_buff + copy_pos, l); 2387 } else { 2388 const unsigned char *s; 2389 unsigned char *d; 2390 int li; 2391 2392 d = w_buff + w_pos; 2393 s = w_buff + copy_pos; 2394 for (li = 0; li < l-1;) { 2395 d[li] = s[li];li++; 2396 d[li] = s[li];li++; 2397 } 2398 if (li < l) 2399 d[li] = s[li]; 2400 } 2401 w_pos += l; 2402 if (w_pos == w_size) { 2403 w_pos = 0; 2404 lzh_emit_window(strm, w_size); 2405 if (copy_len <= l) 2406 state = ST_GET_LITERAL; 2407 else { 2408 state = ST_COPY_DATA; 2409 ds->copy_len = copy_len - l; 2410 ds->copy_pos = 2411 (copy_pos + l) & w_mask; 2412 } 2413 goto next_data; 2414 } 2415 if (copy_len <= l) 2416 /* A copy of current pattern ended. */ 2417 break; 2418 copy_len -= l; 2419 copy_pos = (copy_pos + l) & w_mask; 2420 } 2421 state = ST_GET_LITERAL; 2422 break; 2423 } 2424 } 2425 failed: 2426 return (ds->error = ARCHIVE_FAILED); 2427 next_data: 2428 ds->br = bre; 2429 ds->blocks_avail = blocks_avail; 2430 ds->state = state; 2431 ds->w_pos = w_pos; 2432 return (ARCHIVE_OK); 2433 } 2434 2435 static int 2436 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits) 2437 { 2438 int bits; 2439 2440 if (hf->bitlen == NULL) { 2441 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0])); 2442 if (hf->bitlen == NULL) 2443 return (ARCHIVE_FATAL); 2444 } 2445 if (hf->tbl == NULL) { 2446 if (tbl_bits < HTBL_BITS) 2447 bits = tbl_bits; 2448 else 2449 bits = HTBL_BITS; 2450 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0])); 2451 if (hf->tbl == NULL) 2452 return (ARCHIVE_FATAL); 2453 } 2454 if (hf->tree == NULL && tbl_bits > HTBL_BITS) { 2455 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4); 2456 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0])); 2457 if (hf->tree == NULL) 2458 return (ARCHIVE_FATAL); 2459 } 2460 hf->len_size = (int)len_size; 2461 hf->tbl_bits = tbl_bits; 2462 return (ARCHIVE_OK); 2463 } 2464 2465 static void 2466 lzh_huffman_free(struct huffman *hf) 2467 { 2468 free(hf->bitlen); 2469 free(hf->tbl); 2470 free(hf->tree); 2471 } 2472 2473 static char bitlen_tbl[0x400] = { 2474 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2475 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2476 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2477 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2478 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2479 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2480 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2481 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2482 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2483 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2484 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2485 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2486 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2487 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2488 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2489 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2490 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2491 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2492 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2493 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2494 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2495 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2496 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2497 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2498 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2499 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2500 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2501 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2502 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2503 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2504 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2505 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 2506 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2507 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2508 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2509 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2510 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2511 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2512 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2513 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2514 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2515 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2516 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2517 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2518 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2519 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2520 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2521 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 2522 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2523 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2524 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2525 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2526 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2527 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2528 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2529 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2530 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2531 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2532 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2533 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 2534 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2535 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 2536 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 2537 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16, 0 2538 }; 2539 static int 2540 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end) 2541 { 2542 struct lzh_dec *ds = strm->ds; 2543 struct lzh_br *br = &(ds->br); 2544 int c, i; 2545 2546 for (i = start; i < end; ) { 2547 /* 2548 * bit pattern the number we need 2549 * 000 -> 0 2550 * 001 -> 1 2551 * 010 -> 2 2552 * ... 2553 * 110 -> 6 2554 * 1110 -> 7 2555 * 11110 -> 8 2556 * ... 2557 * 1111111111110 -> 16 2558 */ 2559 if (!lzh_br_read_ahead(strm, br, 3)) 2560 return (i); 2561 if ((c = lzh_br_bits(br, 3)) == 7) { 2562 if (!lzh_br_read_ahead(strm, br, 13)) 2563 return (i); 2564 c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF]; 2565 if (c) 2566 lzh_br_consume(br, c - 3); 2567 else 2568 return (-1);/* Invalid data. */ 2569 } else 2570 lzh_br_consume(br, 3); 2571 ds->pt.bitlen[i++] = c; 2572 ds->pt.freq[c]++; 2573 } 2574 return (i); 2575 } 2576 2577 static int 2578 lzh_make_fake_table(struct huffman *hf, uint16_t c) 2579 { 2580 if (c >= hf->len_size) 2581 return (0); 2582 hf->tbl[0] = c; 2583 hf->max_bits = 0; 2584 hf->shift_bits = 0; 2585 hf->bitlen[hf->tbl[0]] = 0; 2586 return (1); 2587 } 2588 2589 /* 2590 * Make a huffman coding table. 2591 */ 2592 static int 2593 lzh_make_huffman_table(struct huffman *hf) 2594 { 2595 uint16_t *tbl; 2596 const unsigned char *bitlen; 2597 int bitptn[17], weight[17]; 2598 int i, maxbits = 0, ptn, tbl_size, w; 2599 int diffbits, len_avail; 2600 2601 /* 2602 * Initialize bit patterns. 2603 */ 2604 ptn = 0; 2605 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) { 2606 bitptn[i] = ptn; 2607 weight[i] = w; 2608 if (hf->freq[i]) { 2609 ptn += hf->freq[i] * w; 2610 maxbits = i; 2611 } 2612 } 2613 if (ptn != 0x10000 || maxbits > hf->tbl_bits) 2614 return (0);/* Invalid */ 2615 2616 hf->max_bits = maxbits; 2617 2618 /* 2619 * Cut out extra bits which we won't house in the table. 2620 * This preparation reduces the same calculation in the for-loop 2621 * making the table. 2622 */ 2623 if (maxbits < 16) { 2624 int ebits = 16 - maxbits; 2625 for (i = 1; i <= maxbits; i++) { 2626 bitptn[i] >>= ebits; 2627 weight[i] >>= ebits; 2628 } 2629 } 2630 if (maxbits > HTBL_BITS) { 2631 unsigned htbl_max; 2632 uint16_t *p; 2633 2634 diffbits = maxbits - HTBL_BITS; 2635 for (i = 1; i <= HTBL_BITS; i++) { 2636 bitptn[i] >>= diffbits; 2637 weight[i] >>= diffbits; 2638 } 2639 htbl_max = bitptn[HTBL_BITS] + 2640 weight[HTBL_BITS] * hf->freq[HTBL_BITS]; 2641 p = &(hf->tbl[htbl_max]); 2642 while (p < &hf->tbl[1U<<HTBL_BITS]) 2643 *p++ = 0; 2644 } else 2645 diffbits = 0; 2646 hf->shift_bits = diffbits; 2647 2648 /* 2649 * Make the table. 2650 */ 2651 tbl_size = 1 << HTBL_BITS; 2652 tbl = hf->tbl; 2653 bitlen = hf->bitlen; 2654 len_avail = hf->len_avail; 2655 hf->tree_used = 0; 2656 for (i = 0; i < len_avail; i++) { 2657 uint16_t *p; 2658 int len, cnt; 2659 uint16_t bit; 2660 int extlen; 2661 struct htree_t *ht; 2662 2663 if (bitlen[i] == 0) 2664 continue; 2665 /* Get a bit pattern */ 2666 len = bitlen[i]; 2667 ptn = bitptn[len]; 2668 cnt = weight[len]; 2669 if (len <= HTBL_BITS) { 2670 /* Calculate next bit pattern */ 2671 if ((bitptn[len] = ptn + cnt) > tbl_size) 2672 return (0);/* Invalid */ 2673 /* Update the table */ 2674 p = &(tbl[ptn]); 2675 if (cnt > 7) { 2676 uint16_t *pc; 2677 2678 cnt -= 8; 2679 pc = &p[cnt]; 2680 pc[0] = (uint16_t)i; 2681 pc[1] = (uint16_t)i; 2682 pc[2] = (uint16_t)i; 2683 pc[3] = (uint16_t)i; 2684 pc[4] = (uint16_t)i; 2685 pc[5] = (uint16_t)i; 2686 pc[6] = (uint16_t)i; 2687 pc[7] = (uint16_t)i; 2688 if (cnt > 7) { 2689 cnt -= 8; 2690 memcpy(&p[cnt], pc, 2691 8 * sizeof(uint16_t)); 2692 pc = &p[cnt]; 2693 while (cnt > 15) { 2694 cnt -= 16; 2695 memcpy(&p[cnt], pc, 2696 16 * sizeof(uint16_t)); 2697 } 2698 } 2699 if (cnt) 2700 memcpy(p, pc, cnt * sizeof(uint16_t)); 2701 } else { 2702 while (cnt > 1) { 2703 p[--cnt] = (uint16_t)i; 2704 p[--cnt] = (uint16_t)i; 2705 } 2706 if (cnt) 2707 p[--cnt] = (uint16_t)i; 2708 } 2709 continue; 2710 } 2711 2712 /* 2713 * A bit length is too big to be housed to a direct table, 2714 * so we use a tree model for its extra bits. 2715 */ 2716 bitptn[len] = ptn + cnt; 2717 bit = 1U << (diffbits -1); 2718 extlen = len - HTBL_BITS; 2719 2720 p = &(tbl[ptn >> diffbits]); 2721 if (*p == 0) { 2722 *p = len_avail + hf->tree_used; 2723 ht = &(hf->tree[hf->tree_used++]); 2724 if (hf->tree_used > hf->tree_avail) 2725 return (0);/* Invalid */ 2726 ht->left = 0; 2727 ht->right = 0; 2728 } else { 2729 if (*p < len_avail || 2730 *p >= (len_avail + hf->tree_used)) 2731 return (0);/* Invalid */ 2732 ht = &(hf->tree[*p - len_avail]); 2733 } 2734 while (--extlen > 0) { 2735 if (ptn & bit) { 2736 if (ht->left < len_avail) { 2737 ht->left = len_avail + hf->tree_used; 2738 ht = &(hf->tree[hf->tree_used++]); 2739 if (hf->tree_used > hf->tree_avail) 2740 return (0);/* Invalid */ 2741 ht->left = 0; 2742 ht->right = 0; 2743 } else { 2744 ht = &(hf->tree[ht->left - len_avail]); 2745 } 2746 } else { 2747 if (ht->right < len_avail) { 2748 ht->right = len_avail + hf->tree_used; 2749 ht = &(hf->tree[hf->tree_used++]); 2750 if (hf->tree_used > hf->tree_avail) 2751 return (0);/* Invalid */ 2752 ht->left = 0; 2753 ht->right = 0; 2754 } else { 2755 ht = &(hf->tree[ht->right - len_avail]); 2756 } 2757 } 2758 bit >>= 1; 2759 } 2760 if (ptn & bit) { 2761 if (ht->left != 0) 2762 return (0);/* Invalid */ 2763 ht->left = (uint16_t)i; 2764 } else { 2765 if (ht->right != 0) 2766 return (0);/* Invalid */ 2767 ht->right = (uint16_t)i; 2768 } 2769 } 2770 return (1); 2771 } 2772 2773 static int 2774 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c) 2775 { 2776 struct htree_t *ht; 2777 int extlen; 2778 2779 ht = hf->tree; 2780 extlen = hf->shift_bits; 2781 while (c >= hf->len_avail) { 2782 c -= hf->len_avail; 2783 if (extlen-- <= 0 || c >= hf->tree_used) 2784 return (0); 2785 if (rbits & (1U << extlen)) 2786 c = ht[c].left; 2787 else 2788 c = ht[c].right; 2789 } 2790 return (c); 2791 } 2792 2793 static inline int 2794 lzh_decode_huffman(struct huffman *hf, unsigned rbits) 2795 { 2796 int c; 2797 /* 2798 * At first search an index table for a bit pattern. 2799 * If it fails, search a huffman tree for. 2800 */ 2801 c = hf->tbl[rbits >> hf->shift_bits]; 2802 if (c < hf->len_avail || hf->len_avail == 0) 2803 return (c); 2804 /* This bit pattern needs to be found out at a huffman tree. */ 2805 return (lzh_decode_huffman_tree(hf, rbits, c)); 2806 } 2807 2808