1 /*- 2 * Copyright (c) 2004 Tim Kientzle 3 * Copyright (c) 2011-2012 Michihiro NAKAJIMA 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "archive_platform.h" 28 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 2009-12-28 03:11:36Z kientzle $"); 29 30 #ifdef HAVE_ERRNO_H 31 #include <errno.h> 32 #endif 33 #ifdef HAVE_STDLIB_H 34 #include <stdlib.h> 35 #endif 36 #ifdef HAVE_ZLIB_H 37 #include <zlib.h> 38 #endif 39 40 #include "archive.h" 41 #include "archive_endian.h" 42 #include "archive_entry.h" 43 #include "archive_entry_locale.h" 44 #include "archive_private.h" 45 #include "archive_rb.h" 46 #include "archive_read_private.h" 47 48 #ifndef HAVE_ZLIB_H 49 #include "archive_crc32.h" 50 #endif 51 52 struct zip_entry { 53 struct archive_rb_node node; 54 int64_t local_header_offset; 55 int64_t compressed_size; 56 int64_t uncompressed_size; 57 int64_t gid; 58 int64_t uid; 59 struct archive_entry *entry; 60 struct archive_string rsrcname; 61 time_t mtime; 62 time_t atime; 63 time_t ctime; 64 uint32_t crc32; 65 uint16_t mode; 66 uint16_t flags; 67 char compression; 68 char system; 69 }; 70 71 struct zip { 72 /* Structural information about the archive. */ 73 int64_t end_of_central_directory_offset; 74 int64_t central_directory_offset; 75 size_t central_directory_size; 76 size_t central_directory_entries; 77 char have_central_directory; 78 int64_t offset; 79 80 /* List of entries (seekable Zip only) */ 81 size_t entries_remaining; 82 struct zip_entry *zip_entries; 83 struct zip_entry *entry; 84 struct archive_rb_tree tree; 85 struct archive_rb_tree tree_rsrc; 86 87 size_t unconsumed; 88 89 /* entry_bytes_remaining is the number of bytes we expect. */ 90 int64_t entry_bytes_remaining; 91 92 /* These count the number of bytes actually read for the entry. */ 93 int64_t entry_compressed_bytes_read; 94 int64_t entry_uncompressed_bytes_read; 95 96 /* Running CRC32 of the decompressed data */ 97 unsigned long entry_crc32; 98 99 /* Flags to mark progress of decompression. */ 100 char decompress_init; 101 char end_of_entry; 102 103 ssize_t filename_length; 104 ssize_t extra_length; 105 106 unsigned char *uncompressed_buffer; 107 size_t uncompressed_buffer_size; 108 #ifdef HAVE_ZLIB_H 109 z_stream stream; 110 char stream_valid; 111 #endif 112 113 struct archive_string extra; 114 struct archive_string_conv *sconv; 115 struct archive_string_conv *sconv_default; 116 struct archive_string_conv *sconv_utf8; 117 int init_default_conversion; 118 char format_name[64]; 119 }; 120 121 #define ZIP_LENGTH_AT_END 8 122 #define ZIP_ENCRYPTED (1<<0) 123 #define ZIP_STRONG_ENCRYPTED (1<<6) 124 #define ZIP_UTF8_NAME (1<<11) 125 126 static int archive_read_format_zip_streamable_bid(struct archive_read *, 127 int); 128 static int archive_read_format_zip_seekable_bid(struct archive_read *, 129 int); 130 static int archive_read_format_zip_options(struct archive_read *, 131 const char *, const char *); 132 static int archive_read_format_zip_cleanup(struct archive_read *); 133 static int archive_read_format_zip_read_data(struct archive_read *, 134 const void **, size_t *, int64_t *); 135 static int archive_read_format_zip_read_data_skip(struct archive_read *a); 136 static int archive_read_format_zip_seekable_read_header( 137 struct archive_read *, struct archive_entry *); 138 static int archive_read_format_zip_streamable_read_header( 139 struct archive_read *, struct archive_entry *); 140 static ssize_t zip_get_local_file_header_size(struct archive_read *, size_t); 141 #ifdef HAVE_ZLIB_H 142 static int zip_deflate_init(struct archive_read *, struct zip *); 143 static int zip_read_data_deflate(struct archive_read *a, const void **buff, 144 size_t *size, int64_t *offset); 145 #endif 146 static int zip_read_data_none(struct archive_read *a, const void **buff, 147 size_t *size, int64_t *offset); 148 static int zip_read_local_file_header(struct archive_read *a, 149 struct archive_entry *entry, struct zip *); 150 static time_t zip_time(const char *); 151 static const char *compression_name(int compression); 152 static void process_extra(const char *, size_t, struct zip_entry *); 153 154 int archive_read_support_format_zip_streamable(struct archive *); 155 int archive_read_support_format_zip_seekable(struct archive *); 156 157 int 158 archive_read_support_format_zip_streamable(struct archive *_a) 159 { 160 struct archive_read *a = (struct archive_read *)_a; 161 struct zip *zip; 162 int r; 163 164 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 165 ARCHIVE_STATE_NEW, "archive_read_support_format_zip"); 166 167 zip = (struct zip *)malloc(sizeof(*zip)); 168 if (zip == NULL) { 169 archive_set_error(&a->archive, ENOMEM, 170 "Can't allocate zip data"); 171 return (ARCHIVE_FATAL); 172 } 173 memset(zip, 0, sizeof(*zip)); 174 175 r = __archive_read_register_format(a, 176 zip, 177 "zip", 178 archive_read_format_zip_streamable_bid, 179 archive_read_format_zip_options, 180 archive_read_format_zip_streamable_read_header, 181 archive_read_format_zip_read_data, 182 archive_read_format_zip_read_data_skip, 183 NULL, 184 archive_read_format_zip_cleanup); 185 186 if (r != ARCHIVE_OK) 187 free(zip); 188 return (ARCHIVE_OK); 189 } 190 191 int 192 archive_read_support_format_zip_seekable(struct archive *_a) 193 { 194 struct archive_read *a = (struct archive_read *)_a; 195 struct zip *zip; 196 int r; 197 198 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 199 ARCHIVE_STATE_NEW, "archive_read_support_format_zip_seekable"); 200 201 zip = (struct zip *)malloc(sizeof(*zip)); 202 if (zip == NULL) { 203 archive_set_error(&a->archive, ENOMEM, 204 "Can't allocate zip data"); 205 return (ARCHIVE_FATAL); 206 } 207 memset(zip, 0, sizeof(*zip)); 208 209 r = __archive_read_register_format(a, 210 zip, 211 "zip", 212 archive_read_format_zip_seekable_bid, 213 archive_read_format_zip_options, 214 archive_read_format_zip_seekable_read_header, 215 archive_read_format_zip_read_data, 216 archive_read_format_zip_read_data_skip, 217 NULL, 218 archive_read_format_zip_cleanup); 219 220 if (r != ARCHIVE_OK) 221 free(zip); 222 return (ARCHIVE_OK); 223 } 224 225 int 226 archive_read_support_format_zip(struct archive *a) 227 { 228 int r; 229 r = archive_read_support_format_zip_streamable(a); 230 if (r != ARCHIVE_OK) 231 return r; 232 return (archive_read_support_format_zip_seekable(a)); 233 } 234 235 /* 236 * TODO: This is a performance sink because it forces the read core to 237 * drop buffered data from the start of file, which will then have to 238 * be re-read again if this bidder loses. 239 * 240 * We workaround this a little by passing in the best bid so far so 241 * that later bidders can do nothing if they know they'll never 242 * outbid. But we can certainly do better... 243 */ 244 static int 245 archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid) 246 { 247 struct zip *zip = (struct zip *)a->format->data; 248 int64_t filesize; 249 const char *p; 250 251 /* If someone has already bid more than 32, then avoid 252 trashing the look-ahead buffers with a seek. */ 253 if (best_bid > 32) 254 return (-1); 255 256 filesize = __archive_read_seek(a, -22, SEEK_END); 257 /* If we can't seek, then we can't bid. */ 258 if (filesize <= 0) 259 return 0; 260 261 /* TODO: More robust search for end of central directory record. */ 262 if ((p = __archive_read_ahead(a, 22, NULL)) == NULL) 263 return 0; 264 /* First four bytes are signature for end of central directory 265 record. Four zero bytes ensure this isn't a multi-volume 266 Zip file (which we don't yet support). */ 267 if (memcmp(p, "PK\005\006\000\000\000\000", 8) != 0) { 268 int64_t i, tail; 269 int found; 270 271 /* 272 * If there is a comment in end of central directory 273 * record, 22 bytes are too short. we have to read more 274 * to properly detect the record. Hopefully, a length 275 * of the comment is not longer than 16362 bytes(16K-22). 276 */ 277 if (filesize + 22 > 1024 * 16) { 278 tail = 1024 * 16; 279 filesize = __archive_read_seek(a, tail * -1, SEEK_END); 280 } else { 281 tail = filesize + 22; 282 filesize = __archive_read_seek(a, 0, SEEK_SET); 283 } 284 if (filesize < 0) 285 return 0; 286 if ((p = __archive_read_ahead(a, (size_t)tail, NULL)) == NULL) 287 return 0; 288 for (found = 0, i = 0;!found && i < tail - 22;) { 289 switch (p[i]) { 290 case 'P': 291 if (memcmp(p+i, 292 "PK\005\006\000\000\000\000", 8) == 0) { 293 p += i; 294 filesize += tail - 295 (22 + archive_le16dec(p+20)); 296 found = 1; 297 } else 298 i += 8; 299 break; 300 case 'K': i += 7; break; 301 case 005: i += 6; break; 302 case 006: i += 5; break; 303 default: i += 1; break; 304 } 305 } 306 if (!found) 307 return 0; 308 } 309 310 /* Since we've already done the hard work of finding the 311 end of central directory record, let's save the important 312 information. */ 313 zip->central_directory_entries = archive_le16dec(p + 10); 314 zip->central_directory_size = archive_le32dec(p + 12); 315 zip->central_directory_offset = archive_le32dec(p + 16); 316 zip->end_of_central_directory_offset = filesize; 317 318 /* Just one volume, so central dir must all be on this volume. */ 319 if (zip->central_directory_entries != archive_le16dec(p + 8)) 320 return 0; 321 /* Central directory can't extend beyond end of this file. */ 322 if (zip->central_directory_offset + 323 (int64_t)zip->central_directory_size > filesize) 324 return 0; 325 326 /* This is just a tiny bit higher than the maximum returned by 327 the streaming Zip bidder. This ensures that the more accurate 328 seeking Zip parser wins whenever seek is available. */ 329 return 32; 330 } 331 332 static int 333 cmp_node(const struct archive_rb_node *n1, const struct archive_rb_node *n2) 334 { 335 const struct zip_entry *e1 = (const struct zip_entry *)n1; 336 const struct zip_entry *e2 = (const struct zip_entry *)n2; 337 338 return ((int)(e2->local_header_offset - e1->local_header_offset)); 339 } 340 341 static int 342 cmp_key(const struct archive_rb_node *n, const void *key) 343 { 344 /* This function won't be called */ 345 (void)n; /* UNUSED */ 346 (void)key; /* UNUSED */ 347 return 1; 348 } 349 350 static int 351 rsrc_cmp_node(const struct archive_rb_node *n1, 352 const struct archive_rb_node *n2) 353 { 354 const struct zip_entry *e1 = (const struct zip_entry *)n1; 355 const struct zip_entry *e2 = (const struct zip_entry *)n2; 356 357 return (strcmp(e2->rsrcname.s, e1->rsrcname.s)); 358 } 359 360 static int 361 rsrc_cmp_key(const struct archive_rb_node *n, const void *key) 362 { 363 const struct zip_entry *e = (const struct zip_entry *)n; 364 return (strcmp((const char *)key, e->rsrcname.s)); 365 } 366 367 static const char * 368 rsrc_basename(const char *name, size_t name_length) 369 { 370 const char *s, *r; 371 372 r = s = name; 373 for (;;) { 374 s = memchr(s, '/', name_length - (s - name)); 375 if (s == NULL) 376 break; 377 r = ++s; 378 } 379 return (r); 380 } 381 382 static void 383 expose_parent_dirs(struct zip *zip, const char *name, size_t name_length) 384 { 385 struct archive_string str; 386 struct zip_entry *dir; 387 char *s; 388 389 archive_string_init(&str); 390 archive_strncpy(&str, name, name_length); 391 for (;;) { 392 s = strrchr(str.s, '/'); 393 if (s == NULL) 394 break; 395 *s = '\0'; 396 /* Transfer the parent directory from zip->tree_rsrc RB 397 * tree to zip->tree RB tree to expose. */ 398 dir = (struct zip_entry *) 399 __archive_rb_tree_find_node(&zip->tree_rsrc, str.s); 400 if (dir == NULL) 401 break; 402 __archive_rb_tree_remove_node(&zip->tree_rsrc, &dir->node); 403 archive_string_free(&dir->rsrcname); 404 __archive_rb_tree_insert_node(&zip->tree, &dir->node); 405 } 406 archive_string_free(&str); 407 } 408 409 static int 410 slurp_central_directory(struct archive_read *a, struct zip *zip) 411 { 412 unsigned i; 413 int64_t correction; 414 static const struct archive_rb_tree_ops rb_ops = { 415 &cmp_node, &cmp_key 416 }; 417 static const struct archive_rb_tree_ops rb_rsrc_ops = { 418 &rsrc_cmp_node, &rsrc_cmp_key 419 }; 420 421 /* 422 * Consider the archive file we are reading may be SFX. 423 * So we have to calculate a SFX header size to revise 424 * ZIP header offsets. 425 */ 426 correction = zip->end_of_central_directory_offset - 427 (zip->central_directory_offset + zip->central_directory_size); 428 /* The central directory offset is relative value, and so 429 * we revise this offset for SFX. */ 430 zip->central_directory_offset += correction; 431 432 __archive_read_seek(a, zip->central_directory_offset, SEEK_SET); 433 zip->offset = zip->central_directory_offset; 434 __archive_rb_tree_init(&zip->tree, &rb_ops); 435 __archive_rb_tree_init(&zip->tree_rsrc, &rb_rsrc_ops); 436 437 zip->zip_entries = calloc(zip->central_directory_entries, 438 sizeof(struct zip_entry)); 439 for (i = 0; i < zip->central_directory_entries; ++i) { 440 struct zip_entry *zip_entry = &zip->zip_entries[i]; 441 size_t filename_length, extra_length, comment_length; 442 uint32_t external_attributes; 443 const char *name, *p, *r; 444 445 if ((p = __archive_read_ahead(a, 46, NULL)) == NULL) 446 return ARCHIVE_FATAL; 447 if (memcmp(p, "PK\001\002", 4) != 0) { 448 archive_set_error(&a->archive, 449 -1, "Invalid central directory signature"); 450 return ARCHIVE_FATAL; 451 } 452 zip->have_central_directory = 1; 453 /* version = p[4]; */ 454 zip_entry->system = p[5]; 455 /* version_required = archive_le16dec(p + 6); */ 456 zip_entry->flags = archive_le16dec(p + 8); 457 zip_entry->compression = (char)archive_le16dec(p + 10); 458 zip_entry->mtime = zip_time(p + 12); 459 zip_entry->crc32 = archive_le32dec(p + 16); 460 zip_entry->compressed_size = archive_le32dec(p + 20); 461 zip_entry->uncompressed_size = archive_le32dec(p + 24); 462 filename_length = archive_le16dec(p + 28); 463 extra_length = archive_le16dec(p + 30); 464 comment_length = archive_le16dec(p + 32); 465 /* disk_start = archive_le16dec(p + 34); */ /* Better be zero. */ 466 /* internal_attributes = archive_le16dec(p + 36); */ /* text bit */ 467 external_attributes = archive_le32dec(p + 38); 468 zip_entry->local_header_offset = 469 archive_le32dec(p + 42) + correction; 470 471 /* If we can't guess the mode, leave it zero here; 472 when we read the local file header we might get 473 more information. */ 474 zip_entry->mode = 0; 475 if (zip_entry->system == 3) { 476 zip_entry->mode = external_attributes >> 16; 477 } 478 479 /* 480 * Mac resource fork files are stored under the 481 * "__MACOSX/" directory, so we should check if 482 * it is. 483 */ 484 /* Make sure we have the file name. */ 485 if ((p = __archive_read_ahead(a, 46 + filename_length, NULL)) 486 == NULL) 487 return ARCHIVE_FATAL; 488 name = p + 46; 489 r = rsrc_basename(name, filename_length); 490 if (filename_length >= 9 && 491 strncmp("__MACOSX/", name, 9) == 0) { 492 /* If this file is not a resource fork nor 493 * a directory. We should treat it as a non 494 * resource fork file to expose it. */ 495 if (name[filename_length-1] != '/' && 496 (r - name < 3 || r[0] != '.' || r[1] != '_')) { 497 __archive_rb_tree_insert_node(&zip->tree, 498 &zip_entry->node); 499 /* Expose its parent directories. */ 500 expose_parent_dirs(zip, name, filename_length); 501 } else { 502 /* This file is a resource fork file or 503 * a directory. */ 504 archive_strncpy(&(zip_entry->rsrcname), name, 505 filename_length); 506 __archive_rb_tree_insert_node(&zip->tree_rsrc, 507 &zip_entry->node); 508 } 509 } else { 510 /* Generate resource fork name to find its resource 511 * file at zip->tree_rsrc. */ 512 archive_strcpy(&(zip_entry->rsrcname), "__MACOSX/"); 513 archive_strncat(&(zip_entry->rsrcname), name, r - name); 514 archive_strcat(&(zip_entry->rsrcname), "._"); 515 archive_strncat(&(zip_entry->rsrcname), 516 name + (r - name), filename_length - (r - name)); 517 /* Register an entry to RB tree to sort it by 518 * file offset. */ 519 __archive_rb_tree_insert_node(&zip->tree, 520 &zip_entry->node); 521 } 522 523 /* We don't read the filename until we get to the 524 local file header. Reading it here would speed up 525 table-of-contents operations (removing the need to 526 find and read local file header to get the 527 filename) at the cost of requiring a lot of extra 528 space. */ 529 /* We don't read the extra block here. We assume it 530 will be duplicated at the local file header. */ 531 __archive_read_consume(a, 532 46 + filename_length + extra_length + comment_length); 533 } 534 535 return ARCHIVE_OK; 536 } 537 538 static int64_t 539 zip_read_consume(struct archive_read *a, int64_t bytes) 540 { 541 struct zip *zip = (struct zip *)a->format->data; 542 int64_t skip; 543 544 skip = __archive_read_consume(a, bytes); 545 if (skip > 0) 546 zip->offset += skip; 547 return (skip); 548 } 549 550 static int 551 zip_read_mac_metadata(struct archive_read *a, struct archive_entry *entry, 552 struct zip_entry *rsrc) 553 { 554 struct zip *zip = (struct zip *)a->format->data; 555 unsigned char *metadata, *mp; 556 int64_t offset = zip->offset; 557 size_t remaining_bytes, metadata_bytes; 558 ssize_t hsize; 559 int ret = ARCHIVE_OK, eof; 560 561 switch(rsrc->compression) { 562 case 0: /* No compression. */ 563 #ifdef HAVE_ZLIB_H 564 case 8: /* Deflate compression. */ 565 #endif 566 break; 567 default: /* Unsupported compression. */ 568 /* Return a warning. */ 569 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 570 "Unsupported ZIP compression method (%s)", 571 compression_name(rsrc->compression)); 572 /* We can't decompress this entry, but we will 573 * be able to skip() it and try the next entry. */ 574 return (ARCHIVE_WARN); 575 } 576 577 if (rsrc->uncompressed_size > (128 * 1024)) { 578 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 579 "Mac metadata is too large: %jd > 128K bytes", 580 (intmax_t)rsrc->uncompressed_size); 581 return (ARCHIVE_WARN); 582 } 583 584 metadata = malloc((size_t)rsrc->uncompressed_size); 585 if (metadata == NULL) { 586 archive_set_error(&a->archive, ENOMEM, 587 "Can't allocate memory for Mac metadata"); 588 return (ARCHIVE_FATAL); 589 } 590 591 if (zip->offset < rsrc->local_header_offset) 592 zip_read_consume(a, rsrc->local_header_offset - zip->offset); 593 else if (zip->offset != rsrc->local_header_offset) { 594 __archive_read_seek(a, rsrc->local_header_offset, SEEK_SET); 595 zip->offset = zip->entry->local_header_offset; 596 } 597 598 hsize = zip_get_local_file_header_size(a, 0); 599 zip_read_consume(a, hsize); 600 601 remaining_bytes = (size_t)rsrc->compressed_size; 602 metadata_bytes = (size_t)rsrc->uncompressed_size; 603 mp = metadata; 604 eof = 0; 605 while (!eof && remaining_bytes) { 606 const unsigned char *p; 607 ssize_t bytes_avail; 608 size_t bytes_used; 609 610 p = __archive_read_ahead(a, 1, &bytes_avail); 611 if (p == NULL) { 612 archive_set_error(&a->archive, 613 ARCHIVE_ERRNO_FILE_FORMAT, 614 "Truncated ZIP file header"); 615 ret = ARCHIVE_WARN; 616 goto exit_mac_metadata; 617 } 618 if ((size_t)bytes_avail > remaining_bytes) 619 bytes_avail = remaining_bytes; 620 switch(rsrc->compression) { 621 case 0: /* No compression. */ 622 memcpy(mp, p, bytes_avail); 623 bytes_used = (size_t)bytes_avail; 624 metadata_bytes -= bytes_used; 625 mp += bytes_used; 626 if (metadata_bytes == 0) 627 eof = 1; 628 break; 629 #ifdef HAVE_ZLIB_H 630 case 8: /* Deflate compression. */ 631 { 632 int r; 633 634 ret = zip_deflate_init(a, zip); 635 if (ret != ARCHIVE_OK) 636 goto exit_mac_metadata; 637 zip->stream.next_in = 638 (Bytef *)(uintptr_t)(const void *)p; 639 zip->stream.avail_in = (uInt)bytes_avail; 640 zip->stream.total_in = 0; 641 zip->stream.next_out = mp; 642 zip->stream.avail_out = (uInt)metadata_bytes; 643 zip->stream.total_out = 0; 644 645 r = inflate(&zip->stream, 0); 646 switch (r) { 647 case Z_OK: 648 break; 649 case Z_STREAM_END: 650 eof = 1; 651 break; 652 case Z_MEM_ERROR: 653 archive_set_error(&a->archive, ENOMEM, 654 "Out of memory for ZIP decompression"); 655 ret = ARCHIVE_FATAL; 656 goto exit_mac_metadata; 657 default: 658 archive_set_error(&a->archive, 659 ARCHIVE_ERRNO_MISC, 660 "ZIP decompression failed (%d)", r); 661 ret = ARCHIVE_FATAL; 662 goto exit_mac_metadata; 663 } 664 bytes_used = zip->stream.total_in; 665 metadata_bytes -= zip->stream.total_out; 666 mp += zip->stream.total_out; 667 break; 668 } 669 #endif 670 default: 671 bytes_used = 0; 672 break; 673 } 674 zip_read_consume(a, bytes_used); 675 remaining_bytes -= bytes_used; 676 } 677 archive_entry_copy_mac_metadata(entry, metadata, 678 (size_t)rsrc->uncompressed_size - metadata_bytes); 679 680 __archive_read_seek(a, offset, SEEK_SET); 681 zip->offset = offset; 682 exit_mac_metadata: 683 zip->decompress_init = 0; 684 free(metadata); 685 return (ret); 686 } 687 688 static int 689 archive_read_format_zip_seekable_read_header(struct archive_read *a, 690 struct archive_entry *entry) 691 { 692 struct zip *zip = (struct zip *)a->format->data; 693 struct zip_entry *rsrc; 694 int r, ret = ARCHIVE_OK; 695 696 a->archive.archive_format = ARCHIVE_FORMAT_ZIP; 697 if (a->archive.archive_format_name == NULL) 698 a->archive.archive_format_name = "ZIP"; 699 700 if (zip->zip_entries == NULL) { 701 r = slurp_central_directory(a, zip); 702 zip->entries_remaining = zip->central_directory_entries; 703 if (r != ARCHIVE_OK) 704 return r; 705 /* Get first entry whose local header offset is lower than 706 * other entries in the archive file. */ 707 zip->entry = 708 (struct zip_entry *)ARCHIVE_RB_TREE_MIN(&zip->tree); 709 } else if (zip->entry != NULL) { 710 /* Get next entry in local header offset order. */ 711 zip->entry = (struct zip_entry *)__archive_rb_tree_iterate( 712 &zip->tree, &zip->entry->node, ARCHIVE_RB_DIR_RIGHT); 713 } 714 715 if (zip->entries_remaining <= 0 || zip->entry == NULL) 716 return ARCHIVE_EOF; 717 --zip->entries_remaining; 718 719 if (zip->entry->rsrcname.s) 720 rsrc = (struct zip_entry *)__archive_rb_tree_find_node( 721 &zip->tree_rsrc, zip->entry->rsrcname.s); 722 else 723 rsrc = NULL; 724 725 /* File entries are sorted by the header offset, we should mostly 726 * use zip_read_consume to advance a read point to avoid redundant 727 * data reading. */ 728 if (zip->offset < zip->entry->local_header_offset) 729 zip_read_consume(a, 730 zip->entry->local_header_offset - zip->offset); 731 else if (zip->offset != zip->entry->local_header_offset) { 732 __archive_read_seek(a, zip->entry->local_header_offset, 733 SEEK_SET); 734 zip->offset = zip->entry->local_header_offset; 735 } 736 zip->unconsumed = 0; 737 r = zip_read_local_file_header(a, entry, zip); 738 if (r != ARCHIVE_OK) 739 return r; 740 if ((zip->entry->mode & AE_IFMT) == AE_IFLNK) { 741 const void *p; 742 struct archive_string_conv *sconv; 743 size_t linkname_length = (size_t)archive_entry_size(entry); 744 745 archive_entry_set_size(entry, 0); 746 p = __archive_read_ahead(a, linkname_length, NULL); 747 if (p == NULL) { 748 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 749 "Truncated Zip file"); 750 return ARCHIVE_FATAL; 751 } 752 753 sconv = zip->sconv; 754 if (sconv == NULL && (zip->entry->flags & ZIP_UTF8_NAME)) 755 sconv = zip->sconv_utf8; 756 if (sconv == NULL) 757 sconv = zip->sconv_default; 758 if (archive_entry_copy_symlink_l(entry, p, linkname_length, 759 sconv) != 0) { 760 if (errno != ENOMEM && sconv == zip->sconv_utf8 && 761 (zip->entry->flags & ZIP_UTF8_NAME)) 762 archive_entry_copy_symlink_l(entry, p, 763 linkname_length, NULL); 764 if (errno == ENOMEM) { 765 archive_set_error(&a->archive, ENOMEM, 766 "Can't allocate memory for Symlink"); 767 return (ARCHIVE_FATAL); 768 } 769 /* 770 * Since there is no character-set regulation for 771 * symlink name, do not report the conversion error 772 * in an automatic conversion. 773 */ 774 if (sconv != zip->sconv_utf8 || 775 (zip->entry->flags & ZIP_UTF8_NAME) == 0) { 776 archive_set_error(&a->archive, 777 ARCHIVE_ERRNO_FILE_FORMAT, 778 "Symlink cannot be converted " 779 "from %s to current locale.", 780 archive_string_conversion_charset_name( 781 sconv)); 782 ret = ARCHIVE_WARN; 783 } 784 } 785 } 786 if (rsrc) { 787 int ret2 = zip_read_mac_metadata(a, entry, rsrc); 788 if (ret2 < ret) 789 ret = ret2; 790 } 791 return (ret); 792 } 793 794 static int 795 archive_read_format_zip_streamable_bid(struct archive_read *a, int best_bid) 796 { 797 const char *p; 798 799 (void)best_bid; /* UNUSED */ 800 801 if ((p = __archive_read_ahead(a, 4, NULL)) == NULL) 802 return (-1); 803 804 /* 805 * Bid of 30 here is: 16 bits for "PK", 806 * next 16-bit field has four options (-2 bits). 807 * 16 + 16-2 = 30. 808 */ 809 if (p[0] == 'P' && p[1] == 'K') { 810 if ((p[2] == '\001' && p[3] == '\002') 811 || (p[2] == '\003' && p[3] == '\004') 812 || (p[2] == '\005' && p[3] == '\006') 813 || (p[2] == '\007' && p[3] == '\010') 814 || (p[2] == '0' && p[3] == '0')) 815 return (30); 816 } 817 818 /* TODO: It's worth looking ahead a little bit for a valid 819 * PK signature. In particular, that would make it possible 820 * to read some UUEncoded SFX files or SFX files coming from 821 * a network socket. */ 822 823 return (0); 824 } 825 826 static int 827 archive_read_format_zip_options(struct archive_read *a, 828 const char *key, const char *val) 829 { 830 struct zip *zip; 831 int ret = ARCHIVE_FAILED; 832 833 zip = (struct zip *)(a->format->data); 834 if (strcmp(key, "compat-2x") == 0) { 835 /* Handle filnames as libarchive 2.x */ 836 zip->init_default_conversion = (val != NULL) ? 1 : 0; 837 return (ARCHIVE_OK); 838 } else if (strcmp(key, "hdrcharset") == 0) { 839 if (val == NULL || val[0] == 0) 840 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 841 "zip: hdrcharset option needs a character-set name" 842 ); 843 else { 844 zip->sconv = archive_string_conversion_from_charset( 845 &a->archive, val, 0); 846 if (zip->sconv != NULL) { 847 if (strcmp(val, "UTF-8") == 0) 848 zip->sconv_utf8 = zip->sconv; 849 ret = ARCHIVE_OK; 850 } else 851 ret = ARCHIVE_FATAL; 852 } 853 return (ret); 854 } 855 856 /* Note: The "warn" return is just to inform the options 857 * supervisor that we didn't handle it. It will generate 858 * a suitable error if no one used this option. */ 859 return (ARCHIVE_WARN); 860 } 861 862 static int 863 archive_read_format_zip_streamable_read_header(struct archive_read *a, 864 struct archive_entry *entry) 865 { 866 struct zip *zip; 867 868 a->archive.archive_format = ARCHIVE_FORMAT_ZIP; 869 if (a->archive.archive_format_name == NULL) 870 a->archive.archive_format_name = "ZIP"; 871 872 zip = (struct zip *)(a->format->data); 873 874 /* Make sure we have a zip_entry structure to use. */ 875 if (zip->zip_entries == NULL) { 876 zip->zip_entries = malloc(sizeof(struct zip_entry)); 877 if (zip->zip_entries == NULL) { 878 archive_set_error(&a->archive, ENOMEM, 879 "Out of memory"); 880 return ARCHIVE_FATAL; 881 } 882 } 883 zip->entry = zip->zip_entries; 884 memset(zip->entry, 0, sizeof(struct zip_entry)); 885 886 /* Search ahead for the next local file header. */ 887 zip_read_consume(a, zip->unconsumed); 888 zip->unconsumed = 0; 889 for (;;) { 890 int64_t skipped = 0; 891 const char *p, *end; 892 ssize_t bytes; 893 894 p = __archive_read_ahead(a, 4, &bytes); 895 if (p == NULL) 896 return (ARCHIVE_FATAL); 897 end = p + bytes; 898 899 while (p + 4 <= end) { 900 if (p[0] == 'P' && p[1] == 'K') { 901 if (p[2] == '\001' && p[3] == '\002') 902 /* Beginning of central directory. */ 903 return (ARCHIVE_EOF); 904 905 if (p[2] == '\003' && p[3] == '\004') { 906 /* Regular file entry. */ 907 zip_read_consume(a, skipped); 908 return zip_read_local_file_header(a, 909 entry, zip); 910 } 911 912 if (p[2] == '\005' && p[3] == '\006') 913 /* End of central directory. */ 914 return (ARCHIVE_EOF); 915 } 916 ++p; 917 ++skipped; 918 } 919 zip_read_consume(a, skipped); 920 } 921 } 922 923 static ssize_t 924 zip_get_local_file_header_size(struct archive_read *a, size_t extra) 925 { 926 const char *p; 927 ssize_t filename_length, extra_length; 928 929 if ((p = __archive_read_ahead(a, extra + 30, NULL)) == NULL) { 930 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 931 "Truncated ZIP file header"); 932 return (ARCHIVE_WARN); 933 } 934 p += extra; 935 936 if (memcmp(p, "PK\003\004", 4) != 0) { 937 archive_set_error(&a->archive, -1, "Damaged Zip archive"); 938 return ARCHIVE_WARN; 939 } 940 filename_length = archive_le16dec(p + 26); 941 extra_length = archive_le16dec(p + 28); 942 943 return (30 + filename_length + extra_length); 944 } 945 946 /* 947 * Assumes file pointer is at beginning of local file header. 948 */ 949 static int 950 zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, 951 struct zip *zip) 952 { 953 const char *p; 954 const void *h; 955 const wchar_t *wp; 956 const char *cp; 957 size_t len, filename_length, extra_length; 958 struct archive_string_conv *sconv; 959 struct zip_entry *zip_entry = zip->entry; 960 uint32_t local_crc32; 961 int64_t compressed_size, uncompressed_size; 962 int ret = ARCHIVE_OK; 963 char version; 964 965 zip->decompress_init = 0; 966 zip->end_of_entry = 0; 967 zip->entry_uncompressed_bytes_read = 0; 968 zip->entry_compressed_bytes_read = 0; 969 zip->entry_crc32 = crc32(0, NULL, 0); 970 971 /* Setup default conversion. */ 972 if (zip->sconv == NULL && !zip->init_default_conversion) { 973 zip->sconv_default = 974 archive_string_default_conversion_for_read(&(a->archive)); 975 zip->init_default_conversion = 1; 976 } 977 978 if ((p = __archive_read_ahead(a, 30, NULL)) == NULL) { 979 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 980 "Truncated ZIP file header"); 981 return (ARCHIVE_FATAL); 982 } 983 984 if (memcmp(p, "PK\003\004", 4) != 0) { 985 archive_set_error(&a->archive, -1, "Damaged Zip archive"); 986 return ARCHIVE_FATAL; 987 } 988 version = p[4]; 989 zip_entry->system = p[5]; 990 zip_entry->flags = archive_le16dec(p + 6); 991 zip_entry->compression = (char)archive_le16dec(p + 8); 992 zip_entry->mtime = zip_time(p + 10); 993 local_crc32 = archive_le32dec(p + 14); 994 compressed_size = archive_le32dec(p + 18); 995 uncompressed_size = archive_le32dec(p + 22); 996 filename_length = archive_le16dec(p + 26); 997 extra_length = archive_le16dec(p + 28); 998 999 zip_read_consume(a, 30); 1000 1001 if (zip->have_central_directory) { 1002 /* If we read the central dir entry, we must have size 1003 * information as well, so ignore the length-at-end flag. */ 1004 zip_entry->flags &= ~ZIP_LENGTH_AT_END; 1005 /* If we have values from both the local file header 1006 and the central directory, warn about mismatches 1007 which might indicate a damaged file. But some 1008 writers always put zero in the local header; don't 1009 bother warning about that. */ 1010 if (local_crc32 != 0 && local_crc32 != zip_entry->crc32) { 1011 archive_set_error(&a->archive, 1012 ARCHIVE_ERRNO_FILE_FORMAT, 1013 "Inconsistent CRC32 values"); 1014 ret = ARCHIVE_WARN; 1015 } 1016 if (compressed_size != 0 1017 && compressed_size != zip_entry->compressed_size) { 1018 archive_set_error(&a->archive, 1019 ARCHIVE_ERRNO_FILE_FORMAT, 1020 "Inconsistent compressed size"); 1021 ret = ARCHIVE_WARN; 1022 } 1023 if (uncompressed_size != 0 1024 && uncompressed_size != zip_entry->uncompressed_size) { 1025 archive_set_error(&a->archive, 1026 ARCHIVE_ERRNO_FILE_FORMAT, 1027 "Inconsistent uncompressed size"); 1028 ret = ARCHIVE_WARN; 1029 } 1030 } else { 1031 /* If we don't have the CD info, use whatever we do have. */ 1032 zip_entry->crc32 = local_crc32; 1033 zip_entry->compressed_size = compressed_size; 1034 zip_entry->uncompressed_size = uncompressed_size; 1035 } 1036 1037 /* Read the filename. */ 1038 if ((h = __archive_read_ahead(a, filename_length, NULL)) == NULL) { 1039 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1040 "Truncated ZIP file header"); 1041 return (ARCHIVE_FATAL); 1042 } 1043 if (zip_entry->flags & ZIP_UTF8_NAME) { 1044 /* The filename is stored to be UTF-8. */ 1045 if (zip->sconv_utf8 == NULL) { 1046 zip->sconv_utf8 = 1047 archive_string_conversion_from_charset( 1048 &a->archive, "UTF-8", 1); 1049 if (zip->sconv_utf8 == NULL) 1050 return (ARCHIVE_FATAL); 1051 } 1052 sconv = zip->sconv_utf8; 1053 } else if (zip->sconv != NULL) 1054 sconv = zip->sconv; 1055 else 1056 sconv = zip->sconv_default; 1057 1058 if (archive_entry_copy_pathname_l(entry, 1059 h, filename_length, sconv) != 0) { 1060 if (errno == ENOMEM) { 1061 archive_set_error(&a->archive, ENOMEM, 1062 "Can't allocate memory for Pathname"); 1063 return (ARCHIVE_FATAL); 1064 } 1065 archive_set_error(&a->archive, 1066 ARCHIVE_ERRNO_FILE_FORMAT, 1067 "Pathname cannot be converted " 1068 "from %s to current locale.", 1069 archive_string_conversion_charset_name(sconv)); 1070 ret = ARCHIVE_WARN; 1071 } 1072 zip_read_consume(a, filename_length); 1073 1074 if (zip_entry->mode == 0) { 1075 /* Especially in streaming mode, we can end up 1076 here without having seen any mode information. 1077 Guess from the filename. */ 1078 wp = archive_entry_pathname_w(entry); 1079 if (wp != NULL) { 1080 len = wcslen(wp); 1081 if (len > 0 && wp[len - 1] == L'/') 1082 zip_entry->mode = AE_IFDIR | 0777; 1083 else 1084 zip_entry->mode = AE_IFREG | 0666; 1085 } else { 1086 cp = archive_entry_pathname(entry); 1087 len = (cp != NULL)?strlen(cp):0; 1088 if (len > 0 && cp[len - 1] == '/') 1089 zip_entry->mode = AE_IFDIR | 0777; 1090 else 1091 zip_entry->mode = AE_IFREG | 0666; 1092 } 1093 } 1094 1095 /* Read the extra data. */ 1096 if ((h = __archive_read_ahead(a, extra_length, NULL)) == NULL) { 1097 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1098 "Truncated ZIP file header"); 1099 return (ARCHIVE_FATAL); 1100 } 1101 process_extra(h, extra_length, zip_entry); 1102 zip_read_consume(a, extra_length); 1103 1104 /* Populate some additional entry fields: */ 1105 archive_entry_set_mode(entry, zip_entry->mode); 1106 archive_entry_set_uid(entry, zip_entry->uid); 1107 archive_entry_set_gid(entry, zip_entry->gid); 1108 archive_entry_set_mtime(entry, zip_entry->mtime, 0); 1109 archive_entry_set_ctime(entry, zip_entry->ctime, 0); 1110 archive_entry_set_atime(entry, zip_entry->atime, 0); 1111 /* Set the size only if it's meaningful. */ 1112 if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END)) 1113 archive_entry_set_size(entry, zip_entry->uncompressed_size); 1114 1115 zip->entry_bytes_remaining = zip_entry->compressed_size; 1116 1117 /* If there's no body, force read_data() to return EOF immediately. */ 1118 if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END) 1119 && zip->entry_bytes_remaining < 1) 1120 zip->end_of_entry = 1; 1121 1122 /* Set up a more descriptive format name. */ 1123 sprintf(zip->format_name, "ZIP %d.%d (%s)", 1124 version / 10, version % 10, 1125 compression_name(zip->entry->compression)); 1126 a->archive.archive_format_name = zip->format_name; 1127 1128 return (ret); 1129 } 1130 1131 static const char * 1132 compression_name(int compression) 1133 { 1134 static const char *compression_names[] = { 1135 "uncompressed", 1136 "shrinking", 1137 "reduced-1", 1138 "reduced-2", 1139 "reduced-3", 1140 "reduced-4", 1141 "imploded", 1142 "reserved", 1143 "deflation" 1144 }; 1145 1146 if (0 <= compression && compression < 1147 (int)(sizeof(compression_names)/sizeof(compression_names[0]))) 1148 return compression_names[compression]; 1149 else 1150 return "??"; 1151 } 1152 1153 /* Convert an MSDOS-style date/time into Unix-style time. */ 1154 static time_t 1155 zip_time(const char *p) 1156 { 1157 int msTime, msDate; 1158 struct tm ts; 1159 1160 msTime = (0xff & (unsigned)p[0]) + 256 * (0xff & (unsigned)p[1]); 1161 msDate = (0xff & (unsigned)p[2]) + 256 * (0xff & (unsigned)p[3]); 1162 1163 memset(&ts, 0, sizeof(ts)); 1164 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ 1165 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ 1166 ts.tm_mday = msDate & 0x1f; /* Day of month. */ 1167 ts.tm_hour = (msTime >> 11) & 0x1f; 1168 ts.tm_min = (msTime >> 5) & 0x3f; 1169 ts.tm_sec = (msTime << 1) & 0x3e; 1170 ts.tm_isdst = -1; 1171 return mktime(&ts); 1172 } 1173 1174 static int 1175 archive_read_format_zip_read_data(struct archive_read *a, 1176 const void **buff, size_t *size, int64_t *offset) 1177 { 1178 int r; 1179 struct zip *zip = (struct zip *)(a->format->data); 1180 1181 *offset = zip->entry_uncompressed_bytes_read; 1182 *size = 0; 1183 *buff = NULL; 1184 1185 /* If we hit end-of-entry last time, return ARCHIVE_EOF. */ 1186 if (zip->end_of_entry) 1187 return (ARCHIVE_EOF); 1188 1189 /* Return EOF immediately if this is a non-regular file. */ 1190 if (AE_IFREG != (zip->entry->mode & AE_IFMT)) 1191 return (ARCHIVE_EOF); 1192 1193 if (zip->entry->flags & (ZIP_ENCRYPTED | ZIP_STRONG_ENCRYPTED)) { 1194 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1195 "Encrypted file is unsupported"); 1196 return (ARCHIVE_FAILED); 1197 } 1198 1199 zip_read_consume(a, zip->unconsumed); 1200 zip->unconsumed = 0; 1201 1202 switch(zip->entry->compression) { 1203 case 0: /* No compression. */ 1204 r = zip_read_data_none(a, buff, size, offset); 1205 break; 1206 #ifdef HAVE_ZLIB_H 1207 case 8: /* Deflate compression. */ 1208 r = zip_read_data_deflate(a, buff, size, offset); 1209 break; 1210 #endif 1211 default: /* Unsupported compression. */ 1212 /* Return a warning. */ 1213 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1214 "Unsupported ZIP compression method (%s)", 1215 compression_name(zip->entry->compression)); 1216 /* We can't decompress this entry, but we will 1217 * be able to skip() it and try the next entry. */ 1218 return (ARCHIVE_FAILED); 1219 break; 1220 } 1221 if (r != ARCHIVE_OK) 1222 return (r); 1223 /* Update checksum */ 1224 if (*size) 1225 zip->entry_crc32 = crc32(zip->entry_crc32, *buff, 1226 (unsigned)*size); 1227 /* If we hit the end, swallow any end-of-data marker. */ 1228 if (zip->end_of_entry) { 1229 /* Check file size, CRC against these values. */ 1230 if (zip->entry->compressed_size != 1231 zip->entry_compressed_bytes_read) { 1232 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1233 "ZIP compressed data is wrong size " 1234 "(read %jd, expected %jd)", 1235 (intmax_t)zip->entry_compressed_bytes_read, 1236 (intmax_t)zip->entry->compressed_size); 1237 return (ARCHIVE_WARN); 1238 } 1239 /* Size field only stores the lower 32 bits of the actual 1240 * size. */ 1241 if ((zip->entry->uncompressed_size & UINT32_MAX) 1242 != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) { 1243 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1244 "ZIP uncompressed data is wrong size " 1245 "(read %jd, expected %jd)", 1246 (intmax_t)zip->entry_uncompressed_bytes_read, 1247 (intmax_t)zip->entry->uncompressed_size); 1248 return (ARCHIVE_WARN); 1249 } 1250 /* Check computed CRC against header */ 1251 if (zip->entry->crc32 != zip->entry_crc32) { 1252 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1253 "ZIP bad CRC: 0x%lx should be 0x%lx", 1254 (unsigned long)zip->entry_crc32, 1255 (unsigned long)zip->entry->crc32); 1256 return (ARCHIVE_WARN); 1257 } 1258 } 1259 1260 return (ARCHIVE_OK); 1261 } 1262 1263 /* 1264 * Read "uncompressed" data. There are three cases: 1265 * 1) We know the size of the data. This is always true for the 1266 * seeking reader (we've examined the Central Directory already). 1267 * 2) ZIP_LENGTH_AT_END was set, but only the CRC was deferred. 1268 * Info-ZIP seems to do this; we know the size but have to grab 1269 * the CRC from the data descriptor afterwards. 1270 * 3) We're streaming and ZIP_LENGTH_AT_END was specified and 1271 * we have no size information. In this case, we can do pretty 1272 * well by watching for the data descriptor record. The data 1273 * descriptor is 16 bytes and includes a computed CRC that should 1274 * provide a strong check. 1275 * 1276 * TODO: Technically, the PK\007\010 signature is optional. 1277 * In the original spec, the data descriptor contained CRC 1278 * and size fields but had no leading signature. In practice, 1279 * newer writers seem to provide the signature pretty consistently, 1280 * but we might need to do something more complex here if 1281 * we want to handle older archives that lack that signature. 1282 * 1283 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets 1284 * zip->end_of_entry if it consumes all of the data. 1285 */ 1286 static int 1287 zip_read_data_none(struct archive_read *a, const void **_buff, 1288 size_t *size, int64_t *offset) 1289 { 1290 struct zip *zip; 1291 const char *buff; 1292 ssize_t bytes_avail; 1293 1294 (void)offset; /* UNUSED */ 1295 1296 zip = (struct zip *)(a->format->data); 1297 1298 if (zip->entry->flags & ZIP_LENGTH_AT_END) { 1299 const char *p; 1300 1301 /* Grab at least 16 bytes. */ 1302 buff = __archive_read_ahead(a, 16, &bytes_avail); 1303 if (bytes_avail < 16) { 1304 /* Zip archives have end-of-archive markers 1305 that are longer than this, so a failure to get at 1306 least 16 bytes really does indicate a truncated 1307 file. */ 1308 archive_set_error(&a->archive, 1309 ARCHIVE_ERRNO_FILE_FORMAT, 1310 "Truncated ZIP file data"); 1311 return (ARCHIVE_FATAL); 1312 } 1313 /* Check for a complete PK\007\010 signature. */ 1314 p = buff; 1315 if (p[0] == 'P' && p[1] == 'K' 1316 && p[2] == '\007' && p[3] == '\010' 1317 && archive_le32dec(p + 4) == zip->entry_crc32 1318 && archive_le32dec(p + 8) == 1319 zip->entry_compressed_bytes_read 1320 && archive_le32dec(p + 12) == 1321 zip->entry_uncompressed_bytes_read) { 1322 zip->entry->crc32 = archive_le32dec(p + 4); 1323 zip->entry->compressed_size = archive_le32dec(p + 8); 1324 zip->entry->uncompressed_size = archive_le32dec(p + 12); 1325 zip->end_of_entry = 1; 1326 zip->unconsumed = 16; 1327 return (ARCHIVE_OK); 1328 } 1329 /* If not at EOF, ensure we consume at least one byte. */ 1330 ++p; 1331 1332 /* Scan forward until we see where a PK\007\010 signature 1333 * might be. */ 1334 /* Return bytes up until that point. On the next call, 1335 * the code above will verify the data descriptor. */ 1336 while (p < buff + bytes_avail - 4) { 1337 if (p[3] == 'P') { p += 3; } 1338 else if (p[3] == 'K') { p += 2; } 1339 else if (p[3] == '\007') { p += 1; } 1340 else if (p[3] == '\010' && p[2] == '\007' 1341 && p[1] == 'K' && p[0] == 'P') { 1342 break; 1343 } else { p += 4; } 1344 } 1345 bytes_avail = p - buff; 1346 } else { 1347 if (zip->entry_bytes_remaining == 0) { 1348 zip->end_of_entry = 1; 1349 return (ARCHIVE_OK); 1350 } 1351 /* Grab a bunch of bytes. */ 1352 buff = __archive_read_ahead(a, 1, &bytes_avail); 1353 if (bytes_avail <= 0) { 1354 archive_set_error(&a->archive, 1355 ARCHIVE_ERRNO_FILE_FORMAT, 1356 "Truncated ZIP file data"); 1357 return (ARCHIVE_FATAL); 1358 } 1359 if (bytes_avail > zip->entry_bytes_remaining) 1360 bytes_avail = (ssize_t)zip->entry_bytes_remaining; 1361 } 1362 *size = bytes_avail; 1363 zip->entry_bytes_remaining -= bytes_avail; 1364 zip->entry_uncompressed_bytes_read += bytes_avail; 1365 zip->entry_compressed_bytes_read += bytes_avail; 1366 zip->unconsumed += bytes_avail; 1367 *_buff = buff; 1368 return (ARCHIVE_OK); 1369 } 1370 1371 #ifdef HAVE_ZLIB_H 1372 static int 1373 zip_deflate_init(struct archive_read *a, struct zip *zip) 1374 { 1375 int r; 1376 1377 /* If we haven't yet read any data, initialize the decompressor. */ 1378 if (!zip->decompress_init) { 1379 if (zip->stream_valid) 1380 r = inflateReset(&zip->stream); 1381 else 1382 r = inflateInit2(&zip->stream, 1383 -15 /* Don't check for zlib header */); 1384 if (r != Z_OK) { 1385 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1386 "Can't initialize ZIP decompression."); 1387 return (ARCHIVE_FATAL); 1388 } 1389 /* Stream structure has been set up. */ 1390 zip->stream_valid = 1; 1391 /* We've initialized decompression for this stream. */ 1392 zip->decompress_init = 1; 1393 } 1394 return (ARCHIVE_OK); 1395 } 1396 1397 static int 1398 zip_read_data_deflate(struct archive_read *a, const void **buff, 1399 size_t *size, int64_t *offset) 1400 { 1401 struct zip *zip; 1402 ssize_t bytes_avail; 1403 const void *compressed_buff; 1404 int r; 1405 1406 (void)offset; /* UNUSED */ 1407 1408 zip = (struct zip *)(a->format->data); 1409 1410 /* If the buffer hasn't been allocated, allocate it now. */ 1411 if (zip->uncompressed_buffer == NULL) { 1412 zip->uncompressed_buffer_size = 256 * 1024; 1413 zip->uncompressed_buffer 1414 = (unsigned char *)malloc(zip->uncompressed_buffer_size); 1415 if (zip->uncompressed_buffer == NULL) { 1416 archive_set_error(&a->archive, ENOMEM, 1417 "No memory for ZIP decompression"); 1418 return (ARCHIVE_FATAL); 1419 } 1420 } 1421 1422 r = zip_deflate_init(a, zip); 1423 if (r != ARCHIVE_OK) 1424 return (r); 1425 1426 /* 1427 * Note: '1' here is a performance optimization. 1428 * Recall that the decompression layer returns a count of 1429 * available bytes; asking for more than that forces the 1430 * decompressor to combine reads by copying data. 1431 */ 1432 compressed_buff = __archive_read_ahead(a, 1, &bytes_avail); 1433 if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END) 1434 && bytes_avail > zip->entry_bytes_remaining) { 1435 bytes_avail = (ssize_t)zip->entry_bytes_remaining; 1436 } 1437 if (bytes_avail <= 0) { 1438 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1439 "Truncated ZIP file body"); 1440 return (ARCHIVE_FATAL); 1441 } 1442 1443 /* 1444 * A bug in zlib.h: stream.next_in should be marked 'const' 1445 * but isn't (the library never alters data through the 1446 * next_in pointer, only reads it). The result: this ugly 1447 * cast to remove 'const'. 1448 */ 1449 zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff; 1450 zip->stream.avail_in = (uInt)bytes_avail; 1451 zip->stream.total_in = 0; 1452 zip->stream.next_out = zip->uncompressed_buffer; 1453 zip->stream.avail_out = (uInt)zip->uncompressed_buffer_size; 1454 zip->stream.total_out = 0; 1455 1456 r = inflate(&zip->stream, 0); 1457 switch (r) { 1458 case Z_OK: 1459 break; 1460 case Z_STREAM_END: 1461 zip->end_of_entry = 1; 1462 break; 1463 case Z_MEM_ERROR: 1464 archive_set_error(&a->archive, ENOMEM, 1465 "Out of memory for ZIP decompression"); 1466 return (ARCHIVE_FATAL); 1467 default: 1468 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1469 "ZIP decompression failed (%d)", r); 1470 return (ARCHIVE_FATAL); 1471 } 1472 1473 /* Consume as much as the compressor actually used. */ 1474 bytes_avail = zip->stream.total_in; 1475 zip_read_consume(a, bytes_avail); 1476 zip->entry_bytes_remaining -= bytes_avail; 1477 zip->entry_compressed_bytes_read += bytes_avail; 1478 1479 *size = zip->stream.total_out; 1480 zip->entry_uncompressed_bytes_read += zip->stream.total_out; 1481 *buff = zip->uncompressed_buffer; 1482 1483 if (zip->end_of_entry && (zip->entry->flags & ZIP_LENGTH_AT_END)) { 1484 const char *p; 1485 1486 if (NULL == (p = __archive_read_ahead(a, 16, NULL))) { 1487 archive_set_error(&a->archive, 1488 ARCHIVE_ERRNO_FILE_FORMAT, 1489 "Truncated ZIP end-of-file record"); 1490 return (ARCHIVE_FATAL); 1491 } 1492 /* Consume the optional PK\007\010 marker. */ 1493 if (p[0] == 'P' && p[1] == 'K' && 1494 p[2] == '\007' && p[3] == '\010') { 1495 zip->entry->crc32 = archive_le32dec(p + 4); 1496 zip->entry->compressed_size = archive_le32dec(p + 8); 1497 zip->entry->uncompressed_size = archive_le32dec(p + 12); 1498 zip->unconsumed = 16; 1499 } 1500 } 1501 1502 return (ARCHIVE_OK); 1503 } 1504 #endif 1505 1506 static int 1507 archive_read_format_zip_read_data_skip(struct archive_read *a) 1508 { 1509 struct zip *zip; 1510 1511 zip = (struct zip *)(a->format->data); 1512 1513 /* If we've already read to end of data, we're done. */ 1514 if (zip->end_of_entry) 1515 return (ARCHIVE_OK); 1516 1517 /* So we know we're streaming... */ 1518 if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)) { 1519 /* We know the compressed length, so we can just skip. */ 1520 int64_t bytes_skipped = zip_read_consume(a, 1521 zip->entry_bytes_remaining + zip->unconsumed); 1522 if (bytes_skipped < 0) 1523 return (ARCHIVE_FATAL); 1524 zip->unconsumed = 0; 1525 return (ARCHIVE_OK); 1526 } 1527 1528 /* We're streaming and we don't know the length. */ 1529 /* If the body is compressed and we know the format, we can 1530 * find an exact end-of-entry by decompressing it. */ 1531 switch (zip->entry->compression) { 1532 #ifdef HAVE_ZLIB_H 1533 case 8: /* Deflate compression. */ 1534 while (!zip->end_of_entry) { 1535 int64_t offset = 0; 1536 const void *buff = NULL; 1537 size_t size = 0; 1538 int r; 1539 r = zip_read_data_deflate(a, &buff, &size, &offset); 1540 if (r != ARCHIVE_OK) 1541 return (r); 1542 } 1543 return ARCHIVE_OK; 1544 #endif 1545 default: /* Uncompressed or unknown. */ 1546 /* Scan for a PK\007\010 signature. */ 1547 zip_read_consume(a, zip->unconsumed); 1548 zip->unconsumed = 0; 1549 for (;;) { 1550 const char *p, *buff; 1551 ssize_t bytes_avail; 1552 buff = __archive_read_ahead(a, 16, &bytes_avail); 1553 if (bytes_avail < 16) { 1554 archive_set_error(&a->archive, 1555 ARCHIVE_ERRNO_FILE_FORMAT, 1556 "Truncated ZIP file data"); 1557 return (ARCHIVE_FATAL); 1558 } 1559 p = buff; 1560 while (p <= buff + bytes_avail - 16) { 1561 if (p[3] == 'P') { p += 3; } 1562 else if (p[3] == 'K') { p += 2; } 1563 else if (p[3] == '\007') { p += 1; } 1564 else if (p[3] == '\010' && p[2] == '\007' 1565 && p[1] == 'K' && p[0] == 'P') { 1566 zip_read_consume(a, p - buff + 16); 1567 return ARCHIVE_OK; 1568 } else { p += 4; } 1569 } 1570 zip_read_consume(a, p - buff); 1571 } 1572 } 1573 } 1574 1575 static int 1576 archive_read_format_zip_cleanup(struct archive_read *a) 1577 { 1578 struct zip *zip; 1579 1580 zip = (struct zip *)(a->format->data); 1581 #ifdef HAVE_ZLIB_H 1582 if (zip->stream_valid) 1583 inflateEnd(&zip->stream); 1584 #endif 1585 if (zip->zip_entries && zip->central_directory_entries) { 1586 unsigned i; 1587 for (i = 0; i < zip->central_directory_entries; i++) 1588 archive_string_free(&(zip->zip_entries[i].rsrcname)); 1589 } 1590 free(zip->zip_entries); 1591 free(zip->uncompressed_buffer); 1592 archive_string_free(&(zip->extra)); 1593 free(zip); 1594 (a->format->data) = NULL; 1595 return (ARCHIVE_OK); 1596 } 1597 1598 /* 1599 * The extra data is stored as a list of 1600 * id1+size1+data1 + id2+size2+data2 ... 1601 * triplets. id and size are 2 bytes each. 1602 */ 1603 static void 1604 process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry) 1605 { 1606 unsigned offset = 0; 1607 1608 while (offset < extra_length - 4) 1609 { 1610 unsigned short headerid = archive_le16dec(p + offset); 1611 unsigned short datasize = archive_le16dec(p + offset + 2); 1612 offset += 4; 1613 if (offset + datasize > extra_length) 1614 break; 1615 #ifdef DEBUG 1616 fprintf(stderr, "Header id 0x%x, length %d\n", 1617 headerid, datasize); 1618 #endif 1619 switch (headerid) { 1620 case 0x0001: 1621 /* Zip64 extended information extra field. */ 1622 if (datasize >= 8) 1623 zip_entry->uncompressed_size = 1624 archive_le64dec(p + offset); 1625 if (datasize >= 16) 1626 zip_entry->compressed_size = 1627 archive_le64dec(p + offset + 8); 1628 break; 1629 case 0x5455: 1630 { 1631 /* Extended time field "UT". */ 1632 int flags = p[offset]; 1633 offset++; 1634 datasize--; 1635 /* Flag bits indicate which dates are present. */ 1636 if (flags & 0x01) 1637 { 1638 #ifdef DEBUG 1639 fprintf(stderr, "mtime: %lld -> %d\n", 1640 (long long)zip_entry->mtime, 1641 archive_le32dec(p + offset)); 1642 #endif 1643 if (datasize < 4) 1644 break; 1645 zip_entry->mtime = archive_le32dec(p + offset); 1646 offset += 4; 1647 datasize -= 4; 1648 } 1649 if (flags & 0x02) 1650 { 1651 if (datasize < 4) 1652 break; 1653 zip_entry->atime = archive_le32dec(p + offset); 1654 offset += 4; 1655 datasize -= 4; 1656 } 1657 if (flags & 0x04) 1658 { 1659 if (datasize < 4) 1660 break; 1661 zip_entry->ctime = archive_le32dec(p + offset); 1662 offset += 4; 1663 datasize -= 4; 1664 } 1665 break; 1666 } 1667 case 0x5855: 1668 { 1669 /* Info-ZIP Unix Extra Field (old version) "UX". */ 1670 if (datasize >= 8) { 1671 zip_entry->atime = archive_le32dec(p + offset); 1672 zip_entry->mtime = 1673 archive_le32dec(p + offset + 4); 1674 } 1675 if (datasize >= 12) { 1676 zip_entry->uid = 1677 archive_le16dec(p + offset + 8); 1678 zip_entry->gid = 1679 archive_le16dec(p + offset + 10); 1680 } 1681 break; 1682 } 1683 case 0x7855: 1684 /* Info-ZIP Unix Extra Field (type 2) "Ux". */ 1685 #ifdef DEBUG 1686 fprintf(stderr, "uid %d gid %d\n", 1687 archive_le16dec(p + offset), 1688 archive_le16dec(p + offset + 2)); 1689 #endif 1690 if (datasize >= 2) 1691 zip_entry->uid = archive_le16dec(p + offset); 1692 if (datasize >= 4) 1693 zip_entry->gid = 1694 archive_le16dec(p + offset + 2); 1695 break; 1696 case 0x7875: 1697 { 1698 /* Info-Zip Unix Extra Field (type 3) "ux". */ 1699 int uidsize = 0, gidsize = 0; 1700 1701 if (datasize >= 1 && p[offset] == 1) {/* version=1 */ 1702 if (datasize >= 4) { 1703 /* get a uid size. */ 1704 uidsize = p[offset+1]; 1705 if (uidsize == 2) 1706 zip_entry->uid = 1707 archive_le16dec( 1708 p + offset + 2); 1709 else if (uidsize == 4 && datasize >= 6) 1710 zip_entry->uid = 1711 archive_le32dec( 1712 p + offset + 2); 1713 } 1714 if (datasize >= (2 + uidsize + 3)) { 1715 /* get a gid size. */ 1716 gidsize = p[offset+2+uidsize]; 1717 if (gidsize == 2) 1718 zip_entry->gid = 1719 archive_le16dec( 1720 p+offset+2+uidsize+1); 1721 else if (gidsize == 4 && 1722 datasize >= (2 + uidsize + 5)) 1723 zip_entry->gid = 1724 archive_le32dec( 1725 p+offset+2+uidsize+1); 1726 } 1727 } 1728 break; 1729 } 1730 default: 1731 break; 1732 } 1733 offset += datasize; 1734 } 1735 #ifdef DEBUG 1736 if (offset != extra_length) 1737 { 1738 fprintf(stderr, 1739 "Extra data field contents do not match reported size!\n"); 1740 } 1741 #endif 1742 } 1743