1 /*- 2 * Copyright (c) 2004 Tim Kientzle 3 * Copyright (c) 2011 Michihiro NAKAJIMA 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "archive_platform.h" 28 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 2009-12-28 03:11:36Z kientzle $"); 29 30 #ifdef HAVE_ERRNO_H 31 #include <errno.h> 32 #endif 33 #ifdef HAVE_STDLIB_H 34 #include <stdlib.h> 35 #endif 36 #ifdef HAVE_ZLIB_H 37 #include <zlib.h> 38 #endif 39 40 #include "archive.h" 41 #include "archive_entry.h" 42 #include "archive_entry_locale.h" 43 #include "archive_private.h" 44 #include "archive_read_private.h" 45 #include "archive_endian.h" 46 47 #ifndef HAVE_ZLIB_H 48 #include "archive_crc32.h" 49 #endif 50 51 struct zip_entry { 52 int64_t local_header_offset; 53 int64_t compressed_size; 54 int64_t uncompressed_size; 55 int64_t gid; 56 int64_t uid; 57 struct archive_entry *entry; 58 time_t mtime; 59 time_t atime; 60 time_t ctime; 61 uint32_t crc32; 62 uint16_t mode; 63 uint16_t flags; 64 char compression; 65 char system; 66 }; 67 68 struct zip { 69 /* Structural information about the archive. */ 70 int64_t central_directory_offset; 71 size_t central_directory_size; 72 size_t central_directory_entries; 73 char have_central_directory; 74 75 /* List of entries (seekable Zip only) */ 76 size_t entries_remaining; 77 struct zip_entry *zip_entries; 78 struct zip_entry *entry; 79 80 size_t unconsumed; 81 82 /* entry_bytes_remaining is the number of bytes we expect. */ 83 int64_t entry_bytes_remaining; 84 85 /* These count the number of bytes actually read for the entry. */ 86 int64_t entry_compressed_bytes_read; 87 int64_t entry_uncompressed_bytes_read; 88 89 /* Running CRC32 of the decompressed data */ 90 unsigned long entry_crc32; 91 92 /* Flags to mark progress of decompression. */ 93 char decompress_init; 94 char end_of_entry; 95 96 ssize_t filename_length; 97 ssize_t extra_length; 98 99 unsigned char *uncompressed_buffer; 100 size_t uncompressed_buffer_size; 101 #ifdef HAVE_ZLIB_H 102 z_stream stream; 103 char stream_valid; 104 #endif 105 106 struct archive_string extra; 107 struct archive_string_conv *sconv; 108 struct archive_string_conv *sconv_default; 109 struct archive_string_conv *sconv_utf8; 110 int init_default_conversion; 111 char format_name[64]; 112 }; 113 114 #define ZIP_LENGTH_AT_END 8 115 #define ZIP_ENCRYPTED (1<<0) 116 #define ZIP_STRONG_ENCRYPTED (1<<6) 117 #define ZIP_UTF8_NAME (1<<11) 118 119 static int archive_read_format_zip_streamable_bid(struct archive_read *, int); 120 static int archive_read_format_zip_seekable_bid(struct archive_read *, int); 121 static int archive_read_format_zip_options(struct archive_read *, 122 const char *, const char *); 123 static int archive_read_format_zip_cleanup(struct archive_read *); 124 static int archive_read_format_zip_read_data(struct archive_read *, 125 const void **, size_t *, int64_t *); 126 static int archive_read_format_zip_read_data_skip(struct archive_read *a); 127 static int archive_read_format_zip_seekable_read_header(struct archive_read *, 128 struct archive_entry *); 129 static int archive_read_format_zip_streamable_read_header(struct archive_read *, 130 struct archive_entry *); 131 #ifdef HAVE_ZLIB_H 132 static int zip_read_data_deflate(struct archive_read *a, const void **buff, 133 size_t *size, int64_t *offset); 134 #endif 135 static int zip_read_data_none(struct archive_read *a, const void **buff, 136 size_t *size, int64_t *offset); 137 static int zip_read_local_file_header(struct archive_read *a, 138 struct archive_entry *entry, struct zip *); 139 static time_t zip_time(const char *); 140 static const char *compression_name(int compression); 141 static void process_extra(const char *, size_t, struct zip_entry *); 142 143 int 144 archive_read_support_format_zip_streamable(struct archive *_a) 145 { 146 struct archive_read *a = (struct archive_read *)_a; 147 struct zip *zip; 148 int r; 149 150 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 151 ARCHIVE_STATE_NEW, "archive_read_support_format_zip"); 152 153 zip = (struct zip *)malloc(sizeof(*zip)); 154 if (zip == NULL) { 155 archive_set_error(&a->archive, ENOMEM, 156 "Can't allocate zip data"); 157 return (ARCHIVE_FATAL); 158 } 159 memset(zip, 0, sizeof(*zip)); 160 161 r = __archive_read_register_format(a, 162 zip, 163 "zip", 164 archive_read_format_zip_streamable_bid, 165 archive_read_format_zip_options, 166 archive_read_format_zip_streamable_read_header, 167 archive_read_format_zip_read_data, 168 archive_read_format_zip_read_data_skip, 169 archive_read_format_zip_cleanup); 170 171 if (r != ARCHIVE_OK) 172 free(zip); 173 return (ARCHIVE_OK); 174 } 175 176 int 177 archive_read_support_format_zip_seekable(struct archive *_a) 178 { 179 struct archive_read *a = (struct archive_read *)_a; 180 struct zip *zip; 181 int r; 182 183 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 184 ARCHIVE_STATE_NEW, "archive_read_support_format_zip_seekable"); 185 186 zip = (struct zip *)malloc(sizeof(*zip)); 187 if (zip == NULL) { 188 archive_set_error(&a->archive, ENOMEM, 189 "Can't allocate zip data"); 190 return (ARCHIVE_FATAL); 191 } 192 memset(zip, 0, sizeof(*zip)); 193 194 r = __archive_read_register_format(a, 195 zip, 196 "zip", 197 archive_read_format_zip_seekable_bid, 198 archive_read_format_zip_options, 199 archive_read_format_zip_seekable_read_header, 200 archive_read_format_zip_read_data, 201 archive_read_format_zip_read_data_skip, 202 archive_read_format_zip_cleanup); 203 204 if (r != ARCHIVE_OK) 205 free(zip); 206 return (ARCHIVE_OK); 207 } 208 209 int 210 archive_read_support_format_zip(struct archive *a) 211 { 212 int r; 213 r = archive_read_support_format_zip_streamable(a); 214 if (r != ARCHIVE_OK) 215 return r; 216 return (archive_read_support_format_zip_seekable(a)); 217 } 218 219 /* 220 * TODO: This is a performance sink because it forces 221 * the read core to drop buffered data from the start 222 * of file, which will then have to be re-read again 223 * if this bidder loses. 224 * 225 * Consider passing in the winning bid value to subsequent 226 * bidders so that this bidder in particular can avoid 227 * seeking if it knows it's going to lose anyway. 228 */ 229 static int 230 archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid) 231 { 232 struct zip *zip = (struct zip *)a->format->data; 233 int64_t filesize; 234 const char *p; 235 236 /* If someone has already bid more than 32, then avoid 237 trashing the look-ahead buffers with a seek. */ 238 if (best_bid > 32) 239 return (-1); 240 241 filesize = __archive_read_seek(a, -22, SEEK_END); 242 /* If we can't seek, then we can't bid. */ 243 if (filesize <= 0) 244 return 0; 245 246 /* TODO: More robust search for end of central directory record. */ 247 if ((p = __archive_read_ahead(a, 22, NULL)) == NULL) 248 return 0; 249 /* First four bytes are signature for end of central directory 250 record. Four zero bytes ensure this isn't a multi-volume 251 Zip file (which we don't yet support). */ 252 if (memcmp(p, "PK\005\006\000\000\000\000", 8) != 0) 253 return 0; 254 255 /* Since we've already done the hard work of finding the 256 end of central directory record, let's save the important 257 information. */ 258 zip->central_directory_entries = archive_le16dec(p + 10); 259 zip->central_directory_size = archive_le32dec(p + 12); 260 zip->central_directory_offset = archive_le32dec(p + 16); 261 262 /* Just one volume, so central dir must all be on this volume. */ 263 if (zip->central_directory_entries != archive_le16dec(p + 8)) 264 return 0; 265 /* Central directory can't extend beyond end of this file. */ 266 if (zip->central_directory_offset + zip->central_directory_size > filesize) 267 return 0; 268 269 /* This is just a tiny bit higher than the maximum returned by 270 the streaming Zip bidder. This ensures that the more accurate 271 seeking Zip parser wins whenever seek is available. */ 272 return 32; 273 } 274 275 static int 276 slurp_central_directory(struct archive_read *a, struct zip *zip) 277 { 278 unsigned i; 279 280 __archive_read_seek(a, zip->central_directory_offset, SEEK_SET); 281 282 zip->zip_entries = calloc(zip->central_directory_entries, sizeof(struct zip_entry)); 283 for (i = 0; i < zip->central_directory_entries; ++i) { 284 struct zip_entry *zip_entry = &zip->zip_entries[i]; 285 size_t filename_length, extra_length, comment_length; 286 uint32_t external_attributes; 287 const char *p; 288 289 if ((p = __archive_read_ahead(a, 46, NULL)) == NULL) 290 return ARCHIVE_FATAL; 291 if (memcmp(p, "PK\001\002", 4) != 0) { 292 archive_set_error(&a->archive, 293 -1, "Invalid central directory signature"); 294 return ARCHIVE_FATAL; 295 } 296 zip->have_central_directory = 1; 297 /* version = p[4]; */ 298 zip_entry->system = p[5]; 299 /* version_required = archive_le16dec(p + 6); */ 300 zip_entry->flags = archive_le16dec(p + 8); 301 zip_entry->compression = archive_le16dec(p + 10); 302 zip_entry->mtime = zip_time(p + 12); 303 zip_entry->crc32 = archive_le32dec(p + 16); 304 zip_entry->compressed_size = archive_le32dec(p + 20); 305 zip_entry->uncompressed_size = archive_le32dec(p + 24); 306 filename_length = archive_le16dec(p + 28); 307 extra_length = archive_le16dec(p + 30); 308 comment_length = archive_le16dec(p + 32); 309 /* disk_start = archive_le16dec(p + 34); */ /* Better be zero. */ 310 /* internal_attributes = archive_le16dec(p + 36); */ /* text bit */ 311 external_attributes = archive_le32dec(p + 38); 312 zip_entry->local_header_offset = archive_le32dec(p + 42); 313 314 if (zip_entry->system == 3) { 315 zip_entry->mode = external_attributes >> 16; 316 } else { 317 zip_entry->mode = AE_IFREG | 0777; 318 } 319 320 /* Do we need to parse filename here? */ 321 /* Or can we wait until we read the local header? */ 322 __archive_read_consume(a, 323 46 + filename_length + extra_length + comment_length); 324 } 325 326 /* TODO: Sort zip entries. */ 327 328 return ARCHIVE_OK; 329 } 330 331 static int 332 archive_read_format_zip_seekable_read_header(struct archive_read *a, 333 struct archive_entry *entry) 334 { 335 struct zip *zip = (struct zip *)a->format->data; 336 int r; 337 338 a->archive.archive_format = ARCHIVE_FORMAT_ZIP; 339 if (a->archive.archive_format_name == NULL) 340 a->archive.archive_format_name = "ZIP"; 341 342 if (zip->zip_entries == NULL) { 343 r = slurp_central_directory(a, zip); 344 zip->entries_remaining = zip->central_directory_entries; 345 if (r != ARCHIVE_OK) 346 return r; 347 zip->entry = zip->zip_entries; 348 } else { 349 ++zip->entry; 350 } 351 352 if (zip->entries_remaining <= 0) 353 return ARCHIVE_EOF; 354 --zip->entries_remaining; 355 356 /* TODO: If entries are sorted by offset within the file, we 357 should be able to skip here instead of seeking. Skipping is 358 typically faster (easier for I/O layer to optimize). */ 359 __archive_read_seek(a, zip->entry->local_header_offset, SEEK_SET); 360 zip->unconsumed = 0; 361 r = zip_read_local_file_header(a, entry, zip); 362 if (r != ARCHIVE_OK) 363 return r; 364 if ((zip->entry->mode & AE_IFMT) == AE_IFLNK) { 365 const void *p; 366 size_t linkname_length = archive_entry_size(entry); 367 368 archive_entry_set_size(entry, 0); 369 p = __archive_read_ahead(a, linkname_length, NULL); 370 if (p == NULL) { 371 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 372 "Truncated Zip file"); 373 return ARCHIVE_FATAL; 374 } 375 376 if (archive_entry_copy_symlink_l(entry, p, linkname_length, 377 NULL) != 0) { 378 /* NOTE: If the last argument is NULL, this will 379 * fail only by memeory allocation failure. */ 380 archive_set_error(&a->archive, ENOMEM, 381 "Can't allocate memory for Symlink"); 382 return (ARCHIVE_FATAL); 383 } 384 /* TODO: handle character-set issues? */ 385 } 386 return ARCHIVE_OK; 387 } 388 389 static int 390 archive_read_format_zip_streamable_bid(struct archive_read *a, int best_bid) 391 { 392 const char *p; 393 394 (void)best_bid; /* UNUSED */ 395 396 if ((p = __archive_read_ahead(a, 4, NULL)) == NULL) 397 return (-1); 398 399 /* 400 * Bid of 30 here is: 16 bits for "PK", 401 * next 16-bit field has four options (-2 bits). 402 * 16 + 16-2 = 30. 403 */ 404 if (p[0] == 'P' && p[1] == 'K') { 405 if ((p[2] == '\001' && p[3] == '\002') 406 || (p[2] == '\003' && p[3] == '\004') 407 || (p[2] == '\005' && p[3] == '\006') 408 || (p[2] == '\007' && p[3] == '\010') 409 || (p[2] == '0' && p[3] == '0')) 410 return (30); 411 } 412 413 return (0); 414 } 415 416 static int 417 archive_read_format_zip_options(struct archive_read *a, 418 const char *key, const char *val) 419 { 420 struct zip *zip; 421 int ret = ARCHIVE_FAILED; 422 423 zip = (struct zip *)(a->format->data); 424 if (strcmp(key, "compat-2x") == 0) { 425 /* Handle filnames as libarchive 2.x */ 426 zip->init_default_conversion = (val != NULL) ? 1 : 0; 427 ret = ARCHIVE_OK; 428 } else if (strcmp(key, "hdrcharset") == 0) { 429 if (val == NULL || val[0] == 0) 430 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 431 "zip: hdrcharset option needs a character-set name"); 432 else { 433 zip->sconv = archive_string_conversion_from_charset( 434 &a->archive, val, 0); 435 if (zip->sconv != NULL) { 436 if (strcmp(val, "UTF-8") == 0) 437 zip->sconv_utf8 = zip->sconv; 438 ret = ARCHIVE_OK; 439 } else 440 ret = ARCHIVE_FATAL; 441 } 442 } else 443 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 444 "zip: unknown keyword ``%s''", key); 445 446 return (ret); 447 } 448 449 static int 450 archive_read_format_zip_streamable_read_header(struct archive_read *a, 451 struct archive_entry *entry) 452 { 453 struct zip *zip; 454 455 a->archive.archive_format = ARCHIVE_FORMAT_ZIP; 456 if (a->archive.archive_format_name == NULL) 457 a->archive.archive_format_name = "ZIP"; 458 459 zip = (struct zip *)(a->format->data); 460 461 /* Make sure we have a zip_entry structure to use. */ 462 if (zip->zip_entries == NULL) { 463 zip->zip_entries = malloc(sizeof(struct zip_entry)); 464 if (zip->zip_entries == NULL) { 465 archive_set_error(&a->archive, ENOMEM, "Out of memory"); 466 return ARCHIVE_FATAL; 467 } 468 } 469 zip->entry = zip->zip_entries; 470 memset(zip->entry, 0, sizeof(struct zip_entry)); 471 472 /* Search ahead for the next local file header. */ 473 __archive_read_consume(a, zip->unconsumed); 474 zip->unconsumed = 0; 475 for (;;) { 476 int64_t skipped = 0; 477 const char *p, *end; 478 ssize_t bytes; 479 480 p = __archive_read_ahead(a, 4, &bytes); 481 if (p == NULL) 482 return (ARCHIVE_FATAL); 483 end = p + bytes; 484 485 while (p + 4 <= end) { 486 if (p[0] == 'P' && p[1] == 'K') { 487 if (p[2] == '\001' && p[3] == '\002') 488 /* Beginning of central directory. */ 489 return (ARCHIVE_EOF); 490 491 if (p[2] == '\003' && p[3] == '\004') { 492 /* Regular file entry. */ 493 __archive_read_consume(a, skipped); 494 return zip_read_local_file_header(a, entry, zip); 495 } 496 497 if (p[2] == '\005' && p[3] == '\006') 498 /* End of central directory. */ 499 return (ARCHIVE_EOF); 500 } 501 ++p; 502 ++skipped; 503 } 504 __archive_read_consume(a, skipped); 505 } 506 } 507 508 /* 509 * Assumes file pointer is at beginning of local file header. 510 */ 511 static int 512 zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, 513 struct zip *zip) 514 { 515 const char *p; 516 const void *h; 517 const wchar_t *wp; 518 const char *cp; 519 size_t len, filename_length, extra_length; 520 struct archive_string_conv *sconv; 521 struct zip_entry *zip_entry = zip->entry; 522 uint32_t local_crc32; 523 int64_t compressed_size, uncompressed_size; 524 int ret = ARCHIVE_OK; 525 char version; 526 527 zip->decompress_init = 0; 528 zip->end_of_entry = 0; 529 zip->entry_uncompressed_bytes_read = 0; 530 zip->entry_compressed_bytes_read = 0; 531 zip->entry_crc32 = crc32(0, NULL, 0); 532 533 /* Setup default conversion. */ 534 if (zip->sconv == NULL && !zip->init_default_conversion) { 535 zip->sconv_default = 536 archive_string_default_conversion_for_read(&(a->archive)); 537 zip->init_default_conversion = 1; 538 } 539 540 if ((p = __archive_read_ahead(a, 30, NULL)) == NULL) { 541 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 542 "Truncated ZIP file header"); 543 return (ARCHIVE_FATAL); 544 } 545 546 if (memcmp(p, "PK\003\004", 4) != 0) { 547 archive_set_error(&a->archive, -1, "Damaged Zip archive"); 548 return ARCHIVE_FATAL; 549 } 550 version = p[4]; 551 zip_entry->system = p[5]; 552 zip_entry->flags = archive_le16dec(p + 6); 553 zip_entry->compression = archive_le16dec(p + 8); 554 zip_entry->mtime = zip_time(p + 10); 555 local_crc32 = archive_le32dec(p + 14); 556 compressed_size = archive_le32dec(p + 18); 557 uncompressed_size = archive_le32dec(p + 22); 558 filename_length = archive_le16dec(p + 26); 559 extra_length = archive_le16dec(p + 28); 560 561 __archive_read_consume(a, 30); 562 563 if (zip->have_central_directory) { 564 /* If we read the central dir entry, we must have size information 565 as well, so ignore the length-at-end flag. */ 566 zip_entry->flags &= ~ZIP_LENGTH_AT_END; 567 /* If we have values from both the local file header 568 and the central directory, warn about mismatches 569 which might indicate a damaged file. But some 570 writers always put zero in the local header; don't 571 bother warning about that. */ 572 if (local_crc32 != 0 && local_crc32 != zip_entry->crc32) { 573 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 574 "Inconsistent CRC32 values"); 575 ret = ARCHIVE_WARN; 576 } 577 if (compressed_size != 0 578 && compressed_size != zip_entry->compressed_size) { 579 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 580 "Inconsistent compressed size"); 581 ret = ARCHIVE_WARN; 582 } 583 if (uncompressed_size != 0 584 && uncompressed_size != zip_entry->uncompressed_size) { 585 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 586 "Inconsistent uncompressed size"); 587 ret = ARCHIVE_WARN; 588 } 589 } else { 590 /* If we don't have the CD info, use whatever we do have. */ 591 zip_entry->crc32 = local_crc32; 592 zip_entry->compressed_size = compressed_size; 593 zip_entry->uncompressed_size = uncompressed_size; 594 } 595 596 /* Read the filename. */ 597 if ((h = __archive_read_ahead(a, filename_length, NULL)) == NULL) { 598 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 599 "Truncated ZIP file header"); 600 return (ARCHIVE_FATAL); 601 } 602 if (zip_entry->flags & ZIP_UTF8_NAME) { 603 /* The filename is stored to be UTF-8. */ 604 if (zip->sconv_utf8 == NULL) { 605 zip->sconv_utf8 = 606 archive_string_conversion_from_charset( 607 &a->archive, "UTF-8", 1); 608 if (zip->sconv_utf8 == NULL) 609 return (ARCHIVE_FATAL); 610 } 611 sconv = zip->sconv_utf8; 612 } else if (zip->sconv != NULL) 613 sconv = zip->sconv; 614 else 615 sconv = zip->sconv_default; 616 617 if (archive_entry_copy_pathname_l(entry, 618 h, filename_length, sconv) != 0) { 619 if (errno == ENOMEM) { 620 archive_set_error(&a->archive, ENOMEM, 621 "Can't allocate memory for Pathname"); 622 return (ARCHIVE_FATAL); 623 } 624 archive_set_error(&a->archive, 625 ARCHIVE_ERRNO_FILE_FORMAT, 626 "Pathname cannot be converted " 627 "from %s to current locale.", 628 archive_string_conversion_charset_name(sconv)); 629 ret = ARCHIVE_WARN; 630 } 631 __archive_read_consume(a, filename_length); 632 633 if (zip_entry->mode == 0) { 634 /* Especially in streaming mode, we can end up 635 here without having seen any mode information. 636 Guess from the filename. */ 637 wp = archive_entry_pathname_w(entry); 638 if (wp != NULL) { 639 len = wcslen(wp); 640 if (len > 0 && wp[len - 1] == L'/') 641 zip_entry->mode = AE_IFDIR | 0777; 642 else 643 zip_entry->mode = AE_IFREG | 0777; 644 } else { 645 cp = archive_entry_pathname(entry); 646 len = (cp != NULL)?strlen(cp):0; 647 if (len > 0 && cp[len - 1] == '/') 648 zip_entry->mode = AE_IFDIR | 0777; 649 else 650 zip_entry->mode = AE_IFREG | 0777; 651 } 652 } 653 654 /* Read the extra data. */ 655 if ((h = __archive_read_ahead(a, extra_length, NULL)) == NULL) { 656 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 657 "Truncated ZIP file header"); 658 return (ARCHIVE_FATAL); 659 } 660 process_extra(h, extra_length, zip_entry); 661 __archive_read_consume(a, extra_length); 662 663 /* Populate some additional entry fields: */ 664 archive_entry_set_mode(entry, zip_entry->mode); 665 archive_entry_set_uid(entry, zip_entry->uid); 666 archive_entry_set_gid(entry, zip_entry->gid); 667 archive_entry_set_mtime(entry, zip_entry->mtime, 0); 668 archive_entry_set_ctime(entry, zip_entry->ctime, 0); 669 archive_entry_set_atime(entry, zip_entry->atime, 0); 670 /* Set the size only if it's meaningful. */ 671 if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END)) 672 archive_entry_set_size(entry, zip_entry->uncompressed_size); 673 674 zip->entry_bytes_remaining = zip_entry->compressed_size; 675 676 /* If there's no body, force read_data() to return EOF immediately. */ 677 if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END) 678 && zip->entry_bytes_remaining < 1) 679 zip->end_of_entry = 1; 680 681 /* Set up a more descriptive format name. */ 682 sprintf(zip->format_name, "ZIP %d.%d (%s)", 683 version / 10, version % 10, 684 compression_name(zip->entry->compression)); 685 a->archive.archive_format_name = zip->format_name; 686 687 return (ret); 688 } 689 690 static const char * 691 compression_name(int compression) 692 { 693 static const char *compression_names[] = { 694 "uncompressed", 695 "shrinking", 696 "reduced-1", 697 "reduced-2", 698 "reduced-3", 699 "reduced-4", 700 "imploded", 701 "reserved", 702 "deflation" 703 }; 704 705 if (compression < 706 sizeof(compression_names)/sizeof(compression_names[0])) 707 return compression_names[compression]; 708 else 709 return "??"; 710 } 711 712 /* Convert an MSDOS-style date/time into Unix-style time. */ 713 static time_t 714 zip_time(const char *p) 715 { 716 int msTime, msDate; 717 struct tm ts; 718 719 msTime = (0xff & (unsigned)p[0]) + 256 * (0xff & (unsigned)p[1]); 720 msDate = (0xff & (unsigned)p[2]) + 256 * (0xff & (unsigned)p[3]); 721 722 memset(&ts, 0, sizeof(ts)); 723 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ 724 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ 725 ts.tm_mday = msDate & 0x1f; /* Day of month. */ 726 ts.tm_hour = (msTime >> 11) & 0x1f; 727 ts.tm_min = (msTime >> 5) & 0x3f; 728 ts.tm_sec = (msTime << 1) & 0x3e; 729 ts.tm_isdst = -1; 730 return mktime(&ts); 731 } 732 733 static int 734 archive_read_format_zip_read_data(struct archive_read *a, 735 const void **buff, size_t *size, int64_t *offset) 736 { 737 int r; 738 struct zip *zip = (struct zip *)(a->format->data); 739 740 *offset = zip->entry_uncompressed_bytes_read; 741 *size = 0; 742 *buff = NULL; 743 744 /* If we hit end-of-entry last time, return ARCHIVE_EOF. */ 745 if (zip->end_of_entry) 746 return (ARCHIVE_EOF); 747 748 /* Return EOF immediately if this is a non-regular file. */ 749 if (AE_IFREG != (zip->entry->mode & AE_IFMT)) 750 return (ARCHIVE_EOF); 751 752 if (zip->entry->flags & (ZIP_ENCRYPTED | ZIP_STRONG_ENCRYPTED)) { 753 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 754 "Encrypted file is unsupported"); 755 return (ARCHIVE_FAILED); 756 } 757 758 __archive_read_consume(a, zip->unconsumed); 759 zip->unconsumed = 0; 760 761 switch(zip->entry->compression) { 762 case 0: /* No compression. */ 763 r = zip_read_data_none(a, buff, size, offset); 764 break; 765 #ifdef HAVE_ZLIB_H 766 case 8: /* Deflate compression. */ 767 r = zip_read_data_deflate(a, buff, size, offset); 768 break; 769 #endif 770 default: /* Unsupported compression. */ 771 /* Return a warning. */ 772 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 773 "Unsupported ZIP compression method (%s)", 774 compression_name(zip->entry->compression)); 775 /* We can't decompress this entry, but we will 776 * be able to skip() it and try the next entry. */ 777 return (ARCHIVE_FAILED); 778 break; 779 } 780 if (r != ARCHIVE_OK) 781 return (r); 782 /* Update checksum */ 783 if (*size) 784 zip->entry_crc32 = crc32(zip->entry_crc32, *buff, *size); 785 /* If we hit the end, swallow any end-of-data marker. */ 786 if (zip->end_of_entry) { 787 /* Check file size, CRC against these values. */ 788 if (zip->entry->compressed_size != zip->entry_compressed_bytes_read) { 789 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 790 "ZIP compressed data is wrong size (read %jd, expected %jd)", 791 (intmax_t)zip->entry_compressed_bytes_read, 792 (intmax_t)zip->entry->compressed_size); 793 return (ARCHIVE_WARN); 794 } 795 /* Size field only stores the lower 32 bits of the actual 796 * size. */ 797 if ((zip->entry->uncompressed_size & UINT32_MAX) 798 != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) { 799 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 800 "ZIP uncompressed data is wrong size (read %jd, expected %jd)", 801 (intmax_t)zip->entry_uncompressed_bytes_read, 802 (intmax_t)zip->entry->uncompressed_size); 803 return (ARCHIVE_WARN); 804 } 805 /* Check computed CRC against header */ 806 if (zip->entry->crc32 != zip->entry_crc32) { 807 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 808 "ZIP bad CRC: 0x%lx should be 0x%lx", 809 (unsigned long)zip->entry_crc32, 810 (unsigned long)zip->entry->crc32); 811 return (ARCHIVE_WARN); 812 } 813 } 814 815 return (ARCHIVE_OK); 816 } 817 818 /* 819 * Read "uncompressed" data. There are three cases: 820 * 1) We know the size of the data. This is always true for the 821 * seeking reader (we've examined the Central Directory already). 822 * 2) ZIP_LENGTH_AT_END was set, but only the CRC was deferred. 823 * Info-ZIP seems to do this; we know the size but have to grab 824 * the CRC from the data descriptor afterwards. 825 * 3) We're streaming and ZIP_LENGTH_AT_END was specified and 826 * we have no size information. In this case, we can do pretty 827 * well by watching for the data descriptor record. The data 828 * descriptor is 16 bytes and includes a computed CRC that should 829 * provide a strong check. 830 * 831 * TODO: Technically, the PK\007\010 signature is optional. 832 * In the original spec, the data descriptor contained CRC 833 * and size fields but had no leading signature. In practice, 834 * newer writers seem to provide the signature pretty consistently, 835 * but we might need to do something more complex here if 836 * we want to handle older archives that lack that signature. 837 * 838 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets 839 * zip->end_of_entry if it consumes all of the data. 840 */ 841 static int 842 zip_read_data_none(struct archive_read *a, const void **_buff, 843 size_t *size, int64_t *offset) 844 { 845 struct zip *zip; 846 const char *buff; 847 ssize_t bytes_avail; 848 849 zip = (struct zip *)(a->format->data); 850 851 if (zip->entry->flags & ZIP_LENGTH_AT_END) { 852 const char *p; 853 854 /* Grab at least 16 bytes. */ 855 buff = __archive_read_ahead(a, 16, &bytes_avail); 856 if (bytes_avail < 16) { 857 /* Zip archives have end-of-archive markers 858 that are longer than this, so a failure to get at 859 least 16 bytes really does indicate a truncated 860 file. */ 861 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 862 "Truncated ZIP file data"); 863 return (ARCHIVE_FATAL); 864 } 865 /* Check for a complete PK\007\010 signature. */ 866 p = buff; 867 if (p[0] == 'P' && p[1] == 'K' 868 && p[2] == '\007' && p[3] == '\010' 869 && archive_le32dec(p + 4) == zip->entry_crc32 870 && archive_le32dec(p + 8) == zip->entry_compressed_bytes_read 871 && archive_le32dec(p + 12) == zip->entry_uncompressed_bytes_read) { 872 zip->entry->crc32 = archive_le32dec(p + 4); 873 zip->entry->compressed_size = archive_le32dec(p + 8); 874 zip->entry->uncompressed_size = archive_le32dec(p + 12); 875 zip->end_of_entry = 1; 876 zip->unconsumed = 16; 877 return (ARCHIVE_OK); 878 } 879 /* If not at EOF, ensure we consume at least one byte. */ 880 ++p; 881 882 /* Scan forward until we see where a PK\007\010 signature might be. */ 883 /* Return bytes up until that point. On the next call, the code 884 above will verify the data descriptor. */ 885 while (p < buff + bytes_avail - 4) { 886 if (p[3] == 'P') { p += 3; } 887 else if (p[3] == 'K') { p += 2; } 888 else if (p[3] == '\007') { p += 1; } 889 else if (p[3] == '\010' && p[2] == '\007' 890 && p[1] == 'K' && p[0] == 'P') { 891 break; 892 } else { p += 4; } 893 } 894 bytes_avail = p - buff; 895 } else { 896 if (zip->entry_bytes_remaining == 0) { 897 zip->end_of_entry = 1; 898 return (ARCHIVE_OK); 899 } 900 /* Grab a bunch of bytes. */ 901 buff = __archive_read_ahead(a, 1, &bytes_avail); 902 if (bytes_avail <= 0) { 903 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 904 "Truncated ZIP file data"); 905 return (ARCHIVE_FATAL); 906 } 907 if (bytes_avail > zip->entry_bytes_remaining) 908 bytes_avail = zip->entry_bytes_remaining; 909 } 910 *size = bytes_avail; 911 zip->entry_bytes_remaining -= bytes_avail; 912 zip->entry_uncompressed_bytes_read += bytes_avail; 913 zip->entry_compressed_bytes_read += bytes_avail; 914 zip->unconsumed += bytes_avail; 915 *_buff = buff; 916 return (ARCHIVE_OK); 917 } 918 919 #ifdef HAVE_ZLIB_H 920 static int 921 zip_read_data_deflate(struct archive_read *a, const void **buff, 922 size_t *size, int64_t *offset) 923 { 924 struct zip *zip; 925 ssize_t bytes_avail; 926 const void *compressed_buff; 927 int r; 928 929 zip = (struct zip *)(a->format->data); 930 931 /* If the buffer hasn't been allocated, allocate it now. */ 932 if (zip->uncompressed_buffer == NULL) { 933 zip->uncompressed_buffer_size = 256 * 1024; 934 zip->uncompressed_buffer 935 = (unsigned char *)malloc(zip->uncompressed_buffer_size); 936 if (zip->uncompressed_buffer == NULL) { 937 archive_set_error(&a->archive, ENOMEM, 938 "No memory for ZIP decompression"); 939 return (ARCHIVE_FATAL); 940 } 941 } 942 943 /* If we haven't yet read any data, initialize the decompressor. */ 944 if (!zip->decompress_init) { 945 if (zip->stream_valid) 946 r = inflateReset(&zip->stream); 947 else 948 r = inflateInit2(&zip->stream, 949 -15 /* Don't check for zlib header */); 950 if (r != Z_OK) { 951 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 952 "Can't initialize ZIP decompression."); 953 return (ARCHIVE_FATAL); 954 } 955 /* Stream structure has been set up. */ 956 zip->stream_valid = 1; 957 /* We've initialized decompression for this stream. */ 958 zip->decompress_init = 1; 959 } 960 961 /* 962 * Note: '1' here is a performance optimization. 963 * Recall that the decompression layer returns a count of 964 * available bytes; asking for more than that forces the 965 * decompressor to combine reads by copying data. 966 */ 967 compressed_buff = __archive_read_ahead(a, 1, &bytes_avail); 968 if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END) 969 && bytes_avail > zip->entry_bytes_remaining) { 970 bytes_avail = zip->entry_bytes_remaining; 971 } 972 if (bytes_avail <= 0) { 973 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 974 "Truncated ZIP file body"); 975 return (ARCHIVE_FATAL); 976 } 977 978 /* 979 * A bug in zlib.h: stream.next_in should be marked 'const' 980 * but isn't (the library never alters data through the 981 * next_in pointer, only reads it). The result: this ugly 982 * cast to remove 'const'. 983 */ 984 zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff; 985 zip->stream.avail_in = bytes_avail; 986 zip->stream.total_in = 0; 987 zip->stream.next_out = zip->uncompressed_buffer; 988 zip->stream.avail_out = zip->uncompressed_buffer_size; 989 zip->stream.total_out = 0; 990 991 r = inflate(&zip->stream, 0); 992 switch (r) { 993 case Z_OK: 994 break; 995 case Z_STREAM_END: 996 zip->end_of_entry = 1; 997 break; 998 case Z_MEM_ERROR: 999 archive_set_error(&a->archive, ENOMEM, 1000 "Out of memory for ZIP decompression"); 1001 return (ARCHIVE_FATAL); 1002 default: 1003 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 1004 "ZIP decompression failed (%d)", r); 1005 return (ARCHIVE_FATAL); 1006 } 1007 1008 /* Consume as much as the compressor actually used. */ 1009 bytes_avail = zip->stream.total_in; 1010 __archive_read_consume(a, bytes_avail); 1011 zip->entry_bytes_remaining -= bytes_avail; 1012 zip->entry_compressed_bytes_read += bytes_avail; 1013 1014 *size = zip->stream.total_out; 1015 zip->entry_uncompressed_bytes_read += zip->stream.total_out; 1016 *buff = zip->uncompressed_buffer; 1017 1018 if (zip->end_of_entry && (zip->entry->flags & ZIP_LENGTH_AT_END)) { 1019 const char *p; 1020 1021 if (NULL == (p = __archive_read_ahead(a, 16, NULL))) { 1022 archive_set_error(&a->archive, 1023 ARCHIVE_ERRNO_FILE_FORMAT, 1024 "Truncated ZIP end-of-file record"); 1025 return (ARCHIVE_FATAL); 1026 } 1027 /* Consume the optional PK\007\010 marker. */ 1028 if (p[0] == 'P' && p[1] == 'K' && p[2] == '\007' && p[3] == '\010') { 1029 zip->entry->crc32 = archive_le32dec(p + 4); 1030 zip->entry->compressed_size = archive_le32dec(p + 8); 1031 zip->entry->uncompressed_size = archive_le32dec(p + 12); 1032 zip->unconsumed = 16; 1033 } 1034 } 1035 1036 return (ARCHIVE_OK); 1037 } 1038 #endif 1039 1040 static int 1041 archive_read_format_zip_read_data_skip(struct archive_read *a) 1042 { 1043 struct zip *zip; 1044 1045 zip = (struct zip *)(a->format->data); 1046 1047 /* If we've already read to end of data, we're done. */ 1048 if (zip->end_of_entry) 1049 return (ARCHIVE_OK); 1050 /* If we're seeking, we're done. */ 1051 if (zip->have_central_directory) 1052 return (ARCHIVE_OK); 1053 1054 /* So we know we're streaming... */ 1055 if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)) { 1056 /* We know the compressed length, so we can just skip. */ 1057 int64_t bytes_skipped = __archive_read_consume(a, 1058 zip->entry_bytes_remaining + zip->unconsumed); 1059 if (bytes_skipped < 0) 1060 return (ARCHIVE_FATAL); 1061 zip->unconsumed = 0; 1062 return (ARCHIVE_OK); 1063 } 1064 1065 /* We're streaming and we don't know the length. */ 1066 /* If the body is compressed and we know the format, we can 1067 * find an exact end-of-entry by decompressing it. */ 1068 switch (zip->entry->compression) { 1069 #ifdef HAVE_ZLIB_H 1070 case 8: /* Deflate compression. */ 1071 while (!zip->end_of_entry) { 1072 int64_t offset = 0; 1073 const void *buff = NULL; 1074 size_t size = 0; 1075 int r; 1076 r = zip_read_data_deflate(a, &buff, &size, &offset); 1077 if (r != ARCHIVE_OK) 1078 return (r); 1079 } 1080 break; 1081 #endif 1082 default: /* Uncompressed or unknown. */ 1083 /* Scan for a PK\007\010 signature. */ 1084 __archive_read_consume(a, zip->unconsumed); 1085 zip->unconsumed = 0; 1086 for (;;) { 1087 const char *p, *buff; 1088 ssize_t bytes_avail; 1089 buff = __archive_read_ahead(a, 16, &bytes_avail); 1090 if (bytes_avail < 16) { 1091 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 1092 "Truncated ZIP file data"); 1093 return (ARCHIVE_FATAL); 1094 } 1095 p = buff; 1096 while (p < buff + bytes_avail - 16) { 1097 if (p[3] == 'P') { p += 3; } 1098 else if (p[3] == 'K') { p += 2; } 1099 else if (p[3] == '\007') { p += 1; } 1100 else if (p[3] == '\010' && p[2] == '\007' 1101 && p[1] == 'K' && p[0] == 'P') { 1102 __archive_read_consume(a, p - buff + 16); 1103 return ARCHIVE_OK; 1104 } else { p += 4; } 1105 } 1106 __archive_read_consume(a, p - buff); 1107 } 1108 } 1109 return ARCHIVE_OK; 1110 } 1111 1112 static int 1113 archive_read_format_zip_cleanup(struct archive_read *a) 1114 { 1115 struct zip *zip; 1116 1117 zip = (struct zip *)(a->format->data); 1118 #ifdef HAVE_ZLIB_H 1119 if (zip->stream_valid) 1120 inflateEnd(&zip->stream); 1121 #endif 1122 free(zip->zip_entries); 1123 free(zip->uncompressed_buffer); 1124 archive_string_free(&(zip->extra)); 1125 free(zip); 1126 (a->format->data) = NULL; 1127 return (ARCHIVE_OK); 1128 } 1129 1130 /* 1131 * The extra data is stored as a list of 1132 * id1+size1+data1 + id2+size2+data2 ... 1133 * triplets. id and size are 2 bytes each. 1134 */ 1135 static void 1136 process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry) 1137 { 1138 unsigned offset = 0; 1139 1140 while (offset < extra_length - 4) 1141 { 1142 unsigned short headerid = archive_le16dec(p + offset); 1143 unsigned short datasize = archive_le16dec(p + offset + 2); 1144 offset += 4; 1145 if (offset + datasize > extra_length) 1146 break; 1147 #ifdef DEBUG 1148 fprintf(stderr, "Header id 0x%x, length %d\n", 1149 headerid, datasize); 1150 #endif 1151 switch (headerid) { 1152 case 0x0001: 1153 /* Zip64 extended information extra field. */ 1154 if (datasize >= 8) 1155 zip_entry->uncompressed_size = 1156 archive_le64dec(p + offset); 1157 if (datasize >= 16) 1158 zip_entry->compressed_size = 1159 archive_le64dec(p + offset + 8); 1160 break; 1161 case 0x5455: 1162 { 1163 /* Extended time field "UT". */ 1164 int flags = p[offset]; 1165 offset++; 1166 datasize--; 1167 /* Flag bits indicate which dates are present. */ 1168 if (flags & 0x01) 1169 { 1170 #ifdef DEBUG 1171 fprintf(stderr, "mtime: %lld -> %d\n", 1172 (long long)zip_entry->mtime, 1173 archive_le32dec(p + offset)); 1174 #endif 1175 if (datasize < 4) 1176 break; 1177 zip_entry->mtime = archive_le32dec(p + offset); 1178 offset += 4; 1179 datasize -= 4; 1180 } 1181 if (flags & 0x02) 1182 { 1183 if (datasize < 4) 1184 break; 1185 zip_entry->atime = archive_le32dec(p + offset); 1186 offset += 4; 1187 datasize -= 4; 1188 } 1189 if (flags & 0x04) 1190 { 1191 if (datasize < 4) 1192 break; 1193 zip_entry->ctime = archive_le32dec(p + offset); 1194 offset += 4; 1195 datasize -= 4; 1196 } 1197 break; 1198 } 1199 case 0x5855: 1200 { 1201 /* Info-ZIP Unix Extra Field (old version) "UX". */ 1202 if (datasize >= 8) { 1203 zip_entry->atime = archive_le32dec(p + offset); 1204 zip_entry->mtime = archive_le32dec(p + offset + 4); 1205 } 1206 if (datasize >= 12) { 1207 zip_entry->uid = archive_le16dec(p + offset + 8); 1208 zip_entry->gid = archive_le16dec(p + offset + 10); 1209 } 1210 break; 1211 } 1212 case 0x7855: 1213 /* Info-ZIP Unix Extra Field (type 2) "Ux". */ 1214 #ifdef DEBUG 1215 fprintf(stderr, "uid %d gid %d\n", 1216 archive_le16dec(p + offset), 1217 archive_le16dec(p + offset + 2)); 1218 #endif 1219 if (datasize >= 2) 1220 zip_entry->uid = archive_le16dec(p + offset); 1221 if (datasize >= 4) 1222 zip_entry->gid = archive_le16dec(p + offset + 2); 1223 break; 1224 case 0x7875: 1225 { 1226 /* Info-Zip Unix Extra Field (type 3) "ux". */ 1227 int uidsize = 0, gidsize = 0; 1228 1229 if (datasize >= 1 && p[offset] == 1) {/* version=1 */ 1230 if (datasize >= 4) { 1231 /* get a uid size. */ 1232 uidsize = p[offset+1]; 1233 if (uidsize == 2) 1234 zip_entry->uid = archive_le16dec( 1235 p + offset + 2); 1236 else if (uidsize == 4 && datasize >= 6) 1237 zip_entry->uid = archive_le32dec( 1238 p + offset + 2); 1239 } 1240 if (datasize >= (2 + uidsize + 3)) { 1241 /* get a gid size. */ 1242 gidsize = p[offset+2+uidsize]; 1243 if (gidsize == 2) 1244 zip_entry->gid = archive_le16dec( 1245 p+offset+2+uidsize+1); 1246 else if (gidsize == 4 && 1247 datasize >= (2 + uidsize + 5)) 1248 zip_entry->gid = archive_le32dec( 1249 p+offset+2+uidsize+1); 1250 } 1251 } 1252 break; 1253 } 1254 default: 1255 break; 1256 } 1257 offset += datasize; 1258 } 1259 #ifdef DEBUG 1260 if (offset != extra_length) 1261 { 1262 fprintf(stderr, 1263 "Extra data field contents do not match reported size!\n"); 1264 } 1265 #endif 1266 } 1267