1 /*-
2 * Copyright (c) 2004 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 2009-12-28 03:11:36Z kientzle $");
28
29 #ifdef HAVE_ERRNO_H
30 #include <errno.h>
31 #endif
32 #include <stdio.h>
33 #ifdef HAVE_STDLIB_H
34 #include <stdlib.h>
35 #endif
36 #include <time.h>
37 #ifdef HAVE_ZLIB_H
38 #include <zlib.h>
39 #endif
40
41 #include "archive.h"
42 #include "archive_entry.h"
43 #include "archive_private.h"
44 #include "archive_read_private.h"
45 #include "archive_endian.h"
46
47 #ifndef HAVE_ZLIB_H
48 #include "archive_crc32.h"
49 #endif
50
51 struct zip {
52 /* entry_bytes_remaining is the number of bytes we expect. */
53 int64_t entry_bytes_remaining;
54 int64_t entry_offset;
55
56 /* These count the number of bytes actually read for the entry. */
57 int64_t entry_compressed_bytes_read;
58 int64_t entry_uncompressed_bytes_read;
59
60 /* Running CRC32 of the decompressed data */
61 unsigned long entry_crc32;
62
63 unsigned version;
64 unsigned system;
65 unsigned flags;
66 unsigned compression;
67 const char * compression_name;
68 time_t mtime;
69 time_t ctime;
70 time_t atime;
71 mode_t mode;
72 uid_t uid;
73 gid_t gid;
74
75 /* Flags to mark progress of decompression. */
76 char decompress_init;
77 char end_of_entry;
78
79 unsigned long crc32;
80 ssize_t filename_length;
81 ssize_t extra_length;
82 int64_t uncompressed_size;
83 int64_t compressed_size;
84
85 unsigned char *uncompressed_buffer;
86 size_t uncompressed_buffer_size;
87 #ifdef HAVE_ZLIB_H
88 z_stream stream;
89 char stream_valid;
90 #endif
91
92 struct archive_string pathname;
93 struct archive_string extra;
94 char format_name[64];
95 };
96
/*
 * Bit in the header 'flags' field: when set, this reader takes the CRC
 * and sizes from a 16-byte record that follows the entry data.
 */
#define	ZIP_LENGTH_AT_END	(1 << 3)
98
/*
 * On-disk layout of a local file header.  Every member is a char array
 * so the structure has no padding; multi-byte values are decoded with
 * the archive_le*dec() helpers.
 */
struct zip_file_header {
	char	signature[4];		/* "PK\003\004" */
	char	version[2];		/* [0] = version, [1] = system */
	char	flags[2];		/* see ZIP_LENGTH_AT_END */
	char	compression[2];		/* index into compression_names */
	char	timedate[4];		/* MS-DOS time, then MS-DOS date */
	char	crc32[4];
	char	compressed_size[4];
	char	uncompressed_size[4];
	char	filename_length[2];
	char	extra_length[2];
};
111
/* Printable names for compression methods 0 through 8, indexed by method. */
static const char * const compression_names[] = {
	"uncompressed",		/* 0 */
	"shrinking",		/* 1 */
	"reduced-1",		/* 2 */
	"reduced-2",		/* 3 */
	"reduced-3",		/* 4 */
	"reduced-4",		/* 5 */
	"imploded",		/* 6 */
	"reserved",		/* 7 */
	"deflation"		/* 8 */
};
123
/* Callbacks registered with the read core. */
static int	archive_read_format_zip_bid(struct archive_read *a);
static int	archive_read_format_zip_cleanup(struct archive_read *a);
static int	archive_read_format_zip_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_zip_read_data_skip(struct archive_read *a);
static int	archive_read_format_zip_read_header(struct archive_read *a,
		    struct archive_entry *entry);

/* Internal helpers. */
static int	zip_read_data_deflate(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	zip_read_data_none(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	zip_read_file_header(struct archive_read *a,
		    struct archive_entry *entry, struct zip *zip);
static time_t	zip_time(const char *p);
static void	process_extra(const void *extra, struct zip *zip);
139
140 int
archive_read_support_format_zip(struct archive * _a)141 archive_read_support_format_zip(struct archive *_a)
142 {
143 struct archive_read *a = (struct archive_read *)_a;
144 struct zip *zip;
145 int r;
146
147 zip = (struct zip *)malloc(sizeof(*zip));
148 if (zip == NULL) {
149 archive_set_error(&a->archive, ENOMEM, "Can't allocate zip data");
150 return (ARCHIVE_FATAL);
151 }
152 memset(zip, 0, sizeof(*zip));
153
154 r = __archive_read_register_format(a,
155 zip,
156 "zip",
157 archive_read_format_zip_bid,
158 NULL,
159 archive_read_format_zip_read_header,
160 archive_read_format_zip_read_data,
161 archive_read_format_zip_read_data_skip,
162 archive_read_format_zip_cleanup);
163
164 if (r != ARCHIVE_OK)
165 free(zip);
166 return (ARCHIVE_OK);
167 }
168
169
/*
 * Bid on the input.  Returns -1 if four bytes cannot be read, 30 if the
 * data looks like a ZIP archive (or an "MZ" executable that contains a
 * deflated ZIP file entry), and 0 otherwise.
 */
static int
archive_read_format_zip_bid(struct archive_read *a)
{
	const char *sig, *scan, *limit;
	const void *window;
	ssize_t avail, pos;

	sig = __archive_read_ahead(a, 4, NULL);
	if (sig == NULL)
		return (-1);

	/*
	 * Bid of 30 here is: 16 bits for "PK",
	 * next 16-bit field has four options (-2 bits).
	 * 16 + 16-2 = 30.
	 */
	if (sig[0] == 'P' && sig[1] == 'K') {
		if ((sig[2] == '\001' && sig[3] == '\002')
		    || (sig[2] == '\003' && sig[3] == '\004')
		    || (sig[2] == '\005' && sig[3] == '\006')
		    || (sig[2] == '\007' && sig[3] == '\010')
		    || (sig[2] == '0' && sig[3] == '0'))
			return (30);
		return (0);
	}

	/* Anything that is not an "MZ" executable is not ours. */
	if (sig[0] != 'M' || sig[1] != 'Z')
		return (0);

	/*
	 * Self-extracting archive: look through the executable, 4k at a
	 * time, for a 'PK\003\004' file entry that uses deflate.
	 *
	 * TODO: Optimize by initializing 'pos' to an estimate of the
	 * likely start of the archive data based on values in the PE
	 * header.  We don't need to be exact, but we mustn't skip too
	 * far; the search below compensates if we undershoot.
	 */
	pos = 0;
	while (pos < 124000) {
		/* Get 4k of data beyond where we stopped. */
		window = __archive_read_ahead(a, pos + 4096, &avail);
		if (window == NULL)
			break;
		limit = (const char *)window + avail;
		for (scan = (const char *)window + pos; scan + 9 < limit;
		    ++scan) {
			if (scan[0] == 'P' && scan[1] == 'K'	/* signature */
			    && scan[2] == 3 && scan[3] == 4	/* file entry */
			    && scan[8] == 8	/* compression == deflate */
			    && scan[9] == 0)	/* high byte of compression */
				return (30);
		}
		pos = scan - (const char *)window;
	}

	return (0);
}
233
234 /*
235 * Search forward for a "PK\003\004" file header. This handles the
236 * case of self-extracting archives, where there is an executable
237 * prepended to the ZIP archive.
238 */
239 static int
skip_sfx(struct archive_read * a)240 skip_sfx(struct archive_read *a)
241 {
242 const void *h;
243 const char *p, *q;
244 size_t skip;
245 ssize_t bytes;
246
247 /*
248 * TODO: We should be able to skip forward by a bunch
249 * by lifting some values from the PE header. We don't
250 * need to be exact (we're still going to search forward
251 * to find the header), but it will speed things up and
252 * reduce the chance of a false positive.
253 */
254 for (;;) {
255 h = __archive_read_ahead(a, 4, &bytes);
256 if (bytes < 4)
257 return (ARCHIVE_FATAL);
258 p = h;
259 q = p + bytes;
260
261 /*
262 * Scan ahead until we find something that looks
263 * like the zip header.
264 */
265 while (p + 4 < q) {
266 switch (p[3]) {
267 case '\004':
268 /* TODO: Additional verification here. */
269 if (memcmp("PK\003\004", p, 4) == 0) {
270 skip = p - (const char *)h;
271 __archive_read_consume(a, skip);
272 return (ARCHIVE_OK);
273 }
274 p += 4;
275 break;
276 case '\003': p += 1; break;
277 case 'K': p += 2; break;
278 case 'P': p += 3; break;
279 default: p += 4; break;
280 }
281 }
282 skip = p - (const char *)h;
283 __archive_read_consume(a, skip);
284 }
285 }
286
287 static int
archive_read_format_zip_read_header(struct archive_read * a,struct archive_entry * entry)288 archive_read_format_zip_read_header(struct archive_read *a,
289 struct archive_entry *entry)
290 {
291 const void *h;
292 const char *signature;
293 struct zip *zip;
294 int r = ARCHIVE_OK, r1;
295
296 a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
297 if (a->archive.archive_format_name == NULL)
298 a->archive.archive_format_name = "ZIP";
299
300 zip = (struct zip *)(a->format->data);
301 zip->decompress_init = 0;
302 zip->end_of_entry = 0;
303 zip->entry_uncompressed_bytes_read = 0;
304 zip->entry_compressed_bytes_read = 0;
305 zip->entry_crc32 = crc32(0, NULL, 0);
306 if ((h = __archive_read_ahead(a, 4, NULL)) == NULL)
307 return (ARCHIVE_FATAL);
308
309 signature = (const char *)h;
310 if (signature[0] == 'M' && signature[1] == 'Z') {
311 /* This is an executable? Must be self-extracting... */
312 r = skip_sfx(a);
313 if (r < ARCHIVE_WARN)
314 return (r);
315 if ((h = __archive_read_ahead(a, 4, NULL)) == NULL)
316 return (ARCHIVE_FATAL);
317 signature = (const char *)h;
318 }
319
320 if (signature[0] != 'P' || signature[1] != 'K') {
321 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
322 "Bad ZIP file");
323 return (ARCHIVE_FATAL);
324 }
325
326 /*
327 * "PK00" signature is used for "split" archives that
328 * only have a single segment. This means we can just
329 * skip the PK00; the first real file header should follow.
330 */
331 if (signature[2] == '0' && signature[3] == '0') {
332 __archive_read_consume(a, 4);
333 if ((h = __archive_read_ahead(a, 4, NULL)) == NULL)
334 return (ARCHIVE_FATAL);
335 signature = (const char *)h;
336 if (signature[0] != 'P' || signature[1] != 'K') {
337 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
338 "Bad ZIP file");
339 return (ARCHIVE_FATAL);
340 }
341 }
342
343 if (signature[2] == '\001' && signature[3] == '\002') {
344 /* Beginning of central directory. */
345 return (ARCHIVE_EOF);
346 }
347
348 if (signature[2] == '\003' && signature[3] == '\004') {
349 /* Regular file entry. */
350 r1 = zip_read_file_header(a, entry, zip);
351 if (r1 != ARCHIVE_OK)
352 return (r1);
353 return (r);
354 }
355
356 if (signature[2] == '\005' && signature[3] == '\006') {
357 /* End-of-archive record. */
358 return (ARCHIVE_EOF);
359 }
360
361 if (signature[2] == '\007' && signature[3] == '\010') {
362 /*
363 * We should never encounter this record here;
364 * see ZIP_LENGTH_AT_END handling below for details.
365 */
366 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
367 "Bad ZIP file: Unexpected end-of-entry record");
368 return (ARCHIVE_FATAL);
369 }
370
371 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
372 "Damaged ZIP file or unsupported format variant (%d,%d)",
373 signature[2], signature[3]);
374 return (ARCHIVE_FATAL);
375 }
376
377 static int
zip_read_file_header(struct archive_read * a,struct archive_entry * entry,struct zip * zip)378 zip_read_file_header(struct archive_read *a, struct archive_entry *entry,
379 struct zip *zip)
380 {
381 const struct zip_file_header *p;
382 const void *h;
383
384 if ((p = __archive_read_ahead(a, sizeof *p, NULL)) == NULL) {
385 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
386 "Truncated ZIP file header");
387 return (ARCHIVE_FATAL);
388 }
389
390 zip->version = p->version[0];
391 zip->system = p->version[1];
392 zip->flags = archive_le16dec(p->flags);
393 zip->compression = archive_le16dec(p->compression);
394 if (zip->compression <
395 sizeof(compression_names)/sizeof(compression_names[0]))
396 zip->compression_name = compression_names[zip->compression];
397 else
398 zip->compression_name = "??";
399 zip->mtime = zip_time(p->timedate);
400 zip->ctime = 0;
401 zip->atime = 0;
402 zip->mode = 0;
403 zip->uid = 0;
404 zip->gid = 0;
405 zip->crc32 = archive_le32dec(p->crc32);
406 zip->filename_length = archive_le16dec(p->filename_length);
407 zip->extra_length = archive_le16dec(p->extra_length);
408 zip->uncompressed_size = archive_le32dec(p->uncompressed_size);
409 zip->compressed_size = archive_le32dec(p->compressed_size);
410
411 __archive_read_consume(a, sizeof(struct zip_file_header));
412
413
414 /* Read the filename. */
415 if ((h = __archive_read_ahead(a, zip->filename_length, NULL)) == NULL) {
416 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
417 "Truncated ZIP file header");
418 return (ARCHIVE_FATAL);
419 }
420 if (archive_string_ensure(&zip->pathname, zip->filename_length) == NULL)
421 __archive_errx(1, "Out of memory");
422 archive_strncpy(&zip->pathname, h, zip->filename_length);
423 __archive_read_consume(a, zip->filename_length);
424 archive_entry_set_pathname(entry, zip->pathname.s);
425
426 if (zip->pathname.s[archive_strlen(&zip->pathname) - 1] == '/')
427 zip->mode = AE_IFDIR | 0777;
428 else
429 zip->mode = AE_IFREG | 0777;
430
431 /* Read the extra data. */
432 if ((h = __archive_read_ahead(a, zip->extra_length, NULL)) == NULL) {
433 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
434 "Truncated ZIP file header");
435 return (ARCHIVE_FATAL);
436 }
437 process_extra(h, zip);
438 __archive_read_consume(a, zip->extra_length);
439
440 /* Populate some additional entry fields: */
441 archive_entry_set_mode(entry, zip->mode);
442 archive_entry_set_uid(entry, zip->uid);
443 archive_entry_set_gid(entry, zip->gid);
444 archive_entry_set_mtime(entry, zip->mtime, 0);
445 archive_entry_set_ctime(entry, zip->ctime, 0);
446 archive_entry_set_atime(entry, zip->atime, 0);
447 /* Set the size only if it's meaningful. */
448 if (0 == (zip->flags & ZIP_LENGTH_AT_END))
449 archive_entry_set_size(entry, zip->uncompressed_size);
450
451 zip->entry_bytes_remaining = zip->compressed_size;
452 zip->entry_offset = 0;
453
454 /* If there's no body, force read_data() to return EOF immediately. */
455 if (0 == (zip->flags & ZIP_LENGTH_AT_END)
456 && zip->entry_bytes_remaining < 1)
457 zip->end_of_entry = 1;
458
459 /* Set up a more descriptive format name. */
460 sprintf(zip->format_name, "ZIP %d.%d (%s)",
461 zip->version / 10, zip->version % 10,
462 zip->compression_name);
463 a->archive.archive_format_name = zip->format_name;
464
465 return (ARCHIVE_OK);
466 }
467
/*
 * Convert a 4-byte MS-DOS timestamp (little-endian time word followed
 * by little-endian date word) into a time_t.  The broken-down fields
 * are handed to mktime(), so they are interpreted as local time.
 */
static time_t
zip_time(const char *p)
{
	struct tm when;
	int dos_time, dos_date;

	/* Assemble the two 16-bit words byte by byte. */
	dos_time = (0xff & (unsigned)p[0]) + 256 * (0xff & (unsigned)p[1]);
	dos_date = (0xff & (unsigned)p[2]) + 256 * (0xff & (unsigned)p[3]);

	memset(&when, 0, sizeof(when));

	/* Date word: 7 bits year since 1980, 4 bits month, 5 bits day. */
	when.tm_mday = dos_date & 0x1f;
	when.tm_mon = ((dos_date >> 5) & 0x0f) - 1;
	when.tm_year = ((dos_date >> 9) & 0x7f) + 80;

	/* Time word: 5 bits hour, 6 bits minute, 5 bits of seconds/2. */
	when.tm_sec = (dos_time << 1) & 0x3e;
	when.tm_min = (dos_time >> 5) & 0x3f;
	when.tm_hour = (dos_time >> 11) & 0x1f;

	/* Let mktime() decide whether DST applies. */
	when.tm_isdst = -1;
	return mktime(&when);
}
488
489 static int
archive_read_format_zip_read_data(struct archive_read * a,const void ** buff,size_t * size,off_t * offset)490 archive_read_format_zip_read_data(struct archive_read *a,
491 const void **buff, size_t *size, off_t *offset)
492 {
493 int r;
494 struct zip *zip;
495
496 zip = (struct zip *)(a->format->data);
497
498 /*
499 * If we hit end-of-entry last time, clean up and return
500 * ARCHIVE_EOF this time.
501 */
502 if (zip->end_of_entry) {
503 *offset = zip->entry_uncompressed_bytes_read;
504 *size = 0;
505 *buff = NULL;
506 return (ARCHIVE_EOF);
507 }
508
509 switch(zip->compression) {
510 case 0: /* No compression. */
511 r = zip_read_data_none(a, buff, size, offset);
512 break;
513 case 8: /* Deflate compression. */
514 r = zip_read_data_deflate(a, buff, size, offset);
515 break;
516 default: /* Unsupported compression. */
517 *buff = NULL;
518 *size = 0;
519 *offset = 0;
520 /* Return a warning. */
521 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
522 "Unsupported ZIP compression method (%s)",
523 zip->compression_name);
524 if (zip->flags & ZIP_LENGTH_AT_END) {
525 /*
526 * ZIP_LENGTH_AT_END requires us to
527 * decompress the entry in order to
528 * skip it, but we don't know this
529 * compression method, so we give up.
530 */
531 r = ARCHIVE_FATAL;
532 } else {
533 /* We can't decompress this entry, but we will
534 * be able to skip() it and try the next entry. */
535 r = ARCHIVE_WARN;
536 }
537 break;
538 }
539 if (r != ARCHIVE_OK)
540 return (r);
541 /* Update checksum */
542 if (*size)
543 zip->entry_crc32 = crc32(zip->entry_crc32, *buff, *size);
544 /* If we hit the end, swallow any end-of-data marker. */
545 if (zip->end_of_entry) {
546 if (zip->flags & ZIP_LENGTH_AT_END) {
547 const char *p;
548
549 if ((p = __archive_read_ahead(a, 16, NULL)) == NULL) {
550 archive_set_error(&a->archive,
551 ARCHIVE_ERRNO_FILE_FORMAT,
552 "Truncated ZIP end-of-file record");
553 return (ARCHIVE_FATAL);
554 }
555 zip->crc32 = archive_le32dec(p + 4);
556 zip->compressed_size = archive_le32dec(p + 8);
557 zip->uncompressed_size = archive_le32dec(p + 12);
558 __archive_read_consume(a, 16);
559 }
560 /* Check file size, CRC against these values. */
561 if (zip->compressed_size != zip->entry_compressed_bytes_read) {
562 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
563 "ZIP compressed data is wrong size");
564 return (ARCHIVE_WARN);
565 }
566 /* Size field only stores the lower 32 bits of the actual size. */
567 if ((zip->uncompressed_size & UINT32_MAX)
568 != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) {
569 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
570 "ZIP uncompressed data is wrong size");
571 return (ARCHIVE_WARN);
572 }
573 /* Check computed CRC against header */
574 if (zip->crc32 != zip->entry_crc32) {
575 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
576 "ZIP bad CRC: 0x%lx should be 0x%lx",
577 zip->entry_crc32, zip->crc32);
578 return (ARCHIVE_WARN);
579 }
580 }
581
582 /* Return EOF immediately if this is a non-regular file. */
583 if (AE_IFREG != (zip->mode & AE_IFMT))
584 return (ARCHIVE_EOF);
585 return (ARCHIVE_OK);
586 }
587
588 /*
589 * Read "uncompressed" data. According to the current specification,
590 * if ZIP_LENGTH_AT_END is specified, then the size fields in the
591 * initial file header are supposed to be set to zero. This would, of
592 * course, make it impossible for us to read the archive, since we
593 * couldn't determine the end of the file data. Info-ZIP seems to
594 * include the real size fields both before and after the data in this
595 * case (the CRC only appears afterwards), so this works as you would
596 * expect.
597 *
598 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
599 * zip->end_of_entry if it consumes all of the data.
600 */
601 static int
zip_read_data_none(struct archive_read * a,const void ** buff,size_t * size,off_t * offset)602 zip_read_data_none(struct archive_read *a, const void **buff,
603 size_t *size, off_t *offset)
604 {
605 struct zip *zip;
606 ssize_t bytes_avail;
607
608 zip = (struct zip *)(a->format->data);
609
610 if (zip->entry_bytes_remaining == 0) {
611 *buff = NULL;
612 *size = 0;
613 *offset = zip->entry_offset;
614 zip->end_of_entry = 1;
615 return (ARCHIVE_OK);
616 }
617 /*
618 * Note: '1' here is a performance optimization.
619 * Recall that the decompression layer returns a count of
620 * available bytes; asking for more than that forces the
621 * decompressor to combine reads by copying data.
622 */
623 *buff = __archive_read_ahead(a, 1, &bytes_avail);
624 if (bytes_avail <= 0) {
625 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
626 "Truncated ZIP file data");
627 return (ARCHIVE_FATAL);
628 }
629 if (bytes_avail > zip->entry_bytes_remaining)
630 bytes_avail = zip->entry_bytes_remaining;
631 __archive_read_consume(a, bytes_avail);
632 *size = bytes_avail;
633 *offset = zip->entry_offset;
634 zip->entry_offset += *size;
635 zip->entry_bytes_remaining -= *size;
636 zip->entry_uncompressed_bytes_read += *size;
637 zip->entry_compressed_bytes_read += *size;
638 return (ARCHIVE_OK);
639 }
640
641 #ifdef HAVE_ZLIB_H
642 static int
zip_read_data_deflate(struct archive_read * a,const void ** buff,size_t * size,off_t * offset)643 zip_read_data_deflate(struct archive_read *a, const void **buff,
644 size_t *size, off_t *offset)
645 {
646 struct zip *zip;
647 ssize_t bytes_avail;
648 const void *compressed_buff;
649 int r;
650
651 zip = (struct zip *)(a->format->data);
652
653 /* If the buffer hasn't been allocated, allocate it now. */
654 if (zip->uncompressed_buffer == NULL) {
655 zip->uncompressed_buffer_size = 32 * 1024;
656 zip->uncompressed_buffer
657 = (unsigned char *)malloc(zip->uncompressed_buffer_size);
658 if (zip->uncompressed_buffer == NULL) {
659 archive_set_error(&a->archive, ENOMEM,
660 "No memory for ZIP decompression");
661 return (ARCHIVE_FATAL);
662 }
663 }
664
665 /* If we haven't yet read any data, initialize the decompressor. */
666 if (!zip->decompress_init) {
667 if (zip->stream_valid)
668 r = inflateReset(&zip->stream);
669 else
670 r = inflateInit2(&zip->stream,
671 -15 /* Don't check for zlib header */);
672 if (r != Z_OK) {
673 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
674 "Can't initialize ZIP decompression.");
675 return (ARCHIVE_FATAL);
676 }
677 /* Stream structure has been set up. */
678 zip->stream_valid = 1;
679 /* We've initialized decompression for this stream. */
680 zip->decompress_init = 1;
681 }
682
683 /*
684 * Note: '1' here is a performance optimization.
685 * Recall that the decompression layer returns a count of
686 * available bytes; asking for more than that forces the
687 * decompressor to combine reads by copying data.
688 */
689 compressed_buff = __archive_read_ahead(a, 1, &bytes_avail);
690 if (bytes_avail <= 0) {
691 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
692 "Truncated ZIP file body");
693 return (ARCHIVE_FATAL);
694 }
695
696 /*
697 * A bug in zlib.h: stream.next_in should be marked 'const'
698 * but isn't (the library never alters data through the
699 * next_in pointer, only reads it). The result: this ugly
700 * cast to remove 'const'.
701 */
702 zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff;
703 zip->stream.avail_in = bytes_avail;
704 zip->stream.total_in = 0;
705 zip->stream.next_out = zip->uncompressed_buffer;
706 zip->stream.avail_out = zip->uncompressed_buffer_size;
707 zip->stream.total_out = 0;
708
709 r = inflate(&zip->stream, 0);
710 switch (r) {
711 case Z_OK:
712 break;
713 case Z_STREAM_END:
714 zip->end_of_entry = 1;
715 break;
716 case Z_MEM_ERROR:
717 archive_set_error(&a->archive, ENOMEM,
718 "Out of memory for ZIP decompression");
719 return (ARCHIVE_FATAL);
720 default:
721 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
722 "ZIP decompression failed (%d)", r);
723 return (ARCHIVE_FATAL);
724 }
725
726 /* Consume as much as the compressor actually used. */
727 bytes_avail = zip->stream.total_in;
728 __archive_read_consume(a, bytes_avail);
729 zip->entry_bytes_remaining -= bytes_avail;
730 zip->entry_compressed_bytes_read += bytes_avail;
731
732 *offset = zip->entry_offset;
733 *size = zip->stream.total_out;
734 zip->entry_uncompressed_bytes_read += *size;
735 *buff = zip->uncompressed_buffer;
736 zip->entry_offset += *size;
737 return (ARCHIVE_OK);
738 }
739 #else
740 static int
zip_read_data_deflate(struct archive_read * a,const void ** buff,size_t * size,off_t * offset)741 zip_read_data_deflate(struct archive_read *a, const void **buff,
742 size_t *size, off_t *offset)
743 {
744 *buff = NULL;
745 *size = 0;
746 *offset = 0;
747 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
748 "libarchive compiled without deflate support (no libz)");
749 return (ARCHIVE_FATAL);
750 }
751 #endif
752
753 static int
archive_read_format_zip_read_data_skip(struct archive_read * a)754 archive_read_format_zip_read_data_skip(struct archive_read *a)
755 {
756 struct zip *zip;
757 const void *buff = NULL;
758 off_t bytes_skipped;
759
760 zip = (struct zip *)(a->format->data);
761
762 /* If we've already read to end of data, we're done. */
763 if (zip->end_of_entry)
764 return (ARCHIVE_OK);
765
766 /*
767 * If the length is at the end, we have no choice but
768 * to decompress all the data to find the end marker.
769 */
770 if (zip->flags & ZIP_LENGTH_AT_END) {
771 size_t size;
772 off_t offset;
773 int r;
774 do {
775 r = archive_read_format_zip_read_data(a, &buff,
776 &size, &offset);
777 } while (r == ARCHIVE_OK);
778 return (r);
779 }
780
781 /*
782 * If the length is at the beginning, we can skip the
783 * compressed data much more quickly.
784 */
785 bytes_skipped = __archive_read_skip(a, zip->entry_bytes_remaining);
786 if (bytes_skipped < 0)
787 return (ARCHIVE_FATAL);
788
789 /* This entry is finished and done. */
790 zip->end_of_entry = 1;
791 return (ARCHIVE_OK);
792 }
793
794 static int
archive_read_format_zip_cleanup(struct archive_read * a)795 archive_read_format_zip_cleanup(struct archive_read *a)
796 {
797 struct zip *zip;
798
799 zip = (struct zip *)(a->format->data);
800 #ifdef HAVE_ZLIB_H
801 if (zip->stream_valid)
802 inflateEnd(&zip->stream);
803 #endif
804 free(zip->uncompressed_buffer);
805 archive_string_free(&(zip->pathname));
806 archive_string_free(&(zip->extra));
807 free(zip);
808 (a->format->data) = NULL;
809 return (ARCHIVE_OK);
810 }
811
812 /*
813 * The extra data is stored as a list of
814 * id1+size1+data1 + id2+size2+data2 ...
815 * triplets. id and size are 2 bytes each.
816 */
817 static void
process_extra(const void * extra,struct zip * zip)818 process_extra(const void* extra, struct zip* zip)
819 {
820 int offset = 0;
821 const char *p = (const char *)extra;
822 while (offset < zip->extra_length - 4)
823 {
824 unsigned short headerid = archive_le16dec(p + offset);
825 unsigned short datasize = archive_le16dec(p + offset + 2);
826 offset += 4;
827 if (offset + datasize > zip->extra_length)
828 break;
829 #ifdef DEBUG
830 fprintf(stderr, "Header id 0x%04x, length %d\n",
831 headerid, datasize);
832 #endif
833 switch (headerid) {
834 case 0x0001:
835 /* Zip64 extended information extra field. */
836 if (datasize >= 8)
837 zip->uncompressed_size = archive_le64dec(p + offset);
838 if (datasize >= 16)
839 zip->compressed_size = archive_le64dec(p + offset + 8);
840 break;
841 case 0x5455:
842 {
843 /* Extended time field "UT". */
844 int flags = p[offset];
845 offset++;
846 datasize--;
847 /* Flag bits indicate which dates are present. */
848 if (flags & 0x01)
849 {
850 #ifdef DEBUG
851 fprintf(stderr, "mtime: %lld -> %d\n",
852 (long long)zip->mtime,
853 archive_le32dec(p + offset));
854 #endif
855 if (datasize < 4)
856 break;
857 zip->mtime = archive_le32dec(p + offset);
858 offset += 4;
859 datasize -= 4;
860 }
861 if (flags & 0x02)
862 {
863 if (datasize < 4)
864 break;
865 zip->atime = archive_le32dec(p + offset);
866 offset += 4;
867 datasize -= 4;
868 }
869 if (flags & 0x04)
870 {
871 if (datasize < 4)
872 break;
873 zip->ctime = archive_le32dec(p + offset);
874 offset += 4;
875 datasize -= 4;
876 }
877 break;
878 }
879 case 0x7855:
880 /* Info-ZIP Unix Extra Field (type 2) "Ux". */
881 #ifdef DEBUG
882 fprintf(stderr, "uid %d gid %d\n",
883 archive_le16dec(p + offset),
884 archive_le16dec(p + offset + 2));
885 #endif
886 if (datasize >= 2)
887 zip->uid = archive_le16dec(p + offset);
888 if (datasize >= 4)
889 zip->gid = archive_le16dec(p + offset + 2);
890 break;
891 default:
892 break;
893 }
894 offset += datasize;
895 }
896 #ifdef DEBUG
897 if (offset != zip->extra_length)
898 {
899 fprintf(stderr,
900 "Extra data field contents do not match reported size!");
901 }
902 #endif
903 }
904