1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 
28 __FBSDID("$FreeBSD$");
29 
30 
31 #ifdef HAVE_ERRNO_H
32 #include <errno.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40 #ifdef HAVE_LIMITS_H
41 #include <limits.h>
42 #endif
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_ZLIB_H
47 #include <zlib.h>
48 #endif
49 
50 #include "archive.h"
51 #include "archive_entry.h"
52 #include "archive_endian.h"
53 #include "archive_private.h"
54 #include "archive_read_private.h"
55 
56 #ifdef HAVE_ZLIB_H
57 struct private_data {
58 	z_stream	 stream;
59 	char		 in_stream;
60 	unsigned char	*out_block;
61 	size_t		 out_block_size;
62 	int64_t		 total_out;
63 	unsigned long	 crc;
64 	uint32_t	 mtime;
65 	char		*name;
66 	char		 eof; /* True = found end of compressed data. */
67 };
68 
69 /* Gzip Filter. */
70 static ssize_t	gzip_filter_read(struct archive_read_filter *, const void **);
71 static int	gzip_filter_close(struct archive_read_filter *);
72 #endif
73 
/*
 * Note that we can detect gzip archives even if we can't decompress
 * them.  (In fact, we like detecting them because we can give better
 * error messages.)  So the bid framework here gets compiled even
 * if zlib is unavailable.
 *
 * If zlib is unavailable, gzip_bidder_init() falls back to running an
 * external "gzip -d" program through __archive_read_program().
 */
84 static int	gzip_bidder_bid(struct archive_read_filter_bidder *,
85 		    struct archive_read_filter *);
86 static int	gzip_bidder_init(struct archive_read_filter *);
87 
88 #if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Deprecated; remove in libarchive 4.0.
 * Legacy name that simply forwards to archive_read_support_filter_gzip().
 */
int
archive_read_support_compression_gzip(struct archive *a)
{
	return (archive_read_support_filter_gzip(a));
}
95 #endif
96 
97 static const struct archive_read_filter_bidder_vtable
98 gzip_bidder_vtable = {
99 	.bid = gzip_bidder_bid,
100 	.init = gzip_bidder_init,
101 };
102 
103 int
104 archive_read_support_filter_gzip(struct archive *_a)
105 {
106 	struct archive_read *a = (struct archive_read *)_a;
107 
108 	if (__archive_read_register_bidder(a, NULL, "gzip",
109 				&gzip_bidder_vtable) != ARCHIVE_OK)
110 		return (ARCHIVE_FATAL);
111 
112 	/* Signal the extent of gzip support with the return value here. */
113 #if HAVE_ZLIB_H
114 	return (ARCHIVE_OK);
115 #else
116 	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
117 	    "Using external gzip program");
118 	return (ARCHIVE_WARN);
119 #endif
120 }
121 
/*
 * Step over a NUL-terminated header field that begins at offset `len`.
 *
 * The look-ahead window described by *pp / *pavail is re-requested from
 * `filter` whenever it is shorter than the offset being examined, and
 * both are updated for the caller.  Returns the offset just past the
 * terminating NUL, or 0 if a look-ahead request returns NULL.
 */
static ssize_t
skip_header_string(struct archive_read_filter *filter,
    const unsigned char **pp, ssize_t *pavail, ssize_t len)
{
	for (;;) {
		++len;
		if (*pavail < len)
			*pp = __archive_read_filter_ahead(filter,
			    len, pavail);
		if (*pp == NULL)
			return (0);
		if ((*pp)[len - 1] == 0)
			return (len);
	}
}

/*
 * Read and verify the header without consuming it.
 *
 * Returns zero if the header couldn't be validated, else returns the
 * number of bytes in the header.  If pbits is non-NULL, it receives a
 * count of bits verified, suitable for use by the bidder.  When built
 * with zlib and `state` is non-NULL, the header's mtime and optional
 * filename are recorded in it.
 */
static ssize_t
peek_at_header(struct archive_read_filter *filter, int *pbits,
#ifdef HAVE_ZLIB_H
	       struct private_data *state
#else
	       void *state
#endif
	      )
{
	/* Header flag bits, named as in RFC 1952. */
	enum {
		GZ_FHCRC = 2,
		GZ_FEXTRA = 4,
		GZ_FNAME = 8,
		GZ_FCOMMENT = 16,
		GZ_RESERVED = 0xE0
	};
	const unsigned char *p;
	ssize_t avail;
	ssize_t len = 10;	/* The first ten bytes are fixed layout. */
	int bits;
	int flags;
#ifndef HAVE_ZLIB_H
	(void)state; /* UNUSED */
#endif

	p = __archive_read_filter_ahead(filter, len, &avail);
	if (p == NULL || avail == 0)
		return (0);

	/* Magic number, then the method byte: only deflate (0x08). */
	if (memcmp(p, "\x1F\x8B\x08", 3) != 0)
		return (0);
	/* No reserved flags may be set. */
	flags = p[3];
	if ((flags & GZ_RESERVED) != 0)
		return (0);
	/* 24 bits of magic/method plus the 3 reserved flag bits. */
	bits = 24 + 3;

	/* Bytes 4-7 are mod time in little endian. */
#ifdef HAVE_ZLIB_H
	if (state != NULL)
		state->mtime = archive_le32dec(p + 4);
#endif
	/* Byte 8 is deflate flags. */
	/* XXXX TODO: return deflate flags back to consume_header for use
	   in initializing the decompressor. */
	/* Byte 9 is OS. */

	/* Optional extra data:  2 byte length plus variable body. */
	if (flags & GZ_FEXTRA) {
		int extra_len;

		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		extra_len = ((int)p[len + 1] << 8) | (int)p[len];
		len += 2 + extra_len;
	}

	/* Null-terminated optional filename. */
	if (flags & GZ_FNAME) {
#ifdef HAVE_ZLIB_H
		const ssize_t name_start = len;
#endif
		len = skip_header_string(filter, &p, &avail, len);
		if (len == 0)
			return (0);
#ifdef HAVE_ZLIB_H
		if (state != NULL) {
			/* Reset the name in case of repeat header reads. */
			free(state->name);
			state->name = strdup((const char *)&p[name_start]);
		}
#endif
	}

	/* Null-terminated optional comment. */
	if (flags & GZ_FCOMMENT) {
		len = skip_header_string(filter, &p, &avail, len);
		if (len == 0)
			return (0);
	}

	/* Optional header CRC */
	if (flags & GZ_FHCRC) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		/*
		 * XXX TODO: Compute the header CRC and compare it with the
		 * stored little-endian 16-bit value at p[len]; on a match
		 * that would be 16 more verified bits.  For now the field
		 * is only skipped.
		 */
		len += 2;
	}

	if (pbits != NULL)
		*pbits = bits;
	return (len);
}
233 
/*
 * Bid on the input: a header accepted by peek_at_header() scores the
 * number of bits it verified; anything else scores zero.
 */
static int
gzip_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	int verified_bits = 0;

	(void)self; /* UNUSED */

	if (peek_at_header(filter, &verified_bits, NULL) == 0)
		return (0);
	return (verified_bits);
}
249 
250 #ifndef HAVE_ZLIB_H
251 
252 /*
253  * If we don't have the library on this system, we can't do the
254  * decompression directly.  We can, however, try to run "gzip -d"
255  * in case that's available.
256  */
257 static int
258 gzip_bidder_init(struct archive_read_filter *self)
259 {
260 	int r;
261 
262 	r = __archive_read_program(self, "gzip -d");
263 	/* Note: We set the format here even if __archive_read_program()
264 	 * above fails.  We do, after all, know what the format is
265 	 * even if we weren't able to read it. */
266 	self->code = ARCHIVE_FILTER_GZIP;
267 	self->name = "gzip";
268 	return (r);
269 }
270 
271 #else
272 
273 static int
274 gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry)
275 {
276 	struct private_data *state;
277 
278 	state = (struct private_data *)self->data;
279 
280 	/* A mtime of 0 is considered invalid/missing. */
281 	if (state->mtime != 0)
282 		archive_entry_set_mtime(entry, state->mtime, 0);
283 
284 	/* If the name is available, extract it. */
285 	if (state->name)
286 		archive_entry_set_pathname(entry, state->name);
287 
288 	return (ARCHIVE_OK);
289 }
290 
291 static const struct archive_read_filter_vtable
292 gzip_reader_vtable = {
293 	.read = gzip_filter_read,
294 	.close = gzip_filter_close,
295 #ifdef HAVE_ZLIB_H
296 	.read_header = gzip_read_header,
297 #endif
298 };
299 
300 /*
301  * Initialize the filter object.
302  */
303 static int
304 gzip_bidder_init(struct archive_read_filter *self)
305 {
306 	struct private_data *state;
307 	static const size_t out_block_size = 64 * 1024;
308 	void *out_block;
309 
310 	self->code = ARCHIVE_FILTER_GZIP;
311 	self->name = "gzip";
312 
313 	state = (struct private_data *)calloc(sizeof(*state), 1);
314 	out_block = (unsigned char *)malloc(out_block_size);
315 	if (state == NULL || out_block == NULL) {
316 		free(out_block);
317 		free(state);
318 		archive_set_error(&self->archive->archive, ENOMEM,
319 		    "Can't allocate data for gzip decompression");
320 		return (ARCHIVE_FATAL);
321 	}
322 
323 	self->data = state;
324 	state->out_block_size = out_block_size;
325 	state->out_block = out_block;
326 	self->vtable = &gzip_reader_vtable;
327 
328 	state->in_stream = 0; /* We're not actually within a stream yet. */
329 
330 	return (ARCHIVE_OK);
331 }
332 
333 static int
334 consume_header(struct archive_read_filter *self)
335 {
336 	struct private_data *state;
337 	ssize_t avail;
338 	size_t len;
339 	int ret;
340 
341 	state = (struct private_data *)self->data;
342 
343 	/* If this is a real header, consume it. */
344 	len = peek_at_header(self->upstream, NULL, state);
345 	if (len == 0)
346 		return (ARCHIVE_EOF);
347 	__archive_read_filter_consume(self->upstream, len);
348 
349 	/* Initialize CRC accumulator. */
350 	state->crc = crc32(0L, NULL, 0);
351 
352 	/* Initialize compression library. */
353 	state->stream.next_in = (unsigned char *)(uintptr_t)
354 	    __archive_read_filter_ahead(self->upstream, 1, &avail);
355 	state->stream.avail_in = (uInt)avail;
356 	ret = inflateInit2(&(state->stream),
357 	    -15 /* Don't check for zlib header */);
358 
359 	/* Decipher the error code. */
360 	switch (ret) {
361 	case Z_OK:
362 		state->in_stream = 1;
363 		return (ARCHIVE_OK);
364 	case Z_STREAM_ERROR:
365 		archive_set_error(&self->archive->archive,
366 		    ARCHIVE_ERRNO_MISC,
367 		    "Internal error initializing compression library: "
368 		    "invalid setup parameter");
369 		break;
370 	case Z_MEM_ERROR:
371 		archive_set_error(&self->archive->archive, ENOMEM,
372 		    "Internal error initializing compression library: "
373 		    "out of memory");
374 		break;
375 	case Z_VERSION_ERROR:
376 		archive_set_error(&self->archive->archive,
377 		    ARCHIVE_ERRNO_MISC,
378 		    "Internal error initializing compression library: "
379 		    "invalid library version");
380 		break;
381 	default:
382 		archive_set_error(&self->archive->archive,
383 		    ARCHIVE_ERRNO_MISC,
384 		    "Internal error initializing compression library: "
385 		    " Zlib error %d", ret);
386 		break;
387 	}
388 	return (ARCHIVE_FATAL);
389 }
390 
391 static int
392 consume_trailer(struct archive_read_filter *self)
393 {
394 	struct private_data *state;
395 	const unsigned char *p;
396 	ssize_t avail;
397 
398 	state = (struct private_data *)self->data;
399 
400 	state->in_stream = 0;
401 	switch (inflateEnd(&(state->stream))) {
402 	case Z_OK:
403 		break;
404 	default:
405 		archive_set_error(&self->archive->archive,
406 		    ARCHIVE_ERRNO_MISC,
407 		    "Failed to clean up gzip decompressor");
408 		return (ARCHIVE_FATAL);
409 	}
410 
411 	/* GZip trailer is a fixed 8 byte structure. */
412 	p = __archive_read_filter_ahead(self->upstream, 8, &avail);
413 	if (p == NULL || avail == 0)
414 		return (ARCHIVE_FATAL);
415 
416 	/* XXX TODO: Verify the length and CRC. */
417 
418 	/* We've verified the trailer, so consume it now. */
419 	__archive_read_filter_consume(self->upstream, 8);
420 
421 	return (ARCHIVE_OK);
422 }
423 
424 static ssize_t
425 gzip_filter_read(struct archive_read_filter *self, const void **p)
426 {
427 	struct private_data *state;
428 	size_t decompressed;
429 	ssize_t avail_in, max_in;
430 	int ret;
431 
432 	state = (struct private_data *)self->data;
433 
434 	/* Empty our output buffer. */
435 	state->stream.next_out = state->out_block;
436 	state->stream.avail_out = (uInt)state->out_block_size;
437 
438 	/* Try to fill the output buffer. */
439 	while (state->stream.avail_out > 0 && !state->eof) {
440 		/* If we're not in a stream, read a header
441 		 * and initialize the decompression library. */
442 		if (!state->in_stream) {
443 			ret = consume_header(self);
444 			if (ret == ARCHIVE_EOF) {
445 				state->eof = 1;
446 				break;
447 			}
448 			if (ret < ARCHIVE_OK)
449 				return (ret);
450 		}
451 
452 		/* Peek at the next available data. */
453 		/* ZLib treats stream.next_in as const but doesn't declare
454 		 * it so, hence this ugly cast. */
455 		state->stream.next_in = (unsigned char *)(uintptr_t)
456 		    __archive_read_filter_ahead(self->upstream, 1, &avail_in);
457 		if (state->stream.next_in == NULL) {
458 			archive_set_error(&self->archive->archive,
459 			    ARCHIVE_ERRNO_MISC,
460 			    "truncated gzip input");
461 			return (ARCHIVE_FATAL);
462 		}
463 		if (UINT_MAX >= SSIZE_MAX)
464 			max_in = SSIZE_MAX;
465 		else
466 			max_in = UINT_MAX;
467 		if (avail_in > max_in)
468 			avail_in = max_in;
469 		state->stream.avail_in = (uInt)avail_in;
470 
471 		/* Decompress and consume some of that data. */
472 		ret = inflate(&(state->stream), 0);
473 		switch (ret) {
474 		case Z_OK: /* Decompressor made some progress. */
475 			__archive_read_filter_consume(self->upstream,
476 			    avail_in - state->stream.avail_in);
477 			break;
478 		case Z_STREAM_END: /* Found end of stream. */
479 			__archive_read_filter_consume(self->upstream,
480 			    avail_in - state->stream.avail_in);
481 			/* Consume the stream trailer; release the
482 			 * decompression library. */
483 			ret = consume_trailer(self);
484 			if (ret < ARCHIVE_OK)
485 				return (ret);
486 			break;
487 		default:
488 			/* Return an error. */
489 			archive_set_error(&self->archive->archive,
490 			    ARCHIVE_ERRNO_MISC,
491 			    "gzip decompression failed");
492 			return (ARCHIVE_FATAL);
493 		}
494 	}
495 
496 	/* We've read as much as we can. */
497 	decompressed = state->stream.next_out - state->out_block;
498 	state->total_out += decompressed;
499 	if (decompressed == 0)
500 		*p = NULL;
501 	else
502 		*p = state->out_block;
503 	return (decompressed);
504 }
505 
506 /*
507  * Clean up the decompressor.
508  */
509 static int
510 gzip_filter_close(struct archive_read_filter *self)
511 {
512 	struct private_data *state;
513 	int ret;
514 
515 	state = (struct private_data *)self->data;
516 	ret = ARCHIVE_OK;
517 
518 	if (state->in_stream) {
519 		switch (inflateEnd(&(state->stream))) {
520 		case Z_OK:
521 			break;
522 		default:
523 			archive_set_error(&(self->archive->archive),
524 			    ARCHIVE_ERRNO_MISC,
525 			    "Failed to clean up gzip compressor");
526 			ret = ARCHIVE_FATAL;
527 		}
528 	}
529 
530 	free(state->name);
531 	free(state->out_block);
532 	free(state);
533 	return (ret);
534 }
535 
536 #endif /* HAVE_ZLIB_H */
537