1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "archive_platform.h"
27 
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_STDLIB_H
32 #include <stdlib.h>
33 #endif
34 #ifdef HAVE_STRING_H
35 #include <string.h>
36 #endif
37 #ifdef HAVE_LIMITS_H
38 #include <limits.h>
39 #endif
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #ifdef HAVE_ZLIB_H
44 #include <zlib.h>
45 #endif
46 
47 #include "archive.h"
48 #include "archive_entry.h"
49 #include "archive_endian.h"
50 #include "archive_private.h"
51 #include "archive_read_private.h"
52 
53 #ifdef HAVE_ZLIB_H
54 struct private_data {
55 	z_stream	 stream;
56 	char		 in_stream;
57 	unsigned char	*out_block;
58 	size_t		 out_block_size;
59 	int64_t		 total_out;
60 	unsigned long	 crc;
61 	uint32_t	 mtime;
62 	char		*name;
63 	char		 eof; /* True = found end of compressed data. */
64 };
65 
/* Gzip filter callbacks; defined below and installed in the reader vtable. */
static ssize_t	gzip_filter_read(struct archive_read_filter *self,
		    const void **p);
static int	gzip_filter_close(struct archive_read_filter *self);
69 #endif
70 
71 /*
72  * Note that we can detect gzip archives even if we can't decompress
73  * them.  (In fact, we like detecting them because we can give better
74  * error messages.)  So the bid framework here gets compiled even
75  * if zlib is unavailable.
76  *
 * If zlib is unavailable, gzip_bidder_init() falls back to
 * __archive_read_program() to try to fire up an external
 * "gzip -d" program.
80  */
/* Bidder callbacks; both are referenced from the bidder vtable. */
static int	gzip_bidder_bid(struct archive_read_filter_bidder *self,
		    struct archive_read_filter *filter);
static int	gzip_bidder_init(struct archive_read_filter *self);
84 
85 #if ARCHIVE_VERSION_NUMBER < 4000000
/*
 * Deprecated; remove in libarchive 4.0.
 *
 * Legacy entry point kept for source compatibility: it simply forwards
 * to archive_read_support_filter_gzip() and returns its result.
 */
int
archive_read_support_compression_gzip(struct archive *a)
{
	return (archive_read_support_filter_gzip(a));
}
92 #endif
93 
94 static const struct archive_read_filter_bidder_vtable
95 gzip_bidder_vtable = {
96 	.bid = gzip_bidder_bid,
97 	.init = gzip_bidder_init,
98 };
99 
100 int
101 archive_read_support_filter_gzip(struct archive *_a)
102 {
103 	struct archive_read *a = (struct archive_read *)_a;
104 
105 	if (__archive_read_register_bidder(a, NULL, "gzip",
106 				&gzip_bidder_vtable) != ARCHIVE_OK)
107 		return (ARCHIVE_FATAL);
108 
109 	/* Signal the extent of gzip support with the return value here. */
110 #if HAVE_ZLIB_H
111 	return (ARCHIVE_OK);
112 #else
113 	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
114 	    "Using external gzip program");
115 	return (ARCHIVE_WARN);
116 #endif
117 }
118 
/*
 * Read and verify the header.
 *
 * The header is examined through read-ahead only; nothing is consumed
 * from `filter`.
 *
 * Returns zero if the header couldn't be validated, else returns
 * number of bytes in header.  If pbits is non-NULL, it receives a
 * count of bits verified, suitable for use by bidder.
 *
 * In zlib builds, a non-NULL `state` additionally receives the header's
 * modification time and a freshly allocated copy of the optional file
 * name (any previously stored name is freed first).  Without zlib the
 * argument is ignored.
 */
static ssize_t
peek_at_header(struct archive_read_filter *filter, int *pbits,
#ifdef HAVE_ZLIB_H
	       struct private_data *state
#else
	       void *state
#endif
	      )
{
	const unsigned char *p;
	ssize_t avail, len;
	int bits = 0;
	int header_flags;
#ifndef HAVE_ZLIB_H
	(void)state; /* UNUSED */
#endif

	/* Start by looking at the first ten bytes of the header, which
	 * is all fixed layout. */
	len = 10;
	p = __archive_read_filter_ahead(filter, len, &avail);
	if (p == NULL || avail == 0)
		return (0);
	/* We only support deflation- third byte must be 0x08. */
	if (memcmp(p, "\x1F\x8B\x08", 3) != 0)
		return (0);
	bits += 24;
	if ((p[3] & 0xE0)!= 0)	/* No reserved flags set. */
		return (0);
	bits += 3;
	header_flags = p[3];
	/* Bytes 4-7 are mod time in little endian. */
#ifdef HAVE_ZLIB_H
	if (state)
		state->mtime = archive_le32dec(p + 4);
#endif
	/* Byte 8 is deflate flags. */
	/* XXXX TODO: return deflate flags back to consume_header for use
	   in initializing the decompressor. */
	/* Byte 9 is OS. */

	/* Optional extra data:  2 byte length plus variable body. */
	if (header_flags & 4) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
		len += ((int)p[len + 1] << 8) | (int)p[len];
		len += 2;
		/*
		 * The length above comes straight from the input.  Make
		 * sure the whole extra field is really present; otherwise,
		 * when no other optional field follows, we would report a
		 * header length larger than the data that exists.
		 */
		p = __archive_read_filter_ahead(filter, len, &avail);
		if (p == NULL)
			return (0);
	}

	/* Null-terminated optional filename. */
	if (header_flags & 8) {
#ifdef HAVE_ZLIB_H
		ssize_t file_start = len;
#endif
		/* Extend the look-ahead one byte at a time until the NUL. */
		do {
			++len;
			if (avail < len)
				p = __archive_read_filter_ahead(filter,
				    len, &avail);
			if (p == NULL)
				return (0);
		} while (p[len - 1] != 0);

#ifdef HAVE_ZLIB_H
		if (state) {
			/* Reset the name in case of repeat header reads. */
			free(state->name);
			state->name = strdup((const char *)&p[file_start]);
		}
#endif
	}

	/* Null-terminated optional comment. */
	if (header_flags & 16) {
		do {
			++len;
			if (avail < len)
				p = __archive_read_filter_ahead(filter,
				    len, &avail);
			if (p == NULL)
				return (0);
		} while (p[len - 1] != 0);
	}

	/* Optional header CRC */
	if ((header_flags & 2)) {
		p = __archive_read_filter_ahead(filter, len + 2, &avail);
		if (p == NULL)
			return (0);
#if 0
	int hcrc = ((int)p[len + 1] << 8) | (int)p[len];
	int crc = /* XXX TODO: Compute header CRC. */;
	if (crc != hcrc)
		return (0);
	bits += 16;
#endif
		/* The CRC is only skipped for now, not checked. */
		len += 2;
	}

	if (pbits != NULL)
		*pbits = bits;
	return (len);
}
230 
/*
 * Bid for the stream: the bid is the number of header bits that
 * peek_at_header() was able to verify, or zero when the data does not
 * start with a valid gzip header.
 */
static int
gzip_bidder_bid(struct archive_read_filter_bidder *self,
    struct archive_read_filter *filter)
{
	int verified_bits;

	(void)self; /* UNUSED */

	if (peek_at_header(filter, &verified_bits, NULL) == 0)
		return (0);
	return (verified_bits);
}
246 
247 #ifndef HAVE_ZLIB_H
248 
249 /*
250  * If we don't have the library on this system, we can't do the
251  * decompression directly.  We can, however, try to run "gzip -d"
252  * in case that's available.
253  */
254 static int
255 gzip_bidder_init(struct archive_read_filter *self)
256 {
257 	int r;
258 
259 	r = __archive_read_program(self, "gzip -d");
260 	/* Note: We set the format here even if __archive_read_program()
261 	 * above fails.  We do, after all, know what the format is
262 	 * even if we weren't able to read it. */
263 	self->code = ARCHIVE_FILTER_GZIP;
264 	self->name = "gzip";
265 	return (r);
266 }
267 
268 #else
269 
270 static int
271 gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry)
272 {
273 	struct private_data *state;
274 
275 	state = (struct private_data *)self->data;
276 
277 	/* A mtime of 0 is considered invalid/missing. */
278 	if (state->mtime != 0)
279 		archive_entry_set_mtime(entry, state->mtime, 0);
280 
281 	/* If the name is available, extract it. */
282 	if (state->name)
283 		archive_entry_set_pathname(entry, state->name);
284 
285 	return (ARCHIVE_OK);
286 }
287 
288 static const struct archive_read_filter_vtable
289 gzip_reader_vtable = {
290 	.read = gzip_filter_read,
291 	.close = gzip_filter_close,
292 #ifdef HAVE_ZLIB_H
293 	.read_header = gzip_read_header,
294 #endif
295 };
296 
297 /*
298  * Initialize the filter object.
299  */
300 static int
301 gzip_bidder_init(struct archive_read_filter *self)
302 {
303 	struct private_data *state;
304 	static const size_t out_block_size = 64 * 1024;
305 	void *out_block;
306 
307 	self->code = ARCHIVE_FILTER_GZIP;
308 	self->name = "gzip";
309 
310 	state = (struct private_data *)calloc(1, sizeof(*state));
311 	out_block = (unsigned char *)malloc(out_block_size);
312 	if (state == NULL || out_block == NULL) {
313 		free(out_block);
314 		free(state);
315 		archive_set_error(&self->archive->archive, ENOMEM,
316 		    "Can't allocate data for gzip decompression");
317 		return (ARCHIVE_FATAL);
318 	}
319 
320 	self->data = state;
321 	state->out_block_size = out_block_size;
322 	state->out_block = out_block;
323 	self->vtable = &gzip_reader_vtable;
324 
325 	state->in_stream = 0; /* We're not actually within a stream yet. */
326 
327 	return (ARCHIVE_OK);
328 }
329 
330 static int
331 consume_header(struct archive_read_filter *self)
332 {
333 	struct private_data *state;
334 	ssize_t avail;
335 	size_t len;
336 	int ret;
337 
338 	state = (struct private_data *)self->data;
339 
340 	/* If this is a real header, consume it. */
341 	len = peek_at_header(self->upstream, NULL, state);
342 	if (len == 0)
343 		return (ARCHIVE_EOF);
344 	__archive_read_filter_consume(self->upstream, len);
345 
346 	/* Initialize CRC accumulator. */
347 	state->crc = crc32(0L, NULL, 0);
348 
349 	/* Initialize compression library. */
350 	state->stream.next_in = (unsigned char *)(uintptr_t)
351 	    __archive_read_filter_ahead(self->upstream, 1, &avail);
352 	state->stream.avail_in = (uInt)avail;
353 	ret = inflateInit2(&(state->stream),
354 	    -15 /* Don't check for zlib header */);
355 
356 	/* Decipher the error code. */
357 	switch (ret) {
358 	case Z_OK:
359 		state->in_stream = 1;
360 		return (ARCHIVE_OK);
361 	case Z_STREAM_ERROR:
362 		archive_set_error(&self->archive->archive,
363 		    ARCHIVE_ERRNO_MISC,
364 		    "Internal error initializing compression library: "
365 		    "invalid setup parameter");
366 		break;
367 	case Z_MEM_ERROR:
368 		archive_set_error(&self->archive->archive, ENOMEM,
369 		    "Internal error initializing compression library: "
370 		    "out of memory");
371 		break;
372 	case Z_VERSION_ERROR:
373 		archive_set_error(&self->archive->archive,
374 		    ARCHIVE_ERRNO_MISC,
375 		    "Internal error initializing compression library: "
376 		    "invalid library version");
377 		break;
378 	default:
379 		archive_set_error(&self->archive->archive,
380 		    ARCHIVE_ERRNO_MISC,
381 		    "Internal error initializing compression library: "
382 		    " Zlib error %d", ret);
383 		break;
384 	}
385 	return (ARCHIVE_FATAL);
386 }
387 
388 static int
389 consume_trailer(struct archive_read_filter *self)
390 {
391 	struct private_data *state;
392 	const unsigned char *p;
393 	ssize_t avail;
394 
395 	state = (struct private_data *)self->data;
396 
397 	state->in_stream = 0;
398 	switch (inflateEnd(&(state->stream))) {
399 	case Z_OK:
400 		break;
401 	default:
402 		archive_set_error(&self->archive->archive,
403 		    ARCHIVE_ERRNO_MISC,
404 		    "Failed to clean up gzip decompressor");
405 		return (ARCHIVE_FATAL);
406 	}
407 
408 	/* GZip trailer is a fixed 8 byte structure. */
409 	p = __archive_read_filter_ahead(self->upstream, 8, &avail);
410 	if (p == NULL || avail == 0)
411 		return (ARCHIVE_FATAL);
412 
413 	/* XXX TODO: Verify the length and CRC. */
414 
415 	/* We've verified the trailer, so consume it now. */
416 	__archive_read_filter_consume(self->upstream, 8);
417 
418 	return (ARCHIVE_OK);
419 }
420 
421 static ssize_t
422 gzip_filter_read(struct archive_read_filter *self, const void **p)
423 {
424 	struct private_data *state;
425 	size_t decompressed;
426 	ssize_t avail_in, max_in;
427 	int ret;
428 
429 	state = (struct private_data *)self->data;
430 
431 	/* Empty our output buffer. */
432 	state->stream.next_out = state->out_block;
433 	state->stream.avail_out = (uInt)state->out_block_size;
434 
435 	/* Try to fill the output buffer. */
436 	while (state->stream.avail_out > 0 && !state->eof) {
437 		/* If we're not in a stream, read a header
438 		 * and initialize the decompression library. */
439 		if (!state->in_stream) {
440 			ret = consume_header(self);
441 			if (ret == ARCHIVE_EOF) {
442 				state->eof = 1;
443 				break;
444 			}
445 			if (ret < ARCHIVE_OK)
446 				return (ret);
447 		}
448 
449 		/* Peek at the next available data. */
450 		/* ZLib treats stream.next_in as const but doesn't declare
451 		 * it so, hence this ugly cast. */
452 		state->stream.next_in = (unsigned char *)(uintptr_t)
453 		    __archive_read_filter_ahead(self->upstream, 1, &avail_in);
454 		if (state->stream.next_in == NULL) {
455 			archive_set_error(&self->archive->archive,
456 			    ARCHIVE_ERRNO_MISC,
457 			    "truncated gzip input");
458 			return (ARCHIVE_FATAL);
459 		}
460 		if (UINT_MAX >= SSIZE_MAX)
461 			max_in = SSIZE_MAX;
462 		else
463 			max_in = UINT_MAX;
464 		if (avail_in > max_in)
465 			avail_in = max_in;
466 		state->stream.avail_in = (uInt)avail_in;
467 
468 		/* Decompress and consume some of that data. */
469 		ret = inflate(&(state->stream), 0);
470 		switch (ret) {
471 		case Z_OK: /* Decompressor made some progress. */
472 			__archive_read_filter_consume(self->upstream,
473 			    avail_in - state->stream.avail_in);
474 			break;
475 		case Z_STREAM_END: /* Found end of stream. */
476 			__archive_read_filter_consume(self->upstream,
477 			    avail_in - state->stream.avail_in);
478 			/* Consume the stream trailer; release the
479 			 * decompression library. */
480 			ret = consume_trailer(self);
481 			if (ret < ARCHIVE_OK)
482 				return (ret);
483 			break;
484 		default:
485 			/* Return an error. */
486 			archive_set_error(&self->archive->archive,
487 			    ARCHIVE_ERRNO_MISC,
488 			    "gzip decompression failed");
489 			return (ARCHIVE_FATAL);
490 		}
491 	}
492 
493 	/* We've read as much as we can. */
494 	decompressed = state->stream.next_out - state->out_block;
495 	state->total_out += decompressed;
496 	if (decompressed == 0)
497 		*p = NULL;
498 	else
499 		*p = state->out_block;
500 	return (decompressed);
501 }
502 
503 /*
504  * Clean up the decompressor.
505  */
506 static int
507 gzip_filter_close(struct archive_read_filter *self)
508 {
509 	struct private_data *state;
510 	int ret;
511 
512 	state = (struct private_data *)self->data;
513 	ret = ARCHIVE_OK;
514 
515 	if (state->in_stream) {
516 		switch (inflateEnd(&(state->stream))) {
517 		case Z_OK:
518 			break;
519 		default:
520 			archive_set_error(&(self->archive->archive),
521 			    ARCHIVE_ERRNO_MISC,
522 			    "Failed to clean up gzip compressor");
523 			ret = ARCHIVE_FATAL;
524 		}
525 	}
526 
527 	free(state->name);
528 	free(state->out_block);
529 	free(state);
530 	return (ret);
531 }
532 
533 #endif /* HAVE_ZLIB_H */
534