1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 /*
27  * This code borrows heavily from "compress" source code, which is
28  * protected by the following copyright.  (Clause 3 dropped by request
29  * of the Regents.)
30  */
31 
32 /*-
33  * Copyright (c) 1985, 1986, 1992, 1993
34  *	The Regents of the University of California.  All rights reserved.
35  *
36  * This code is derived from software contributed to Berkeley by
37  * Diomidis Spinellis and James A. Woods, derived from original
38  * work by Spencer Thomas and Joseph Orost.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  */
64 
65 
66 #include "archive_platform.h"
67 
68 #ifdef HAVE_ERRNO_H
69 #include <errno.h>
70 #endif
71 #ifdef HAVE_STDLIB_H
72 #include <stdlib.h>
73 #endif
74 #ifdef HAVE_STRING_H
75 #include <string.h>
76 #endif
77 #ifdef HAVE_UNISTD_H
78 #include <unistd.h>
79 #endif
80 
81 #include "archive.h"
82 #include "archive_private.h"
83 #include "archive_read_private.h"
84 
85 /*
86  * Because LZW decompression is pretty simple, I've just implemented
87  * the whole decompressor here (cribbing from "compress" source code,
88  * of course), rather than relying on an external library.  I have
89  * made an effort to clarify and simplify the algorithm, so the
90  * names and structure here don't exactly match those used by compress.
91  */
92 
93 struct private_data {
94 	/* Input variables. */
95 	const unsigned char	*next_in;
96 	size_t			 avail_in;
97 	size_t			 consume_unnotified;
98 	int			 bit_buffer;
99 	int			 bits_avail;
100 	size_t			 bytes_in_section;
101 
102 	/* Output variables. */
103 	size_t			 out_block_size;
104 	void			*out_block;
105 
106 	/* Decompression status variables. */
107 	int			 use_reset_code;
108 	int			 end_of_stream;	/* EOF status. */
109 	int			 maxcode;	/* Largest code. */
110 	int			 maxcode_bits;	/* Length of largest code. */
111 	int			 section_end_code; /* When to increase bits. */
112 	int			 bits;		/* Current code length. */
113 	int			 oldcode;	/* Previous code. */
114 	int			 finbyte;	/* Last byte of prev code. */
115 
116 	/* Dictionary. */
117 	int			 free_ent;       /* Next dictionary entry. */
118 	unsigned char		 suffix[65536];
119 	uint16_t		 prefix[65536];
120 
121 	/*
122 	 * Scratch area for expanding dictionary entries.  Note:
123 	 * "worst" case here comes from compressing /dev/zero: the
124 	 * last code in the dictionary will code a sequence of
125 	 * 65536-256 zero bytes.  Thus, we need stack space to expand
126 	 * a 65280-byte dictionary entry.  (Of course, 32640:1
127 	 * compression could also be considered the "best" case. ;-)
128 	 */
129 	unsigned char		*stackp;
130 	unsigned char		 stack[65300];
131 };
132 
133 static int	compress_bidder_bid(struct archive_read_filter_bidder *, struct archive_read_filter *);
134 static int	compress_bidder_init(struct archive_read_filter *);
135 
136 static ssize_t	compress_filter_read(struct archive_read_filter *, const void **);
137 static int	compress_filter_close(struct archive_read_filter *);
138 
139 static int	getbits(struct archive_read_filter *, int n);
140 static int	next_code(struct archive_read_filter *);
141 
142 #if ARCHIVE_VERSION_NUMBER < 4000000
143 /* Deprecated; remove in libarchive 4.0 */
144 int
archive_read_support_compression_compress(struct archive * a)145 archive_read_support_compression_compress(struct archive *a)
146 {
147 	return archive_read_support_filter_compress(a);
148 }
149 #endif
150 
151 static const struct archive_read_filter_bidder_vtable
152 compress_bidder_vtable = {
153 	.bid = compress_bidder_bid,
154 	.init = compress_bidder_init,
155 };
156 
157 int
archive_read_support_filter_compress(struct archive * _a)158 archive_read_support_filter_compress(struct archive *_a)
159 {
160 	struct archive_read *a = (struct archive_read *)_a;
161 
162 	return __archive_read_register_bidder(a, NULL, "compress (.Z)",
163 			&compress_bidder_vtable);
164 }
165 
166 /*
167  * Test whether we can handle this data.
168  * This logic returns zero if any part of the signature fails.
169  */
170 static int
compress_bidder_bid(struct archive_read_filter_bidder * self,struct archive_read_filter * filter)171 compress_bidder_bid(struct archive_read_filter_bidder *self,
172     struct archive_read_filter *filter)
173 {
174 	const unsigned char *buffer;
175 	ssize_t avail;
176 	int bits_checked;
177 
178 	(void)self; /* UNUSED */
179 
180 	/* Shortest valid compress file is 3 bytes. */
181 	buffer = __archive_read_filter_ahead(filter, 3, &avail);
182 
183 	if (buffer == NULL)
184 		return (0);
185 
186 	bits_checked = 0;
187 	/* First two bytes are the magic value */
188 	if (buffer[0] != 0x1F || buffer[1] != 0x9D)
189 		return (0);
190 	/* Third byte holds compression parameters. */
191 	if (buffer[2] & 0x20) /* Reserved bit, must be zero. */
192 		return (0);
193 	if (buffer[2] & 0x40) /* Reserved bit, must be zero. */
194 		return (0);
195 	bits_checked += 18;
196 
197 	return (bits_checked);
198 }
199 
200 static const struct archive_read_filter_vtable
201 compress_reader_vtable = {
202 	.read = compress_filter_read,
203 	.close = compress_filter_close,
204 };
205 
206 /*
207  * Setup the callbacks.
208  */
209 static int
compress_bidder_init(struct archive_read_filter * self)210 compress_bidder_init(struct archive_read_filter *self)
211 {
212 	struct private_data *state;
213 	static const size_t out_block_size = 64 * 1024;
214 	void *out_block;
215 	int code;
216 
217 	self->code = ARCHIVE_FILTER_COMPRESS;
218 	self->name = "compress (.Z)";
219 
220 	state = (struct private_data *)calloc(1, sizeof(*state));
221 	out_block = malloc(out_block_size);
222 	if (state == NULL || out_block == NULL) {
223 		free(out_block);
224 		free(state);
225 		archive_set_error(&self->archive->archive, ENOMEM,
226 		    "Can't allocate data for %s decompression",
227 		    self->name);
228 		return (ARCHIVE_FATAL);
229 	}
230 
231 	self->data = state;
232 	state->out_block_size = out_block_size;
233 	state->out_block = out_block;
234 	self->vtable = &compress_reader_vtable;
235 
236 	/* XXX MOVE THE FOLLOWING OUT OF INIT() XXX */
237 
238 	(void)getbits(self, 8); /* Skip first signature byte. */
239 	(void)getbits(self, 8); /* Skip second signature byte. */
240 
241 	/* Get compression parameters. */
242 	code = getbits(self, 8);
243 	if ((code & 0x1f) > 16) {
244 		archive_set_error(&self->archive->archive, -1,
245 		    "Invalid compressed data");
246 		return (ARCHIVE_FATAL);
247 	}
248 	state->maxcode_bits = code & 0x1f;
249 	state->maxcode = (1 << state->maxcode_bits);
250 	state->use_reset_code = code & 0x80;
251 
252 	/* Initialize decompressor. */
253 	state->free_ent = 256;
254 	state->stackp = state->stack;
255 	if (state->use_reset_code)
256 		state->free_ent++;
257 	state->bits = 9;
258 	state->section_end_code = (1<<state->bits) - 1;
259 	state->oldcode = -1;
260 	for (code = 255; code >= 0; code--) {
261 		state->prefix[code] = 0;
262 		state->suffix[code] = code;
263 	}
264 	next_code(self);
265 
266 	return (ARCHIVE_OK);
267 }
268 
269 /*
270  * Return a block of data from the decompression buffer.  Decompress more
271  * as necessary.
272  */
273 static ssize_t
compress_filter_read(struct archive_read_filter * self,const void ** pblock)274 compress_filter_read(struct archive_read_filter *self, const void **pblock)
275 {
276 	struct private_data *state;
277 	unsigned char *p, *start, *end;
278 	int ret;
279 
280 	state = (struct private_data *)self->data;
281 	if (state->end_of_stream) {
282 		*pblock = NULL;
283 		return (0);
284 	}
285 	p = start = (unsigned char *)state->out_block;
286 	end = start + state->out_block_size;
287 
288 	while (p < end && !state->end_of_stream) {
289 		if (state->stackp > state->stack) {
290 			*p++ = *--state->stackp;
291 		} else {
292 			ret = next_code(self);
293 			if (ret == -1)
294 				state->end_of_stream = ret;
295 			else if (ret != ARCHIVE_OK)
296 				return (ret);
297 		}
298 	}
299 
300 	*pblock = start;
301 	return (p - start);
302 }
303 
304 /*
305  * Close and release the filter.
306  */
307 static int
compress_filter_close(struct archive_read_filter * self)308 compress_filter_close(struct archive_read_filter *self)
309 {
310 	struct private_data *state = (struct private_data *)self->data;
311 
312 	free(state->out_block);
313 	free(state);
314 	return (ARCHIVE_OK);
315 }
316 
317 /*
318  * Process the next code and fill the stack with the expansion
319  * of the code.  Returns ARCHIVE_FATAL if there is a fatal I/O or
320  * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise.
321  */
322 static int
next_code(struct archive_read_filter * self)323 next_code(struct archive_read_filter *self)
324 {
325 	struct private_data *state = (struct private_data *)self->data;
326 	int code, newcode;
327 
328 	static int debug_buff[1024];
329 	static unsigned debug_index;
330 
331 	code = newcode = getbits(self, state->bits);
332 	if (code < 0)
333 		return (code);
334 
335 	debug_buff[debug_index++] = code;
336 	if (debug_index >= sizeof(debug_buff)/sizeof(debug_buff[0]))
337 		debug_index = 0;
338 
339 	/* If it's a reset code, reset the dictionary. */
340 	if ((code == 256) && state->use_reset_code) {
341 		/*
342 		 * The original 'compress' implementation blocked its
343 		 * I/O in a manner that resulted in junk bytes being
344 		 * inserted after every reset.  The next section skips
345 		 * this junk.  (Yes, the number of *bytes* to skip is
346 		 * a function of the current *bit* length.)
347 		 */
348 		int skip_bytes =  state->bits -
349 		    (state->bytes_in_section % state->bits);
350 		skip_bytes %= state->bits;
351 		state->bits_avail = 0; /* Discard rest of this byte. */
352 		while (skip_bytes-- > 0) {
353 			code = getbits(self, 8);
354 			if (code < 0)
355 				return (code);
356 		}
357 		/* Now, actually do the reset. */
358 		state->bytes_in_section = 0;
359 		state->bits = 9;
360 		state->section_end_code = (1 << state->bits) - 1;
361 		state->free_ent = 257;
362 		state->oldcode = -1;
363 		return (next_code(self));
364 	}
365 
366 	if (code > state->free_ent
367 	    || (code == state->free_ent && state->oldcode < 0)) {
368 		/* An invalid code is a fatal error. */
369 		archive_set_error(&(self->archive->archive), -1,
370 		    "Invalid compressed data");
371 		return (ARCHIVE_FATAL);
372 	}
373 
374 	/* Special case for KwKwK string. */
375 	if (code >= state->free_ent) {
376 		*state->stackp++ = state->finbyte;
377 		code = state->oldcode;
378 	}
379 
380 	/* Generate output characters in reverse order. */
381 	while (code >= 256) {
382 		*state->stackp++ = state->suffix[code];
383 		code = state->prefix[code];
384 	}
385 	*state->stackp++ = state->finbyte = code;
386 
387 	/* Generate the new entry. */
388 	code = state->free_ent;
389 	if (code < state->maxcode && state->oldcode >= 0) {
390 		state->prefix[code] = state->oldcode;
391 		state->suffix[code] = state->finbyte;
392 		++state->free_ent;
393 	}
394 	if (state->free_ent > state->section_end_code) {
395 		state->bits++;
396 		state->bytes_in_section = 0;
397 		if (state->bits == state->maxcode_bits)
398 			state->section_end_code = state->maxcode;
399 		else
400 			state->section_end_code = (1 << state->bits) - 1;
401 	}
402 
403 	/* Remember previous code. */
404 	state->oldcode = newcode;
405 	return (ARCHIVE_OK);
406 }
407 
408 /*
409  * Return next 'n' bits from stream.
410  *
411  * -1 indicates end of available data.
412  */
413 static int
getbits(struct archive_read_filter * self,int n)414 getbits(struct archive_read_filter *self, int n)
415 {
416 	struct private_data *state = (struct private_data *)self->data;
417 	int code;
418 	ssize_t ret;
419 	static const int mask[] = {
420 		0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
421 		0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
422 	};
423 
424 	while (state->bits_avail < n) {
425 		if (state->avail_in <= 0) {
426 			if (state->consume_unnotified) {
427 				__archive_read_filter_consume(self->upstream,
428 					state->consume_unnotified);
429 				state->consume_unnotified = 0;
430 			}
431 			state->next_in
432 			    = __archive_read_filter_ahead(self->upstream,
433 				1, &ret);
434 			if (ret == 0)
435 				return (-1);
436 			if (ret < 0 || state->next_in == NULL)
437 				return (ARCHIVE_FATAL);
438 			state->consume_unnotified = state->avail_in = ret;
439 		}
440 		state->bit_buffer |= *state->next_in++ << state->bits_avail;
441 		state->avail_in--;
442 		state->bits_avail += 8;
443 		state->bytes_in_section++;
444 	}
445 
446 	code = state->bit_buffer;
447 	state->bit_buffer >>= n;
448 	state->bits_avail -= n;
449 
450 	return (code & mask[n]);
451 }
452