xref: /openbsd/usr.bin/compress/zopen.c (revision 3d8817e4)
1 /*	$OpenBSD: zopen.c,v 1.17 2005/08/25 17:07:56 millert Exp $	*/
2 /*	$NetBSD: zopen.c,v 1.5 1995/03/26 09:44:53 glass Exp $	*/
3 
4 /*-
5  * Copyright (c) 1985, 1986, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Diomidis Spinellis and James A. Woods, derived from original
10  * work by Spencer Thomas and Joseph Orost.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	From: @(#)zopen.c	8.1 (Berkeley) 6/27/93
37  */
38 
39 #if 0
40 static char sccsid[] = "@(#)zopen.c	8.1 (Berkeley) 6/27/93";
41 #else
42 const char z_rcsid[] =
43 	"$OpenBSD: zopen.c,v 1.17 2005/08/25 17:07:56 millert Exp $";
44 #endif
45 
46 /*-
47  * fcompress.c - File compression ala IEEE Computer, June 1984.
48  *
49  * Compress authors:
50  *		Spencer W. Thomas	(decvax!utah-cs!thomas)
51  *		Jim McKie		(decvax!mcvax!jim)
52  *		Steve Davies		(decvax!vax135!petsd!peora!srd)
53  *		Ken Turkowski		(decvax!decwrl!turtlevax!ken)
54  *		James A. Woods		(decvax!ihnp4!ames!jaw)
55  *		Joe Orost		(decvax!vax135!petsd!joe)
56  *
57  * Cleaned up and converted to library returning I/O streams by
58  * Diomidis Spinellis <dds@doc.ic.ac.uk>.
59  *
60  * zopen(filename, mode, bits)
61  *	Returns a FILE * that can be used for read or write.  The modes
62  *	supported are only "r" and "w".  Seeking is not allowed.  On
63  *	reading the file is decompressed, on writing it is compressed.
64  *	The output is compatible with compress(1) with 16 bit tables.
65  *	Any file produced by compress(1) can be read.
66  */
67 
68 #include <sys/param.h>
69 #include <sys/stat.h>
70 
71 #include <ctype.h>
72 #include <errno.h>
73 #include <signal.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <unistd.h>
78 #include <fcntl.h>
79 #include "compress.h"
80 
81 #define	BITS		16		/* Default bits. */
82 #define	HSIZE		69001		/* 95% occupancy */
83 #define	ZBUFSIZ		8192		/* I/O buffer size */
84 
85 /* A code_int must be able to hold 2**BITS values of type int, and also -1. */
86 typedef long code_int;
87 typedef long count_int;
88 
89 static const u_char z_magic[] =
90 	{'\037', '\235'};		/* 1F 9D */
91 
92 #define	BIT_MASK	0x1f		/* Defines for third byte of header. */
93 #define	BLOCK_MASK	0x80
94 
95 /*
96  * Masks 0x40 and 0x20 are free.  I think 0x20 should mean that there is
97  * a fourth header byte (for expansion).
98  */
99 #define	INIT_BITS 9			/* Initial number of bits/code. */
100 
101 #define	MAXCODE(n_bits)	((1 << (n_bits)) - 1)
102 
103 struct s_zstate {
104 	int zs_fd;			/* File stream for I/O */
105 	char zs_mode;			/* r or w */
106 	enum {
107 		S_START, S_MAGIC, S_MIDDLE, S_EOF
108 	} zs_state;			/* State of computation */
109 	int zs_n_bits;			/* Number of bits/code. */
110 	int zs_maxbits;			/* User settable max # bits/code. */
111 	code_int zs_maxcode;		/* Maximum code, given n_bits. */
112 	code_int zs_maxmaxcode;		/* Should NEVER generate this code. */
113 	count_int zs_htab[HSIZE];
114 	u_short zs_codetab[HSIZE];
115 	code_int zs_hsize;		/* For dynamic table sizing. */
116 	code_int zs_free_ent;		/* First unused entry. */
117 	/*
118 	 * Block compression parameters -- after all codes are used up,
119 	 * and compression rate changes, start over.
120 	 */
121 	int zs_block_compress;
122 	int zs_clear_flg;
123 	long zs_ratio;
124 	count_int zs_checkpoint;
125 	long zs_in_count;		/* Length of input. */
126 	long zs_bytes_out;		/* Length of output. */
127 	long zs_out_count;		/* # of codes output (for debugging).*/
128 	u_char zs_buf[ZBUFSIZ];		/* I/O buffer */
129 	u_char *zs_bp;			/* Current I/O window in the zs_buf */
130 	int zs_offset;			/* Number of bits in the zs_buf */
131 	union {
132 		struct {
133 			long zs_fcode;
134 			code_int zs_ent;
135 			code_int zs_hsize_reg;
136 			int zs_hshift;
137 		} w;			/* Write parameters */
138 		struct {
139 			u_char *zs_stackp, *zs_ebp;
140 			int zs_finchar;
141 			code_int zs_code, zs_oldcode, zs_incode;
142 			int zs_size;
143 		} r;			/* Read parameters */
144 	} u;
145 };
146 
147 /* Definitions to retain old variable names */
148 #define zs_fcode	u.w.zs_fcode
149 #define zs_ent		u.w.zs_ent
150 #define zs_hsize_reg	u.w.zs_hsize_reg
151 #define zs_hshift	u.w.zs_hshift
152 #define zs_stackp	u.r.zs_stackp
153 #define zs_finchar	u.r.zs_finchar
154 #define zs_code		u.r.zs_code
155 #define zs_oldcode	u.r.zs_oldcode
156 #define zs_incode	u.r.zs_incode
157 #define zs_size		u.r.zs_size
158 #define zs_ebp		u.r.zs_ebp
159 
160 /*
161  * To save much memory, we overlay the table used by compress() with those
162  * used by decompress().  The tab_prefix table is the same size and type as
163  * the codetab.  The tab_suffix table needs 2**BITS characters.  We get this
164  * from the beginning of htab.  The output stack uses the rest of htab, and
165  * contains characters.  There is plenty of room for any possible stack
166  * (stack used to be 8000 characters).
167  */
168 
169 #define	htabof(i)	zs->zs_htab[i]
170 #define	codetabof(i)	zs->zs_codetab[i]
171 
172 #define	tab_prefixof(i)	codetabof(i)
173 #define	tab_suffixof(i)	((u_char *)(zs->zs_htab))[i]
174 #define	de_stack	((u_char *)&tab_suffixof(1 << BITS))
175 
176 #define	CHECK_GAP 10000		/* Ratio check interval. */
177 
178 /*
179  * the next two codes should not be changed lightly, as they must not
180  * lie within the contiguous general code space.
181  */
182 #define	FIRST	257		/* First free entry. */
183 #define	CLEAR	256		/* Table clear output code. */
184 
185 static int	cl_block(struct s_zstate *);
186 static void	cl_hash(struct s_zstate *, count_int);
187 static code_int	getcode(struct s_zstate *);
188 static int	output(struct s_zstate *, code_int);
189 
190 /*-
191  * Algorithm from "A Technique for High Performance Data Compression",
192  * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
193  *
194  * Algorithm:
195  *	Modified Lempel-Ziv method (LZW).  Basically finds common
196  * substrings and replaces them with a variable size code.  This is
197  * deterministic, and can be done on the fly.  Thus, the decompression
198  * procedure needs no input table, but tracks the way the table was built.
199  */
200 
201 /*-
202  * compress write
203  *
204  * Algorithm:  use open addressing double hashing (no chaining) on the
205  * prefix code / next character combination.  We do a variant of Knuth's
206  * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
207  * secondary probe.  Here, the modular division first probe is gives way
208  * to a faster exclusive-or manipulation.  Also do block compression with
209  * an adaptive reset, whereby the code table is cleared when the compression
210  * ratio decreases, but after the table fills.  The variable-length output
211  * codes are re-sized at this point, and a special CLEAR code is generated
212  * for the decompressor.  Late addition:  construct the table according to
213  * file size for noticeable speed improvement on small files.  Please direct
214  * questions about this implementation to ames!jaw.
215  */
216 int
217 zwrite(void *cookie, const char *wbp, int num)
218 {
219 	code_int i;
220 	int c, disp;
221 	struct s_zstate *zs;
222 	const u_char *bp;
223 	u_char tmp;
224 	int count;
225 
226 	zs = cookie;
227 	count = num;
228 	bp = (u_char *)wbp;
229 	switch (zs->zs_state) {
230 	case S_MAGIC:
231 		return -1;
232 	case S_EOF:
233 		return 0;
234 	case S_START:
235 		zs->zs_state = S_MIDDLE;
236 
237 		zs->zs_maxmaxcode = 1L << zs->zs_maxbits;
238 		if (write(zs->zs_fd, z_magic, sizeof(z_magic)) !=
239 		    sizeof(z_magic))
240 			return (-1);
241 		tmp = (u_char)(zs->zs_maxbits | zs->zs_block_compress);
242 		if (write(zs->zs_fd, &tmp, sizeof(tmp)) != sizeof(tmp))
243 			return (-1);
244 
245 		zs->zs_bp = zs->zs_buf;
246 		zs->zs_offset = 0;
247 		zs->zs_bytes_out = 3;	/* Includes 3-byte header mojo. */
248 		zs->zs_out_count = 0;
249 		zs->zs_clear_flg = 0;
250 		zs->zs_ratio = 0;
251 		zs->zs_in_count = 1;
252 		zs->zs_checkpoint = CHECK_GAP;
253 		zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
254 		zs->zs_free_ent = ((zs->zs_block_compress) ? FIRST : 256);
255 
256 		zs->zs_ent = *bp++;
257 		--count;
258 
259 		zs->zs_hshift = 0;
260 		for (zs->zs_fcode = (long)zs->zs_hsize; zs->zs_fcode < 65536L;
261 		    zs->zs_fcode *= 2L)
262 			zs->zs_hshift++;
263 		/* Set hash code range bound. */
264 		zs->zs_hshift = 8 - zs->zs_hshift;
265 
266 		zs->zs_hsize_reg = zs->zs_hsize;
267 		/* Clear hash table. */
268 		cl_hash(zs, (count_int)zs->zs_hsize_reg);
269 
270 	case S_MIDDLE:
271 		for (i = 0; count-- > 0;) {
272 			c = *bp++;
273 			zs->zs_in_count++;
274 			zs->zs_fcode = (long)(((long)c << zs->zs_maxbits) +
275 			    zs->zs_ent);
276 			/* Xor hashing. */
277 			i = ((c << zs->zs_hshift) ^ zs->zs_ent);
278 
279 			if (htabof(i) == zs->zs_fcode) {
280 				zs->zs_ent = codetabof(i);
281 				continue;
282 			} else if ((long)htabof(i) < 0)	/* Empty slot. */
283 				goto nomatch;
284 			/* Secondary hash (after G. Knott). */
285 			disp = zs->zs_hsize_reg - i;
286 			if (i == 0)
287 			disp = 1;
288 probe:			if ((i -= disp) < 0)
289 				i += zs->zs_hsize_reg;
290 
291 			if (htabof(i) == zs->zs_fcode) {
292 				zs->zs_ent = codetabof(i);
293 				continue;
294 			}
295 			if ((long)htabof(i) >= 0)
296 				goto probe;
297 nomatch:		if (output(zs, (code_int) zs->zs_ent) == -1)
298 				return (-1);
299 			zs->zs_out_count++;
300 			zs->zs_ent = c;
301 			if (zs->zs_free_ent < zs->zs_maxmaxcode) {
302 				/* code -> hashtable */
303 				codetabof(i) = zs->zs_free_ent++;
304 				htabof(i) = zs->zs_fcode;
305 			} else if ((count_int)zs->zs_in_count >=
306 			    zs->zs_checkpoint && zs->zs_block_compress) {
307 				if (cl_block(zs) == -1)
308 					return (-1);
309 			}
310 		}
311 	}
312 	return (num);
313 }
314 
315 int
316 z_close(void *cookie, struct z_info *info, const char *name, struct stat *sb)
317 {
318 	struct s_zstate *zs;
319 	int rval;
320 
321 	zs = cookie;
322 	if (zs->zs_mode == 'w') {		/* Put out the final code. */
323 		if (output(zs, (code_int) zs->zs_ent) == -1) {
324 			(void)close(zs->zs_fd);
325 			free(zs);
326 			return (-1);
327 		}
328 		zs->zs_out_count++;
329 		if (output(zs, (code_int) - 1) == -1) {
330 			(void)close(zs->zs_fd);
331 			free(zs);
332 			return (-1);
333 		}
334 	}
335 
336 	if (info != NULL) {
337 		info->mtime = 0;
338 		info->crc = (u_int32_t)-1;
339 		info->hlen = 0;
340 		info->total_in = (off_t)zs->zs_in_count;
341 		info->total_out = (off_t)zs->zs_bytes_out;
342 	}
343 
344 #ifndef SAVECORE
345 	setfile(name, zs->zs_fd, sb);
346 #endif
347 	rval = close(zs->zs_fd);
348 	free(zs);
349 	return (rval);
350 }
351 
/*
 * Close callback handed to funopen(): tears the stream down through
 * z_close() without collecting statistics or file metadata.
 */
static int
zclose(void *cookie)
{
	int rval;

	rval = z_close(cookie, NULL, NULL, NULL);
	return (rval);
}
357 
358 /*-
359  * Output the given code.
360  * Inputs:
361  *	code:	A n_bits-bit integer.  If == -1, then EOF.  This assumes
362  *		that n_bits =< (long)wordsize - 1.
363  * Outputs:
364  *	Outputs code to the file.
365  * Assumptions:
366  *	Chars are 8 bits long.
367  * Algorithm:
368  *	Maintain a BITS character long buffer (so that 8 codes will
369  * fit in it exactly).  Use the VAX insv instruction to insert each
370  * code in turn.  When the buffer fills up empty it and start over.
371  */
372 
373 static const u_char lmask[9] =
374 	{0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
375 static const u_char rmask[9] =
376 	{0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
377 
378 static int
379 output(struct s_zstate *zs, code_int ocode)
380 {
381 	int bits;
382 
383 	if (ocode >= 0) {
384 		int r_off;
385 		u_char *bp;
386 
387 		/* Get to the first byte. */
388 		bp = zs->zs_bp + (zs->zs_offset >> 3);
389 		r_off = zs->zs_offset & 7;
390 		bits = zs->zs_n_bits;
391 
392 		/*
393 		 * Since ocode is always >= 8 bits, only need to mask the first
394 		 * hunk on the left.
395 		 */
396 		*bp = (*bp & rmask[r_off]) | ((ocode << r_off) & lmask[r_off]);
397 		bp++;
398 		bits -= (8 - r_off);
399 		ocode >>= 8 - r_off;
400 		/* Get any 8 bit parts in the middle (<=1 for up to 16 bits) */
401 		if (bits >= 8) {
402 			*bp++ = ocode;
403 			ocode >>= 8;
404 			bits -= 8;
405 		}
406 		/* Last bits. */
407 		if (bits)
408 			*bp = ocode;
409 		zs->zs_offset += zs->zs_n_bits;
410 		if (zs->zs_offset == (zs->zs_n_bits << 3)) {
411 			zs->zs_bp += zs->zs_n_bits;
412 			zs->zs_offset = 0;
413 		}
414 		/*
415 		 * If the next entry is going to be too big for the ocode size,
416 		 * then increase it, if possible.
417 		 */
418 		if (zs->zs_free_ent > zs->zs_maxcode ||
419 		    (zs->zs_clear_flg > 0)) {
420 			/*
421 			 * Write the whole buffer, because the input side won't
422 			 * discover the size increase until after it has read it
423 			 */
424 			if (zs->zs_offset > 0) {
425 				zs->zs_bp += zs->zs_n_bits;
426 				zs->zs_offset = 0;
427 			}
428 
429 			if (zs->zs_clear_flg) {
430 				zs->zs_maxcode =
431 					MAXCODE(zs->zs_n_bits = INIT_BITS);
432 				zs->zs_clear_flg = 0;
433 			} else {
434 				zs->zs_n_bits++;
435 				if (zs->zs_n_bits == zs->zs_maxbits)
436 					zs->zs_maxcode = zs->zs_maxmaxcode;
437 				else
438 					zs->zs_maxcode =
439 					    MAXCODE(zs->zs_n_bits);
440 			}
441 		}
442 
443 		if (zs->zs_bp + zs->zs_n_bits > &zs->zs_buf[ZBUFSIZ]) {
444 			bits = zs->zs_bp - zs->zs_buf;
445 			if (write(zs->zs_fd, zs->zs_buf, bits) != bits)
446 				return (-1);
447 			zs->zs_bytes_out += bits;
448 			if (zs->zs_offset > 0)
449 				fprintf (stderr, "zs_offset != 0\n");
450 			zs->zs_bp = zs->zs_buf;
451 		}
452 	} else {
453 		/* At EOF, write the rest of the buffer. */
454 		if (zs->zs_offset > 0)
455 			zs->zs_bp += (zs->zs_offset + 7) / 8;
456 		if (zs->zs_bp > zs->zs_buf) {
457 			bits = zs->zs_bp - zs->zs_buf;
458 			if (write(zs->zs_fd, zs->zs_buf, bits) != bits)
459 				return (-1);
460 			zs->zs_bytes_out += bits;
461 		}
462 		zs->zs_offset = 0;
463 		zs->zs_bp = zs->zs_buf;
464 	}
465 	return (0);
466 }
467 
468 /*
469  * Decompress read.  This routine adapts to the codes in the file building
470  * the "string" table on-the-fly; requiring no table to be stored in the
471  * compressed file.  The tables used herein are shared with those of the
472  * compress() routine.  See the definitions above.
473  */
474 int
475 zread(void *cookie, char *rbp, int num)
476 {
477 	u_int count;
478 	struct s_zstate *zs;
479 	u_char *bp, header[3];
480 
481 	if (num == 0)
482 		return (0);
483 
484 	zs = cookie;
485 	count = num;
486 	bp = (u_char *)rbp;
487 	switch (zs->zs_state) {
488 	case S_START:
489 		zs->zs_state = S_MIDDLE;
490 		zs->zs_bp = zs->zs_buf;
491 		header[0] = header[1] = header[2] = '\0';
492 		read(zs->zs_fd, header, sizeof(header));
493 		break;
494 	case S_MAGIC:
495 		zs->zs_state = S_MIDDLE;
496 		zs->zs_bp = zs->zs_buf;
497 		header[0] = z_magic[0];
498 		header[1] = z_magic[1];
499 		header[2] = '\0';
500 		read(zs->zs_fd, &header[2], 1);
501 		break;
502 	case S_MIDDLE:
503 		goto middle;
504 	case S_EOF:
505 		goto eof;
506 	}
507 
508 	/* Check the magic number */
509 	if (header[0] != z_magic[0] || header[1] != z_magic[1]) {
510 		errno = EFTYPE;
511 		return (-1);
512 	}
513 	zs->zs_maxbits = header[2];	/* Set -b from file. */
514 	zs->zs_in_count += sizeof(header);
515 	zs->zs_block_compress = zs->zs_maxbits & BLOCK_MASK;
516 	zs->zs_maxbits &= BIT_MASK;
517 	zs->zs_maxmaxcode = 1L << zs->zs_maxbits;
518 	if (zs->zs_maxbits > BITS) {
519 		errno = EFTYPE;
520 		return (-1);
521 	}
522 	/* As above, initialize the first 256 entries in the table. */
523 	zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
524 	for (zs->zs_code = 255; zs->zs_code >= 0; zs->zs_code--) {
525 		tab_prefixof(zs->zs_code) = 0;
526 		tab_suffixof(zs->zs_code) = (u_char) zs->zs_code;
527 	}
528 	zs->zs_free_ent = zs->zs_block_compress ? FIRST : 256;
529 
530 	zs->zs_finchar = zs->zs_oldcode = getcode(zs);
531 	if (zs->zs_oldcode == -1)	/* EOF already? */
532 		return (0);	/* Get out of here */
533 
534 	/* First code must be 8 bits = char. */
535 	*bp++ = (u_char)zs->zs_finchar;
536 	count--;
537 	zs->zs_stackp = de_stack;
538 
539 	while ((zs->zs_code = getcode(zs)) > -1) {
540 
541 		if ((zs->zs_code == CLEAR) && zs->zs_block_compress) {
542 			for (zs->zs_code = 255; zs->zs_code >= 0;
543 			    zs->zs_code--)
544 				tab_prefixof(zs->zs_code) = 0;
545 			zs->zs_clear_flg = 1;
546 			zs->zs_free_ent = FIRST - 1;
547 			if ((zs->zs_code = getcode(zs)) == -1)	/* O, untimely death! */
548 				break;
549 		}
550 		zs->zs_incode = zs->zs_code;
551 
552 		/* Special case for KwKwK string. */
553 		if (zs->zs_code >= zs->zs_free_ent) {
554 			*zs->zs_stackp++ = zs->zs_finchar;
555 			zs->zs_code = zs->zs_oldcode;
556 		}
557 
558 		/* Generate output characters in reverse order. */
559 		while (zs->zs_code >= 256) {
560 			/*
561 			 * Bad input file may cause zs_stackp to overflow
562 			 * zs_htab; check here and abort decompression,
563 			 * that's better than dumping core.
564 			 */
565 			if (zs->zs_stackp >= (u_char *)&zs->zs_htab[HSIZE]) {
566 				errno = EINVAL;
567 				return (-1);
568 			}
569 			*zs->zs_stackp++ = tab_suffixof(zs->zs_code);
570 			zs->zs_code = tab_prefixof(zs->zs_code);
571 		}
572 		*zs->zs_stackp++ = zs->zs_finchar = tab_suffixof(zs->zs_code);
573 
574 		/* And put them out in forward order.  */
575 middle:		do {
576 			if (count-- == 0) {
577 				zs->zs_bytes_out += num;
578 				return (num);
579 			}
580 			*bp++ = *--zs->zs_stackp;
581 		} while (zs->zs_stackp > de_stack);
582 
583 		/* Generate the new entry. */
584 		if ((zs->zs_code = zs->zs_free_ent) < zs->zs_maxmaxcode) {
585 			tab_prefixof(zs->zs_code) = (u_short) zs->zs_oldcode;
586 			tab_suffixof(zs->zs_code) = zs->zs_finchar;
587 			zs->zs_free_ent = zs->zs_code + 1;
588 		}
589 
590 		/* Remember previous code. */
591 		zs->zs_oldcode = zs->zs_incode;
592 	}
593 	zs->zs_state = S_EOF;
594 	zs->zs_bytes_out += num - count;
595 eof:	return (num - count);
596 }
597 
598 /*-
599  * Read one code from the standard input.  If EOF, return -1.
600  * Inputs:
601  *	stdin
602  * Outputs:
603  *	code or -1 is returned.
604  */
605 static code_int
606 getcode(struct s_zstate *zs)
607 {
608 	code_int gcode;
609 	int r_off, bits;
610 	u_char *bp;
611 
612 	if (zs->zs_clear_flg > 0 || zs->zs_offset >= zs->zs_size ||
613 	    zs->zs_free_ent > zs->zs_maxcode) {
614 
615 		zs->zs_bp += zs->zs_n_bits;
616 		/*
617 		 * If the next entry will be too big for the current gcode
618 		 * size, then we must increase the size.  This implies reading
619 		 * a new buffer full, too.
620 		 */
621 		if (zs->zs_free_ent > zs->zs_maxcode) {
622 			zs->zs_n_bits++;
623 			if (zs->zs_n_bits == zs->zs_maxbits) {
624 				/* Won't get any bigger now. */
625 				zs->zs_maxcode = zs->zs_maxmaxcode;
626 			} else
627 				zs->zs_maxcode = MAXCODE(zs->zs_n_bits);
628 		}
629 		if (zs->zs_clear_flg > 0) {
630 			zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
631 			zs->zs_clear_flg = 0;
632 		}
633 
634 		/* fill the buffer up to the neck */
635 		if (zs->zs_bp + zs->zs_n_bits > zs->zs_ebp) {
636 			for (bp = zs->zs_buf; zs->zs_bp < zs->zs_ebp;
637 				*bp++ = *zs->zs_bp++);
638 			if ((bits = read(zs->zs_fd, bp, ZBUFSIZ -
639 			    (bp - zs->zs_buf))) < 0)
640 				return -1;
641 			zs->zs_in_count += bits;
642 			zs->zs_bp = zs->zs_buf;
643 			zs->zs_ebp = bp + bits;
644 		}
645 		zs->zs_offset = 0;
646 		zs->zs_size = MIN(zs->zs_n_bits, zs->zs_ebp - zs->zs_bp);
647 		if (zs->zs_size == 0)
648 			return -1;
649 		/* Round size down to integral number of codes. */
650 		zs->zs_size = (zs->zs_size << 3) - (zs->zs_n_bits - 1);
651 	}
652 
653 	bp = zs->zs_bp;
654 	r_off = zs->zs_offset;
655 	bits = zs->zs_n_bits;
656 
657 	/* Get to the first byte. */
658 	bp += (r_off >> 3);
659 	r_off &= 7;
660 
661 	/* Get first part (low order bits). */
662 	gcode = (*bp++ >> r_off);
663 	bits -= (8 - r_off);
664 	r_off = 8 - r_off;	/* Now, roffset into gcode word. */
665 
666 	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
667 	if (bits >= 8) {
668 		gcode |= *bp++ << r_off;
669 		r_off += 8;
670 		bits -= 8;
671 	}
672 
673 	/* High order bits. */
674 	gcode |= (*bp & rmask[bits]) << r_off;
675 	zs->zs_offset += zs->zs_n_bits;
676 
677 	return (gcode);
678 }
679 
680 /* Table clear for block compress. */
681 static int
682 cl_block(struct s_zstate *zs)
683 {
684 	long rat;
685 
686 	zs->zs_checkpoint = zs->zs_in_count + CHECK_GAP;
687 
688 	if (zs->zs_in_count > 0x007fffff) {	/* Shift will overflow. */
689 		rat = zs->zs_bytes_out >> 8;
690 		if (rat == 0)		/* Don't divide by zero. */
691 			rat = 0x7fffffff;
692 		else
693 			rat = zs->zs_in_count / rat;
694 	} else {
695 		/* 8 fractional bits. */
696 		rat = (zs->zs_in_count << 8) / zs->zs_bytes_out;
697 	}
698 	if (rat > zs->zs_ratio)
699 		zs->zs_ratio = rat;
700 	else {
701 		zs->zs_ratio = 0;
702 		cl_hash(zs, (count_int) zs->zs_hsize);
703 		zs->zs_free_ent = FIRST;
704 		zs->zs_clear_flg = 1;
705 		if (output(zs, (code_int) CLEAR) == -1)
706 			return (-1);
707 	}
708 	return (0);
709 }
710 
711 /* Reset code table. */
712 static void
713 cl_hash(struct s_zstate *zs, count_int cl_hsize)
714 {
715 	count_int *htab_p;
716 	long i, m1;
717 
718 	m1 = -1;
719 	htab_p = zs->zs_htab + cl_hsize;
720 	i = cl_hsize - 16;
721 	do {			/* Might use Sys V memset(3) here. */
722 		*(htab_p - 16) = m1;
723 		*(htab_p - 15) = m1;
724 		*(htab_p - 14) = m1;
725 		*(htab_p - 13) = m1;
726 		*(htab_p - 12) = m1;
727 		*(htab_p - 11) = m1;
728 		*(htab_p - 10) = m1;
729 		*(htab_p - 9) = m1;
730 		*(htab_p - 8) = m1;
731 		*(htab_p - 7) = m1;
732 		*(htab_p - 6) = m1;
733 		*(htab_p - 5) = m1;
734 		*(htab_p - 4) = m1;
735 		*(htab_p - 3) = m1;
736 		*(htab_p - 2) = m1;
737 		*(htab_p - 1) = m1;
738 		htab_p -= 16;
739 	} while ((i -= 16) >= 0);
740 	for (i += 16; i > 0; i--)
741 		*--htab_p = m1;
742 }
743 
744 FILE *
745 zopen(const char *name, const char *mode, int bits)
746 {
747 	int fd;
748 	void *cookie;
749 	if ((fd = open(name, (*mode=='r'? O_RDONLY:O_WRONLY|O_CREAT),
750 	    S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) == -1)
751 		return NULL;
752 	if ((cookie = z_open(fd, mode, NULL, bits, 0, 0)) == NULL) {
753 		close(fd);
754 		return NULL;
755 	}
756 	return funopen(cookie, (*mode == 'r'?zread:NULL),
757 	    (*mode == 'w'?zwrite:NULL), NULL, zclose);
758 }
759 
760 void *
761 z_open(int fd, const char *mode, char *name, int bits,
762     u_int32_t mtime, int gotmagic)
763 {
764 	struct s_zstate *zs;
765 
766 	if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' ||
767 	    bits < 0 || bits > BITS) {
768 		errno = EINVAL;
769 		return (NULL);
770 	}
771 
772 	if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL)
773 		return (NULL);
774 
775 	/* User settable max # bits/code. */
776 	zs->zs_maxbits = bits ? bits : BITS;
777 	/* Should NEVER generate this code. */
778 	zs->zs_maxmaxcode = 1 << zs->zs_maxbits;
779 	zs->zs_hsize = HSIZE;		/* For dynamic table sizing. */
780 	zs->zs_free_ent = 0;		/* First unused entry. */
781 	zs->zs_block_compress = BLOCK_MASK;
782 	zs->zs_clear_flg = 0;
783 	zs->zs_ratio = 0;
784 	zs->zs_checkpoint = CHECK_GAP;
785 	zs->zs_in_count = 0;		/* Length of input. */
786 	zs->zs_out_count = 0;		/* # of codes output (for debugging).*/
787 	zs->zs_state = gotmagic ? S_MAGIC : S_START;
788 	zs->zs_offset = 0;
789 	zs->zs_size = 0;
790 	zs->zs_mode = mode[0];
791 	zs->zs_bp = zs->zs_ebp = zs->zs_buf;
792 
793 	zs->zs_fd = fd;
794 	return zs;
795 }
796