xref: /openbsd/usr.bin/compress/zopen.c (revision fd84ef7e)
1 /*	$OpenBSD: zopen.c,v 1.7 2001/11/19 19:02:13 mpech Exp $	*/
2 /*	$NetBSD: zopen.c,v 1.5 1995/03/26 09:44:53 glass Exp $	*/
3 
4 /*-
5  * Copyright (c) 1985, 1986, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Diomidis Spinellis and James A. Woods, derived from original
10  * work by Spencer Thomas and Joseph Orost.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by the University of
23  *	California, Berkeley and its contributors.
24  * 4. Neither the name of the University nor the names of its contributors
25  *    may be used to endorse or promote products derived from this software
26  *    without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38  * SUCH DAMAGE.
39  */
40 
41 #if defined(LIBC_SCCS) && !defined(lint)
42 #if 0
43 static char sccsid[] = "@(#)zopen.c	8.1 (Berkeley) 6/27/93";
44 #else
45 static char rcsid[] = "$OpenBSD: zopen.c,v 1.7 2001/11/19 19:02:13 mpech Exp $";
46 #endif
47 #endif /* LIBC_SCCS and not lint */
48 
49 /*-
50  * fcompress.c - File compression ala IEEE Computer, June 1984.
51  *
52  * Compress authors:
53  *		Spencer W. Thomas	(decvax!utah-cs!thomas)
54  *		Jim McKie		(decvax!mcvax!jim)
55  *		Steve Davies		(decvax!vax135!petsd!peora!srd)
56  *		Ken Turkowski		(decvax!decwrl!turtlevax!ken)
57  *		James A. Woods		(decvax!ihnp4!ames!jaw)
58  *		Joe Orost		(decvax!vax135!petsd!joe)
59  *
60  * Cleaned up and converted to library returning I/O streams by
61  * Diomidis Spinellis <dds@doc.ic.ac.uk>.
62  *
63  * zopen(filename, mode, bits)
64  *	Returns a FILE * that can be used for read or write.  The modes
65  *	supported are only "r" and "w".  Seeking is not allowed.  On
66  *	reading the file is decompressed, on writing it is compressed.
67  *	The output is compatible with compress(1) with 16 bit tables.
68  *	Any file produced by compress(1) can be read.
69  */
70 
71 #include <sys/param.h>
72 #include <sys/stat.h>
73 
74 #include <ctype.h>
75 #include <errno.h>
76 #include <signal.h>
77 #include <stdio.h>
78 #include <stdlib.h>
79 #include <string.h>
80 #include <unistd.h>
81 #include <fcntl.h>
82 #include "compress.h"
83 
84 #define	BITS		16		/* Default bits. */
85 #define	HSIZE		69001		/* 95% occupancy */
86 #define	ZBUFSIZ		8192		/* I/O buffer size */
87 
88 /* A code_int must be able to hold 2**BITS values of type int, and also -1. */
89 typedef long code_int;
90 typedef long count_int;
91 
92 static u_char z_magic[] =
93 	{'\037', '\235'};		/* 1F 9D */
94 
95 #define	BIT_MASK	0x1f		/* Defines for third byte of header. */
96 #define	BLOCK_MASK	0x80
97 
98 /*
99  * Masks 0x40 and 0x20 are free.  I think 0x20 should mean that there is
100  * a fourth header byte (for expansion).
101  */
102 #define	INIT_BITS 9			/* Initial number of bits/code. */
103 
104 #define	MAXCODE(n_bits)	((1 << (n_bits)) - 1)
105 
106 struct s_zstate {
107 	int zs_fd;			/* File stream for I/O */
108 	char zs_mode;			/* r or w */
109 	enum {
110 		S_START, S_MIDDLE, S_EOF
111 	} zs_state;			/* State of computation */
112 	int zs_n_bits;			/* Number of bits/code. */
113 	int zs_maxbits;			/* User settable max # bits/code. */
114 	code_int zs_maxcode;		/* Maximum code, given n_bits. */
115 	code_int zs_maxmaxcode;		/* Should NEVER generate this code. */
116 	count_int zs_htab [HSIZE];
117 	u_short zs_codetab [HSIZE];
118 	code_int zs_hsize;		/* For dynamic table sizing. */
119 	code_int zs_free_ent;		/* First unused entry. */
120 	/*
121 	 * Block compression parameters -- after all codes are used up,
122 	 * and compression rate changes, start over.
123 	 */
124 	int zs_block_compress;
125 	int zs_clear_flg;
126 	long zs_ratio;
127 	count_int zs_checkpoint;
128 	long zs_in_count;		/* Length of input. */
129 	long zs_bytes_out;		/* Length of compressed output. */
130 	long zs_out_count;		/* # of codes output (for debugging).*/
131 	u_char zs_buf[ZBUFSIZ];		/* I/O buffer */
132 	u_char *zs_bp;			/* Current I/O window in the zs_buf */
133 	int zs_offset;			/* Number of bits in the zs_buf */
134 	union {
135 		struct {
136 			long zs_fcode;
137 			code_int zs_ent;
138 			code_int zs_hsize_reg;
139 			int zs_hshift;
140 		} w;			/* Write paramenters */
141 		struct {
142 			u_char *zs_stackp, *zs_ebp;
143 			int zs_finchar;
144 			code_int zs_code, zs_oldcode, zs_incode;
145 			int zs_size;
146 		} r;			/* Read parameters */
147 	} u;
148 };
149 
150 /* Definitions to retain old variable names */
151 #define zs_fcode	u.w.zs_fcode
152 #define zs_ent		u.w.zs_ent
153 #define zs_hsize_reg	u.w.zs_hsize_reg
154 #define zs_hshift	u.w.zs_hshift
155 #define zs_stackp	u.r.zs_stackp
156 #define zs_finchar	u.r.zs_finchar
157 #define zs_code		u.r.zs_code
158 #define zs_oldcode	u.r.zs_oldcode
159 #define zs_incode	u.r.zs_incode
160 #define zs_size		u.r.zs_size
161 #define zs_ebp		u.r.zs_ebp
162 
163 /*
164  * To save much memory, we overlay the table used by compress() with those
165  * used by decompress().  The tab_prefix table is the same size and type as
166  * the codetab.  The tab_suffix table needs 2**BITS characters.  We get this
167  * from the beginning of htab.  The output stack uses the rest of htab, and
168  * contains characters.  There is plenty of room for any possible stack
169  * (stack used to be 8000 characters).
170  */
171 
172 #define	htabof(i)	zs->zs_htab[i]
173 #define	codetabof(i)	zs->zs_codetab[i]
174 
175 #define	tab_prefixof(i)	codetabof(i)
176 #define	tab_suffixof(i)	((u_char *)(zs->zs_htab))[i]
177 #define	de_stack	((u_char *)&tab_suffixof(1 << BITS))
178 
179 #define	CHECK_GAP 10000		/* Ratio check interval. */
180 
181 /*
182  * the next two codes should not be changed lightly, as they must not
183  * lie within the contiguous general code space.
184  */
185 #define	FIRST	257		/* First free entry. */
186 #define	CLEAR	256		/* Table clear output code. */
187 
188 static int	cl_block __P((struct s_zstate *));
189 static void	cl_hash __P((struct s_zstate *, register count_int));
190 static code_int	getcode __P((struct s_zstate *));
191 static int	output __P((struct s_zstate *, code_int));
192 
193 /*-
194  * Algorithm from "A Technique for High Performance Data Compression",
195  * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
196  *
197  * Algorithm:
198  * 	Modified Lempel-Ziv method (LZW).  Basically finds common
199  * substrings and replaces them with a variable size code.  This is
200  * deterministic, and can be done on the fly.  Thus, the decompression
201  * procedure needs no input table, but tracks the way the table was built.
202  */
203 
204 /*-
205  * compress write
206  *
207  * Algorithm:  use open addressing double hashing (no chaining) on the
208  * prefix code / next character combination.  We do a variant of Knuth's
209  * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
210  * secondary probe.  Here, the modular division first probe is gives way
211  * to a faster exclusive-or manipulation.  Also do block compression with
212  * an adaptive reset, whereby the code table is cleared when the compression
213  * ratio decreases, but after the table fills.  The variable-length output
214  * codes are re-sized at this point, and a special CLEAR code is generated
215  * for the decompressor.  Late addition:  construct the table according to
216  * file size for noticeable speed improvement on small files.  Please direct
217  * questions about this implementation to ames!jaw.
218  */
219 int
220 zwrite(cookie, wbp, num)
221 	void *cookie;
222 	const char *wbp;
223 	int num;
224 {
225 	code_int i;
226 	int c, disp;
227 	struct s_zstate *zs;
228 	const u_char *bp;
229 	u_char tmp;
230 	int count;
231 
232 	zs = cookie;
233 	count = num;
234 	bp = (u_char *)wbp;
235 	switch (zs->zs_state) {
236 	case S_EOF:
237 		return 0;
238 	case S_START:
239 		zs->zs_state = S_MIDDLE;
240 
241 		zs->zs_maxmaxcode = 1L << zs->zs_maxbits;
242 		if (write(zs->zs_fd, z_magic, sizeof(z_magic)) !=
243 		    sizeof(z_magic))
244 			return (-1);
245 		tmp = (u_char)(zs->zs_maxbits | zs->zs_block_compress);
246 		if (write(zs->zs_fd, &tmp, sizeof(tmp)) != sizeof(tmp))
247 			return (-1);
248 
249 		zs->zs_bp = zs->zs_buf;
250 		zs->zs_offset = 0;
251 		zs->zs_bytes_out = 3;	/* Includes 3-byte header mojo. */
252 		zs->zs_out_count = 0;
253 		zs->zs_clear_flg = 0;
254 		zs->zs_ratio = 0;
255 		zs->zs_in_count = 1;
256 		zs->zs_checkpoint = CHECK_GAP;
257 		zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
258 		zs->zs_free_ent = ((zs->zs_block_compress) ? FIRST : 256);
259 
260 		zs->zs_ent = *bp++;
261 		--count;
262 
263 		zs->zs_hshift = 0;
264 		for (zs->zs_fcode = (long)zs->zs_hsize; zs->zs_fcode < 65536L;
265 		     zs->zs_fcode *= 2L)
266 			zs->zs_hshift++;
267 		/* Set hash code range bound. */
268 		zs->zs_hshift = 8 - zs->zs_hshift;
269 
270 		zs->zs_hsize_reg = zs->zs_hsize;
271 		/* Clear hash table. */
272 		cl_hash(zs, (count_int)zs->zs_hsize_reg);
273 
274 	case S_MIDDLE:
275 		for (i = 0; count-- > 0;) {
276 			c = *bp++;
277 			zs->zs_in_count++;
278 			zs->zs_fcode = (long)(((long)c << zs->zs_maxbits) +
279 					      zs->zs_ent);
280 			/* Xor hashing. */
281 			i = ((c << zs->zs_hshift) ^ zs->zs_ent);
282 
283 			if (htabof(i) == zs->zs_fcode) {
284 				zs->zs_ent = codetabof(i);
285 				continue;
286 			} else if ((long)htabof(i) < 0)	/* Empty slot. */
287 				goto nomatch;
288 			/* Secondary hash (after G. Knott). */
289 			disp = zs->zs_hsize_reg - i;
290 			if (i == 0)
291 			disp = 1;
292 probe:			if ((i -= disp) < 0)
293 				i += zs->zs_hsize_reg;
294 
295 			if (htabof(i) == zs->zs_fcode) {
296 				zs->zs_ent = codetabof(i);
297 				continue;
298 			}
299 			if ((long)htabof(i) >= 0)
300 				goto probe;
301 nomatch:		if (output(zs, (code_int) zs->zs_ent) == -1)
302 				return (-1);
303 			zs->zs_out_count++;
304 			zs->zs_ent = c;
305 			if (zs->zs_free_ent < zs->zs_maxmaxcode) {
306 				/* code -> hashtable */
307 				codetabof(i) = zs->zs_free_ent++;
308 				htabof(i) = zs->zs_fcode;
309 			} else if ((count_int)zs->zs_in_count >=
310 			    zs->zs_checkpoint && zs->zs_block_compress) {
311 				if (cl_block(zs) == -1)
312 					return (-1);
313 			}
314 		}
315 	}
316 	return (num);
317 }
318 
319 int
320 zclose(cookie)
321 	void *cookie;
322 {
323 	struct s_zstate *zs;
324 	int rval;
325 
326 	zs = cookie;
327 	if (zs->zs_mode == 'w') {		/* Put out the final code. */
328 		if (output(zs, (code_int) zs->zs_ent) == -1) {
329 			(void)close(zs->zs_fd);
330 			free(zs);
331 			return (-1);
332 		}
333 		zs->zs_out_count++;
334 		if (output(zs, (code_int) - 1) == -1) {
335 			(void)close(zs->zs_fd);
336 			free(zs);
337 			return (-1);
338 		}
339 	}
340 	rval = close(zs->zs_fd);
341 	free(zs);
342 	return (rval);
343 }
344 
345 /*-
346  * Output the given code.
347  * Inputs:
348  * 	code:	A n_bits-bit integer.  If == -1, then EOF.  This assumes
349  *		that n_bits =< (long)wordsize - 1.
350  * Outputs:
351  * 	Outputs code to the file.
352  * Assumptions:
353  *	Chars are 8 bits long.
354  * Algorithm:
355  * 	Maintain a BITS character long buffer (so that 8 codes will
356  * fit in it exactly).  Use the VAX insv instruction to insert each
357  * code in turn.  When the buffer fills up empty it and start over.
358  */
359 
360 static const u_char lmask[9] =
361 	{0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
362 static const u_char rmask[9] =
363 	{0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
364 
365 static int
366 output(zs, ocode)
367 	struct s_zstate *zs;
368 	code_int ocode;
369 {
370 	int bits;
371 
372 	if (ocode >= 0) {
373 		int r_off;
374 		u_char *bp;
375 
376 		/* Get to the first byte. */
377 		bp = zs->zs_bp + (zs->zs_offset >> 3);
378 		r_off = zs->zs_offset & 7;
379 		bits = zs->zs_n_bits;
380 
381 		/*
382 		 * Since ocode is always >= 8 bits, only need to mask the first
383 		 * hunk on the left.
384 		 */
385 		*bp = (*bp & rmask[r_off]) | ((ocode << r_off) & lmask[r_off]);
386 		bp++;
387 		bits -= (8 - r_off);
388 		ocode >>= 8 - r_off;
389 		/* Get any 8 bit parts in the middle (<=1 for up to 16 bits) */
390 		if (bits >= 8) {
391 			*bp++ = ocode;
392 			ocode >>= 8;
393 			bits -= 8;
394 		}
395 		/* Last bits. */
396 		if (bits)
397 			*bp = ocode;
398 		zs->zs_offset += zs->zs_n_bits;
399 		if (zs->zs_offset == (zs->zs_n_bits << 3)) {
400 			zs->zs_bp += zs->zs_n_bits;
401 			zs->zs_offset = 0;
402 		}
403 		/*
404 		 * If the next entry is going to be too big for the ocode size,
405 		 * then increase it, if possible.
406 		 */
407 		if (zs->zs_free_ent > zs->zs_maxcode ||
408 		    (zs->zs_clear_flg > 0)) {
409 		       /*
410 			* Write the whole buffer, because the input side won't
411 			* discover the size increase until after it has read it
412 			*/
413 			if (zs->zs_offset > 0) {
414 				zs->zs_bp += zs->zs_n_bits;
415 				zs->zs_offset = 0;
416 			}
417 
418 			if (zs->zs_clear_flg) {
419 				zs->zs_maxcode =
420 					MAXCODE(zs->zs_n_bits = INIT_BITS);
421 				zs->zs_clear_flg = 0;
422 			} else {
423 				zs->zs_n_bits++;
424 				if (zs->zs_n_bits == zs->zs_maxbits)
425 					zs->zs_maxcode = zs->zs_maxmaxcode;
426 				else
427 					zs->zs_maxcode =
428 						MAXCODE(zs->zs_n_bits);
429 			}
430 		}
431 
432 		if (zs->zs_bp + zs->zs_n_bits > &zs->zs_buf[ZBUFSIZ]) {
433 			bits = zs->zs_bp - zs->zs_buf;
434 			if (write(zs->zs_fd, zs->zs_buf, bits) != bits)
435 				return (-1);
436 			zs->zs_bytes_out += bits;
437 			if (zs->zs_offset > 0)
438 				fprintf (stderr, "zs_offset != 0\n");
439 			zs->zs_bp = zs->zs_buf;
440 		}
441 	} else {
442 		/* At EOF, write the rest of the buffer. */
443 		if (zs->zs_offset > 0)
444 			zs->zs_bp += (zs->zs_offset + 7) / 8;
445 		if (zs->zs_bp > zs->zs_buf) {
446 			bits = zs->zs_bp - zs->zs_buf;
447 			if (write(zs->zs_fd, zs->zs_buf, bits) != bits)
448 				return (-1);
449 			zs->zs_bytes_out += bits;
450 		}
451 		zs->zs_offset = 0;
452 		zs->zs_bp = zs->zs_buf;
453 	}
454 	return (0);
455 }
456 
457 /*
458  * Decompress read.  This routine adapts to the codes in the file building
459  * the "string" table on-the-fly; requiring no table to be stored in the
460  * compressed file.  The tables used herein are shared with those of the
461  * compress() routine.  See the definitions above.
462  */
463 int
464 zread(cookie, rbp, num)
465 	void *cookie;
466 	char *rbp;
467 	int num;
468 {
469 	u_int count;
470 	struct s_zstate *zs;
471 	u_char *bp, header[3];
472 
473 	if (num == 0)
474 		return (0);
475 
476 	zs = cookie;
477 	count = num;
478 	bp = (u_char *)rbp;
479 	switch (zs->zs_state) {
480 	case S_START:
481 		zs->zs_state = S_MIDDLE;
482 		zs->zs_bp = zs->zs_buf;
483 		break;
484 	case S_MIDDLE:
485 		goto middle;
486 	case S_EOF:
487 		goto eof;
488 	}
489 
490 	/* Check the magic number */
491 	if (read(zs->zs_fd, header, sizeof(header)) != sizeof(header) ||
492 	    memcmp(header, z_magic, sizeof(z_magic)) != 0) {
493 		errno = EFTYPE;
494 		return (-1);
495 	}
496 	zs->zs_maxbits = header[2];	/* Set -b from file. */
497 	zs->zs_block_compress = zs->zs_maxbits & BLOCK_MASK;
498 	zs->zs_maxbits &= BIT_MASK;
499 	zs->zs_maxmaxcode = 1L << zs->zs_maxbits;
500 	if (zs->zs_maxbits > BITS) {
501 		errno = EFTYPE;
502 		return (-1);
503 	}
504 	/* As above, initialize the first 256 entries in the table. */
505 	zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
506 	for (zs->zs_code = 255; zs->zs_code >= 0; zs->zs_code--) {
507 		tab_prefixof(zs->zs_code) = 0;
508 		tab_suffixof(zs->zs_code) = (u_char) zs->zs_code;
509 	}
510 	zs->zs_free_ent = zs->zs_block_compress ? FIRST : 256;
511 
512 	zs->zs_finchar = zs->zs_oldcode = getcode(zs);
513 	if (zs->zs_oldcode == -1)	/* EOF already? */
514 		return (0);	/* Get out of here */
515 
516 	/* First code must be 8 bits = char. */
517 	*bp++ = (u_char)zs->zs_finchar;
518 	count--;
519 	zs->zs_stackp = de_stack;
520 
521 	while ((zs->zs_code = getcode(zs)) > -1) {
522 
523 		if ((zs->zs_code == CLEAR) && zs->zs_block_compress) {
524 			for (zs->zs_code = 255; zs->zs_code >= 0;
525 			     zs->zs_code--)
526 				tab_prefixof(zs->zs_code) = 0;
527 			zs->zs_clear_flg = 1;
528 			zs->zs_free_ent = FIRST - 1;
529 			if ((zs->zs_code = getcode(zs)) == -1)	/* O, untimely death! */
530 				break;
531 		}
532 		zs->zs_incode = zs->zs_code;
533 
534 		/* Special case for KwKwK string. */
535 		if (zs->zs_code >= zs->zs_free_ent) {
536 			*zs->zs_stackp++ = zs->zs_finchar;
537 			zs->zs_code = zs->zs_oldcode;
538 		}
539 
540 		/* Generate output characters in reverse order. */
541 		while (zs->zs_code >= 256) {
542 			*zs->zs_stackp++ = tab_suffixof(zs->zs_code);
543 			zs->zs_code = tab_prefixof(zs->zs_code);
544 		}
545 		*zs->zs_stackp++ = zs->zs_finchar = tab_suffixof(zs->zs_code);
546 
547 		/* And put them out in forward order.  */
548 middle:		do {
549 			if (count-- == 0)
550 				return (num);
551 			*bp++ = *--zs->zs_stackp;
552 		} while (zs->zs_stackp > de_stack);
553 
554 		/* Generate the new entry. */
555 		if ((zs->zs_code = zs->zs_free_ent) < zs->zs_maxmaxcode) {
556 			tab_prefixof(zs->zs_code) = (u_short) zs->zs_oldcode;
557 			tab_suffixof(zs->zs_code) = zs->zs_finchar;
558 			zs->zs_free_ent = zs->zs_code + 1;
559 		}
560 
561 		/* Remember previous code. */
562 		zs->zs_oldcode = zs->zs_incode;
563 	}
564 	zs->zs_state = S_EOF;
565 eof:	return (num - count);
566 }
567 
568 /*-
569  * Read one code from the standard input.  If EOF, return -1.
570  * Inputs:
571  * 	stdin
572  * Outputs:
573  * 	code or -1 is returned.
574  */
575 static code_int
576 getcode(zs)
577 	struct s_zstate *zs;
578 {
579 	code_int gcode;
580 	int r_off, bits;
581 	u_char *bp;
582 
583 	if (zs->zs_clear_flg > 0 || zs->zs_offset >= zs->zs_size ||
584 	    zs->zs_free_ent > zs->zs_maxcode) {
585 
586 		zs->zs_bp += zs->zs_n_bits;
587 		/*
588 		 * If the next entry will be too big for the current gcode
589 		 * size, then we must increase the size.  This implies reading
590 		 * a new buffer full, too.
591 		 */
592 		if (zs->zs_free_ent > zs->zs_maxcode) {
593 			zs->zs_n_bits++;
594 			if (zs->zs_n_bits == zs->zs_maxbits)	/* Won't get any bigger now. */
595 				zs->zs_maxcode = zs->zs_maxmaxcode;
596 			else
597 				zs->zs_maxcode = MAXCODE(zs->zs_n_bits);
598 		}
599 		if (zs->zs_clear_flg > 0) {
600 			zs->zs_maxcode = MAXCODE(zs->zs_n_bits = INIT_BITS);
601 			zs->zs_clear_flg = 0;
602 		}
603 
604 		/* fill the buffer up to the neck */
605 		if (zs->zs_bp + zs->zs_n_bits > zs->zs_ebp) {
606 			for (bp = zs->zs_buf; zs->zs_bp < zs->zs_ebp;
607 				*bp++ = *zs->zs_bp++);
608 			if ((bits = read(zs->zs_fd, bp, ZBUFSIZ -
609 					 (bp - zs->zs_buf))) < 0)
610 				return -1;
611 			zs->zs_bp = zs->zs_buf;
612 			zs->zs_ebp = bp + bits;
613 		}
614 		zs->zs_offset = 0;
615 		zs->zs_size = MIN(zs->zs_n_bits, zs->zs_ebp - zs->zs_bp);
616 		if (zs->zs_size == 0)
617 			return -1;
618 		/* Round size down to integral number of codes. */
619 		zs->zs_size = (zs->zs_size << 3) - (zs->zs_n_bits - 1);
620 	}
621 
622 	bp = zs->zs_bp;
623 	r_off = zs->zs_offset;
624 	bits = zs->zs_n_bits;
625 
626 	/* Get to the first byte. */
627 	bp += (r_off >> 3);
628 	r_off &= 7;
629 
630 	/* Get first part (low order bits). */
631 	gcode = (*bp++ >> r_off);
632 	bits -= (8 - r_off);
633 	r_off = 8 - r_off;	/* Now, roffset into gcode word. */
634 
635 	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
636 	if (bits >= 8) {
637 		gcode |= *bp++ << r_off;
638 		r_off += 8;
639 		bits -= 8;
640 	}
641 
642 	/* High order bits. */
643 	gcode |= (*bp & rmask[bits]) << r_off;
644 	zs->zs_offset += zs->zs_n_bits;
645 
646 	return (gcode);
647 }
648 
649 static int
650 cl_block(zs)			/* Table clear for block compress. */
651 	struct s_zstate *zs;
652 {
653 	long rat;
654 
655 	zs->zs_checkpoint = zs->zs_in_count + CHECK_GAP;
656 
657 	if (zs->zs_in_count > 0x007fffff) {	/* Shift will overflow. */
658 		rat = zs->zs_bytes_out >> 8;
659 		if (rat == 0)		/* Don't divide by zero. */
660 			rat = 0x7fffffff;
661 		else
662 			rat = zs->zs_in_count / rat;
663 	} else
664 		rat = (zs->zs_in_count << 8) / zs->zs_bytes_out;	/* 8 fractional bits. */
665 	if (rat > zs->zs_ratio)
666 		zs->zs_ratio = rat;
667 	else {
668 		zs->zs_ratio = 0;
669 		cl_hash(zs, (count_int) zs->zs_hsize);
670 		zs->zs_free_ent = FIRST;
671 		zs->zs_clear_flg = 1;
672 		if (output(zs, (code_int) CLEAR) == -1)
673 			return (-1);
674 	}
675 	return (0);
676 }
677 
678 static void
679 cl_hash(zs, cl_hsize)			/* Reset code table. */
680 	struct s_zstate *zs;
681 	count_int cl_hsize;
682 {
683 	count_int *htab_p;
684 	long i, m1;
685 
686 	m1 = -1;
687 	htab_p = zs->zs_htab + cl_hsize;
688 	i = cl_hsize - 16;
689 	do {			/* Might use Sys V memset(3) here. */
690 		*(htab_p - 16) = m1;
691 		*(htab_p - 15) = m1;
692 		*(htab_p - 14) = m1;
693 		*(htab_p - 13) = m1;
694 		*(htab_p - 12) = m1;
695 		*(htab_p - 11) = m1;
696 		*(htab_p - 10) = m1;
697 		*(htab_p - 9) = m1;
698 		*(htab_p - 8) = m1;
699 		*(htab_p - 7) = m1;
700 		*(htab_p - 6) = m1;
701 		*(htab_p - 5) = m1;
702 		*(htab_p - 4) = m1;
703 		*(htab_p - 3) = m1;
704 		*(htab_p - 2) = m1;
705 		*(htab_p - 1) = m1;
706 		htab_p -= 16;
707 	} while ((i -= 16) >= 0);
708 	for (i += 16; i > 0; i--)
709 		*--htab_p = m1;
710 }
711 
712 FILE *
713 zopen(name, mode, bits)
714 	const char *name;
715 	const char *mode;
716 	int bits;
717 {
718 	int fd;
719 	void *cookie;
720 	if ((fd = open(name, (*mode=='r'? O_RDONLY:O_WRONLY|O_CREAT),
721 		       S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) == -1)
722 		return NULL;
723 	if ((cookie = z_open(fd, mode, bits)) == NULL) {
724 		close(fd);
725 		return NULL;
726 	}
727 	return funopen(cookie, (*mode == 'r'?zread:NULL),
728 		       (*mode == 'w'?zwrite:NULL), NULL, zclose);
729 }
730 
731 void *
732 z_open(fd, mode, bits)
733 	int fd;
734 	const char *mode;
735 	int bits;
736 {
737 	struct s_zstate *zs;
738 
739 	if ((mode[0] != 'r' && mode[0] != 'w') || mode[1] != '\0' ||
740 	    bits < 0 || bits > BITS) {
741 		errno = EINVAL;
742 		return (NULL);
743 	}
744 
745 	if ((zs = calloc(1, sizeof(struct s_zstate))) == NULL)
746 		return (NULL);
747 
748 	/* User settable max # bits/code. */
749 	zs->zs_maxbits = bits ? bits : BITS;
750 	/* Should NEVER generate this code. */
751 	zs->zs_maxmaxcode = 1 << zs->zs_maxbits;
752 	zs->zs_hsize = HSIZE;		/* For dynamic table sizing. */
753 	zs->zs_free_ent = 0;		/* First unused entry. */
754 	zs->zs_block_compress = BLOCK_MASK;
755 	zs->zs_clear_flg = 0;
756 	zs->zs_ratio = 0;
757 	zs->zs_checkpoint = CHECK_GAP;
758 	zs->zs_in_count = 1;		/* Length of input. */
759 	zs->zs_out_count = 0;		/* # of codes output (for debugging).*/
760 	zs->zs_state = S_START;
761 	zs->zs_offset = 0;
762 	zs->zs_size = 0;
763 	zs->zs_mode = mode[0];
764 	zs->zs_bp = zs->zs_ebp = zs->zs_buf;
765 
766 	zs->zs_fd = fd;
767 	return zs;
768 }
769 
770 int
771 z_check_header(fd, sb, ofn)
772 	int fd;
773 	struct stat *sb;
774 	const char *ofn;
775 {
776 	int f;
777 	u_char buf[sizeof(z_magic)];
778 	off_t off = lseek(fd, 0, SEEK_CUR);
779 
780 	f = (read(fd, buf, sizeof(buf)) == sizeof(buf) &&
781 	     !memcmp(buf, z_magic, sizeof(buf)));
782 
783 	lseek (fd, off, SEEK_SET);
784 
785 	return f;
786 }
787