xref: /original-bsd/usr.bin/compress/compress.c (revision 6f815012)
1 /*
2  * Copyright (c) 1985, 1986 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * James A. Woods, derived from original work by Spencer Thomas
7  * and Joseph Orost.
8  *
9  * %sccs.include.redist.c%
10  */
11 
12 #ifndef lint
13 char copyright[] =
14 "@(#) Copyright (c) 1985, 1986 The Regents of the University of California.\n\
15  All rights reserved.\n";
16 #endif /* not lint */
17 
18 #ifndef lint
19 static char sccsid[] = "@(#)compress.c	5.19 (Berkeley) 03/18/91";
20 #endif /* not lint */
21 
22 /*
23  * compress.c - File compression ala IEEE Computer, June 1984.
24  *
25  * Authors:	Spencer W. Thomas	(decvax!utah-cs!thomas)
26  *		Jim McKie		(decvax!mcvax!jim)
27  *		Steve Davies		(decvax!vax135!petsd!peora!srd)
28  *		Ken Turkowski		(decvax!decwrl!turtlevax!ken)
29  *		James A. Woods		(decvax!ihnp4!ames!jaw)
30  *		Joe Orost		(decvax!vax135!petsd!joe)
31  */
32 
33 #include <sys/param.h>
34 #include <sys/stat.h>
35 #include <signal.h>
36 #include <utime.h>
37 #include <errno.h>
38 #include <unistd.h>
39 #include <stdio.h>
40 #include <ctype.h>
41 #include <stdlib.h>
42 #include <string.h>
43 
44 /*
45  * Set USERMEM to the maximum amount of physical user memory available
46  * in bytes.  USERMEM is used to determine the maximum BITS that can be used
47  * for compression.
48  *
49  * SACREDMEM is the amount of physical memory saved for others; compress
50  * will hog the rest.
51  */
52 #ifndef SACREDMEM
53 #define SACREDMEM	0
54 #endif
55 
56 #ifndef USERMEM
57 # define USERMEM 	450000	/* default user memory */
58 #endif
59 
60 #ifdef pdp11
61 # define BITS 	12	/* max bits/code for 16-bit machine */
62 # define NO_UCHAR	/* also if "unsigned char" functions as signed char */
63 # undef USERMEM
64 #endif /* pdp11 */	/* don't forget to compile with -i */
65 
66 #ifdef USERMEM
67 # if USERMEM >= (433484+SACREDMEM)
68 #  define PBITS	16
69 # else
70 #  if USERMEM >= (229600+SACREDMEM)
71 #   define PBITS	15
72 #  else
73 #   if USERMEM >= (127536+SACREDMEM)
74 #    define PBITS	14
75 #   else
76 #    if USERMEM >= (73464+SACREDMEM)
77 #     define PBITS	13
78 #    else
79 #     define PBITS	12
80 #    endif
81 #   endif
82 #  endif
83 # endif
84 # undef USERMEM
85 #endif /* USERMEM */
86 
87 #ifdef PBITS		/* Preferred BITS for this memory size */
88 # ifndef BITS
89 #  define BITS PBITS
90 # endif BITS
91 #endif /* PBITS */
92 
93 #if BITS == 16
94 # define HSIZE	69001		/* 95% occupancy */
95 #endif
96 #if BITS == 15
97 # define HSIZE	35023		/* 94% occupancy */
98 #endif
99 #if BITS == 14
100 # define HSIZE	18013		/* 91% occupancy */
101 #endif
102 #if BITS == 13
103 # define HSIZE	9001		/* 91% occupancy */
104 #endif
105 #if BITS <= 12
106 # define HSIZE	5003		/* 80% occupancy */
107 #endif
108 
109 /*
110  * a code_int must be able to hold 2**BITS values of type int, and also -1
111  */
112 #if BITS > 15
113 typedef long int	code_int;
114 #else
115 typedef int		code_int;
116 #endif
117 
118 #ifdef SIGNED_COMPARE_SLOW
119 typedef unsigned long int count_int;
120 typedef unsigned short int count_short;
121 #else
122 typedef long int	  count_int;
123 #endif
124 
125 #ifdef NO_UCHAR
126  typedef char	char_type;
127 #else
128  typedef	unsigned char	char_type;
129 #endif /* UCHAR */
130 char_type magic_header[] = { "\037\235" };	/* 1F 9D */
131 
132 /* Defines for third byte of header */
133 #define BIT_MASK	0x1f
134 #define BLOCK_MASK	0x80
135 /* Masks 0x40 and 0x20 are free.  I think 0x20 should mean that there is
136    a fourth header byte (for expansion).
137 */
138 #define INIT_BITS 9			/* initial number of bits/code */
139 
140 int n_bits;				/* number of bits/code */
141 int maxbits = BITS;			/* user settable max # bits/code */
142 code_int maxcode;			/* maximum code, given n_bits */
143 code_int maxmaxcode = 1 << BITS;	/* should NEVER generate this code */
144 #ifdef COMPATIBLE		/* But wrong! */
145 # define MAXCODE(n_bits)	(1 << (n_bits) - 1)
146 #else
147 # define MAXCODE(n_bits)	((1 << (n_bits)) - 1)
148 #endif /* COMPATIBLE */
149 
150 count_int htab [HSIZE];
151 unsigned short codetab [HSIZE];
152 
153 #define htabof(i)	htab[i]
154 #define codetabof(i)	codetab[i]
155 code_int hsize = HSIZE;			/* for dynamic table sizing */
156 count_int fsize;
157 
158 /*
159  * To save much memory, we overlay the table used by compress() with those
160  * used by decompress().  The tab_prefix table is the same size and type
161  * as the codetab.  The tab_suffix table needs 2**BITS characters.  We
162  * get this from the beginning of htab.  The output stack uses the rest
163  * of htab, and contains characters.  There is plenty of room for any
164  * possible stack (stack used to be 8000 characters).
165  */
166 
167 #define tab_prefixof(i)	codetabof(i)
168 # define tab_suffixof(i)	((char_type *)(htab))[i]
169 # define de_stack		((char_type *)&tab_suffixof(1<<BITS))
170 
171 code_int free_ent = 0;			/* first unused entry */
172 int exit_stat = 0;			/* per-file status */
173 int perm_stat = 0;			/* permanent status */
174 
175 code_int getcode();
176 
177 int nomagic = 0;	/* Use a 3-byte magic number header, unless old file */
178 int zcat_flg = 0;	/* Write output on stdout, suppress messages */
179 int precious = 1;	/* Don't unlink output file on interrupt */
180 int quiet = 1;		/* don't tell me about compression */
181 
182 /*
183  * block compression parameters -- after all codes are used up,
184  * and compression rate changes, start over.
185  */
186 int block_compress = BLOCK_MASK;
187 int clear_flg = 0;
188 long int ratio = 0;
189 #define CHECK_GAP 10000	/* ratio check interval */
190 count_int checkpoint = CHECK_GAP;
191 /*
192  * the next two codes should not be changed lightly, as they must not
193  * lie within the contiguous general code space.
194  */
195 #define FIRST	257	/* first free entry */
196 #define	CLEAR	256	/* table clear output code */
197 
198 int force = 0;
199 char ofname [100];
200 #ifdef DEBUG
201 int debug, verbose;
202 #endif
203 sig_t oldint;
204 int bgnd_flag;
205 
206 int do_decomp = 0;
207 
208 /*-
209  * Algorithm from "A Technique for High Performance Data Compression",
210  * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19.
211  *
212  * Usage: compress [-dfvc] [-b bits] [file ...]
213  * Inputs:
214  *	-d:	    If given, decompression is done instead.
215  *
216  *      -c:         Write output on stdout, don't remove original.
217  *
218  *      -b:         Parameter limits the max number of bits/code.
219  *
220  *	-f:	    Forces output file to be generated, even if one already
221  *		    exists, and even if no space is saved by compressing.
222  *		    If -f is not used, the user will be prompted if stdin is
223  *		    a tty, otherwise, the output file will not be overwritten.
224  *
225  *      -v:	    Write compression statistics
226  *
227  * 	file ...:   Files to be compressed.  If none specified, stdin
228  *		    is used.
229  * Outputs:
230  *	file.Z:	    Compressed form of file with same mode, owner, and utimes
231  * 	or stdout   (if stdin used as input)
232  *
233  * Assumptions:
234  *	When filenames are given, replaces with the compressed version
235  *	(.Z suffix) only if the file decreases in size.
236  * Algorithm:
237  * 	Modified Lempel-Ziv method (LZW).  Basically finds common
238  * substrings and replaces them with a variable size code.  This is
239  * deterministic, and can be done on the fly.  Thus, the decompression
240  * procedure needs no input table, but tracks the way the table was built.
241  */
242 
243 main(argc, argv)
244 	int argc;
245 	char **argv;
246 {
247 	extern int optind;
248 	extern char *optarg;
249 	struct stat statbuf;
250 	int ch, overwrite;
251 	char **filelist, **fileptr, *cp, tempname[MAXPATHLEN];
252 	void onintr(), oops();
253 
254 	/* This bg check only works for sh. */
255 	if ((oldint = signal(SIGINT, SIG_IGN)) != SIG_IGN) {
256 		(void)signal(SIGINT, onintr);
257 		(void)signal(SIGSEGV, oops);		/* XXX */
258 	}
259 	bgnd_flag = oldint != SIG_DFL;
260 
261 #ifdef COMPATIBLE
262 	nomagic = 1;		/* Original didn't have a magic number */
263 #endif
264 
265 	if (cp = rindex(argv[0], '/'))
266 		++cp;
267 	else
268 		cp = argv[0];
269 	if (strcmp(cp, "uncompress") == 0)
270 		do_decomp = 1;
271 	else if(strcmp(cp, "zcat") == 0) {
272 		do_decomp = 1;
273 		zcat_flg = 1;
274 	}
275 
276 	/*
277 	 * -b maxbits => maxbits.
278 	 * -C => generate output compatible with compress 2.0.
279 	 * -c => cat all output to stdout
280 	 * -D => debug
281 	 * -d => do_decomp
282 	 * -f => force overwrite of output file
283 	 * -n => no header: useful to uncompress old files
284 	 * -V => print Version; debug verbose
285 	 * -v => unquiet
286 	 */
287 
288 	overwrite = 0;
289 #ifdef DEBUG
290 	while ((ch = getopt(argc, argv, "b:CcDdfnVv")) != EOF)
291 #else
292 	while ((ch = getopt(argc, argv, "b:Ccdfnv")) != EOF)
293 #endif
294 		switch(ch) {
295 		case 'b':
296 			maxbits = atoi(optarg);
297 			break;
298 		case 'C':
299 			block_compress = 0;
300 			break;
301 		case 'c':
302 			zcat_flg = 1;
303 			break;
304 #ifdef DEBUG
305 		case 'D':
306 			debug = 1;
307 			break;
308 #endif
309 		case 'd':
310 			do_decomp = 1;
311 			break;
312 		case 'f':
313 			overwrite = 1;
314 			force = 1;
315 			break;
316 		case 'n':
317 			nomagic = 1;
318 			break;
319 		case 'q':
320 			quiet = 1;
321 			break;
322 #ifdef DEBUG
323 		case 'V':
324 			verbose = 1;
325 			break;
326 #endif
327 		case 'v':
328 			quiet = 0;
329 			break;
330 		case '?':
331 		default:
332 			usage();
333 		}
334 	argc -= optind;
335 	argv += optind;
336 
337 	if (maxbits < INIT_BITS)
338 		maxbits = INIT_BITS;
339 	if (maxbits > BITS)
340 		maxbits = BITS;
341 	maxmaxcode = 1 << maxbits;
342 
343 	/* Build useless input file list. */
344 	filelist = fileptr = (char **)(malloc(argc * sizeof(*argv)));
345 	while (*argv)
346 		*fileptr++ = *argv++;
347 	*fileptr = NULL;
348 
349     if (*filelist != NULL) {
350 	for (fileptr = filelist; *fileptr; fileptr++) {
351 	    exit_stat = 0;
352 	    if (do_decomp) {			/* DECOMPRESSION */
353 		/* Check for .Z suffix */
354 		if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) {
355 		    /* No .Z: tack one on */
356 		    strcpy(tempname, *fileptr);
357 		    strcat(tempname, ".Z");
358 		    *fileptr = tempname;
359 		}
360 		/* Open input file */
361 		if ((freopen(*fileptr, "r", stdin)) == NULL) {
362 		    perror(*fileptr);
363 		    perm_stat = 1;
364 		    continue;
365 		}
366 		/* Check the magic number */
367 		if (nomagic == 0) {
368 		    if ((getchar() != (magic_header[0] & 0xFF))
369 		     || (getchar() != (magic_header[1] & 0xFF))) {
370 			fprintf(stderr, "%s: not in compressed format\n",
371 			    *fileptr);
372 		    continue;
373 		    }
374 		    maxbits = getchar();	/* set -b from file */
375 		    block_compress = maxbits & BLOCK_MASK;
376 		    maxbits &= BIT_MASK;
377 		    maxmaxcode = 1 << maxbits;
378 		    if(maxbits > BITS) {
379 			fprintf(stderr,
380 			"%s: compressed with %d bits, can only handle %d bits\n",
381 			*fileptr, maxbits, BITS);
382 			continue;
383 		    }
384 		}
385 		/* Generate output filename */
386 		strcpy(ofname, *fileptr);
387 		ofname[strlen(*fileptr) - 2] = '\0';  /* Strip off .Z */
388 	    } else {					/* COMPRESSION */
389 		if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) {
390 		    	fprintf(stderr, "%s: already has .Z suffix -- no change\n",
391 			    *fileptr);
392 		    continue;
393 		}
394 		/* Open input file */
395 		if ((freopen(*fileptr, "r", stdin)) == NULL) {
396 		    perror(*fileptr);
397 		    perm_stat = 1;
398 		    continue;
399 		}
400 		stat ( *fileptr, &statbuf );
401 		fsize = (long) statbuf.st_size;
402 		/*
403 		 * tune hash table size for small files -- ad hoc,
404 		 * but the sizes match earlier #defines, which
405 		 * serve as upper bounds on the number of output codes.
406 		 */
407 		hsize = HSIZE;
408 		if ( fsize < (1 << 12) )
409 		    hsize = MIN ( 5003, HSIZE );
410 		else if ( fsize < (1 << 13) )
411 		    hsize = MIN ( 9001, HSIZE );
412 		else if ( fsize < (1 << 14) )
413 		    hsize = MIN ( 18013, HSIZE );
414 		else if ( fsize < (1 << 15) )
415 		    hsize = MIN ( 35023, HSIZE );
416 		else if ( fsize < 47000 )
417 		    hsize = MIN ( 50021, HSIZE );
418 
419 		/* Generate output filename */
420 		strcpy(ofname, *fileptr);
421 		strcat(ofname, ".Z");
422 	    }
423 	    /* Check for overwrite of existing file */
424 	    if (overwrite == 0 && zcat_flg == 0) {
425 		if (stat(ofname, &statbuf) == 0) {
426 		    char response[2];
427 		    response[0] = 'n';
428 		    fprintf(stderr, "%s already exists;", ofname);
429 		    if (bgnd_flag == 0 && isatty(2)) {
430 			fprintf(stderr, " do you wish to overwrite %s (y or n)? ",
431 			ofname);
432 			fflush(stderr);
433 			read(2, response, 2);
434 			while (response[1] != '\n') {
435 			    if (read(2, response+1, 1) < 0) {	/* Ack! */
436 				perror("stderr"); break;
437 			    }
438 			}
439 		    }
440 		    if (response[0] != 'y') {
441 			fprintf(stderr, "\tnot overwritten\n");
442 			continue;
443 		    }
444 		}
445 	    }
446 	    if(zcat_flg == 0) {		/* Open output file */
447 		if (freopen(ofname, "w", stdout) == NULL) {
448 		    perror(ofname);
449 		    perm_stat = 1;
450 		    continue;
451 		}
452 		precious = 0;
453 		if(!quiet)
454 			fprintf(stderr, "%s: ", *fileptr);
455 	    }
456 
457 	    /* Actually do the compression/decompression */
458 	    if (do_decomp == 0)	compress();
459 #ifndef DEBUG
460 	    else			decompress();
461 #else
462 	    else if (debug == 0)	decompress();
463 	    else			printcodes();
464 	    if (verbose)		dump_tab();
465 #endif /* DEBUG */
466 	    if(zcat_flg == 0) {
467 		copystat(*fileptr, ofname);	/* Copy stats */
468 		precious = 1;
469 		if((exit_stat == 1) || (!quiet))
470 			putc('\n', stderr);
471 	    }
472 	}
473     } else {		/* Standard input */
474 	if (do_decomp == 0) {
475 		compress();
476 #ifdef DEBUG
477 		if(verbose)		dump_tab();
478 #endif /* DEBUG */
479 		if(!quiet)
480 			putc('\n', stderr);
481 	} else {
482 	    /* Check the magic number */
483 	    if (nomagic == 0) {
484 		if ((getchar()!=(magic_header[0] & 0xFF))
485 		 || (getchar()!=(magic_header[1] & 0xFF))) {
486 		    fprintf(stderr, "stdin: not in compressed format\n");
487 		    exit(1);
488 		}
489 		maxbits = getchar();	/* set -b from file */
490 		block_compress = maxbits & BLOCK_MASK;
491 		maxbits &= BIT_MASK;
492 		maxmaxcode = 1 << maxbits;
493 		fsize = 100000;		/* assume stdin large for USERMEM */
494 		if(maxbits > BITS) {
495 			fprintf(stderr,
496 			"stdin: compressed with %d bits, can only handle %d bits\n",
497 			maxbits, BITS);
498 			exit(1);
499 		}
500 	    }
501 #ifndef DEBUG
502 	    decompress();
503 #else
504 	    if (debug == 0)	decompress();
505 	    else		printcodes();
506 	    if (verbose)	dump_tab();
507 #endif /* DEBUG */
508 	}
509     }
510     exit(perm_stat ? perm_stat : exit_stat);
511 }
512 
513 static int offset;
514 long int in_count = 1;			/* length of input */
515 long int bytes_out;			/* length of compressed output */
516 long int out_count = 0;			/* # of codes output (for debugging) */
517 
518 /*
519  * compress stdin to stdout
520  *
521  * Algorithm:  use open addressing double hashing (no chaining) on the
522  * prefix code / next character combination.  We do a variant of Knuth's
523  * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime
524  * secondary probe.  Here, the modular division first probe is gives way
525  * to a faster exclusive-or manipulation.  Also do block compression with
526  * an adaptive reset, whereby the code table is cleared when the compression
527  * ratio decreases, but after the table fills.  The variable-length output
528  * codes are re-sized at this point, and a special CLEAR code is generated
529  * for the decompressor.  Late addition:  construct the table according to
530  * file size for noticeable speed improvement on small files.  Please direct
531  * questions about this implementation to ames!jaw.
532  */
533 
534 compress()
535 {
536     register long fcode;
537     register code_int i = 0;
538     register int c;
539     register code_int ent;
540     register int disp;
541     register code_int hsize_reg;
542     register int hshift;
543 
544 #ifndef COMPATIBLE
545     if (nomagic == 0) {
546 	putchar(magic_header[0]);
547 	putchar(magic_header[1]);
548 	putchar((char)(maxbits | block_compress));
549 	if(ferror(stdout))
550 		writeerr();
551     }
552 #endif /* COMPATIBLE */
553 
554     offset = 0;
555     bytes_out = 3;		/* includes 3-byte header mojo */
556     out_count = 0;
557     clear_flg = 0;
558     ratio = 0;
559     in_count = 1;
560     checkpoint = CHECK_GAP;
561     maxcode = MAXCODE(n_bits = INIT_BITS);
562     free_ent = ((block_compress) ? FIRST : 256 );
563 
564     ent = getchar ();
565 
566     hshift = 0;
567     for ( fcode = (long) hsize;  fcode < 65536L; fcode *= 2L )
568     	hshift++;
569     hshift = 8 - hshift;		/* set hash code range bound */
570 
571     hsize_reg = hsize;
572     cl_hash( (count_int) hsize_reg);		/* clear hash table */
573 
574 #ifdef SIGNED_COMPARE_SLOW
575     while ( (c = getchar()) != (unsigned) EOF ) {
576 #else
577     while ( (c = getchar()) != EOF ) {
578 #endif
579 	in_count++;
580 	fcode = (long) (((long) c << maxbits) + ent);
581  	i = ((c << hshift) ^ ent);	/* xor hashing */
582 
583 	if ( htabof (i) == fcode ) {
584 	    ent = codetabof (i);
585 	    continue;
586 	} else if ( (long)htabof (i) < 0 )	/* empty slot */
587 	    goto nomatch;
588  	disp = hsize_reg - i;		/* secondary hash (after G. Knott) */
589 	if ( i == 0 )
590 	    disp = 1;
591 probe:
592 	if ( (i -= disp) < 0 )
593 	    i += hsize_reg;
594 
595 	if ( htabof (i) == fcode ) {
596 	    ent = codetabof (i);
597 	    continue;
598 	}
599 	if ( (long)htabof (i) > 0 )
600 	    goto probe;
601 nomatch:
602 	output ( (code_int) ent );
603 	out_count++;
604  	ent = c;
605 #ifdef SIGNED_COMPARE_SLOW
606 	if ( (unsigned) free_ent < (unsigned) maxmaxcode) {
607 #else
608 	if ( free_ent < maxmaxcode ) {
609 #endif
610  	    codetabof (i) = free_ent++;	/* code -> hashtable */
611 	    htabof (i) = fcode;
612 	}
613 	else if ( (count_int)in_count >= checkpoint && block_compress )
614 	    cl_block ();
615     }
616     /*
617      * Put out the final code.
618      */
619     output( (code_int)ent );
620     out_count++;
621     output( (code_int)-1 );
622 
623     /*
624      * Print out stats on stderr
625      */
626     if(zcat_flg == 0 && !quiet) {
627 #ifdef DEBUG
628 	fprintf( stderr,
629 		"%ld chars in, %ld codes (%ld bytes) out, compression factor: ",
630 		in_count, out_count, bytes_out );
631 	prratio( stderr, in_count, bytes_out );
632 	fprintf( stderr, "\n");
633 	fprintf( stderr, "\tCompression as in compact: " );
634 	prratio( stderr, in_count-bytes_out, in_count );
635 	fprintf( stderr, "\n");
636 	fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n",
637 		free_ent - 1, n_bits );
638 #else /* !DEBUG */
639 	fprintf( stderr, "Compression: " );
640 	prratio( stderr, in_count-bytes_out, in_count );
641 #endif /* DEBUG */
642     }
643     if(bytes_out > in_count)	/* exit(2) if no savings */
644 	exit_stat = 2;
645     return;
646 }
647 
648 /*-
649  * Output the given code.
650  * Inputs:
651  * 	code:	A n_bits-bit integer.  If == -1, then EOF.  This assumes
652  *		that n_bits =< (long)wordsize - 1.
653  * Outputs:
654  * 	Outputs code to the file.
655  * Assumptions:
656  *	Chars are 8 bits long.
657  * Algorithm:
658  * 	Maintain a BITS character long buffer (so that 8 codes will
659  * fit in it exactly).  Use the VAX insv instruction to insert each
660  * code in turn.  When the buffer fills up empty it and start over.
661  */
662 
663 static char buf[BITS];
664 
665 #ifndef vax
666 char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00};
667 char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
668 #endif /* vax */
669 
670 output( code )
671 code_int  code;
672 {
673 #ifdef DEBUG
674     static int col = 0;
675 #endif /* DEBUG */
676 
677     /*
678      * On the VAX, it is important to have the register declarations
679      * in exactly the order given, or the asm will break.
680      */
681     register int r_off = offset, bits= n_bits;
682     register char * bp = buf;
683 
684 #ifdef DEBUG
685 	if ( verbose )
686 	    fprintf( stderr, "%5d%c", code,
687 		    (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
688 #endif /* DEBUG */
689     if ( code >= 0 ) {
690 #if defined(vax) && !defined(__GNUC__)
691 	/*
692 	 * VAX and PCC DEPENDENT!! Implementation on other machines is
693 	 * below.
694 	 *
695 	 * Translation: Insert BITS bits from the argument starting at
696 	 * offset bits from the beginning of buf.
697 	 */
698 	0;	/* Work around for pcc -O bug with asm and if stmt */
699 	asm( "insv	4(ap),r11,r10,(r9)" );
700 #else
701 /*
702  * byte/bit numbering on the VAX is simulated by the following code
703  */
704 	/*
705 	 * Get to the first byte.
706 	 */
707 	bp += (r_off >> 3);
708 	r_off &= 7;
709 	/*
710 	 * Since code is always >= 8 bits, only need to mask the first
711 	 * hunk on the left.
712 	 */
713 	*bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off];
714 	bp++;
715 	bits -= (8 - r_off);
716 	code >>= 8 - r_off;
717 	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
718 	if ( bits >= 8 ) {
719 	    *bp++ = code;
720 	    code >>= 8;
721 	    bits -= 8;
722 	}
723 	/* Last bits. */
724 	if(bits)
725 	    *bp = code;
726 #endif /* vax */
727 	offset += n_bits;
728 	if ( offset == (n_bits << 3) ) {
729 	    bp = buf;
730 	    bits = n_bits;
731 	    bytes_out += bits;
732 	    do {
733 		putchar(*bp++);
734 		if (ferror(stdout))
735 			writeerr();
736 	    } while(--bits);
737 	    offset = 0;
738 	}
739 
740 	/*
741 	 * If the next entry is going to be too big for the code size,
742 	 * then increase it, if possible.
743 	 */
744 	if ( free_ent > maxcode || (clear_flg > 0))
745 	{
746 	    /*
747 	     * Write the whole buffer, because the input side won't
748 	     * discover the size increase until after it has read it.
749 	     */
750 	    if ( offset > 0 ) {
751 		if( fwrite( buf, 1, n_bits, stdout ) != n_bits)
752 			writeerr();
753 		bytes_out += n_bits;
754 	    }
755 	    offset = 0;
756 
757 	    if ( clear_flg ) {
758     	        maxcode = MAXCODE (n_bits = INIT_BITS);
759 	        clear_flg = 0;
760 	    }
761 	    else {
762 	    	n_bits++;
763 	    	if ( n_bits == maxbits )
764 		    maxcode = maxmaxcode;
765 	    	else
766 		    maxcode = MAXCODE(n_bits);
767 	    }
768 #ifdef DEBUG
769 	    if ( debug ) {
770 		fprintf( stderr, "\nChange to %d bits\n", n_bits );
771 		col = 0;
772 	    }
773 #endif /* DEBUG */
774 	}
775     } else {
776 	/*
777 	 * At EOF, write the rest of the buffer.
778 	 */
779 	if ( offset > 0 ) {
780 		offset = (offset + 7) / 8;
781 		if( fwrite( buf, 1, offset, stdout ) != offset )
782 			writeerr();
783 		bytes_out += offset;
784 	}
785 	offset = 0;
786 	(void)fflush( stdout );
787 	if( ferror( stdout ) )
788 		writeerr();
789 #ifdef DEBUG
790 	if ( verbose )
791 	    fprintf( stderr, "\n" );
792 #endif
793     }
794 }
795 
796 /*
797  * Decompress stdin to stdout.  This routine adapts to the codes in the
798  * file building the "string" table on-the-fly; requiring no table to
799  * be stored in the compressed file.  The tables used herein are shared
800  * with those of the compress() routine.  See the definitions above.
801  */
802 
803 decompress() {
804     register char_type *stackp;
805     register int finchar;
806     register code_int code, oldcode, incode;
807     int    n, nwritten, offset;		/* Variables for buffered write */
808     char buff[BUFSIZ];			/* Buffer for buffered write */
809 
810 
811     /*
812      * As above, initialize the first 256 entries in the table.
813      */
814     maxcode = MAXCODE(n_bits = INIT_BITS);
815     for ( code = 255; code >= 0; code-- ) {
816 	tab_prefixof(code) = 0;
817 	tab_suffixof(code) = (char_type)code;
818     }
819     free_ent = ((block_compress) ? FIRST : 256 );
820 
821     finchar = oldcode = getcode();
822     if(oldcode == -1)	/* EOF already? */
823 	return;			/* Get out of here */
824 
825     /* first code must be 8 bits = char */
826     n=0;
827     buff[n++] = (char)finchar;
828 
829     stackp = de_stack;
830 
831     while ( (code = getcode()) > -1 ) {
832 
833 	if ( (code == CLEAR) && block_compress ) {
834 	    for ( code = 255; code >= 0; code-- )
835 		tab_prefixof(code) = 0;
836 	    clear_flg = 1;
837 	    free_ent = FIRST - 1;
838 	    if ( (code = getcode ()) == -1 )	/* O, untimely death! */
839 		break;
840 	}
841 	incode = code;
842 	/*
843 	 * Special case for KwKwK string.
844 	 */
845 	if ( code >= free_ent ) {
846             *stackp++ = finchar;
847 	    code = oldcode;
848 	}
849 
850 	/*
851 	 * Generate output characters in reverse order
852 	 */
853 #ifdef SIGNED_COMPARE_SLOW
854 	while ( ((unsigned long)code) >= ((unsigned long)256) ) {
855 #else
856 	while ( code >= 256 ) {
857 #endif
858 	    *stackp++ = tab_suffixof(code);
859 	    code = tab_prefixof(code);
860 	}
861 	*stackp++ = finchar = tab_suffixof(code);
862 
863 	/*
864 	 * And put them out in forward order
865 	 */
866 	do {
867 	    /*
868 	     * About 60% of the time is spent in the putchar() call
869 	     * that appeared here.  It was originally
870 	     *		putchar ( *--stackp );
871 	     * If we buffer the writes ourselves, we can go faster (about
872 	     * 30%).
873 	     *
874 	     * At this point, the next line is the next *big* time
875 	     * sink in the code.  It takes up about 10% of the time.
876 	     */
877 	     buff[n++] = *--stackp;
878 	     if (n == BUFSIZ) {
879 		 offset = 0;
880 		 do {
881 		     nwritten = write(fileno(stdout), &buff[offset], n);
882 		     if (nwritten < 0)
883 			 writeerr();
884 		     offset += nwritten;
885 		 } while ((n -= nwritten) > 0);
886 	      }
887 	} while ( stackp > de_stack );
888 
889 	/*
890 	 * Generate the new entry.
891 	 */
892 	if ( (code=free_ent) < maxmaxcode ) {
893 	    tab_prefixof(code) = (unsigned short)oldcode;
894 	    tab_suffixof(code) = finchar;
895 	    free_ent = code+1;
896 	}
897 	/*
898 	 * Remember previous code.
899 	 */
900 	oldcode = incode;
901     }
902     /*
903      * Flush the stuff remaining in our buffer...
904      */
905     offset = 0;
906     while (n > 0) {
907 	nwritten = write(fileno(stdout), &buff[offset], n);
908 	if (nwritten < 0)
909 	    writeerr();
910 	offset += nwritten;
911 	n -= nwritten;
912     }
913 }
914 
915 /*-
916  * Read one code from the standard input.  If EOF, return -1.
917  * Inputs:
918  * 	stdin
919  * Outputs:
920  * 	code or -1 is returned.
921  */
922 code_int
923 getcode() {
924     /*
925      * On the VAX, it is important to have the register declarations
926      * in exactly the order given, or the asm will break.
927      */
928     register code_int code;
929     static int offset = 0, size = 0;
930     static char_type buf[BITS];
931     register int r_off, bits;
932     register char_type *bp = buf;
933 
934     if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) {
935 	/*
936 	 * If the next entry will be too big for the current code
937 	 * size, then we must increase the size.  This implies reading
938 	 * a new buffer full, too.
939 	 */
940 	if ( free_ent > maxcode ) {
941 	    n_bits++;
942 	    if ( n_bits == maxbits )
943 		maxcode = maxmaxcode;	/* won't get any bigger now */
944 	    else
945 		maxcode = MAXCODE(n_bits);
946 	}
947 	if ( clear_flg > 0) {
948     	    maxcode = MAXCODE (n_bits = INIT_BITS);
949 	    clear_flg = 0;
950 	}
951 	size = fread( buf, 1, n_bits, stdin );
952 	if ( size <= 0 )
953 	    return -1;			/* end of file */
954 	offset = 0;
955 	/* Round size down to integral number of codes */
956 	size = (size << 3) - (n_bits - 1);
957     }
958     r_off = offset;
959     bits = n_bits;
960 #ifdef vax
961     asm( "extzv   r10,r9,(r8),r11" );
962 #else /* not a vax */
963 	/*
964 	 * Get to the first byte.
965 	 */
966 	bp += (r_off >> 3);
967 	r_off &= 7;
968 	/* Get first part (low order bits) */
969 #ifdef NO_UCHAR
970 	code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff;
971 #else
972 	code = (*bp++ >> r_off);
973 #endif /* NO_UCHAR */
974 	bits -= (8 - r_off);
975 	r_off = 8 - r_off;		/* now, offset into code word */
976 	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
977 	if ( bits >= 8 ) {
978 #ifdef NO_UCHAR
979 	    code |= (*bp++ & 0xff) << r_off;
980 #else
981 	    code |= *bp++ << r_off;
982 #endif /* NO_UCHAR */
983 	    r_off += 8;
984 	    bits -= 8;
985 	}
986 	/* high order bits. */
987 	code |= (*bp & rmask[bits]) << r_off;
988 #endif /* vax */
989     offset += n_bits;
990 
991     return code;
992 }
993 
994 #ifdef DEBUG
995 printcodes()
996 {
997     /*
998      * Just print out codes from input file.  For debugging.
999      */
1000     code_int code;
1001     int col = 0, bits;
1002 
1003     bits = n_bits = INIT_BITS;
1004     maxcode = MAXCODE(n_bits);
1005     free_ent = ((block_compress) ? FIRST : 256 );
1006     while ( ( code = getcode() ) >= 0 ) {
1007 	if ( (code == CLEAR) && block_compress ) {
1008    	    free_ent = FIRST - 1;
1009    	    clear_flg = 1;
1010 	}
1011 	else if ( free_ent < maxmaxcode )
1012 	    free_ent++;
1013 	if ( bits != n_bits ) {
1014 	    fprintf(stderr, "\nChange to %d bits\n", n_bits );
1015 	    bits = n_bits;
1016 	    col = 0;
1017 	}
1018 	fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' );
1019     }
1020     putc( '\n', stderr );
1021     exit( 0 );
1022 }
1023 
1024 code_int sorttab[1<<BITS];	/* sorted pointers into htab */
1025 
1026 dump_tab()	/* dump string table */
1027 {
1028     register int i, first;
1029     register ent;
1030 #define STACK_SIZE	15000
1031     int stack_top = STACK_SIZE;
1032     register c;
1033 
1034     if(do_decomp == 0) {	/* compressing */
1035 	register int flag = 1;
1036 
1037 	for(i=0; i<hsize; i++) {	/* build sort pointers */
1038 		if((long)htabof(i) >= 0) {
1039 			sorttab[codetabof(i)] = i;
1040 		}
1041 	}
1042 	first = block_compress ? FIRST : 256;
1043 	for(i = first; i < free_ent; i++) {
1044 		fprintf(stderr, "%5d: \"", i);
1045 		de_stack[--stack_top] = '\n';
1046 		de_stack[--stack_top] = '"';
1047 		stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff,
1048                                      stack_top);
1049 		for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1);
1050 		    ent > 256;
1051 		    ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) {
1052 			stack_top = in_stack(htabof(sorttab[ent]) >> maxbits,
1053 						stack_top);
1054 		}
1055 		stack_top = in_stack(ent, stack_top);
1056 		fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr);
1057 	   	stack_top = STACK_SIZE;
1058 	}
1059    } else if(!debug) {	/* decompressing */
1060 
1061        for ( i = 0; i < free_ent; i++ ) {
1062 	   ent = i;
1063 	   c = tab_suffixof(ent);
1064 	   if ( isascii(c) && isprint(c) )
1065 	       fprintf( stderr, "%5d: %5d/'%c'  \"",
1066 			   ent, tab_prefixof(ent), c );
1067 	   else
1068 	       fprintf( stderr, "%5d: %5d/\\%03o \"",
1069 			   ent, tab_prefixof(ent), c );
1070 	   de_stack[--stack_top] = '\n';
1071 	   de_stack[--stack_top] = '"';
1072 	   for ( ; ent != NULL;
1073 		   ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) {
1074 	       stack_top = in_stack(tab_suffixof(ent), stack_top);
1075 	   }
1076 	   fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr );
1077 	   stack_top = STACK_SIZE;
1078        }
1079     }
1080 }
1081 
1082 int
1083 in_stack(c, stack_top)
1084 	register c, stack_top;
1085 {
1086 	if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) {
1087 	    de_stack[--stack_top] = c;
1088 	} else {
1089 	    switch( c ) {
1090 	    case '\n': de_stack[--stack_top] = 'n'; break;
1091 	    case '\t': de_stack[--stack_top] = 't'; break;
1092 	    case '\b': de_stack[--stack_top] = 'b'; break;
1093 	    case '\f': de_stack[--stack_top] = 'f'; break;
1094 	    case '\r': de_stack[--stack_top] = 'r'; break;
1095 	    case '\\': de_stack[--stack_top] = '\\'; break;
1096 	    default:
1097 	 	de_stack[--stack_top] = '0' + c % 8;
1098 	 	de_stack[--stack_top] = '0' + (c / 8) % 8;
1099 	 	de_stack[--stack_top] = '0' + c / 64;
1100 	 	break;
1101 	    }
1102 	    de_stack[--stack_top] = '\\';
1103 	}
1104 	return stack_top;
1105 }
1106 #endif /* DEBUG */
1107 
1108 writeerr()
1109 {
1110 	(void)fprintf(stderr, "compress: %s: %s\n",
1111 	    ofname[0] ? ofname : "stdout", strerror(errno));
1112 	(void)unlink(ofname);
1113 	exit(1);
1114 }
1115 
1116 copystat(ifname, ofname)
1117 char *ifname, *ofname;
1118 {
1119     struct stat statbuf;
1120     int mode;
1121     struct utimbuf tp;
1122 
1123     fclose(stdout);
1124     if (stat(ifname, &statbuf)) {		/* Get stat on input file */
1125 	perror(ifname);
1126 	return;
1127     }
1128     if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) {
1129 	if(quiet)
1130 	    	fprintf(stderr, "%s: ", ifname);
1131 	fprintf(stderr, " -- not a regular file: unchanged");
1132 	exit_stat = 1;
1133 	perm_stat = 1;
1134     } else if (statbuf.st_nlink > 1) {
1135 	if(quiet)
1136 	    	fprintf(stderr, "%s: ", ifname);
1137 	fprintf(stderr, " -- has %d other links: unchanged",
1138 		statbuf.st_nlink - 1);
1139 	exit_stat = 1;
1140 	perm_stat = 1;
1141     } else if (exit_stat == 2 && (!force)) { /* No compression: remove file.Z */
1142 	if(!quiet)
1143 		fprintf(stderr, " -- file unchanged");
1144     } else {			/* ***** Successful Compression ***** */
1145 	exit_stat = 0;
1146 	mode = statbuf.st_mode & 07777;
1147 	if (chmod(ofname, mode))		/* Copy modes */
1148 	    perror(ofname);
1149 	chown(ofname, statbuf.st_uid, statbuf.st_gid);	/* Copy ownership */
1150 	tp.actime = statbuf.st_atime;
1151 	tp.modtime = statbuf.st_mtime;
1152 	utime(ofname, &tp);	/* Update last accessed and modified times */
1153 	if (unlink(ifname))	/* Remove input file */
1154 	    perror(ifname);
1155 	if(!quiet)
1156 		fprintf(stderr, " -- replaced with %s", ofname);
1157 	return;		/* Successful return */
1158     }
1159 
1160     /* Unsuccessful return -- one of the tests failed */
1161     if (unlink(ofname))
1162 	perror(ofname);
1163 }
1164 
1165 void
1166 onintr ( )
1167 {
1168     if (!precious)
1169 	unlink ( ofname );
1170     exit ( 1 );
1171 }
1172 
1173 void
1174 oops ( )	/* wild pointer -- assume bad input */
1175 {
1176     if ( do_decomp )
1177     	fprintf ( stderr, "uncompress: corrupt input\n" );
1178     unlink ( ofname );
1179     exit ( 1 );
1180 }
1181 
1182 cl_block ()		/* table clear for block compress */
1183 {
1184     register long int rat;
1185 
1186     checkpoint = in_count + CHECK_GAP;
1187 #ifdef DEBUG
1188 	if ( debug ) {
1189     		fprintf ( stderr, "count: %ld, ratio: ", in_count );
1190      		prratio ( stderr, in_count, bytes_out );
1191 		fprintf ( stderr, "\n");
1192 	}
1193 #endif /* DEBUG */
1194 
1195     if(in_count > 0x007fffff) {	/* shift will overflow */
1196 	rat = bytes_out >> 8;
1197 	if(rat == 0) {		/* Don't divide by zero */
1198 	    rat = 0x7fffffff;
1199 	} else {
1200 	    rat = in_count / rat;
1201 	}
1202     } else {
1203 	rat = (in_count << 8) / bytes_out;	/* 8 fractional bits */
1204     }
1205     if ( rat > ratio ) {
1206 	ratio = rat;
1207     } else {
1208 	ratio = 0;
1209 #ifdef DEBUG
1210 	if(verbose)
1211 		dump_tab();	/* dump string table */
1212 #endif
1213  	cl_hash ( (count_int) hsize );
1214 	free_ent = FIRST;
1215 	clear_flg = 1;
1216 	output ( (code_int) CLEAR );
1217 #ifdef DEBUG
1218 	if(debug)
1219     		fprintf ( stderr, "clear\n" );
1220 #endif /* DEBUG */
1221     }
1222 }
1223 
1224 cl_hash(hsize)		/* reset code table */
1225 	register count_int hsize;
1226 {
1227 	register count_int *htab_p = htab+hsize;
1228 	register long i;
1229 	register long m1 = -1;
1230 
1231 	i = hsize - 16;
1232  	do {				/* might use Sys V memset(3) here */
1233 		*(htab_p-16) = m1;
1234 		*(htab_p-15) = m1;
1235 		*(htab_p-14) = m1;
1236 		*(htab_p-13) = m1;
1237 		*(htab_p-12) = m1;
1238 		*(htab_p-11) = m1;
1239 		*(htab_p-10) = m1;
1240 		*(htab_p-9) = m1;
1241 		*(htab_p-8) = m1;
1242 		*(htab_p-7) = m1;
1243 		*(htab_p-6) = m1;
1244 		*(htab_p-5) = m1;
1245 		*(htab_p-4) = m1;
1246 		*(htab_p-3) = m1;
1247 		*(htab_p-2) = m1;
1248 		*(htab_p-1) = m1;
1249 		htab_p -= 16;
1250 	} while ((i -= 16) >= 0);
1251     	for ( i += 16; i > 0; i-- )
1252 		*--htab_p = m1;
1253 }
1254 
1255 prratio(stream, num, den)
1256 FILE *stream;
1257 long int num, den;
1258 {
1259 	register int q;			/* Doesn't need to be long */
1260 
1261 	if(num > 214748L) {		/* 2147483647/10000 */
1262 		q = num / (den / 10000L);
1263 	} else {
1264 		q = 10000L * num / den;		/* Long calculations, though */
1265 	}
1266 	if (q < 0) {
1267 		putc('-', stream);
1268 		q = -q;
1269 	}
1270 	fprintf(stream, "%d.%02d%%", q / 100, q % 100);
1271 }
1272 
1273 usage()
1274 {
1275 	(void)fprintf(stderr,
1276 #ifdef DEBUG
1277 	    "compress [-CDVcdfnv] [-b maxbits] [file ...]\n");
1278 #else
1279 	    "compress [-Ccdfnv] [-b maxbits] [file ...]\n");
1280 #endif
1281 	exit(1);
1282 }
1283