1 /* 2 * Copyright (c) 1985, 1986 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * James A. Woods, derived from original work by Spencer Thomas 7 * and Joseph Orost. 8 * 9 * %sccs.include.redist.c% 10 */ 11 12 #ifndef lint 13 char copyright[] = 14 "@(#) Copyright (c) 1985, 1986 The Regents of the University of California.\n\ 15 All rights reserved.\n"; 16 #endif /* not lint */ 17 18 #ifndef lint 19 static char sccsid[] = "@(#)compress.c 5.20 (Berkeley) 03/01/92"; 20 #endif /* not lint */ 21 22 /* 23 * compress.c - File compression ala IEEE Computer, June 1984. 24 * 25 * Authors: Spencer W. Thomas (decvax!utah-cs!thomas) 26 * Jim McKie (decvax!mcvax!jim) 27 * Steve Davies (decvax!vax135!petsd!peora!srd) 28 * Ken Turkowski (decvax!decwrl!turtlevax!ken) 29 * James A. Woods (decvax!ihnp4!ames!jaw) 30 * Joe Orost (decvax!vax135!petsd!joe) 31 */ 32 33 #include <sys/param.h> 34 #include <sys/stat.h> 35 #include <signal.h> 36 #include <utime.h> 37 #include <errno.h> 38 #include <unistd.h> 39 #include <stdio.h> 40 #include <ctype.h> 41 #include <stdlib.h> 42 #include <string.h> 43 44 /* 45 * Set USERMEM to the maximum amount of physical user memory available 46 * in bytes. USERMEM is used to determine the maximum BITS that can be used 47 * for compression. 48 * 49 * SACREDMEM is the amount of physical memory saved for others; compress 50 * will hog the rest. 51 */ 52 #ifndef SACREDMEM 53 #define SACREDMEM 0 54 #endif 55 56 #ifndef USERMEM 57 # define USERMEM 450000 /* default user memory */ 58 #endif 59 60 #ifdef pdp11 61 # define BITS 12 /* max bits/code for 16-bit machine */ 62 # define NO_UCHAR /* also if "unsigned char" functions as signed char */ 63 # undef USERMEM 64 #endif /* pdp11 */ /* don't forget to compile with -i */ 65 66 #ifdef USERMEM 67 # if USERMEM >= (433484+SACREDMEM) 68 # define PBITS 16 69 # else 70 # if USERMEM >= (229600+SACREDMEM) 71 # define PBITS 15 72 # else 73 # if USERMEM >= (127536+SACREDMEM) 74 # define PBITS 14 75 # else 76 # if USERMEM >= (73464+SACREDMEM) 77 # define PBITS 13 78 # else 79 # define PBITS 12 80 # endif 81 # endif 82 # endif 83 # endif 84 # undef USERMEM 85 #endif /* USERMEM */ 86 87 #ifdef PBITS /* Preferred BITS for this memory size */ 88 # ifndef BITS 89 # define BITS PBITS 90 # endif BITS 91 #endif /* PBITS */ 92 93 #if BITS == 16 94 # define HSIZE 69001 /* 95% occupancy */ 95 #endif 96 #if BITS == 15 97 # define HSIZE 35023 /* 94% occupancy */ 98 #endif 99 #if BITS == 14 100 # define HSIZE 18013 /* 91% occupancy */ 101 #endif 102 #if BITS == 13 103 # define HSIZE 9001 /* 91% occupancy */ 104 #endif 105 #if BITS <= 12 106 # define HSIZE 5003 /* 80% occupancy */ 107 #endif 108 109 /* 110 * a code_int must be able to hold 2**BITS values of type int, and also -1 111 */ 112 #if BITS > 15 113 typedef long int code_int; 114 #else 115 typedef int code_int; 116 #endif 117 118 #ifdef SIGNED_COMPARE_SLOW 119 typedef unsigned long int count_int; 120 typedef unsigned short int count_short; 121 #else 122 typedef long int count_int; 123 #endif 124 125 #ifdef NO_UCHAR 126 typedef char char_type; 127 #else 128 typedef unsigned char char_type; 129 #endif /* UCHAR */ 130 char_type magic_header[] = { "\037\235" }; /* 1F 9D */ 131 132 /* Defines for third byte of header */ 133 #define BIT_MASK 0x1f 134 #define BLOCK_MASK 0x80 135 /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is 136 a fourth header byte (for expansion). 137 */ 138 #define INIT_BITS 9 /* initial number of bits/code */ 139 140 int n_bits; /* number of bits/code */ 141 int maxbits = BITS; /* user settable max # bits/code */ 142 code_int maxcode; /* maximum code, given n_bits */ 143 code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */ 144 #ifdef COMPATIBLE /* But wrong! */ 145 # define MAXCODE(n_bits) (1 << (n_bits) - 1) 146 #else 147 # define MAXCODE(n_bits) ((1 << (n_bits)) - 1) 148 #endif /* COMPATIBLE */ 149 150 count_int htab [HSIZE]; 151 unsigned short codetab [HSIZE]; 152 153 #define htabof(i) htab[i] 154 #define codetabof(i) codetab[i] 155 code_int hsize = HSIZE; /* for dynamic table sizing */ 156 count_int fsize; 157 158 /* 159 * To save much memory, we overlay the table used by compress() with those 160 * used by decompress(). The tab_prefix table is the same size and type 161 * as the codetab. The tab_suffix table needs 2**BITS characters. We 162 * get this from the beginning of htab. The output stack uses the rest 163 * of htab, and contains characters. There is plenty of room for any 164 * possible stack (stack used to be 8000 characters). 165 */ 166 167 #define tab_prefixof(i) codetabof(i) 168 # define tab_suffixof(i) ((char_type *)(htab))[i] 169 # define de_stack ((char_type *)&tab_suffixof(1<<BITS)) 170 171 code_int free_ent = 0; /* first unused entry */ 172 int exit_stat = 0; /* per-file status */ 173 int perm_stat = 0; /* permanent status */ 174 175 code_int getcode(); 176 177 int nomagic = 0; /* Use a 3-byte magic number header, unless old file */ 178 int zcat_flg = 0; /* Write output on stdout, suppress messages */ 179 int precious = 1; /* Don't unlink output file on interrupt */ 180 int quiet = 1; /* don't tell me about compression */ 181 182 /* 183 * block compression parameters -- after all codes are used up, 184 * and compression rate changes, start over. 185 */ 186 int block_compress = BLOCK_MASK; 187 int clear_flg = 0; 188 long int ratio = 0; 189 #define CHECK_GAP 10000 /* ratio check interval */ 190 count_int checkpoint = CHECK_GAP; 191 /* 192 * the next two codes should not be changed lightly, as they must not 193 * lie within the contiguous general code space. 194 */ 195 #define FIRST 257 /* first free entry */ 196 #define CLEAR 256 /* table clear output code */ 197 198 int force = 0; 199 char ofname [100]; 200 #ifdef DEBUG 201 int debug, verbose; 202 #endif 203 sig_t oldint; 204 int bgnd_flag; 205 206 int do_decomp = 0; 207 208 /*- 209 * Algorithm from "A Technique for High Performance Data Compression", 210 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. 211 * 212 * Usage: compress [-dfvc] [-b bits] [file ...] 213 * Inputs: 214 * -d: If given, decompression is done instead. 215 * 216 * -c: Write output on stdout, don't remove original. 217 * 218 * -b: Parameter limits the max number of bits/code. 219 * 220 * -f: Forces output file to be generated, even if one already 221 * exists, and even if no space is saved by compressing. 222 * If -f is not used, the user will be prompted if stdin is 223 * a tty, otherwise, the output file will not be overwritten. 224 * 225 * -v: Write compression statistics 226 * 227 * file ...: Files to be compressed. If none specified, stdin 228 * is used. 229 * Outputs: 230 * file.Z: Compressed form of file with same mode, owner, and utimes 231 * or stdout (if stdin used as input) 232 * 233 * Assumptions: 234 * When filenames are given, replaces with the compressed version 235 * (.Z suffix) only if the file decreases in size. 236 * Algorithm: 237 * Modified Lempel-Ziv method (LZW). Basically finds common 238 * substrings and replaces them with a variable size code. This is 239 * deterministic, and can be done on the fly. Thus, the decompression 240 * procedure needs no input table, but tracks the way the table was built. 241 */ 242 243 main(argc, argv) 244 int argc; 245 char **argv; 246 { 247 extern int optind; 248 extern char *optarg; 249 struct stat statbuf; 250 int ch, overwrite; 251 char **filelist, **fileptr, *cp, tempname[MAXPATHLEN]; 252 void onintr(), oops(); 253 254 /* This bg check only works for sh. */ 255 if ((oldint = signal(SIGINT, SIG_IGN)) != SIG_IGN) { 256 (void)signal(SIGINT, onintr); 257 (void)signal(SIGSEGV, oops); /* XXX */ 258 } 259 bgnd_flag = oldint != SIG_DFL; 260 261 #ifdef COMPATIBLE 262 nomagic = 1; /* Original didn't have a magic number */ 263 #endif 264 265 if (cp = rindex(argv[0], '/')) 266 ++cp; 267 else 268 cp = argv[0]; 269 if (strcmp(cp, "uncompress") == 0) 270 do_decomp = 1; 271 else if(strcmp(cp, "zcat") == 0) { 272 do_decomp = 1; 273 zcat_flg = 1; 274 } 275 276 /* 277 * -b maxbits => maxbits. 278 * -C => generate output compatible with compress 2.0. 279 * -c => cat all output to stdout 280 * -D => debug 281 * -d => do_decomp 282 * -f => force overwrite of output file 283 * -n => no header: useful to uncompress old files 284 * -V => print Version; debug verbose 285 * -v => unquiet 286 */ 287 288 overwrite = 0; 289 #ifdef DEBUG 290 while ((ch = getopt(argc, argv, "b:CcDdfnVv")) != EOF) 291 #else 292 while ((ch = getopt(argc, argv, "b:Ccdfnv")) != EOF) 293 #endif 294 switch(ch) { 295 case 'b': 296 maxbits = atoi(optarg); 297 break; 298 case 'C': 299 block_compress = 0; 300 break; 301 case 'c': 302 zcat_flg = 1; 303 break; 304 #ifdef DEBUG 305 case 'D': 306 debug = 1; 307 break; 308 #endif 309 case 'd': 310 do_decomp = 1; 311 break; 312 case 'f': 313 overwrite = 1; 314 force = 1; 315 break; 316 case 'n': 317 nomagic = 1; 318 break; 319 case 'q': 320 quiet = 1; 321 break; 322 #ifdef DEBUG 323 case 'V': 324 verbose = 1; 325 break; 326 #endif 327 case 'v': 328 quiet = 0; 329 break; 330 case '?': 331 default: 332 usage(); 333 } 334 argc -= optind; 335 argv += optind; 336 337 if (maxbits < INIT_BITS) 338 maxbits = INIT_BITS; 339 if (maxbits > BITS) 340 maxbits = BITS; 341 maxmaxcode = 1 << maxbits; 342 343 /* Build useless input file list. */ 344 filelist = fileptr = (char **)(malloc(argc * sizeof(*argv))); 345 while (*argv) 346 *fileptr++ = *argv++; 347 *fileptr = NULL; 348 349 if (*filelist != NULL) { 350 for (fileptr = filelist; *fileptr; fileptr++) { 351 exit_stat = 0; 352 if (do_decomp) { /* DECOMPRESSION */ 353 /* Check for .Z suffix */ 354 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) { 355 /* No .Z: tack one on */ 356 strcpy(tempname, *fileptr); 357 strcat(tempname, ".Z"); 358 *fileptr = tempname; 359 } 360 /* Open input file */ 361 if ((freopen(*fileptr, "r", stdin)) == NULL) { 362 perror(*fileptr); 363 perm_stat = 1; 364 continue; 365 } 366 /* Check the magic number */ 367 if (nomagic == 0) { 368 if ((getchar() != (magic_header[0] & 0xFF)) 369 || (getchar() != (magic_header[1] & 0xFF))) { 370 fprintf(stderr, "%s: not in compressed format\n", 371 *fileptr); 372 continue; 373 } 374 maxbits = getchar(); /* set -b from file */ 375 block_compress = maxbits & BLOCK_MASK; 376 maxbits &= BIT_MASK; 377 maxmaxcode = 1 << maxbits; 378 if(maxbits > BITS) { 379 fprintf(stderr, 380 "%s: compressed with %d bits, can only handle %d bits\n", 381 *fileptr, maxbits, BITS); 382 continue; 383 } 384 } 385 /* Generate output filename */ 386 strcpy(ofname, *fileptr); 387 ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */ 388 } else { /* COMPRESSION */ 389 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) { 390 fprintf(stderr, "%s: already has .Z suffix -- no change\n", 391 *fileptr); 392 continue; 393 } 394 /* Open input file */ 395 if ((freopen(*fileptr, "r", stdin)) == NULL) { 396 perror(*fileptr); 397 perm_stat = 1; 398 continue; 399 } 400 stat ( *fileptr, &statbuf ); 401 fsize = (long) statbuf.st_size; 402 /* 403 * tune hash table size for small files -- ad hoc, 404 * but the sizes match earlier #defines, which 405 * serve as upper bounds on the number of output codes. 406 */ 407 hsize = HSIZE; 408 if ( fsize < (1 << 12) ) 409 hsize = MIN ( 5003, HSIZE ); 410 else if ( fsize < (1 << 13) ) 411 hsize = MIN ( 9001, HSIZE ); 412 else if ( fsize < (1 << 14) ) 413 hsize = MIN ( 18013, HSIZE ); 414 else if ( fsize < (1 << 15) ) 415 hsize = MIN ( 35023, HSIZE ); 416 else if ( fsize < 47000 ) 417 hsize = MIN ( 50021, HSIZE ); 418 419 /* Generate output filename */ 420 strcpy(ofname, *fileptr); 421 strcat(ofname, ".Z"); 422 } 423 /* Check for overwrite of existing file */ 424 if (overwrite == 0 && zcat_flg == 0) { 425 if (stat(ofname, &statbuf) == 0) { 426 char response[2]; 427 response[0] = 'n'; 428 fprintf(stderr, "%s already exists;", ofname); 429 if (bgnd_flag == 0 && isatty(2)) { 430 fprintf(stderr, " do you wish to overwrite %s (y or n)? ", 431 ofname); 432 fflush(stderr); 433 read(2, response, 2); 434 while (response[1] != '\n') { 435 if (read(2, response+1, 1) < 0) { /* Ack! */ 436 perror("stderr"); break; 437 } 438 } 439 } 440 if (response[0] != 'y') { 441 fprintf(stderr, "\tnot overwritten\n"); 442 continue; 443 } 444 } 445 } 446 if(zcat_flg == 0) { /* Open output file */ 447 if (freopen(ofname, "w", stdout) == NULL) { 448 perror(ofname); 449 perm_stat = 1; 450 continue; 451 } 452 precious = 0; 453 if(!quiet) 454 fprintf(stderr, "%s: ", *fileptr); 455 } 456 457 /* Actually do the compression/decompression */ 458 if (do_decomp == 0) compress(); 459 #ifndef DEBUG 460 else decompress(); 461 #else 462 else if (debug == 0) decompress(); 463 else printcodes(); 464 if (verbose) dump_tab(); 465 #endif /* DEBUG */ 466 if(zcat_flg == 0) { 467 copystat(*fileptr, ofname); /* Copy stats */ 468 precious = 1; 469 if((exit_stat == 1) || (!quiet)) 470 putc('\n', stderr); 471 } 472 } 473 } else { /* Standard input */ 474 if (do_decomp == 0) { 475 compress(); 476 #ifdef DEBUG 477 if(verbose) dump_tab(); 478 #endif /* DEBUG */ 479 if(!quiet) 480 putc('\n', stderr); 481 } else { 482 /* Check the magic number */ 483 if (nomagic == 0) { 484 if ((getchar()!=(magic_header[0] & 0xFF)) 485 || (getchar()!=(magic_header[1] & 0xFF))) { 486 fprintf(stderr, "stdin: not in compressed format\n"); 487 exit(1); 488 } 489 maxbits = getchar(); /* set -b from file */ 490 block_compress = maxbits & BLOCK_MASK; 491 maxbits &= BIT_MASK; 492 maxmaxcode = 1 << maxbits; 493 fsize = 100000; /* assume stdin large for USERMEM */ 494 if(maxbits > BITS) { 495 fprintf(stderr, 496 "stdin: compressed with %d bits, can only handle %d bits\n", 497 maxbits, BITS); 498 exit(1); 499 } 500 } 501 #ifndef DEBUG 502 decompress(); 503 #else 504 if (debug == 0) decompress(); 505 else printcodes(); 506 if (verbose) dump_tab(); 507 #endif /* DEBUG */ 508 } 509 } 510 exit(perm_stat ? perm_stat : exit_stat); 511 } 512 513 static int offset; 514 long int in_count = 1; /* length of input */ 515 long int bytes_out; /* length of compressed output */ 516 long int out_count = 0; /* # of codes output (for debugging) */ 517 518 /* 519 * compress stdin to stdout 520 * 521 * Algorithm: use open addressing double hashing (no chaining) on the 522 * prefix code / next character combination. We do a variant of Knuth's 523 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime 524 * secondary probe. Here, the modular division first probe is gives way 525 * to a faster exclusive-or manipulation. Also do block compression with 526 * an adaptive reset, whereby the code table is cleared when the compression 527 * ratio decreases, but after the table fills. The variable-length output 528 * codes are re-sized at this point, and a special CLEAR code is generated 529 * for the decompressor. Late addition: construct the table according to 530 * file size for noticeable speed improvement on small files. Please direct 531 * questions about this implementation to ames!jaw. 532 */ 533 534 compress() 535 { 536 register long fcode; 537 register code_int i = 0; 538 register int c; 539 register code_int ent; 540 register int disp; 541 register code_int hsize_reg; 542 register int hshift; 543 544 #ifndef COMPATIBLE 545 if (nomagic == 0) { 546 putchar(magic_header[0]); 547 putchar(magic_header[1]); 548 putchar((char)(maxbits | block_compress)); 549 if(ferror(stdout)) 550 writeerr(); 551 } 552 #endif /* COMPATIBLE */ 553 554 offset = 0; 555 bytes_out = 3; /* includes 3-byte header mojo */ 556 out_count = 0; 557 clear_flg = 0; 558 ratio = 0; 559 in_count = 1; 560 checkpoint = CHECK_GAP; 561 maxcode = MAXCODE(n_bits = INIT_BITS); 562 free_ent = ((block_compress) ? FIRST : 256 ); 563 564 ent = getchar (); 565 566 hshift = 0; 567 for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L ) 568 hshift++; 569 hshift = 8 - hshift; /* set hash code range bound */ 570 571 hsize_reg = hsize; 572 cl_hash( (count_int) hsize_reg); /* clear hash table */ 573 574 #ifdef SIGNED_COMPARE_SLOW 575 while ( (c = getchar()) != (unsigned) EOF ) { 576 #else 577 while ( (c = getchar()) != EOF ) { 578 #endif 579 in_count++; 580 fcode = (long) (((long) c << maxbits) + ent); 581 i = ((c << hshift) ^ ent); /* xor hashing */ 582 583 if ( htabof (i) == fcode ) { 584 ent = codetabof (i); 585 continue; 586 } else if ( (long)htabof (i) < 0 ) /* empty slot */ 587 goto nomatch; 588 disp = hsize_reg - i; /* secondary hash (after G. Knott) */ 589 if ( i == 0 ) 590 disp = 1; 591 probe: 592 if ( (i -= disp) < 0 ) 593 i += hsize_reg; 594 595 if ( htabof (i) == fcode ) { 596 ent = codetabof (i); 597 continue; 598 } 599 if ( (long)htabof (i) > 0 ) 600 goto probe; 601 nomatch: 602 output ( (code_int) ent ); 603 out_count++; 604 ent = c; 605 #ifdef SIGNED_COMPARE_SLOW 606 if ( (unsigned) free_ent < (unsigned) maxmaxcode) { 607 #else 608 if ( free_ent < maxmaxcode ) { 609 #endif 610 codetabof (i) = free_ent++; /* code -> hashtable */ 611 htabof (i) = fcode; 612 } 613 else if ( (count_int)in_count >= checkpoint && block_compress ) 614 cl_block (); 615 } 616 /* 617 * Put out the final code. 618 */ 619 output( (code_int)ent ); 620 out_count++; 621 output( (code_int)-1 ); 622 623 /* 624 * Print out stats on stderr 625 */ 626 if(zcat_flg == 0 && !quiet) { 627 #ifdef DEBUG 628 fprintf( stderr, 629 "%ld chars in, %ld codes (%ld bytes) out, compression factor: ", 630 in_count, out_count, bytes_out ); 631 prratio( stderr, in_count, bytes_out ); 632 fprintf( stderr, "\n"); 633 fprintf( stderr, "\tCompression as in compact: " ); 634 prratio( stderr, in_count-bytes_out, in_count ); 635 fprintf( stderr, "\n"); 636 fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n", 637 free_ent - 1, n_bits ); 638 #else /* !DEBUG */ 639 fprintf( stderr, "Compression: " ); 640 prratio( stderr, in_count-bytes_out, in_count ); 641 #endif /* DEBUG */ 642 } 643 if(bytes_out > in_count) /* exit(2) if no savings */ 644 exit_stat = 2; 645 return; 646 } 647 648 /*- 649 * Output the given code. 650 * Inputs: 651 * code: A n_bits-bit integer. If == -1, then EOF. This assumes 652 * that n_bits =< (long)wordsize - 1. 653 * Outputs: 654 * Outputs code to the file. 655 * Assumptions: 656 * Chars are 8 bits long. 657 * Algorithm: 658 * Maintain a BITS character long buffer (so that 8 codes will 659 * fit in it exactly). Use the VAX insv instruction to insert each 660 * code in turn. When the buffer fills up empty it and start over. 661 */ 662 663 static char buf[BITS]; 664 665 #ifndef vax 666 char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; 667 char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 668 #endif /* vax */ 669 670 output( code ) 671 code_int code; 672 { 673 #ifdef DEBUG 674 static int col = 0; 675 #endif /* DEBUG */ 676 677 /* 678 * On the VAX, it is important to have the register declarations 679 * in exactly the order given, or the asm will break. 680 */ 681 register int r_off = offset, bits= n_bits; 682 register char * bp = buf; 683 684 #ifdef DEBUG 685 if ( verbose ) 686 fprintf( stderr, "%5d%c", code, 687 (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); 688 #endif /* DEBUG */ 689 if ( code >= 0 ) { 690 #if defined(vax) && !defined(__GNUC__) 691 /* 692 * VAX and PCC DEPENDENT!! Implementation on other machines is 693 * below. 694 * 695 * Translation: Insert BITS bits from the argument starting at 696 * offset bits from the beginning of buf. 697 */ 698 0; /* Work around for pcc -O bug with asm and if stmt */ 699 asm( "insv 4(ap),r11,r10,(r9)" ); 700 #else 701 /* 702 * byte/bit numbering on the VAX is simulated by the following code 703 */ 704 /* 705 * Get to the first byte. 706 */ 707 bp += (r_off >> 3); 708 r_off &= 7; 709 /* 710 * Since code is always >= 8 bits, only need to mask the first 711 * hunk on the left. 712 */ 713 *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off]; 714 bp++; 715 bits -= (8 - r_off); 716 code >>= 8 - r_off; 717 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 718 if ( bits >= 8 ) { 719 *bp++ = code; 720 code >>= 8; 721 bits -= 8; 722 } 723 /* Last bits. */ 724 if(bits) 725 *bp = code; 726 #endif /* vax */ 727 offset += n_bits; 728 if ( offset == (n_bits << 3) ) { 729 bp = buf; 730 bits = n_bits; 731 bytes_out += bits; 732 do { 733 putchar(*bp++); 734 if (ferror(stdout)) 735 writeerr(); 736 } while(--bits); 737 offset = 0; 738 } 739 740 /* 741 * If the next entry is going to be too big for the code size, 742 * then increase it, if possible. 743 */ 744 if ( free_ent > maxcode || (clear_flg > 0)) 745 { 746 /* 747 * Write the whole buffer, because the input side won't 748 * discover the size increase until after it has read it. 749 */ 750 if ( offset > 0 ) { 751 if( fwrite( buf, 1, n_bits, stdout ) != n_bits) 752 writeerr(); 753 bytes_out += n_bits; 754 } 755 offset = 0; 756 757 if ( clear_flg ) { 758 maxcode = MAXCODE (n_bits = INIT_BITS); 759 clear_flg = 0; 760 } 761 else { 762 n_bits++; 763 if ( n_bits == maxbits ) 764 maxcode = maxmaxcode; 765 else 766 maxcode = MAXCODE(n_bits); 767 } 768 #ifdef DEBUG 769 if ( debug ) { 770 fprintf( stderr, "\nChange to %d bits\n", n_bits ); 771 col = 0; 772 } 773 #endif /* DEBUG */ 774 } 775 } else { 776 /* 777 * At EOF, write the rest of the buffer. 778 */ 779 if ( offset > 0 ) { 780 offset = (offset + 7) / 8; 781 if( fwrite( buf, 1, offset, stdout ) != offset ) 782 writeerr(); 783 bytes_out += offset; 784 } 785 offset = 0; 786 (void)fflush( stdout ); 787 if( ferror( stdout ) || (fclose( stdout ) == EOF) ) 788 writeerr(); 789 #ifdef DEBUG 790 if ( verbose ) 791 fprintf( stderr, "\n" ); 792 #endif 793 } 794 } 795 796 /* 797 * Decompress stdin to stdout. This routine adapts to the codes in the 798 * file building the "string" table on-the-fly; requiring no table to 799 * be stored in the compressed file. The tables used herein are shared 800 * with those of the compress() routine. See the definitions above. 801 */ 802 803 decompress() { 804 register char_type *stackp; 805 register int finchar; 806 register code_int code, oldcode, incode; 807 int n, nwritten, offset; /* Variables for buffered write */ 808 char buff[BUFSIZ]; /* Buffer for buffered write */ 809 810 811 /* 812 * As above, initialize the first 256 entries in the table. 813 */ 814 maxcode = MAXCODE(n_bits = INIT_BITS); 815 for ( code = 255; code >= 0; code-- ) { 816 tab_prefixof(code) = 0; 817 tab_suffixof(code) = (char_type)code; 818 } 819 free_ent = ((block_compress) ? FIRST : 256 ); 820 821 finchar = oldcode = getcode(); 822 if(oldcode == -1) /* EOF already? */ 823 return; /* Get out of here */ 824 825 /* first code must be 8 bits = char */ 826 n=0; 827 buff[n++] = (char)finchar; 828 829 stackp = de_stack; 830 831 while ( (code = getcode()) > -1 ) { 832 833 if ( (code == CLEAR) && block_compress ) { 834 for ( code = 255; code >= 0; code-- ) 835 tab_prefixof(code) = 0; 836 clear_flg = 1; 837 free_ent = FIRST - 1; 838 if ( (code = getcode ()) == -1 ) /* O, untimely death! */ 839 break; 840 } 841 incode = code; 842 /* 843 * Special case for KwKwK string. 844 */ 845 if ( code >= free_ent ) { 846 *stackp++ = finchar; 847 code = oldcode; 848 } 849 850 /* 851 * Generate output characters in reverse order 852 */ 853 #ifdef SIGNED_COMPARE_SLOW 854 while ( ((unsigned long)code) >= ((unsigned long)256) ) { 855 #else 856 while ( code >= 256 ) { 857 #endif 858 *stackp++ = tab_suffixof(code); 859 code = tab_prefixof(code); 860 } 861 *stackp++ = finchar = tab_suffixof(code); 862 863 /* 864 * And put them out in forward order 865 */ 866 do { 867 /* 868 * About 60% of the time is spent in the putchar() call 869 * that appeared here. It was originally 870 * putchar ( *--stackp ); 871 * If we buffer the writes ourselves, we can go faster (about 872 * 30%). 873 * 874 * At this point, the next line is the next *big* time 875 * sink in the code. It takes up about 10% of the time. 876 */ 877 buff[n++] = *--stackp; 878 if (n == BUFSIZ) { 879 offset = 0; 880 do { 881 nwritten = write(fileno(stdout), &buff[offset], n); 882 if (nwritten < 0) 883 writeerr(); 884 offset += nwritten; 885 } while ((n -= nwritten) > 0); 886 } 887 } while ( stackp > de_stack ); 888 889 /* 890 * Generate the new entry. 891 */ 892 if ( (code=free_ent) < maxmaxcode ) { 893 tab_prefixof(code) = (unsigned short)oldcode; 894 tab_suffixof(code) = finchar; 895 free_ent = code+1; 896 } 897 /* 898 * Remember previous code. 899 */ 900 oldcode = incode; 901 } 902 /* 903 * Flush the stuff remaining in our buffer... 904 */ 905 offset = 0; 906 while (n > 0) { 907 nwritten = write(fileno(stdout), &buff[offset], n); 908 if (nwritten < 0) 909 writeerr(); 910 offset += nwritten; 911 n -= nwritten; 912 } 913 if ((zcat_flg == 0) && (close(fileno(stdout)) == -1)) 914 writeerr(); 915 } 916 917 /*- 918 * Read one code from the standard input. If EOF, return -1. 919 * Inputs: 920 * stdin 921 * Outputs: 922 * code or -1 is returned. 923 */ 924 code_int 925 getcode() { 926 /* 927 * On the VAX, it is important to have the register declarations 928 * in exactly the order given, or the asm will break. 929 */ 930 register code_int code; 931 static int offset = 0, size = 0; 932 static char_type buf[BITS]; 933 register int r_off, bits; 934 register char_type *bp = buf; 935 936 if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) { 937 /* 938 * If the next entry will be too big for the current code 939 * size, then we must increase the size. This implies reading 940 * a new buffer full, too. 941 */ 942 if ( free_ent > maxcode ) { 943 n_bits++; 944 if ( n_bits == maxbits ) 945 maxcode = maxmaxcode; /* won't get any bigger now */ 946 else 947 maxcode = MAXCODE(n_bits); 948 } 949 if ( clear_flg > 0) { 950 maxcode = MAXCODE (n_bits = INIT_BITS); 951 clear_flg = 0; 952 } 953 size = fread( buf, 1, n_bits, stdin ); 954 if ( size <= 0 ) 955 return -1; /* end of file */ 956 offset = 0; 957 /* Round size down to integral number of codes */ 958 size = (size << 3) - (n_bits - 1); 959 } 960 r_off = offset; 961 bits = n_bits; 962 #ifdef vax 963 asm( "extzv r10,r9,(r8),r11" ); 964 #else /* not a vax */ 965 /* 966 * Get to the first byte. 967 */ 968 bp += (r_off >> 3); 969 r_off &= 7; 970 /* Get first part (low order bits) */ 971 #ifdef NO_UCHAR 972 code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff; 973 #else 974 code = (*bp++ >> r_off); 975 #endif /* NO_UCHAR */ 976 bits -= (8 - r_off); 977 r_off = 8 - r_off; /* now, offset into code word */ 978 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 979 if ( bits >= 8 ) { 980 #ifdef NO_UCHAR 981 code |= (*bp++ & 0xff) << r_off; 982 #else 983 code |= *bp++ << r_off; 984 #endif /* NO_UCHAR */ 985 r_off += 8; 986 bits -= 8; 987 } 988 /* high order bits. */ 989 code |= (*bp & rmask[bits]) << r_off; 990 #endif /* vax */ 991 offset += n_bits; 992 993 return code; 994 } 995 996 #ifdef DEBUG 997 printcodes() 998 { 999 /* 1000 * Just print out codes from input file. For debugging. 1001 */ 1002 code_int code; 1003 int col = 0, bits; 1004 1005 bits = n_bits = INIT_BITS; 1006 maxcode = MAXCODE(n_bits); 1007 free_ent = ((block_compress) ? FIRST : 256 ); 1008 while ( ( code = getcode() ) >= 0 ) { 1009 if ( (code == CLEAR) && block_compress ) { 1010 free_ent = FIRST - 1; 1011 clear_flg = 1; 1012 } 1013 else if ( free_ent < maxmaxcode ) 1014 free_ent++; 1015 if ( bits != n_bits ) { 1016 fprintf(stderr, "\nChange to %d bits\n", n_bits ); 1017 bits = n_bits; 1018 col = 0; 1019 } 1020 fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); 1021 } 1022 putc( '\n', stderr ); 1023 exit( 0 ); 1024 } 1025 1026 code_int sorttab[1<<BITS]; /* sorted pointers into htab */ 1027 1028 dump_tab() /* dump string table */ 1029 { 1030 register int i, first; 1031 register ent; 1032 #define STACK_SIZE 15000 1033 int stack_top = STACK_SIZE; 1034 register c; 1035 1036 if(do_decomp == 0) { /* compressing */ 1037 register int flag = 1; 1038 1039 for(i=0; i<hsize; i++) { /* build sort pointers */ 1040 if((long)htabof(i) >= 0) { 1041 sorttab[codetabof(i)] = i; 1042 } 1043 } 1044 first = block_compress ? FIRST : 256; 1045 for(i = first; i < free_ent; i++) { 1046 fprintf(stderr, "%5d: \"", i); 1047 de_stack[--stack_top] = '\n'; 1048 de_stack[--stack_top] = '"'; 1049 stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff, 1050 stack_top); 1051 for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1); 1052 ent > 256; 1053 ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) { 1054 stack_top = in_stack(htabof(sorttab[ent]) >> maxbits, 1055 stack_top); 1056 } 1057 stack_top = in_stack(ent, stack_top); 1058 fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr); 1059 stack_top = STACK_SIZE; 1060 } 1061 } else if(!debug) { /* decompressing */ 1062 1063 for ( i = 0; i < free_ent; i++ ) { 1064 ent = i; 1065 c = tab_suffixof(ent); 1066 if ( isascii(c) && isprint(c) ) 1067 fprintf( stderr, "%5d: %5d/'%c' \"", 1068 ent, tab_prefixof(ent), c ); 1069 else 1070 fprintf( stderr, "%5d: %5d/\\%03o \"", 1071 ent, tab_prefixof(ent), c ); 1072 de_stack[--stack_top] = '\n'; 1073 de_stack[--stack_top] = '"'; 1074 for ( ; ent != NULL; 1075 ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) { 1076 stack_top = in_stack(tab_suffixof(ent), stack_top); 1077 } 1078 fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr ); 1079 stack_top = STACK_SIZE; 1080 } 1081 } 1082 } 1083 1084 int 1085 in_stack(c, stack_top) 1086 register c, stack_top; 1087 { 1088 if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) { 1089 de_stack[--stack_top] = c; 1090 } else { 1091 switch( c ) { 1092 case '\n': de_stack[--stack_top] = 'n'; break; 1093 case '\t': de_stack[--stack_top] = 't'; break; 1094 case '\b': de_stack[--stack_top] = 'b'; break; 1095 case '\f': de_stack[--stack_top] = 'f'; break; 1096 case '\r': de_stack[--stack_top] = 'r'; break; 1097 case '\\': de_stack[--stack_top] = '\\'; break; 1098 default: 1099 de_stack[--stack_top] = '0' + c % 8; 1100 de_stack[--stack_top] = '0' + (c / 8) % 8; 1101 de_stack[--stack_top] = '0' + c / 64; 1102 break; 1103 } 1104 de_stack[--stack_top] = '\\'; 1105 } 1106 return stack_top; 1107 } 1108 #endif /* DEBUG */ 1109 1110 writeerr() 1111 { 1112 (void)fprintf(stderr, "compress: %s: %s\n", 1113 ofname[0] ? ofname : "stdout", strerror(errno)); 1114 (void)unlink(ofname); 1115 exit(1); 1116 } 1117 1118 copystat(ifname, ofname) 1119 char *ifname, *ofname; 1120 { 1121 struct stat statbuf; 1122 int mode; 1123 struct utimbuf tp; 1124 1125 fclose(stdout); 1126 if (stat(ifname, &statbuf)) { /* Get stat on input file */ 1127 perror(ifname); 1128 return; 1129 } 1130 if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) { 1131 if(quiet) 1132 fprintf(stderr, "%s: ", ifname); 1133 fprintf(stderr, " -- not a regular file: unchanged"); 1134 exit_stat = 1; 1135 perm_stat = 1; 1136 } else if (statbuf.st_nlink > 1) { 1137 if(quiet) 1138 fprintf(stderr, "%s: ", ifname); 1139 fprintf(stderr, " -- has %d other links: unchanged", 1140 statbuf.st_nlink - 1); 1141 exit_stat = 1; 1142 perm_stat = 1; 1143 } else if (exit_stat == 2 && (!force)) { /* No compression: remove file.Z */ 1144 if(!quiet) 1145 fprintf(stderr, " -- file unchanged"); 1146 } else { /* ***** Successful Compression ***** */ 1147 exit_stat = 0; 1148 mode = statbuf.st_mode & 07777; 1149 if (chmod(ofname, mode)) /* Copy modes */ 1150 perror(ofname); 1151 chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */ 1152 tp.actime = statbuf.st_atime; 1153 tp.modtime = statbuf.st_mtime; 1154 utime(ofname, &tp); /* Update last accessed and modified times */ 1155 if (unlink(ifname)) /* Remove input file */ 1156 perror(ifname); 1157 if(!quiet) 1158 fprintf(stderr, " -- replaced with %s", ofname); 1159 return; /* Successful return */ 1160 } 1161 1162 /* Unsuccessful return -- one of the tests failed */ 1163 if (unlink(ofname)) 1164 perror(ofname); 1165 } 1166 1167 void 1168 onintr ( ) 1169 { 1170 if (!precious) 1171 unlink ( ofname ); 1172 exit ( 1 ); 1173 } 1174 1175 void 1176 oops ( ) /* wild pointer -- assume bad input */ 1177 { 1178 if ( do_decomp ) 1179 fprintf ( stderr, "uncompress: corrupt input\n" ); 1180 unlink ( ofname ); 1181 exit ( 1 ); 1182 } 1183 1184 cl_block () /* table clear for block compress */ 1185 { 1186 register long int rat; 1187 1188 checkpoint = in_count + CHECK_GAP; 1189 #ifdef DEBUG 1190 if ( debug ) { 1191 fprintf ( stderr, "count: %ld, ratio: ", in_count ); 1192 prratio ( stderr, in_count, bytes_out ); 1193 fprintf ( stderr, "\n"); 1194 } 1195 #endif /* DEBUG */ 1196 1197 if(in_count > 0x007fffff) { /* shift will overflow */ 1198 rat = bytes_out >> 8; 1199 if(rat == 0) { /* Don't divide by zero */ 1200 rat = 0x7fffffff; 1201 } else { 1202 rat = in_count / rat; 1203 } 1204 } else { 1205 rat = (in_count << 8) / bytes_out; /* 8 fractional bits */ 1206 } 1207 if ( rat > ratio ) { 1208 ratio = rat; 1209 } else { 1210 ratio = 0; 1211 #ifdef DEBUG 1212 if(verbose) 1213 dump_tab(); /* dump string table */ 1214 #endif 1215 cl_hash ( (count_int) hsize ); 1216 free_ent = FIRST; 1217 clear_flg = 1; 1218 output ( (code_int) CLEAR ); 1219 #ifdef DEBUG 1220 if(debug) 1221 fprintf ( stderr, "clear\n" ); 1222 #endif /* DEBUG */ 1223 } 1224 } 1225 1226 cl_hash(hsize) /* reset code table */ 1227 register count_int hsize; 1228 { 1229 register count_int *htab_p = htab+hsize; 1230 register long i; 1231 register long m1 = -1; 1232 1233 i = hsize - 16; 1234 do { /* might use Sys V memset(3) here */ 1235 *(htab_p-16) = m1; 1236 *(htab_p-15) = m1; 1237 *(htab_p-14) = m1; 1238 *(htab_p-13) = m1; 1239 *(htab_p-12) = m1; 1240 *(htab_p-11) = m1; 1241 *(htab_p-10) = m1; 1242 *(htab_p-9) = m1; 1243 *(htab_p-8) = m1; 1244 *(htab_p-7) = m1; 1245 *(htab_p-6) = m1; 1246 *(htab_p-5) = m1; 1247 *(htab_p-4) = m1; 1248 *(htab_p-3) = m1; 1249 *(htab_p-2) = m1; 1250 *(htab_p-1) = m1; 1251 htab_p -= 16; 1252 } while ((i -= 16) >= 0); 1253 for ( i += 16; i > 0; i-- ) 1254 *--htab_p = m1; 1255 } 1256 1257 prratio(stream, num, den) 1258 FILE *stream; 1259 long int num, den; 1260 { 1261 register int q; /* Doesn't need to be long */ 1262 1263 if(num > 214748L) { /* 2147483647/10000 */ 1264 q = num / (den / 10000L); 1265 } else { 1266 q = 10000L * num / den; /* Long calculations, though */ 1267 } 1268 if (q < 0) { 1269 putc('-', stream); 1270 q = -q; 1271 } 1272 fprintf(stream, "%d.%02d%%", q / 100, q % 100); 1273 } 1274 1275 usage() 1276 { 1277 (void)fprintf(stderr, 1278 #ifdef DEBUG 1279 "compress [-CDVcdfnv] [-b maxbits] [file ...]\n"); 1280 #else 1281 "compress [-Ccdfnv] [-b maxbits] [file ...]\n"); 1282 #endif 1283 exit(1); 1284 } 1285