1 /* 2 * Copyright (c) 1985, 1986 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * James A. Woods, derived from original work by Spencer Thomas 7 * and Joseph Orost. 8 * 9 * %sccs.include.redist.c% 10 */ 11 12 #ifndef lint 13 char copyright[] = 14 "@(#) Copyright (c) 1985, 1986 The Regents of the University of California.\n\ 15 All rights reserved.\n"; 16 #endif /* not lint */ 17 18 #ifndef lint 19 static char sccsid[] = "@(#)compress.c 5.19 (Berkeley) 03/18/91"; 20 #endif /* not lint */ 21 22 /* 23 * compress.c - File compression ala IEEE Computer, June 1984. 24 * 25 * Authors: Spencer W. Thomas (decvax!utah-cs!thomas) 26 * Jim McKie (decvax!mcvax!jim) 27 * Steve Davies (decvax!vax135!petsd!peora!srd) 28 * Ken Turkowski (decvax!decwrl!turtlevax!ken) 29 * James A. Woods (decvax!ihnp4!ames!jaw) 30 * Joe Orost (decvax!vax135!petsd!joe) 31 */ 32 33 #include <sys/param.h> 34 #include <sys/stat.h> 35 #include <signal.h> 36 #include <utime.h> 37 #include <errno.h> 38 #include <unistd.h> 39 #include <stdio.h> 40 #include <ctype.h> 41 #include <stdlib.h> 42 #include <string.h> 43 44 /* 45 * Set USERMEM to the maximum amount of physical user memory available 46 * in bytes. USERMEM is used to determine the maximum BITS that can be used 47 * for compression. 48 * 49 * SACREDMEM is the amount of physical memory saved for others; compress 50 * will hog the rest. 51 */ 52 #ifndef SACREDMEM 53 #define SACREDMEM 0 54 #endif 55 56 #ifndef USERMEM 57 # define USERMEM 450000 /* default user memory */ 58 #endif 59 60 #ifdef pdp11 61 # define BITS 12 /* max bits/code for 16-bit machine */ 62 # define NO_UCHAR /* also if "unsigned char" functions as signed char */ 63 # undef USERMEM 64 #endif /* pdp11 */ /* don't forget to compile with -i */ 65 66 #ifdef USERMEM 67 # if USERMEM >= (433484+SACREDMEM) 68 # define PBITS 16 69 # else 70 # if USERMEM >= (229600+SACREDMEM) 71 # define PBITS 15 72 # else 73 # if USERMEM >= (127536+SACREDMEM) 74 # define PBITS 14 75 # else 76 # if USERMEM >= (73464+SACREDMEM) 77 # define PBITS 13 78 # else 79 # define PBITS 12 80 # endif 81 # endif 82 # endif 83 # endif 84 # undef USERMEM 85 #endif /* USERMEM */ 86 87 #ifdef PBITS /* Preferred BITS for this memory size */ 88 # ifndef BITS 89 # define BITS PBITS 90 # endif BITS 91 #endif /* PBITS */ 92 93 #if BITS == 16 94 # define HSIZE 69001 /* 95% occupancy */ 95 #endif 96 #if BITS == 15 97 # define HSIZE 35023 /* 94% occupancy */ 98 #endif 99 #if BITS == 14 100 # define HSIZE 18013 /* 91% occupancy */ 101 #endif 102 #if BITS == 13 103 # define HSIZE 9001 /* 91% occupancy */ 104 #endif 105 #if BITS <= 12 106 # define HSIZE 5003 /* 80% occupancy */ 107 #endif 108 109 /* 110 * a code_int must be able to hold 2**BITS values of type int, and also -1 111 */ 112 #if BITS > 15 113 typedef long int code_int; 114 #else 115 typedef int code_int; 116 #endif 117 118 #ifdef SIGNED_COMPARE_SLOW 119 typedef unsigned long int count_int; 120 typedef unsigned short int count_short; 121 #else 122 typedef long int count_int; 123 #endif 124 125 #ifdef NO_UCHAR 126 typedef char char_type; 127 #else 128 typedef unsigned char char_type; 129 #endif /* UCHAR */ 130 char_type magic_header[] = { "\037\235" }; /* 1F 9D */ 131 132 /* Defines for third byte of header */ 133 #define BIT_MASK 0x1f 134 #define BLOCK_MASK 0x80 135 /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is 136 a fourth header byte (for expansion). 137 */ 138 #define INIT_BITS 9 /* initial number of bits/code */ 139 140 int n_bits; /* number of bits/code */ 141 int maxbits = BITS; /* user settable max # bits/code */ 142 code_int maxcode; /* maximum code, given n_bits */ 143 code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */ 144 #ifdef COMPATIBLE /* But wrong! */ 145 # define MAXCODE(n_bits) (1 << (n_bits) - 1) 146 #else 147 # define MAXCODE(n_bits) ((1 << (n_bits)) - 1) 148 #endif /* COMPATIBLE */ 149 150 count_int htab [HSIZE]; 151 unsigned short codetab [HSIZE]; 152 153 #define htabof(i) htab[i] 154 #define codetabof(i) codetab[i] 155 code_int hsize = HSIZE; /* for dynamic table sizing */ 156 count_int fsize; 157 158 /* 159 * To save much memory, we overlay the table used by compress() with those 160 * used by decompress(). The tab_prefix table is the same size and type 161 * as the codetab. The tab_suffix table needs 2**BITS characters. We 162 * get this from the beginning of htab. The output stack uses the rest 163 * of htab, and contains characters. There is plenty of room for any 164 * possible stack (stack used to be 8000 characters). 165 */ 166 167 #define tab_prefixof(i) codetabof(i) 168 # define tab_suffixof(i) ((char_type *)(htab))[i] 169 # define de_stack ((char_type *)&tab_suffixof(1<<BITS)) 170 171 code_int free_ent = 0; /* first unused entry */ 172 int exit_stat = 0; /* per-file status */ 173 int perm_stat = 0; /* permanent status */ 174 175 code_int getcode(); 176 177 int nomagic = 0; /* Use a 3-byte magic number header, unless old file */ 178 int zcat_flg = 0; /* Write output on stdout, suppress messages */ 179 int precious = 1; /* Don't unlink output file on interrupt */ 180 int quiet = 1; /* don't tell me about compression */ 181 182 /* 183 * block compression parameters -- after all codes are used up, 184 * and compression rate changes, start over. 185 */ 186 int block_compress = BLOCK_MASK; 187 int clear_flg = 0; 188 long int ratio = 0; 189 #define CHECK_GAP 10000 /* ratio check interval */ 190 count_int checkpoint = CHECK_GAP; 191 /* 192 * the next two codes should not be changed lightly, as they must not 193 * lie within the contiguous general code space. 194 */ 195 #define FIRST 257 /* first free entry */ 196 #define CLEAR 256 /* table clear output code */ 197 198 int force = 0; 199 char ofname [100]; 200 #ifdef DEBUG 201 int debug, verbose; 202 #endif 203 sig_t oldint; 204 int bgnd_flag; 205 206 int do_decomp = 0; 207 208 /*- 209 * Algorithm from "A Technique for High Performance Data Compression", 210 * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. 211 * 212 * Usage: compress [-dfvc] [-b bits] [file ...] 213 * Inputs: 214 * -d: If given, decompression is done instead. 215 * 216 * -c: Write output on stdout, don't remove original. 217 * 218 * -b: Parameter limits the max number of bits/code. 219 * 220 * -f: Forces output file to be generated, even if one already 221 * exists, and even if no space is saved by compressing. 222 * If -f is not used, the user will be prompted if stdin is 223 * a tty, otherwise, the output file will not be overwritten. 224 * 225 * -v: Write compression statistics 226 * 227 * file ...: Files to be compressed. If none specified, stdin 228 * is used. 229 * Outputs: 230 * file.Z: Compressed form of file with same mode, owner, and utimes 231 * or stdout (if stdin used as input) 232 * 233 * Assumptions: 234 * When filenames are given, replaces with the compressed version 235 * (.Z suffix) only if the file decreases in size. 236 * Algorithm: 237 * Modified Lempel-Ziv method (LZW). Basically finds common 238 * substrings and replaces them with a variable size code. This is 239 * deterministic, and can be done on the fly. Thus, the decompression 240 * procedure needs no input table, but tracks the way the table was built. 241 */ 242 243 main(argc, argv) 244 int argc; 245 char **argv; 246 { 247 extern int optind; 248 extern char *optarg; 249 struct stat statbuf; 250 int ch, overwrite; 251 char **filelist, **fileptr, *cp, tempname[MAXPATHLEN]; 252 void onintr(), oops(); 253 254 /* This bg check only works for sh. */ 255 if ((oldint = signal(SIGINT, SIG_IGN)) != SIG_IGN) { 256 (void)signal(SIGINT, onintr); 257 (void)signal(SIGSEGV, oops); /* XXX */ 258 } 259 bgnd_flag = oldint != SIG_DFL; 260 261 #ifdef COMPATIBLE 262 nomagic = 1; /* Original didn't have a magic number */ 263 #endif 264 265 if (cp = rindex(argv[0], '/')) 266 ++cp; 267 else 268 cp = argv[0]; 269 if (strcmp(cp, "uncompress") == 0) 270 do_decomp = 1; 271 else if(strcmp(cp, "zcat") == 0) { 272 do_decomp = 1; 273 zcat_flg = 1; 274 } 275 276 /* 277 * -b maxbits => maxbits. 278 * -C => generate output compatible with compress 2.0. 279 * -c => cat all output to stdout 280 * -D => debug 281 * -d => do_decomp 282 * -f => force overwrite of output file 283 * -n => no header: useful to uncompress old files 284 * -V => print Version; debug verbose 285 * -v => unquiet 286 */ 287 288 overwrite = 0; 289 #ifdef DEBUG 290 while ((ch = getopt(argc, argv, "b:CcDdfnVv")) != EOF) 291 #else 292 while ((ch = getopt(argc, argv, "b:Ccdfnv")) != EOF) 293 #endif 294 switch(ch) { 295 case 'b': 296 maxbits = atoi(optarg); 297 break; 298 case 'C': 299 block_compress = 0; 300 break; 301 case 'c': 302 zcat_flg = 1; 303 break; 304 #ifdef DEBUG 305 case 'D': 306 debug = 1; 307 break; 308 #endif 309 case 'd': 310 do_decomp = 1; 311 break; 312 case 'f': 313 overwrite = 1; 314 force = 1; 315 break; 316 case 'n': 317 nomagic = 1; 318 break; 319 case 'q': 320 quiet = 1; 321 break; 322 #ifdef DEBUG 323 case 'V': 324 verbose = 1; 325 break; 326 #endif 327 case 'v': 328 quiet = 0; 329 break; 330 case '?': 331 default: 332 usage(); 333 } 334 argc -= optind; 335 argv += optind; 336 337 if (maxbits < INIT_BITS) 338 maxbits = INIT_BITS; 339 if (maxbits > BITS) 340 maxbits = BITS; 341 maxmaxcode = 1 << maxbits; 342 343 /* Build useless input file list. */ 344 filelist = fileptr = (char **)(malloc(argc * sizeof(*argv))); 345 while (*argv) 346 *fileptr++ = *argv++; 347 *fileptr = NULL; 348 349 if (*filelist != NULL) { 350 for (fileptr = filelist; *fileptr; fileptr++) { 351 exit_stat = 0; 352 if (do_decomp) { /* DECOMPRESSION */ 353 /* Check for .Z suffix */ 354 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) { 355 /* No .Z: tack one on */ 356 strcpy(tempname, *fileptr); 357 strcat(tempname, ".Z"); 358 *fileptr = tempname; 359 } 360 /* Open input file */ 361 if ((freopen(*fileptr, "r", stdin)) == NULL) { 362 perror(*fileptr); 363 perm_stat = 1; 364 continue; 365 } 366 /* Check the magic number */ 367 if (nomagic == 0) { 368 if ((getchar() != (magic_header[0] & 0xFF)) 369 || (getchar() != (magic_header[1] & 0xFF))) { 370 fprintf(stderr, "%s: not in compressed format\n", 371 *fileptr); 372 continue; 373 } 374 maxbits = getchar(); /* set -b from file */ 375 block_compress = maxbits & BLOCK_MASK; 376 maxbits &= BIT_MASK; 377 maxmaxcode = 1 << maxbits; 378 if(maxbits > BITS) { 379 fprintf(stderr, 380 "%s: compressed with %d bits, can only handle %d bits\n", 381 *fileptr, maxbits, BITS); 382 continue; 383 } 384 } 385 /* Generate output filename */ 386 strcpy(ofname, *fileptr); 387 ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */ 388 } else { /* COMPRESSION */ 389 if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) { 390 fprintf(stderr, "%s: already has .Z suffix -- no change\n", 391 *fileptr); 392 continue; 393 } 394 /* Open input file */ 395 if ((freopen(*fileptr, "r", stdin)) == NULL) { 396 perror(*fileptr); 397 perm_stat = 1; 398 continue; 399 } 400 stat ( *fileptr, &statbuf ); 401 fsize = (long) statbuf.st_size; 402 /* 403 * tune hash table size for small files -- ad hoc, 404 * but the sizes match earlier #defines, which 405 * serve as upper bounds on the number of output codes. 406 */ 407 hsize = HSIZE; 408 if ( fsize < (1 << 12) ) 409 hsize = MIN ( 5003, HSIZE ); 410 else if ( fsize < (1 << 13) ) 411 hsize = MIN ( 9001, HSIZE ); 412 else if ( fsize < (1 << 14) ) 413 hsize = MIN ( 18013, HSIZE ); 414 else if ( fsize < (1 << 15) ) 415 hsize = MIN ( 35023, HSIZE ); 416 else if ( fsize < 47000 ) 417 hsize = MIN ( 50021, HSIZE ); 418 419 /* Generate output filename */ 420 strcpy(ofname, *fileptr); 421 strcat(ofname, ".Z"); 422 } 423 /* Check for overwrite of existing file */ 424 if (overwrite == 0 && zcat_flg == 0) { 425 if (stat(ofname, &statbuf) == 0) { 426 char response[2]; 427 response[0] = 'n'; 428 fprintf(stderr, "%s already exists;", ofname); 429 if (bgnd_flag == 0 && isatty(2)) { 430 fprintf(stderr, " do you wish to overwrite %s (y or n)? ", 431 ofname); 432 fflush(stderr); 433 read(2, response, 2); 434 while (response[1] != '\n') { 435 if (read(2, response+1, 1) < 0) { /* Ack! */ 436 perror("stderr"); break; 437 } 438 } 439 } 440 if (response[0] != 'y') { 441 fprintf(stderr, "\tnot overwritten\n"); 442 continue; 443 } 444 } 445 } 446 if(zcat_flg == 0) { /* Open output file */ 447 if (freopen(ofname, "w", stdout) == NULL) { 448 perror(ofname); 449 perm_stat = 1; 450 continue; 451 } 452 precious = 0; 453 if(!quiet) 454 fprintf(stderr, "%s: ", *fileptr); 455 } 456 457 /* Actually do the compression/decompression */ 458 if (do_decomp == 0) compress(); 459 #ifndef DEBUG 460 else decompress(); 461 #else 462 else if (debug == 0) decompress(); 463 else printcodes(); 464 if (verbose) dump_tab(); 465 #endif /* DEBUG */ 466 if(zcat_flg == 0) { 467 copystat(*fileptr, ofname); /* Copy stats */ 468 precious = 1; 469 if((exit_stat == 1) || (!quiet)) 470 putc('\n', stderr); 471 } 472 } 473 } else { /* Standard input */ 474 if (do_decomp == 0) { 475 compress(); 476 #ifdef DEBUG 477 if(verbose) dump_tab(); 478 #endif /* DEBUG */ 479 if(!quiet) 480 putc('\n', stderr); 481 } else { 482 /* Check the magic number */ 483 if (nomagic == 0) { 484 if ((getchar()!=(magic_header[0] & 0xFF)) 485 || (getchar()!=(magic_header[1] & 0xFF))) { 486 fprintf(stderr, "stdin: not in compressed format\n"); 487 exit(1); 488 } 489 maxbits = getchar(); /* set -b from file */ 490 block_compress = maxbits & BLOCK_MASK; 491 maxbits &= BIT_MASK; 492 maxmaxcode = 1 << maxbits; 493 fsize = 100000; /* assume stdin large for USERMEM */ 494 if(maxbits > BITS) { 495 fprintf(stderr, 496 "stdin: compressed with %d bits, can only handle %d bits\n", 497 maxbits, BITS); 498 exit(1); 499 } 500 } 501 #ifndef DEBUG 502 decompress(); 503 #else 504 if (debug == 0) decompress(); 505 else printcodes(); 506 if (verbose) dump_tab(); 507 #endif /* DEBUG */ 508 } 509 } 510 exit(perm_stat ? perm_stat : exit_stat); 511 } 512 513 static int offset; 514 long int in_count = 1; /* length of input */ 515 long int bytes_out; /* length of compressed output */ 516 long int out_count = 0; /* # of codes output (for debugging) */ 517 518 /* 519 * compress stdin to stdout 520 * 521 * Algorithm: use open addressing double hashing (no chaining) on the 522 * prefix code / next character combination. We do a variant of Knuth's 523 * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime 524 * secondary probe. Here, the modular division first probe is gives way 525 * to a faster exclusive-or manipulation. Also do block compression with 526 * an adaptive reset, whereby the code table is cleared when the compression 527 * ratio decreases, but after the table fills. The variable-length output 528 * codes are re-sized at this point, and a special CLEAR code is generated 529 * for the decompressor. Late addition: construct the table according to 530 * file size for noticeable speed improvement on small files. Please direct 531 * questions about this implementation to ames!jaw. 532 */ 533 534 compress() 535 { 536 register long fcode; 537 register code_int i = 0; 538 register int c; 539 register code_int ent; 540 register int disp; 541 register code_int hsize_reg; 542 register int hshift; 543 544 #ifndef COMPATIBLE 545 if (nomagic == 0) { 546 putchar(magic_header[0]); 547 putchar(magic_header[1]); 548 putchar((char)(maxbits | block_compress)); 549 if(ferror(stdout)) 550 writeerr(); 551 } 552 #endif /* COMPATIBLE */ 553 554 offset = 0; 555 bytes_out = 3; /* includes 3-byte header mojo */ 556 out_count = 0; 557 clear_flg = 0; 558 ratio = 0; 559 in_count = 1; 560 checkpoint = CHECK_GAP; 561 maxcode = MAXCODE(n_bits = INIT_BITS); 562 free_ent = ((block_compress) ? FIRST : 256 ); 563 564 ent = getchar (); 565 566 hshift = 0; 567 for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L ) 568 hshift++; 569 hshift = 8 - hshift; /* set hash code range bound */ 570 571 hsize_reg = hsize; 572 cl_hash( (count_int) hsize_reg); /* clear hash table */ 573 574 #ifdef SIGNED_COMPARE_SLOW 575 while ( (c = getchar()) != (unsigned) EOF ) { 576 #else 577 while ( (c = getchar()) != EOF ) { 578 #endif 579 in_count++; 580 fcode = (long) (((long) c << maxbits) + ent); 581 i = ((c << hshift) ^ ent); /* xor hashing */ 582 583 if ( htabof (i) == fcode ) { 584 ent = codetabof (i); 585 continue; 586 } else if ( (long)htabof (i) < 0 ) /* empty slot */ 587 goto nomatch; 588 disp = hsize_reg - i; /* secondary hash (after G. Knott) */ 589 if ( i == 0 ) 590 disp = 1; 591 probe: 592 if ( (i -= disp) < 0 ) 593 i += hsize_reg; 594 595 if ( htabof (i) == fcode ) { 596 ent = codetabof (i); 597 continue; 598 } 599 if ( (long)htabof (i) > 0 ) 600 goto probe; 601 nomatch: 602 output ( (code_int) ent ); 603 out_count++; 604 ent = c; 605 #ifdef SIGNED_COMPARE_SLOW 606 if ( (unsigned) free_ent < (unsigned) maxmaxcode) { 607 #else 608 if ( free_ent < maxmaxcode ) { 609 #endif 610 codetabof (i) = free_ent++; /* code -> hashtable */ 611 htabof (i) = fcode; 612 } 613 else if ( (count_int)in_count >= checkpoint && block_compress ) 614 cl_block (); 615 } 616 /* 617 * Put out the final code. 618 */ 619 output( (code_int)ent ); 620 out_count++; 621 output( (code_int)-1 ); 622 623 /* 624 * Print out stats on stderr 625 */ 626 if(zcat_flg == 0 && !quiet) { 627 #ifdef DEBUG 628 fprintf( stderr, 629 "%ld chars in, %ld codes (%ld bytes) out, compression factor: ", 630 in_count, out_count, bytes_out ); 631 prratio( stderr, in_count, bytes_out ); 632 fprintf( stderr, "\n"); 633 fprintf( stderr, "\tCompression as in compact: " ); 634 prratio( stderr, in_count-bytes_out, in_count ); 635 fprintf( stderr, "\n"); 636 fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n", 637 free_ent - 1, n_bits ); 638 #else /* !DEBUG */ 639 fprintf( stderr, "Compression: " ); 640 prratio( stderr, in_count-bytes_out, in_count ); 641 #endif /* DEBUG */ 642 } 643 if(bytes_out > in_count) /* exit(2) if no savings */ 644 exit_stat = 2; 645 return; 646 } 647 648 /*- 649 * Output the given code. 650 * Inputs: 651 * code: A n_bits-bit integer. If == -1, then EOF. This assumes 652 * that n_bits =< (long)wordsize - 1. 653 * Outputs: 654 * Outputs code to the file. 655 * Assumptions: 656 * Chars are 8 bits long. 657 * Algorithm: 658 * Maintain a BITS character long buffer (so that 8 codes will 659 * fit in it exactly). Use the VAX insv instruction to insert each 660 * code in turn. When the buffer fills up empty it and start over. 661 */ 662 663 static char buf[BITS]; 664 665 #ifndef vax 666 char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; 667 char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; 668 #endif /* vax */ 669 670 output( code ) 671 code_int code; 672 { 673 #ifdef DEBUG 674 static int col = 0; 675 #endif /* DEBUG */ 676 677 /* 678 * On the VAX, it is important to have the register declarations 679 * in exactly the order given, or the asm will break. 680 */ 681 register int r_off = offset, bits= n_bits; 682 register char * bp = buf; 683 684 #ifdef DEBUG 685 if ( verbose ) 686 fprintf( stderr, "%5d%c", code, 687 (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); 688 #endif /* DEBUG */ 689 if ( code >= 0 ) { 690 #if defined(vax) && !defined(__GNUC__) 691 /* 692 * VAX and PCC DEPENDENT!! Implementation on other machines is 693 * below. 694 * 695 * Translation: Insert BITS bits from the argument starting at 696 * offset bits from the beginning of buf. 697 */ 698 0; /* Work around for pcc -O bug with asm and if stmt */ 699 asm( "insv 4(ap),r11,r10,(r9)" ); 700 #else 701 /* 702 * byte/bit numbering on the VAX is simulated by the following code 703 */ 704 /* 705 * Get to the first byte. 706 */ 707 bp += (r_off >> 3); 708 r_off &= 7; 709 /* 710 * Since code is always >= 8 bits, only need to mask the first 711 * hunk on the left. 712 */ 713 *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off]; 714 bp++; 715 bits -= (8 - r_off); 716 code >>= 8 - r_off; 717 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 718 if ( bits >= 8 ) { 719 *bp++ = code; 720 code >>= 8; 721 bits -= 8; 722 } 723 /* Last bits. */ 724 if(bits) 725 *bp = code; 726 #endif /* vax */ 727 offset += n_bits; 728 if ( offset == (n_bits << 3) ) { 729 bp = buf; 730 bits = n_bits; 731 bytes_out += bits; 732 do { 733 putchar(*bp++); 734 if (ferror(stdout)) 735 writeerr(); 736 } while(--bits); 737 offset = 0; 738 } 739 740 /* 741 * If the next entry is going to be too big for the code size, 742 * then increase it, if possible. 743 */ 744 if ( free_ent > maxcode || (clear_flg > 0)) 745 { 746 /* 747 * Write the whole buffer, because the input side won't 748 * discover the size increase until after it has read it. 749 */ 750 if ( offset > 0 ) { 751 if( fwrite( buf, 1, n_bits, stdout ) != n_bits) 752 writeerr(); 753 bytes_out += n_bits; 754 } 755 offset = 0; 756 757 if ( clear_flg ) { 758 maxcode = MAXCODE (n_bits = INIT_BITS); 759 clear_flg = 0; 760 } 761 else { 762 n_bits++; 763 if ( n_bits == maxbits ) 764 maxcode = maxmaxcode; 765 else 766 maxcode = MAXCODE(n_bits); 767 } 768 #ifdef DEBUG 769 if ( debug ) { 770 fprintf( stderr, "\nChange to %d bits\n", n_bits ); 771 col = 0; 772 } 773 #endif /* DEBUG */ 774 } 775 } else { 776 /* 777 * At EOF, write the rest of the buffer. 778 */ 779 if ( offset > 0 ) { 780 offset = (offset + 7) / 8; 781 if( fwrite( buf, 1, offset, stdout ) != offset ) 782 writeerr(); 783 bytes_out += offset; 784 } 785 offset = 0; 786 (void)fflush( stdout ); 787 if( ferror( stdout ) ) 788 writeerr(); 789 #ifdef DEBUG 790 if ( verbose ) 791 fprintf( stderr, "\n" ); 792 #endif 793 } 794 } 795 796 /* 797 * Decompress stdin to stdout. This routine adapts to the codes in the 798 * file building the "string" table on-the-fly; requiring no table to 799 * be stored in the compressed file. The tables used herein are shared 800 * with those of the compress() routine. See the definitions above. 801 */ 802 803 decompress() { 804 register char_type *stackp; 805 register int finchar; 806 register code_int code, oldcode, incode; 807 int n, nwritten, offset; /* Variables for buffered write */ 808 char buff[BUFSIZ]; /* Buffer for buffered write */ 809 810 811 /* 812 * As above, initialize the first 256 entries in the table. 813 */ 814 maxcode = MAXCODE(n_bits = INIT_BITS); 815 for ( code = 255; code >= 0; code-- ) { 816 tab_prefixof(code) = 0; 817 tab_suffixof(code) = (char_type)code; 818 } 819 free_ent = ((block_compress) ? FIRST : 256 ); 820 821 finchar = oldcode = getcode(); 822 if(oldcode == -1) /* EOF already? */ 823 return; /* Get out of here */ 824 825 /* first code must be 8 bits = char */ 826 n=0; 827 buff[n++] = (char)finchar; 828 829 stackp = de_stack; 830 831 while ( (code = getcode()) > -1 ) { 832 833 if ( (code == CLEAR) && block_compress ) { 834 for ( code = 255; code >= 0; code-- ) 835 tab_prefixof(code) = 0; 836 clear_flg = 1; 837 free_ent = FIRST - 1; 838 if ( (code = getcode ()) == -1 ) /* O, untimely death! */ 839 break; 840 } 841 incode = code; 842 /* 843 * Special case for KwKwK string. 844 */ 845 if ( code >= free_ent ) { 846 *stackp++ = finchar; 847 code = oldcode; 848 } 849 850 /* 851 * Generate output characters in reverse order 852 */ 853 #ifdef SIGNED_COMPARE_SLOW 854 while ( ((unsigned long)code) >= ((unsigned long)256) ) { 855 #else 856 while ( code >= 256 ) { 857 #endif 858 *stackp++ = tab_suffixof(code); 859 code = tab_prefixof(code); 860 } 861 *stackp++ = finchar = tab_suffixof(code); 862 863 /* 864 * And put them out in forward order 865 */ 866 do { 867 /* 868 * About 60% of the time is spent in the putchar() call 869 * that appeared here. It was originally 870 * putchar ( *--stackp ); 871 * If we buffer the writes ourselves, we can go faster (about 872 * 30%). 873 * 874 * At this point, the next line is the next *big* time 875 * sink in the code. It takes up about 10% of the time. 876 */ 877 buff[n++] = *--stackp; 878 if (n == BUFSIZ) { 879 offset = 0; 880 do { 881 nwritten = write(fileno(stdout), &buff[offset], n); 882 if (nwritten < 0) 883 writeerr(); 884 offset += nwritten; 885 } while ((n -= nwritten) > 0); 886 } 887 } while ( stackp > de_stack ); 888 889 /* 890 * Generate the new entry. 891 */ 892 if ( (code=free_ent) < maxmaxcode ) { 893 tab_prefixof(code) = (unsigned short)oldcode; 894 tab_suffixof(code) = finchar; 895 free_ent = code+1; 896 } 897 /* 898 * Remember previous code. 899 */ 900 oldcode = incode; 901 } 902 /* 903 * Flush the stuff remaining in our buffer... 904 */ 905 offset = 0; 906 while (n > 0) { 907 nwritten = write(fileno(stdout), &buff[offset], n); 908 if (nwritten < 0) 909 writeerr(); 910 offset += nwritten; 911 n -= nwritten; 912 } 913 } 914 915 /*- 916 * Read one code from the standard input. If EOF, return -1. 917 * Inputs: 918 * stdin 919 * Outputs: 920 * code or -1 is returned. 921 */ 922 code_int 923 getcode() { 924 /* 925 * On the VAX, it is important to have the register declarations 926 * in exactly the order given, or the asm will break. 927 */ 928 register code_int code; 929 static int offset = 0, size = 0; 930 static char_type buf[BITS]; 931 register int r_off, bits; 932 register char_type *bp = buf; 933 934 if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) { 935 /* 936 * If the next entry will be too big for the current code 937 * size, then we must increase the size. This implies reading 938 * a new buffer full, too. 939 */ 940 if ( free_ent > maxcode ) { 941 n_bits++; 942 if ( n_bits == maxbits ) 943 maxcode = maxmaxcode; /* won't get any bigger now */ 944 else 945 maxcode = MAXCODE(n_bits); 946 } 947 if ( clear_flg > 0) { 948 maxcode = MAXCODE (n_bits = INIT_BITS); 949 clear_flg = 0; 950 } 951 size = fread( buf, 1, n_bits, stdin ); 952 if ( size <= 0 ) 953 return -1; /* end of file */ 954 offset = 0; 955 /* Round size down to integral number of codes */ 956 size = (size << 3) - (n_bits - 1); 957 } 958 r_off = offset; 959 bits = n_bits; 960 #ifdef vax 961 asm( "extzv r10,r9,(r8),r11" ); 962 #else /* not a vax */ 963 /* 964 * Get to the first byte. 965 */ 966 bp += (r_off >> 3); 967 r_off &= 7; 968 /* Get first part (low order bits) */ 969 #ifdef NO_UCHAR 970 code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff; 971 #else 972 code = (*bp++ >> r_off); 973 #endif /* NO_UCHAR */ 974 bits -= (8 - r_off); 975 r_off = 8 - r_off; /* now, offset into code word */ 976 /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ 977 if ( bits >= 8 ) { 978 #ifdef NO_UCHAR 979 code |= (*bp++ & 0xff) << r_off; 980 #else 981 code |= *bp++ << r_off; 982 #endif /* NO_UCHAR */ 983 r_off += 8; 984 bits -= 8; 985 } 986 /* high order bits. */ 987 code |= (*bp & rmask[bits]) << r_off; 988 #endif /* vax */ 989 offset += n_bits; 990 991 return code; 992 } 993 994 #ifdef DEBUG 995 printcodes() 996 { 997 /* 998 * Just print out codes from input file. For debugging. 999 */ 1000 code_int code; 1001 int col = 0, bits; 1002 1003 bits = n_bits = INIT_BITS; 1004 maxcode = MAXCODE(n_bits); 1005 free_ent = ((block_compress) ? FIRST : 256 ); 1006 while ( ( code = getcode() ) >= 0 ) { 1007 if ( (code == CLEAR) && block_compress ) { 1008 free_ent = FIRST - 1; 1009 clear_flg = 1; 1010 } 1011 else if ( free_ent < maxmaxcode ) 1012 free_ent++; 1013 if ( bits != n_bits ) { 1014 fprintf(stderr, "\nChange to %d bits\n", n_bits ); 1015 bits = n_bits; 1016 col = 0; 1017 } 1018 fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); 1019 } 1020 putc( '\n', stderr ); 1021 exit( 0 ); 1022 } 1023 1024 code_int sorttab[1<<BITS]; /* sorted pointers into htab */ 1025 1026 dump_tab() /* dump string table */ 1027 { 1028 register int i, first; 1029 register ent; 1030 #define STACK_SIZE 15000 1031 int stack_top = STACK_SIZE; 1032 register c; 1033 1034 if(do_decomp == 0) { /* compressing */ 1035 register int flag = 1; 1036 1037 for(i=0; i<hsize; i++) { /* build sort pointers */ 1038 if((long)htabof(i) >= 0) { 1039 sorttab[codetabof(i)] = i; 1040 } 1041 } 1042 first = block_compress ? FIRST : 256; 1043 for(i = first; i < free_ent; i++) { 1044 fprintf(stderr, "%5d: \"", i); 1045 de_stack[--stack_top] = '\n'; 1046 de_stack[--stack_top] = '"'; 1047 stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff, 1048 stack_top); 1049 for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1); 1050 ent > 256; 1051 ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) { 1052 stack_top = in_stack(htabof(sorttab[ent]) >> maxbits, 1053 stack_top); 1054 } 1055 stack_top = in_stack(ent, stack_top); 1056 fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr); 1057 stack_top = STACK_SIZE; 1058 } 1059 } else if(!debug) { /* decompressing */ 1060 1061 for ( i = 0; i < free_ent; i++ ) { 1062 ent = i; 1063 c = tab_suffixof(ent); 1064 if ( isascii(c) && isprint(c) ) 1065 fprintf( stderr, "%5d: %5d/'%c' \"", 1066 ent, tab_prefixof(ent), c ); 1067 else 1068 fprintf( stderr, "%5d: %5d/\\%03o \"", 1069 ent, tab_prefixof(ent), c ); 1070 de_stack[--stack_top] = '\n'; 1071 de_stack[--stack_top] = '"'; 1072 for ( ; ent != NULL; 1073 ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) { 1074 stack_top = in_stack(tab_suffixof(ent), stack_top); 1075 } 1076 fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr ); 1077 stack_top = STACK_SIZE; 1078 } 1079 } 1080 } 1081 1082 int 1083 in_stack(c, stack_top) 1084 register c, stack_top; 1085 { 1086 if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) { 1087 de_stack[--stack_top] = c; 1088 } else { 1089 switch( c ) { 1090 case '\n': de_stack[--stack_top] = 'n'; break; 1091 case '\t': de_stack[--stack_top] = 't'; break; 1092 case '\b': de_stack[--stack_top] = 'b'; break; 1093 case '\f': de_stack[--stack_top] = 'f'; break; 1094 case '\r': de_stack[--stack_top] = 'r'; break; 1095 case '\\': de_stack[--stack_top] = '\\'; break; 1096 default: 1097 de_stack[--stack_top] = '0' + c % 8; 1098 de_stack[--stack_top] = '0' + (c / 8) % 8; 1099 de_stack[--stack_top] = '0' + c / 64; 1100 break; 1101 } 1102 de_stack[--stack_top] = '\\'; 1103 } 1104 return stack_top; 1105 } 1106 #endif /* DEBUG */ 1107 1108 writeerr() 1109 { 1110 (void)fprintf(stderr, "compress: %s: %s\n", 1111 ofname[0] ? ofname : "stdout", strerror(errno)); 1112 (void)unlink(ofname); 1113 exit(1); 1114 } 1115 1116 copystat(ifname, ofname) 1117 char *ifname, *ofname; 1118 { 1119 struct stat statbuf; 1120 int mode; 1121 struct utimbuf tp; 1122 1123 fclose(stdout); 1124 if (stat(ifname, &statbuf)) { /* Get stat on input file */ 1125 perror(ifname); 1126 return; 1127 } 1128 if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) { 1129 if(quiet) 1130 fprintf(stderr, "%s: ", ifname); 1131 fprintf(stderr, " -- not a regular file: unchanged"); 1132 exit_stat = 1; 1133 perm_stat = 1; 1134 } else if (statbuf.st_nlink > 1) { 1135 if(quiet) 1136 fprintf(stderr, "%s: ", ifname); 1137 fprintf(stderr, " -- has %d other links: unchanged", 1138 statbuf.st_nlink - 1); 1139 exit_stat = 1; 1140 perm_stat = 1; 1141 } else if (exit_stat == 2 && (!force)) { /* No compression: remove file.Z */ 1142 if(!quiet) 1143 fprintf(stderr, " -- file unchanged"); 1144 } else { /* ***** Successful Compression ***** */ 1145 exit_stat = 0; 1146 mode = statbuf.st_mode & 07777; 1147 if (chmod(ofname, mode)) /* Copy modes */ 1148 perror(ofname); 1149 chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */ 1150 tp.actime = statbuf.st_atime; 1151 tp.modtime = statbuf.st_mtime; 1152 utime(ofname, &tp); /* Update last accessed and modified times */ 1153 if (unlink(ifname)) /* Remove input file */ 1154 perror(ifname); 1155 if(!quiet) 1156 fprintf(stderr, " -- replaced with %s", ofname); 1157 return; /* Successful return */ 1158 } 1159 1160 /* Unsuccessful return -- one of the tests failed */ 1161 if (unlink(ofname)) 1162 perror(ofname); 1163 } 1164 1165 void 1166 onintr ( ) 1167 { 1168 if (!precious) 1169 unlink ( ofname ); 1170 exit ( 1 ); 1171 } 1172 1173 void 1174 oops ( ) /* wild pointer -- assume bad input */ 1175 { 1176 if ( do_decomp ) 1177 fprintf ( stderr, "uncompress: corrupt input\n" ); 1178 unlink ( ofname ); 1179 exit ( 1 ); 1180 } 1181 1182 cl_block () /* table clear for block compress */ 1183 { 1184 register long int rat; 1185 1186 checkpoint = in_count + CHECK_GAP; 1187 #ifdef DEBUG 1188 if ( debug ) { 1189 fprintf ( stderr, "count: %ld, ratio: ", in_count ); 1190 prratio ( stderr, in_count, bytes_out ); 1191 fprintf ( stderr, "\n"); 1192 } 1193 #endif /* DEBUG */ 1194 1195 if(in_count > 0x007fffff) { /* shift will overflow */ 1196 rat = bytes_out >> 8; 1197 if(rat == 0) { /* Don't divide by zero */ 1198 rat = 0x7fffffff; 1199 } else { 1200 rat = in_count / rat; 1201 } 1202 } else { 1203 rat = (in_count << 8) / bytes_out; /* 8 fractional bits */ 1204 } 1205 if ( rat > ratio ) { 1206 ratio = rat; 1207 } else { 1208 ratio = 0; 1209 #ifdef DEBUG 1210 if(verbose) 1211 dump_tab(); /* dump string table */ 1212 #endif 1213 cl_hash ( (count_int) hsize ); 1214 free_ent = FIRST; 1215 clear_flg = 1; 1216 output ( (code_int) CLEAR ); 1217 #ifdef DEBUG 1218 if(debug) 1219 fprintf ( stderr, "clear\n" ); 1220 #endif /* DEBUG */ 1221 } 1222 } 1223 1224 cl_hash(hsize) /* reset code table */ 1225 register count_int hsize; 1226 { 1227 register count_int *htab_p = htab+hsize; 1228 register long i; 1229 register long m1 = -1; 1230 1231 i = hsize - 16; 1232 do { /* might use Sys V memset(3) here */ 1233 *(htab_p-16) = m1; 1234 *(htab_p-15) = m1; 1235 *(htab_p-14) = m1; 1236 *(htab_p-13) = m1; 1237 *(htab_p-12) = m1; 1238 *(htab_p-11) = m1; 1239 *(htab_p-10) = m1; 1240 *(htab_p-9) = m1; 1241 *(htab_p-8) = m1; 1242 *(htab_p-7) = m1; 1243 *(htab_p-6) = m1; 1244 *(htab_p-5) = m1; 1245 *(htab_p-4) = m1; 1246 *(htab_p-3) = m1; 1247 *(htab_p-2) = m1; 1248 *(htab_p-1) = m1; 1249 htab_p -= 16; 1250 } while ((i -= 16) >= 0); 1251 for ( i += 16; i > 0; i-- ) 1252 *--htab_p = m1; 1253 } 1254 1255 prratio(stream, num, den) 1256 FILE *stream; 1257 long int num, den; 1258 { 1259 register int q; /* Doesn't need to be long */ 1260 1261 if(num > 214748L) { /* 2147483647/10000 */ 1262 q = num / (den / 10000L); 1263 } else { 1264 q = 10000L * num / den; /* Long calculations, though */ 1265 } 1266 if (q < 0) { 1267 putc('-', stream); 1268 q = -q; 1269 } 1270 fprintf(stream, "%d.%02d%%", q / 100, q % 100); 1271 } 1272 1273 usage() 1274 { 1275 (void)fprintf(stderr, 1276 #ifdef DEBUG 1277 "compress [-CDVcdfnv] [-b maxbits] [file ...]\n"); 1278 #else 1279 "compress [-Ccdfnv] [-b maxbits] [file ...]\n"); 1280 #endif 1281 exit(1); 1282 } 1283