1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Edward Wang at The University of California, Berkeley. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)compress.c 8.1 (Berkeley) 6/6/93 37 * $FreeBSD: src/usr.bin/window/compress.c,v 1.2.12.1 2001/05/17 09:45:00 obrien Exp $ 38 * $DragonFly: src/usr.bin/window/compress.c,v 1.2 2003/06/17 04:29:33 dillon Exp $ 39 */ 40 41 /* special */ 42 #include <stdio.h> 43 #include <fcntl.h> 44 #include <stdlib.h> 45 #include <string.h> 46 47 #include "ww.h" 48 #include "tt.h" 49 50 int cc_trace = 0; 51 FILE *cc_trace_fp; 52 53 /* tunable parameters */ 54 55 int cc_reverse = 1; 56 int cc_sort = 0; 57 int cc_chop = 0; 58 59 int cc_token_max = 8; /* <= TOKEN_MAX */ 60 int cc_token_min = 2; /* > tt.tt_put_token_cost */ 61 int cc_npass0 = 1; 62 int cc_npass1 = 1; 63 64 int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */ 65 66 int cc_ntoken = 8192; 67 68 #define cc_weight XXX 69 #ifndef cc_weight 70 int cc_weight = 0; 71 #endif 72 73 #define TOKEN_MAX 16 74 75 struct cc { 76 char string[TOKEN_MAX]; 77 char length; 78 char flag; 79 #ifndef cc_weight 80 short weight; 81 #endif 82 long time; /* time last seen */ 83 short bcount; /* count in this buffer */ 84 short ccount; /* count in compression */ 85 short places; /* places in the buffer */ 86 short code; /* token code */ 87 struct cc *qforw, *qback; 88 struct cc *hforw, **hback; 89 }; 90 91 short cc_thresholds[TOKEN_MAX + 1]; 92 #define thresh(length) (cc_thresholds[length]) 93 #define threshp(code, count, length) \ 94 ((code) >= 0 || (short) (count) >= cc_thresholds[length]) 95 96 #ifndef cc_weight 97 short cc_wthresholds[TOKEN_MAX + 1]; 98 #define wthresh(length) (cc_wthresholds[length]) 99 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length]) 100 #else 101 #define wthreshp(weight, length) (0) 102 #endif 103 104 #ifndef cc_weight 105 short cc_wlimits[TOKEN_MAX + 1]; 106 #define wlimit(length) (cc_wlimits[length]) 107 #endif 108 109 #define put_token_score(length) ((length) - tt.tt_put_token_cost) 110 111 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */ 112 #define score_adjust(score, p) \ 113 do { \ 114 int length = (p)->length; \ 115 int ccount = (p)->ccount; \ 116 if (threshp((p)->code, ccount, length) || \ 117 wthreshp((p)->weight, length)) /* XXX */ \ 118 (score) -= length - tt.tt_put_token_cost; \ 119 else \ 120 (score) += cc_score_adjustments[length][ccount]; \ 121 } while (0) 122 123 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */ 124 125 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b; 126 127 #define qinsert(p1, p2) \ 128 do { \ 129 register struct cc *forw = (p1)->qforw; \ 130 register struct cc *back = (p1)->qback; \ 131 back->qforw = forw; \ 132 forw->qback = back; \ 133 forw = (p2)->qforw; \ 134 (p1)->qforw = forw; \ 135 forw->qback = (p1); \ 136 (p2)->qforw = (p1); \ 137 (p1)->qback = (p2); \ 138 } while (0) 139 140 #define qinsertq(q, p) \ 141 ((q)->qforw == (q) ? 0 : \ 142 ((q)->qback->qforw = (p)->qforw, \ 143 (p)->qforw->qback = (q)->qback, \ 144 (q)->qforw->qback = (p), \ 145 (p)->qforw = (q)->qforw, \ 146 (q)->qforw = (q), \ 147 (q)->qback = (q))) 148 149 #define H (14) 150 #define HSIZE (1 << H) 151 #define hash(h, c) ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1) 152 153 char *cc_buffer; 154 struct cc **cc_output; /* the output array */ 155 short *cc_places[TOKEN_MAX + 1]; 156 short *cc_hashcodes; /* for computing hashcodes */ 157 struct cc **cc_htab; /* the hash table */ 158 struct cc **cc_tokens; /* holds all the active tokens */ 159 struct cc_undo { 160 struct cc **pos; 161 struct cc *val; 162 } *cc_undo; 163 164 long cc_time, cc_time0; 165 166 char *cc_tt_ob, *cc_tt_obe; 167 168 ccinit() 169 { 170 register i, j; 171 register struct cc *p; 172 173 if (tt.tt_token_max > cc_token_max) 174 tt.tt_token_max = cc_token_max; 175 if (tt.tt_token_min < cc_token_min) 176 tt.tt_token_min = cc_token_min; 177 if (tt.tt_token_min > tt.tt_token_max) { 178 tt.tt_ntoken = 0; 179 return 0; 180 } 181 if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */ 182 tt.tt_ntoken = cc_ntoken / 2; 183 #define C(x) (sizeof (x) / sizeof *(x)) 184 for (i = 0; i < C(cc_thresholds); i++) { 185 int h = i - tt.tt_put_token_cost; 186 if (h > 0) 187 cc_thresholds[i] = 188 (tt.tt_set_token_cost + 1 + h - 1) / h + 1; 189 else 190 cc_thresholds[i] = 0; 191 } 192 for (i = 0; i < C(cc_score_adjustments); i++) { 193 int t = cc_thresholds[i]; 194 for (j = 0; j < C(*cc_score_adjustments); j++) { 195 if (j >= t) 196 cc_score_adjustments[i][j] = 197 - (i - tt.tt_put_token_cost); 198 else if (j < t - 1) 199 cc_score_adjustments[i][j] = 0; 200 else 201 /* 202 * cost now is 203 * length * (ccount + 1) a 204 * cost before was 205 * set-token-cost + length + 206 * ccount * put-token-cost b 207 * the score adjustment is (b - a) 208 */ 209 cc_score_adjustments[i][j] = 210 tt.tt_set_token_cost + i + 211 j * tt.tt_put_token_cost - 212 i * (j + 1); 213 if (j >= t) 214 cc_initial_scores[i][j] = 0; 215 else 216 /* 217 * - (set-token-cost + 218 * (length - put-token-cost) - 219 * (length - put-token-cost) * ccount) 220 */ 221 cc_initial_scores[i][j] = 222 - (tt.tt_set_token_cost + 223 (i - tt.tt_put_token_cost) - 224 (i - tt.tt_put_token_cost) * j); 225 } 226 } 227 #ifndef cc_weight 228 for (i = 1; i < C(cc_wthresholds); i++) { 229 cc_wthresholds[i] = 230 ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i + 231 i / 5 + 1) * 232 cc_weight + 1; 233 cc_wlimits[i] = cc_wthresholds[i] + cc_weight; 234 } 235 #endif 236 #undef C 237 if ((cc_output = (struct cc **) 238 malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0) 239 goto nomem; 240 if ((cc_hashcodes = (short *) 241 malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0) 242 goto nomem; 243 if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0) 244 goto nomem; 245 if ((cc_tokens = (struct cc **) 246 malloc((unsigned) 247 (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) * 248 sizeof *cc_tokens)) == 0) 249 goto nomem; 250 if ((cc_undo = (struct cc_undo *) 251 malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0) 252 goto nomem; 253 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) 254 if ((cc_places[i] = (short *) 255 malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0) 256 goto nomem; 257 cc_q0a.qforw = cc_q0a.qback = &cc_q0a; 258 cc_q0b.qforw = cc_q0b.qback = &cc_q0b; 259 cc_q1a.qforw = cc_q1a.qback = &cc_q1a; 260 cc_q1b.qforw = cc_q1b.qback = &cc_q1b; 261 if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0) 262 goto nomem; 263 for (i = 0; i < tt.tt_ntoken; i++) { 264 p->code = i; 265 p->time = -1; 266 p->qback = cc_q0a.qback; 267 p->qforw = &cc_q0a; 268 p->qback->qforw = p; 269 cc_q0a.qback = p; 270 p++; 271 } 272 for (; i < cc_ntoken; i++) { 273 p->code = -1; 274 p->time = -1; 275 p->qback = cc_q1a.qback; 276 p->qforw = &cc_q1a; 277 p->qback->qforw = p; 278 cc_q1a.qback = p; 279 p++; 280 } 281 cc_tt_ob = tt_ob; 282 cc_tt_obe = tt_obe; 283 if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0) 284 goto nomem; 285 return 0; 286 nomem: 287 wwerrno = WWE_NOMEM; 288 return -1; 289 } 290 291 ccstart() 292 { 293 int ccflush(); 294 295 ttflush(); 296 tt_obp = tt_ob = cc_buffer; 297 tt_obe = tt_ob + cc_bufsize; 298 tt.tt_flush = ccflush; 299 if (cc_trace) { 300 cc_trace_fp = fopen("window-trace", "a"); 301 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1); 302 } 303 ccreset(); 304 } 305 306 ccreset() 307 { 308 register struct cc *p; 309 310 bzero((char *) cc_htab, HSIZE * sizeof *cc_htab); 311 for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw) 312 p->hback = 0; 313 for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw) 314 p->hback = 0; 315 } 316 317 ccend() 318 { 319 320 ttflush(); 321 tt_obp = tt_ob = cc_tt_ob; 322 tt_obe = cc_tt_obe; 323 tt.tt_flush = 0; 324 if (cc_trace_fp != NULL) { 325 (void) fclose(cc_trace_fp); 326 cc_trace_fp = NULL; 327 } 328 } 329 330 ccflush() 331 { 332 int bufsize = tt_obp - tt_ob; 333 int n; 334 335 if (tt_ob != cc_buffer) 336 abort(); 337 if (cc_trace_fp != NULL) { 338 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp); 339 (void) putc(-1, cc_trace_fp); 340 } 341 tt.tt_flush = 0; 342 (*tt.tt_compress)(1); 343 if (bufsize < tt.tt_token_min) { 344 ttflush(); 345 goto out; 346 } 347 tt_obp = tt_ob = cc_tt_ob; 348 tt_obe = cc_tt_obe; 349 cc_time0 = cc_time; 350 cc_time += bufsize; 351 n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens); 352 cc_compress_phase(cc_output, bufsize, cc_tokens, n); 353 cc_output_phase(cc_buffer, cc_output, bufsize); 354 ttflush(); 355 tt_obp = tt_ob = cc_buffer; 356 tt_obe = cc_buffer + cc_bufsize; 357 out: 358 (*tt.tt_compress)(0); 359 tt.tt_flush = ccflush; 360 } 361 362 cc_sweep_phase(buffer, bufsize, tokens) 363 char *buffer; 364 struct cc **tokens; 365 { 366 register struct cc **pp = tokens; 367 register i, n; 368 #ifdef STATS 369 int nn, ii; 370 #endif 371 372 #ifdef STATS 373 if (verbose >= 0) 374 time_begin(); 375 if (verbose > 0) 376 printf("Sweep:"); 377 #endif 378 cc_sweep0(buffer, bufsize, tt.tt_token_min - 1); 379 #ifdef STATS 380 ntoken_stat = 0; 381 nn = 0; 382 ii = 0; 383 #endif 384 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) { 385 #ifdef STATS 386 if (verbose > 0) { 387 if (ii > 7) { 388 printf("\n "); 389 ii = 0; 390 } 391 ii++; 392 printf(" (%d", i); 393 (void) fflush(stdout); 394 } 395 #endif 396 n = cc_sweep(buffer, bufsize, pp, i); 397 pp += n; 398 #ifdef STATS 399 if (verbose > 0) { 400 if (--n > 0) { 401 printf(" %d", n); 402 nn += n; 403 } 404 putchar(')'); 405 } 406 #endif 407 } 408 qinsertq(&cc_q1b, &cc_q1a); 409 #ifdef STATS 410 if (verbose > 0) 411 printf("\n %d tokens, %d candidates\n", 412 ntoken_stat, nn); 413 if (verbose >= 0) 414 time_end(); 415 #endif 416 return pp - tokens; 417 } 418 419 cc_sweep0(buffer, n, length) 420 char *buffer; 421 { 422 register char *p; 423 register short *hc; 424 register i; 425 register short c; 426 register short pc = tt.tt_padc; 427 428 /* n and length are at least 1 */ 429 p = buffer++; 430 hc = cc_hashcodes; 431 i = n; 432 do { 433 if ((*hc++ = *p++) == pc) 434 hc[-1] = -1; 435 } while (--i); 436 while (--length) { 437 p = buffer++; 438 hc = cc_hashcodes; 439 for (i = n--; --i;) { 440 if ((c = *p++) == pc || *hc < 0) 441 c = -1; 442 else 443 c = hash(*hc, c); 444 *hc++ = c; 445 } 446 } 447 } 448 449 cc_sweep(buffer, bufsize, tokens, length) 450 char *buffer; 451 struct cc **tokens; 452 register length; 453 { 454 register struct cc *p; 455 register char *cp; 456 register i; 457 short *hc; 458 short *places = cc_places[length]; 459 struct cc **pp = tokens; 460 short threshold = thresh(length); 461 #ifndef cc_weight 462 short wthreshold = wthresh(length); 463 short limit = wlimit(length); 464 #endif 465 int time; 466 short pc = tt.tt_padc; 467 468 i = length - 1; 469 bufsize -= i; 470 cp = buffer + i; 471 hc = cc_hashcodes; 472 time = cc_time0; 473 for (i = 0; i < bufsize; i++, time++) { 474 struct cc **h; 475 476 { 477 register short *hc1 = hc; 478 register short c = *cp++; 479 register short hh; 480 if ((hh = *hc1) < 0 || c == pc) { 481 *hc1++ = -1; 482 hc = hc1; 483 continue; 484 } 485 h = cc_htab + (*hc1++ = hash(hh, c)); 486 hc = hc1; 487 } 488 for (p = *h; p != 0; p = p->hforw) 489 if (p->length == (char) length) { 490 register char *p1 = p->string; 491 register char *p2 = cp - length; 492 register n = length; 493 do 494 if (*p1++ != *p2++) 495 goto fail; 496 while (--n); 497 break; 498 fail:; 499 } 500 if (p == 0) { 501 p = cc_q1a.qback; 502 if (p == &cc_q1a || 503 p->time >= cc_time0 && p->length == (char) length) 504 continue; 505 if (p->hback != 0) 506 if ((*p->hback = p->hforw) != 0) 507 p->hforw->hback = p->hback; 508 { 509 register char *p1 = p->string; 510 register char *p2 = cp - length; 511 register n = length; 512 do 513 *p1++ = *p2++; 514 while (--n); 515 } 516 p->length = length; 517 #ifndef cc_weight 518 p->weight = cc_weight; 519 #endif 520 p->time = time; 521 p->bcount = 1; 522 p->ccount = 0; 523 p->flag = 0; 524 if ((p->hforw = *h) != 0) 525 p->hforw->hback = &p->hforw; 526 *h = p; 527 p->hback = h; 528 qinsert(p, &cc_q1a); 529 places[i] = -1; 530 p->places = i; 531 #ifdef STATS 532 ntoken_stat++; 533 #endif 534 } else if (p->time < cc_time0) { 535 #ifndef cc_weight 536 if ((p->weight += p->time - time) < 0) 537 p->weight = cc_weight; 538 else if ((p->weight += cc_weight) > limit) 539 p->weight = limit; 540 #endif 541 p->time = time; 542 p->bcount = 1; 543 p->ccount = 0; 544 if (p->code >= 0) { 545 p->flag = 1; 546 *pp++ = p; 547 } else 548 #ifndef cc_weight 549 if (p->weight >= wthreshold) { 550 p->flag = 1; 551 *pp++ = p; 552 qinsert(p, &cc_q1b); 553 } else 554 #endif 555 { 556 p->flag = 0; 557 qinsert(p, &cc_q1a); 558 } 559 places[i] = -1; 560 p->places = i; 561 #ifdef STATS 562 ntoken_stat++; 563 #endif 564 } else if (p->time + length > time) { 565 /* 566 * overlapping token, don't count as two and 567 * don't update time, but do adjust weight to offset 568 * the difference 569 */ 570 #ifndef cc_weight 571 if (cc_weight != 0) { /* XXX */ 572 p->weight += time - p->time; 573 if (!p->flag && p->weight >= wthreshold) { 574 p->flag = 1; 575 *pp++ = p; 576 qinsert(p, &cc_q1b); 577 } 578 } 579 #endif 580 places[i] = p->places; 581 p->places = i; 582 } else { 583 #ifndef cc_weight 584 if ((p->weight += p->time - time) < 0) 585 p->weight = cc_weight; 586 else if ((p->weight += cc_weight) > limit) 587 p->weight = limit; 588 #endif 589 p->time = time; 590 p->bcount++; 591 if (!p->flag && 592 /* code must be < 0 if flag false here */ 593 (p->bcount >= threshold 594 #ifndef cc_weight 595 || p->weight >= wthreshold 596 #endif 597 )) { 598 p->flag = 1; 599 *pp++ = p; 600 qinsert(p, &cc_q1b); 601 } 602 places[i] = p->places; 603 p->places = i; 604 } 605 } 606 if ((i = pp - tokens) > 0) { 607 *pp = 0; 608 if (cc_reverse) 609 cc_sweep_reverse(tokens, places); 610 if (cc_sort && i > 1) { 611 int cc_token_compare(); 612 qsort((char *) tokens, i, sizeof *tokens, 613 cc_token_compare); 614 } 615 if (cc_chop) { 616 if ((i = i * cc_chop / 100) == 0) 617 i = 1; 618 tokens[i] = 0; 619 } 620 i++; 621 } 622 return i; 623 } 624 625 cc_sweep_reverse(pp, places) 626 register struct cc **pp; 627 register short *places; 628 { 629 register struct cc *p; 630 register short front, back, t; 631 632 while ((p = *pp++) != 0) { 633 back = -1; 634 t = p->places; 635 /* the list is never empty */ 636 do { 637 front = places[t]; 638 places[t] = back; 639 back = t; 640 } while ((t = front) >= 0); 641 p->places = back; 642 } 643 } 644 645 cc_compress_phase(output, bufsize, tokens, ntoken) 646 struct cc **output; 647 struct cc **tokens; 648 { 649 register i; 650 651 bzero((char *) output, bufsize * sizeof *output); 652 for (i = 0; i < cc_npass0; i++) 653 cc_compress_phase1(output, tokens, ntoken, 0); 654 for (i = 0; i < cc_npass1; i++) 655 cc_compress_phase1(output, tokens, ntoken, 1); 656 cc_compress_cleanup(output, bufsize); 657 } 658 659 cc_compress_phase1(output, tokens, ntoken, flag) 660 register struct cc **output; 661 struct cc **tokens; 662 { 663 register struct cc **pp; 664 #ifdef STATS 665 register int i = 0; 666 int nt = 0, cc = 0, nc = 0; 667 #endif 668 669 #ifdef STATS 670 if (verbose >= 0) 671 time_begin(); 672 if (verbose > 0) 673 printf("Compress:"); 674 #endif 675 pp = tokens; 676 while (pp < tokens + ntoken) { 677 #ifdef STATS 678 if (verbose > 0) { 679 ntoken_stat = 0; 680 ccount_stat = 0; 681 ncover_stat = 0; 682 if (i > 2) { 683 printf("\n "); 684 i = 0; 685 } 686 i++; 687 printf(" (%d", (*pp)->length); 688 (void) fflush(stdout); 689 } 690 #endif 691 pp += cc_compress(output, pp, flag); 692 #ifdef STATS 693 if (verbose > 0) { 694 printf(" %dt %du %dc)", ntoken_stat, ccount_stat, 695 ncover_stat); 696 nt += ntoken_stat; 697 cc += ccount_stat; 698 nc += ncover_stat; 699 } 700 #endif 701 } 702 #ifdef STATS 703 if (verbose > 0) 704 printf("\n total: (%dt %du %dc)\n", nt, cc, nc); 705 if (verbose >= 0) 706 time_end(); 707 #endif 708 } 709 710 cc_compress_cleanup(output, bufsize) 711 register struct cc **output; 712 { 713 register struct cc **end; 714 715 /* the previous output phase may have been interrupted */ 716 qinsertq(&cc_q0b, &cc_q0a); 717 for (end = output + bufsize; output < end;) { 718 register struct cc *p; 719 register length; 720 if ((p = *output) == 0) { 721 output++; 722 continue; 723 } 724 length = p->length; 725 if (!p->flag) { 726 } else if (p->code >= 0) { 727 qinsert(p, &cc_q0b); 728 p->flag = 0; 729 } else if (p->ccount == 0) { 730 *output = 0; 731 } else if (p->ccount >= thresh(length) 732 #ifndef cc_weight 733 || wthreshp(p->weight, length) 734 #endif 735 ) { 736 p->flag = 0; 737 } else { 738 p->ccount = 0; 739 *output = 0; 740 } 741 output += length; 742 } 743 } 744 745 cc_compress(output, tokens, flag) 746 struct cc **output; 747 struct cc **tokens; 748 char flag; 749 { 750 struct cc **pp = tokens; 751 register struct cc *p = *pp++; 752 int length = p->length; 753 int threshold = thresh(length); 754 #ifndef cc_weight 755 short wthreshold = wthresh(length); 756 #endif 757 short *places = cc_places[length]; 758 int *initial_scores = cc_initial_scores[length]; 759 int initial_score0 = put_token_score(length); 760 761 do { 762 int score; 763 register struct cc_undo *undop; 764 int ccount; 765 #ifdef STATS 766 int ncover; 767 #endif 768 int i; 769 770 ccount = p->ccount; 771 if ((short) ccount >= p->bcount) 772 continue; 773 if (p->code >= 0 || ccount >= threshold) 774 score = 0; 775 #ifndef cc_weight 776 else if (p->weight >= wthreshold) 777 /* allow one fewer match than normal */ 778 /* XXX, should adjust for ccount */ 779 score = - tt.tt_set_token_cost; 780 #endif 781 else 782 score = initial_scores[ccount]; 783 undop = cc_undo; 784 #ifdef STATS 785 ncover = 0; 786 #endif 787 for (i = p->places; i >= 0; i = places[i]) { 788 register struct cc **jp; 789 register struct cc *x; 790 register struct cc **ip = output + i; 791 register score0 = initial_score0; 792 struct cc **iip = ip + length; 793 struct cc_undo *undop1 = undop; 794 795 if ((x = *(jp = ip)) != 0) 796 goto z; 797 while (--jp >= output) 798 if ((x = *jp) != 0) { 799 if (jp + x->length > ip) 800 goto z; 801 break; 802 } 803 jp = ip + 1; 804 while (jp < iip) { 805 if ((x = *jp) == 0) { 806 jp++; 807 continue; 808 } 809 z: 810 if (x == p) 811 goto undo; 812 #ifdef STATS 813 ncover++; 814 #endif 815 undop->pos = jp; 816 undop->val = x; 817 undop++; 818 *jp = 0; 819 x->ccount--; 820 score_adjust(score0, x); 821 if (score0 < 0 && flag) 822 goto undo; 823 jp += x->length; 824 } 825 undop->pos = ip; 826 undop->val = 0; 827 undop++; 828 *ip = p; 829 ccount++; 830 score += score0; 831 continue; 832 undo: 833 while (--undop >= undop1) 834 if (*undop->pos = x = undop->val) 835 x->ccount++; 836 undop++; 837 } 838 if (score > 0) { 839 #ifdef STATS 840 ccount_stat += ccount - p->ccount; 841 ntoken_stat++; 842 ncover_stat += ncover; 843 #endif 844 p->ccount = ccount; 845 } else { 846 register struct cc_undo *u = cc_undo; 847 while (--undop >= u) { 848 register struct cc *x; 849 if (*undop->pos = x = undop->val) 850 x->ccount++; 851 } 852 } 853 } while ((p = *pp++) != 0); 854 return pp - tokens; 855 } 856 857 cc_output_phase(buffer, output, bufsize) 858 register char *buffer; 859 register struct cc **output; 860 register bufsize; 861 { 862 register i; 863 register struct cc *p, *p1; 864 865 for (i = 0; i < bufsize;) { 866 if ((p = output[i]) == 0) { 867 ttputc(buffer[i]); 868 i++; 869 } else if (p->code >= 0) { 870 if (--p->ccount == 0) 871 qinsert(p, &cc_q0a); 872 (*tt.tt_put_token)(p->code, p->string, p->length); 873 wwntokuse++; 874 wwntoksave += put_token_score(p->length); 875 i += p->length; 876 } else if ((p1 = cc_q0a.qback) != &cc_q0a) { 877 p->code = p1->code; 878 p1->code = -1; 879 qinsert(p1, &cc_q1a); 880 if (--p->ccount == 0) 881 qinsert(p, &cc_q0a); 882 else 883 qinsert(p, &cc_q0b); 884 (*tt.tt_set_token)(p->code, p->string, p->length); 885 wwntokdef++; 886 wwntoksave -= tt.tt_set_token_cost; 887 i += p->length; 888 } else { 889 p->ccount--; 890 ttwrite(p->string, p->length); 891 wwntokbad++; 892 i += p->length; 893 } 894 } 895 wwntokc += bufsize; 896 } 897 898 cc_token_compare(p1, p2) 899 struct cc **p1, **p2; 900 { 901 return (*p2)->bcount - (*p1)->bcount; 902 } 903