1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Edward Wang at The University of California, Berkeley. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)compress.c 8.1 (Berkeley) 6/6/93 37 * $FreeBSD: src/usr.bin/window/compress.c,v 1.2.12.1 2001/05/17 09:45:00 obrien Exp $ 38 * $DragonFly: src/usr.bin/window/compress.c,v 1.2 2003/06/17 04:29:33 dillon Exp $ 39 */ 40 41 #include "ww.h" 42 #include "tt.h" 43 44 /* special */ 45 #include <stdio.h> 46 #include <fcntl.h> 47 #include <stdlib.h> 48 int cc_trace = 0; 49 FILE *cc_trace_fp; 50 51 /* tunable parameters */ 52 53 int cc_reverse = 1; 54 int cc_sort = 0; 55 int cc_chop = 0; 56 57 int cc_token_max = 8; /* <= TOKEN_MAX */ 58 int cc_token_min = 2; /* > tt.tt_put_token_cost */ 59 int cc_npass0 = 1; 60 int cc_npass1 = 1; 61 62 int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */ 63 64 int cc_ntoken = 8192; 65 66 #define cc_weight XXX 67 #ifndef cc_weight 68 int cc_weight = 0; 69 #endif 70 71 #define TOKEN_MAX 16 72 73 struct cc { 74 char string[TOKEN_MAX]; 75 char length; 76 char flag; 77 #ifndef cc_weight 78 short weight; 79 #endif 80 long time; /* time last seen */ 81 short bcount; /* count in this buffer */ 82 short ccount; /* count in compression */ 83 short places; /* places in the buffer */ 84 short code; /* token code */ 85 struct cc *qforw, *qback; 86 struct cc *hforw, **hback; 87 }; 88 89 short cc_thresholds[TOKEN_MAX + 1]; 90 #define thresh(length) (cc_thresholds[length]) 91 #define threshp(code, count, length) \ 92 ((code) >= 0 || (short) (count) >= cc_thresholds[length]) 93 94 #ifndef cc_weight 95 short cc_wthresholds[TOKEN_MAX + 1]; 96 #define wthresh(length) (cc_wthresholds[length]) 97 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length]) 98 #else 99 #define wthreshp(weight, length) (0) 100 #endif 101 102 #ifndef cc_weight 103 short cc_wlimits[TOKEN_MAX + 1]; 104 #define wlimit(length) (cc_wlimits[length]) 105 #endif 106 107 #define put_token_score(length) ((length) - tt.tt_put_token_cost) 108 109 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */ 110 #define score_adjust(score, p) \ 111 do { \ 112 int length = (p)->length; \ 113 int ccount = (p)->ccount; \ 114 if (threshp((p)->code, ccount, length) || \ 115 wthreshp((p)->weight, length)) /* XXX */ \ 116 (score) -= length - tt.tt_put_token_cost; \ 117 else \ 118 (score) += cc_score_adjustments[length][ccount]; \ 119 } while (0) 120 121 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */ 122 123 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b; 124 125 #define qinsert(p1, p2) \ 126 do { \ 127 register struct cc *forw = (p1)->qforw; \ 128 register struct cc *back = (p1)->qback; \ 129 back->qforw = forw; \ 130 forw->qback = back; \ 131 forw = (p2)->qforw; \ 132 (p1)->qforw = forw; \ 133 forw->qback = (p1); \ 134 (p2)->qforw = (p1); \ 135 (p1)->qback = (p2); \ 136 } while (0) 137 138 #define qinsertq(q, p) \ 139 ((q)->qforw == (q) ? 0 : \ 140 ((q)->qback->qforw = (p)->qforw, \ 141 (p)->qforw->qback = (q)->qback, \ 142 (q)->qforw->qback = (p), \ 143 (p)->qforw = (q)->qforw, \ 144 (q)->qforw = (q), \ 145 (q)->qback = (q))) 146 147 #define H (14) 148 #define HSIZE (1 << H) 149 #define hash(h, c) ((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1) 150 151 char *cc_buffer; 152 struct cc **cc_output; /* the output array */ 153 short *cc_places[TOKEN_MAX + 1]; 154 short *cc_hashcodes; /* for computing hashcodes */ 155 struct cc **cc_htab; /* the hash table */ 156 struct cc **cc_tokens; /* holds all the active tokens */ 157 struct cc_undo { 158 struct cc **pos; 159 struct cc *val; 160 } *cc_undo; 161 162 long cc_time, cc_time0; 163 164 char *cc_tt_ob, *cc_tt_obe; 165 166 ccinit() 167 { 168 register i, j; 169 register struct cc *p; 170 171 if (tt.tt_token_max > cc_token_max) 172 tt.tt_token_max = cc_token_max; 173 if (tt.tt_token_min < cc_token_min) 174 tt.tt_token_min = cc_token_min; 175 if (tt.tt_token_min > tt.tt_token_max) { 176 tt.tt_ntoken = 0; 177 return 0; 178 } 179 if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */ 180 tt.tt_ntoken = cc_ntoken / 2; 181 #define C(x) (sizeof (x) / sizeof *(x)) 182 for (i = 0; i < C(cc_thresholds); i++) { 183 int h = i - tt.tt_put_token_cost; 184 if (h > 0) 185 cc_thresholds[i] = 186 (tt.tt_set_token_cost + 1 + h - 1) / h + 1; 187 else 188 cc_thresholds[i] = 0; 189 } 190 for (i = 0; i < C(cc_score_adjustments); i++) { 191 int t = cc_thresholds[i]; 192 for (j = 0; j < C(*cc_score_adjustments); j++) { 193 if (j >= t) 194 cc_score_adjustments[i][j] = 195 - (i - tt.tt_put_token_cost); 196 else if (j < t - 1) 197 cc_score_adjustments[i][j] = 0; 198 else 199 /* 200 * cost now is 201 * length * (ccount + 1) a 202 * cost before was 203 * set-token-cost + length + 204 * ccount * put-token-cost b 205 * the score adjustment is (b - a) 206 */ 207 cc_score_adjustments[i][j] = 208 tt.tt_set_token_cost + i + 209 j * tt.tt_put_token_cost - 210 i * (j + 1); 211 if (j >= t) 212 cc_initial_scores[i][j] = 0; 213 else 214 /* 215 * - (set-token-cost + 216 * (length - put-token-cost) - 217 * (length - put-token-cost) * ccount) 218 */ 219 cc_initial_scores[i][j] = 220 - (tt.tt_set_token_cost + 221 (i - tt.tt_put_token_cost) - 222 (i - tt.tt_put_token_cost) * j); 223 } 224 } 225 #ifndef cc_weight 226 for (i = 1; i < C(cc_wthresholds); i++) { 227 cc_wthresholds[i] = 228 ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i + 229 i / 5 + 1) * 230 cc_weight + 1; 231 cc_wlimits[i] = cc_wthresholds[i] + cc_weight; 232 } 233 #endif 234 #undef C 235 if ((cc_output = (struct cc **) 236 malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0) 237 goto nomem; 238 if ((cc_hashcodes = (short *) 239 malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0) 240 goto nomem; 241 if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0) 242 goto nomem; 243 if ((cc_tokens = (struct cc **) 244 malloc((unsigned) 245 (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) * 246 sizeof *cc_tokens)) == 0) 247 goto nomem; 248 if ((cc_undo = (struct cc_undo *) 249 malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0) 250 goto nomem; 251 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) 252 if ((cc_places[i] = (short *) 253 malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0) 254 goto nomem; 255 cc_q0a.qforw = cc_q0a.qback = &cc_q0a; 256 cc_q0b.qforw = cc_q0b.qback = &cc_q0b; 257 cc_q1a.qforw = cc_q1a.qback = &cc_q1a; 258 cc_q1b.qforw = cc_q1b.qback = &cc_q1b; 259 if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0) 260 goto nomem; 261 for (i = 0; i < tt.tt_ntoken; i++) { 262 p->code = i; 263 p->time = -1; 264 p->qback = cc_q0a.qback; 265 p->qforw = &cc_q0a; 266 p->qback->qforw = p; 267 cc_q0a.qback = p; 268 p++; 269 } 270 for (; i < cc_ntoken; i++) { 271 p->code = -1; 272 p->time = -1; 273 p->qback = cc_q1a.qback; 274 p->qforw = &cc_q1a; 275 p->qback->qforw = p; 276 cc_q1a.qback = p; 277 p++; 278 } 279 cc_tt_ob = tt_ob; 280 cc_tt_obe = tt_obe; 281 if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0) 282 goto nomem; 283 return 0; 284 nomem: 285 wwerrno = WWE_NOMEM; 286 return -1; 287 } 288 289 ccstart() 290 { 291 int ccflush(); 292 293 ttflush(); 294 tt_obp = tt_ob = cc_buffer; 295 tt_obe = tt_ob + cc_bufsize; 296 tt.tt_flush = ccflush; 297 if (cc_trace) { 298 cc_trace_fp = fopen("window-trace", "a"); 299 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1); 300 } 301 ccreset(); 302 } 303 304 ccreset() 305 { 306 register struct cc *p; 307 308 bzero((char *) cc_htab, HSIZE * sizeof *cc_htab); 309 for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw) 310 p->hback = 0; 311 for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw) 312 p->hback = 0; 313 } 314 315 ccend() 316 { 317 318 ttflush(); 319 tt_obp = tt_ob = cc_tt_ob; 320 tt_obe = cc_tt_obe; 321 tt.tt_flush = 0; 322 if (cc_trace_fp != NULL) { 323 (void) fclose(cc_trace_fp); 324 cc_trace_fp = NULL; 325 } 326 } 327 328 ccflush() 329 { 330 int bufsize = tt_obp - tt_ob; 331 int n; 332 333 if (tt_ob != cc_buffer) 334 abort(); 335 if (cc_trace_fp != NULL) { 336 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp); 337 (void) putc(-1, cc_trace_fp); 338 } 339 tt.tt_flush = 0; 340 (*tt.tt_compress)(1); 341 if (bufsize < tt.tt_token_min) { 342 ttflush(); 343 goto out; 344 } 345 tt_obp = tt_ob = cc_tt_ob; 346 tt_obe = cc_tt_obe; 347 cc_time0 = cc_time; 348 cc_time += bufsize; 349 n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens); 350 cc_compress_phase(cc_output, bufsize, cc_tokens, n); 351 cc_output_phase(cc_buffer, cc_output, bufsize); 352 ttflush(); 353 tt_obp = tt_ob = cc_buffer; 354 tt_obe = cc_buffer + cc_bufsize; 355 out: 356 (*tt.tt_compress)(0); 357 tt.tt_flush = ccflush; 358 } 359 360 cc_sweep_phase(buffer, bufsize, tokens) 361 char *buffer; 362 struct cc **tokens; 363 { 364 register struct cc **pp = tokens; 365 register i, n; 366 #ifdef STATS 367 int nn, ii; 368 #endif 369 370 #ifdef STATS 371 if (verbose >= 0) 372 time_begin(); 373 if (verbose > 0) 374 printf("Sweep:"); 375 #endif 376 cc_sweep0(buffer, bufsize, tt.tt_token_min - 1); 377 #ifdef STATS 378 ntoken_stat = 0; 379 nn = 0; 380 ii = 0; 381 #endif 382 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) { 383 #ifdef STATS 384 if (verbose > 0) { 385 if (ii > 7) { 386 printf("\n "); 387 ii = 0; 388 } 389 ii++; 390 printf(" (%d", i); 391 (void) fflush(stdout); 392 } 393 #endif 394 n = cc_sweep(buffer, bufsize, pp, i); 395 pp += n; 396 #ifdef STATS 397 if (verbose > 0) { 398 if (--n > 0) { 399 printf(" %d", n); 400 nn += n; 401 } 402 putchar(')'); 403 } 404 #endif 405 } 406 qinsertq(&cc_q1b, &cc_q1a); 407 #ifdef STATS 408 if (verbose > 0) 409 printf("\n %d tokens, %d candidates\n", 410 ntoken_stat, nn); 411 if (verbose >= 0) 412 time_end(); 413 #endif 414 return pp - tokens; 415 } 416 417 cc_sweep0(buffer, n, length) 418 char *buffer; 419 { 420 register char *p; 421 register short *hc; 422 register i; 423 register short c; 424 register short pc = tt.tt_padc; 425 426 /* n and length are at least 1 */ 427 p = buffer++; 428 hc = cc_hashcodes; 429 i = n; 430 do { 431 if ((*hc++ = *p++) == pc) 432 hc[-1] = -1; 433 } while (--i); 434 while (--length) { 435 p = buffer++; 436 hc = cc_hashcodes; 437 for (i = n--; --i;) { 438 if ((c = *p++) == pc || *hc < 0) 439 c = -1; 440 else 441 c = hash(*hc, c); 442 *hc++ = c; 443 } 444 } 445 } 446 447 cc_sweep(buffer, bufsize, tokens, length) 448 char *buffer; 449 struct cc **tokens; 450 register length; 451 { 452 register struct cc *p; 453 register char *cp; 454 register i; 455 short *hc; 456 short *places = cc_places[length]; 457 struct cc **pp = tokens; 458 short threshold = thresh(length); 459 #ifndef cc_weight 460 short wthreshold = wthresh(length); 461 short limit = wlimit(length); 462 #endif 463 int time; 464 short pc = tt.tt_padc; 465 466 i = length - 1; 467 bufsize -= i; 468 cp = buffer + i; 469 hc = cc_hashcodes; 470 time = cc_time0; 471 for (i = 0; i < bufsize; i++, time++) { 472 struct cc **h; 473 474 { 475 register short *hc1 = hc; 476 register short c = *cp++; 477 register short hh; 478 if ((hh = *hc1) < 0 || c == pc) { 479 *hc1++ = -1; 480 hc = hc1; 481 continue; 482 } 483 h = cc_htab + (*hc1++ = hash(hh, c)); 484 hc = hc1; 485 } 486 for (p = *h; p != 0; p = p->hforw) 487 if (p->length == (char) length) { 488 register char *p1 = p->string; 489 register char *p2 = cp - length; 490 register n = length; 491 do 492 if (*p1++ != *p2++) 493 goto fail; 494 while (--n); 495 break; 496 fail:; 497 } 498 if (p == 0) { 499 p = cc_q1a.qback; 500 if (p == &cc_q1a || 501 p->time >= cc_time0 && p->length == (char) length) 502 continue; 503 if (p->hback != 0) 504 if ((*p->hback = p->hforw) != 0) 505 p->hforw->hback = p->hback; 506 { 507 register char *p1 = p->string; 508 register char *p2 = cp - length; 509 register n = length; 510 do 511 *p1++ = *p2++; 512 while (--n); 513 } 514 p->length = length; 515 #ifndef cc_weight 516 p->weight = cc_weight; 517 #endif 518 p->time = time; 519 p->bcount = 1; 520 p->ccount = 0; 521 p->flag = 0; 522 if ((p->hforw = *h) != 0) 523 p->hforw->hback = &p->hforw; 524 *h = p; 525 p->hback = h; 526 qinsert(p, &cc_q1a); 527 places[i] = -1; 528 p->places = i; 529 #ifdef STATS 530 ntoken_stat++; 531 #endif 532 } else if (p->time < cc_time0) { 533 #ifndef cc_weight 534 if ((p->weight += p->time - time) < 0) 535 p->weight = cc_weight; 536 else if ((p->weight += cc_weight) > limit) 537 p->weight = limit; 538 #endif 539 p->time = time; 540 p->bcount = 1; 541 p->ccount = 0; 542 if (p->code >= 0) { 543 p->flag = 1; 544 *pp++ = p; 545 } else 546 #ifndef cc_weight 547 if (p->weight >= wthreshold) { 548 p->flag = 1; 549 *pp++ = p; 550 qinsert(p, &cc_q1b); 551 } else 552 #endif 553 { 554 p->flag = 0; 555 qinsert(p, &cc_q1a); 556 } 557 places[i] = -1; 558 p->places = i; 559 #ifdef STATS 560 ntoken_stat++; 561 #endif 562 } else if (p->time + length > time) { 563 /* 564 * overlapping token, don't count as two and 565 * don't update time, but do adjust weight to offset 566 * the difference 567 */ 568 #ifndef cc_weight 569 if (cc_weight != 0) { /* XXX */ 570 p->weight += time - p->time; 571 if (!p->flag && p->weight >= wthreshold) { 572 p->flag = 1; 573 *pp++ = p; 574 qinsert(p, &cc_q1b); 575 } 576 } 577 #endif 578 places[i] = p->places; 579 p->places = i; 580 } else { 581 #ifndef cc_weight 582 if ((p->weight += p->time - time) < 0) 583 p->weight = cc_weight; 584 else if ((p->weight += cc_weight) > limit) 585 p->weight = limit; 586 #endif 587 p->time = time; 588 p->bcount++; 589 if (!p->flag && 590 /* code must be < 0 if flag false here */ 591 (p->bcount >= threshold 592 #ifndef cc_weight 593 || p->weight >= wthreshold 594 #endif 595 )) { 596 p->flag = 1; 597 *pp++ = p; 598 qinsert(p, &cc_q1b); 599 } 600 places[i] = p->places; 601 p->places = i; 602 } 603 } 604 if ((i = pp - tokens) > 0) { 605 *pp = 0; 606 if (cc_reverse) 607 cc_sweep_reverse(tokens, places); 608 if (cc_sort && i > 1) { 609 int cc_token_compare(); 610 qsort((char *) tokens, i, sizeof *tokens, 611 cc_token_compare); 612 } 613 if (cc_chop) { 614 if ((i = i * cc_chop / 100) == 0) 615 i = 1; 616 tokens[i] = 0; 617 } 618 i++; 619 } 620 return i; 621 } 622 623 cc_sweep_reverse(pp, places) 624 register struct cc **pp; 625 register short *places; 626 { 627 register struct cc *p; 628 register short front, back, t; 629 630 while ((p = *pp++) != 0) { 631 back = -1; 632 t = p->places; 633 /* the list is never empty */ 634 do { 635 front = places[t]; 636 places[t] = back; 637 back = t; 638 } while ((t = front) >= 0); 639 p->places = back; 640 } 641 } 642 643 cc_compress_phase(output, bufsize, tokens, ntoken) 644 struct cc **output; 645 struct cc **tokens; 646 { 647 register i; 648 649 bzero((char *) output, bufsize * sizeof *output); 650 for (i = 0; i < cc_npass0; i++) 651 cc_compress_phase1(output, tokens, ntoken, 0); 652 for (i = 0; i < cc_npass1; i++) 653 cc_compress_phase1(output, tokens, ntoken, 1); 654 cc_compress_cleanup(output, bufsize); 655 } 656 657 cc_compress_phase1(output, tokens, ntoken, flag) 658 register struct cc **output; 659 struct cc **tokens; 660 { 661 register struct cc **pp; 662 #ifdef STATS 663 register int i = 0; 664 int nt = 0, cc = 0, nc = 0; 665 #endif 666 667 #ifdef STATS 668 if (verbose >= 0) 669 time_begin(); 670 if (verbose > 0) 671 printf("Compress:"); 672 #endif 673 pp = tokens; 674 while (pp < tokens + ntoken) { 675 #ifdef STATS 676 if (verbose > 0) { 677 ntoken_stat = 0; 678 ccount_stat = 0; 679 ncover_stat = 0; 680 if (i > 2) { 681 printf("\n "); 682 i = 0; 683 } 684 i++; 685 printf(" (%d", (*pp)->length); 686 (void) fflush(stdout); 687 } 688 #endif 689 pp += cc_compress(output, pp, flag); 690 #ifdef STATS 691 if (verbose > 0) { 692 printf(" %dt %du %dc)", ntoken_stat, ccount_stat, 693 ncover_stat); 694 nt += ntoken_stat; 695 cc += ccount_stat; 696 nc += ncover_stat; 697 } 698 #endif 699 } 700 #ifdef STATS 701 if (verbose > 0) 702 printf("\n total: (%dt %du %dc)\n", nt, cc, nc); 703 if (verbose >= 0) 704 time_end(); 705 #endif 706 } 707 708 cc_compress_cleanup(output, bufsize) 709 register struct cc **output; 710 { 711 register struct cc **end; 712 713 /* the previous output phase may have been interrupted */ 714 qinsertq(&cc_q0b, &cc_q0a); 715 for (end = output + bufsize; output < end;) { 716 register struct cc *p; 717 register length; 718 if ((p = *output) == 0) { 719 output++; 720 continue; 721 } 722 length = p->length; 723 if (!p->flag) { 724 } else if (p->code >= 0) { 725 qinsert(p, &cc_q0b); 726 p->flag = 0; 727 } else if (p->ccount == 0) { 728 *output = 0; 729 } else if (p->ccount >= thresh(length) 730 #ifndef cc_weight 731 || wthreshp(p->weight, length) 732 #endif 733 ) { 734 p->flag = 0; 735 } else { 736 p->ccount = 0; 737 *output = 0; 738 } 739 output += length; 740 } 741 } 742 743 cc_compress(output, tokens, flag) 744 struct cc **output; 745 struct cc **tokens; 746 char flag; 747 { 748 struct cc **pp = tokens; 749 register struct cc *p = *pp++; 750 int length = p->length; 751 int threshold = thresh(length); 752 #ifndef cc_weight 753 short wthreshold = wthresh(length); 754 #endif 755 short *places = cc_places[length]; 756 int *initial_scores = cc_initial_scores[length]; 757 int initial_score0 = put_token_score(length); 758 759 do { 760 int score; 761 register struct cc_undo *undop; 762 int ccount; 763 #ifdef STATS 764 int ncover; 765 #endif 766 int i; 767 768 ccount = p->ccount; 769 if ((short) ccount >= p->bcount) 770 continue; 771 if (p->code >= 0 || ccount >= threshold) 772 score = 0; 773 #ifndef cc_weight 774 else if (p->weight >= wthreshold) 775 /* allow one fewer match than normal */ 776 /* XXX, should adjust for ccount */ 777 score = - tt.tt_set_token_cost; 778 #endif 779 else 780 score = initial_scores[ccount]; 781 undop = cc_undo; 782 #ifdef STATS 783 ncover = 0; 784 #endif 785 for (i = p->places; i >= 0; i = places[i]) { 786 register struct cc **jp; 787 register struct cc *x; 788 register struct cc **ip = output + i; 789 register score0 = initial_score0; 790 struct cc **iip = ip + length; 791 struct cc_undo *undop1 = undop; 792 793 if ((x = *(jp = ip)) != 0) 794 goto z; 795 while (--jp >= output) 796 if ((x = *jp) != 0) { 797 if (jp + x->length > ip) 798 goto z; 799 break; 800 } 801 jp = ip + 1; 802 while (jp < iip) { 803 if ((x = *jp) == 0) { 804 jp++; 805 continue; 806 } 807 z: 808 if (x == p) 809 goto undo; 810 #ifdef STATS 811 ncover++; 812 #endif 813 undop->pos = jp; 814 undop->val = x; 815 undop++; 816 *jp = 0; 817 x->ccount--; 818 score_adjust(score0, x); 819 if (score0 < 0 && flag) 820 goto undo; 821 jp += x->length; 822 } 823 undop->pos = ip; 824 undop->val = 0; 825 undop++; 826 *ip = p; 827 ccount++; 828 score += score0; 829 continue; 830 undo: 831 while (--undop >= undop1) 832 if (*undop->pos = x = undop->val) 833 x->ccount++; 834 undop++; 835 } 836 if (score > 0) { 837 #ifdef STATS 838 ccount_stat += ccount - p->ccount; 839 ntoken_stat++; 840 ncover_stat += ncover; 841 #endif 842 p->ccount = ccount; 843 } else { 844 register struct cc_undo *u = cc_undo; 845 while (--undop >= u) { 846 register struct cc *x; 847 if (*undop->pos = x = undop->val) 848 x->ccount++; 849 } 850 } 851 } while ((p = *pp++) != 0); 852 return pp - tokens; 853 } 854 855 cc_output_phase(buffer, output, bufsize) 856 register char *buffer; 857 register struct cc **output; 858 register bufsize; 859 { 860 register i; 861 register struct cc *p, *p1; 862 863 for (i = 0; i < bufsize;) { 864 if ((p = output[i]) == 0) { 865 ttputc(buffer[i]); 866 i++; 867 } else if (p->code >= 0) { 868 if (--p->ccount == 0) 869 qinsert(p, &cc_q0a); 870 (*tt.tt_put_token)(p->code, p->string, p->length); 871 wwntokuse++; 872 wwntoksave += put_token_score(p->length); 873 i += p->length; 874 } else if ((p1 = cc_q0a.qback) != &cc_q0a) { 875 p->code = p1->code; 876 p1->code = -1; 877 qinsert(p1, &cc_q1a); 878 if (--p->ccount == 0) 879 qinsert(p, &cc_q0a); 880 else 881 qinsert(p, &cc_q0b); 882 (*tt.tt_set_token)(p->code, p->string, p->length); 883 wwntokdef++; 884 wwntoksave -= tt.tt_set_token_cost; 885 i += p->length; 886 } else { 887 p->ccount--; 888 ttwrite(p->string, p->length); 889 wwntokbad++; 890 i += p->length; 891 } 892 } 893 wwntokc += bufsize; 894 } 895 896 cc_token_compare(p1, p2) 897 struct cc **p1, **p2; 898 { 899 return (*p2)->bcount - (*p1)->bcount; 900 } 901