1 /* @(#)compress.c 8.1 (Berkeley) 6/6/93 */ 2 /* $NetBSD: compress.c,v 1.7 2009/04/14 08:50:06 lukem Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Edward Wang at The University of California, Berkeley. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <fcntl.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include "defs.h" 41 #include "tt.h" 42 43 /* special */ 44 int cc_trace = 0; 45 FILE *cc_trace_fp; 46 47 /* tunable parameters */ 48 49 int cc_reverse = 1; 50 int cc_sort = 0; 51 int cc_chop = 0; 52 53 int cc_token_max = 8; /* <= TOKEN_MAX */ 54 int cc_token_min = 2; /* > tt.tt_put_token_cost */ 55 int cc_npass0 = 1; 56 int cc_npass1 = 1; 57 58 int cc_bufsize = 1024 * 3; /* XXX, or 80 * 24 * 2 */ 59 60 int cc_ntoken = 8192; 61 62 #define cc_weight XXX 63 #ifndef cc_weight 64 int cc_weight = 0; 65 #endif 66 67 #define TOKEN_MAX 16 68 69 struct cc { 70 char string[TOKEN_MAX]; 71 char length; 72 char flag; 73 #ifndef cc_weight 74 short weight; 75 #endif 76 long time; /* time last seen */ 77 short bcount; /* count in this buffer */ 78 short ccount; /* count in compression */ 79 short places; /* places in the buffer */ 80 short code; /* token code */ 81 struct cc *qforw, *qback; 82 struct cc *hforw, **hback; 83 }; 84 85 short cc_thresholds[TOKEN_MAX + 1]; 86 #define thresh(length) (cc_thresholds[length]) 87 #define threshp(code, count, length) \ 88 ((code) >= 0 || (short) (count) >= cc_thresholds[length]) 89 90 #ifndef cc_weight 91 short cc_wthresholds[TOKEN_MAX + 1]; 92 #define wthresh(length) (cc_wthresholds[length]) 93 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length]) 94 #else 95 #define wthreshp(weight, length) (0) 96 #endif 97 98 #ifndef cc_weight 99 short cc_wlimits[TOKEN_MAX + 1]; 100 #define wlimit(length) (cc_wlimits[length]) 101 #endif 102 103 #define put_token_score(length) ((length) - tt.tt_put_token_cost) 104 105 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */ 106 #define score_adjust(score, p) \ 107 do { \ 108 int _length = (p)->length; \ 109 int _ccount = (p)->ccount; \ 110 if (threshp((p)->code, _ccount, _length) || \ 111 wthreshp((p)->weight, _length)) /* XXX */ \ 112 (score) -= _length - tt.tt_put_token_cost; \ 113 else \ 114 (score) += cc_score_adjustments[_length][_ccount]; \ 115 } while (0) 116 117 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */ 118 119 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b; 120 121 #define qinsert(p1, p2) \ 122 do { \ 123 struct cc *forw = (p1)->qforw; \ 124 struct cc *back = (p1)->qback; \ 125 back->qforw = forw; \ 126 forw->qback = back; \ 127 forw = (p2)->qforw; \ 128 (p1)->qforw = forw; \ 129 forw->qback = (p1); \ 130 (p2)->qforw = (p1); \ 131 (p1)->qback = (p2); \ 132 } while (0) 133 134 #define qinsertq(q, p) \ 135 ((q)->qforw == (q) ? 0 : \ 136 ((q)->qback->qforw = (p)->qforw, \ 137 (p)->qforw->qback = (q)->qback, \ 138 (q)->qforw->qback = (p), \ 139 (p)->qforw = (q)->qforw, \ 140 (q)->qforw = (q), \ 141 (q)->qback = (q))) 142 143 #define H (14) 144 #define HSIZE (1 << H) 145 #define hash(h, c) (((((h) >> (H - 8)) | (h) << 8) ^ (c)) & (HSIZE - 1)) 146 147 char *cc_buffer; 148 struct cc **cc_output; /* the output array */ 149 short *cc_places[TOKEN_MAX + 1]; 150 short *cc_hashcodes; /* for computing hashcodes */ 151 struct cc **cc_htab; /* the hash table */ 152 struct cc **cc_tokens; /* holds all the active tokens */ 153 struct cc_undo { 154 struct cc **pos; 155 struct cc *val; 156 } *cc_undo; 157 158 long cc_time, cc_time0; 159 160 char *cc_tt_ob, *cc_tt_obe; 161 162 int cc_compress(struct cc **, struct cc **, char); 163 void cc_compress_cleanup(struct cc **, int); 164 void cc_compress_phase(struct cc **, int, struct cc **, int); 165 void cc_compress_phase1(struct cc **, struct cc **, int, int); 166 void cc_output_phase(char *, struct cc **, int); 167 int cc_sweep(char *, int, struct cc **, int); 168 void cc_sweep0(char *, int, int); 169 int cc_sweep_phase(char *, int, struct cc **); 170 void cc_sweep_reverse(struct cc **, short *); 171 int cc_token_compare(const void *, const void *); 172 173 int 174 ccinit(void) 175 { 176 int i, j; 177 struct cc *p; 178 179 if (tt.tt_token_max > cc_token_max) 180 tt.tt_token_max = cc_token_max; 181 if (tt.tt_token_min < cc_token_min) 182 tt.tt_token_min = cc_token_min; 183 if (tt.tt_token_min > tt.tt_token_max) { 184 tt.tt_ntoken = 0; 185 return 0; 186 } 187 if (tt.tt_ntoken > cc_ntoken / 2) /* not likely */ 188 tt.tt_ntoken = cc_ntoken / 2; 189 #define C(x) (sizeof (x) / sizeof *(x)) 190 for (i = 0; i < (int)C(cc_thresholds); i++) { 191 int h = i - tt.tt_put_token_cost; 192 if (h > 0) 193 cc_thresholds[i] = 194 (tt.tt_set_token_cost + 1 + h - 1) / h + 1; 195 else 196 cc_thresholds[i] = 0; 197 } 198 for (i = 0; i < (int)C(cc_score_adjustments); i++) { 199 int t = cc_thresholds[i]; 200 for (j = 0; j < (int)C(*cc_score_adjustments); j++) { 201 if (j >= t) 202 cc_score_adjustments[i][j] = 203 - (i - tt.tt_put_token_cost); 204 else if (j < t - 1) 205 cc_score_adjustments[i][j] = 0; 206 else 207 /* 208 * cost now is 209 * length * (ccount + 1) a 210 * cost before was 211 * set-token-cost + length + 212 * ccount * put-token-cost b 213 * the score adjustment is (b - a) 214 */ 215 cc_score_adjustments[i][j] = 216 tt.tt_set_token_cost + i + 217 j * tt.tt_put_token_cost - 218 i * (j + 1); 219 if (j >= t) 220 cc_initial_scores[i][j] = 0; 221 else 222 /* 223 * - (set-token-cost + 224 * (length - put-token-cost) - 225 * (length - put-token-cost) * ccount) 226 */ 227 cc_initial_scores[i][j] = 228 - (tt.tt_set_token_cost + 229 (i - tt.tt_put_token_cost) - 230 (i - tt.tt_put_token_cost) * j); 231 } 232 } 233 #ifndef cc_weight 234 for (i = 1; i < C(cc_wthresholds); i++) { 235 cc_wthresholds[i] = 236 ((tt.tt_set_token_cost + tt.tt_put_token_cost) / i + 237 i / 5 + 1) * 238 cc_weight + 1; 239 cc_wlimits[i] = cc_wthresholds[i] + cc_weight; 240 } 241 #endif 242 #undef C 243 if ((cc_output = (struct cc **) 244 malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0) 245 goto nomem; 246 if ((cc_hashcodes = (short *) 247 malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0) 248 goto nomem; 249 if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0) 250 goto nomem; 251 if ((cc_tokens = (struct cc **) 252 malloc((unsigned) 253 (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) * 254 sizeof *cc_tokens)) == 0) 255 goto nomem; 256 if ((cc_undo = (struct cc_undo *) 257 malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0) 258 goto nomem; 259 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) 260 if ((cc_places[i] = (short *) 261 malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0) 262 goto nomem; 263 cc_q0a.qforw = cc_q0a.qback = &cc_q0a; 264 cc_q0b.qforw = cc_q0b.qback = &cc_q0b; 265 cc_q1a.qforw = cc_q1a.qback = &cc_q1a; 266 cc_q1b.qforw = cc_q1b.qback = &cc_q1b; 267 if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0) 268 goto nomem; 269 for (i = 0; i < tt.tt_ntoken; i++) { 270 p->code = i; 271 p->time = -1; 272 p->qback = cc_q0a.qback; 273 p->qforw = &cc_q0a; 274 p->qback->qforw = p; 275 cc_q0a.qback = p; 276 p++; 277 } 278 for (; i < cc_ntoken; i++) { 279 p->code = -1; 280 p->time = -1; 281 p->qback = cc_q1a.qback; 282 p->qforw = &cc_q1a; 283 p->qback->qforw = p; 284 cc_q1a.qback = p; 285 p++; 286 } 287 cc_tt_ob = tt_ob; 288 cc_tt_obe = tt_obe; 289 if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0) 290 goto nomem; 291 return 0; 292 nomem: 293 wwerrno = WWE_NOMEM; 294 return -1; 295 } 296 297 void 298 ccstart(void) 299 { 300 ttflush(); 301 tt_obp = tt_ob = cc_buffer; 302 tt_obe = tt_ob + cc_bufsize; 303 tt.tt_flush = ccflush; 304 if (cc_trace) { 305 cc_trace_fp = fopen("window-trace", "a"); 306 (void) fcntl(fileno(cc_trace_fp), F_SETFD, 1); 307 } 308 ccreset(); 309 } 310 311 void 312 ccreset(void) 313 { 314 struct cc *p; 315 316 memset((char *) cc_htab, 0, HSIZE * sizeof *cc_htab); 317 for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw) 318 p->hback = 0; 319 for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw) 320 p->hback = 0; 321 } 322 323 void 324 ccend(void) 325 { 326 327 ttflush(); 328 tt_obp = tt_ob = cc_tt_ob; 329 tt_obe = cc_tt_obe; 330 tt.tt_flush = 0; 331 if (cc_trace_fp != NULL) { 332 (void) fclose(cc_trace_fp); 333 cc_trace_fp = NULL; 334 } 335 } 336 337 void 338 ccflush(void) 339 { 340 int bufsize = tt_obp - tt_ob; 341 int n; 342 343 if (tt_ob != cc_buffer) 344 abort(); 345 if (cc_trace_fp != NULL) { 346 (void) fwrite(tt_ob, 1, bufsize, cc_trace_fp); 347 (void) putc(-1, cc_trace_fp); 348 } 349 tt.tt_flush = 0; 350 (*tt.tt_compress)(1); 351 if (bufsize < tt.tt_token_min) { 352 ttflush(); 353 goto out; 354 } 355 tt_obp = tt_ob = cc_tt_ob; 356 tt_obe = cc_tt_obe; 357 cc_time0 = cc_time; 358 cc_time += bufsize; 359 n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens); 360 cc_compress_phase(cc_output, bufsize, cc_tokens, n); 361 cc_output_phase(cc_buffer, cc_output, bufsize); 362 ttflush(); 363 tt_obp = tt_ob = cc_buffer; 364 tt_obe = cc_buffer + cc_bufsize; 365 out: 366 (*tt.tt_compress)(0); 367 tt.tt_flush = ccflush; 368 } 369 370 int 371 cc_sweep_phase(char *buffer, int bufsize, struct cc **tokens) 372 { 373 struct cc **pp = tokens; 374 int i, n; 375 #ifdef STATS 376 int nn, ii; 377 #endif 378 379 #ifdef STATS 380 if (verbose >= 0) 381 time_begin(); 382 if (verbose > 0) 383 printf("Sweep:"); 384 #endif 385 cc_sweep0(buffer, bufsize, tt.tt_token_min - 1); 386 #ifdef STATS 387 ntoken_stat = 0; 388 nn = 0; 389 ii = 0; 390 #endif 391 for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) { 392 #ifdef STATS 393 if (verbose > 0) { 394 if (ii > 7) { 395 printf("\n "); 396 ii = 0; 397 } 398 ii++; 399 printf(" (%d", i); 400 (void) fflush(stdout); 401 } 402 #endif 403 n = cc_sweep(buffer, bufsize, pp, i); 404 pp += n; 405 #ifdef STATS 406 if (verbose > 0) { 407 if (--n > 0) { 408 printf(" %d", n); 409 nn += n; 410 } 411 putchar(')'); 412 } 413 #endif 414 } 415 qinsertq(&cc_q1b, &cc_q1a); 416 #ifdef STATS 417 if (verbose > 0) 418 printf("\n %d tokens, %d candidates\n", 419 ntoken_stat, nn); 420 if (verbose >= 0) 421 time_end(); 422 #endif 423 return pp - tokens; 424 } 425 426 void 427 cc_sweep0(char *buffer, int n, int length) 428 { 429 char *p; 430 short *hc; 431 int i; 432 short c; 433 short pc = tt.tt_padc; 434 435 /* n and length are at least 1 */ 436 p = buffer++; 437 hc = cc_hashcodes; 438 i = n; 439 do { 440 if ((*hc++ = *p++) == pc) 441 hc[-1] = -1; 442 } while (--i); 443 while (--length) { 444 p = buffer++; 445 hc = cc_hashcodes; 446 for (i = n--; --i;) { 447 if ((c = *p++) == pc || *hc < 0) 448 c = -1; 449 else 450 c = hash(*hc, c); 451 *hc++ = c; 452 } 453 } 454 } 455 456 int 457 cc_sweep(char *buffer, int bufsize, struct cc **tokens, int length) 458 { 459 struct cc *p; 460 char *cp; 461 int i; 462 short *hc; 463 short *places = cc_places[length]; 464 struct cc **pp = tokens; 465 short threshold = thresh(length); 466 #ifndef cc_weight 467 short wthreshold = wthresh(length); 468 short limit = wlimit(length); 469 #endif 470 int time0; 471 short pc = tt.tt_padc; 472 473 i = length - 1; 474 bufsize -= i; 475 cp = buffer + i; 476 hc = cc_hashcodes; 477 time0 = cc_time0; 478 for (i = 0; i < bufsize; i++, time0++) { 479 struct cc **h; 480 481 { 482 short *hc1 = hc; 483 short c = *cp++; 484 short hh; 485 if ((hh = *hc1) < 0 || c == pc) { 486 *hc1++ = -1; 487 hc = hc1; 488 continue; 489 } 490 h = cc_htab + (*hc1++ = hash(hh, c)); 491 hc = hc1; 492 } 493 for (p = *h; p != 0; p = p->hforw) 494 if (p->length == (char) length) { 495 char *p1 = p->string; 496 char *p2 = cp - length; 497 int n = length; 498 do 499 if (*p1++ != *p2++) 500 goto fail; 501 while (--n); 502 break; 503 fail:; 504 } 505 if (p == 0) { 506 p = cc_q1a.qback; 507 if (p == &cc_q1a || 508 (p->time >= cc_time0 && p->length == (char) length)) 509 continue; 510 if (p->hback != 0) 511 if ((*p->hback = p->hforw) != 0) 512 p->hforw->hback = p->hback; 513 { 514 char *p1 = p->string; 515 char *p2 = cp - length; 516 int n = length; 517 do 518 *p1++ = *p2++; 519 while (--n); 520 } 521 p->length = length; 522 #ifndef cc_weight 523 p->weight = cc_weight; 524 #endif 525 p->time = time0; 526 p->bcount = 1; 527 p->ccount = 0; 528 p->flag = 0; 529 if ((p->hforw = *h) != 0) 530 p->hforw->hback = &p->hforw; 531 *h = p; 532 p->hback = h; 533 qinsert(p, &cc_q1a); 534 places[i] = -1; 535 p->places = i; 536 #ifdef STATS 537 ntoken_stat++; 538 #endif 539 } else if (p->time < cc_time0) { 540 #ifndef cc_weight 541 if ((p->weight += p->time - time0) < 0) 542 p->weight = cc_weight; 543 else if ((p->weight += cc_weight) > limit) 544 p->weight = limit; 545 #endif 546 p->time = time0; 547 p->bcount = 1; 548 p->ccount = 0; 549 if (p->code >= 0) { 550 p->flag = 1; 551 *pp++ = p; 552 } else 553 #ifndef cc_weight 554 if (p->weight >= wthreshold) { 555 p->flag = 1; 556 *pp++ = p; 557 qinsert(p, &cc_q1b); 558 } else 559 #endif 560 { 561 p->flag = 0; 562 qinsert(p, &cc_q1a); 563 } 564 places[i] = -1; 565 p->places = i; 566 #ifdef STATS 567 ntoken_stat++; 568 #endif 569 } else if (p->time + length > time0) { 570 /* 571 * overlapping token, don't count as two and 572 * don't update time0, but do adjust weight to offset 573 * the difference 574 */ 575 #ifndef cc_weight 576 if (cc_weight != 0) { /* XXX */ 577 p->weight += time0 - p->time; 578 if (!p->flag && p->weight >= wthreshold) { 579 p->flag = 1; 580 *pp++ = p; 581 qinsert(p, &cc_q1b); 582 } 583 } 584 #endif 585 places[i] = p->places; 586 p->places = i; 587 } else { 588 #ifndef cc_weight 589 if ((p->weight += p->time - time0) < 0) 590 p->weight = cc_weight; 591 else if ((p->weight += cc_weight) > limit) 592 p->weight = limit; 593 #endif 594 p->time = time0; 595 p->bcount++; 596 if (!p->flag && 597 /* code must be < 0 if flag false here */ 598 (p->bcount >= threshold 599 #ifndef cc_weight 600 || p->weight >= wthreshold 601 #endif 602 )) { 603 p->flag = 1; 604 *pp++ = p; 605 qinsert(p, &cc_q1b); 606 } 607 places[i] = p->places; 608 p->places = i; 609 } 610 } 611 if ((i = pp - tokens) > 0) { 612 *pp = 0; 613 if (cc_reverse) 614 cc_sweep_reverse(tokens, places); 615 if (cc_sort && i > 1) { 616 qsort((char *) tokens, i, sizeof *tokens, 617 cc_token_compare); 618 } 619 if (cc_chop) { 620 if ((i = i * cc_chop / 100) == 0) 621 i = 1; 622 tokens[i] = 0; 623 } 624 i++; 625 } 626 return i; 627 } 628 629 void 630 cc_sweep_reverse(struct cc **pp, short int *places) 631 { 632 struct cc *p; 633 short frnt, back, t; 634 635 while ((p = *pp++) != 0) { 636 back = -1; 637 t = p->places; 638 /* the list is never empty */ 639 do { 640 frnt = places[t]; 641 places[t] = back; 642 back = t; 643 } while ((t = frnt) >= 0); 644 p->places = back; 645 } 646 } 647 648 void 649 cc_compress_phase(struct cc **output, int bufsize, struct cc **tokens, int ntoken) 650 { 651 int i; 652 653 memset((char *) output, 0, bufsize * sizeof *output); 654 for (i = 0; i < cc_npass0; i++) 655 cc_compress_phase1(output, tokens, ntoken, 0); 656 for (i = 0; i < cc_npass1; i++) 657 cc_compress_phase1(output, tokens, ntoken, 1); 658 cc_compress_cleanup(output, bufsize); 659 } 660 661 void 662 cc_compress_phase1(struct cc **output, struct cc **tokens, int ntoken, int flag) 663 { 664 struct cc **pp; 665 #ifdef STATS 666 int i = 0; 667 int nt = 0, cc = 0, nc = 0; 668 #endif 669 670 #ifdef STATS 671 if (verbose >= 0) 672 time_begin(); 673 if (verbose > 0) 674 printf("Compress:"); 675 #endif 676 pp = tokens; 677 while (pp < tokens + ntoken) { 678 #ifdef STATS 679 if (verbose > 0) { 680 ntoken_stat = 0; 681 ccount_stat = 0; 682 ncover_stat = 0; 683 if (i > 2) { 684 printf("\n "); 685 i = 0; 686 } 687 i++; 688 printf(" (%d", (*pp)->length); 689 (void) fflush(stdout); 690 } 691 #endif 692 pp += cc_compress(output, pp, flag); 693 #ifdef STATS 694 if (verbose > 0) { 695 printf(" %dt %du %dc)", ntoken_stat, ccount_stat, 696 ncover_stat); 697 nt += ntoken_stat; 698 cc += ccount_stat; 699 nc += ncover_stat; 700 } 701 #endif 702 } 703 #ifdef STATS 704 if (verbose > 0) 705 printf("\n total: (%dt %du %dc)\n", nt, cc, nc); 706 if (verbose >= 0) 707 time_end(); 708 #endif 709 } 710 711 void 712 cc_compress_cleanup(struct cc **output, int bufsize) 713 { 714 struct cc **end; 715 716 /* the previous output phase may have been interrupted */ 717 qinsertq(&cc_q0b, &cc_q0a); 718 for (end = output + bufsize; output < end;) { 719 struct cc *p; 720 int length; 721 if ((p = *output) == 0) { 722 output++; 723 continue; 724 } 725 length = p->length; 726 if (!p->flag) { 727 } else if (p->code >= 0) { 728 qinsert(p, &cc_q0b); 729 p->flag = 0; 730 } else if (p->ccount == 0) { 731 *output = 0; 732 } else if (p->ccount >= thresh(length) 733 #ifndef cc_weight 734 || wthreshp(p->weight, length) 735 #endif 736 ) { 737 p->flag = 0; 738 } else { 739 p->ccount = 0; 740 *output = 0; 741 } 742 output += length; 743 } 744 } 745 746 int 747 cc_compress(struct cc **output, struct cc **tokens, char flag) 748 { 749 struct cc **pp = tokens; 750 struct cc *p = *pp++; 751 int length = p->length; 752 int threshold = thresh(length); 753 #ifndef cc_weight 754 short wthreshold = wthresh(length); 755 #endif 756 short *places = cc_places[length]; 757 int *initial_scores = cc_initial_scores[length]; 758 int initial_score0 = put_token_score(length); 759 760 do { 761 int score; 762 struct cc_undo *undop; 763 int ccount; 764 #ifdef STATS 765 int ncover; 766 #endif 767 int i; 768 769 ccount = p->ccount; 770 if ((short) ccount >= p->bcount) 771 continue; 772 if (p->code >= 0 || ccount >= threshold) 773 score = 0; 774 #ifndef cc_weight 775 else if (p->weight >= wthreshold) 776 /* allow one fewer match than normal */ 777 /* XXX, should adjust for ccount */ 778 score = - tt.tt_set_token_cost; 779 #endif 780 else 781 score = initial_scores[ccount]; 782 undop = cc_undo; 783 #ifdef STATS 784 ncover = 0; 785 #endif 786 for (i = p->places; i >= 0; i = places[i]) { 787 struct cc **jp; 788 struct cc *x; 789 struct cc **ip = output + i; 790 int score0 = initial_score0; 791 struct cc **iip = ip + length; 792 struct cc_undo *undop1 = undop; 793 794 if ((x = *(jp = ip)) != 0) 795 goto z; 796 while (--jp >= output) 797 if ((x = *jp) != 0) { 798 if (jp + x->length > ip) 799 goto z; 800 break; 801 } 802 jp = ip + 1; 803 while (jp < iip) { 804 if ((x = *jp) == 0) { 805 jp++; 806 continue; 807 } 808 z: 809 if (x == p) 810 goto undo; 811 #ifdef STATS 812 ncover++; 813 #endif 814 undop->pos = jp; 815 undop->val = x; 816 undop++; 817 *jp = 0; 818 x->ccount--; 819 score_adjust(score0, x); 820 if (score0 < 0 && flag) 821 goto undo; 822 jp += x->length; 823 } 824 undop->pos = ip; 825 undop->val = 0; 826 undop++; 827 *ip = p; 828 ccount++; 829 score += score0; 830 continue; 831 undo: 832 while (--undop >= undop1) 833 if ((*undop->pos = x = undop->val)) 834 x->ccount++; 835 undop++; 836 } 837 if (score > 0) { 838 #ifdef STATS 839 ccount_stat += ccount - p->ccount; 840 ntoken_stat++; 841 ncover_stat += ncover; 842 #endif 843 p->ccount = ccount; 844 } else { 845 struct cc_undo *u = cc_undo; 846 while (--undop >= u) { 847 struct cc *x; 848 if ((*undop->pos = x = undop->val)) 849 x->ccount++; 850 } 851 } 852 } while ((p = *pp++) != 0); 853 return pp - tokens; 854 } 855 856 void 857 cc_output_phase(char *buffer, struct cc **output, int bufsize) 858 { 859 int i; 860 struct cc *p, *p1; 861 862 for (i = 0; i < bufsize;) { 863 if ((p = output[i]) == 0) { 864 ttputc(buffer[i]); 865 i++; 866 } else if (p->code >= 0) { 867 if (--p->ccount == 0) 868 qinsert(p, &cc_q0a); 869 (*tt.tt_put_token)(p->code, p->string, p->length); 870 wwntokuse++; 871 wwntoksave += put_token_score(p->length); 872 i += p->length; 873 } else if ((p1 = cc_q0a.qback) != &cc_q0a) { 874 p->code = p1->code; 875 p1->code = -1; 876 qinsert(p1, &cc_q1a); 877 if (--p->ccount == 0) 878 qinsert(p, &cc_q0a); 879 else 880 qinsert(p, &cc_q0b); 881 (*tt.tt_set_token)(p->code, p->string, p->length); 882 wwntokdef++; 883 wwntoksave -= tt.tt_set_token_cost; 884 i += p->length; 885 } else { 886 p->ccount--; 887 ttwrite(p->string, p->length); 888 wwntokbad++; 889 i += p->length; 890 } 891 } 892 wwntokc += bufsize; 893 } 894 895 int 896 cc_token_compare(const void *p1, const void *p2) 897 { 898 const struct cc * const * vp1 = p1; 899 const struct cc * const * vp2 = p2; 900 return (*vp2)->bcount - (*vp1)->bcount; 901 } 902