xref: /dragonfly/usr.bin/window/compress.c (revision 1de703da)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Edward Wang at The University of California, Berkeley.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * @(#)compress.c	8.1 (Berkeley) 6/6/93
37  * $FreeBSD: src/usr.bin/window/compress.c,v 1.2.12.1 2001/05/17 09:45:00 obrien Exp $
38  * $DragonFly: src/usr.bin/window/compress.c,v 1.2 2003/06/17 04:29:33 dillon Exp $
39  */
40 
41 #include "ww.h"
42 #include "tt.h"
43 
44 	/* special */
45 #include <stdio.h>
46 #include <fcntl.h>
47 #include <stdlib.h>
48 int cc_trace = 0;
49 FILE *cc_trace_fp;
50 
51 	/* tunable parameters */
52 
53 int cc_reverse = 1;
54 int cc_sort = 0;
55 int cc_chop = 0;
56 
57 int cc_token_max = 8;		/* <= TOKEN_MAX */
58 int cc_token_min = 2;		/* > tt.tt_put_token_cost */
59 int cc_npass0 = 1;
60 int cc_npass1 = 1;
61 
62 int cc_bufsize = 1024 * 3;	/* XXX, or 80 * 24 * 2 */
63 
64 int cc_ntoken = 8192;
65 
66 #define cc_weight XXX
67 #ifndef cc_weight
68 int cc_weight = 0;
69 #endif
70 
71 #define TOKEN_MAX 16
72 
73 struct cc {
74 	char string[TOKEN_MAX];
75 	char length;
76 	char flag;
77 #ifndef cc_weight
78 	short weight;
79 #endif
80 	long time;		/* time last seen */
81 	short bcount;		/* count in this buffer */
82 	short ccount;		/* count in compression */
83 	short places;		/* places in the buffer */
84 	short code;		/* token code */
85 	struct cc *qforw, *qback;
86 	struct cc *hforw, **hback;
87 };
88 
89 short cc_thresholds[TOKEN_MAX + 1];
90 #define thresh(length) (cc_thresholds[length])
91 #define threshp(code, count, length) \
92 	((code) >= 0 || (short) (count) >= cc_thresholds[length])
93 
94 #ifndef cc_weight
95 short cc_wthresholds[TOKEN_MAX + 1];
96 #define wthresh(length) (cc_wthresholds[length])
97 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
98 #else
99 #define wthreshp(weight, length) (0)
100 #endif
101 
102 #ifndef cc_weight
103 short cc_wlimits[TOKEN_MAX + 1];
104 #define wlimit(length) (cc_wlimits[length])
105 #endif
106 
107 #define put_token_score(length) ((length) - tt.tt_put_token_cost)
108 
109 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
110 #define score_adjust(score, p) \
111 	do { \
112 		int length = (p)->length; \
113 		int ccount = (p)->ccount; \
114 		if (threshp((p)->code, ccount, length) || \
115 		    wthreshp((p)->weight, length)) /* XXX */ \
116 			(score) -= length - tt.tt_put_token_cost; \
117 		else \
118 			(score) += cc_score_adjustments[length][ccount]; \
119 	} while (0)
120 
121 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
122 
123 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
124 
125 #define qinsert(p1, p2) \
126 	do { \
127 		register struct cc *forw = (p1)->qforw; \
128 		register struct cc *back = (p1)->qback; \
129 		back->qforw = forw; \
130 		forw->qback = back; \
131 		forw = (p2)->qforw; \
132 		(p1)->qforw = forw; \
133 		forw->qback = (p1); \
134 		(p2)->qforw = (p1); \
135 		(p1)->qback = (p2); \
136 	} while (0)
137 
138 #define qinsertq(q, p) \
139 	((q)->qforw == (q) ? 0 : \
140 	 ((q)->qback->qforw = (p)->qforw, \
141 	  (p)->qforw->qback = (q)->qback, \
142 	  (q)->qforw->qback = (p), \
143 	  (p)->qforw = (q)->qforw, \
144 	  (q)->qforw = (q), \
145 	  (q)->qback = (q)))
146 
147 #define H		(14)
148 #define HSIZE		(1 << H)
149 #define hash(h, c)	((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
150 
151 char *cc_buffer;
152 struct cc **cc_output;			/* the output array */
153 short *cc_places[TOKEN_MAX + 1];
154 short *cc_hashcodes;			/* for computing hashcodes */
155 struct cc **cc_htab;			/* the hash table */
156 struct cc **cc_tokens;			/* holds all the active tokens */
157 struct cc_undo {
158 	struct cc **pos;
159 	struct cc *val;
160 } *cc_undo;
161 
162 long cc_time, cc_time0;
163 
164 char *cc_tt_ob, *cc_tt_obe;
165 
166 ccinit()
167 {
168 	register i, j;
169 	register struct cc *p;
170 
171 	if (tt.tt_token_max > cc_token_max)
172 		tt.tt_token_max = cc_token_max;
173 	if (tt.tt_token_min < cc_token_min)
174 		tt.tt_token_min = cc_token_min;
175 	if (tt.tt_token_min > tt.tt_token_max) {
176 		tt.tt_ntoken = 0;
177 		return 0;
178 	}
179 	if (tt.tt_ntoken > cc_ntoken / 2)	/* not likely */
180 		tt.tt_ntoken = cc_ntoken / 2;
181 #define C(x) (sizeof (x) / sizeof *(x))
182 	for (i = 0; i < C(cc_thresholds); i++) {
183 		int h = i - tt.tt_put_token_cost;
184 		if (h > 0)
185 			cc_thresholds[i] =
186 				(tt.tt_set_token_cost + 1 + h - 1) / h + 1;
187 		else
188 			cc_thresholds[i] = 0;
189 	}
190 	for (i = 0; i < C(cc_score_adjustments); i++) {
191 		int t = cc_thresholds[i];
192 		for (j = 0; j < C(*cc_score_adjustments); j++) {
193 			if (j >= t)
194 				cc_score_adjustments[i][j] =
195 					- (i - tt.tt_put_token_cost);
196 			else if (j < t - 1)
197 				cc_score_adjustments[i][j] = 0;
198 			else
199 				/*
200 				 * cost now is
201 				 *	length * (ccount + 1)		a
202 				 * cost before was
203 				 *	set-token-cost + length +
204 				 *		ccount * put-token-cost	b
205 				 * the score adjustment is (b - a)
206 				 */
207 				cc_score_adjustments[i][j] =
208 					tt.tt_set_token_cost + i +
209 						j * tt.tt_put_token_cost -
210 							i * (j + 1);
211 			if (j >= t)
212 				cc_initial_scores[i][j] = 0;
213 			else
214 				/*
215 				 * - (set-token-cost +
216 				 *	(length - put-token-cost) -
217 				 *	(length - put-token-cost) * ccount)
218 				 */
219 				cc_initial_scores[i][j] =
220 					- (tt.tt_set_token_cost +
221 					   (i - tt.tt_put_token_cost) -
222 					   (i - tt.tt_put_token_cost) * j);
223 		}
224 	}
225 #ifndef cc_weight
226 	for (i = 1; i < C(cc_wthresholds); i++) {
227 		cc_wthresholds[i] =
228 			((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
229 				i / 5 + 1) *
230 				cc_weight + 1;
231 		cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
232 	}
233 #endif
234 #undef C
235 	if ((cc_output = (struct cc **)
236 	     malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
237 		goto nomem;
238 	if ((cc_hashcodes = (short *)
239 	     malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
240 		goto nomem;
241 	if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
242 		goto nomem;
243 	if ((cc_tokens = (struct cc **)
244 	     malloc((unsigned)
245 	            (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
246 		    sizeof *cc_tokens)) == 0)
247 		goto nomem;
248 	if ((cc_undo = (struct cc_undo *)
249 	     malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
250 		goto nomem;
251 	for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
252 		if ((cc_places[i] = (short *)
253 		     malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
254 			goto nomem;
255 	cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
256 	cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
257 	cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
258 	cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
259 	if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
260 		goto nomem;
261 	for (i = 0; i < tt.tt_ntoken; i++) {
262 		p->code = i;
263 		p->time = -1;
264 		p->qback = cc_q0a.qback;
265 		p->qforw = &cc_q0a;
266 		p->qback->qforw = p;
267 		cc_q0a.qback = p;
268 		p++;
269 	}
270 	for (; i < cc_ntoken; i++) {
271 		p->code = -1;
272 		p->time = -1;
273 		p->qback = cc_q1a.qback;
274 		p->qforw = &cc_q1a;
275 		p->qback->qforw = p;
276 		cc_q1a.qback = p;
277 		p++;
278 	}
279 	cc_tt_ob = tt_ob;
280 	cc_tt_obe = tt_obe;
281 	if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
282 		goto nomem;
283 	return 0;
284 nomem:
285 	wwerrno = WWE_NOMEM;
286 	return -1;
287 }
288 
289 ccstart()
290 {
291 	int ccflush();
292 
293 	ttflush();
294 	tt_obp = tt_ob = cc_buffer;
295 	tt_obe = tt_ob + cc_bufsize;
296 	tt.tt_flush = ccflush;
297 	if (cc_trace) {
298 		cc_trace_fp = fopen("window-trace", "a");
299 		(void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
300 	}
301 	ccreset();
302 }
303 
304 ccreset()
305 {
306 	register struct cc *p;
307 
308 	bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
309 	for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
310 		p->hback = 0;
311 	for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
312 		p->hback = 0;
313 }
314 
315 ccend()
316 {
317 
318 	ttflush();
319 	tt_obp = tt_ob = cc_tt_ob;
320 	tt_obe = cc_tt_obe;
321 	tt.tt_flush = 0;
322 	if (cc_trace_fp != NULL) {
323 		(void) fclose(cc_trace_fp);
324 		cc_trace_fp = NULL;
325 	}
326 }
327 
328 ccflush()
329 {
330 	int bufsize = tt_obp - tt_ob;
331 	int n;
332 
333 	if (tt_ob != cc_buffer)
334 		abort();
335 	if (cc_trace_fp != NULL) {
336 		(void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
337 		(void) putc(-1, cc_trace_fp);
338 	}
339 	tt.tt_flush = 0;
340 	(*tt.tt_compress)(1);
341 	if (bufsize < tt.tt_token_min) {
342 		ttflush();
343 		goto out;
344 	}
345 	tt_obp = tt_ob = cc_tt_ob;
346 	tt_obe = cc_tt_obe;
347 	cc_time0 = cc_time;
348 	cc_time += bufsize;
349 	n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
350 	cc_compress_phase(cc_output, bufsize, cc_tokens, n);
351 	cc_output_phase(cc_buffer, cc_output, bufsize);
352 	ttflush();
353 	tt_obp = tt_ob = cc_buffer;
354 	tt_obe = cc_buffer + cc_bufsize;
355 out:
356 	(*tt.tt_compress)(0);
357 	tt.tt_flush = ccflush;
358 }
359 
360 cc_sweep_phase(buffer, bufsize, tokens)
361 	char *buffer;
362 	struct cc **tokens;
363 {
364 	register struct cc **pp = tokens;
365 	register i, n;
366 #ifdef STATS
367 	int nn, ii;
368 #endif
369 
370 #ifdef STATS
371 	if (verbose >= 0)
372 		time_begin();
373 	if (verbose > 0)
374 		printf("Sweep:");
375 #endif
376 	cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
377 #ifdef STATS
378 	ntoken_stat = 0;
379 	nn = 0;
380 	ii = 0;
381 #endif
382 	for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
383 #ifdef STATS
384 		if (verbose > 0) {
385 			if (ii > 7) {
386 				printf("\n      ");
387 				ii = 0;
388 			}
389 			ii++;
390 			printf(" (%d", i);
391 			(void) fflush(stdout);
392 		}
393 #endif
394 		n = cc_sweep(buffer, bufsize, pp, i);
395 		pp += n;
396 #ifdef STATS
397 		if (verbose > 0) {
398 			if (--n > 0) {
399 				printf(" %d", n);
400 				nn += n;
401 			}
402 			putchar(')');
403 		}
404 #endif
405 	}
406 	qinsertq(&cc_q1b, &cc_q1a);
407 #ifdef STATS
408 	if (verbose > 0)
409 		printf("\n       %d tokens, %d candidates\n",
410 			ntoken_stat, nn);
411 	if (verbose >= 0)
412 		time_end();
413 #endif
414 	return pp - tokens;
415 }
416 
417 cc_sweep0(buffer, n, length)
418 	char *buffer;
419 {
420 	register char *p;
421 	register short *hc;
422 	register i;
423 	register short c;
424 	register short pc = tt.tt_padc;
425 
426 	/* n and length are at least 1 */
427 	p = buffer++;
428 	hc = cc_hashcodes;
429 	i = n;
430 	do {
431 		if ((*hc++ = *p++) == pc)
432 			hc[-1] = -1;
433 	} while (--i);
434 	while (--length) {
435 		p = buffer++;
436 		hc = cc_hashcodes;
437 		for (i = n--; --i;) {
438 			if ((c = *p++) == pc || *hc < 0)
439 				c = -1;
440 			else
441 				c = hash(*hc, c);
442 			*hc++ = c;
443 		}
444 	}
445 }
446 
447 cc_sweep(buffer, bufsize, tokens, length)
448 	char *buffer;
449 	struct cc **tokens;
450 	register length;
451 {
452 	register struct cc *p;
453 	register char *cp;
454 	register i;
455 	short *hc;
456 	short *places = cc_places[length];
457 	struct cc **pp = tokens;
458 	short threshold = thresh(length);
459 #ifndef cc_weight
460 	short wthreshold = wthresh(length);
461 	short limit = wlimit(length);
462 #endif
463 	int time;
464 	short pc = tt.tt_padc;
465 
466 	i = length - 1;
467 	bufsize -= i;
468 	cp = buffer + i;
469 	hc = cc_hashcodes;
470 	time = cc_time0;
471 	for (i = 0; i < bufsize; i++, time++) {
472 		struct cc **h;
473 
474 		{
475 			register short *hc1 = hc;
476 			register short c = *cp++;
477 			register short hh;
478 			if ((hh = *hc1) < 0 || c == pc) {
479 				*hc1++ = -1;
480 				hc = hc1;
481 				continue;
482 			}
483 			h = cc_htab + (*hc1++ = hash(hh, c));
484 			hc = hc1;
485 		}
486 		for (p = *h; p != 0; p = p->hforw)
487 			if (p->length == (char) length) {
488 				register char *p1 = p->string;
489 				register char *p2 = cp - length;
490 				register n = length;
491 				do
492 					if (*p1++ != *p2++)
493 						goto fail;
494 				while (--n);
495 				break;
496 			fail:;
497 			}
498 		if (p == 0) {
499 			p = cc_q1a.qback;
500 			if (p == &cc_q1a ||
501 			    p->time >= cc_time0 && p->length == (char) length)
502 				continue;
503 			if (p->hback != 0)
504 				if ((*p->hback = p->hforw) != 0)
505 					p->hforw->hback = p->hback;
506 			{
507 				register char *p1 = p->string;
508 				register char *p2 = cp - length;
509 				register n = length;
510 				do
511 					*p1++ = *p2++;
512 				while (--n);
513 			}
514 			p->length = length;
515 #ifndef cc_weight
516 			p->weight = cc_weight;
517 #endif
518 			p->time = time;
519 			p->bcount = 1;
520 			p->ccount = 0;
521 			p->flag = 0;
522 			if ((p->hforw = *h) != 0)
523 				p->hforw->hback = &p->hforw;
524 			*h = p;
525 			p->hback = h;
526 			qinsert(p, &cc_q1a);
527 			places[i] = -1;
528 			p->places = i;
529 #ifdef STATS
530 			ntoken_stat++;
531 #endif
532 		} else if (p->time < cc_time0) {
533 #ifndef cc_weight
534 			if ((p->weight += p->time - time) < 0)
535 				p->weight = cc_weight;
536 			else if ((p->weight += cc_weight) > limit)
537 				p->weight = limit;
538 #endif
539 			p->time = time;
540 			p->bcount = 1;
541 			p->ccount = 0;
542 			if (p->code >= 0) {
543 				p->flag = 1;
544 				*pp++ = p;
545 			} else
546 #ifndef cc_weight
547 			if (p->weight >= wthreshold) {
548 				p->flag = 1;
549 				*pp++ = p;
550 				qinsert(p, &cc_q1b);
551 			} else
552 #endif
553 			{
554 				p->flag = 0;
555 				qinsert(p, &cc_q1a);
556 			}
557 			places[i] = -1;
558 			p->places = i;
559 #ifdef STATS
560 			ntoken_stat++;
561 #endif
562 		} else if (p->time + length > time) {
563 			/*
564 			 * overlapping token, don't count as two and
565 			 * don't update time, but do adjust weight to offset
566 			 * the difference
567 			 */
568 #ifndef cc_weight
569 			if (cc_weight != 0) {	/* XXX */
570 				p->weight += time - p->time;
571 				if (!p->flag && p->weight >= wthreshold) {
572 					p->flag = 1;
573 					*pp++ = p;
574 					qinsert(p, &cc_q1b);
575 				}
576 			}
577 #endif
578 			places[i] = p->places;
579 			p->places = i;
580 		} else {
581 #ifndef cc_weight
582 			if ((p->weight += p->time - time) < 0)
583 				p->weight = cc_weight;
584 			else if ((p->weight += cc_weight) > limit)
585 				p->weight = limit;
586 #endif
587 			p->time = time;
588 			p->bcount++;
589 			if (!p->flag &&
590 			    /* code must be < 0 if flag false here */
591 			    (p->bcount >= threshold
592 #ifndef cc_weight
593 			     || p->weight >= wthreshold
594 #endif
595 			     )) {
596 				p->flag = 1;
597 				*pp++ = p;
598 				qinsert(p, &cc_q1b);
599 			}
600 			places[i] = p->places;
601 			p->places = i;
602 		}
603 	}
604 	if ((i = pp - tokens) > 0) {
605 		*pp = 0;
606 		if (cc_reverse)
607 			cc_sweep_reverse(tokens, places);
608 		if (cc_sort && i > 1) {
609 			int cc_token_compare();
610 			qsort((char *) tokens, i, sizeof *tokens,
611 			      cc_token_compare);
612 		}
613 		if (cc_chop) {
614 			if ((i = i * cc_chop / 100) == 0)
615 				i = 1;
616 			tokens[i] = 0;
617 		}
618 		i++;
619 	}
620 	return i;
621 }
622 
623 cc_sweep_reverse(pp, places)
624 	register struct cc **pp;
625 	register short *places;
626 {
627 	register struct cc *p;
628 	register short front, back, t;
629 
630 	while ((p = *pp++) != 0) {
631 		back = -1;
632 		t = p->places;
633 		/* the list is never empty */
634 		do {
635 			front = places[t];
636 			places[t] = back;
637 			back = t;
638 		} while ((t = front) >= 0);
639 		p->places = back;
640 	}
641 }
642 
643 cc_compress_phase(output, bufsize, tokens, ntoken)
644 	struct cc **output;
645 	struct cc **tokens;
646 {
647 	register i;
648 
649 	bzero((char *) output, bufsize * sizeof *output);
650 	for (i = 0; i < cc_npass0; i++)
651 		cc_compress_phase1(output, tokens, ntoken, 0);
652 	for (i = 0; i < cc_npass1; i++)
653 		cc_compress_phase1(output, tokens, ntoken, 1);
654 	cc_compress_cleanup(output, bufsize);
655 }
656 
657 cc_compress_phase1(output, tokens, ntoken, flag)
658 	register struct cc **output;
659 	struct cc **tokens;
660 {
661 	register struct cc **pp;
662 #ifdef STATS
663 	register int i = 0;
664 	int nt = 0, cc = 0, nc = 0;
665 #endif
666 
667 #ifdef STATS
668 	if (verbose >= 0)
669 		time_begin();
670 	if (verbose > 0)
671 		printf("Compress:");
672 #endif
673 	pp = tokens;
674 	while (pp < tokens + ntoken) {
675 #ifdef STATS
676 		if (verbose > 0) {
677 			ntoken_stat = 0;
678 			ccount_stat = 0;
679 			ncover_stat = 0;
680 			if (i > 2) {
681 				printf("\n         ");
682 				i = 0;
683 			}
684 			i++;
685 			printf(" (%d", (*pp)->length);
686 			(void) fflush(stdout);
687 		}
688 #endif
689 		pp += cc_compress(output, pp, flag);
690 #ifdef STATS
691 		if (verbose > 0) {
692 			printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
693 			       ncover_stat);
694 			nt += ntoken_stat;
695 			cc += ccount_stat;
696 			nc += ncover_stat;
697 		}
698 #endif
699 	}
700 #ifdef STATS
701 	if (verbose > 0)
702 		printf("\n   total: (%dt %du %dc)\n", nt, cc, nc);
703 	if (verbose >= 0)
704 		time_end();
705 #endif
706 }
707 
708 cc_compress_cleanup(output, bufsize)
709 	register struct cc **output;
710 {
711 	register struct cc **end;
712 
713 	/* the previous output phase may have been interrupted */
714 	qinsertq(&cc_q0b, &cc_q0a);
715 	for (end = output + bufsize; output < end;) {
716 		register struct cc *p;
717 		register length;
718 		if ((p = *output) == 0) {
719 			output++;
720 			continue;
721 		}
722 		length = p->length;
723 		if (!p->flag) {
724 		} else if (p->code >= 0) {
725 			qinsert(p, &cc_q0b);
726 			p->flag = 0;
727 		} else if (p->ccount == 0) {
728 			*output = 0;
729 		} else if (p->ccount >= thresh(length)
730 #ifndef cc_weight
731 			   || wthreshp(p->weight, length)
732 #endif
733 			   ) {
734 			p->flag = 0;
735 		} else {
736 			p->ccount = 0;
737 			*output = 0;
738 		}
739 		output += length;
740 	}
741 }
742 
743 cc_compress(output, tokens, flag)
744 	struct cc **output;
745 	struct cc **tokens;
746 	char flag;
747 {
748 	struct cc **pp = tokens;
749 	register struct cc *p = *pp++;
750 	int length = p->length;
751 	int threshold = thresh(length);
752 #ifndef cc_weight
753 	short wthreshold = wthresh(length);
754 #endif
755 	short *places = cc_places[length];
756 	int *initial_scores = cc_initial_scores[length];
757 	int initial_score0 = put_token_score(length);
758 
759 	do {
760 		int score;
761 		register struct cc_undo *undop;
762 		int ccount;
763 #ifdef STATS
764 		int ncover;
765 #endif
766 		int i;
767 
768 		ccount = p->ccount;
769 		if ((short) ccount >= p->bcount)
770 			continue;
771 		if (p->code >= 0 || ccount >= threshold)
772 			score = 0;
773 #ifndef cc_weight
774 		else if (p->weight >= wthreshold)
775 			/* allow one fewer match than normal */
776 			/* XXX, should adjust for ccount */
777 			score = - tt.tt_set_token_cost;
778 #endif
779 		else
780 			score = initial_scores[ccount];
781 		undop = cc_undo;
782 #ifdef STATS
783 		ncover = 0;
784 #endif
785 		for (i = p->places; i >= 0; i = places[i]) {
786 			register struct cc **jp;
787 			register struct cc *x;
788 			register struct cc **ip = output + i;
789 			register score0 = initial_score0;
790 			struct cc **iip = ip + length;
791 			struct cc_undo *undop1 = undop;
792 
793 			if ((x = *(jp = ip)) != 0)
794 				goto z;
795 			while (--jp >= output)
796 				if ((x = *jp) != 0) {
797 					if (jp + x->length > ip)
798 						goto z;
799 					break;
800 				}
801 			jp = ip + 1;
802 			while (jp < iip) {
803 				if ((x = *jp) == 0) {
804 					jp++;
805 					continue;
806 				}
807 			z:
808 				if (x == p)
809 					goto undo;
810 #ifdef STATS
811 				ncover++;
812 #endif
813 				undop->pos = jp;
814 				undop->val = x;
815 				undop++;
816 				*jp = 0;
817 				x->ccount--;
818 				score_adjust(score0, x);
819 				if (score0 < 0 && flag)
820 					goto undo;
821 				jp += x->length;
822 			}
823 			undop->pos = ip;
824 			undop->val = 0;
825 			undop++;
826 			*ip = p;
827 			ccount++;
828 			score += score0;
829 			continue;
830 		undo:
831 			while (--undop >= undop1)
832 				if (*undop->pos = x = undop->val)
833 					x->ccount++;
834 			undop++;
835 		}
836 		if (score > 0) {
837 #ifdef STATS
838 			ccount_stat += ccount - p->ccount;
839 			ntoken_stat++;
840 			ncover_stat += ncover;
841 #endif
842 			p->ccount = ccount;
843 		} else {
844 			register struct cc_undo *u = cc_undo;
845 			while (--undop >= u) {
846 				register struct cc *x;
847 				if (*undop->pos = x = undop->val)
848 					x->ccount++;
849 			}
850 		}
851 	} while ((p = *pp++) != 0);
852 	return pp - tokens;
853 }
854 
855 cc_output_phase(buffer, output, bufsize)
856 	register char *buffer;
857 	register struct cc **output;
858 	register bufsize;
859 {
860 	register i;
861 	register struct cc *p, *p1;
862 
863 	for (i = 0; i < bufsize;) {
864 		if ((p = output[i]) == 0) {
865 			ttputc(buffer[i]);
866 			i++;
867 		} else if (p->code >= 0) {
868 			if (--p->ccount == 0)
869 				qinsert(p, &cc_q0a);
870 			(*tt.tt_put_token)(p->code, p->string, p->length);
871 			wwntokuse++;
872 			wwntoksave += put_token_score(p->length);
873 			i += p->length;
874 		} else if ((p1 = cc_q0a.qback) != &cc_q0a) {
875 			p->code = p1->code;
876 			p1->code = -1;
877 			qinsert(p1, &cc_q1a);
878 			if (--p->ccount == 0)
879 				qinsert(p, &cc_q0a);
880 			else
881 				qinsert(p, &cc_q0b);
882 			(*tt.tt_set_token)(p->code, p->string, p->length);
883 			wwntokdef++;
884 			wwntoksave -= tt.tt_set_token_cost;
885 			i += p->length;
886 		} else {
887 			p->ccount--;
888 			ttwrite(p->string, p->length);
889 			wwntokbad++;
890 			i += p->length;
891 		}
892 	}
893 	wwntokc += bufsize;
894 }
895 
896 cc_token_compare(p1, p2)
897 	struct cc **p1, **p2;
898 {
899 	return (*p2)->bcount - (*p1)->bcount;
900 }
901