xref: /dragonfly/usr.bin/window/compress.c (revision 19fe1c42)
1 /*
2  * Copyright (c) 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Edward Wang at The University of California, Berkeley.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * @(#)compress.c	8.1 (Berkeley) 6/6/93
37  * $FreeBSD: src/usr.bin/window/compress.c,v 1.2.12.1 2001/05/17 09:45:00 obrien Exp $
38  * $DragonFly: src/usr.bin/window/compress.c,v 1.2 2003/06/17 04:29:33 dillon Exp $
39  */
40 
41 	/* special */
42 #include <stdio.h>
43 #include <fcntl.h>
44 #include <stdlib.h>
45 #include <string.h>
46 
47 #include "ww.h"
48 #include "tt.h"
49 
50 int cc_trace = 0;
51 FILE *cc_trace_fp;
52 
53 	/* tunable parameters */
54 
55 int cc_reverse = 1;
56 int cc_sort = 0;
57 int cc_chop = 0;
58 
59 int cc_token_max = 8;		/* <= TOKEN_MAX */
60 int cc_token_min = 2;		/* > tt.tt_put_token_cost */
61 int cc_npass0 = 1;
62 int cc_npass1 = 1;
63 
64 int cc_bufsize = 1024 * 3;	/* XXX, or 80 * 24 * 2 */
65 
66 int cc_ntoken = 8192;
67 
68 #define cc_weight XXX
69 #ifndef cc_weight
70 int cc_weight = 0;
71 #endif
72 
73 #define TOKEN_MAX 16
74 
75 struct cc {
76 	char string[TOKEN_MAX];
77 	char length;
78 	char flag;
79 #ifndef cc_weight
80 	short weight;
81 #endif
82 	long time;		/* time last seen */
83 	short bcount;		/* count in this buffer */
84 	short ccount;		/* count in compression */
85 	short places;		/* places in the buffer */
86 	short code;		/* token code */
87 	struct cc *qforw, *qback;
88 	struct cc *hforw, **hback;
89 };
90 
91 short cc_thresholds[TOKEN_MAX + 1];
92 #define thresh(length) (cc_thresholds[length])
93 #define threshp(code, count, length) \
94 	((code) >= 0 || (short) (count) >= cc_thresholds[length])
95 
96 #ifndef cc_weight
97 short cc_wthresholds[TOKEN_MAX + 1];
98 #define wthresh(length) (cc_wthresholds[length])
99 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
100 #else
101 #define wthreshp(weight, length) (0)
102 #endif
103 
104 #ifndef cc_weight
105 short cc_wlimits[TOKEN_MAX + 1];
106 #define wlimit(length) (cc_wlimits[length])
107 #endif
108 
109 #define put_token_score(length) ((length) - tt.tt_put_token_cost)
110 
111 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
112 #define score_adjust(score, p) \
113 	do { \
114 		int length = (p)->length; \
115 		int ccount = (p)->ccount; \
116 		if (threshp((p)->code, ccount, length) || \
117 		    wthreshp((p)->weight, length)) /* XXX */ \
118 			(score) -= length - tt.tt_put_token_cost; \
119 		else \
120 			(score) += cc_score_adjustments[length][ccount]; \
121 	} while (0)
122 
123 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
124 
125 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
126 
127 #define qinsert(p1, p2) \
128 	do { \
129 		register struct cc *forw = (p1)->qforw; \
130 		register struct cc *back = (p1)->qback; \
131 		back->qforw = forw; \
132 		forw->qback = back; \
133 		forw = (p2)->qforw; \
134 		(p1)->qforw = forw; \
135 		forw->qback = (p1); \
136 		(p2)->qforw = (p1); \
137 		(p1)->qback = (p2); \
138 	} while (0)
139 
140 #define qinsertq(q, p) \
141 	((q)->qforw == (q) ? 0 : \
142 	 ((q)->qback->qforw = (p)->qforw, \
143 	  (p)->qforw->qback = (q)->qback, \
144 	  (q)->qforw->qback = (p), \
145 	  (p)->qforw = (q)->qforw, \
146 	  (q)->qforw = (q), \
147 	  (q)->qback = (q)))
148 
149 #define H		(14)
150 #define HSIZE		(1 << H)
151 #define hash(h, c)	((((h) >> H - 8 | (h) << 8) ^ (c)) & HSIZE - 1)
152 
153 char *cc_buffer;
154 struct cc **cc_output;			/* the output array */
155 short *cc_places[TOKEN_MAX + 1];
156 short *cc_hashcodes;			/* for computing hashcodes */
157 struct cc **cc_htab;			/* the hash table */
158 struct cc **cc_tokens;			/* holds all the active tokens */
159 struct cc_undo {
160 	struct cc **pos;
161 	struct cc *val;
162 } *cc_undo;
163 
164 long cc_time, cc_time0;
165 
166 char *cc_tt_ob, *cc_tt_obe;
167 
168 ccinit()
169 {
170 	register i, j;
171 	register struct cc *p;
172 
173 	if (tt.tt_token_max > cc_token_max)
174 		tt.tt_token_max = cc_token_max;
175 	if (tt.tt_token_min < cc_token_min)
176 		tt.tt_token_min = cc_token_min;
177 	if (tt.tt_token_min > tt.tt_token_max) {
178 		tt.tt_ntoken = 0;
179 		return 0;
180 	}
181 	if (tt.tt_ntoken > cc_ntoken / 2)	/* not likely */
182 		tt.tt_ntoken = cc_ntoken / 2;
183 #define C(x) (sizeof (x) / sizeof *(x))
184 	for (i = 0; i < C(cc_thresholds); i++) {
185 		int h = i - tt.tt_put_token_cost;
186 		if (h > 0)
187 			cc_thresholds[i] =
188 				(tt.tt_set_token_cost + 1 + h - 1) / h + 1;
189 		else
190 			cc_thresholds[i] = 0;
191 	}
192 	for (i = 0; i < C(cc_score_adjustments); i++) {
193 		int t = cc_thresholds[i];
194 		for (j = 0; j < C(*cc_score_adjustments); j++) {
195 			if (j >= t)
196 				cc_score_adjustments[i][j] =
197 					- (i - tt.tt_put_token_cost);
198 			else if (j < t - 1)
199 				cc_score_adjustments[i][j] = 0;
200 			else
201 				/*
202 				 * cost now is
203 				 *	length * (ccount + 1)		a
204 				 * cost before was
205 				 *	set-token-cost + length +
206 				 *		ccount * put-token-cost	b
207 				 * the score adjustment is (b - a)
208 				 */
209 				cc_score_adjustments[i][j] =
210 					tt.tt_set_token_cost + i +
211 						j * tt.tt_put_token_cost -
212 							i * (j + 1);
213 			if (j >= t)
214 				cc_initial_scores[i][j] = 0;
215 			else
216 				/*
217 				 * - (set-token-cost +
218 				 *	(length - put-token-cost) -
219 				 *	(length - put-token-cost) * ccount)
220 				 */
221 				cc_initial_scores[i][j] =
222 					- (tt.tt_set_token_cost +
223 					   (i - tt.tt_put_token_cost) -
224 					   (i - tt.tt_put_token_cost) * j);
225 		}
226 	}
227 #ifndef cc_weight
228 	for (i = 1; i < C(cc_wthresholds); i++) {
229 		cc_wthresholds[i] =
230 			((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
231 				i / 5 + 1) *
232 				cc_weight + 1;
233 		cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
234 	}
235 #endif
236 #undef C
237 	if ((cc_output = (struct cc **)
238 	     malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
239 		goto nomem;
240 	if ((cc_hashcodes = (short *)
241 	     malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
242 		goto nomem;
243 	if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
244 		goto nomem;
245 	if ((cc_tokens = (struct cc **)
246 	     malloc((unsigned)
247 	            (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
248 		    sizeof *cc_tokens)) == 0)
249 		goto nomem;
250 	if ((cc_undo = (struct cc_undo *)
251 	     malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
252 		goto nomem;
253 	for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
254 		if ((cc_places[i] = (short *)
255 		     malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
256 			goto nomem;
257 	cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
258 	cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
259 	cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
260 	cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
261 	if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
262 		goto nomem;
263 	for (i = 0; i < tt.tt_ntoken; i++) {
264 		p->code = i;
265 		p->time = -1;
266 		p->qback = cc_q0a.qback;
267 		p->qforw = &cc_q0a;
268 		p->qback->qforw = p;
269 		cc_q0a.qback = p;
270 		p++;
271 	}
272 	for (; i < cc_ntoken; i++) {
273 		p->code = -1;
274 		p->time = -1;
275 		p->qback = cc_q1a.qback;
276 		p->qforw = &cc_q1a;
277 		p->qback->qforw = p;
278 		cc_q1a.qback = p;
279 		p++;
280 	}
281 	cc_tt_ob = tt_ob;
282 	cc_tt_obe = tt_obe;
283 	if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
284 		goto nomem;
285 	return 0;
286 nomem:
287 	wwerrno = WWE_NOMEM;
288 	return -1;
289 }
290 
291 ccstart()
292 {
293 	int ccflush();
294 
295 	ttflush();
296 	tt_obp = tt_ob = cc_buffer;
297 	tt_obe = tt_ob + cc_bufsize;
298 	tt.tt_flush = ccflush;
299 	if (cc_trace) {
300 		cc_trace_fp = fopen("window-trace", "a");
301 		(void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
302 	}
303 	ccreset();
304 }
305 
306 ccreset()
307 {
308 	register struct cc *p;
309 
310 	bzero((char *) cc_htab, HSIZE * sizeof *cc_htab);
311 	for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
312 		p->hback = 0;
313 	for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
314 		p->hback = 0;
315 }
316 
317 ccend()
318 {
319 
320 	ttflush();
321 	tt_obp = tt_ob = cc_tt_ob;
322 	tt_obe = cc_tt_obe;
323 	tt.tt_flush = 0;
324 	if (cc_trace_fp != NULL) {
325 		(void) fclose(cc_trace_fp);
326 		cc_trace_fp = NULL;
327 	}
328 }
329 
330 ccflush()
331 {
332 	int bufsize = tt_obp - tt_ob;
333 	int n;
334 
335 	if (tt_ob != cc_buffer)
336 		abort();
337 	if (cc_trace_fp != NULL) {
338 		(void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
339 		(void) putc(-1, cc_trace_fp);
340 	}
341 	tt.tt_flush = 0;
342 	(*tt.tt_compress)(1);
343 	if (bufsize < tt.tt_token_min) {
344 		ttflush();
345 		goto out;
346 	}
347 	tt_obp = tt_ob = cc_tt_ob;
348 	tt_obe = cc_tt_obe;
349 	cc_time0 = cc_time;
350 	cc_time += bufsize;
351 	n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
352 	cc_compress_phase(cc_output, bufsize, cc_tokens, n);
353 	cc_output_phase(cc_buffer, cc_output, bufsize);
354 	ttflush();
355 	tt_obp = tt_ob = cc_buffer;
356 	tt_obe = cc_buffer + cc_bufsize;
357 out:
358 	(*tt.tt_compress)(0);
359 	tt.tt_flush = ccflush;
360 }
361 
362 cc_sweep_phase(buffer, bufsize, tokens)
363 	char *buffer;
364 	struct cc **tokens;
365 {
366 	register struct cc **pp = tokens;
367 	register i, n;
368 #ifdef STATS
369 	int nn, ii;
370 #endif
371 
372 #ifdef STATS
373 	if (verbose >= 0)
374 		time_begin();
375 	if (verbose > 0)
376 		printf("Sweep:");
377 #endif
378 	cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
379 #ifdef STATS
380 	ntoken_stat = 0;
381 	nn = 0;
382 	ii = 0;
383 #endif
384 	for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
385 #ifdef STATS
386 		if (verbose > 0) {
387 			if (ii > 7) {
388 				printf("\n      ");
389 				ii = 0;
390 			}
391 			ii++;
392 			printf(" (%d", i);
393 			(void) fflush(stdout);
394 		}
395 #endif
396 		n = cc_sweep(buffer, bufsize, pp, i);
397 		pp += n;
398 #ifdef STATS
399 		if (verbose > 0) {
400 			if (--n > 0) {
401 				printf(" %d", n);
402 				nn += n;
403 			}
404 			putchar(')');
405 		}
406 #endif
407 	}
408 	qinsertq(&cc_q1b, &cc_q1a);
409 #ifdef STATS
410 	if (verbose > 0)
411 		printf("\n       %d tokens, %d candidates\n",
412 			ntoken_stat, nn);
413 	if (verbose >= 0)
414 		time_end();
415 #endif
416 	return pp - tokens;
417 }
418 
419 cc_sweep0(buffer, n, length)
420 	char *buffer;
421 {
422 	register char *p;
423 	register short *hc;
424 	register i;
425 	register short c;
426 	register short pc = tt.tt_padc;
427 
428 	/* n and length are at least 1 */
429 	p = buffer++;
430 	hc = cc_hashcodes;
431 	i = n;
432 	do {
433 		if ((*hc++ = *p++) == pc)
434 			hc[-1] = -1;
435 	} while (--i);
436 	while (--length) {
437 		p = buffer++;
438 		hc = cc_hashcodes;
439 		for (i = n--; --i;) {
440 			if ((c = *p++) == pc || *hc < 0)
441 				c = -1;
442 			else
443 				c = hash(*hc, c);
444 			*hc++ = c;
445 		}
446 	}
447 }
448 
449 cc_sweep(buffer, bufsize, tokens, length)
450 	char *buffer;
451 	struct cc **tokens;
452 	register length;
453 {
454 	register struct cc *p;
455 	register char *cp;
456 	register i;
457 	short *hc;
458 	short *places = cc_places[length];
459 	struct cc **pp = tokens;
460 	short threshold = thresh(length);
461 #ifndef cc_weight
462 	short wthreshold = wthresh(length);
463 	short limit = wlimit(length);
464 #endif
465 	int time;
466 	short pc = tt.tt_padc;
467 
468 	i = length - 1;
469 	bufsize -= i;
470 	cp = buffer + i;
471 	hc = cc_hashcodes;
472 	time = cc_time0;
473 	for (i = 0; i < bufsize; i++, time++) {
474 		struct cc **h;
475 
476 		{
477 			register short *hc1 = hc;
478 			register short c = *cp++;
479 			register short hh;
480 			if ((hh = *hc1) < 0 || c == pc) {
481 				*hc1++ = -1;
482 				hc = hc1;
483 				continue;
484 			}
485 			h = cc_htab + (*hc1++ = hash(hh, c));
486 			hc = hc1;
487 		}
488 		for (p = *h; p != 0; p = p->hforw)
489 			if (p->length == (char) length) {
490 				register char *p1 = p->string;
491 				register char *p2 = cp - length;
492 				register n = length;
493 				do
494 					if (*p1++ != *p2++)
495 						goto fail;
496 				while (--n);
497 				break;
498 			fail:;
499 			}
500 		if (p == 0) {
501 			p = cc_q1a.qback;
502 			if (p == &cc_q1a ||
503 			    p->time >= cc_time0 && p->length == (char) length)
504 				continue;
505 			if (p->hback != 0)
506 				if ((*p->hback = p->hforw) != 0)
507 					p->hforw->hback = p->hback;
508 			{
509 				register char *p1 = p->string;
510 				register char *p2 = cp - length;
511 				register n = length;
512 				do
513 					*p1++ = *p2++;
514 				while (--n);
515 			}
516 			p->length = length;
517 #ifndef cc_weight
518 			p->weight = cc_weight;
519 #endif
520 			p->time = time;
521 			p->bcount = 1;
522 			p->ccount = 0;
523 			p->flag = 0;
524 			if ((p->hforw = *h) != 0)
525 				p->hforw->hback = &p->hforw;
526 			*h = p;
527 			p->hback = h;
528 			qinsert(p, &cc_q1a);
529 			places[i] = -1;
530 			p->places = i;
531 #ifdef STATS
532 			ntoken_stat++;
533 #endif
534 		} else if (p->time < cc_time0) {
535 #ifndef cc_weight
536 			if ((p->weight += p->time - time) < 0)
537 				p->weight = cc_weight;
538 			else if ((p->weight += cc_weight) > limit)
539 				p->weight = limit;
540 #endif
541 			p->time = time;
542 			p->bcount = 1;
543 			p->ccount = 0;
544 			if (p->code >= 0) {
545 				p->flag = 1;
546 				*pp++ = p;
547 			} else
548 #ifndef cc_weight
549 			if (p->weight >= wthreshold) {
550 				p->flag = 1;
551 				*pp++ = p;
552 				qinsert(p, &cc_q1b);
553 			} else
554 #endif
555 			{
556 				p->flag = 0;
557 				qinsert(p, &cc_q1a);
558 			}
559 			places[i] = -1;
560 			p->places = i;
561 #ifdef STATS
562 			ntoken_stat++;
563 #endif
564 		} else if (p->time + length > time) {
565 			/*
566 			 * overlapping token, don't count as two and
567 			 * don't update time, but do adjust weight to offset
568 			 * the difference
569 			 */
570 #ifndef cc_weight
571 			if (cc_weight != 0) {	/* XXX */
572 				p->weight += time - p->time;
573 				if (!p->flag && p->weight >= wthreshold) {
574 					p->flag = 1;
575 					*pp++ = p;
576 					qinsert(p, &cc_q1b);
577 				}
578 			}
579 #endif
580 			places[i] = p->places;
581 			p->places = i;
582 		} else {
583 #ifndef cc_weight
584 			if ((p->weight += p->time - time) < 0)
585 				p->weight = cc_weight;
586 			else if ((p->weight += cc_weight) > limit)
587 				p->weight = limit;
588 #endif
589 			p->time = time;
590 			p->bcount++;
591 			if (!p->flag &&
592 			    /* code must be < 0 if flag false here */
593 			    (p->bcount >= threshold
594 #ifndef cc_weight
595 			     || p->weight >= wthreshold
596 #endif
597 			     )) {
598 				p->flag = 1;
599 				*pp++ = p;
600 				qinsert(p, &cc_q1b);
601 			}
602 			places[i] = p->places;
603 			p->places = i;
604 		}
605 	}
606 	if ((i = pp - tokens) > 0) {
607 		*pp = 0;
608 		if (cc_reverse)
609 			cc_sweep_reverse(tokens, places);
610 		if (cc_sort && i > 1) {
611 			int cc_token_compare();
612 			qsort((char *) tokens, i, sizeof *tokens,
613 			      cc_token_compare);
614 		}
615 		if (cc_chop) {
616 			if ((i = i * cc_chop / 100) == 0)
617 				i = 1;
618 			tokens[i] = 0;
619 		}
620 		i++;
621 	}
622 	return i;
623 }
624 
625 cc_sweep_reverse(pp, places)
626 	register struct cc **pp;
627 	register short *places;
628 {
629 	register struct cc *p;
630 	register short front, back, t;
631 
632 	while ((p = *pp++) != 0) {
633 		back = -1;
634 		t = p->places;
635 		/* the list is never empty */
636 		do {
637 			front = places[t];
638 			places[t] = back;
639 			back = t;
640 		} while ((t = front) >= 0);
641 		p->places = back;
642 	}
643 }
644 
645 cc_compress_phase(output, bufsize, tokens, ntoken)
646 	struct cc **output;
647 	struct cc **tokens;
648 {
649 	register i;
650 
651 	bzero((char *) output, bufsize * sizeof *output);
652 	for (i = 0; i < cc_npass0; i++)
653 		cc_compress_phase1(output, tokens, ntoken, 0);
654 	for (i = 0; i < cc_npass1; i++)
655 		cc_compress_phase1(output, tokens, ntoken, 1);
656 	cc_compress_cleanup(output, bufsize);
657 }
658 
659 cc_compress_phase1(output, tokens, ntoken, flag)
660 	register struct cc **output;
661 	struct cc **tokens;
662 {
663 	register struct cc **pp;
664 #ifdef STATS
665 	register int i = 0;
666 	int nt = 0, cc = 0, nc = 0;
667 #endif
668 
669 #ifdef STATS
670 	if (verbose >= 0)
671 		time_begin();
672 	if (verbose > 0)
673 		printf("Compress:");
674 #endif
675 	pp = tokens;
676 	while (pp < tokens + ntoken) {
677 #ifdef STATS
678 		if (verbose > 0) {
679 			ntoken_stat = 0;
680 			ccount_stat = 0;
681 			ncover_stat = 0;
682 			if (i > 2) {
683 				printf("\n         ");
684 				i = 0;
685 			}
686 			i++;
687 			printf(" (%d", (*pp)->length);
688 			(void) fflush(stdout);
689 		}
690 #endif
691 		pp += cc_compress(output, pp, flag);
692 #ifdef STATS
693 		if (verbose > 0) {
694 			printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
695 			       ncover_stat);
696 			nt += ntoken_stat;
697 			cc += ccount_stat;
698 			nc += ncover_stat;
699 		}
700 #endif
701 	}
702 #ifdef STATS
703 	if (verbose > 0)
704 		printf("\n   total: (%dt %du %dc)\n", nt, cc, nc);
705 	if (verbose >= 0)
706 		time_end();
707 #endif
708 }
709 
710 cc_compress_cleanup(output, bufsize)
711 	register struct cc **output;
712 {
713 	register struct cc **end;
714 
715 	/* the previous output phase may have been interrupted */
716 	qinsertq(&cc_q0b, &cc_q0a);
717 	for (end = output + bufsize; output < end;) {
718 		register struct cc *p;
719 		register length;
720 		if ((p = *output) == 0) {
721 			output++;
722 			continue;
723 		}
724 		length = p->length;
725 		if (!p->flag) {
726 		} else if (p->code >= 0) {
727 			qinsert(p, &cc_q0b);
728 			p->flag = 0;
729 		} else if (p->ccount == 0) {
730 			*output = 0;
731 		} else if (p->ccount >= thresh(length)
732 #ifndef cc_weight
733 			   || wthreshp(p->weight, length)
734 #endif
735 			   ) {
736 			p->flag = 0;
737 		} else {
738 			p->ccount = 0;
739 			*output = 0;
740 		}
741 		output += length;
742 	}
743 }
744 
745 cc_compress(output, tokens, flag)
746 	struct cc **output;
747 	struct cc **tokens;
748 	char flag;
749 {
750 	struct cc **pp = tokens;
751 	register struct cc *p = *pp++;
752 	int length = p->length;
753 	int threshold = thresh(length);
754 #ifndef cc_weight
755 	short wthreshold = wthresh(length);
756 #endif
757 	short *places = cc_places[length];
758 	int *initial_scores = cc_initial_scores[length];
759 	int initial_score0 = put_token_score(length);
760 
761 	do {
762 		int score;
763 		register struct cc_undo *undop;
764 		int ccount;
765 #ifdef STATS
766 		int ncover;
767 #endif
768 		int i;
769 
770 		ccount = p->ccount;
771 		if ((short) ccount >= p->bcount)
772 			continue;
773 		if (p->code >= 0 || ccount >= threshold)
774 			score = 0;
775 #ifndef cc_weight
776 		else if (p->weight >= wthreshold)
777 			/* allow one fewer match than normal */
778 			/* XXX, should adjust for ccount */
779 			score = - tt.tt_set_token_cost;
780 #endif
781 		else
782 			score = initial_scores[ccount];
783 		undop = cc_undo;
784 #ifdef STATS
785 		ncover = 0;
786 #endif
787 		for (i = p->places; i >= 0; i = places[i]) {
788 			register struct cc **jp;
789 			register struct cc *x;
790 			register struct cc **ip = output + i;
791 			register score0 = initial_score0;
792 			struct cc **iip = ip + length;
793 			struct cc_undo *undop1 = undop;
794 
795 			if ((x = *(jp = ip)) != 0)
796 				goto z;
797 			while (--jp >= output)
798 				if ((x = *jp) != 0) {
799 					if (jp + x->length > ip)
800 						goto z;
801 					break;
802 				}
803 			jp = ip + 1;
804 			while (jp < iip) {
805 				if ((x = *jp) == 0) {
806 					jp++;
807 					continue;
808 				}
809 			z:
810 				if (x == p)
811 					goto undo;
812 #ifdef STATS
813 				ncover++;
814 #endif
815 				undop->pos = jp;
816 				undop->val = x;
817 				undop++;
818 				*jp = 0;
819 				x->ccount--;
820 				score_adjust(score0, x);
821 				if (score0 < 0 && flag)
822 					goto undo;
823 				jp += x->length;
824 			}
825 			undop->pos = ip;
826 			undop->val = 0;
827 			undop++;
828 			*ip = p;
829 			ccount++;
830 			score += score0;
831 			continue;
832 		undo:
833 			while (--undop >= undop1)
834 				if (*undop->pos = x = undop->val)
835 					x->ccount++;
836 			undop++;
837 		}
838 		if (score > 0) {
839 #ifdef STATS
840 			ccount_stat += ccount - p->ccount;
841 			ntoken_stat++;
842 			ncover_stat += ncover;
843 #endif
844 			p->ccount = ccount;
845 		} else {
846 			register struct cc_undo *u = cc_undo;
847 			while (--undop >= u) {
848 				register struct cc *x;
849 				if (*undop->pos = x = undop->val)
850 					x->ccount++;
851 			}
852 		}
853 	} while ((p = *pp++) != 0);
854 	return pp - tokens;
855 }
856 
857 cc_output_phase(buffer, output, bufsize)
858 	register char *buffer;
859 	register struct cc **output;
860 	register bufsize;
861 {
862 	register i;
863 	register struct cc *p, *p1;
864 
865 	for (i = 0; i < bufsize;) {
866 		if ((p = output[i]) == 0) {
867 			ttputc(buffer[i]);
868 			i++;
869 		} else if (p->code >= 0) {
870 			if (--p->ccount == 0)
871 				qinsert(p, &cc_q0a);
872 			(*tt.tt_put_token)(p->code, p->string, p->length);
873 			wwntokuse++;
874 			wwntoksave += put_token_score(p->length);
875 			i += p->length;
876 		} else if ((p1 = cc_q0a.qback) != &cc_q0a) {
877 			p->code = p1->code;
878 			p1->code = -1;
879 			qinsert(p1, &cc_q1a);
880 			if (--p->ccount == 0)
881 				qinsert(p, &cc_q0a);
882 			else
883 				qinsert(p, &cc_q0b);
884 			(*tt.tt_set_token)(p->code, p->string, p->length);
885 			wwntokdef++;
886 			wwntoksave -= tt.tt_set_token_cost;
887 			i += p->length;
888 		} else {
889 			p->ccount--;
890 			ttwrite(p->string, p->length);
891 			wwntokbad++;
892 			i += p->length;
893 		}
894 	}
895 	wwntokc += bufsize;
896 }
897 
898 cc_token_compare(p1, p2)
899 	struct cc **p1, **p2;
900 {
901 	return (*p2)->bcount - (*p1)->bcount;
902 }
903