xref: /dragonfly/usr.bin/window/compress.c (revision 956939d5)
1 /*	$NetBSD: compress.c,v 1.7 2009/04/14 08:50:06 lukem Exp $	*/
2 
3 /*
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Edward Wang at The University of California, Berkeley.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/cdefs.h>
36 #ifndef lint
37 #if 0
38 static char sccsid[] = "@(#)compress.c	8.1 (Berkeley) 6/6/93";
39 #else
40 __RCSID("$NetBSD: compress.c,v 1.7 2009/04/14 08:50:06 lukem Exp $");
41 #endif
42 #endif /* not lint */
43 
44 #include <fcntl.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include "defs.h"
49 #include "tt.h"
50 
51 	/* special */
52 int cc_trace = 0;
53 FILE *cc_trace_fp;
54 
55 	/* tunable parameters */
56 
57 int cc_reverse = 1;
58 int cc_sort = 0;
59 int cc_chop = 0;
60 
61 int cc_token_max = 8;		/* <= TOKEN_MAX */
62 int cc_token_min = 2;		/* > tt.tt_put_token_cost */
63 int cc_npass0 = 1;
64 int cc_npass1 = 1;
65 
66 int cc_bufsize = 1024 * 3;	/* XXX, or 80 * 24 * 2 */
67 
68 int cc_ntoken = 8192;
69 
70 #define cc_weight XXX
71 #ifndef cc_weight
72 int cc_weight = 0;
73 #endif
74 
75 #define TOKEN_MAX 16
76 
77 struct cc {
78 	char string[TOKEN_MAX];
79 	char length;
80 	char flag;
81 #ifndef cc_weight
82 	short weight;
83 #endif
84 	long time;		/* time last seen */
85 	short bcount;		/* count in this buffer */
86 	short ccount;		/* count in compression */
87 	short places;		/* places in the buffer */
88 	short code;		/* token code */
89 	struct cc *qforw, *qback;
90 	struct cc *hforw, **hback;
91 };
92 
93 short cc_thresholds[TOKEN_MAX + 1];
94 #define thresh(length) (cc_thresholds[length])
95 #define threshp(code, count, length) \
96 	((code) >= 0 || (short) (count) >= cc_thresholds[length])
97 
98 #ifndef cc_weight
99 short cc_wthresholds[TOKEN_MAX + 1];
100 #define wthresh(length) (cc_wthresholds[length])
101 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
102 #else
103 #define wthreshp(weight, length) (0)
104 #endif
105 
106 #ifndef cc_weight
107 short cc_wlimits[TOKEN_MAX + 1];
108 #define wlimit(length) (cc_wlimits[length])
109 #endif
110 
111 #define put_token_score(length) ((length) - tt.tt_put_token_cost)
112 
113 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
114 #define score_adjust(score, p) \
115 	do { \
116 		int _length = (p)->length; \
117 		int _ccount = (p)->ccount; \
118 		if (threshp((p)->code, _ccount, _length) || \
119 		    wthreshp((p)->weight, _length)) /* XXX */ \
120 			(score) -= _length - tt.tt_put_token_cost; \
121 		else \
122 			(score) += cc_score_adjustments[_length][_ccount]; \
123 	} while (0)
124 
125 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
126 
127 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
128 
129 #define qinsert(p1, p2) \
130 	do { \
131 		struct cc *forw = (p1)->qforw; \
132 		struct cc *back = (p1)->qback; \
133 		back->qforw = forw; \
134 		forw->qback = back; \
135 		forw = (p2)->qforw; \
136 		(p1)->qforw = forw; \
137 		forw->qback = (p1); \
138 		(p2)->qforw = (p1); \
139 		(p1)->qback = (p2); \
140 	} while (0)
141 
142 #define qinsertq(q, p) \
143 	((q)->qforw == (q) ? 0 : \
144 	 ((q)->qback->qforw = (p)->qforw, \
145 	  (p)->qforw->qback = (q)->qback, \
146 	  (q)->qforw->qback = (p), \
147 	  (p)->qforw = (q)->qforw, \
148 	  (q)->qforw = (q), \
149 	  (q)->qback = (q)))
150 
151 #define H		(14)
152 #define HSIZE		(1 << H)
153 #define hash(h, c)	(((((h) >> (H - 8)) | (h) << 8) ^ (c)) & (HSIZE - 1))
154 
155 char *cc_buffer;
156 struct cc **cc_output;			/* the output array */
157 short *cc_places[TOKEN_MAX + 1];
158 short *cc_hashcodes;			/* for computing hashcodes */
159 struct cc **cc_htab;			/* the hash table */
160 struct cc **cc_tokens;			/* holds all the active tokens */
161 struct cc_undo {
162 	struct cc **pos;
163 	struct cc *val;
164 } *cc_undo;
165 
166 long cc_time, cc_time0;
167 
168 char *cc_tt_ob, *cc_tt_obe;
169 
170 int	cc_compress(struct cc **, struct cc **, char);
171 void	cc_compress_cleanup(struct cc **, int);
172 void	cc_compress_phase(struct cc **, int, struct cc **, int);
173 void	cc_compress_phase1(struct cc **, struct cc **, int, int);
174 void	cc_output_phase(char *, struct cc **, int);
175 int	cc_sweep(char *, int, struct cc **, int);
176 void	cc_sweep0(char *, int, int);
177 int	cc_sweep_phase(char *, int, struct cc **);
178 void	cc_sweep_reverse(struct cc **, short *);
179 int	cc_token_compare(const void *, const void *);
180 
181 int
182 ccinit(void)
183 {
184 	int i, j;
185 	struct cc *p;
186 
187 	if (tt.tt_token_max > cc_token_max)
188 		tt.tt_token_max = cc_token_max;
189 	if (tt.tt_token_min < cc_token_min)
190 		tt.tt_token_min = cc_token_min;
191 	if (tt.tt_token_min > tt.tt_token_max) {
192 		tt.tt_ntoken = 0;
193 		return 0;
194 	}
195 	if (tt.tt_ntoken > cc_ntoken / 2)	/* not likely */
196 		tt.tt_ntoken = cc_ntoken / 2;
197 #define C(x) (sizeof (x) / sizeof *(x))
198 	for (i = 0; i < (int)C(cc_thresholds); i++) {
199 		int h = i - tt.tt_put_token_cost;
200 		if (h > 0)
201 			cc_thresholds[i] =
202 				(tt.tt_set_token_cost + 1 + h - 1) / h + 1;
203 		else
204 			cc_thresholds[i] = 0;
205 	}
206 	for (i = 0; i < (int)C(cc_score_adjustments); i++) {
207 		int t = cc_thresholds[i];
208 		for (j = 0; j < (int)C(*cc_score_adjustments); j++) {
209 			if (j >= t)
210 				cc_score_adjustments[i][j] =
211 					- (i - tt.tt_put_token_cost);
212 			else if (j < t - 1)
213 				cc_score_adjustments[i][j] = 0;
214 			else
215 				/*
216 				 * cost now is
217 				 *	length * (ccount + 1)		a
218 				 * cost before was
219 				 *	set-token-cost + length +
220 				 *		ccount * put-token-cost	b
221 				 * the score adjustment is (b - a)
222 				 */
223 				cc_score_adjustments[i][j] =
224 					tt.tt_set_token_cost + i +
225 						j * tt.tt_put_token_cost -
226 							i * (j + 1);
227 			if (j >= t)
228 				cc_initial_scores[i][j] = 0;
229 			else
230 				/*
231 				 * - (set-token-cost +
232 				 *	(length - put-token-cost) -
233 				 *	(length - put-token-cost) * ccount)
234 				 */
235 				cc_initial_scores[i][j] =
236 					- (tt.tt_set_token_cost +
237 					   (i - tt.tt_put_token_cost) -
238 					   (i - tt.tt_put_token_cost) * j);
239 		}
240 	}
241 #ifndef cc_weight
242 	for (i = 1; i < C(cc_wthresholds); i++) {
243 		cc_wthresholds[i] =
244 			((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
245 				i / 5 + 1) *
246 				cc_weight + 1;
247 		cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
248 	}
249 #endif
250 #undef C
251 	if ((cc_output = (struct cc **)
252 	     malloc((unsigned) cc_bufsize * sizeof *cc_output)) == 0)
253 		goto nomem;
254 	if ((cc_hashcodes = (short *)
255 	     malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == 0)
256 		goto nomem;
257 	if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == 0)
258 		goto nomem;
259 	if ((cc_tokens = (struct cc **)
260 	     malloc((unsigned)
261 	            (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
262 		    sizeof *cc_tokens)) == 0)
263 		goto nomem;
264 	if ((cc_undo = (struct cc_undo *)
265 	     malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == 0)
266 		goto nomem;
267 	for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
268 		if ((cc_places[i] = (short *)
269 		     malloc((unsigned) cc_bufsize * sizeof **cc_places)) == 0)
270 			goto nomem;
271 	cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
272 	cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
273 	cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
274 	cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
275 	if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == 0)
276 		goto nomem;
277 	for (i = 0; i < tt.tt_ntoken; i++) {
278 		p->code = i;
279 		p->time = -1;
280 		p->qback = cc_q0a.qback;
281 		p->qforw = &cc_q0a;
282 		p->qback->qforw = p;
283 		cc_q0a.qback = p;
284 		p++;
285 	}
286 	for (; i < cc_ntoken; i++) {
287 		p->code = -1;
288 		p->time = -1;
289 		p->qback = cc_q1a.qback;
290 		p->qforw = &cc_q1a;
291 		p->qback->qforw = p;
292 		cc_q1a.qback = p;
293 		p++;
294 	}
295 	cc_tt_ob = tt_ob;
296 	cc_tt_obe = tt_obe;
297 	if ((cc_buffer = malloc((unsigned) cc_bufsize)) == 0)
298 		goto nomem;
299 	return 0;
300 nomem:
301 	wwerrno = WWE_NOMEM;
302 	return -1;
303 }
304 
305 void
306 ccstart(void)
307 {
308 	ttflush();
309 	tt_obp = tt_ob = cc_buffer;
310 	tt_obe = tt_ob + cc_bufsize;
311 	tt.tt_flush = ccflush;
312 	if (cc_trace) {
313 		cc_trace_fp = fopen("window-trace", "a");
314 		(void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
315 	}
316 	ccreset();
317 }
318 
319 void
320 ccreset(void)
321 {
322 	struct cc *p;
323 
324 	memset((char *) cc_htab, 0, HSIZE * sizeof *cc_htab);
325 	for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
326 		p->hback = 0;
327 	for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
328 		p->hback = 0;
329 }
330 
331 void
332 ccend(void)
333 {
334 
335 	ttflush();
336 	tt_obp = tt_ob = cc_tt_ob;
337 	tt_obe = cc_tt_obe;
338 	tt.tt_flush = 0;
339 	if (cc_trace_fp != NULL) {
340 		(void) fclose(cc_trace_fp);
341 		cc_trace_fp = NULL;
342 	}
343 }
344 
345 void
346 ccflush(void)
347 {
348 	int bufsize = tt_obp - tt_ob;
349 	int n;
350 
351 	if (tt_ob != cc_buffer)
352 		abort();
353 	if (cc_trace_fp != NULL) {
354 		(void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
355 		(void) putc(-1, cc_trace_fp);
356 	}
357 	tt.tt_flush = 0;
358 	(*tt.tt_compress)(1);
359 	if (bufsize < tt.tt_token_min) {
360 		ttflush();
361 		goto out;
362 	}
363 	tt_obp = tt_ob = cc_tt_ob;
364 	tt_obe = cc_tt_obe;
365 	cc_time0 = cc_time;
366 	cc_time += bufsize;
367 	n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
368 	cc_compress_phase(cc_output, bufsize, cc_tokens, n);
369 	cc_output_phase(cc_buffer, cc_output, bufsize);
370 	ttflush();
371 	tt_obp = tt_ob = cc_buffer;
372 	tt_obe = cc_buffer + cc_bufsize;
373 out:
374 	(*tt.tt_compress)(0);
375 	tt.tt_flush = ccflush;
376 }
377 
378 int
379 cc_sweep_phase(char *buffer, int bufsize, struct cc **tokens)
380 {
381 	struct cc **pp = tokens;
382 	int i, n;
383 #ifdef STATS
384 	int nn, ii;
385 #endif
386 
387 #ifdef STATS
388 	if (verbose >= 0)
389 		time_begin();
390 	if (verbose > 0)
391 		printf("Sweep:");
392 #endif
393 	cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
394 #ifdef STATS
395 	ntoken_stat = 0;
396 	nn = 0;
397 	ii = 0;
398 #endif
399 	for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
400 #ifdef STATS
401 		if (verbose > 0) {
402 			if (ii > 7) {
403 				printf("\n      ");
404 				ii = 0;
405 			}
406 			ii++;
407 			printf(" (%d", i);
408 			(void) fflush(stdout);
409 		}
410 #endif
411 		n = cc_sweep(buffer, bufsize, pp, i);
412 		pp += n;
413 #ifdef STATS
414 		if (verbose > 0) {
415 			if (--n > 0) {
416 				printf(" %d", n);
417 				nn += n;
418 			}
419 			putchar(')');
420 		}
421 #endif
422 	}
423 	qinsertq(&cc_q1b, &cc_q1a);
424 #ifdef STATS
425 	if (verbose > 0)
426 		printf("\n       %d tokens, %d candidates\n",
427 			ntoken_stat, nn);
428 	if (verbose >= 0)
429 		time_end();
430 #endif
431 	return pp - tokens;
432 }
433 
434 void
435 cc_sweep0(char *buffer, int n, int length)
436 {
437 	char *p;
438 	short *hc;
439 	int i;
440 	short c;
441 	short pc = tt.tt_padc;
442 
443 	/* n and length are at least 1 */
444 	p = buffer++;
445 	hc = cc_hashcodes;
446 	i = n;
447 	do {
448 		if ((*hc++ = *p++) == pc)
449 			hc[-1] = -1;
450 	} while (--i);
451 	while (--length) {
452 		p = buffer++;
453 		hc = cc_hashcodes;
454 		for (i = n--; --i;) {
455 			if ((c = *p++) == pc || *hc < 0)
456 				c = -1;
457 			else
458 				c = hash(*hc, c);
459 			*hc++ = c;
460 		}
461 	}
462 }
463 
464 int
465 cc_sweep(char *buffer, int bufsize, struct cc **tokens, int length)
466 {
467 	struct cc *p;
468 	char *cp;
469 	int i;
470 	short *hc;
471 	short *places = cc_places[length];
472 	struct cc **pp = tokens;
473 	short threshold = thresh(length);
474 #ifndef cc_weight
475 	short wthreshold = wthresh(length);
476 	short limit = wlimit(length);
477 #endif
478 	int time0;
479 	short pc = tt.tt_padc;
480 
481 	i = length - 1;
482 	bufsize -= i;
483 	cp = buffer + i;
484 	hc = cc_hashcodes;
485 	time0 = cc_time0;
486 	for (i = 0; i < bufsize; i++, time0++) {
487 		struct cc **h;
488 
489 		{
490 			short *hc1 = hc;
491 			short c = *cp++;
492 			short hh;
493 			if ((hh = *hc1) < 0 || c == pc) {
494 				*hc1++ = -1;
495 				hc = hc1;
496 				continue;
497 			}
498 			h = cc_htab + (*hc1++ = hash(hh, c));
499 			hc = hc1;
500 		}
501 		for (p = *h; p != 0; p = p->hforw)
502 			if (p->length == (char) length) {
503 				char *p1 = p->string;
504 				char *p2 = cp - length;
505 				int n = length;
506 				do
507 					if (*p1++ != *p2++)
508 						goto fail;
509 				while (--n);
510 				break;
511 			fail:;
512 			}
513 		if (p == 0) {
514 			p = cc_q1a.qback;
515 			if (p == &cc_q1a ||
516 			    (p->time >= cc_time0 && p->length == (char) length))
517 				continue;
518 			if (p->hback != 0)
519 				if ((*p->hback = p->hforw) != 0)
520 					p->hforw->hback = p->hback;
521 			{
522 				char *p1 = p->string;
523 				char *p2 = cp - length;
524 				int n = length;
525 				do
526 					*p1++ = *p2++;
527 				while (--n);
528 			}
529 			p->length = length;
530 #ifndef cc_weight
531 			p->weight = cc_weight;
532 #endif
533 			p->time = time0;
534 			p->bcount = 1;
535 			p->ccount = 0;
536 			p->flag = 0;
537 			if ((p->hforw = *h) != 0)
538 				p->hforw->hback = &p->hforw;
539 			*h = p;
540 			p->hback = h;
541 			qinsert(p, &cc_q1a);
542 			places[i] = -1;
543 			p->places = i;
544 #ifdef STATS
545 			ntoken_stat++;
546 #endif
547 		} else if (p->time < cc_time0) {
548 #ifndef cc_weight
549 			if ((p->weight += p->time - time0) < 0)
550 				p->weight = cc_weight;
551 			else if ((p->weight += cc_weight) > limit)
552 				p->weight = limit;
553 #endif
554 			p->time = time0;
555 			p->bcount = 1;
556 			p->ccount = 0;
557 			if (p->code >= 0) {
558 				p->flag = 1;
559 				*pp++ = p;
560 			} else
561 #ifndef cc_weight
562 			if (p->weight >= wthreshold) {
563 				p->flag = 1;
564 				*pp++ = p;
565 				qinsert(p, &cc_q1b);
566 			} else
567 #endif
568 			{
569 				p->flag = 0;
570 				qinsert(p, &cc_q1a);
571 			}
572 			places[i] = -1;
573 			p->places = i;
574 #ifdef STATS
575 			ntoken_stat++;
576 #endif
577 		} else if (p->time + length > time0) {
578 			/*
579 			 * overlapping token, don't count as two and
580 			 * don't update time0, but do adjust weight to offset
581 			 * the difference
582 			 */
583 #ifndef cc_weight
584 			if (cc_weight != 0) {	/* XXX */
585 				p->weight += time0 - p->time;
586 				if (!p->flag && p->weight >= wthreshold) {
587 					p->flag = 1;
588 					*pp++ = p;
589 					qinsert(p, &cc_q1b);
590 				}
591 			}
592 #endif
593 			places[i] = p->places;
594 			p->places = i;
595 		} else {
596 #ifndef cc_weight
597 			if ((p->weight += p->time - time0) < 0)
598 				p->weight = cc_weight;
599 			else if ((p->weight += cc_weight) > limit)
600 				p->weight = limit;
601 #endif
602 			p->time = time0;
603 			p->bcount++;
604 			if (!p->flag &&
605 			    /* code must be < 0 if flag false here */
606 			    (p->bcount >= threshold
607 #ifndef cc_weight
608 			     || p->weight >= wthreshold
609 #endif
610 			     )) {
611 				p->flag = 1;
612 				*pp++ = p;
613 				qinsert(p, &cc_q1b);
614 			}
615 			places[i] = p->places;
616 			p->places = i;
617 		}
618 	}
619 	if ((i = pp - tokens) > 0) {
620 		*pp = 0;
621 		if (cc_reverse)
622 			cc_sweep_reverse(tokens, places);
623 		if (cc_sort && i > 1) {
624 			qsort((char *) tokens, i, sizeof *tokens,
625 			      cc_token_compare);
626 		}
627 		if (cc_chop) {
628 			if ((i = i * cc_chop / 100) == 0)
629 				i = 1;
630 			tokens[i] = 0;
631 		}
632 		i++;
633 	}
634 	return i;
635 }
636 
637 void
638 cc_sweep_reverse(struct cc **pp, short int *places)
639 {
640 	struct cc *p;
641 	short frnt, back, t;
642 
643 	while ((p = *pp++) != 0) {
644 		back = -1;
645 		t = p->places;
646 		/* the list is never empty */
647 		do {
648 			frnt = places[t];
649 			places[t] = back;
650 			back = t;
651 		} while ((t = frnt) >= 0);
652 		p->places = back;
653 	}
654 }
655 
656 void
657 cc_compress_phase(struct cc **output, int bufsize, struct cc **tokens, int ntoken)
658 {
659 	int i;
660 
661 	memset((char *) output, 0, bufsize * sizeof *output);
662 	for (i = 0; i < cc_npass0; i++)
663 		cc_compress_phase1(output, tokens, ntoken, 0);
664 	for (i = 0; i < cc_npass1; i++)
665 		cc_compress_phase1(output, tokens, ntoken, 1);
666 	cc_compress_cleanup(output, bufsize);
667 }
668 
669 void
670 cc_compress_phase1(struct cc **output, struct cc **tokens, int ntoken, int flag)
671 {
672 	struct cc **pp;
673 #ifdef STATS
674 	int i = 0;
675 	int nt = 0, cc = 0, nc = 0;
676 #endif
677 
678 #ifdef STATS
679 	if (verbose >= 0)
680 		time_begin();
681 	if (verbose > 0)
682 		printf("Compress:");
683 #endif
684 	pp = tokens;
685 	while (pp < tokens + ntoken) {
686 #ifdef STATS
687 		if (verbose > 0) {
688 			ntoken_stat = 0;
689 			ccount_stat = 0;
690 			ncover_stat = 0;
691 			if (i > 2) {
692 				printf("\n         ");
693 				i = 0;
694 			}
695 			i++;
696 			printf(" (%d", (*pp)->length);
697 			(void) fflush(stdout);
698 		}
699 #endif
700 		pp += cc_compress(output, pp, flag);
701 #ifdef STATS
702 		if (verbose > 0) {
703 			printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
704 			       ncover_stat);
705 			nt += ntoken_stat;
706 			cc += ccount_stat;
707 			nc += ncover_stat;
708 		}
709 #endif
710 	}
711 #ifdef STATS
712 	if (verbose > 0)
713 		printf("\n   total: (%dt %du %dc)\n", nt, cc, nc);
714 	if (verbose >= 0)
715 		time_end();
716 #endif
717 }
718 
719 void
720 cc_compress_cleanup(struct cc **output, int bufsize)
721 {
722 	struct cc **end;
723 
724 	/* the previous output phase may have been interrupted */
725 	qinsertq(&cc_q0b, &cc_q0a);
726 	for (end = output + bufsize; output < end;) {
727 		struct cc *p;
728 		int length;
729 		if ((p = *output) == 0) {
730 			output++;
731 			continue;
732 		}
733 		length = p->length;
734 		if (!p->flag) {
735 		} else if (p->code >= 0) {
736 			qinsert(p, &cc_q0b);
737 			p->flag = 0;
738 		} else if (p->ccount == 0) {
739 			*output = 0;
740 		} else if (p->ccount >= thresh(length)
741 #ifndef cc_weight
742 			   || wthreshp(p->weight, length)
743 #endif
744 			   ) {
745 			p->flag = 0;
746 		} else {
747 			p->ccount = 0;
748 			*output = 0;
749 		}
750 		output += length;
751 	}
752 }
753 
754 int
755 cc_compress(struct cc **output, struct cc **tokens, char flag)
756 {
757 	struct cc **pp = tokens;
758 	struct cc *p = *pp++;
759 	int length = p->length;
760 	int threshold = thresh(length);
761 #ifndef cc_weight
762 	short wthreshold = wthresh(length);
763 #endif
764 	short *places = cc_places[length];
765 	int *initial_scores = cc_initial_scores[length];
766 	int initial_score0 = put_token_score(length);
767 
768 	do {
769 		int score;
770 		struct cc_undo *undop;
771 		int ccount;
772 #ifdef STATS
773 		int ncover;
774 #endif
775 		int i;
776 
777 		ccount = p->ccount;
778 		if ((short) ccount >= p->bcount)
779 			continue;
780 		if (p->code >= 0 || ccount >= threshold)
781 			score = 0;
782 #ifndef cc_weight
783 		else if (p->weight >= wthreshold)
784 			/* allow one fewer match than normal */
785 			/* XXX, should adjust for ccount */
786 			score = - tt.tt_set_token_cost;
787 #endif
788 		else
789 			score = initial_scores[ccount];
790 		undop = cc_undo;
791 #ifdef STATS
792 		ncover = 0;
793 #endif
794 		for (i = p->places; i >= 0; i = places[i]) {
795 			struct cc **jp;
796 			struct cc *x;
797 			struct cc **ip = output + i;
798 			int score0 = initial_score0;
799 			struct cc **iip = ip + length;
800 			struct cc_undo *undop1 = undop;
801 
802 			if ((x = *(jp = ip)) != 0)
803 				goto z;
804 			while (--jp >= output)
805 				if ((x = *jp) != 0) {
806 					if (jp + x->length > ip)
807 						goto z;
808 					break;
809 				}
810 			jp = ip + 1;
811 			while (jp < iip) {
812 				if ((x = *jp) == 0) {
813 					jp++;
814 					continue;
815 				}
816 			z:
817 				if (x == p)
818 					goto undo;
819 #ifdef STATS
820 				ncover++;
821 #endif
822 				undop->pos = jp;
823 				undop->val = x;
824 				undop++;
825 				*jp = 0;
826 				x->ccount--;
827 				score_adjust(score0, x);
828 				if (score0 < 0 && flag)
829 					goto undo;
830 				jp += x->length;
831 			}
832 			undop->pos = ip;
833 			undop->val = 0;
834 			undop++;
835 			*ip = p;
836 			ccount++;
837 			score += score0;
838 			continue;
839 		undo:
840 			while (--undop >= undop1)
841 				if ((*undop->pos = x = undop->val))
842 					x->ccount++;
843 			undop++;
844 		}
845 		if (score > 0) {
846 #ifdef STATS
847 			ccount_stat += ccount - p->ccount;
848 			ntoken_stat++;
849 			ncover_stat += ncover;
850 #endif
851 			p->ccount = ccount;
852 		} else {
853 			struct cc_undo *u = cc_undo;
854 			while (--undop >= u) {
855 				struct cc *x;
856 				if ((*undop->pos = x = undop->val))
857 					x->ccount++;
858 			}
859 		}
860 	} while ((p = *pp++) != 0);
861 	return pp - tokens;
862 }
863 
864 void
865 cc_output_phase(char *buffer, struct cc **output, int bufsize)
866 {
867 	int i;
868 	struct cc *p, *p1;
869 
870 	for (i = 0; i < bufsize;) {
871 		if ((p = output[i]) == 0) {
872 			ttputc(buffer[i]);
873 			i++;
874 		} else if (p->code >= 0) {
875 			if (--p->ccount == 0)
876 				qinsert(p, &cc_q0a);
877 			(*tt.tt_put_token)(p->code, p->string, p->length);
878 			wwntokuse++;
879 			wwntoksave += put_token_score(p->length);
880 			i += p->length;
881 		} else if ((p1 = cc_q0a.qback) != &cc_q0a) {
882 			p->code = p1->code;
883 			p1->code = -1;
884 			qinsert(p1, &cc_q1a);
885 			if (--p->ccount == 0)
886 				qinsert(p, &cc_q0a);
887 			else
888 				qinsert(p, &cc_q0b);
889 			(*tt.tt_set_token)(p->code, p->string, p->length);
890 			wwntokdef++;
891 			wwntoksave -= tt.tt_set_token_cost;
892 			i += p->length;
893 		} else {
894 			p->ccount--;
895 			ttwrite(p->string, p->length);
896 			wwntokbad++;
897 			i += p->length;
898 		}
899 	}
900 	wwntokc += bufsize;
901 }
902 
903 int
904 cc_token_compare(const void *p1, const void *p2)
905 {
906 	const struct cc * const * vp1 = p1;
907 	const struct cc * const * vp2 = p2;
908 	return (*vp2)->bcount - (*vp1)->bcount;
909 }
910