xref: /dragonfly/usr.bin/window/compress.c (revision 678e8cc6)
1 /*	@(#)compress.c	8.1 (Berkeley) 6/6/93	*/
2 /*	$NetBSD: compress.c,v 1.7 2009/04/14 08:50:06 lukem Exp $	*/
3 
4 /*
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Edward Wang at The University of California, Berkeley.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <fcntl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include "defs.h"
41 #include "tt.h"
42 
43 	/* special */
44 int cc_trace = 0;
45 FILE *cc_trace_fp;
46 
47 	/* tunable parameters */
48 
49 int cc_reverse = 1;
50 int cc_sort = 0;
51 int cc_chop = 0;
52 
53 int cc_token_max = 8;		/* <= TOKEN_MAX */
54 int cc_token_min = 2;		/* > tt.tt_put_token_cost */
55 int cc_npass0 = 1;
56 int cc_npass1 = 1;
57 
58 int cc_bufsize = 1024 * 3;	/* XXX, or 80 * 24 * 2 */
59 
60 int cc_ntoken = 8192;
61 
62 #define cc_weight XXX
63 #ifndef cc_weight
64 int cc_weight = 0;
65 #endif
66 
67 #define TOKEN_MAX 16
68 
69 struct cc {
70 	char string[TOKEN_MAX];
71 	char length;
72 	char flag;
73 #ifndef cc_weight
74 	short weight;
75 #endif
76 	long time;		/* time last seen */
77 	short bcount;		/* count in this buffer */
78 	short ccount;		/* count in compression */
79 	short places;		/* places in the buffer */
80 	short code;		/* token code */
81 	struct cc *qforw, *qback;
82 	struct cc *hforw, **hback;
83 };
84 
85 short cc_thresholds[TOKEN_MAX + 1];
86 #define thresh(length) (cc_thresholds[length])
87 #define threshp(code, count, length) \
88 	((code) >= 0 || (short) (count) >= cc_thresholds[length])
89 
90 #ifndef cc_weight
91 short cc_wthresholds[TOKEN_MAX + 1];
92 #define wthresh(length) (cc_wthresholds[length])
93 #define wthreshp(weight, length) ((short) (weight) >= cc_wthresholds[length])
94 #else
95 #define wthreshp(weight, length) (0)
96 #endif
97 
98 #ifndef cc_weight
99 short cc_wlimits[TOKEN_MAX + 1];
100 #define wlimit(length) (cc_wlimits[length])
101 #endif
102 
103 #define put_token_score(length) ((length) - tt.tt_put_token_cost)
104 
105 int cc_score_adjustments[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
106 #define score_adjust(score, p) \
107 	do { \
108 		int _length = (p)->length; \
109 		int _ccount = (p)->ccount; \
110 		if (threshp((p)->code, _ccount, _length) || \
111 		    wthreshp((p)->weight, _length)) /* XXX */ \
112 			(score) -= _length - tt.tt_put_token_cost; \
113 		else \
114 			(score) += cc_score_adjustments[_length][_ccount]; \
115 	} while (0)
116 
117 int cc_initial_scores[TOKEN_MAX + 1][8]; /* XXX, 8 > max of cc_thresholds */
118 
119 struct cc cc_q0a, cc_q0b, cc_q1a, cc_q1b;
120 
121 #define qinsert(p1, p2) \
122 	do { \
123 		struct cc *forw = (p1)->qforw; \
124 		struct cc *back = (p1)->qback; \
125 		back->qforw = forw; \
126 		forw->qback = back; \
127 		forw = (p2)->qforw; \
128 		(p1)->qforw = forw; \
129 		forw->qback = (p1); \
130 		(p2)->qforw = (p1); \
131 		(p1)->qback = (p2); \
132 	} while (0)
133 
134 #define qinsertq(q, p) \
135 	((q)->qforw == (q) ? 0 : \
136 	 ((q)->qback->qforw = (p)->qforw, \
137 	  (p)->qforw->qback = (q)->qback, \
138 	  (q)->qforw->qback = (p), \
139 	  (p)->qforw = (q)->qforw, \
140 	  (q)->qforw = (q), \
141 	  (q)->qback = (q)))
142 
143 #define H		(14)
144 #define HSIZE		(1 << H)
145 #define hash(h, c)	(((((h) >> (H - 8)) | (h) << 8) ^ (c)) & (HSIZE - 1))
146 
147 char *cc_buffer;
148 struct cc **cc_output;			/* the output array */
149 short *cc_places[TOKEN_MAX + 1];
150 short *cc_hashcodes;			/* for computing hashcodes */
151 struct cc **cc_htab;			/* the hash table */
152 struct cc **cc_tokens;			/* holds all the active tokens */
153 struct cc_undo {
154 	struct cc **pos;
155 	struct cc *val;
156 } *cc_undo;
157 
158 long cc_time, cc_time0;
159 
160 char *cc_tt_ob, *cc_tt_obe;
161 
162 int	cc_compress(struct cc **, struct cc **, char);
163 void	cc_compress_cleanup(struct cc **, int);
164 void	cc_compress_phase(struct cc **, int, struct cc **, int);
165 void	cc_compress_phase1(struct cc **, struct cc **, int, int);
166 void	cc_output_phase(char *, struct cc **, int);
167 int	cc_sweep(char *, int, struct cc **, int);
168 void	cc_sweep0(char *, int, int);
169 int	cc_sweep_phase(char *, int, struct cc **);
170 void	cc_sweep_reverse(struct cc **, short *);
171 int	cc_token_compare(const void *, const void *);
172 
173 int
ccinit(void)174 ccinit(void)
175 {
176 	int i, j;
177 	struct cc *p;
178 
179 	if (tt.tt_token_max > cc_token_max)
180 		tt.tt_token_max = cc_token_max;
181 	if (tt.tt_token_min < cc_token_min)
182 		tt.tt_token_min = cc_token_min;
183 	if (tt.tt_token_min > tt.tt_token_max) {
184 		tt.tt_ntoken = 0;
185 		return 0;
186 	}
187 	if (tt.tt_ntoken > cc_ntoken / 2)	/* not likely */
188 		tt.tt_ntoken = cc_ntoken / 2;
189 #define C(x) (sizeof (x) / sizeof *(x))
190 	for (i = 0; i < (int)C(cc_thresholds); i++) {
191 		int h = i - tt.tt_put_token_cost;
192 		if (h > 0)
193 			cc_thresholds[i] =
194 				(tt.tt_set_token_cost + 1 + h - 1) / h + 1;
195 		else
196 			cc_thresholds[i] = 0;
197 	}
198 	for (i = 0; i < (int)C(cc_score_adjustments); i++) {
199 		int t = cc_thresholds[i];
200 		for (j = 0; j < (int)C(*cc_score_adjustments); j++) {
201 			if (j >= t)
202 				cc_score_adjustments[i][j] =
203 					- (i - tt.tt_put_token_cost);
204 			else if (j < t - 1)
205 				cc_score_adjustments[i][j] = 0;
206 			else
207 				/*
208 				 * cost now is
209 				 *	length * (ccount + 1)		a
210 				 * cost before was
211 				 *	set-token-cost + length +
212 				 *		ccount * put-token-cost	b
213 				 * the score adjustment is (b - a)
214 				 */
215 				cc_score_adjustments[i][j] =
216 					tt.tt_set_token_cost + i +
217 						j * tt.tt_put_token_cost -
218 							i * (j + 1);
219 			if (j >= t)
220 				cc_initial_scores[i][j] = 0;
221 			else
222 				/*
223 				 * - (set-token-cost +
224 				 *	(length - put-token-cost) -
225 				 *	(length - put-token-cost) * ccount)
226 				 */
227 				cc_initial_scores[i][j] =
228 					- (tt.tt_set_token_cost +
229 					   (i - tt.tt_put_token_cost) -
230 					   (i - tt.tt_put_token_cost) * j);
231 		}
232 	}
233 #ifndef cc_weight
234 	for (i = 1; i < C(cc_wthresholds); i++) {
235 		cc_wthresholds[i] =
236 			((tt.tt_set_token_cost + tt.tt_put_token_cost) / i +
237 				i / 5 + 1) *
238 				cc_weight + 1;
239 		cc_wlimits[i] = cc_wthresholds[i] + cc_weight;
240 	}
241 #endif
242 #undef C
243 	if ((cc_output = (struct cc **)
244 	     malloc((unsigned) cc_bufsize * sizeof *cc_output)) == NULL)
245 		goto nomem;
246 	if ((cc_hashcodes = (short *)
247 	     malloc((unsigned) cc_bufsize * sizeof *cc_hashcodes)) == NULL)
248 		goto nomem;
249 	if ((cc_htab = (struct cc **) malloc(HSIZE * sizeof *cc_htab)) == NULL)
250 		goto nomem;
251 	if ((cc_tokens = (struct cc **)
252 	     malloc((unsigned)
253 	            (cc_ntoken + tt.tt_token_max - tt.tt_token_min + 1) *
254 		    sizeof *cc_tokens)) == NULL)
255 		goto nomem;
256 	if ((cc_undo = (struct cc_undo *)
257 	     malloc((unsigned) cc_bufsize * sizeof *cc_undo)) == NULL)
258 		goto nomem;
259 	for (i = tt.tt_token_min; i <= tt.tt_token_max; i++)
260 		if ((cc_places[i] = (short *)
261 		     malloc((unsigned) cc_bufsize * sizeof **cc_places)) == NULL)
262 			goto nomem;
263 	cc_q0a.qforw = cc_q0a.qback = &cc_q0a;
264 	cc_q0b.qforw = cc_q0b.qback = &cc_q0b;
265 	cc_q1a.qforw = cc_q1a.qback = &cc_q1a;
266 	cc_q1b.qforw = cc_q1b.qback = &cc_q1b;
267 	if ((p = (struct cc *) malloc((unsigned) cc_ntoken * sizeof *p)) == NULL)
268 		goto nomem;
269 	for (i = 0; i < tt.tt_ntoken; i++) {
270 		p->code = i;
271 		p->time = -1;
272 		p->qback = cc_q0a.qback;
273 		p->qforw = &cc_q0a;
274 		p->qback->qforw = p;
275 		cc_q0a.qback = p;
276 		p++;
277 	}
278 	for (; i < cc_ntoken; i++) {
279 		p->code = -1;
280 		p->time = -1;
281 		p->qback = cc_q1a.qback;
282 		p->qforw = &cc_q1a;
283 		p->qback->qforw = p;
284 		cc_q1a.qback = p;
285 		p++;
286 	}
287 	cc_tt_ob = tt_ob;
288 	cc_tt_obe = tt_obe;
289 	if ((cc_buffer = malloc((unsigned) cc_bufsize)) == NULL)
290 		goto nomem;
291 	return 0;
292 nomem:
293 	wwerrno = WWE_NOMEM;
294 	return -1;
295 }
296 
297 void
ccstart(void)298 ccstart(void)
299 {
300 	ttflush();
301 	tt_obp = tt_ob = cc_buffer;
302 	tt_obe = tt_ob + cc_bufsize;
303 	tt.tt_flush = ccflush;
304 	if (cc_trace) {
305 		cc_trace_fp = fopen("window-trace", "a");
306 		(void) fcntl(fileno(cc_trace_fp), F_SETFD, 1);
307 	}
308 	ccreset();
309 }
310 
311 void
ccreset(void)312 ccreset(void)
313 {
314 	struct cc *p;
315 
316 	memset((char *) cc_htab, 0, HSIZE * sizeof *cc_htab);
317 	for (p = cc_q0a.qforw; p != &cc_q0a; p = p->qforw)
318 		p->hback = NULL;
319 	for (p = cc_q1a.qforw; p != &cc_q1a; p = p->qforw)
320 		p->hback = NULL;
321 }
322 
323 void
ccend(void)324 ccend(void)
325 {
326 
327 	ttflush();
328 	tt_obp = tt_ob = cc_tt_ob;
329 	tt_obe = cc_tt_obe;
330 	tt.tt_flush = 0;
331 	if (cc_trace_fp != NULL) {
332 		(void) fclose(cc_trace_fp);
333 		cc_trace_fp = NULL;
334 	}
335 }
336 
337 void
ccflush(void)338 ccflush(void)
339 {
340 	int bufsize = tt_obp - tt_ob;
341 	int n;
342 
343 	if (tt_ob != cc_buffer)
344 		abort();
345 	if (cc_trace_fp != NULL) {
346 		(void) fwrite(tt_ob, 1, bufsize, cc_trace_fp);
347 		(void) putc(-1, cc_trace_fp);
348 	}
349 	tt.tt_flush = 0;
350 	(*tt.tt_compress)(1);
351 	if (bufsize < tt.tt_token_min) {
352 		ttflush();
353 		goto out;
354 	}
355 	tt_obp = tt_ob = cc_tt_ob;
356 	tt_obe = cc_tt_obe;
357 	cc_time0 = cc_time;
358 	cc_time += bufsize;
359 	n = cc_sweep_phase(cc_buffer, bufsize, cc_tokens);
360 	cc_compress_phase(cc_output, bufsize, cc_tokens, n);
361 	cc_output_phase(cc_buffer, cc_output, bufsize);
362 	ttflush();
363 	tt_obp = tt_ob = cc_buffer;
364 	tt_obe = cc_buffer + cc_bufsize;
365 out:
366 	(*tt.tt_compress)(0);
367 	tt.tt_flush = ccflush;
368 }
369 
370 int
cc_sweep_phase(char * buffer,int bufsize,struct cc ** tokens)371 cc_sweep_phase(char *buffer, int bufsize, struct cc **tokens)
372 {
373 	struct cc **pp = tokens;
374 	int i, n;
375 #ifdef STATS
376 	int nn, ii;
377 #endif
378 
379 #ifdef STATS
380 	if (verbose >= 0)
381 		time_begin();
382 	if (verbose > 0)
383 		printf("Sweep:");
384 #endif
385 	cc_sweep0(buffer, bufsize, tt.tt_token_min - 1);
386 #ifdef STATS
387 	ntoken_stat = 0;
388 	nn = 0;
389 	ii = 0;
390 #endif
391 	for (i = tt.tt_token_min; i <= tt.tt_token_max; i++) {
392 #ifdef STATS
393 		if (verbose > 0) {
394 			if (ii > 7) {
395 				printf("\n      ");
396 				ii = 0;
397 			}
398 			ii++;
399 			printf(" (%d", i);
400 			(void) fflush(stdout);
401 		}
402 #endif
403 		n = cc_sweep(buffer, bufsize, pp, i);
404 		pp += n;
405 #ifdef STATS
406 		if (verbose > 0) {
407 			if (--n > 0) {
408 				printf(" %d", n);
409 				nn += n;
410 			}
411 			putchar(')');
412 		}
413 #endif
414 	}
415 	qinsertq(&cc_q1b, &cc_q1a);
416 #ifdef STATS
417 	if (verbose > 0)
418 		printf("\n       %d tokens, %d candidates\n",
419 			ntoken_stat, nn);
420 	if (verbose >= 0)
421 		time_end();
422 #endif
423 	return pp - tokens;
424 }
425 
426 void
cc_sweep0(char * buffer,int n,int length)427 cc_sweep0(char *buffer, int n, int length)
428 {
429 	char *p;
430 	short *hc;
431 	int i;
432 	short c;
433 	short pc = tt.tt_padc;
434 
435 	/* n and length are at least 1 */
436 	p = buffer++;
437 	hc = cc_hashcodes;
438 	i = n;
439 	do {
440 		if ((*hc++ = *p++) == pc)
441 			hc[-1] = -1;
442 	} while (--i);
443 	while (--length) {
444 		p = buffer++;
445 		hc = cc_hashcodes;
446 		for (i = n--; --i;) {
447 			if ((c = *p++) == pc || *hc < 0)
448 				c = -1;
449 			else
450 				c = hash(*hc, c);
451 			*hc++ = c;
452 		}
453 	}
454 }
455 
456 int
cc_sweep(char * buffer,int bufsize,struct cc ** tokens,int length)457 cc_sweep(char *buffer, int bufsize, struct cc **tokens, int length)
458 {
459 	struct cc *p;
460 	char *cp;
461 	int i;
462 	short *hc;
463 	short *places = cc_places[length];
464 	struct cc **pp = tokens;
465 	short threshold = thresh(length);
466 #ifndef cc_weight
467 	short wthreshold = wthresh(length);
468 	short limit = wlimit(length);
469 #endif
470 	int time0;
471 	short pc = tt.tt_padc;
472 
473 	i = length - 1;
474 	bufsize -= i;
475 	cp = buffer + i;
476 	hc = cc_hashcodes;
477 	time0 = cc_time0;
478 	for (i = 0; i < bufsize; i++, time0++) {
479 		struct cc **h;
480 
481 		{
482 			short *hc1 = hc;
483 			short c = *cp++;
484 			short hh;
485 			if ((hh = *hc1) < 0 || c == pc) {
486 				*hc1++ = -1;
487 				hc = hc1;
488 				continue;
489 			}
490 			h = cc_htab + (*hc1++ = hash(hh, c));
491 			hc = hc1;
492 		}
493 		for (p = *h; p != NULL; p = p->hforw)
494 			if (p->length == (char) length) {
495 				char *p1 = p->string;
496 				char *p2 = cp - length;
497 				int n = length;
498 				do
499 					if (*p1++ != *p2++)
500 						goto fail;
501 				while (--n);
502 				break;
503 			fail:;
504 			}
505 		if (p == NULL) {
506 			p = cc_q1a.qback;
507 			if (p == &cc_q1a ||
508 			    (p->time >= cc_time0 && p->length == (char) length))
509 				continue;
510 			if (p->hback != NULL)
511 				if ((*p->hback = p->hforw) != NULL)
512 					p->hforw->hback = p->hback;
513 			{
514 				char *p1 = p->string;
515 				char *p2 = cp - length;
516 				int n = length;
517 				do
518 					*p1++ = *p2++;
519 				while (--n);
520 			}
521 			p->length = length;
522 #ifndef cc_weight
523 			p->weight = cc_weight;
524 #endif
525 			p->time = time0;
526 			p->bcount = 1;
527 			p->ccount = 0;
528 			p->flag = 0;
529 			if ((p->hforw = *h) != NULL)
530 				p->hforw->hback = &p->hforw;
531 			*h = p;
532 			p->hback = h;
533 			qinsert(p, &cc_q1a);
534 			places[i] = -1;
535 			p->places = i;
536 #ifdef STATS
537 			ntoken_stat++;
538 #endif
539 		} else if (p->time < cc_time0) {
540 #ifndef cc_weight
541 			if ((p->weight += p->time - time0) < 0)
542 				p->weight = cc_weight;
543 			else if ((p->weight += cc_weight) > limit)
544 				p->weight = limit;
545 #endif
546 			p->time = time0;
547 			p->bcount = 1;
548 			p->ccount = 0;
549 			if (p->code >= 0) {
550 				p->flag = 1;
551 				*pp++ = p;
552 			} else
553 #ifndef cc_weight
554 			if (p->weight >= wthreshold) {
555 				p->flag = 1;
556 				*pp++ = p;
557 				qinsert(p, &cc_q1b);
558 			} else
559 #endif
560 			{
561 				p->flag = 0;
562 				qinsert(p, &cc_q1a);
563 			}
564 			places[i] = -1;
565 			p->places = i;
566 #ifdef STATS
567 			ntoken_stat++;
568 #endif
569 		} else if (p->time + length > time0) {
570 			/*
571 			 * overlapping token, don't count as two and
572 			 * don't update time0, but do adjust weight to offset
573 			 * the difference
574 			 */
575 #ifndef cc_weight
576 			if (cc_weight != 0) {	/* XXX */
577 				p->weight += time0 - p->time;
578 				if (!p->flag && p->weight >= wthreshold) {
579 					p->flag = 1;
580 					*pp++ = p;
581 					qinsert(p, &cc_q1b);
582 				}
583 			}
584 #endif
585 			places[i] = p->places;
586 			p->places = i;
587 		} else {
588 #ifndef cc_weight
589 			if ((p->weight += p->time - time0) < 0)
590 				p->weight = cc_weight;
591 			else if ((p->weight += cc_weight) > limit)
592 				p->weight = limit;
593 #endif
594 			p->time = time0;
595 			p->bcount++;
596 			if (!p->flag &&
597 			    /* code must be < 0 if flag false here */
598 			    (p->bcount >= threshold
599 #ifndef cc_weight
600 			     || p->weight >= wthreshold
601 #endif
602 			     )) {
603 				p->flag = 1;
604 				*pp++ = p;
605 				qinsert(p, &cc_q1b);
606 			}
607 			places[i] = p->places;
608 			p->places = i;
609 		}
610 	}
611 	if ((i = pp - tokens) > 0) {
612 		*pp = NULL;
613 		if (cc_reverse)
614 			cc_sweep_reverse(tokens, places);
615 		if (cc_sort && i > 1) {
616 			qsort((char *) tokens, i, sizeof *tokens,
617 			      cc_token_compare);
618 		}
619 		if (cc_chop) {
620 			if ((i = i * cc_chop / 100) == 0)
621 				i = 1;
622 			tokens[i] = NULL;
623 		}
624 		i++;
625 	}
626 	return i;
627 }
628 
629 void
cc_sweep_reverse(struct cc ** pp,short int * places)630 cc_sweep_reverse(struct cc **pp, short int *places)
631 {
632 	struct cc *p;
633 	short frnt, back, t;
634 
635 	while ((p = *pp++) != NULL) {
636 		back = -1;
637 		t = p->places;
638 		/* the list is never empty */
639 		do {
640 			frnt = places[t];
641 			places[t] = back;
642 			back = t;
643 		} while ((t = frnt) >= 0);
644 		p->places = back;
645 	}
646 }
647 
648 void
cc_compress_phase(struct cc ** output,int bufsize,struct cc ** tokens,int ntoken)649 cc_compress_phase(struct cc **output, int bufsize, struct cc **tokens, int ntoken)
650 {
651 	int i;
652 
653 	memset((char *) output, 0, bufsize * sizeof *output);
654 	for (i = 0; i < cc_npass0; i++)
655 		cc_compress_phase1(output, tokens, ntoken, 0);
656 	for (i = 0; i < cc_npass1; i++)
657 		cc_compress_phase1(output, tokens, ntoken, 1);
658 	cc_compress_cleanup(output, bufsize);
659 }
660 
661 void
cc_compress_phase1(struct cc ** output,struct cc ** tokens,int ntoken,int flag)662 cc_compress_phase1(struct cc **output, struct cc **tokens, int ntoken, int flag)
663 {
664 	struct cc **pp;
665 #ifdef STATS
666 	int i = 0;
667 	int nt = 0, cc = 0, nc = 0;
668 #endif
669 
670 #ifdef STATS
671 	if (verbose >= 0)
672 		time_begin();
673 	if (verbose > 0)
674 		printf("Compress:");
675 #endif
676 	pp = tokens;
677 	while (pp < tokens + ntoken) {
678 #ifdef STATS
679 		if (verbose > 0) {
680 			ntoken_stat = 0;
681 			ccount_stat = 0;
682 			ncover_stat = 0;
683 			if (i > 2) {
684 				printf("\n         ");
685 				i = 0;
686 			}
687 			i++;
688 			printf(" (%d", (*pp)->length);
689 			(void) fflush(stdout);
690 		}
691 #endif
692 		pp += cc_compress(output, pp, flag);
693 #ifdef STATS
694 		if (verbose > 0) {
695 			printf(" %dt %du %dc)", ntoken_stat, ccount_stat,
696 			       ncover_stat);
697 			nt += ntoken_stat;
698 			cc += ccount_stat;
699 			nc += ncover_stat;
700 		}
701 #endif
702 	}
703 #ifdef STATS
704 	if (verbose > 0)
705 		printf("\n   total: (%dt %du %dc)\n", nt, cc, nc);
706 	if (verbose >= 0)
707 		time_end();
708 #endif
709 }
710 
711 void
cc_compress_cleanup(struct cc ** output,int bufsize)712 cc_compress_cleanup(struct cc **output, int bufsize)
713 {
714 	struct cc **end;
715 
716 	/* the previous output phase may have been interrupted */
717 	qinsertq(&cc_q0b, &cc_q0a);
718 	for (end = output + bufsize; output < end;) {
719 		struct cc *p;
720 		int length;
721 		if ((p = *output) == NULL) {
722 			output++;
723 			continue;
724 		}
725 		length = p->length;
726 		if (!p->flag) {
727 		} else if (p->code >= 0) {
728 			qinsert(p, &cc_q0b);
729 			p->flag = 0;
730 		} else if (p->ccount == 0) {
731 			*output = NULL;
732 		} else if (p->ccount >= thresh(length)
733 #ifndef cc_weight
734 			   || wthreshp(p->weight, length)
735 #endif
736 			   ) {
737 			p->flag = 0;
738 		} else {
739 			p->ccount = 0;
740 			*output = NULL;
741 		}
742 		output += length;
743 	}
744 }
745 
746 int
cc_compress(struct cc ** output,struct cc ** tokens,char flag)747 cc_compress(struct cc **output, struct cc **tokens, char flag)
748 {
749 	struct cc **pp = tokens;
750 	struct cc *p = *pp++;
751 	int length = p->length;
752 	int threshold = thresh(length);
753 #ifndef cc_weight
754 	short wthreshold = wthresh(length);
755 #endif
756 	short *places = cc_places[length];
757 	int *initial_scores = cc_initial_scores[length];
758 	int initial_score0 = put_token_score(length);
759 
760 	do {
761 		int score;
762 		struct cc_undo *undop;
763 		int ccount;
764 #ifdef STATS
765 		int ncover;
766 #endif
767 		int i;
768 
769 		ccount = p->ccount;
770 		if ((short) ccount >= p->bcount)
771 			continue;
772 		if (p->code >= 0 || ccount >= threshold)
773 			score = 0;
774 #ifndef cc_weight
775 		else if (p->weight >= wthreshold)
776 			/* allow one fewer match than normal */
777 			/* XXX, should adjust for ccount */
778 			score = - tt.tt_set_token_cost;
779 #endif
780 		else
781 			score = initial_scores[ccount];
782 		undop = cc_undo;
783 #ifdef STATS
784 		ncover = 0;
785 #endif
786 		for (i = p->places; i >= 0; i = places[i]) {
787 			struct cc **jp;
788 			struct cc *x;
789 			struct cc **ip = output + i;
790 			int score0 = initial_score0;
791 			struct cc **iip = ip + length;
792 			struct cc_undo *undop1 = undop;
793 
794 			if ((x = *(jp = ip)) != NULL)
795 				goto z;
796 			while (--jp >= output)
797 				if ((x = *jp) != NULL) {
798 					if (jp + x->length > ip)
799 						goto z;
800 					break;
801 				}
802 			jp = ip + 1;
803 			while (jp < iip) {
804 				if ((x = *jp) == NULL) {
805 					jp++;
806 					continue;
807 				}
808 			z:
809 				if (x == p)
810 					goto undo;
811 #ifdef STATS
812 				ncover++;
813 #endif
814 				undop->pos = jp;
815 				undop->val = x;
816 				undop++;
817 				*jp = NULL;
818 				x->ccount--;
819 				score_adjust(score0, x);
820 				if (score0 < 0 && flag)
821 					goto undo;
822 				jp += x->length;
823 			}
824 			undop->pos = ip;
825 			undop->val = NULL;
826 			undop++;
827 			*ip = p;
828 			ccount++;
829 			score += score0;
830 			continue;
831 		undo:
832 			while (--undop >= undop1)
833 				if ((*undop->pos = x = undop->val))
834 					x->ccount++;
835 			undop++;
836 		}
837 		if (score > 0) {
838 #ifdef STATS
839 			ccount_stat += ccount - p->ccount;
840 			ntoken_stat++;
841 			ncover_stat += ncover;
842 #endif
843 			p->ccount = ccount;
844 		} else {
845 			struct cc_undo *u = cc_undo;
846 			while (--undop >= u) {
847 				struct cc *x;
848 				if ((*undop->pos = x = undop->val))
849 					x->ccount++;
850 			}
851 		}
852 	} while ((p = *pp++) != NULL);
853 	return pp - tokens;
854 }
855 
856 void
cc_output_phase(char * buffer,struct cc ** output,int bufsize)857 cc_output_phase(char *buffer, struct cc **output, int bufsize)
858 {
859 	int i;
860 	struct cc *p, *p1;
861 
862 	for (i = 0; i < bufsize;) {
863 		if ((p = output[i]) == NULL) {
864 			ttputc(buffer[i]);
865 			i++;
866 		} else if (p->code >= 0) {
867 			if (--p->ccount == 0)
868 				qinsert(p, &cc_q0a);
869 			(*tt.tt_put_token)(p->code, p->string, p->length);
870 			wwntokuse++;
871 			wwntoksave += put_token_score(p->length);
872 			i += p->length;
873 		} else if ((p1 = cc_q0a.qback) != &cc_q0a) {
874 			p->code = p1->code;
875 			p1->code = -1;
876 			qinsert(p1, &cc_q1a);
877 			if (--p->ccount == 0)
878 				qinsert(p, &cc_q0a);
879 			else
880 				qinsert(p, &cc_q0b);
881 			(*tt.tt_set_token)(p->code, p->string, p->length);
882 			wwntokdef++;
883 			wwntoksave -= tt.tt_set_token_cost;
884 			i += p->length;
885 		} else {
886 			p->ccount--;
887 			ttwrite(p->string, p->length);
888 			wwntokbad++;
889 			i += p->length;
890 		}
891 	}
892 	wwntokc += bufsize;
893 }
894 
895 int
cc_token_compare(const void * p1,const void * p2)896 cc_token_compare(const void *p1, const void *p2)
897 {
898 	const struct cc * const * vp1 = p1;
899 	const struct cc * const * vp2 = p2;
900 	return (*vp2)->bcount - (*vp1)->bcount;
901 }
902