1 /***********************************************************************
2 This file is part of HA, a general purpose file archiver.
3 Copyright (C) 1995 Harri Hirvola
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 ************************************************************************
19 HA HSC method
20 ***********************************************************************/
21
22 #include <stdlib.h>
23 #include <stdlib.h>
24 #include <stdio.h>
25 #include "ha.h"
26 #include "haio.h"
27 #include "acoder.h"
28 #include "hsc.h"
29 #include "error.h"
30
31 #define IECLIM 32 /* initial escape counter upper limit */
32 #define NECLIM 5 /* no escape expected counter limit */
33 #define NECTLIM 4 /* */
34 #define NECMAX 10 /* no escape expected counter maximum */
35 #define MAXCLEN 4 /* assumed to be 4 in several places */
36 #define NUMCON 10000 /* number of contexts to remember */
37 #define NUMCFB 32760 /* number of frequencies to remember */
38 #define ESCTH 3 /* threshold for escape calculation */
39 #define MAXTVAL 8000 /* maximum frequency value */
40 #define RFMINI 4 /* initial refresh counter value */
41 #define HTLEN 16384 /* length of hash table */
42 #define NIL 0xffff /* NIL pointer in lists */
43 #define ESC 256 /* escape symbol */
44
45 typedef unsigned char Context[4];
46
47 /* model data */
48 static Context curcon; /* current context */
49 static U16B *ht=NULL; /* hash table */
50 static U16B *hp=NULL; /* hash list pointer array */
51 static Context *con=NULL; /* context array */
52 static unsigned char *cl=NULL; /* context length array */
53 static unsigned char *cc=NULL; /* character counts */
54 static U16B *ft=NULL; /* total frequency of context */
55 static unsigned char *fe=NULL; /* frequencys under ESCTH in context */
56 static U16B *elp=NULL; /* expire list previous pointer array */
57 static U16B *eln=NULL; /* expire list next pointer array */
58 static U16B elf,ell; /* first and last of expire list */
59 static unsigned char *rfm=NULL; /* refresh counter array */
60 static U16B *fa=NULL; /* frequency array */
61 static unsigned char *fc=NULL; /* character for frequency array */
62 static U16B *nb=NULL; /* next pointer for frequency array */
63 static U16B fcfbl; /* pointer to free frequency blocks */
64 static U16B nrel; /* context for frequency block release */
65
66 /* frequency mask system */
67 static unsigned char cmask[256]; /* masked characters */
68 static unsigned char cmstack[256]; /* stack of cmask[] entries to clear */
69 static S16B cmsp; /* pointer to cmstack */
70
71 /* escape propability modifying system variables */
72 static unsigned char nec; /* counter for no escape expected */
73 static unsigned char iec[MAXCLEN+1]; /* initial escape counters */
74
75 /* update stack variables */
76 static U16B usp; /* stack pointer */
77 static U16B cps[MAXCLEN+1]; /* context pointers */
78 static U16B as[MAXCLEN+1]; /* indexes to frequency array */
79
80 /* miscalneous */
81 static S16B dropcnt; /* counter for context len drop */
82 static unsigned char maxclen; /* current maximum length for context */
83 static U16B hrt[HTLEN]; /* semi random data for hashing */
84 static U16B hs[MAXCLEN+1]; /* hash stack for context search */
85 static S16B cslen; /* length of context to search */
86
87 /***********************************************************************
88 Cleanup routine
89 ***********************************************************************/
90
hsc_cleanup(void)91 void hsc_cleanup(void) {
92
93 if (ht!=NULL) free(ht),ht=NULL;
94 if (fc!=NULL) free(fc),fc=NULL;
95 if (fa!=NULL) free(fa),fa=NULL;
96 if (ft!=NULL) free(ft),ft=NULL;
97 if (fe!=NULL) free(fe),fe=NULL;
98 if (nb!=NULL) free(nb),nb=NULL;
99 if (hp!=NULL) free(hp),hp=NULL;
100 if (elp!=NULL) free(elp),elp=NULL;
101 if (eln!=NULL) free(eln),eln=NULL;
102 if (cl!=NULL) free(cl),cl=NULL;
103 if (cc!=NULL) free(cc),cc=NULL;
104 if (rfm!=NULL) free(rfm),rfm=NULL;
105 if (con!=NULL) free(con),con=NULL;
106 }
107
108
109 /***********************************************************************
110 System initialization
111 ***********************************************************************/
112
113 static U16B make_context(unsigned char cl, S16B c);
114
init_model(void)115 static void init_model(void) {
116
117 register S16B i;
118 S32B z,l,h,t;
119
120 ht=malloc(HTLEN*sizeof(*ht));
121 hp=malloc(NUMCON*sizeof(*hp));
122 elp=malloc(NUMCON*sizeof(*elp));
123 eln=malloc(NUMCON*sizeof(*eln));
124 cl=malloc(NUMCON*sizeof(*cl));
125 cc=malloc(NUMCON*sizeof(*cc));
126 ft=malloc(NUMCON*sizeof(*ft));
127 fe=malloc(NUMCON*sizeof(*fe));
128 rfm=malloc(NUMCON*sizeof(*rfm));
129 con=malloc(NUMCON*sizeof(*con));
130 fc=malloc(NUMCFB*sizeof(*fc));
131 fa=malloc(NUMCFB*sizeof(*fa));
132 nb=malloc(NUMCFB*sizeof(*nb));
133 if (hp==NULL || elp==NULL || eln==NULL ||
134 cl==NULL || rfm==NULL || con==NULL ||
135 cc==NULL || ft==NULL || fe==NULL ||
136 fc==NULL || fa==NULL || nb==NULL || ht==NULL) {
137 hsc_cleanup();
138 error(1,ERR_MEM,"init_model()");
139 }
140 maxclen=MAXCLEN;
141 iec[0]=(IECLIM>>1);
142 for (i=1;i<=MAXCLEN;++i) iec[i]=(IECLIM>>1)-1;
143 dropcnt=NUMCON/4;
144 nec=0;
145 nrel=0;
146 hs[0]=0;
147 for (i=0;i<HTLEN;++i) ht[i]=NIL;
148 for (i=0;i<NUMCON;++i) {
149 eln[i]=i+1;
150 elp[i]=i-1;
151 cl[i]=0xff;
152 nb[i]=NIL;
153 }
154 elf=0;
155 ell=NUMCON-1;
156 for (i=NUMCON;i<NUMCFB-1;++i) nb[i]=i+1;
157 nb[i]=NIL;
158 fcfbl=NUMCON;
159 curcon[3]=curcon[2]=curcon[1]=curcon[0]=0;
160 cmsp=0;
161 for (i=0;i<256;++i) cmask[i]=0;
162 for (z=10,i=0;i<HTLEN;++i) {
163 h=z/(2147483647L/16807L);
164 l=z%(2147483647L/16807L);
165 if ((t=16807L*l-(2147483647L%16807L)*h)>0) z=t;
166 else z=t+2147483647L;
167 hrt[i]=(U16B)z&(HTLEN-1);
168 }
169 }
170
init_pack(void)171 static void init_pack(void) {
172
173 init_model();
174 ac_init_encode();
175 }
176
init_unpack(void)177 static void init_unpack(void) {
178
179 init_model();
180 ac_init_decode();
181 }
182
183
184 /***********************************************************************
185 Finite context model
186 ***********************************************************************/
187
188 #define HASH(s,l,h) { \
189 h=0; \
190 if (l) h=hrt[s[0]]; \
191 if (l>1) h=hrt[(s[1]+h)&(HTLEN-1)]; \
192 if (l>2) h=hrt[(s[2]+h)&(HTLEN-1)]; \
193 if (l>3) h=hrt[(s[3]+h)&(HTLEN-1)]; \
194 }
195
196 #define move_context(c) curcon[3]=curcon[2],curcon[2]=curcon[1], \
197 curcon[1]=curcon[0],curcon[0]=c
198
release_cfblocks(void)199 static void release_cfblocks(void) {
200
201 register U16B i,j,d;
202
203 do {
204 do if (++nrel==NUMCON) nrel=0; while (nb[nrel]==NIL);
205 for (i=0;i<=usp;++i) if ((cps[i]&0x7fff)==nrel) break;
206 } while (i<=usp);
207 for (i=nb[nrel],d=fa[nrel];i!=NIL;i=nb[i]) if (fa[i]<d) d=fa[i];
208 ++d;
209 if (fa[nrel]<d) {
210 for (i=nb[nrel];fa[i]<d && nb[i]!=NIL;i=nb[i]);
211 fa[nrel]=fa[i];
212 fc[nrel]=fc[i];
213 j=nb[i];
214 nb[i]=fcfbl;
215 fcfbl=nb[nrel];
216 if ((nb[nrel]=j)==NIL) {
217 cc[nrel]=0;
218 fe[nrel]=(ft[nrel]=fa[nrel])<ESCTH?1:0;
219 return;
220 }
221 }
222 fe[nrel]=(ft[nrel]=fa[nrel]/=d)<ESCTH?1:0;
223 cc[nrel]=0;
224 for (j=nrel,i=nb[j];i!=NIL;) {
225 if (fa[i]<d) {
226 nb[j]=nb[i];
227 nb[i]=fcfbl;
228 fcfbl=i;
229 i=nb[j];
230 }
231 else {
232 ++cc[nrel];
233 ft[nrel]+=fa[i]/=d;
234 if (fa[i]<ESCTH) fe[nrel]++;
235 j=i;
236 i=nb[i];
237 }
238 }
239 }
240
make_context(unsigned char conlen,S16B c)241 static U16B make_context(unsigned char conlen, S16B c) {
242
243 register S16B i;
244 register U16B nc,fp;
245
246 nc=ell;
247 ell=elp[nc];
248 elp[elf]=nc;
249 eln[nc]=elf;
250 elf=nc;
251 if (cl[nc]!=0xff) {
252 if (cl[nc]==MAXCLEN && --dropcnt==0) maxclen=MAXCLEN-1;
253 HASH(con[nc],cl[nc],i);
254 if (ht[i]==nc) ht[i]=hp[nc];
255 else {
256 for (i=ht[i];hp[i]!=nc;i=hp[i]);
257 hp[i]=hp[nc];
258 }
259 if (nb[nc]!=NIL) {
260 for (fp=nb[nc];nb[fp]!=NIL;fp=nb[fp]);
261 nb[fp]=fcfbl;
262 fcfbl=nb[nc];
263 }
264 }
265 nb[nc]=NIL;
266 fe[nc]=ft[nc]=fa[nc]=1;
267 fc[nc]=c;
268 rfm[nc]=RFMINI;
269 cc[nc]=0;
270 cl[nc]=conlen;
271 con[nc][0]=curcon[0];
272 con[nc][1]=curcon[1];
273 con[nc][2]=curcon[2];
274 con[nc][3]=curcon[3];
275 HASH(curcon,conlen,i);
276 hp[nc]=ht[i];
277 ht[i]=nc;
278 return nc;
279 }
280
el_movefront(U16B cp)281 static void el_movefront(U16B cp) {
282
283 if (cp==elf) return;
284 if (cp==ell) ell=elp[cp];
285 else {
286 elp[eln[cp]]=elp[cp];
287 eln[elp[cp]]=eln[cp];
288 }
289 elp[elf]=cp;
290 eln[cp]=elf;
291 elf=cp;
292 }
293
add_model(S16B c)294 static void add_model(S16B c) {
295
296 register U16B i;
297 register S16B cp;
298
299 while (usp!=0) {
300 i=as[--usp];
301 cp=cps[usp];
302 if (cp&0x8000) {
303 cp&=0x7fff;
304 if (fcfbl==NIL) release_cfblocks();
305 nb[i]=fcfbl;
306 i=nb[i];
307 fcfbl=nb[fcfbl];
308 nb[i]=NIL;
309 fa[i]=1;
310 fc[i]=c;
311 ++cc[cp];
312 ++fe[cp];
313 }
314 else if (++fa[i]==ESCTH) --fe[cp];
315 if ((fa[i]<<1)<++ft[cp]/(cc[cp]+1)) --rfm[cp];
316 else if (rfm[cp]<RFMINI) ++rfm[cp];
317 if (!rfm[cp] || ft[cp]>=MAXTVAL) {
318 ++rfm[cp];
319 fe[cp]=ft[cp]=0;
320 for (i=cp;i!=NIL;i=nb[i]) {
321 if (fa[i]>1) {
322 ft[cp]+=fa[i]>>=1;
323 if (fa[i]<ESCTH) ++fe[cp];
324 }
325 else {
326 ++ft[cp];
327 ++fe[cp];
328 }
329 }
330 }
331 }
332 }
333
find_next(void)334 static U16B find_next(void) {
335
336 register S16B i,k;
337 register U16B cp;
338
339 for (i=cslen-1;i>=0;--i) {
340 k=hs[i];
341 for (cp=ht[k];cp!=NIL;cp=hp[cp]) {
342 if (cl[cp]==i) {
343 switch (i) {
344 case 4:
345 if (curcon[3]!=con[cp][3]) break;
346 case 3:
347 if (curcon[2]!=con[cp][2]) break;
348 case 2:
349 if (curcon[1]!=con[cp][1]) break;
350 case 1:
351 if (curcon[0]!=con[cp][0]) break;
352 case 0:
353 cslen=i;
354 return cp;
355 }
356 }
357 }
358 }
359 return NIL;
360 }
361
find_longest(void)362 static U16B find_longest(void) {
363
364 hs[1]=hrt[curcon[0]];
365 hs[2]=hrt[(curcon[1]+hs[1])&(HTLEN-1)];
366 hs[3]=hrt[(curcon[2]+hs[2])&(HTLEN-1)];
367 hs[4]=hrt[(curcon[3]+hs[3])&(HTLEN-1)];
368 usp=0;
369 while(cmsp) cmask[cmstack[--cmsp]]=0;
370 cslen=MAXCLEN+1;
371 return find_next();
372 }
373
adj_escape_prob(U16B esc,U16B cp)374 static U16B adj_escape_prob(U16B esc, U16B cp) {
375
376 if (ft[cp]==1) return iec[cl[cp]]>=(IECLIM>>1)?2:1;
377 if (cc[cp]==255) return 1;
378 if (cc[cp] && ((cc[cp]+1)<<1)>=ft[cp]) {
379 esc=(S16B)((S32B)esc*((cc[cp]+1)<<1)/ft[cp]);
380 if (cc[cp]+1==ft[cp]) esc+=(cc[cp]+1)>>1;
381 }
382 return esc?esc:1;
383 }
384
385
code_first(U16B cp,S16B c)386 static S16B code_first(U16B cp, S16B c) {
387
388 register U16B i;
389 register S16B sum,cf,tot,esc;
390
391 sum=cf=0;
392 for (i=cp;i!=NIL;i=nb[i]) {
393 if (fc[i]==c) {
394 cf=fa[i];
395 as[0]=i;
396 break;
397 }
398 sum+=fa[i];
399 }
400 tot=ft[cp];
401 esc=adj_escape_prob(fe[cp],cp);
402 if (nec>=NECLIM) {
403 if (tot<=NECTLIM && nec==NECMAX) {
404 tot<<=2;
405 sum<<=2;
406 cf<<=2;
407 }
408 else {
409 tot<<=1;
410 sum<<=1;
411 cf<<=1;
412 }
413 }
414 usp=1;
415 if (cf==0) {
416 ac_out(tot,tot+esc,tot+esc);
417 for (i=cp;i!=NIL;sum=i,i=nb[i]) {
418 cmstack[cmsp++]=fc[i];
419 cmask[fc[i]]=1;
420 }
421 as[0]=sum; /* sum holds last i ! */
422 nec=0;
423 if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
424 cps[0]=0x8000|cp;
425 return 0;
426 }
427 ac_out(sum,sum+cf,tot+esc);
428 if (nec<NECMAX) ++nec;
429 if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
430 cps[0]=cp;
431 return 1;
432 }
433
434
code_rest(U16B cp,S16B c)435 static S16B code_rest(U16B cp, S16B c) {
436
437 register U16B i;
438 register S16B sum,cf,tot,esc;
439
440 tot=sum=cf=esc=0;
441 for (i=cp;i!=NIL;i=nb[i]) {
442 if (!cmask[fc[i]]) {
443 if (fa[i]<ESCTH) ++esc;
444 if (cf==0 && fc[i]==c) {
445 sum=tot;
446 cf=fa[i];
447 as[usp]=i;
448 }
449 tot+=fa[i];
450 }
451 }
452 esc=adj_escape_prob(esc,cp);
453 if (cf==0) {
454 ac_out(tot,tot+esc,tot+esc);
455 for (i=cp;i!=NIL;sum=i,i=nb[i]) {
456 if (!cmask[fc[i]]) {
457 cmstack[cmsp++]=fc[i];
458 cmask[fc[i]]=1;
459 }
460 }
461 as[usp]=sum; /* sum holds last i ! */
462 if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
463 cps[usp++]=0x8000|cp;
464 return 0;
465 }
466 ac_out(sum,sum+cf,tot+esc);
467 ++nec; /* must add test used in code_first() if NECMAX<5 ! */
468 if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
469 cps[usp++]=cp;
470 return 1;
471 }
472
code_new(S16B c)473 static void code_new(S16B c) {
474
475 register S16B i;
476 register U16B sum,tot;
477
478 sum=0;
479 tot=257-cmsp;
480 for (i=0;i<c;++i) sum+=1-cmask[i];
481 ac_out(sum,sum+1,tot);
482 }
483
decode_first(U16B cp)484 static S16B decode_first(U16B cp) {
485
486 register U16B c;
487 register U16B tv;
488 register U16B i;
489 register S16B sum,tot,esc,cf;
490 register unsigned char sv;
491
492 esc=adj_escape_prob(fe[cp],cp);
493 tot=ft[cp];
494 if (nec>=NECLIM) {
495 if (tot<=NECTLIM && nec==NECMAX) sv=2;
496 else sv=1;
497 tot<<=sv;
498 tv=ac_threshold_val(tot+esc)>>sv;
499 for (c=cp,sum=0;;c=nb[c]) {
500 if (c==NIL) break;
501 if (sum+fa[c]<=tv) sum+=fa[c];
502 else {
503 cf=fa[c]<<sv;
504 break;
505 }
506 }
507 sum<<=sv;
508 }
509 else {
510 tv=ac_threshold_val(tot+esc);
511 for (c=cp,sum=0;;c=nb[c]) {
512 if (c==NIL) break;
513 if (sum+fa[c]<=tv) sum+=fa[c];
514 else {
515 cf=fa[c];
516 break;
517 }
518 }
519 }
520 usp=1;
521 if (c!=NIL) {
522 ac_in(sum,sum+cf,tot+esc);
523 if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
524 as[0]=c;
525 cps[0]=cp;
526 c=fc[c];
527 if (nec<NECMAX) ++nec;
528 }
529 else {
530 ac_in(tot,tot+esc,tot+esc);
531 if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
532 for (i=cp;i!=NIL;sum=i,i=nb[i]) {
533 cmstack[cmsp++]=fc[i];
534 cmask[fc[i]]=1;
535 }
536 cps[0]=0x8000|cp;
537 as[0]=sum;
538 c=ESC;
539 nec=0;
540 }
541 return c;
542 }
543
decode_rest(U16B cp)544 static S16B decode_rest(U16B cp) {
545
546 register U16B c;
547 register U16B tv;
548 register U16B i;
549 register S16B sum,tot,esc,cf;
550
551 esc=tot=0;
552 for (i=cp;i!=NIL;i=nb[i]) {
553 if (!cmask[fc[i]]) {
554 tot+=fa[i];
555 if (fa[i]<ESCTH) ++esc;
556 }
557 }
558 esc=adj_escape_prob(esc,cp);
559 tv=ac_threshold_val(tot+esc);
560 for (c=cp,sum=0;;c=nb[c]) {
561 if (c==NIL) break;
562 if (!cmask[fc[c]]) {
563 if (sum+fa[c]<=tv) sum+=fa[c];
564 else {
565 cf=fa[c];
566 break;
567 }
568 }
569 }
570 if (c!=NIL) {
571 ac_in(sum,sum+cf,tot+esc);
572 if (ft[cp]==1 && iec[cl[cp]]) --iec[cl[cp]];
573 as[usp]=c;
574 cps[usp++]=cp;
575 c=fc[c];
576 ++nec; /* must add test used in code_first() if NECMAX<5 ! */
577 }
578 else {
579 ac_in(tot,tot+esc,tot+esc);
580 if (ft[cp]==1 && iec[cl[cp]]<IECLIM) ++iec[cl[cp]];
581 for (i=cp;i!=NIL;sum=i,i=nb[i]) {
582 if (!cmask[fc[i]]) {
583 cmstack[cmsp++]=fc[i];
584 cmask[fc[i]]=1;
585 }
586 }
587 cps[usp]=0x8000|cp;
588 as[usp++]=sum; /* sum holds last i !! */
589 c=ESC;
590 }
591 return c;
592 }
593
decode_new(void)594 static S16B decode_new(void) {
595
596 register S16B c;
597 register U16B tv,sum,tot;
598
599 tot=257-cmsp;
600 tv=ac_threshold_val(tot);
601 for (c=sum=0;c<256;++c) {
602 if (cmask[c]) continue;
603 if (sum+1<=tv) ++sum;
604 else break;
605 }
606 ac_in(sum,sum+1,tot);
607 return c;
608 }
609
610 #define code_byte(cp,c) (cmsp?code_rest(cp,c):code_first(cp,c))
611 #define decode_byte(cp) (cmsp?decode_rest(cp):decode_first(cp))
612
613 /***********************************************************************
614 Encoding
615 ***********************************************************************/
616
hsc_pack(void)617 void hsc_pack(void) {
618
619 S16B c;
620 U16B cp;
621 unsigned char ncmax,ncmin;
622
623 init_pack();
624 for (;(c=getbyte())>=0;) {
625 cp=find_longest();
626 ncmin=cp==NIL?0:cl[cp]+1;
627 ncmax=maxclen+1;
628 for(;;) {
629 if (cp==NIL) {
630 code_new(c);
631 break;
632 }
633 if (code_byte(cp,c)) {
634 el_movefront(cp);
635 break;
636 }
637 cp=find_next();
638 }
639 add_model(c);
640 while (ncmax>ncmin) make_context(--ncmax,c);
641 move_context(c);
642 }
643 cp=find_longest();
644 while (cp!=NIL) {
645 code_byte(cp,ESC);
646 cp=find_next();
647 }
648 code_new(ESC);
649 ac_end_encode();
650 hsc_cleanup();
651 }
652
653 /***********************************************************************
654 Decoding
655 ***********************************************************************/
656
hsc_unpack(void)657 void hsc_unpack(void) {
658
659 S16B c;
660 U16B cp;
661 unsigned char ncmax,ncmin;
662
663 init_unpack();
664 for (;;) {
665 cp=find_longest();
666 ncmin=cp==NIL?0:cl[cp]+1;
667 ncmax=maxclen+1;
668 for(;;) {
669 if (cp==NIL) {
670 c=decode_new();
671 break;
672 }
673 if ((c=decode_byte(cp))!=ESC) {
674 el_movefront(cp);
675 break;
676 }
677 cp=find_next();
678 }
679 if (c==ESC) break;
680 add_model(c);
681 while (ncmax>ncmin) make_context(--ncmax,c);
682 putbyte(c);
683 move_context(c);
684 }
685 flush();
686 hsc_cleanup();
687 }
688
689