1 /**************************************************************
2 lzhuf.c
3 written by Haruyasu Yoshizaki 11/20/1988
4 some minor changes 4/6/1989
5 comments translated by Haruhiko Okumura 4/7/1989
6 **************************************************************/
7
8 /*
9 LZHUF.C (c)1989 by Haruyasu Yoshizaki, Haruhiko Okumura, and Kenji Rikitake.
10 All rights reserved. Permission granted for non-commercial use.
11 */
12
13 #include <cstring>
14 #include <cstdlib>
15 #include "xr_lzhuf.h"
16
17 using namespace xray_re;
18
19 /********** LZHUF compression **********/
20
21
InitTree(void)22 void _lzhuf::InitTree(void) /* initialize trees */
23 {
24 int i;
25
26 for (i = N + 1; i <= N + 256; i++)
27 rson[i] = NIL; /* root */
28 for (i = 0; i < N; i++)
29 dad[i] = NIL; /* node */
30 }
31
InsertNode(int r)32 void _lzhuf::InsertNode(int r) /* insert to tree */
33 {
34 int i, p, cmp;
35 unsigned char *key;
36 unsigned c;
37
38 cmp = 1;
39 key = &text_buf[r];
40 p = N + 1 + key[0];
41 rson[r] = lson[r] = NIL;
42 match_length = 0;
43 for ( ; ; ) {
44 if (cmp >= 0) {
45 if (rson[p] != NIL)
46 p = rson[p];
47 else {
48 rson[p] = r;
49 dad[r] = p;
50 return;
51 }
52 } else {
53 if (lson[p] != NIL)
54 p = lson[p];
55 else {
56 lson[p] = r;
57 dad[r] = p;
58 return;
59 }
60 }
61 for (i = 1; i < F; i++)
62 if ((cmp = key[i] - text_buf[p + i]) != 0)
63 break;
64 if (i > THRESHOLD) {
65 if (i > match_length) {
66 match_position = ((r - p) & (N - 1)) - 1;
67 if ((match_length = i) >= F)
68 break;
69 }
70 if (i == match_length) {
71 if ((c = ((r - p) & (N - 1)) - 1) < (unsigned)match_position) {
72 match_position = c;
73 }
74 }
75 }
76 }
77 dad[r] = dad[p];
78 lson[r] = lson[p];
79 rson[r] = rson[p];
80 dad[lson[p]] = r;
81 dad[rson[p]] = r;
82 if (rson[dad[p]] == p)
83 rson[dad[p]] = r;
84 else
85 lson[dad[p]] = r;
86 dad[p] = NIL; /* remove p */
87 }
88
DeleteNode(int p)89 void _lzhuf::DeleteNode(int p) /* remove from tree */
90 {
91 int q;
92
93 if (dad[p] == NIL)
94 return; /* not registered */
95 if (rson[p] == NIL)
96 q = lson[p];
97 else
98 if (lson[p] == NIL)
99 q = rson[p];
100 else {
101 q = lson[p];
102 if (rson[q] != NIL) {
103 do {
104 q = rson[q];
105 } while (rson[q] != NIL);
106 rson[dad[q]] = lson[q];
107 dad[lson[q]] = dad[q];
108 lson[q] = lson[p];
109 dad[lson[p]] = q;
110 }
111 rson[q] = rson[p];
112 dad[rson[p]] = q;
113 }
114 dad[q] = dad[p];
115 if (rson[dad[p]] == p)
116 rson[dad[p]] = q;
117 else
118 lson[dad[p]] = q;
119 dad[p] = NIL;
120 }
121
122 /* Huffman coding */
123
124
125
126 /* table for encoding and decoding the upper 6 bits of position */
127
128 /* for encoding */
129 const uint8_t _lzhuf::p_len[64] = {
130 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05,
131 0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
132 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
133 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
134 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
135 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
136 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
137 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
138 };
139
140 const uint8_t _lzhuf::p_code[64] = {
141 0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68,
142 0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C,
143 0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC,
144 0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE,
145 0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE,
146 0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
147 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
148 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
149 };
150
151 /* for decoding */
152 const uint8_t _lzhuf::d_code[256] = {
153 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
154 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
155 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
156 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
157 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
158 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
159 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
160 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
161 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
162 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
163 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
164 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
165 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
166 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
167 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
168 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
169 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
170 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
171 0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
172 0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
173 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
174 0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
175 0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
176 0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
177 0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
178 0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
179 0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
180 0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
181 0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
182 0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
183 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
184 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
185 };
186
187 const uint8_t _lzhuf::d_len[256] = {
188 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
189 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
190 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
191 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
192 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
193 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
194 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
195 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
196 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
197 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
198 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
199 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
200 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
201 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
202 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
203 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
204 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
205 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
206 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
207 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
208 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
209 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
210 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
211 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
212 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
213 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
214 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
215 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
216 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
217 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
218 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
219 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
220 };
221
GetBit(void)222 int _lzhuf::GetBit(void) /* get one bit */
223 {
224 int i;
225
226 while (getlen <= 8) {
227 if ((i = getc()) < 0) i = 0;
228 getbuf |= i << (8 - getlen);
229 getlen += 8;
230 }
231 i = getbuf;
232 getbuf <<= 1;
233 getlen--;
234 return (i >> 15) & 1;
235 }
236
GetByte(void)237 int _lzhuf::GetByte(void) /* get one byte */
238 {
239 unsigned i;
240
241 while (getlen <= 8) {
242 int c = getc();
243 i = (c < 0) ? 0 : c;
244 getbuf |= i << (8 - getlen);
245 getlen += 8;
246 }
247 i = getbuf;
248 getbuf <<= 8;
249 getlen -= 8;
250 return (i & 0xff00) >> 8;
251 }
252
Putcode(int l,unsigned c)253 void _lzhuf::Putcode(int l, unsigned c) /* output c bits of code */
254 {
255 putbuf |= c >> putlen;
256 if ((putlen += l) >= 8) {
257 putc(putbuf >> 8);
258 if ((putlen -= 8) >= 8) {
259 putc(putbuf);
260 codesize += 2;
261 putlen -= 8;
262 putbuf = c << (l - putlen);
263 } else {
264 putbuf <<= 8;
265 codesize++;
266 }
267 }
268 }
269
270
271 /* initialization of tree */
272
StartHuff(void)273 void _lzhuf::StartHuff(void)
274 {
275 int i, j;
276
277 for (i = 0; i < N_CHAR; i++) {
278 freq[i] = 1;
279 son[i] = i + T;
280 prnt[i + T] = i;
281 }
282 i = 0; j = N_CHAR;
283 while (j <= R) {
284 freq[j] = freq[i] + freq[i + 1];
285 son[j] = i;
286 prnt[i] = prnt[i + 1] = j;
287 i += 2; j++;
288 }
289 freq[T] = 0xffff;
290 prnt[R] = 0;
291 }
292
293
294 /* reconstruction of tree */
295
reconst(void)296 void _lzhuf::reconst(void)
297 {
298 int i, j, k;
299 unsigned f, l;
300
301 /* collect leaf nodes in the first half of the table */
302 /* and replace the freq by (freq + 1) / 2. */
303 j = 0;
304 for (i = 0; i < T; i++) {
305 if (son[i] >= T) {
306 freq[j] = (freq[i] + 1) / 2;
307 son[j] = son[i];
308 j++;
309 }
310 }
311 /* begin constructing tree by connecting sons */
312 for (i = 0, j = N_CHAR; j < T; i += 2, j++) {
313 k = i + 1;
314 f = freq[j] = freq[i] + freq[k];
315 for (k = j - 1; f < freq[k]; k--);
316 k++;
317 l = (j - k) * sizeof(freq[0]);
318 std::memmove(&freq[k + 1], &freq[k], l);
319 freq[k] = f;
320 std::memmove(&son[k + 1], &son[k], l);
321 son[k] = i;
322 }
323 /* connect prnt */
324 for (i = 0; i < T; i++) {
325 if ((k = son[i]) >= T) {
326 prnt[k] = i;
327 } else {
328 prnt[k] = prnt[k + 1] = i;
329 }
330 }
331 }
332
333
334 /* increment frequency of given code by one, and update tree */
335
update(int c)336 void _lzhuf::update(int c)
337 {
338 int i, j, k, l;
339
340 if (freq[R] == MAX_FREQ) {
341 reconst();
342 }
343 c = prnt[c + T];
344 do {
345 k = ++freq[c];
346
347 /* if the order is disturbed, exchange nodes */
348 if ((unsigned)k > freq[l = c + 1]) {
349 while ((unsigned)k > freq[++l]);
350 l--;
351 freq[c] = freq[l];
352 freq[l] = k;
353
354 i = son[c];
355 prnt[i] = l;
356 if (i < T) prnt[i + 1] = l;
357
358 j = son[l];
359 son[l] = i;
360
361 prnt[j] = c;
362 if (j < T) prnt[j + 1] = c;
363 son[c] = j;
364
365 c = l;
366 }
367 } while ((c = prnt[c]) != 0); /* repeat up to root */
368 }
369
EncodeChar(unsigned c)370 void _lzhuf::EncodeChar(unsigned c)
371 {
372 unsigned i;
373 int j, k;
374
375 i = 0;
376 j = 0;
377 k = prnt[c + T];
378
379 /* travel from leaf to root */
380 do {
381 i >>= 1;
382
383 /* if node's address is odd-numbered, choose bigger brother node */
384 if (k & 1) i += 0x8000;
385
386 j++;
387 } while ((k = prnt[k]) != R);
388 Putcode(j, i);
389 // code = i;
390 // len = j;
391 update(c);
392 }
393
EncodePosition(unsigned c)394 void _lzhuf::EncodePosition(unsigned c)
395 {
396 unsigned i;
397
398 /* output upper 6 bits by table lookup */
399 i = c >> 6;
400 Putcode(p_len[i], (unsigned)p_code[i] << 8);
401
402 /* output lower 6 bits verbatim */
403 Putcode(6, (c & 0x3f) << 10);
404 }
405
EncodeEnd(void)406 void _lzhuf::EncodeEnd(void)
407 {
408 if (putlen) {
409 putc(putbuf >> 8);
410 codesize++;
411 }
412 }
413
DecodeChar(void)414 int _lzhuf::DecodeChar(void)
415 {
416 unsigned c;
417
418 c = son[R];
419
420 /* travel from root to leaf, */
421 /* choosing the smaller child node (son[]) if the read bit is 0, */
422 /* the bigger (son[]+1} if 1 */
423 while (c < T) {
424 c += GetBit();
425 c = son[c];
426 }
427 c -= T;
428 update(c);
429 return c;
430 }
431
DecodePosition(void)432 int _lzhuf::DecodePosition(void)
433 {
434 unsigned i, j, c;
435
436 /* recover upper 6 bits from table */
437 i = GetByte();
438 c = (unsigned)d_code[i] << 6;
439 j = d_len[i];
440
441 /* read lower 6 bits verbatim */
442 j -= 2;
443 while (j--) {
444 i = (i << 1) + GetBit();
445 }
446 return c | (i & 0x3f);
447 }
448
449 /* compression */
450
Encode(uint8_t * & _code,size_t & _codesize,const uint8_t * _text,size_t _textsize)451 void _lzhuf::Encode(uint8_t*& _code, size_t& _codesize, const uint8_t* _text, size_t _textsize) /* compression */
452 {
453 int i, c, len, r, s, last_match_length;
454
455 m_src_limit = _textsize;
456 m_src_pos = 0;
457 m_src = _text;
458
459 m_dest_limit = _textsize/2;
460 m_dest_pos = 4;
461 m_dest = static_cast<uint8_t*>(malloc(m_dest_limit));
462 *(uint32_t*)m_dest = uint32_t(_textsize);
463
464 putbuf = 0;
465 putlen = 0;
466
467 StartHuff();
468 InitTree();
469 s = 0;
470 r = N - F;
471 for (i = s; i < r; i++)
472 text_buf[i] = ' ';
473 for (len = 0; len < F && (c = getc()) >= 0; len++)
474 text_buf[r + len] = c;
475 textsize = len;
476 for (i = 1; i <= F; i++)
477 InsertNode(r - i);
478 InsertNode(r);
479 do {
480 if (match_length > len)
481 match_length = len;
482 if (match_length <= THRESHOLD) {
483 match_length = 1;
484 EncodeChar(text_buf[r]);
485 } else {
486 EncodeChar(255 - THRESHOLD + match_length);
487 EncodePosition(match_position);
488 }
489 last_match_length = match_length;
490 for (i = 0; i < last_match_length && (c = getc()) >= 0; i++) {
491 DeleteNode(s);
492 text_buf[s] = c;
493 if (s < F - 1)
494 text_buf[s + N] = c;
495 s = (s + 1) & (N - 1);
496 r = (r + 1) & (N - 1);
497 InsertNode(r);
498 }
499 while (i++ < last_match_length) {
500 DeleteNode(s);
501 s = (s + 1) & (N - 1);
502 r = (r + 1) & (N - 1);
503 if (--len) InsertNode(r);
504 }
505 } while (len > 0);
506 EncodeEnd();
507 _code = m_dest;
508 _codesize = m_dest_pos;
509 }
510
Decode(uint8_t * & _text,size_t & _textsize,const uint8_t * _code,size_t _codesize)511 void _lzhuf::Decode(uint8_t*& _text, size_t& _textsize, const uint8_t* _code, size_t _codesize) /* recover */
512 {
513 int i, j, k, r, c;
514 size_t count;
515
516 m_dest_limit = textsize = *(uint32_t*)_code;
517 m_dest = static_cast<uint8_t*>(malloc(m_dest_limit));
518 m_dest_pos = 0;
519
520 m_src_limit = codesize = _codesize;
521 m_src = _code;
522 m_src_pos = 4;
523
524 getbuf = 0;
525 getlen = 0;
526
527 StartHuff();
528 for (i = 0; i < N - F; i++)
529 text_buf[i] = ' ';
530 r = N - F;
531 for (count = 0; count < textsize; ) {
532 c = DecodeChar();
533 if (c < 256) {
534 putc(c);
535 text_buf[r++] = c;
536 r &= (N - 1);
537 count++;
538 } else {
539 i = (r - DecodePosition() - 1) & (N - 1);
540 j = c - 255 + THRESHOLD;
541 for (k = 0; k < j; k++) {
542 c = text_buf[(i + k) & (N - 1)];
543 putc(c);
544 text_buf[r++] = c;
545 r &= (N - 1);
546 count++;
547 }
548 }
549 }
550 xr_assert(m_dest_pos == textsize);
551 _text = m_dest;
552 _textsize = textsize;
553 }
554
getc()555 int _lzhuf::getc()
556 {
557 if (m_src_pos < m_src_limit)
558 return m_src[m_src_pos++];
559 else
560 return -1;
561 }
562
putc(int c)563 void _lzhuf::putc(int c)
564 {
565 if (m_dest_pos >= m_dest_limit) {
566 m_dest_limit = m_dest_pos*2;
567 m_dest = static_cast<uint8_t*>(realloc(m_dest, m_dest_limit));
568
569 }
570 xr_assert(m_dest_pos < m_dest_limit);
571 m_dest[m_dest_pos++] = c;
572 }
573
instance()574 xr_lzhuf* xr_lzhuf::instance()
575 {
576 static xr_lzhuf instance;
577 return &instance;
578 }
579