1 /**************************************************************
2 	lzhuf.c
3 	written by Haruyasu Yoshizaki 11/20/1988
4 	some minor changes 4/6/1989
5 	comments translated by Haruhiko Okumura 4/7/1989
6 **************************************************************/
7 
8 /*
9 LZHUF.C (c)1989 by Haruyasu Yoshizaki, Haruhiko Okumura, and Kenji Rikitake.
10 All rights reserved. Permission granted for non-commercial use.
11 */
12 
13 #include <cstring>
14 #include <cstdlib>
15 #include "xr_lzhuf.h"
16 
17 using namespace xray_re;
18 
19 /********** LZHUF compression **********/
20 
21 
InitTree(void)22 void _lzhuf::InitTree(void)  /* initialize trees */
23 {
24 	int  i;
25 
26 	for (i = N + 1; i <= N + 256; i++)
27 		rson[i] = NIL;			/* root */
28 	for (i = 0; i < N; i++)
29 		dad[i] = NIL;			/* node */
30 }
31 
InsertNode(int r)32 void _lzhuf::InsertNode(int r)  /* insert to tree */
33 {
34 	int  i, p, cmp;
35 	unsigned char  *key;
36 	unsigned c;
37 
38 	cmp = 1;
39 	key = &text_buf[r];
40 	p = N + 1 + key[0];
41 	rson[r] = lson[r] = NIL;
42 	match_length = 0;
43 	for ( ; ; ) {
44 		if (cmp >= 0) {
45 			if (rson[p] != NIL)
46 				p = rson[p];
47 			else {
48 				rson[p] = r;
49 				dad[r] = p;
50 				return;
51 			}
52 		} else {
53 			if (lson[p] != NIL)
54 				p = lson[p];
55 			else {
56 				lson[p] = r;
57 				dad[r] = p;
58 				return;
59 			}
60 		}
61 		for (i = 1; i < F; i++)
62 			if ((cmp = key[i] - text_buf[p + i]) != 0)
63 				break;
64 		if (i > THRESHOLD) {
65 			if (i > match_length) {
66 				match_position = ((r - p) & (N - 1)) - 1;
67 				if ((match_length = i) >= F)
68 					break;
69 			}
70 			if (i == match_length) {
71 				if ((c = ((r - p) & (N - 1)) - 1) < (unsigned)match_position) {
72 					match_position = c;
73 				}
74 			}
75 		}
76 	}
77 	dad[r] = dad[p];
78 	lson[r] = lson[p];
79 	rson[r] = rson[p];
80 	dad[lson[p]] = r;
81 	dad[rson[p]] = r;
82 	if (rson[dad[p]] == p)
83 		rson[dad[p]] = r;
84 	else
85 		lson[dad[p]] = r;
86 	dad[p] = NIL;  /* remove p */
87 }
88 
DeleteNode(int p)89 void _lzhuf::DeleteNode(int p)  /* remove from tree */
90 {
91 	int  q;
92 
93 	if (dad[p] == NIL)
94 		return;			/* not registered */
95 	if (rson[p] == NIL)
96 		q = lson[p];
97 	else
98 	if (lson[p] == NIL)
99 		q = rson[p];
100 	else {
101 		q = lson[p];
102 		if (rson[q] != NIL) {
103 			do {
104 				q = rson[q];
105 			} while (rson[q] != NIL);
106 			rson[dad[q]] = lson[q];
107 			dad[lson[q]] = dad[q];
108 			lson[q] = lson[p];
109 			dad[lson[p]] = q;
110 		}
111 		rson[q] = rson[p];
112 		dad[rson[p]] = q;
113 	}
114 	dad[q] = dad[p];
115 	if (rson[dad[p]] == p)
116 		rson[dad[p]] = q;
117 	else
118 		lson[dad[p]] = q;
119 	dad[p] = NIL;
120 }
121 
122 /* Huffman coding */
123 
124 
125 
126 /* table for encoding and decoding the upper 6 bits of position */
127 
128 /* for encoding */
129 const uint8_t _lzhuf::p_len[64] = {
130 	0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x05,
131 	0x05, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06,
132 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
133 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
134 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
135 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
136 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
137 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
138 };
139 
140 const uint8_t _lzhuf::p_code[64] = {
141 	0x00, 0x20, 0x30, 0x40, 0x50, 0x58, 0x60, 0x68,
142 	0x70, 0x78, 0x80, 0x88, 0x90, 0x94, 0x98, 0x9C,
143 	0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC,
144 	0xC0, 0xC2, 0xC4, 0xC6, 0xC8, 0xCA, 0xCC, 0xCE,
145 	0xD0, 0xD2, 0xD4, 0xD6, 0xD8, 0xDA, 0xDC, 0xDE,
146 	0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
147 	0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
148 	0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
149 };
150 
151 /* for decoding */
152 const uint8_t _lzhuf::d_code[256] = {
153 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
154 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
155 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
156 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
157 	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
158 	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
159 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
160 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
161 	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
162 	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
163 	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
164 	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
165 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
166 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
167 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
168 	0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
169 	0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
170 	0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
171 	0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
172 	0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
173 	0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
174 	0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
175 	0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
176 	0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
177 	0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
178 	0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
179 	0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
180 	0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
181 	0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
182 	0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
183 	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
184 	0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
185 };
186 
187 const uint8_t _lzhuf::d_len[256] = {
188 	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
189 	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
190 	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
191 	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
192 	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
193 	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
194 	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
195 	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
196 	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
197 	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
198 	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
199 	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
200 	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
201 	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
202 	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
203 	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
204 	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
205 	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
206 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
207 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
208 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
209 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
210 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
211 	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
212 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
213 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
214 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
215 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
216 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
217 	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
218 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
219 	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
220 };
221 
GetBit(void)222 int _lzhuf::GetBit(void)	/* get one bit */
223 {
224 	int i;
225 
226 	while (getlen <= 8) {
227 		if ((i = getc()) < 0) i = 0;
228 		getbuf |= i << (8 - getlen);
229 		getlen += 8;
230 	}
231 	i = getbuf;
232 	getbuf <<= 1;
233 	getlen--;
234 	return (i >> 15) & 1;
235 }
236 
GetByte(void)237 int _lzhuf::GetByte(void)	/* get one byte */
238 {
239 	unsigned i;
240 
241 	while (getlen <= 8) {
242 		int c = getc();
243 		i = (c < 0) ? 0 : c;
244 		getbuf |= i << (8 - getlen);
245 		getlen += 8;
246 	}
247 	i = getbuf;
248 	getbuf <<= 8;
249 	getlen -= 8;
250 	return (i & 0xff00) >> 8;
251 }
252 
Putcode(int l,unsigned c)253 void _lzhuf::Putcode(int l, unsigned c)		/* output c bits of code */
254 {
255 	putbuf |= c >> putlen;
256 	if ((putlen += l) >= 8) {
257 		putc(putbuf >> 8);
258 		if ((putlen -= 8) >= 8) {
259 			putc(putbuf);
260 			codesize += 2;
261 			putlen -= 8;
262 			putbuf = c << (l - putlen);
263 		} else {
264 			putbuf <<= 8;
265 			codesize++;
266 		}
267 	}
268 }
269 
270 
271 /* initialization of tree */
272 
StartHuff(void)273 void _lzhuf::StartHuff(void)
274 {
275 	int i, j;
276 
277 	for (i = 0; i < N_CHAR; i++) {
278 		freq[i] = 1;
279 		son[i] = i + T;
280 		prnt[i + T] = i;
281 	}
282 	i = 0; j = N_CHAR;
283 	while (j <= R) {
284 		freq[j] = freq[i] + freq[i + 1];
285 		son[j] = i;
286 		prnt[i] = prnt[i + 1] = j;
287 		i += 2; j++;
288 	}
289 	freq[T] = 0xffff;
290 	prnt[R] = 0;
291 }
292 
293 
294 /* reconstruction of tree */
295 
reconst(void)296 void _lzhuf::reconst(void)
297 {
298 	int i, j, k;
299 	unsigned f, l;
300 
301 	/* collect leaf nodes in the first half of the table */
302 	/* and replace the freq by (freq + 1) / 2. */
303 	j = 0;
304 	for (i = 0; i < T; i++) {
305 		if (son[i] >= T) {
306 			freq[j] = (freq[i] + 1) / 2;
307 			son[j] = son[i];
308 			j++;
309 		}
310 	}
311 	/* begin constructing tree by connecting sons */
312 	for (i = 0, j = N_CHAR; j < T; i += 2, j++) {
313 		k = i + 1;
314 		f = freq[j] = freq[i] + freq[k];
315 		for (k = j - 1; f < freq[k]; k--);
316 		k++;
317 		l = (j - k) * sizeof(freq[0]);
318 		std::memmove(&freq[k + 1], &freq[k], l);
319 		freq[k] = f;
320 		std::memmove(&son[k + 1], &son[k], l);
321 		son[k] = i;
322 	}
323 	/* connect prnt */
324 	for (i = 0; i < T; i++) {
325 		if ((k = son[i]) >= T) {
326 			prnt[k] = i;
327 		} else {
328 			prnt[k] = prnt[k + 1] = i;
329 		}
330 	}
331 }
332 
333 
334 /* increment frequency of given code by one, and update tree */
335 
update(int c)336 void _lzhuf::update(int c)
337 {
338 	int i, j, k, l;
339 
340 	if (freq[R] == MAX_FREQ) {
341 		reconst();
342 	}
343 	c = prnt[c + T];
344 	do {
345 		k = ++freq[c];
346 
347 		/* if the order is disturbed, exchange nodes */
348 		if ((unsigned)k > freq[l = c + 1]) {
349 			while ((unsigned)k > freq[++l]);
350 			l--;
351 			freq[c] = freq[l];
352 			freq[l] = k;
353 
354 			i = son[c];
355 			prnt[i] = l;
356 			if (i < T) prnt[i + 1] = l;
357 
358 			j = son[l];
359 			son[l] = i;
360 
361 			prnt[j] = c;
362 			if (j < T) prnt[j + 1] = c;
363 			son[c] = j;
364 
365 			c = l;
366 		}
367 	} while ((c = prnt[c]) != 0);	/* repeat up to root */
368 }
369 
EncodeChar(unsigned c)370 void _lzhuf::EncodeChar(unsigned c)
371 {
372 	unsigned i;
373 	int j, k;
374 
375 	i = 0;
376 	j = 0;
377 	k = prnt[c + T];
378 
379 	/* travel from leaf to root */
380 	do {
381 		i >>= 1;
382 
383 		/* if node's address is odd-numbered, choose bigger brother node */
384 		if (k & 1) i += 0x8000;
385 
386 		j++;
387 	} while ((k = prnt[k]) != R);
388 	Putcode(j, i);
389 //	code = i;
390 //	len = j;
391 	update(c);
392 }
393 
EncodePosition(unsigned c)394 void _lzhuf::EncodePosition(unsigned c)
395 {
396 	unsigned i;
397 
398 	/* output upper 6 bits by table lookup */
399 	i = c >> 6;
400 	Putcode(p_len[i], (unsigned)p_code[i] << 8);
401 
402 	/* output lower 6 bits verbatim */
403 	Putcode(6, (c & 0x3f) << 10);
404 }
405 
EncodeEnd(void)406 void _lzhuf::EncodeEnd(void)
407 {
408 	if (putlen) {
409 		putc(putbuf >> 8);
410 		codesize++;
411 	}
412 }
413 
DecodeChar(void)414 int _lzhuf::DecodeChar(void)
415 {
416 	unsigned c;
417 
418 	c = son[R];
419 
420 	/* travel from root to leaf, */
421 	/* choosing the smaller child node (son[]) if the read bit is 0, */
422 	/* the bigger (son[]+1} if 1 */
423 	while (c < T) {
424 		c += GetBit();
425 		c = son[c];
426 	}
427 	c -= T;
428 	update(c);
429 	return c;
430 }
431 
DecodePosition(void)432 int _lzhuf::DecodePosition(void)
433 {
434 	unsigned i, j, c;
435 
436 	/* recover upper 6 bits from table */
437 	i = GetByte();
438 	c = (unsigned)d_code[i] << 6;
439 	j = d_len[i];
440 
441 	/* read lower 6 bits verbatim */
442 	j -= 2;
443 	while (j--) {
444 		i = (i << 1) + GetBit();
445 	}
446 	return c | (i & 0x3f);
447 }
448 
449 /* compression */
450 
Encode(uint8_t * & _code,size_t & _codesize,const uint8_t * _text,size_t _textsize)451 void _lzhuf::Encode(uint8_t*& _code, size_t& _codesize, const uint8_t* _text, size_t _textsize)  /* compression */
452 {
453 	int i, c, len, r, s, last_match_length;
454 
455 	m_src_limit = _textsize;
456 	m_src_pos = 0;
457 	m_src = _text;
458 
459 	m_dest_limit = _textsize/2;
460 	m_dest_pos = 4;
461 	m_dest = static_cast<uint8_t*>(malloc(m_dest_limit));
462 	*(uint32_t*)m_dest = uint32_t(_textsize);
463 
464 	putbuf = 0;
465 	putlen = 0;
466 
467 	StartHuff();
468 	InitTree();
469 	s = 0;
470 	r = N - F;
471 	for (i = s; i < r; i++)
472 		text_buf[i] = ' ';
473 	for (len = 0; len < F && (c = getc()) >= 0; len++)
474 		text_buf[r + len] = c;
475 	textsize = len;
476 	for (i = 1; i <= F; i++)
477 		InsertNode(r - i);
478 	InsertNode(r);
479 	do {
480 		if (match_length > len)
481 			match_length = len;
482 		if (match_length <= THRESHOLD) {
483 			match_length = 1;
484 			EncodeChar(text_buf[r]);
485 		} else {
486 			EncodeChar(255 - THRESHOLD + match_length);
487 			EncodePosition(match_position);
488 		}
489 		last_match_length = match_length;
490 		for (i = 0; i < last_match_length && (c = getc()) >= 0; i++) {
491 			DeleteNode(s);
492 			text_buf[s] = c;
493 			if (s < F - 1)
494 				text_buf[s + N] = c;
495 			s = (s + 1) & (N - 1);
496 			r = (r + 1) & (N - 1);
497 			InsertNode(r);
498 		}
499 		while (i++ < last_match_length) {
500 			DeleteNode(s);
501 			s = (s + 1) & (N - 1);
502 			r = (r + 1) & (N - 1);
503 			if (--len) InsertNode(r);
504 		}
505 	} while (len > 0);
506 	EncodeEnd();
507 	_code = m_dest;
508 	_codesize = m_dest_pos;
509 }
510 
Decode(uint8_t * & _text,size_t & _textsize,const uint8_t * _code,size_t _codesize)511 void _lzhuf::Decode(uint8_t*& _text, size_t& _textsize, const uint8_t* _code, size_t _codesize)  /* recover */
512 {
513 	int i, j, k, r, c;
514 	size_t count;
515 
516 	m_dest_limit = textsize = *(uint32_t*)_code;
517 	m_dest = static_cast<uint8_t*>(malloc(m_dest_limit));
518 	m_dest_pos = 0;
519 
520 	m_src_limit = codesize = _codesize;
521 	m_src = _code;
522 	m_src_pos = 4;
523 
524 	getbuf = 0;
525 	getlen = 0;
526 
527 	StartHuff();
528 	for (i = 0; i < N - F; i++)
529 		text_buf[i] = ' ';
530 	r = N - F;
531 	for (count = 0; count < textsize; ) {
532 		c = DecodeChar();
533 		if (c < 256) {
534 			putc(c);
535 			text_buf[r++] = c;
536 			r &= (N - 1);
537 			count++;
538 		} else {
539 			i = (r - DecodePosition() - 1) & (N - 1);
540 			j = c - 255 + THRESHOLD;
541 			for (k = 0; k < j; k++) {
542 				c = text_buf[(i + k) & (N - 1)];
543 				putc(c);
544 				text_buf[r++] = c;
545 				r &= (N - 1);
546 				count++;
547 			}
548 		}
549 	}
550 	xr_assert(m_dest_pos == textsize);
551 	_text = m_dest;
552 	_textsize = textsize;
553 }
554 
getc()555 int _lzhuf::getc()
556 {
557 	if (m_src_pos < m_src_limit)
558 		return m_src[m_src_pos++];
559 	else
560 		return -1;
561 }
562 
putc(int c)563 void _lzhuf::putc(int c)
564 {
565 	if (m_dest_pos >= m_dest_limit) {
566 		m_dest_limit = m_dest_pos*2;
567 		m_dest = static_cast<uint8_t*>(realloc(m_dest, m_dest_limit));
568 
569 	}
570 	xr_assert(m_dest_pos < m_dest_limit);
571 	m_dest[m_dest_pos++] = c;
572 }
573 
instance()574 xr_lzhuf* xr_lzhuf::instance()
575 {
576 	static xr_lzhuf instance;
577 	return &instance;
578 }
579