1 /*
2  * Seven Kingdoms: Ancient Adversaries
3  *
4  * Copyright 1997,1998 Enlight Software Ltd.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 // Filename    : OLZW.CPP
22 // Description : LZW compression and decompression
23 
24 
25 #include <ALL.h>
26 #include <OFILE.h>
27 #include <OLZW.h>
28 
29 // --------- define constant ------------//
30 
31 #define BITS                       15
32 #define MAX_CODE                   ( ( 1 << BITS ) - 1 )
33 #define TABLE_SIZE                 35023L
34 #define END_OF_STREAM              256
35 #define BUMP_CODE                  257
36 #define FLUSH_CODE                 258
37 #define FIRST_CODE                 259
38 #define UNUSED                     0xffff
39 
40 static unsigned short BITS_MASK[] =
41 {
42 	0x0000,
43 	0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
44 	0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff,
45 };
46 
47 // --------- define macro ----------//
48 #define DICT( i ) dict[i]
49 
50 // --------- define struct Dictionary ---------//
51 
52 struct Dictionary
53 {
54 	unsigned short code_value;
55 	unsigned short parent_code;
56 	unsigned char character;
57 };
58 
59 
BitStream()60 BitStream::BitStream() : bit_offset(0)
61 {
62 }
63 
~BitStream()64 BitStream::~BitStream()
65 {
66 }
67 
input_bits(unsigned stringLen)68 unsigned short BitStream::input_bits(unsigned stringLen)
69 {
70 	bit_offset += stringLen;
71 	return 0;
72 }
73 
output_bits(unsigned short stringCode,unsigned stringLen)74 void BitStream::output_bits(unsigned short stringCode, unsigned stringLen)
75 {
76 	bit_offset += stringLen;
77 }
78 
BitMemStream(unsigned char * p)79 BitMemStream::BitMemStream(unsigned char *p) : BitStream(), bytePtr(p)
80 {
81 }
82 
input_bits(unsigned stringLen)83 unsigned short BitMemStream::input_bits(unsigned stringLen)
84 {
85 	unsigned char *p = bytePtr + bit_offset / 8;
86 	int s = bit_offset % 8;
87 
88 	// get longer bits
89 	unsigned long ul = *(unsigned long *)p >> s;
90 
91 	(void) BitStream::input_bits(stringLen);
92 
93 	// mask off leading bits
94 	return (unsigned short)ul & BITS_MASK[stringLen];
95 }
96 
output_bits(unsigned short stringCode,unsigned stringLen)97 void BitMemStream::output_bits(unsigned short stringCode, unsigned stringLen)
98 {
99 	// fill low bit first
100 	unsigned char *p = bytePtr + bit_offset / 8;
101 	int s = bit_offset % 8;
102 
103 	// mask off unused bit
104 	*(unsigned long *)p &= BITS_MASK[s];
105 	// stringCode &= bitMask[stringCode];
106 
107 	// shift stringCode and put them to outPtr
108 	*(unsigned long *)p |= (unsigned long)stringCode << s;
109 
110 	BitStream::output_bits(stringCode, stringLen);
111 }
112 
113 
BitFileRead(File * f)114 BitFileRead::BitFileRead(File *f) : filePtr(f), last_offset(0)
115 {
116 	f->file_read(&residue, sizeof(residue));
117 }
118 
input_bits(unsigned stringLen)119 unsigned short BitFileRead::input_bits(unsigned stringLen)
120 {
121 	if( bit_offset + stringLen > (last_offset+sizeof(residue))*8 )
122 	{
123 		// find byte to read
124 		long byteFetch = bit_offset/8 - last_offset;
125 		if( byteFetch >= sizeof(residue) )		// residue >>= 32 does not change to 0
126 			residue = 0;
127 		else
128 			residue >>= 8*byteFetch;
129 		filePtr->file_read( sizeof(residue)-byteFetch+(unsigned char *)&residue, byteFetch );
130 		last_offset += byteFetch;
131 	}
132 
133 	err_when( bit_offset + stringLen > (last_offset+sizeof(residue))*8 );
134 	int s = bit_offset - last_offset*8;
135 	unsigned long ul = residue >> s;
136 
137 	(void) BitStream::input_bits(stringLen);
138 
139 	// mask off leading bits
140 	return (unsigned short)ul & BITS_MASK[stringLen];
141 }
142 
output_bits(unsigned short stringCode,unsigned stringLen)143 void BitFileRead::output_bits(unsigned short stringCode, unsigned stringLen)
144 {
145 	err_here();
146 	BitStream::output_bits(stringCode, stringLen);
147 }
148 
149 
BitFileWrite(File * f)150 BitFileWrite::BitFileWrite(File *f) : filePtr(f), residue(0), residue_len(0)
151 {
152 }
153 
~BitFileWrite()154 BitFileWrite::~BitFileWrite()
155 {
156 	// flush output
157 	filePtr->file_write(&residue, sizeof(residue));
158 }
159 
input_bits(unsigned stringLen)160 unsigned short BitFileWrite::input_bits(unsigned stringLen)
161 {
162 	err_here();
163 	return BitStream::input_bits(stringLen);
164 }
165 
output_bits(unsigned short stringCode,unsigned stringLen)166 void BitFileWrite::output_bits(unsigned short stringCode, unsigned stringLen)
167 {
168 	if( residue_len + stringLen > sizeof(residue)*8 )
169 	{
170 		// number of byte to write, is residue_len / 8
171 		int byteFlush = residue_len / 8;
172 		filePtr->file_write( &residue, byteFlush );
173 		if( byteFlush >= sizeof(residue))			// if byteFlush == 4, residue >>= 32 does not set residue to 0
174 			residue = 0;
175 		else
176 			residue >>= byteFlush * 8;
177 		residue_len -= byteFlush * 8;
178 	}
179 	err_when( residue_len + stringLen > sizeof(residue)*8 );
180 	residue |= (unsigned long) stringCode << residue_len;
181 	residue_len += stringLen;
182 
183 	BitStream::output_bits(stringCode, stringLen);
184 }
185 
186 
187 // --------- begin of function Lzw::Lzw -------------//
Lzw()188 Lzw::Lzw()
189 {
190 	dict = NULL;
191 	decode_stack = NULL;
192 }
193 // --------- end of function Lzw::Lzw -------------//
194 
195 
196 // --------- begin of function Lzw::~Lzw -------------//
~Lzw()197 Lzw::~Lzw()
198 {
199 	free_storage();
200 }
201 // --------- end of function Lzw::~Lzw -------------//
202 
203 
204 // --------- begin of function Lzw::initialize_storage -------------//
initialize_storage()205 void Lzw::initialize_storage()
206 {
207 	if( !dict )
208 		dict = (Dictionary *) mem_add(sizeof(Dictionary) * TABLE_SIZE);
209 
210 	if( !decode_stack )
211 		decode_stack = (unsigned char *)mem_add(sizeof(unsigned char) * TABLE_SIZE);
212 }
213 // --------- end of function Lzw::initialize_storage -------------//
214 
215 
216 // --------- begin of function Lzw::free_storage -------------//
free_storage()217 void Lzw::free_storage()
218 {
219 	if( decode_stack )
220 	{
221 		mem_del(decode_stack);
222 		decode_stack = NULL;
223 	}
224 	if( dict )
225 	{
226 		mem_del(dict);
227 		dict = NULL;
228 	}
229 }
230 // --------- end of function Lzw::free_storage -------------//
231 
232 
233 // --------- begin of function Lzw::initialize_dictionary -------------//
initialize_dictionary()234 void Lzw::initialize_dictionary()
235 {
236 	unsigned short i;
237 
238 	for ( i = 0 ; i < TABLE_SIZE ; i++ )
239 		DICT( i ).code_value = UNUSED;
240 	next_code = FIRST_CODE;
241 	// putc( 'F', stdout );
242 	current_code_bits = 9;
243 	next_bump_code = 511;
244 }
245 // --------- end of function Lzw::initialize_dictionary -------------//
246 
247 // nul output, to find output size in bits
compress(unsigned char * inPtr,long inByteLen)248 long Lzw::compress( unsigned char *inPtr, long inByteLen)
249 {
250 	BitStream nulStream;
251 	return basic_compress( inPtr, inByteLen, &nulStream);
252 }
253 
254 // compressed data in memory
compress(unsigned char * inPtr,long inByteLen,unsigned char * outPtr)255 long Lzw::compress( unsigned char *inPtr, long inByteLen, unsigned char *outPtr)
256 {
257 	BitMemStream memStream(outPtr);
258 	return basic_compress( inPtr, inByteLen, &memStream);
259 }
260 
261 // set outPtr to NULL to find the decompressed size
expand(unsigned char * inPtr,long inBitLen,unsigned char * outPtr)262 long Lzw::expand( unsigned char *inPtr, long inBitLen, unsigned char *outPtr)
263 {
264 	BitMemStream memStream(inPtr);
265 	return basic_expand( &memStream, outPtr );
266 }
267 
268 // compressed data in file
compress(unsigned char * inPtr,long inByteLen,File * outFile)269 long Lzw::compress( unsigned char *inPtr, long inByteLen, File *outFile)
270 {
271 	BitFileWrite fileStream(outFile);
272 	return basic_compress( inPtr, inByteLen, &fileStream);
273 }
274 
275 // set outPtr to NULL to find the decompressed size
expand(File * inFile,unsigned char * outPtr)276 long Lzw::expand( File *inFile, unsigned char *outPtr)
277 {
278 	BitFileRead fileStream(inFile);
279 	return basic_expand( &fileStream, outPtr);
280 }
281 
282 
283 // --------- begin of function Lzw::basic_compress -------------//
284 // compress a memory block to another memory block
285 // <unsigned char *> inPtr           address of decompressed input data
286 // <long> inByteLen                  length of input data (in byte)
287 // <BitStream *> outStream           output stream, BitMemStream or BitFileWrite
288 //
289 // return in no. of bits, the size of the compressed data
290 // call free_storage after compress to free allocated space, if it will
291 // not going to compress/decompress soon
basic_compress(unsigned char * inPtr,long inByteLen,BitStream * outStream)292 long Lzw::basic_compress( unsigned char *inPtr, long inByteLen, BitStream *outStream)
293 {
294 	unsigned char character;
295 	unsigned short stringCode;
296 	unsigned short index;
297 
298 	initialize_storage();
299 	initialize_dictionary();
300 
301 	if ( inByteLen == 0 )
302 		stringCode = END_OF_STREAM;
303 	else
304 	{
305 		stringCode = *inPtr++;
306 		inByteLen--;
307 	}
308 
309 	while ( inByteLen-- > 0)
310 	{
311 		character = *inPtr++;
312 		index = find_child_node( stringCode, character );
313 		if ( DICT( index ).code_value != UNUSED )
314 			stringCode = DICT( index ).code_value;
315 		else
316 		{
317 			DICT( index ).code_value = next_code++;
318 			DICT( index ).parent_code = stringCode;
319 			DICT( index ).character = character;
320 			outStream->output_bits( stringCode, current_code_bits );
321 			stringCode = character;
322 			if ( next_code > MAX_CODE )
323 			{
324 				outStream->output_bits( FLUSH_CODE, current_code_bits );
325 				initialize_dictionary();
326 			}
327 			else if ( next_code > next_bump_code )
328 			{
329 				outStream->output_bits( BUMP_CODE, current_code_bits );
330 				current_code_bits++;
331 				next_bump_code <<= 1;
332 				next_bump_code |= 1;
333 			}
334 		}
335 	}
336 
337 	outStream->output_bits( stringCode, current_code_bits );
338 	outStream->output_bits( END_OF_STREAM, current_code_bits);
339 
340 	//	free_storage();
341 	return outStream->bit_offset;
342 }
343 // --------- end of function Lzw::basic_compress -------------//
344 
345 
346 // --------- begin of function Lzw::basic_expand -------------//
347 // decompress a memory block to another memory block
348 // <BitStream *> inStream            bit stream, can be BitMemStream or BitFileRead
349 // <unsigned char *> outPtr          address of decompressed output data
350 //                                   (NULL to find the size of decompressed data)
351 //
352 // return the no. of byte of the decompressed data
353 // call free_storage after decompress to free allocated space, if it will
354 // not going to compress/decompress soon
basic_expand(BitStream * inStream,unsigned char * outPtr)355 long Lzw::basic_expand( BitStream *inStream, unsigned char *outPtr)
356 {
357 	unsigned short newCode;
358 	unsigned short oldCode;
359 	unsigned char character;
360 	unsigned int count;
361 
362 	long outByteLen = 0;
363 
364 	initialize_storage();
365 	for ( ; ; )
366 	{
367 		initialize_dictionary();
368 		oldCode = inStream->input_bits( current_code_bits );
369 
370 		if ( oldCode == END_OF_STREAM )
371 		{
372 			// free_storage();
373 			return outByteLen;
374 		}
375 		character = (unsigned char) oldCode;
376 		if( outPtr )
377 			outPtr[outByteLen] = character;
378 		outByteLen++;
379 
380 		for ( ; ; )
381 		{
382 			newCode = inStream->input_bits( current_code_bits );
383 			if ( newCode == END_OF_STREAM )
384 			{
385 				// free_storage();
386 				return outByteLen;
387 			}
388 			if ( newCode == FLUSH_CODE )
389 				break;
390 			if ( newCode == BUMP_CODE )
391 			{
392 				current_code_bits++;
393 				continue;
394 			}
395 			if ( newCode >= next_code )
396 			{
397 				decode_stack[ 0 ] = character;
398 				count = decode_string( 1, oldCode );
399 			}
400 			else
401 				count = decode_string( 0, newCode );
402 			character = decode_stack[ count - 1 ];
403 			if( outPtr )
404 			{
405 				while ( count > 0 )
406 					outPtr[outByteLen++] = decode_stack[ --count ];
407 			}
408 			else
409 			{
410 				outByteLen += count;
411 			}
412 
413 			err_when( next_code >= TABLE_SIZE);
414 			DICT( next_code ).parent_code = oldCode;
415 			DICT( next_code ).character = character;
416 			next_code++;
417 			oldCode = newCode;
418 		}
419 	}
420 
421 	//	free_storage();
422 	return outByteLen;
423 }
424 // --------- end of function Lzw::basic_expand -------------//
425 
426 
427 // --------- begin of function Lzw::find_child_node -------------//
428 //
429 // This hashing routine is responsible for finding the table location
430 // for a string/character combination.  The table index is created
431 // by using an exclusive OR combination of the prefix and character.
432 // This code also has to check for collisions, and handles them by
433 // jumping around in the table.
434 //
find_child_node(unsigned short parentCode,unsigned char childChar)435 unsigned short Lzw::find_child_node( unsigned short parentCode, unsigned char childChar )
436 {
437 	unsigned short index;
438 	unsigned short offset;
439 
440 	index = ( (unsigned short) childChar << ( BITS - 8 ) ) ^ parentCode;
441 	if ( index == 0 )
442 		offset = 1;
443 	else
444 		offset = TABLE_SIZE - index;
445 	for ( ; ; )
446 	{
447 		if ( DICT( index ).code_value == UNUSED )
448 			return( index );
449 		if ( DICT( index ).parent_code == parentCode &&
450 			 DICT( index ).character == childChar )
451 			return( index );
452 		if ( index >= offset )
453 			index -= offset;
454 		else
455 			index += TABLE_SIZE - offset;
456 	}
457 }
458 // --------- end of function Lzw::find_child_node -------------//
459 
460 // --------- begin of function Lzw::decode_string -------------//
461 //
462 // This routine decodes a string from the dictionary, and stores it
463 // in the decode_stack data structure.  It returns a count to the
464 // calling program of how many characters were placed in the stack.
465 //
decode_string(unsigned int count,unsigned short code)466 unsigned int Lzw::decode_string( unsigned int count, unsigned short code )
467 {
468 	unsigned short initCode = code;
469 
470 	while ( code > 255 )
471 	{
472 		err_when(count >= TABLE_SIZE);
473 		decode_stack[ count++ ] = DICT( code ).character;
474 		code = DICT( code ).parent_code;
475 	}
476 	decode_stack[ count++ ] = (char) code;
477 	return( count );
478 }
479 // --------- end of function Lzw::decode_string -------------//
480 
481 
482 /*
483 // --------- begin of function Lzw::output_bits -------------//
484 //
485 // if outPtr is null to find the compressed length
486 // make sure outPtr buffer is long enough
487 //
488 void Lzw::output_bits( unsigned char *outPtr, long &outLen, unsigned short stringCode, unsigned int stringLen )
489 {
490 	// fill low bit first
491 	if( outPtr )
492 	{
493 		outPtr += outLen / 8;
494 		int s = outLen % 8;
495 		int r = 8 - s;
496 
497 		// mask off unused bit
498 		*(unsigned long *)outPtr &= bitMask[s];
499 		// stringCode &= bitMask[stringCode];
500 
501 		// shift stringCode and put them to outPtr
502 		*(unsigned long *)outPtr |= (unsigned long)stringCode << s;
503 	}
504 
505 	outLen += stringLen;
506 }
507 // --------- end of function Lzw::output_bits -------------//
508 
509 // --------- begin of function Lzw::input_bits -------------//
510 // inPtr is allocated at least 32 bits longer than inCnt
511 unsigned short Lzw::input_bits( unsigned char *inPtr, long &inCnt, unsigned int stringLen )
512 {
513 	inPtr += inCnt / 8;
514 	int s = inCnt % 8;
515 
516 	// get longer bits
517 	unsigned long ul = *(unsigned long *)inPtr;
518 	ul >>= s;
519 
520 	inCnt += stringLen;
521 
522 	// mask off leading bits
523 	return (unsigned short)ul & BITS_MASK[stringLen];
524 }
525 // --------- end of function Lzw::input_bits -------------//
526 */
527 
528 
529