/* This file is part of libmspack.
 * (C) 2003-2011 Stuart Caie.
 *
 * KWAJ is a format very similar to SZDD. KWAJ method 3 (LZH) was
 * written by Jeff Johnson.
 *
 * libmspack is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License (LGPL) version 2.1
 *
 * For further details, see the file COPYING.LIB distributed with libmspack
 */

/* KWAJ decompression implementation */

15 #include <system.h>
16 #include <kwaj.h>
17 #include <mszip.h>
18 
19 /* prototypes */
20 static struct mskwajd_header *kwajd_open(
21     struct mskwaj_decompressor *base, const char *filename);
22 static void kwajd_close(
23     struct mskwaj_decompressor *base, struct mskwajd_header *hdr);
24 static int kwajd_read_headers(
25     struct mspack_system *sys, struct mspack_file *fh,
26     struct mskwajd_header *hdr);
27 static int kwajd_extract(
28     struct mskwaj_decompressor *base, struct mskwajd_header *hdr,
29     const char *filename);
30 static int kwajd_decompress(
31     struct mskwaj_decompressor *base, const char *input, const char *output);
32 static int kwajd_error(
33     struct mskwaj_decompressor *base);
34 
35 static struct kwajd_stream *lzh_init(
36     struct mspack_system *sys, struct mspack_file *in, struct mspack_file *out);
37 static int lzh_decompress(
38     struct kwajd_stream *kwaj);
39 static void lzh_free(
40     struct kwajd_stream *kwaj);
41 static int lzh_read_lens(
42     struct kwajd_stream *kwaj,
43     unsigned int type, unsigned int numsyms,
44     unsigned char *lens);
45 static int lzh_read_input(
46     struct kwajd_stream *kwaj);
47 
48 
49 /***************************************
50  * MSPACK_CREATE_KWAJ_DECOMPRESSOR
51  ***************************************
52  * constructor
53  */
54 struct mskwaj_decompressor *
mspack_create_kwaj_decompressor(struct mspack_system * sys)55     mspack_create_kwaj_decompressor(struct mspack_system *sys)
56 {
57   struct mskwaj_decompressor_p *self = NULL;
58 
59   if (!sys) sys = mspack_default_system;
60   if (!mspack_valid_system(sys)) return NULL;
61 
62   if ((self = (struct mskwaj_decompressor_p *) sys->alloc(sys, sizeof(struct mskwaj_decompressor_p)))) {
63     self->base.open       = &kwajd_open;
64     self->base.close      = &kwajd_close;
65     self->base.extract    = &kwajd_extract;
66     self->base.decompress = &kwajd_decompress;
67     self->base.last_error = &kwajd_error;
68     self->system          = sys;
69     self->error           = MSPACK_ERR_OK;
70   }
71   return (struct mskwaj_decompressor *) self;
72 }
73 
74 /***************************************
75  * MSPACK_DESTROY_KWAJ_DECOMPRESSOR
76  ***************************************
77  * destructor
78  */
mspack_destroy_kwaj_decompressor(struct mskwaj_decompressor * base)79 void mspack_destroy_kwaj_decompressor(struct mskwaj_decompressor *base)
80 {
81     struct mskwaj_decompressor_p *self = (struct mskwaj_decompressor_p *) base;
82     if (self) {
83         struct mspack_system *sys = self->system;
84         sys->free(self);
85     }
86 }
87 
88 /***************************************
89  * KWAJD_OPEN
90  ***************************************
91  * opens a KWAJ file without decompressing, reads header
92  */
kwajd_open(struct mskwaj_decompressor * base,const char * filename)93 static struct mskwajd_header *kwajd_open(struct mskwaj_decompressor *base,
94                                          const char *filename)
95 {
96     struct mskwaj_decompressor_p *self = (struct mskwaj_decompressor_p *) base;
97     struct mskwajd_header *hdr;
98     struct mspack_system *sys;
99     struct mspack_file *fh;
100 
101     if (!self) return NULL;
102     sys = self->system;
103 
104     fh  = sys->open(sys, filename, MSPACK_SYS_OPEN_READ);
105     hdr = (struct mskwajd_header *) sys->alloc(sys, sizeof(struct mskwajd_header_p));
106     if (fh && hdr) {
107         ((struct mskwajd_header_p *) hdr)->fh = fh;
108         self->error = kwajd_read_headers(sys, fh, hdr);
109     }
110     else {
111         if (!fh)  self->error = MSPACK_ERR_OPEN;
112         if (!hdr) self->error = MSPACK_ERR_NOMEMORY;
113     }
114 
115     if (self->error) {
116         if (fh) sys->close(fh);
117         sys->free(hdr);
118         hdr = NULL;
119     }
120 
121     return hdr;
122 }
123 
124 /***************************************
125  * KWAJD_CLOSE
126  ***************************************
127  * closes a KWAJ file
128  */
kwajd_close(struct mskwaj_decompressor * base,struct mskwajd_header * hdr)129 static void kwajd_close(struct mskwaj_decompressor *base,
130                         struct mskwajd_header *hdr)
131 {
132     struct mskwaj_decompressor_p *self = (struct mskwaj_decompressor_p *) base;
133     struct mskwajd_header_p *hdr_p = (struct mskwajd_header_p *) hdr;
134 
135     if (!self || !self->system) return;
136 
137     /* close the file handle associated */
138     self->system->close(hdr_p->fh);
139 
140     /* free the memory associated */
141     self->system->free(hdr);
142 
143     self->error = MSPACK_ERR_OK;
144 }
145 
146 /***************************************
147  * KWAJD_READ_HEADERS
148  ***************************************
149  * reads the headers of a KWAJ format file
150  */
kwajd_read_headers(struct mspack_system * sys,struct mspack_file * fh,struct mskwajd_header * hdr)151 static int kwajd_read_headers(struct mspack_system *sys,
152                               struct mspack_file *fh,
153                               struct mskwajd_header *hdr)
154 {
155     unsigned char buf[16];
156     int i;
157 
158     /* read in the header */
159     if (sys->read(fh, &buf[0], kwajh_SIZEOF) != kwajh_SIZEOF) {
160         return MSPACK_ERR_READ;
161     }
162 
163     /* check for "KWAJ" signature */
164     if (((unsigned int) EndGetI32(&buf[kwajh_Signature1]) != 0x4A41574B) ||
165         ((unsigned int) EndGetI32(&buf[kwajh_Signature2]) != 0xD127F088))
166     {
167         return MSPACK_ERR_SIGNATURE;
168     }
169 
170     /* basic header fields */
171     hdr->comp_type    = EndGetI16(&buf[kwajh_CompMethod]);
172     hdr->data_offset  = EndGetI16(&buf[kwajh_DataOffset]);
173     hdr->headers      = EndGetI16(&buf[kwajh_Flags]);
174     hdr->length       = 0;
175     hdr->filename     = NULL;
176     hdr->extra        = NULL;
177     hdr->extra_length = 0;
178 
179     /* optional headers */
180 
181     /* 4 bytes: length of unpacked file */
182     if (hdr->headers & MSKWAJ_HDR_HASLENGTH) {
183         if (sys->read(fh, &buf[0], 4) != 4) return MSPACK_ERR_READ;
184         hdr->length = EndGetI32(&buf[0]);
185     }
186 
187     /* 2 bytes: unknown purpose */
188     if (hdr->headers & MSKWAJ_HDR_HASUNKNOWN1) {
189         if (sys->read(fh, &buf[0], 2) != 2) return MSPACK_ERR_READ;
190     }
191 
192     /* 2 bytes: length of section, then [length] bytes: unknown purpose */
193     if (hdr->headers & MSKWAJ_HDR_HASUNKNOWN2) {
194         if (sys->read(fh, &buf[0], 2) != 2) return MSPACK_ERR_READ;
195         i = EndGetI16(&buf[0]);
196         if (sys->seek(fh, (off_t)i, MSPACK_SYS_SEEK_CUR)) return MSPACK_ERR_SEEK;
197     }
198 
199     /* filename and extension */
200     if (hdr->headers & (MSKWAJ_HDR_HASFILENAME | MSKWAJ_HDR_HASFILEEXT)) {
201         int len;
202         /* allocate memory for maximum length filename */
203         char *fn = (char *) sys->alloc(sys, (size_t) 13);
204         if (!(hdr->filename = fn)) return MSPACK_ERR_NOMEMORY;
205 
206         /* copy filename if present */
207         if (hdr->headers & MSKWAJ_HDR_HASFILENAME) {
208             /* read and copy up to 9 bytes of a null terminated string */
209             if ((len = sys->read(fh, &buf[0], 9)) < 2) return MSPACK_ERR_READ;
210             for (i = 0; i < len; i++) if (!(*fn++ = buf[i])) break;
211             /* if string was 9 bytes with no null terminator, reject it */
212             if (i == 9 && buf[8] != '\0') return MSPACK_ERR_DATAFORMAT;
213             /* seek to byte after string ended in file */
214             if (sys->seek(fh, (off_t)(i + 1 - len), MSPACK_SYS_SEEK_CUR))
215                 return MSPACK_ERR_SEEK;
216             fn--; /* remove the null terminator */
217         }
218 
219         /* copy extension if present */
220         if (hdr->headers & MSKWAJ_HDR_HASFILEEXT) {
221             *fn++ = '.';
222             /* read and copy up to 4 bytes of a null terminated string */
223             if ((len = sys->read(fh, &buf[0], 4)) < 2) return MSPACK_ERR_READ;
224             for (i = 0; i < len; i++) if (!(*fn++ = buf[i])) break;
225             /* if string was 4 bytes with no null terminator, reject it */
226             if (i == 4 && buf[3] != '\0') return MSPACK_ERR_DATAFORMAT;
227             /* seek to byte after string ended in file */
228             if (sys->seek(fh, (off_t)(i + 1 - len), MSPACK_SYS_SEEK_CUR))
229                 return MSPACK_ERR_SEEK;
230             fn--; /* remove the null terminator */
231         }
232         *fn = '\0';
233     }
234 
235     /* 2 bytes: extra text length then [length] bytes of extra text data */
236     if (hdr->headers & MSKWAJ_HDR_HASEXTRATEXT) {
237         if (sys->read(fh, &buf[0], 2) != 2) return MSPACK_ERR_READ;
238         i = EndGetI16(&buf[0]);
239         hdr->extra = (char *) sys->alloc(sys, (size_t)i+1);
240         if (! hdr->extra) return MSPACK_ERR_NOMEMORY;
241         if (sys->read(fh, hdr->extra, i) != i) return MSPACK_ERR_READ;
242         hdr->extra[i] = '\0';
243         hdr->extra_length = i;
244     }
245     return MSPACK_ERR_OK;
246 }
247 
248 /***************************************
249  * KWAJD_EXTRACT
250  ***************************************
251  * decompresses a KWAJ file
252  */
kwajd_extract(struct mskwaj_decompressor * base,struct mskwajd_header * hdr,const char * filename)253 static int kwajd_extract(struct mskwaj_decompressor *base,
254                          struct mskwajd_header *hdr, const char *filename)
255 {
256     struct mskwaj_decompressor_p *self = (struct mskwaj_decompressor_p *) base;
257     struct mspack_system *sys;
258     struct mspack_file *fh, *outfh;
259 
260     if (!self) return MSPACK_ERR_ARGS;
261     if (!hdr) return self->error = MSPACK_ERR_ARGS;
262 
263     sys = self->system;
264     fh = ((struct mskwajd_header_p *) hdr)->fh;
265 
266     /* seek to the compressed data */
267     if (sys->seek(fh, hdr->data_offset, MSPACK_SYS_SEEK_START)) {
268         return self->error = MSPACK_ERR_SEEK;
269     }
270 
271     /* open file for output */
272     if (!(outfh = sys->open(sys, filename, MSPACK_SYS_OPEN_WRITE))) {
273         return self->error = MSPACK_ERR_OPEN;
274     }
275 
276     self->error = MSPACK_ERR_OK;
277 
278     /* decompress based on format */
279     if (hdr->comp_type == MSKWAJ_COMP_NONE ||
280         hdr->comp_type == MSKWAJ_COMP_XOR)
281     {
282         /* NONE is a straight copy. XOR is a copy xored with 0xFF */
283         unsigned char *buf = (unsigned char *) sys->alloc(sys, (size_t) KWAJ_INPUT_SIZE);
284         if (buf) {
285             int read, i;
286             while ((read = sys->read(fh, buf, KWAJ_INPUT_SIZE)) > 0) {
287                 if (hdr->comp_type == MSKWAJ_COMP_XOR) {
288                     for (i = 0; i < read; i++) buf[i] ^= 0xFF;
289                 }
290                 if (sys->write(outfh, buf, read) != read) {
291                     self->error = MSPACK_ERR_WRITE;
292                     break;
293                 }
294             }
295             if (read < 0) self->error = MSPACK_ERR_READ;
296             sys->free(buf);
297         }
298         else {
299             self->error = MSPACK_ERR_NOMEMORY;
300         }
301     }
302     else if (hdr->comp_type == MSKWAJ_COMP_SZDD) {
303         self->error = lzss_decompress(sys, fh, outfh, KWAJ_INPUT_SIZE,
304                                       LZSS_MODE_EXPAND);
305     }
306     else if (hdr->comp_type == MSKWAJ_COMP_LZH) {
307         struct kwajd_stream *lzh = lzh_init(sys, fh, outfh);
308         self->error = (lzh) ? lzh_decompress(lzh) : MSPACK_ERR_NOMEMORY;
309         lzh_free(lzh);
310     }
311     else if (hdr->comp_type == MSKWAJ_COMP_MSZIP) {
312         struct mszipd_stream *zip = mszipd_init(sys,fh,outfh,KWAJ_INPUT_SIZE,0);
313         self->error = (zip) ? mszipd_decompress_kwaj(zip) : MSPACK_ERR_NOMEMORY;
314         mszipd_free(zip);
315     }
316     else {
317         self->error = MSPACK_ERR_DATAFORMAT;
318     }
319 
320     /* close output file */
321     sys->close(outfh);
322 
323     return self->error;
324 }
325 
326 /***************************************
327  * KWAJD_DECOMPRESS
328  ***************************************
329  * unpacks directly from input to output
330  */
kwajd_decompress(struct mskwaj_decompressor * base,const char * input,const char * output)331 static int kwajd_decompress(struct mskwaj_decompressor *base,
332                             const char *input, const char *output)
333 {
334     struct mskwaj_decompressor_p *self = (struct mskwaj_decompressor_p *) base;
335     struct mskwajd_header *hdr;
336     int error;
337 
338     if (!self) return MSPACK_ERR_ARGS;
339 
340     if (!(hdr = kwajd_open(base, input))) return self->error;
341     error = kwajd_extract(base, hdr, output);
342     kwajd_close(base, hdr);
343     return self->error = error;
344 }
345 
346 /***************************************
347  * KWAJD_ERROR
348  ***************************************
349  * returns the last error that occurred
350  */
kwajd_error(struct mskwaj_decompressor * base)351 static int kwajd_error(struct mskwaj_decompressor *base)
352 {
353     struct mskwaj_decompressor_p *self = (struct mskwaj_decompressor_p *) base;
354     return (self) ? self->error : MSPACK_ERR_ARGS;
355 }
356 
357 /***************************************
358  * LZH_INIT, LZH_DECOMPRESS, LZH_FREE
359  ***************************************
360  * unpacks KWAJ method 3 files
361  */
362 
363 /* import bit-reading macros and code */
364 #define BITS_TYPE struct kwajd_stream
365 #define BITS_VAR lzh
366 #define BITS_ORDER_MSB
367 #define BITS_NO_READ_INPUT
368 #define READ_BYTES do {                                 \
369     if (i_ptr >= i_end) {                               \
370         if ((err = lzh_read_input(lzh))) return err;    \
371         i_ptr = lzh->i_ptr;                             \
372         i_end = lzh->i_end;                             \
373     }                                                   \
374     INJECT_BITS(*i_ptr++, 8);                           \
375 } while (0)
376 #include <readbits.h>
377 
378 /* import huffman-reading macros and code */
379 #define TABLEBITS(tbl)      KWAJ_TABLEBITS
380 #define MAXSYMBOLS(tbl)     KWAJ_##tbl##_SYMS
381 #define HUFF_TABLE(tbl,idx) lzh->tbl##_table[idx]
382 #define HUFF_LEN(tbl,idx)   lzh->tbl##_len[idx]
383 #define HUFF_ERROR          return MSPACK_ERR_DATAFORMAT
384 #include <readhuff.h>
385 
386 /* In the KWAJ LZH format, there is no special 'eof' marker, it just
387  * ends. Depending on how many bits are left in the final byte when
388  * the stream ends, that might be enough to start another literal or
389  * match. The only easy way to detect that we've come to an end is to
390  * guard all bit-reading. We allow fake bits to be read once we reach
391  * the end of the stream, but we check if we then consumed any of
392  * those fake bits, after doing the READ_BITS / READ_HUFFSYM. This
393  * isn't how the default readbits.h read_input() works (it simply lets
394  * 2 fake bytes in then stops), so we implement our own.
395  */
396 #define READ_BITS_SAFE(val, n) do {                     \
397     READ_BITS(val, n);                                  \
398     if (lzh->input_end && bits_left < lzh->input_end)   \
399         return MSPACK_ERR_OK;                           \
400 } while (0)
401 
402 #define READ_HUFFSYM_SAFE(tbl, val) do {                \
403     READ_HUFFSYM(tbl, val);                             \
404     if (lzh->input_end && bits_left < lzh->input_end)   \
405         return MSPACK_ERR_OK;                           \
406 } while (0)
407 
408 #define BUILD_TREE(tbl, type)                                           \
409     STORE_BITS;                                                         \
410     err = lzh_read_lens(lzh, type, MAXSYMBOLS(tbl), &HUFF_LEN(tbl,0));  \
411     if (err) return err;                                                \
412     RESTORE_BITS;                                                       \
413     if (make_decode_table(MAXSYMBOLS(tbl), TABLEBITS(tbl),              \
414         &HUFF_LEN(tbl,0), &HUFF_TABLE(tbl,0)))                          \
415         return MSPACK_ERR_DATAFORMAT;
416 
417 #define WRITE_BYTE do {                                                 \
418     if (lzh->sys->write(lzh->output, &lzh->window[pos], 1) != 1)        \
419         return MSPACK_ERR_WRITE;                                        \
420 } while (0)
421 
lzh_init(struct mspack_system * sys,struct mspack_file * in,struct mspack_file * out)422 static struct kwajd_stream *lzh_init(struct mspack_system *sys,
423     struct mspack_file *in, struct mspack_file *out)
424 {
425     struct kwajd_stream *lzh;
426 
427     if (!sys || !in || !out) return NULL;
428     if (!(lzh = (struct kwajd_stream *) sys->alloc(sys, sizeof(struct kwajd_stream)))) return NULL;
429 
430     lzh->sys    = sys;
431     lzh->input  = in;
432     lzh->output = out;
433     return lzh;
434 }
435 
lzh_decompress(struct kwajd_stream * lzh)436 static int lzh_decompress(struct kwajd_stream *lzh)
437 {
438     register unsigned int bit_buffer;
439     register int bits_left, i;
440     register unsigned short sym;
441     unsigned char *i_ptr, *i_end, lit_run = 0;
442     int j, pos = 0, len, offset, err;
443     unsigned int types[6];
444 
445     /* reset global state */
446     INIT_BITS;
447     RESTORE_BITS;
448     memset(&lzh->window[0], LZSS_WINDOW_FILL, (size_t) LZSS_WINDOW_SIZE);
449 
450     /* read 6 encoding types (for byte alignment) but only 5 are needed */
451     for (i = 0; i < 6; i++) READ_BITS_SAFE(types[i], 4);
452 
453     /* read huffman table symbol lengths and build huffman trees */
454     BUILD_TREE(MATCHLEN1, types[0]);
455     BUILD_TREE(MATCHLEN2, types[1]);
456     BUILD_TREE(LITLEN,    types[2]);
457     BUILD_TREE(OFFSET,    types[3]);
458     BUILD_TREE(LITERAL,   types[4]);
459 
460     while (!lzh->input_end) {
461         if (lit_run) READ_HUFFSYM_SAFE(MATCHLEN2, len);
462         else         READ_HUFFSYM_SAFE(MATCHLEN1, len);
463 
464         if (len > 0) {
465             len += 2;
466             lit_run = 0; /* not the end of a literal run */
467             READ_HUFFSYM_SAFE(OFFSET, j); offset = j << 6;
468             READ_BITS_SAFE(j, 6);         offset |= j;
469 
470             /* copy match as output and into the ring buffer */
471             while (len-- > 0) {
472                 lzh->window[pos] = lzh->window[(pos+4096-offset) & 4095];
473                 WRITE_BYTE;
474                 pos++; pos &= 4095;
475             }
476         }
477         else {
478             READ_HUFFSYM_SAFE(LITLEN, len); len++;
479             lit_run = (len == 32) ? 0 : 1; /* end of a literal run? */
480             while (len-- > 0) {
481                 READ_HUFFSYM_SAFE(LITERAL, j);
482                 /* copy as output and into the ring buffer */
483                 lzh->window[pos] = j;
484                 WRITE_BYTE;
485                 pos++; pos &= 4095;
486             }
487         }
488     }
489     return MSPACK_ERR_OK;
490 }
491 
lzh_free(struct kwajd_stream * lzh)492 static void lzh_free(struct kwajd_stream *lzh)
493 {
494     struct mspack_system *sys;
495     if (!lzh || !lzh->sys) return;
496     sys = lzh->sys;
497     sys->free(lzh);
498 }
499 
lzh_read_lens(struct kwajd_stream * lzh,unsigned int type,unsigned int numsyms,unsigned char * lens)500 static int lzh_read_lens(struct kwajd_stream *lzh,
501                          unsigned int type, unsigned int numsyms,
502                          unsigned char *lens)
503 {
504     register unsigned int bit_buffer;
505     register int bits_left;
506     unsigned char *i_ptr, *i_end;
507     unsigned int i, c, sel;
508     int err;
509 
510     RESTORE_BITS;
511     switch (type) {
512     case 0:
513         i = numsyms; c = (i==16)?4: (i==32)?5: (i==64)?6: (i==256)?8 :0;
514         for (i = 0; i < numsyms; i++) lens[i] = c;
515         break;
516 
517     case 1:
518         READ_BITS_SAFE(c, 4); lens[0] = c;
519         for (i = 1; i < numsyms; i++) {
520                    READ_BITS_SAFE(sel, 1); if (sel == 0)  lens[i] = c;
521             else { READ_BITS_SAFE(sel, 1); if (sel == 0)  lens[i] = ++c;
522             else { READ_BITS_SAFE(c, 4);                  lens[i] = c; }}
523         }
524         break;
525 
526     case 2:
527         READ_BITS_SAFE(c, 4); lens[0] = c;
528         for (i = 1; i < numsyms; i++) {
529             READ_BITS_SAFE(sel, 2);
530             if (sel == 3) READ_BITS_SAFE(c, 4); else c += (char) sel-1;
531             lens[i] = c;
532         }
533         break;
534 
535     case 3:
536         for (i = 0; i < numsyms; i++) {
537             READ_BITS_SAFE(c, 4); lens[i] = c;
538         }
539         break;
540     }
541     STORE_BITS;
542     return MSPACK_ERR_OK;
543 }
544 
lzh_read_input(struct kwajd_stream * lzh)545 static int lzh_read_input(struct kwajd_stream *lzh) {
546     int read;
547     if (lzh->input_end) {
548         lzh->input_end += 8;
549         lzh->inbuf[0] = 0;
550         read = 1;
551     }
552     else {
553         read = lzh->sys->read(lzh->input, &lzh->inbuf[0], KWAJ_INPUT_SIZE);
554         if (read < 0) return MSPACK_ERR_READ;
555         if (read == 0) {
556             lzh->input_end = 8;
557             lzh->inbuf[0] = 0;
558             read = 1;
559         }
560     }
561 
562     /* update i_ptr and i_end */
563     lzh->i_ptr = &lzh->inbuf[0];
564     lzh->i_end = &lzh->inbuf[read];
565     return MSPACK_ERR_OK;
566 }
567