1 /* lbrate 1.0 - fully extract CP/M `.lbr' archives.
2  * Copyright (C) 2001 Russell Marks. See main.c for license details.
3  *
4  * readlzh.c - read LZH-compressed files.
5  *
6  * This is based on the well-known lzhuf.c. Since the original
7  * licence was at best ambiguous, I asked all three authors if
8  * I could use a modified version of lzhuf.c in a GPL'd program,
9  * and the two who responded agreed.
10  *
11  * (The third, Yoshizaki, is thought by Okumura not likely to object,
12  * perhaps since his code was based on Okumura's lzari.c - which has
13  * always been under the licence mentioned below. :-))
14  *
15  * The following reflects what they consider to be the real licence
16  * on lzhuf.c:
17  *
18  * lzhuf.c
19  * Copyright (C) 1989 Haruhiko Okumura, Haruyasu Yoshizaki, and Kenji Rikitake.
20  * Use, distribute, and modify this program freely.
21  */
22 
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #include "readlzh.h"
28 
29 
30 /********** LZSS compression **********/
31 
32 /* these are the values required for the "Y" format */
33 #define LZ_N		2048
34 #define LZ_F		60
35 #define THRESHOLD	2
36 
37 static unsigned int checksum;
38 static int oldver,lastchar;
39 
40 unsigned char text_buf[LZ_N + LZ_F - 1];
41 
42 
43 /* Huffman coding */
44 
45 #define N_CHAR		(256 + 1 - THRESHOLD + LZ_F)
46 /* kinds of characters (character code = 0..N_CHAR-1) */
47 #define LZ_T		(N_CHAR * 2 - 1)	/* size of table */
48 #define LZ_R		(LZ_T - 1)		/* position of root */
49 #define MAX_FREQ	0x8000		/* updates tree when the */
50 				   /* root frequency comes to this value. */
51 
52 
53 /* table for decoding the upper 6 bits of position */
54 
55 /* for decoding */
56 unsigned char d_code[256] =
57   {
58   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
59   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
60   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
61   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
62   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
63   0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
64   0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
65   0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
66   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
67   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
68   0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
69   0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
70   0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
71   0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
72   0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
73   0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
74   0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
75   0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
76   0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
77   0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
78   0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
79   0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
80   0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
81   0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
82   0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
83   0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
84   0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
85   0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
86   0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
87   0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
88   0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
89   0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
90   };
91 
92 unsigned char d_len[256] =
93   {
94   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
95   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
96   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
97   0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
98   0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
99   0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
100   0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
101   0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
102   0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
103   0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
104   0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
105   0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
106   0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
107   0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
108   0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
109   0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
110   0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
111   0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
112   0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
113   0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
114   0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
115   0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
116   0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
117   0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
118   0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
119   0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
120   0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
121   0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
122   0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
123   0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
124   0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
125   0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
126   };
127 
128 static unsigned freq[LZ_T + 1];     /* frequency table */
129 
130 /* pointers to parent nodes, except for the elements [LZ_T..LZ_T + N_CHAR - 1]
131  * which are used to get the positions of leaves corresponding to the codes.
132  */
133 static int prnt[LZ_T + N_CHAR];
134 
135 /* pointers to child nodes (son[], son[] + 1) */
136 static int son[LZ_T];
137 
138 /* for bit/byte reader */
139 unsigned int getbuf=0;
140 unsigned char getlen=0;
141 
142 #define ALLOC_BLOCK_SIZE	32768
143 
144 static unsigned char *data_in_point,*data_in_max;
145 static unsigned char *data_out,*data_out_point;
146 static int data_out_len,data_out_allocated;
147 
148 
rawinput(void)149 static int rawinput(void)
150 {
151 if(data_in_point<data_in_max)
152   return(*data_in_point++);
153 return(-1);
154 }
155 
rawoutput(int byte)156 static void rawoutput(int byte)
157 {
158 if(data_out_len>=data_out_allocated)
159   {
160   data_out_allocated+=ALLOC_BLOCK_SIZE;
161   if((data_out=realloc(data_out,data_out_allocated))==NULL)
162     fprintf(stderr,"lbrate: out of memory!\n"),exit(1);
163   data_out_point=data_out+data_out_len;
164   }
165 
166 *data_out_point++=byte;
167 data_out_len++;
168 checksum+=byte;
169 }
170 
getbit(void)171 static int getbit(void)
172 {
173 int i;
174 
175 while (getlen <= 8)
176   {
177   if ((lastchar = i = rawinput()) < 0) i = 0;
178   getbuf |= i << (8 - getlen);
179   getlen += 8;
180   }
181 i = getbuf;
182 getbuf <<= 1;
183 getlen--;
184 return ((i&0x8000)?1:0);
185 }
186 
getbyte(void)187 static int getbyte(void)
188 {
189 int i;
190 
191 while (getlen <= 8)
192   {
193   if ((lastchar = i = rawinput()) < 0) i = 0;
194   getbuf |= i << (8 - getlen);
195   getlen += 8;
196   }
197 i = getbuf;
198 getbuf <<= 8;
199 getlen -= 8;
200 return ((i >> 8)&255);
201 }
202 
203 
204 
205 /* initialization of tree */
206 
start_huff(void)207 void start_huff(void)
208 {
209 int i, j;
210 
211 for (i = 0; i < N_CHAR; i++)
212   {
213   freq[i] = 1;
214   son[i] = i + LZ_T;
215   prnt[i + LZ_T] = i;
216   }
217 i = 0; j = N_CHAR;
218 while (j <= LZ_R)
219   {
220   freq[j] = freq[i] + freq[i + 1];
221   son[j] = i;
222   prnt[i] = prnt[i + 1] = j;
223   i += 2; j++;
224   }
225 freq[LZ_T] = 0xffff;
226 prnt[LZ_R] = 0;
227 }
228 
229 
230 /* reconstruction of tree */
231 
reconst(void)232 void reconst(void)
233 {
234 int i, j, k;
235 unsigned f;
236 
237 /* collect leaf nodes in the first half of the table */
238 /* and replace the freq by (freq + 1) / 2. */
239 j = 0;
240 for (i = 0; i < LZ_T; i++)
241   {
242   if (son[i] >= LZ_T)
243     {
244     freq[j] = (freq[i] + 1) / 2;
245     son[j] = son[i];
246     j++;
247     }
248   }
249 
250 /* begin constructing tree by connecting sons */
251 for (i = 0, j = N_CHAR; j < LZ_T; i += 2, j++)
252   {
253   k = i + 1;
254   f = freq[j] = freq[i] + freq[k];
255   for (k = j - 1; f < freq[k]; k--);
256   k++;
257   memmove(freq+k+1,freq+k,(j-k)*sizeof(freq[0]));
258   freq[k]=f;
259   memmove(son+k+1,son+k,(j-k)*sizeof(son[0]));
260   son[k]=i;
261   }
262 
263 /* connect prnt */
264 for (i = 0; i < LZ_T; i++)
265   {
266   if ((k = son[i]) >= LZ_T)
267     prnt[k] = i;
268   else
269     prnt[k] = prnt[k + 1] = i;
270   }
271 }
272 
273 
274 /* increment frequency of given code by one, and update tree */
275 
update(int c)276 void update(int c)
277 {
278 int i, j, k, l;
279 
280 if (freq[LZ_R] == MAX_FREQ)
281   reconst();
282 
283 c = prnt[c + LZ_T];
284 do
285   {
286   k = ++freq[c];
287 
288   /* if the order is disturbed, exchange nodes */
289   if (k > freq[l = c + 1])
290     {
291     while (k > freq[++l]);
292     l--;
293     freq[c] = freq[l];
294     freq[l] = k;
295 
296     i = son[c];
297     prnt[i] = l;
298     if (i < LZ_T) prnt[i + 1] = l;
299 
300     j = son[l];
301     son[l] = i;
302 
303     prnt[j] = c;
304     if (j < LZ_T) prnt[j + 1] = c;
305     son[c] = j;
306 
307     c = l;
308     }
309   }
310 while ((c = prnt[c]) != 0);   /* repeat up to root */
311 }
312 
313 
decode_char(void)314 int decode_char(void)
315 {
316 unsigned c;
317 
318 c = son[LZ_R];
319 
320 /* travel from root to leaf,
321  * choosing the smaller child node (son[]) if the read bit is 0,
322  * the bigger (son[]+1) if 1.
323  */
324 while (c < LZ_T)
325   {
326   c += getbit();
327   c = son[c];
328   }
329 c -= LZ_T;
330 update(c);
331 return c;
332 }
333 
decode_position(void)334 int decode_position(void)
335 {
336 unsigned i, j, c;
337 
338 /* recover upper bits from table */
339 i = getbyte();
340 c = (unsigned)d_code[i] << (5+oldver);	/* 5, or 6 for 1.x */
341 j = d_len[i];
342 
343 /* read lower bits verbatim */
344 j -= 3-oldver;				/* 3, or 2 for 1.x */
345 while (j--)
346   {
347   i = (i << 1) + getbit();
348   }
349 return c | (i & (oldver?0x3f:0x1f));	/* 0x1f, or 0x3f for 1.x */
350 }
351 
352 
353 
354 #define READ_WORD(x) (x)=rawinput(),(x)|=(rawinput()<<8)
355 
convert_lzh(unsigned char * data_in,unsigned long in_len,unsigned long * out_len_ptr)356 unsigned char *convert_lzh(unsigned char *data_in,
357                            unsigned long in_len,
358                            unsigned long *out_len_ptr)
359 {
360 int c,v,checktype,magic,orig_checksum;
361 int i,j,k,r;
362 
363 *out_len_ptr=0;
364 
365 if((data_out=malloc(data_out_allocated=ALLOC_BLOCK_SIZE))==NULL)
366   fprintf(stderr,"lbrate: out of memory!\n"),exit(1);
367 
368 data_in_point=data_in; data_in_max=data_in+in_len;
369 data_out_point=data_out; data_out_len=0;
370 
371 READ_WORD(magic);
372 if(magic!=MAGIC_LZH)
373   {
374   free(data_out);
375   return(NULL);
376   }
377 
378 /* skip filename */
379 while((c=rawinput())!=0)
380   if(c==-1)
381     {
382     free(data_out);
383     return(NULL);
384     }
385 
386 /* four info bytes */
387 rawinput();
388 oldver=((v=rawinput())<0x20?1:0);
389 checktype=rawinput();
390 rawinput();
391 
392 getbuf=0;
393 getlen=0;
394 checksum=0;
395 
396 start_huff();
397 
398 r=LZ_N-LZ_F;
399 memset(text_buf,32,r);
400 
401 while((c=decode_char())!=256)	/* 256 = EOF */
402   {
403   if(c<256)
404     {
405     rawoutput(c);
406     text_buf[r++] = c;
407     r&=(LZ_N-1);
408     }
409   else
410     {
411     i=(r-decode_position()-1)&(LZ_N-1);
412     j=c-256+THRESHOLD;
413     for (k = 0; k < j; k++)
414       {
415       c=text_buf[(i+k)&(LZ_N-1)];
416       rawoutput(c);
417       text_buf[r++]=c;
418       r&=(LZ_N-1);
419       }
420     }
421   }
422 
423 /* lastchar junk is needed because bit(/byte) reader reads a byte
424  * in advance.
425  */
426 orig_checksum=lastchar;
427 orig_checksum+=256*rawinput();
428 
429 /* see how the checksum turned out */
430 checksum&=0xffff;
431 if(checktype==0 && checksum!=orig_checksum)
432   {
433   free(data_out);
434   return(NULL);
435   }
436 
437 *out_len_ptr=data_out_len;
438 return(data_out);
439 }
440