1 /* lbrate 1.0 - fully extract CP/M `.lbr' archives.
2 * Copyright (C) 2001 Russell Marks. See main.c for license details.
3 *
4 * readlzh.c - read LZH-compressed files.
5 *
6 * This is based on the well-known lzhuf.c. Since the original
7 * licence was at best ambiguous, I asked all three authors if
8 * I could use a modified version of lzhuf.c in a GPL'd program,
9 * and the two who responded agreed.
10 *
11 * (The third, Yoshizaki, is thought by Okumura not likely to object,
12 * perhaps since his code was based on Okumura's lzari.c - which has
13 * always been under the licence mentioned below. :-))
14 *
15 * The following reflects what they consider to be the real licence
16 * on lzhuf.c:
17 *
18 * lzhuf.c
19 * Copyright (C) 1989 Haruhiko Okumura, Haruyasu Yoshizaki, and Kenji Rikitake.
20 * Use, distribute, and modify this program freely.
21 */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26
27 #include "readlzh.h"
28
29
30 /********** LZSS compression **********/
31
32 /* these are the values required for the "Y" format */
33 #define LZ_N 2048
34 #define LZ_F 60
35 #define THRESHOLD 2
36
37 static unsigned int checksum;
38 static int oldver,lastchar;
39
40 unsigned char text_buf[LZ_N + LZ_F - 1];
41
42
43 /* Huffman coding */
44
45 #define N_CHAR (256 + 1 - THRESHOLD + LZ_F)
46 /* kinds of characters (character code = 0..N_CHAR-1) */
47 #define LZ_T (N_CHAR * 2 - 1) /* size of table */
48 #define LZ_R (LZ_T - 1) /* position of root */
49 #define MAX_FREQ 0x8000 /* updates tree when the */
50 /* root frequency comes to this value. */
51
52
53 /* table for decoding the upper 6 bits of position */
54
55 /* for decoding */
56 unsigned char d_code[256] =
57 {
58 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
59 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
60 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
61 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
62 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
63 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
64 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
65 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
66 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
67 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
68 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
69 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
70 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
71 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
72 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
73 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
74 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A,
75 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B, 0x0B,
76 0x0C, 0x0C, 0x0C, 0x0C, 0x0D, 0x0D, 0x0D, 0x0D,
77 0x0E, 0x0E, 0x0E, 0x0E, 0x0F, 0x0F, 0x0F, 0x0F,
78 0x10, 0x10, 0x10, 0x10, 0x11, 0x11, 0x11, 0x11,
79 0x12, 0x12, 0x12, 0x12, 0x13, 0x13, 0x13, 0x13,
80 0x14, 0x14, 0x14, 0x14, 0x15, 0x15, 0x15, 0x15,
81 0x16, 0x16, 0x16, 0x16, 0x17, 0x17, 0x17, 0x17,
82 0x18, 0x18, 0x19, 0x19, 0x1A, 0x1A, 0x1B, 0x1B,
83 0x1C, 0x1C, 0x1D, 0x1D, 0x1E, 0x1E, 0x1F, 0x1F,
84 0x20, 0x20, 0x21, 0x21, 0x22, 0x22, 0x23, 0x23,
85 0x24, 0x24, 0x25, 0x25, 0x26, 0x26, 0x27, 0x27,
86 0x28, 0x28, 0x29, 0x29, 0x2A, 0x2A, 0x2B, 0x2B,
87 0x2C, 0x2C, 0x2D, 0x2D, 0x2E, 0x2E, 0x2F, 0x2F,
88 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
89 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
90 };
91
92 unsigned char d_len[256] =
93 {
94 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
95 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
96 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
97 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
98 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
99 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
100 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
101 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
102 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
103 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
104 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
105 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
106 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
107 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
108 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
109 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
110 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
111 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
112 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
113 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
114 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
115 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
116 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
117 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
118 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
119 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
120 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
121 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
122 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
123 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
124 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
125 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
126 };
127
128 static unsigned freq[LZ_T + 1]; /* frequency table */
129
130 /* pointers to parent nodes, except for the elements [LZ_T..LZ_T + N_CHAR - 1]
131 * which are used to get the positions of leaves corresponding to the codes.
132 */
133 static int prnt[LZ_T + N_CHAR];
134
135 /* pointers to child nodes (son[], son[] + 1) */
136 static int son[LZ_T];
137
138 /* for bit/byte reader */
139 unsigned int getbuf=0;
140 unsigned char getlen=0;
141
142 #define ALLOC_BLOCK_SIZE 32768
143
144 static unsigned char *data_in_point,*data_in_max;
145 static unsigned char *data_out,*data_out_point;
146 static int data_out_len,data_out_allocated;
147
148
rawinput(void)149 static int rawinput(void)
150 {
151 if(data_in_point<data_in_max)
152 return(*data_in_point++);
153 return(-1);
154 }
155
rawoutput(int byte)156 static void rawoutput(int byte)
157 {
158 if(data_out_len>=data_out_allocated)
159 {
160 data_out_allocated+=ALLOC_BLOCK_SIZE;
161 if((data_out=realloc(data_out,data_out_allocated))==NULL)
162 fprintf(stderr,"lbrate: out of memory!\n"),exit(1);
163 data_out_point=data_out+data_out_len;
164 }
165
166 *data_out_point++=byte;
167 data_out_len++;
168 checksum+=byte;
169 }
170
getbit(void)171 static int getbit(void)
172 {
173 int i;
174
175 while (getlen <= 8)
176 {
177 if ((lastchar = i = rawinput()) < 0) i = 0;
178 getbuf |= i << (8 - getlen);
179 getlen += 8;
180 }
181 i = getbuf;
182 getbuf <<= 1;
183 getlen--;
184 return ((i&0x8000)?1:0);
185 }
186
getbyte(void)187 static int getbyte(void)
188 {
189 int i;
190
191 while (getlen <= 8)
192 {
193 if ((lastchar = i = rawinput()) < 0) i = 0;
194 getbuf |= i << (8 - getlen);
195 getlen += 8;
196 }
197 i = getbuf;
198 getbuf <<= 8;
199 getlen -= 8;
200 return ((i >> 8)&255);
201 }
202
203
204
205 /* initialization of tree */
206
start_huff(void)207 void start_huff(void)
208 {
209 int i, j;
210
211 for (i = 0; i < N_CHAR; i++)
212 {
213 freq[i] = 1;
214 son[i] = i + LZ_T;
215 prnt[i + LZ_T] = i;
216 }
217 i = 0; j = N_CHAR;
218 while (j <= LZ_R)
219 {
220 freq[j] = freq[i] + freq[i + 1];
221 son[j] = i;
222 prnt[i] = prnt[i + 1] = j;
223 i += 2; j++;
224 }
225 freq[LZ_T] = 0xffff;
226 prnt[LZ_R] = 0;
227 }
228
229
230 /* reconstruction of tree */
231
reconst(void)232 void reconst(void)
233 {
234 int i, j, k;
235 unsigned f;
236
237 /* collect leaf nodes in the first half of the table */
238 /* and replace the freq by (freq + 1) / 2. */
239 j = 0;
240 for (i = 0; i < LZ_T; i++)
241 {
242 if (son[i] >= LZ_T)
243 {
244 freq[j] = (freq[i] + 1) / 2;
245 son[j] = son[i];
246 j++;
247 }
248 }
249
250 /* begin constructing tree by connecting sons */
251 for (i = 0, j = N_CHAR; j < LZ_T; i += 2, j++)
252 {
253 k = i + 1;
254 f = freq[j] = freq[i] + freq[k];
255 for (k = j - 1; f < freq[k]; k--);
256 k++;
257 memmove(freq+k+1,freq+k,(j-k)*sizeof(freq[0]));
258 freq[k]=f;
259 memmove(son+k+1,son+k,(j-k)*sizeof(son[0]));
260 son[k]=i;
261 }
262
263 /* connect prnt */
264 for (i = 0; i < LZ_T; i++)
265 {
266 if ((k = son[i]) >= LZ_T)
267 prnt[k] = i;
268 else
269 prnt[k] = prnt[k + 1] = i;
270 }
271 }
272
273
274 /* increment frequency of given code by one, and update tree */
275
update(int c)276 void update(int c)
277 {
278 int i, j, k, l;
279
280 if (freq[LZ_R] == MAX_FREQ)
281 reconst();
282
283 c = prnt[c + LZ_T];
284 do
285 {
286 k = ++freq[c];
287
288 /* if the order is disturbed, exchange nodes */
289 if (k > freq[l = c + 1])
290 {
291 while (k > freq[++l]);
292 l--;
293 freq[c] = freq[l];
294 freq[l] = k;
295
296 i = son[c];
297 prnt[i] = l;
298 if (i < LZ_T) prnt[i + 1] = l;
299
300 j = son[l];
301 son[l] = i;
302
303 prnt[j] = c;
304 if (j < LZ_T) prnt[j + 1] = c;
305 son[c] = j;
306
307 c = l;
308 }
309 }
310 while ((c = prnt[c]) != 0); /* repeat up to root */
311 }
312
313
decode_char(void)314 int decode_char(void)
315 {
316 unsigned c;
317
318 c = son[LZ_R];
319
320 /* travel from root to leaf,
321 * choosing the smaller child node (son[]) if the read bit is 0,
322 * the bigger (son[]+1) if 1.
323 */
324 while (c < LZ_T)
325 {
326 c += getbit();
327 c = son[c];
328 }
329 c -= LZ_T;
330 update(c);
331 return c;
332 }
333
decode_position(void)334 int decode_position(void)
335 {
336 unsigned i, j, c;
337
338 /* recover upper bits from table */
339 i = getbyte();
340 c = (unsigned)d_code[i] << (5+oldver); /* 5, or 6 for 1.x */
341 j = d_len[i];
342
343 /* read lower bits verbatim */
344 j -= 3-oldver; /* 3, or 2 for 1.x */
345 while (j--)
346 {
347 i = (i << 1) + getbit();
348 }
349 return c | (i & (oldver?0x3f:0x1f)); /* 0x1f, or 0x3f for 1.x */
350 }
351
352
353
354 #define READ_WORD(x) (x)=rawinput(),(x)|=(rawinput()<<8)
355
convert_lzh(unsigned char * data_in,unsigned long in_len,unsigned long * out_len_ptr)356 unsigned char *convert_lzh(unsigned char *data_in,
357 unsigned long in_len,
358 unsigned long *out_len_ptr)
359 {
360 int c,v,checktype,magic,orig_checksum;
361 int i,j,k,r;
362
363 *out_len_ptr=0;
364
365 if((data_out=malloc(data_out_allocated=ALLOC_BLOCK_SIZE))==NULL)
366 fprintf(stderr,"lbrate: out of memory!\n"),exit(1);
367
368 data_in_point=data_in; data_in_max=data_in+in_len;
369 data_out_point=data_out; data_out_len=0;
370
371 READ_WORD(magic);
372 if(magic!=MAGIC_LZH)
373 {
374 free(data_out);
375 return(NULL);
376 }
377
378 /* skip filename */
379 while((c=rawinput())!=0)
380 if(c==-1)
381 {
382 free(data_out);
383 return(NULL);
384 }
385
386 /* four info bytes */
387 rawinput();
388 oldver=((v=rawinput())<0x20?1:0);
389 checktype=rawinput();
390 rawinput();
391
392 getbuf=0;
393 getlen=0;
394 checksum=0;
395
396 start_huff();
397
398 r=LZ_N-LZ_F;
399 memset(text_buf,32,r);
400
401 while((c=decode_char())!=256) /* 256 = EOF */
402 {
403 if(c<256)
404 {
405 rawoutput(c);
406 text_buf[r++] = c;
407 r&=(LZ_N-1);
408 }
409 else
410 {
411 i=(r-decode_position()-1)&(LZ_N-1);
412 j=c-256+THRESHOLD;
413 for (k = 0; k < j; k++)
414 {
415 c=text_buf[(i+k)&(LZ_N-1)];
416 rawoutput(c);
417 text_buf[r++]=c;
418 r&=(LZ_N-1);
419 }
420 }
421 }
422
423 /* lastchar junk is needed because bit(/byte) reader reads a byte
424 * in advance.
425 */
426 orig_checksum=lastchar;
427 orig_checksum+=256*rawinput();
428
429 /* see how the checksum turned out */
430 checksum&=0xffff;
431 if(checktype==0 && checksum!=orig_checksum)
432 {
433 free(data_out);
434 return(NULL);
435 }
436
437 *out_len_ptr=data_out_len;
438 return(data_out);
439 }
440