1 /* ------------------------------------------------------------------------ */
2 /* LHa for UNIX */
3 /* slide.c -- sliding dictionary with percolating update */
4 /* */
5 /* Modified Nobutaka Watazaki */
6 /* */
7 /* Ver. 1.14d Exchanging a search algorithm 1997.01.11 T.Okamoto */
8 /* ------------------------------------------------------------------------ */
9
10 #if 0
11 #define DEBUG 1
12 #endif
13
14 #include "lha.h"
15
16 #ifdef DEBUG
17 FILE *fout = NULL;
18 static int noslide = 1;
19 #endif
20
21 /* variables for hash */
22 struct hash {
23 unsigned int pos;
24 int too_flag; /* if 1, matching candidate is too many */
25 } *hash;
26 static unsigned int *prev; /* previous posiion associated with hash */
27
28 /* hash function: it represents 3 letters from `pos' on `text' */
29 #define INIT_HASH(pos) \
30 ((( (text[(pos)] << 5) \
31 ^ text[(pos) + 1] ) << 5) \
32 ^ text[(pos) + 2] ) & (unsigned)(HSHSIZ - 1);
33 #define NEXT_HASH(hash,pos) \
34 (((hash) << 5) \
35 ^ text[(pos) + 2] ) & (unsigned)(HSHSIZ - 1);
36
37 static struct encode_option encode_define[2] = {
38 #if defined(__STDC__) || defined(AIX)
39 /* lh1 */
40 {(void (*) ()) output_dyn,
41 (void (*) ()) encode_start_fix,
42 (void (*) ()) encode_end_dyn},
43 /* lh4, 5, 6, 7 */
44 {(void (*) ()) output_st1,
45 (void (*) ()) encode_start_st1,
46 (void (*) ()) encode_end_st1}
47 #else
48 /* lh1 */
49 {(int (*) ()) output_dyn,
50 (int (*) ()) encode_start_fix,
51 (int (*) ()) encode_end_dyn},
52 /* lh4, 5, 6, 7 */
53 {(int (*) ()) output_st1,
54 (int (*) ()) encode_start_st1,
55 (int (*) ()) encode_end_st1}
56 #endif
57 };
58
59 static struct decode_option decode_define[] = {
60 /* lh1 */
61 {decode_c_dyn, decode_p_st0, decode_start_fix},
62 /* lh2 */
63 {decode_c_dyn, decode_p_dyn, decode_start_dyn},
64 /* lh3 */
65 {decode_c_st0, decode_p_st0, decode_start_st0},
66 /* lh4 */
67 {decode_c_st1, decode_p_st1, decode_start_st1},
68 /* lh5 */
69 {decode_c_st1, decode_p_st1, decode_start_st1},
70 /* lh6 */
71 {decode_c_st1, decode_p_st1, decode_start_st1},
72 /* lh7 */
73 {decode_c_st1, decode_p_st1, decode_start_st1},
74 /* lzs */
75 {decode_c_lzs, decode_p_lzs, decode_start_lzs},
76 /* lz5 */
77 {decode_c_lz5, decode_p_lz5, decode_start_lz5}
78 };
79
80 static struct encode_option encode_set;
81 static struct decode_option decode_set;
82
83 #define TXTSIZ (MAX_DICSIZ * 2L + MAXMATCH)
84 #define HSHSIZ (((unsigned long)1) <<15)
85 #define NIL 0
86 #define LIMIT 0x100 /* limit of hash chain */
87
88 static unsigned int txtsiz;
89 static unsigned long dicsiz;
90 static unsigned int remainder;
91
92 struct matchdata {
93 int len;
94 unsigned int off;
95 };
96
97 int
encode_alloc(method)98 encode_alloc(method)
99 int method;
100 {
101 switch (method) {
102 case LZHUFF1_METHOD_NUM:
103 encode_set = encode_define[0];
104 maxmatch = 60;
105 dicbit = LZHUFF1_DICBIT; /* 12 bits Changed N.Watazaki */
106 break;
107 case LZHUFF5_METHOD_NUM:
108 encode_set = encode_define[1];
109 maxmatch = MAXMATCH;
110 dicbit = LZHUFF5_DICBIT; /* 13 bits */
111 break;
112 case LZHUFF6_METHOD_NUM:
113 encode_set = encode_define[1];
114 maxmatch = MAXMATCH;
115 dicbit = LZHUFF6_DICBIT; /* 15 bits */
116 break;
117 case LZHUFF7_METHOD_NUM:
118 encode_set = encode_define[1];
119 maxmatch = MAXMATCH;
120 dicbit = LZHUFF7_DICBIT; /* 16 bits */
121 break;
122 default:
123 error("unknown method %d", method);
124 exit(1);
125 }
126
127 dicsiz = (((unsigned long)1) << dicbit);
128 txtsiz = dicsiz*2+maxmatch;
129
130 if (hash) return method;
131
132 alloc_buf();
133
134 hash = (struct hash*)xmalloc(HSHSIZ * sizeof(struct hash));
135 prev = (unsigned int*)xmalloc(MAX_DICSIZ * sizeof(unsigned int));
136 text = (unsigned char*)xmalloc(TXTSIZ);
137
138 return method;
139 }
140
141 static void
init_slide()142 init_slide()
143 {
144 unsigned int i;
145
146 for (i = 0; i < HSHSIZ; i++) {
147 hash[i].pos = NIL;
148 hash[i].too_flag = 0;
149 }
150 }
151
152 /* update dictionary */
153 static void
update_dict(pos,crc)154 update_dict(pos, crc)
155 unsigned int *pos;
156 unsigned int *crc;
157 {
158 unsigned int i, j;
159 long n;
160
161 memmove(&text[0], &text[dicsiz], txtsiz - dicsiz);
162
163 n = fread_crc(crc, &text[txtsiz - dicsiz], dicsiz, infile);
164
165 remainder += n;
166
167 *pos -= dicsiz;
168 for (i = 0; i < HSHSIZ; i++) {
169 j = hash[i].pos;
170 hash[i].pos = (j > dicsiz) ? j - dicsiz : NIL;
171 hash[i].too_flag = 0;
172 }
173 for (i = 0; i < dicsiz; i++) {
174 j = prev[i];
175 prev[i] = (j > dicsiz) ? j - dicsiz : NIL;
176 }
177 }
178
179 /* associate position with token */
180 static void
insert_hash(token,pos)181 insert_hash(token, pos)
182 unsigned int token;
183 unsigned int pos;
184 {
185 prev[pos & (dicsiz - 1)] = hash[token].pos; /* chain the previous pos. */
186 hash[token].pos = pos;
187 }
188
189 static void
search_dict_1(token,pos,off,max,m)190 search_dict_1(token, pos, off, max, m)
191 unsigned int token;
192 unsigned int pos;
193 unsigned int off;
194 unsigned int max; /* max. length of matching string */
195 struct matchdata *m;
196 {
197 unsigned int chain = 0;
198 unsigned int scan_pos = hash[token].pos;
199 int scan_beg = scan_pos - off;
200 int scan_end = pos - dicsiz;
201 unsigned int len;
202
203 while (scan_beg > scan_end) {
204 chain++;
205
206 if (text[scan_beg + m->len] == text[pos + m->len]) {
207 {
208 /* collate token */
209 unsigned char *a = &text[scan_beg];
210 unsigned char *b = &text[pos];
211
212 for (len = 0; len < max && *a++ == *b++; len++);
213 }
214
215 if (len > m->len) {
216 m->off = pos - scan_beg;
217 m->len = len;
218 if (m->len == max)
219 break;
220
221 #ifdef DEBUG
222 if (noslide) {
223 if (pos - m->off < dicsiz) {
224 printf("matchpos=%u scan_pos=%u dicsiz=%u\n",
225 pos - m->off, scan_pos, dicsiz);
226 }
227 }
228 #endif
229 }
230 }
231 scan_pos = prev[scan_pos & (dicsiz - 1)];
232 scan_beg = scan_pos - off;
233 }
234
235 if (chain >= LIMIT)
236 hash[token].too_flag = 1;
237 }
238
239 /* search the longest token matching to current token */
240 static void
search_dict(token,pos,min,m)241 search_dict(token, pos, min, m)
242 unsigned int token; /* search token */
243 unsigned int pos; /* position of token */
244 int min; /* min. length of matching string */
245 struct matchdata *m;
246 {
247 unsigned int off, tok, max;
248
249 if (min < THRESHOLD - 1) min = THRESHOLD - 1;
250
251 max = maxmatch;
252 m->off = 0;
253 m->len = min;
254
255 off = 0;
256 for (tok = token; hash[tok].too_flag && off < maxmatch - THRESHOLD; ) {
257 /* If matching position is too many, The search key is
258 changed into following token from `off' (for speed). */
259 ++off;
260 tok = NEXT_HASH(tok, pos+off);
261 }
262 if (off == maxmatch - THRESHOLD) {
263 off = 0;
264 tok = token;
265 }
266
267 search_dict_1(tok, pos, off, max, m);
268
269 if (off > 0 && m->len < off + 3)
270 /* re-search */
271 search_dict_1(token, pos, 0, off+2, m);
272
273 if (m->len > remainder) m->len = remainder;
274 }
275
276 /* slide dictionary */
277 static void
next_token(token,pos,crc)278 next_token(token, pos, crc)
279 unsigned int *token;
280 unsigned int *pos;
281 unsigned int *crc;
282 {
283 remainder--;
284 if (++*pos >= txtsiz - maxmatch) {
285 update_dict(pos, crc);
286 #ifdef DEBUG
287 noslide = 0;
288 #endif
289 }
290 *token = NEXT_HASH(*token, *pos);
291 }
292
293 unsigned int
encode(interface)294 encode(interface)
295 struct interfacing *interface;
296 {
297 unsigned int token, pos, crc;
298 size_t count;
299 struct matchdata match, last;
300
301 #ifdef DEBUG
302 if (!fout)
303 fout = xfopen("en", "wt");
304 fprintf(fout, "[filename: %s]\n", reading_filename);
305 #endif
306 infile = interface->infile;
307 outfile = interface->outfile;
308 origsize = interface->original;
309 compsize = count = 0L;
310 unpackable = 0;
311
312 INITIALIZE_CRC(crc);
313
314 init_slide();
315
316 encode_set.encode_start();
317 memset(text, ' ', TXTSIZ);
318
319 remainder = fread_crc(&crc, &text[dicsiz], txtsiz-dicsiz, infile);
320
321 match.len = THRESHOLD - 1;
322 match.off = 0;
323 if (match.len > remainder) match.len = remainder;
324
325 pos = dicsiz;
326 token = INIT_HASH(pos);
327 insert_hash(token, pos); /* associate token and pos */
328
329 while (remainder > 0 && ! unpackable) {
330 last = match;
331
332 next_token(&token, &pos, &crc);
333 search_dict(token, pos, last.len-1, &match);
334 insert_hash(token, pos);
335
336 if (match.len > last.len || last.len < THRESHOLD) {
337 /* output a letter */
338 encode_set.output(text[pos - 1], 0);
339 #ifdef DEBUG
340 fprintf(fout, "%u C %02X\n", count, text[pos-1]);
341 #endif
342 count++;
343 } else {
344 /* output length and offset */
345 encode_set.output(last.len + (256 - THRESHOLD),
346 (last.off-1) & (dicsiz-1) );
347
348 #ifdef DEBUG
349 {
350 int i;
351 unsigned char *ptr;
352 unsigned int offset = (last.off & (dicsiz-1));
353
354 fprintf(fout, "%u M <%u %u> ",
355 count, last.len, count - offset);
356
357 ptr = &text[pos-1 - offset];
358 for (i=0; i < last.len; i++)
359 fprintf(fout, "%02X ", ptr[i]);
360 fprintf(fout, "\n");
361 }
362 #endif
363 count += last.len;
364
365 --last.len;
366 while (--last.len > 0) {
367 next_token(&token, &pos, &crc);
368 insert_hash(token, pos);
369 }
370 next_token(&token, &pos, &crc);
371 search_dict(token, pos, THRESHOLD - 1, &match);
372 insert_hash(token, pos);
373 }
374 }
375 encode_set.encode_end();
376
377 interface->packed = compsize;
378 interface->original = count;
379
380 return crc;
381 }
382
383 unsigned int
decode(interface)384 decode(interface)
385 struct interfacing *interface;
386 {
387 unsigned int i, c;
388 unsigned int dicsiz1, adjust;
389 unsigned char *dtext;
390 unsigned int crc;
391
392 #ifdef DEBUG
393 if (!fout)
394 fout = xfopen("de", "wt");
395 fprintf(fout, "[filename: %s]\n", writing_filename);
396 #endif
397
398 infile = interface->infile;
399 outfile = interface->outfile;
400 dicbit = interface->dicbit;
401 origsize = interface->original;
402 compsize = interface->packed;
403 decode_set = decode_define[interface->method - 1];
404
405 INITIALIZE_CRC(crc);
406 dicsiz = 1L << dicbit;
407 dtext = (unsigned char *)xmalloc(dicsiz);
408
409 if (extract_broken_archive)
410
411 /* LHa for UNIX (autoconf) had a fatal bug since version
412 1.14i-ac20030713 (slide.c revision 1.20).
413
414 This bug is possible to make a broken archive, proper LHA
415 cannot extract it (probably it report CRC error).
416
417 If the option "--extract-broken-archive" specified, extract
418 the broken archive made by old LHa for UNIX. */
419 memset(dtext, 0, dicsiz);
420 else
421 memset(dtext, ' ', dicsiz);
422 decode_set.decode_start();
423 dicsiz1 = dicsiz - 1;
424 adjust = 256 - THRESHOLD;
425 if (interface->method == LARC_METHOD_NUM)
426 adjust = 256 - 2;
427
428 decode_count = 0;
429 loc = 0;
430 while (decode_count < origsize) {
431 c = decode_set.decode_c();
432 if (c < 256) {
433 #ifdef DEBUG
434 fprintf(fout, "%u C %02X\n", decode_count, c);
435 #endif
436 dtext[loc++] = c;
437 if (loc == dicsiz) {
438 fwrite_crc(&crc, dtext, dicsiz, outfile);
439 loc = 0;
440 }
441 decode_count++;
442 }
443 else {
444 struct matchdata match;
445 unsigned int matchpos;
446
447 match.len = c - adjust;
448 match.off = decode_set.decode_p() + 1;
449 matchpos = (loc - match.off) & dicsiz1;
450 #ifdef DEBUG
451 fprintf(fout, "%u M <%u %u> ",
452 decode_count, match.len, decode_count-match.off);
453 #endif
454 decode_count += match.len;
455 for (i = 0; i < match.len; i++) {
456 c = dtext[(matchpos + i) & dicsiz1];
457 #ifdef DEBUG
458 fprintf(fout, "%02X ", c & 0xff);
459 #endif
460 dtext[loc++] = c;
461 if (loc == dicsiz) {
462 fwrite_crc(&crc, dtext, dicsiz, outfile);
463 loc = 0;
464 }
465 }
466 #ifdef DEBUG
467 fprintf(fout, "\n");
468 #endif
469 }
470 }
471 if (loc != 0) {
472 fwrite_crc(&crc, dtext, loc, outfile);
473 }
474
475 free(dtext);
476
477 /* usually read size is interface->packed */
478 interface->read_size = interface->packed - compsize;
479
480 return crc;
481 }
482