#include <assert.h>
#include <stddef.h>

#include "halibut.h"
#include "huffman.h"
#include "lz77.h"
#include "lzx.h"

#define OUR_LZX_WINSIZE 0x10000
#define LZX_MINMATCHLEN 2
#define LZX_MAXMATCHLEN 257

int lzx_compute_position_slot(int pos, int *footer_bits)
{
    if (pos < 4) {
        /* The bottom four position slots cover one value each. */
        *footer_bits = 0;
        return pos;
    } else if (pos >= 0x40000) {
        /* _All_ slots from 36 onwards are 2^17 values wide. */
        *footer_bits = 17;
        return 34 + (pos >> 17);
    } else {
        /* In between, there are two slots for each power-of-2 size,
         * so that slots 4,5 have width 2^1, 6,7 have width 2^2, 8,9
         * have width 2^3, ..., and 34,35 have width 2^16. */
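        /* For example (purely illustrative): pos = 12 scans down to
         * bits = 2 with shifted = 0x30000, so we return slot
         * 2 + 2*2 + 1 = 7, whose footer is the bottom 2 bits of pos. */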
        int bits = 16;
        int shifted = pos;
        if (shifted < (1<<(18-8))) shifted <<= 8, bits -= 8;
        if (shifted < (1<<(18-4))) shifted <<= 4, bits -= 4;
        if (shifted < (1<<(18-2))) shifted <<= 2, bits -= 2;
        if (shifted < (1<<(18-1))) shifted <<= 1, bits -= 1;
        *footer_bits = bits;
        return 2 + 2*bits + ((shifted >> 16) & 1);
    }
}

typedef enum LZXSymType {
    LST_MAINTREE, LST_LENTREE, LST_ALIGNOFFTREE,
    LST_MAINTREE_PRETREE_1, LST_MAINTREE_PRETREE_2, LST_LENTREE_PRETREE,
    LST_NTREES, dummy_enum_const = LST_NTREES-1,
    LST_REALIGN_BITSTREAM,
    LST_RAWBITS_BASE /* add the number of actual bits to this code */
} LZXSymType;
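
/* Note: LST_REALIGN_BITSTREAM and LST_RAWBITS_BASE are not Huffman trees at
 * all; dummy_enum_const rewinds the enum counter so that they continue
 * numbering contiguously from LST_NTREES without being counted in it. */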

typedef struct LZXSym {
    LZXSymType type;
    int value;
} LZXSym;

typedef struct LZXBuffer {
    LZXSym *syms;
    int nsyms, symsize;
} LZXBuffer;

typedef struct LZXInfo {
    LZXBuffer *buf;
    int r0, r1, r2;                    /* saved match offsets */
} LZXInfo;

static void lzx_buffer_init(LZXBuffer *buf)
{
    buf->syms = NULL;
    buf->nsyms = buf->symsize = 0;
}

static void lzx_addsym(LZXBuffer *buf, LZXSymType type, int value)
{
    if (buf->nsyms >= buf->symsize) {
        assert(buf->nsyms == buf->symsize);
        buf->symsize = buf->nsyms * 5 / 4 + 16384;
        buf->syms = sresize(buf->syms, buf->symsize, LZXSym);
    }
    buf->syms[buf->nsyms].type = type;
    buf->syms[buf->nsyms].value = value;
    buf->nsyms++;
}

static void lzx_literal(struct LZ77Context *ctx, unsigned char c)
{
    LZXBuffer *buf = ((LZXInfo *)ctx->userdata)->buf;
    lzx_addsym(buf, LST_MAINTREE, c);
}

static void lzx_match(struct LZ77Context *ctx, int match_offset, int totallen)
{
    LZXInfo *info = (LZXInfo *)ctx->userdata;
    LZXBuffer *buf = info->buf;

    /*
     * First, this variant of LZX has a maximum match length of 257
     * bytes, so if lz77.c reports a longer match than that, we must
     * break it up.
     */
    while (totallen > 0) {
        int len, length_header, length_footer, len_pos_header;
        int formatted_offset, position_slot, position_verbatim_bits;
        int position_verbatim_value, position_aligned_offset;

        if (totallen <= LZX_MAXMATCHLEN) {
            /* We can emit all of the (remaining) match length in one go. */
            len = totallen;
        } else if (totallen >= LZX_MAXMATCHLEN+LZX_MINMATCHLEN) {
            /* There's enough match left that we can emit a
             * maximum-length chunk and still be assured of being able
             * to emit what's left as a viable followup match. */
            len = LZX_MAXMATCHLEN;
        } else {
            /* The in-between case, where we have _only just_ too long
             * a match to emit in one go, so that if we emitted a
             * max-size chunk then what's left would be under the min
             * size and we couldn't emit it. */
            len = totallen - LZX_MINMATCHLEN;
        }
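        /* For instance, a 258-byte match from lz77.c comes out as a
         * 256-byte LZX match followed by a 2-byte one, rather than
         * 257+1 (which would leave an unencodable 1-byte remainder). */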
        totallen -= len;

        /*
         * Now we're outputting a single LZX-level match of length
         * 'len'. Break the length up into a 'header' (included in the
         * starting LST_MAINTREE symbol) and a 'footer' (tacked on
         * afterwards using LST_LENTREE).
         */
        if (len < 9) {
            length_header = len - 2;   /* in the range {0,...,6} */
            length_footer = -1;        /* not transmitted at all */
        } else {
            length_header = 7;         /* header indicates more to come */
            length_footer = len - 9;   /* in the range {0,...,248} */
        }
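        /* E.g. len = 5 is sent as length_header 3 with no footer, while
         * len = 100 becomes length_header 7 plus length_footer 91. */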

        /*
         * Meanwhile, the raw backward distance is first transformed
         * into the 'formatted offset', by either adding 2 or using
         * one of the low-numbered special codes meaning to use one of
         * the three most recent match distances.
         */
        if (match_offset == info->r0) {
            /* Reuse the most recent distance */
            formatted_offset = 0;
        } else if (match_offset == info->r1) {
            /* Reuse the 2nd most recent, and swap it into first place */
            int tmp = info->r1;
            info->r1 = info->r0;
            info->r0 = tmp;
            formatted_offset = 1;
        } else if (match_offset == info->r2) {
            /* Reuse the 3rd most recent and swap it to first place.
             * This is intentionally not quite a move-to-front
             * shuffle, which would permute (r0,r1,r2)->(r2,r0,r1); MS
             * decided that just swapping r0 with r2 was a better
             * performance tradeoff. */
            int tmp = info->r2;
            info->r2 = info->r0;
            info->r0 = tmp;
            formatted_offset = 2;
        } else {
            /* This offset matches none of the three saved values.
             * Put it in r0, and move up the rest of the list. */
            info->r2 = info->r1;
            info->r1 = info->r0;
            info->r0 = match_offset;
            formatted_offset = match_offset + 2;
        }
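        /* Example: with (r0,r1,r2) = (10,20,30), a new match at offset 30
         * yields formatted_offset 2 and leaves the list as (30,20,10). */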

        /*
         * The formatted offset now breaks up into a 'position slot'
         * (encoded as part of the starting symbol) and an offset from
         * the smallest position value covered by that slot. The
         * system of slots is designed so that every slot's width is a
         * power of two and its base value is a multiple of its width,
         * so we can get the offset just by taking the bottom n bits
         * of the full formatted offset, once the choice of position
         * slot tells us what n is.
         */
        position_slot = lzx_compute_position_slot(
            formatted_offset, &position_verbatim_bits);
        position_verbatim_value = formatted_offset &
            ((1 << position_verbatim_bits)-1);
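        /* Example: formatted_offset 100 gives position_slot 13 with 5
         * footer bits, so position_verbatim_value = 100 & 31 = 4. */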

        /*
         * If there are three or more additional bits, then the last 3
         * of them are (potentially, depending on block type which we
         * haven't decided about yet) transmitted using the aligned
         * offset tree. The rest are sent verbatim.
         */
        if (position_verbatim_bits >= 3) {
            position_aligned_offset = position_verbatim_value & 7;
            position_verbatim_bits -= 3;
            position_verbatim_value >>= 3;
        } else {
            position_aligned_offset = -1; /* not transmitted */
        }
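        /* Continuing the example above: of those 5 footer bits, the low 3
         * (value 4) go to the aligned offset tree and the remaining 2
         * verbatim bits carry the value 0. */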

        /*
         * Combine the length header and position slot into the full
         * set of information encoded by the starting symbol.
         */
        len_pos_header = position_slot * 8 + length_header;

        /*
         * And now we've finished figuring out _what_ to output, so
         * output it.
         */
        lzx_addsym(buf, LST_MAINTREE, 256 + len_pos_header);
        if (length_footer >= 0)
            lzx_addsym(buf, LST_LENTREE, length_footer);
        if (position_verbatim_bits > 0)
            lzx_addsym(buf, LST_RAWBITS_BASE + position_verbatim_bits,
                       position_verbatim_value);
        if (position_aligned_offset >= 0)
            lzx_addsym(buf, LST_ALIGNOFFTREE, position_aligned_offset);
    }
}

void lzx_lz77_inner(LZXInfo *info, const unsigned char *data, int len)
{
    struct LZ77Context lz77c;
    lz77_init(&lz77c, OUR_LZX_WINSIZE);
    lz77c.literal = lzx_literal;
    lz77c.match = lzx_match;
    lz77c.userdata = info;
    lz77_compress(&lz77c, data, len, TRUE);
    lz77_cleanup(&lz77c);
}

void lzx_lz77(LZXBuffer *buf, const unsigned char *data,
              int totallen, int realign_interval)
{
    LZXInfo info;

    info.r0 = info.r1 = info.r2 = 1;
    info.buf = buf;

    while (totallen > 0) {
        int thislen =
            totallen < realign_interval ? totallen : realign_interval;
        lzx_lz77_inner(&info, data, thislen);
        data += thislen;
        totallen -= thislen;
        if (totallen > 0)
            lzx_addsym(info.buf, LST_REALIGN_BITSTREAM, 0);
    }
}

typedef struct LZXHuf {
    int nsyms;
    unsigned char *lengths;
    unsigned char *oldlengths; /* for pretree encoding to diff against */
    int *codes;
} LZXHuf;

typedef struct LZXHufs {
    LZXHuf hufs[LST_NTREES];
} LZXHufs;

void lzx_build_tree(LZXSym *syms, int nsyms, LZXSymType which, LZXHufs *hufs)
{
    int i, max_code_len;
    int *freqs;
    LZXHuf *huf = &hufs->hufs[which];

    switch (which) {
      default:
        assert(0 && "Bad lzx_build_tree tree type");
      case LST_MAINTREE:
        /*
         * Trees encoded via a pretree have a max code length of 16,
         * because that's the limit of what the pretree alphabet can
         * represent.
         */
        max_code_len = 16;

        /*
         * Number of symbols in the main tree is 256 literals, plus 8n
         * match header symbols where n is the largest position slot
         * number that might be needed to address any offset in the
         * window.
         */
        {
            int ignored, last_slot;
            last_slot = lzx_compute_position_slot(OUR_LZX_WINSIZE-1, &ignored);
            huf->nsyms = 8 * (last_slot+1) + 256;
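            /* With OUR_LZX_WINSIZE = 0x10000 this computes last_slot = 31,
             * so the main tree ends up with 8*32 + 256 = 512 symbols. */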
        }
        break;
      case LST_LENTREE:
        max_code_len = 16;             /* pretree again */
        huf->nsyms = 249;              /* a fixed value in the spec */
        break;
      case LST_MAINTREE_PRETREE_1:
      case LST_MAINTREE_PRETREE_2:
      case LST_LENTREE_PRETREE:
        /* Pretree code lengths are stored in 4-bit fields, so they
         * can't go above 15. There are a standard 20 symbols in the
         * pretree alphabet. */
        max_code_len = 15;
        huf->nsyms = 20;
        break;
      case LST_ALIGNOFFTREE:
        /* The aligned-offset tree has 8 elements stored in 3-bit
         * fields. */
        max_code_len = 7;
        huf->nsyms = 8;
        break;
    }

    freqs = snewn(huf->nsyms, int);

    /*
     * Count up the symbol frequencies.
     */
    for (i = 0; i < huf->nsyms; i++)
        freqs[i] = 0;
    for (i = 0; i < nsyms; i++)
        if (syms[i].type == which)
            freqs[syms[i].value]++;

    /*
     * Build the Huffman table.
     */
    huf->lengths = snewn(huf->nsyms, unsigned char);
    build_huffman_tree(freqs, huf->lengths, huf->nsyms, max_code_len);
    huf->codes = snewn(huf->nsyms, int);
    compute_huffman_codes(huf->lengths, huf->codes, huf->nsyms);

    /*
     * Cleanup.
     */
    sfree(freqs);
}

void lzx_tree_with_pretree(LZXHuf *huf, int symoffset, int symlimit,
                           LZXBuffer *buf, LZXSymType pretree_symtype)
{
    int i, r;

    if (!huf->oldlengths) {
        huf->oldlengths = snewn(huf->nsyms, unsigned char);
        for (i = 0; i < huf->nsyms; i++)
            huf->oldlengths[i] = 0;
    }

    for (i = symoffset; i < symlimit; i++) {
        for (r = 1; i+r < symlimit; r++)
            if (huf->lengths[i+r] != huf->lengths[i])
                break;

        if (r >= 4) {
            /*
             * We have at least one run of the same code length long
             * enough to use one of the run-length encoding symbols.
             */
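            /* The pretree alphabet: symbols 0-16 encode a single code
             * length as (old length - new length) mod 17; 17 means a run
             * of 4-19 zero lengths (plus 4 extra bits), 18 a run of 20-51
             * zero lengths (plus 5 extra bits), and 19 a run of 4-5 equal
             * lengths (plus 1 extra bit, then the length itself encoded
             * as above). */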
            while (r >= 4) {
                int thisrun;
                if (huf->lengths[i] == 0) {
                    thisrun = r > 20+31 ? 20+31 : r;
                    if (thisrun >= 20) {
                        lzx_addsym(buf, pretree_symtype, 18);
                        lzx_addsym(buf, LST_RAWBITS_BASE + 5, thisrun - 20);
                    } else {
                        lzx_addsym(buf, pretree_symtype, 17);
                        lzx_addsym(buf, LST_RAWBITS_BASE + 4, thisrun - 4);
                    }
                } else {
                    thisrun = r > 5 ? 5 : r;
                    lzx_addsym(buf, pretree_symtype, 19);
                    lzx_addsym(buf, LST_RAWBITS_BASE + 1, thisrun - 4);
                    lzx_addsym(buf, pretree_symtype,
                               (huf->oldlengths[i]-huf->lengths[i] + 17) % 17);
                }
                r -= thisrun;
                i += thisrun;
            }

            if (r == 0) {
                i--;        /* compensate for normal loop increment */
                continue;
            }
        }

        /*
         * Otherwise, emit a normal non-encoded symbol.
         */
        lzx_addsym(buf, pretree_symtype,
                   (huf->oldlengths[i]-huf->lengths[i] + 17) % 17);
    }
}

void lzx_tree_simple(LZXHuf *huf, LZXBuffer *buf, int bits)
{
    int i;
    for (i = 0; i < huf->nsyms; i++)
        lzx_addsym(buf, LST_RAWBITS_BASE + bits, huf->lengths[i]);
}

typedef struct LZXBitstream {
    struct LZXEncodedFile *ef;
    size_t data_size, resets_size;
    unsigned short bitbuffer;
    int nbits;
    int first_block;
} LZXBitstream;

void lzx_write_bits(LZXBitstream *bs, int value, int bits)
{
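    /* The output is accumulated as a sequence of 16-bit words: bits are
     * packed from the most significant end of each word, and completed
     * words are emitted in little-endian byte order, matching the LZX
     * bitstream convention. */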
    while (bs->nbits + bits >= 16) {
        int thisbits = 16 - bs->nbits;
        bs->bitbuffer = (bs->bitbuffer << thisbits) |
            (value >> (bits-thisbits));

        if (bs->ef->data_len+2 > bs->data_size) {
            bs->data_size = bs->ef->data_len * 5 / 4 + 65536;
            bs->ef->data = sresize(bs->ef->data, bs->data_size,
                                   unsigned char);
        }
        bs->ef->data[bs->ef->data_len++] = bs->bitbuffer;
        bs->ef->data[bs->ef->data_len++] = bs->bitbuffer >> 8;

        bs->bitbuffer = 0;
        bs->nbits = 0;

        bits -= thisbits;
        value &= (1<<bits) - 1;
    }

    bs->bitbuffer = (bs->bitbuffer << bits) | value;
    bs->nbits += bits;
}

void lzx_realign(LZXBitstream *bs)
{
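    /* Pad with (16 - nbits) mod 16 zero bits, bringing the stream back to
     * a 16-bit boundary. */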
    lzx_write_bits(bs, 0, 15 & -(unsigned)bs->nbits);
}

void lzx_write_reset_table_entry(LZXBitstream *bs)
{
    lzx_write_bits(bs, 0, 15 & -(unsigned)bs->nbits);

    if (bs->ef->n_resets >= bs->resets_size) {
        bs->resets_size = bs->ef->n_resets * 5 / 4 + 256;
        bs->ef->reset_byte_offsets = sresize(bs->ef->reset_byte_offsets,
                                             bs->resets_size, size_t);
    }
    bs->ef->reset_byte_offsets[bs->ef->n_resets++] = bs->ef->data_len;
}

void lzx_huf_encode(LZXSym *syms, int nsyms, LZXHufs *hufs, LZXBitstream *bs)
{
    int i;
    for (i = 0; i < nsyms; i++) {
        LZXSymType type = syms[i].type;
        int value = syms[i].value;

        if (type >= LST_RAWBITS_BASE) {
            lzx_write_bits(bs, value, type - LST_RAWBITS_BASE);
        } else if (type == LST_REALIGN_BITSTREAM) {
            /* Realign the bitstream to a 16-bit boundary, and write a
             * reset table entry giving the resulting byte offset. */
            lzx_realign(bs);
            lzx_write_reset_table_entry(bs);
        } else {
            lzx_write_bits(bs, hufs->hufs[type].codes[value],
                           hufs->hufs[type].lengths[value]);
        }
    }
}

void lzx_encode_block(LZXSym *syms, int nsyms, int blocksize,
                      LZXHufs *hufs, LZXBitstream *bs)
{
    LZXBuffer header[8];
    int i, blocktype;

    for (i = 0; i < (int)lenof(header); i++)
        lzx_buffer_init(&header[i]);
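    /* The eight header buffers are written out in index order ahead of the
     * main symbol stream: [0] block header bits, [1] aligned offset tree
     * (verbatim, only for block type 2), [2]/[3] pretree and lengths for
     * the first 256 main tree elements, [4]/[5] the same for the rest of
     * the main tree, [6]/[7] the same for the length tree. */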

    /*
     * Build the Huffman trees for the main alphabets used in the
     * block.
     */
    lzx_build_tree(syms, nsyms, LST_MAINTREE, hufs);
    lzx_build_tree(syms, nsyms, LST_LENTREE, hufs);
    lzx_build_tree(syms, nsyms, LST_ALIGNOFFTREE, hufs);

    /*
     * Encode each of those as a sequence of pretree symbols.
     */
    lzx_tree_with_pretree(&hufs->hufs[LST_MAINTREE], 0, 256,
                          &header[3], LST_MAINTREE_PRETREE_1);
    lzx_tree_with_pretree(&hufs->hufs[LST_MAINTREE], 256,
                          hufs->hufs[LST_MAINTREE].nsyms,
                          &header[5], LST_MAINTREE_PRETREE_2);
    lzx_tree_with_pretree(&hufs->hufs[LST_LENTREE], 0,
                          hufs->hufs[LST_LENTREE].nsyms,
                          &header[7], LST_LENTREE_PRETREE);

    /*
     * Build the pretree for each of those encodings.
     */
    lzx_build_tree(header[3].syms, header[3].nsyms,
                   LST_MAINTREE_PRETREE_1, hufs);
    lzx_build_tree(header[5].syms, header[5].nsyms,
                   LST_MAINTREE_PRETREE_2, hufs);
    lzx_build_tree(header[7].syms, header[7].nsyms,
                   LST_LENTREE_PRETREE, hufs);

    /*
     * Decide whether we're keeping the aligned offset tree or not.
     */
    {
        int with, without;

        with = 3*8;                    /* cost of transmitting tree */
        without = 0;                   /* or not */

        for (i = 0; i < nsyms; i++)
            if (syms[i].type == LST_ALIGNOFFTREE) {
                with += hufs->hufs[LST_ALIGNOFFTREE].lengths[syms[i].value];
                without += 3;
            }

        if (with < without) {
            /* Yes, it's a win to use the aligned offset tree. */
            blocktype = 2;
        } else {
            /* No, we do better by throwing it away. */
            blocktype = 1;

            /* Easiest way to simulate that is to pretend we're still
             * using an aligned offset tree in the encoding, but to
             * chuck away our code lengths and replace them with the
             * fixed-length trivial tree. */
            for (i = 0; i < 8; i++) {
                hufs->hufs[LST_ALIGNOFFTREE].lengths[i] = 3;
                hufs->hufs[LST_ALIGNOFFTREE].codes[i] = i;
            }
        }
    }

    /*
     * Encode all the simply encoded trees (the three pretrees and the
     * aligned offset tree).
     */
    lzx_tree_simple(&hufs->hufs[LST_MAINTREE_PRETREE_1], &header[2], 4);
    lzx_tree_simple(&hufs->hufs[LST_MAINTREE_PRETREE_2], &header[4], 4);
    lzx_tree_simple(&hufs->hufs[LST_LENTREE_PRETREE], &header[6], 4);
    if (blocktype == 2)
        lzx_tree_simple(&hufs->hufs[LST_ALIGNOFFTREE], &header[1], 3);

    /*
     * Top-level block header.
     */
    if (bs->first_block) {
        /*
         * Also include the whole-file header which says whether E8
         * call translation is on. We never turn it on, because we
         * don't support it (since in this use case it doesn't seem
         * likely to be particularly useful anyway).
         *
         * It looks like a layer violation to put the output of this
         * whole-file header inside the per-block function like this,
         * but in fact it has to be done here because the first reset
         * table entry really is supposed to point to the _start_ of
         * the whole-file header.
         */
        lzx_addsym(&header[0], LST_RAWBITS_BASE + 1, 0);
        bs->first_block = FALSE;
    }
    lzx_addsym(&header[0], LST_RAWBITS_BASE + 3, blocktype);
    lzx_addsym(&header[0], LST_RAWBITS_BASE + 24, blocksize);

    /*
     * Ensure the bit stream starts off aligned, and output an initial
     * reset-table entry.
     */
    lzx_realign(bs);
    lzx_write_reset_table_entry(bs);

    /*
     * Write out all of our symbol sequences in order: all of those
     * assorted header fragments, then the main LZ77 token sequence.
     */
    for (i = 0; i < (int)lenof(header); i++)
        lzx_huf_encode(header[i].syms, header[i].nsyms, hufs, bs);
    lzx_huf_encode(syms, nsyms, hufs, bs);

    /*
     * Clean up.
     */
    for (i = 0; i < (int)lenof(header); i++)
        sfree(header[i].syms);
    for (i = 0; i < (int)lenof(hufs->hufs); i++) {
        sfree(hufs->hufs[i].codes);
        sfree(hufs->hufs[i].lengths);
    }
}

struct LZXEncodedFile *lzx(const void *vdata, int totallen,
                           int realign_interval, int reset_interval)
{
    const unsigned char *data = (const unsigned char *)vdata;
    LZXBitstream bs;
    LZXHufs hufs;
    int i;

    bs.ef = snew(struct LZXEncodedFile);
    bs.ef->data = NULL;
    bs.ef->reset_byte_offsets = NULL;
    bs.ef->data_len = bs.data_size = 0;
    bs.ef->n_resets = bs.resets_size = 0;
    bs.bitbuffer = 0;
    bs.nbits = 0;

    for (i = 0; i < (int)lenof(hufs.hufs); i++)
        hufs.hufs[i].oldlengths = NULL;

    while (totallen > 0) {
        int thislen =
            totallen < reset_interval ? totallen : reset_interval;
        LZXBuffer buf;

        lzx_buffer_init(&buf);

        lzx_lz77(&buf, data, thislen, realign_interval);
        data += thislen;
        totallen -= thislen;

        /*
         * Block boundaries are chosen completely trivially: since we
         * have to terminate a block every time we reach the (fairly
         * short) reset interval in any case, it doesn't hurt us much
         * to just fix the assumption that every (reset_interval)
         * bytes of the input turn into exactly one block, i.e. the
         * whole of buf.syms that we just constructed is output in one
         * go. We _could_ try improving on this by clever
         * block-boundary heuristics, but I don't really think it's
         * worth it.
         */
        bs.first_block = TRUE; /* reset every time we reset the LZ state */
        lzx_encode_block(buf.syms, buf.nsyms, thislen, &hufs, &bs);

        sfree(buf.syms);
    }

    for (i = 0; i < (int)lenof(hufs.hufs); i++)
        sfree(hufs.hufs[i].oldlengths);

    /* Realign to a 16-bit boundary, i.e. flush out any last few
     * unwritten bits. */
    lzx_realign(&bs);

    return bs.ef;
}

#ifdef LZX_TEST
/*
gcc -g -O0 -DLZX_TEST -o lzxtest -Icharset lzx.c lz77.c huffman.c malloc.c
*/
#include <err.h>
int main(int argc, char **argv)
{
    FILE *fp;
    long insize;
    unsigned char *inbuf;
    struct LZXEncodedFile *ef;

    if (argc != 3)
        errx(1, "expected infile and outfile arguments");

    fp = fopen(argv[1], "rb");
    if (!fp)
        err(1, "%s: open", argv[1]);
    fseek(fp, 0, SEEK_END);
    insize = ftell(fp);
    rewind(fp);
    inbuf = snewn(insize, unsigned char);
    fread(inbuf, 1, insize, fp);
    fclose(fp);

    ef = lzx(inbuf, insize, 0x8000, 0x10000);

    fp = fopen(argv[2], "wb");
    if (!fp)
        err(1, "%s: open", argv[2]);
    fwrite(ef->data, 1, ef->data_len, fp);
    fclose(fp);

    sfree(ef->data);
    sfree(ef->reset_byte_offsets);
    sfree(ef);
    sfree(inbuf);

    return 0;
}

wchar_t *ustrdup(wchar_t const *s) { assert(0 && "should be unused"); }
void fatalerr_nomemory(void) { errx(1, "out of memory"); }
#endif