1 
2 /*-----------------------------------------------------------*/
3 /*--- A block-sorting, lossless compressor         bzip.c ---*/
4 /*-----------------------------------------------------------*/
5 
6 /*--
7   This program is BZIP, a lossless, block-sorting data compressor,
8   version 0.21, dated 25-August-1996.
9 
10   Copyright (C) 1996 by Julian Seward.
11      Department of Computer Science, University of Manchester,
12      Oxford Road, Manchester M13 9PL, UK.
13      email: sewardj@cs.man.ac.uk
14 
15   This program is free software; you can redistribute it and/or modify
16   it under the terms of the GNU General Public License as published by
17   the Free Software Foundation; either version 2 of the License, or
18   (at your option) any later version.
19 
20   This program is distributed in the hope that it will be useful,
21   but WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23   GNU General Public License for more details.
24 
25   You should have received a copy of the GNU General Public License
26   along with this program; if not, write to the Free Software
27   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 
29   The GNU General Public License is contained in the file LICENSE.
30 
31   This program is based on (at least) the work of:
32      Mike Burrows
33      David Wheeler
34      Peter Fenwick
35      Alistair Moffat
36      Radford Neal
37      Ian H. Witten
38 
39   For more information on these sources, see the file ALGORITHMS.
40 --*/
41 
42 /*----------------------------------------------------*/
43 /*--- IMPORTANT                                    ---*/
44 /*----------------------------------------------------*/
45 
46 /*--
47    WARNING:
48       This program (attempts to) compress data by performing several
49       non-trivial transformations on it.  Unless you are 100% familiar
50       with *all* the algorithms contained herein, and with the
51       consequences of modifying them, you should NOT meddle with the
52       compression or decompression machinery.  Incorrect changes can
53       and very likely *will* lead to disasterous loss of data.
54 
55    DISCLAIMER:
56       I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE
57       USE OF THIS PROGRAM, HOWSOEVER CAUSED.
58 
59       Every compression of a file implies an assumption that the
60       compressed file can be decompressed to reproduce the original.
61       Great efforts in design, coding and testing have been made to
62       ensure that this program works correctly.  However, the
63       complexity of the algorithms, and, in particular, the presence
64       of various special cases in the code which occur with very low
65       but non-zero probability make it impossible to rule out the
66       possibility of bugs remaining in the program.  DO NOT COMPRESS
67       ANY DATA WITH THIS PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE
68       POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL NOT BE RECOVERABLE.
69 
70       That is not to say this program is inherently unreliable.
71       Indeed, I very much hope the opposite is true.  BZIP has been
72       carefully constructed and extensively tested.
73 --*/
74 
75 
76 
77 /*----------------------------------------------------*/
78 /*--- and now for something much more pleasant :-) ---*/
79 /*----------------------------------------------------*/
80 
81 /*---------------------------------------------*/
82 /*--
83   Place a 1 beside your platform, and 0 elsewhere.
84 --*/
85 
86 #define BZ_UNIX_32          1  /*-- generic 32-bit Unix          --*/
87 #define BZ_UNIX_64          0  /*-- generic 64-bit Unix          --*/
88 #define BZ_WIN32_BORLANDC50 0  /*-- Win32/95/NT, Borland C++ 5.0 --*/
89 #define BZ_WIN32_MSVC20     0  /*-- Win32/95/NT, MS VC++ 2.0     --*/
90 
91 #define BZ_UNIX (BZ_UNIX_32 | BZ_UNIX_64)
92 
93 
94 /*---------------------------------------------*/
95 /*--
96   Some stuff for all platforms.
97 --*/
98 
99 #include <stdio.h>
100 #include <stdlib.h>
101 #include <assert.h>
102 #include <string.h>
103 #include <signal.h>
104 #include <errno.h>
105 
106 #define ERROR_IF_EOF(i)       { if ((i) == EOF)  ioError(); }
107 #define ERROR_IF_NOT_ZERO(i)  { if ((i) != 0)    ioError(); }
108 #define ERROR_IF_MINUS_ONE(i) { if ((i) == (-1)) ioError(); }
109 
110 
111 /*---------------------------------------------*/
112 /*--
113    Platform-specific stuff.
114 --*/
115 
116 #if BZ_UNIX_32
117    #include <sys/types.h>
118    #include <utime.h>
119    #include <unistd.h>
120 #if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) \
121  && !defined(__DragonFly__)	/* stdlib already included above */
122    #include <malloc.h>
123 #endif
124    #include <sys/stat.h>
125    #include <sys/times.h>
126 
127    #define Int32   int
128    #define UInt32  unsigned int
129    #define Char    char
130    #define UChar   unsigned char
131 
132    #define PATH_SEP    '/'
133    #define MY_LSTAT    lstat
134    #define MY_S_IFREG  S_ISREG
135    #define MY_STAT     stat
136 
137    #define APPEND_FILESPEC(root, name) \
138       root=snocString((root), (name))
139 
140    #define SET_BINARY_MODE(fd) /**/
141 
142    /*--
143       You should try very hard to persuade your C compiler
144       to inline the bits marked INLINE.  Otherwise bzip will
145       run rather slowly.  gcc version 2.x is recommended.
146    --*/
147    #ifdef __GNUC__
148       #define INLINE   inline
149       #define NORETURN __attribute__ ((noreturn))
150    #else
151       #define INLINE   /**/
152       #define NORETURN /**/
153    #endif
154 #endif
155 
156 
157 #if BZ_UNIX_64
158    Any volunteers?  If so, please mail me the relevant bits.
159 #endif
160 
161 
162 #if BZ_WIN32_BORLANDC50
163    #include <dir.h>
164    #include <io.h>
165    #include <fcntl.h>
166    #include <sys\stat.h>
167    #include <utime.h>
168 
169    #define Int32   int
170    #define UInt32  unsigned int
171    #define Char    char
172    #define UChar   unsigned char
173 
174    #define INLINE         /**/
175    #define NORETURN       /**/
176    #define PATH_SEP       '\\'
177    #define MY_S_IFREG(x)  ((x) & S_IFREG)
178    #define MY_LSTAT       stat
179    #define MY_STAT        stat
180 
181    #define APPEND_FILESPEC(root, spec)              \
182       do {                                          \
183          if ((spec)[0] == '-') {                    \
184             root = snocString((root), (spec));      \
185          } else {                                   \
186             struct ffblk ffblk;                     \
187             int done;                               \
188             done = findfirst((spec), &ffblk, 0);    \
189             if ( done ) {                           \
190                root = snocString ((root), (spec));  \
191             } else {                                \
192                while ( !done ) {                    \
193                   root = snocString((root),         \
194                             &ffblk.ff_name[0]);     \
195                   done = findnext(&ffblk);          \
196                }                                    \
197             }                                       \
198          }                                          \
199       } while ( 0 )
200 
201    #define SET_BINARY_MODE(fd)                     \
202       do {                                         \
203          int retVal = setmode ( fileno ( fd ),     \
204                                O_BINARY );         \
205          ERROR_IF_MINUS_ONE ( retVal );            \
206       } while ( 0 )
207 
208 #endif
209 
210 
211 #if BZ_WIN32_MSVC20
212    #include <io.h>
213    #include <fcntl.h>
214    #include <sys\stat.h>
215    #include <sys\utime.h>
216 
217    #define Int32   int
218    #define UInt32  unsigned int
219    #define Char    char
220    #define UChar   unsigned char
221 
222    #define INLINE         /**/
223    #define NORETURN       /**/
224    #define PATH_SEP       '\\'
225    #define MY_LSTAT       _stat
226    #define MY_STAT        _stat
227    #define MY_S_IFREG(x)  ((x) & _S_IFREG)
228 
229    #define APPEND_FILESPEC(root, spec)                \
230       do {                                            \
231          if ((spec)[0] == '-') {                      \
232             root = snocString((root), (spec));        \
233          } else {                                     \
234             struct _finddata_t c_file;                \
235             long hFile;                               \
236             hFile = _findfirst((spec), &c_file);      \
237             if ( hFile == -1L ) {                     \
238                root = snocString ((root), (spec));    \
239             } else {                                  \
240                int anInt = 0;                         \
241                while ( anInt == 0 ) {                 \
242                   root = snocString((root),           \
243                             &c_file.name[0]);         \
244                   anInt = _findnext(hFile, &c_file);  \
245                }                                      \
246             }                                         \
247          }                                            \
248       } while ( 0 )
249 
250    #define SET_BINARY_MODE(fd)                        \
251       do {                                            \
252          int retVal = setmode ( fileno ( fd ),        \
253                                O_BINARY );            \
254          ERROR_IF_MINUS_ONE ( retVal );               \
255       } while ( 0 )
256 
257 #endif
258 
259 
260 /*---------------------------------------------*/
261 /*--
262   Some more stuff for all platforms :-)
263 --*/
264 
265 #define Bool   int
266 #define True   1
267 #define False  0
268 
269 /*--
270   IntNative is your platform's `native' int size.
271   Only here to avoid probs with 64-bit platforms.
272 --*/
273 #define IntNative int
274 
275 
276 /*--
277    change to 1, or compile with -DDEBUG=1 to debug
278 --*/
279 #ifndef DEBUG
280 #define DEBUG 0
281 #endif
282 
283 
284 /*---------------------------------------------------*/
285 /*---                                             ---*/
286 /*---------------------------------------------------*/
287 
288 /*--
289    Implementation notes, July 1996
290    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
291 
292    Memory allocation
293    ~~~~~~~~~~~~~~~~~
294    All large data structures are allocated on the C heap,
295    for better or for worse.  That includes the various
296    arrays of pointers, striped words, bytes and frequency
297    tables for compression and decompression.  The only non-heap
298    allocated structures are the coding models and the
299    BitStream structures, but these are both small.
300 
301    BZIP can operate at various block-sizes, ranging from
302    100k to 900k in 100k steps, and it allocates only as
303    much as it needs to.  When compressing, we know from the
304    command-line options what the block-size is going to be,
305    so all allocation can be done at start-up; if that
306    succeeds, there can be no further allocation problems.
307 
308    Decompression is more complicated.  Each compressed file
309    contains, in its header, a byte indicating the block
310    size used for compression.  This means BZIP potentially
311    needs to reallocate memory for each file it deals with,
312    which in turn opens the possibility for a memory allocation
313    failure part way through a run of files, by encountering
314    a file requiring a much larger block size than all the
315    ones preceding it.
316 
317    The policy is to simply give up if a memory allocation
318    failure occurs.  During decompression, it would be
319    possible to move on to subsequent files in the hope that
320    some might ask for a smaller block size, but the
321    complications for doing this seem more trouble than they
322    are worth.
323 
324 
325    Compressed file formats
326    ~~~~~~~~~~~~~~~~~~~~~~~
327    Perhaps the most important point is that compressed
328    files start with a 4-byte preamble:
329 
330       'B' 'Z'     -- a crude `magic number'
331 
332       '0'         -- file format version
333 
334       '1' to '9'  -- block size indicator
335 
336    The third byte gives a trapdoor mechanism so that we can
337    change file formats and/or algorithm later, without
338    losing backwards compatibility.  The fourth byte
339    indicates the compression block size; '1' stands for
340    100k, '2' for 200k, &c.
341 
342    In the present file format (call it version '0') *all*
343    material after this 4-byte preamble is written via the
344    arithmetic coder, using various different models,
345    including the `bogus' non-adaptive 256-entry model used
346    to send bytes through the arithmetic coder.  The overall
347    structure of this part is a sequence of blocks, each of
348    the format
349 
350       origPtr    run-length-coded MTF values   EOB
351 
352    origPtr is a 32-bit number (sent via bogusModel)
353    indicating the position in the sorted block of the
354    un-rotated original string.  A negative value of
355    origPtr indicates that this is the last block.
356 
357    Finally, after all the blocks, there is a 32-bit
358    CRC value, again sent using the `bogus' model.
359    The CRC applies to the entirety of the uncompressed
360    data stream.
361 
362    The MTF values are coded exactly as with Fenwick's
363    `structured model', augmented with Wheeler's run-length
364    coding scheme for zeros.  The basis model has an extra
365    symbol, EOB, denoting the end of the block; if that is
366    not encountered within the block size indicated by the
367    preamble, something is wrong.
368 
369 
370    Error conditions
371    ~~~~~~~~~~~~~~~~
372    Dealing with error conditions is the least satisfactory
373    aspect of BZIP.  The policy is to try and leave the
374    filesystem in a consistent state, then quit, even if it
375    means not processing some of the files mentioned in the
376    command line.  `A consistent state' means that a file
377    exists either in its compressed or uncompressed form,
378    but not both.  This boils down to the rule `delete the
379    output file if an error condition occurs, leaving the
380    input intact'.  Input files are only deleted when we can
381    be pretty sure the output file has been written and
382    closed successfully.
383 
384    Errors are a dog because there's so many things to
385    deal with.  The following can happen mid-file, and
386    require cleaning up.
387 
388      internal `panics' -- indicating a bug
389      corrupted compressed file -- block overrun in MTF decode
390      bad magic number or version on compressed file
391      can't allocate enough memory to decompress this file
392      I/O error reading/writing/opening/closing
393      signal catches -- Control-C, SIGTERM, SIGHUP.
394 
395    Other conditions, primarily pertaining to file names,
396    can be checked in-between files, which makes dealing
397    with them easier.
398 --*/
399 
400 
401 
402 
403 /*---------------------------------------------*/
404 
405 Int32   bytesIn, bytesOut;
406 Bool    verbose, veryVerbose;
407 Bool    compressing, keepInputFiles;
408 UInt32  globalCrc;
409 
410 #define OM_FILES_TO_FILES   1
411 #define OM_FILE_TO_STDOUT   2
412 #define OM_STDIN_TO_STDOUT  3
413 Int32   opMode;
414 
415 Int32   longestFileName;
416 Char    inName[1024];
417 Char    outName[1024];
418 Char    *progName;
419 Char    progNameReally[1024];
420 FILE    *outputHandleJustInCase;
421 
422 void    panic                 ( Char* )          NORETURN;
423 void    ioError               ( void )           NORETURN;
424 void    compressOutOfMemory   ( Int32, Int32 )   NORETURN;
425 void    uncompressOutOfMemory ( Int32, Int32 )   NORETURN;
426 void    blockOverrun          ( void )           NORETURN;
427 void    unblockError          ( void )           NORETURN;
428 void    crcError              ( UInt32, UInt32 ) NORETURN;
429 void    bitStreamEOF          ( void )           NORETURN;
430 void    cleanUpAndFail        ( void )           NORETURN;
431 void    compressedStreamEOF   ( void )           NORETURN;
432 
433 
434 /*---------------------------------------------------*/
435 /*--- 32-bit CRC grunge                           ---*/
436 /*---------------------------------------------------*/
437 
438 /*--
439   I think this is an implementation of the AUTODIN-II,
440   Ethernet & FDDI 32-bit CRC standard.  Vaguely derived
441   from code by Rob Warnock, in Section 51 of the
442   comp.compression FAQ.
443 --*/
444 
445 UInt32 crc32Table[256] = {
446 
447    /*-- Ugly, innit? --*/
448 
449    0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
450    0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
451    0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
452    0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
453    0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
454    0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
455    0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
456    0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
457    0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
458    0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
459    0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
460    0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
461    0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
462    0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
463    0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
464    0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
465    0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
466    0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
467    0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
468    0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
469    0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
470    0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
471    0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
472    0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
473    0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
474    0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
475    0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
476    0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
477    0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
478    0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
479    0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
480    0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
481    0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
482    0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
483    0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
484    0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
485    0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
486    0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
487    0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
488    0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
489    0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
490    0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
491    0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
492    0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
493    0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
494    0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
495    0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
496    0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
497    0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
498    0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
499    0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
500    0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
501    0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
502    0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
503    0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
504    0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
505    0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
506    0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
507    0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
508    0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
509    0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
510    0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
511    0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
512    0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
513 };
514 
515 
516 /*---------------------------------------------*/
initialiseCRC(void)517 void initialiseCRC ( void )
518 {
519    globalCrc = 0xffffffffL;
520 }
521 
522 
523 /*---------------------------------------------*/
getFinalCRC(void)524 UInt32 getFinalCRC ( void )
525 {
526    return ~globalCrc;
527 }
528 
529 
530 /*---------------------------------------------*/
getGlobalCRC(void)531 UInt32 getGlobalCRC ( void )
532 {
533    return globalCrc;
534 }
535 
536 
537 /*---------------------------------------------*/
setGlobalCRC(UInt32 newCrc)538 void setGlobalCRC ( UInt32 newCrc )
539 {
540    globalCrc = newCrc;
541 }
542 
543 
544 /*---------------------------------------------*/
545 #define UPDATE_CRC(crcVar,cha)              \
546 {                                           \
547    crcVar = (crcVar << 8) ^                 \
548             crc32Table[(crcVar >> 24) ^     \
549                        ((UChar)cha)];       \
550 }
551 
552 
553 /*---------------------------------------------------*/
554 /*--- Bit stream I/O                              ---*/
555 /*---------------------------------------------------*/
556 
557 /*--
558    Although it seems a bit silly, we restrict
559    ourselves to one bitstream at a time, so as to
560    avoid malloc-ing the structure.  This avoids any
561    possible fragmentation-style interactions with
562    the repeated malloc/free cycles of large areas
563    which happen during decompression.
564 
565    bsInUse is a consistency-check feature.
566 --*/
567 
568 typedef
569    struct {
570       FILE*  handle;
571       Int32  buffer;
572       Int32  buffLive;
573       Char   mode;
574    }
575    BitStream;
576 
577 BitStream  aBitStreamBuffer;
578 Bool       bsInUse;
579 
580 
581 /*---------------------------------------------*/
bsOpenReadStream(FILE * stream)582 BitStream* bsOpenReadStream ( FILE* stream )
583 {
584    BitStream *bs;
585 
586    if (bsInUse) panic ( "bsOpenReadStream" );
587    bsInUse = True;
588    bs = &aBitStreamBuffer;
589 
590    bs->handle = stream;
591    bs->buffer = 0;
592    bs->buffLive = 0;
593    bs->mode = 'r';
594 
595    return bs;
596 }
597 
598 
599 /*---------------------------------------------*/
bsOpenWriteStream(FILE * stream)600 BitStream* bsOpenWriteStream ( FILE* stream )
601 {
602    BitStream *bs;
603 
604    if (bsInUse) panic ( "bsOpenWriteStream" );
605    bsInUse = True;
606    bs = &aBitStreamBuffer;
607 
608    bs->handle = stream;
609    bs->buffer = 0;
610    bs->buffLive = 0;
611    bs->mode = 'w';
612 
613    return bs;
614 }
615 
616 
617 /*---------------------------------------------*/
bsPutBit(BitStream * bs,Int32 bit)618 INLINE void bsPutBit ( BitStream* bs, Int32 bit )
619 {
620    if (bs->buffLive == 8) {
621       IntNative retVal = putc ( (UChar) bs->buffer, bs->handle );
622       ERROR_IF_EOF ( retVal );
623       bytesOut++;
624       bs->buffLive = 1;
625       bs->buffer = bit & 0x1;
626    } else {
627       bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
628       bs->buffLive++;
629    };
630 }
631 
632 
633 /*---------------------------------------------*/
634 /*--
635   We abort if an EOF appears, regardless of whether
636   an I/O error has happened, or we've really hit
637   the end of the file.  The pseudo-justification for
638   this is that the caller should interpret the bit
639   stream and decide for itself when the stream has
640   ended.
641 --*/
642 
bsGetBit(BitStream * bs)643 INLINE Int32 bsGetBit ( BitStream* bs )
644 {
645    if (bs->buffLive > 0) {
646       bs->buffLive --;
647       return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
648    } else {
649       IntNative retVal = getc ( bs->handle );
650       if ( retVal == EOF ) compressedStreamEOF();
651       bs->buffLive = 7;
652       bs->buffer = retVal;
653       if (bs->buffer == EOF) bitStreamEOF();
654       return ( ((bs->buffer) >> 7) & 0x1 );
655    }
656 }
657 
658 
659 /*---------------------------------------------*/
bsGetUChar(BitStream * bs)660 UChar bsGetUChar ( BitStream* bs )
661 {
662    Int32  i;
663    UInt32 c;
664 
665    c = 0;
666    for (i = 0; i <= 7; i++)
667       c = (c << 1) | bsGetBit ( bs );
668 
669    return (UChar)c;
670 }
671 
672 
673 /*---------------------------------------------*/
bsPutUChar(BitStream * bs,UChar c)674 void bsPutUChar ( BitStream* bs, UChar c )
675 {
676    Int32 i;
677    for (i = 7; i >= 0; i--)
678       bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
679 }
680 
681 
682 /*---------------------------------------------*/
bsClose(BitStream * bs)683 void bsClose ( BitStream* bs )
684 {
685    IntNative retVal;
686    if (!bsInUse) panic ( "bsClose" );
687    bsInUse = False;
688 
689    if ( bs->mode == 'w' ) {
690       while ( bs->buffLive < 8 ) {
691          bs->buffLive++;
692          bs->buffer <<= 1;
693       };
694       retVal = putc ( (UChar) (bs->buffer), bs->handle );
695       ERROR_IF_EOF ( retVal );
696       bytesOut++;
697       retVal = fflush ( bs->handle );
698       ERROR_IF_EOF ( retVal );
699    }
700    ERROR_IF_NOT_ZERO( ferror(bs->handle) );
701    retVal = fclose ( bs->handle );
702    ERROR_IF_EOF ( retVal );
703 }
704 
705 
706 /*---------------------------------------------------*/
707 /*--- Generic frequency-table stuff [data defn]   ---*/
708 /*---------------------------------------------------*/
709 
710 #define MAX_SYMBOLS 256
711 
712 /*-- freq[0] is unused, and is kept at zero.
713      freq[MAX_SYMBOLS+1] is also unused and kept at zero.
714      This is for historical reasons, and is no longer
715      necessary.
716 
717      The counts for symbols 1..numSymbols are
718      stored at freq[1] .. freq[numSymbols].
719 
720      Presumably one should make sure that
721        ((incVal + noExceed) / 2) < noExceed
722      so that scaling always produces sensible results.
723 
724      We take incValue == 0 to mean that the counts shouldn't
725      be incremented or scaled.
726 
727      This data decl has to go before the arithmetic coding stuff.
728 --*/
729 
730 typedef
731    struct {
732       UInt32  numScalings;
733       UInt32  numTraffic;
734       UInt32  totFreq;
735       UInt32  numSymbols;
736       UInt32  incValue;
737       UInt32  noExceed;
738       Char   *name;
739       UInt32  freq[MAX_SYMBOLS + 2];
740    }
741    Model;
742 
743 
744 /*---------------------------------------------------*/
745 /*--- The DCC95 arithmetic coder.                 ---*/
746 /*---------------------------------------------------*/
747 
748 /*--
749   This is a clean-room (ie, my own) implementation of the
750   coder described in ``Arithmetic Coding Revisited'',
751   by Alistair Moffat, Radford Neal and Ian Witten,
752   originally presented at the 1995 IEEE Data Compression
753   Conference, Snowbird, Utah, USA in March 1995.
754 
755   The paper has evolved somewhat since then.  This
756   implementation pertains to the June 1996 version of
757   the paper.  In particular, we have an initial value
758   for R of 2^(b-1) rather than 2^(b-1) - 1, and termination
759   of coding (overly conservative here) is different.
760 
761   I don't use the shift-add multiply/divide machinery;
762   I could, but it adds complexity & I'm not convinced about
763   the long-term architectural benefit of that approach.
764   I could be wrong.
765 --*/
766 
767 #define TWO_TO_THE(n)        (1 << (n))
768 #define MAX_BITS_OUTSTANDING 500000000
769 
770 #define smallB 26
771 #define smallF 18
772 
773 UInt32  bigL;
774 UInt32  bigR;
775 UInt32  bigD;
776 UInt32  bitsOutstanding;
777 
778 
779 /*---------------------------------------------*/
minUInt32(UInt32 a,UInt32 b)780 INLINE UInt32 minUInt32 ( UInt32 a, UInt32 b )
781 {
782    if (a < b) return a; else return b;
783 }
784 
785 
786 /*---------------------------------------------*/
arithCodeBitPlusFollow(BitStream * bs,UInt32 bit)787 INLINE void arithCodeBitPlusFollow ( BitStream *bs, UInt32 bit )
788 {
789     bsPutBit ( bs, bit );
790     while ( bitsOutstanding > 0 ) {
791         bsPutBit ( bs, 1 - bit );
792         bitsOutstanding --;
793     }
794 }
795 
796 
797 /*---------------------------------------------*/
arithCodeStartEncoding(BitStream * bs)798 void arithCodeStartEncoding ( BitStream *bs )
799 {
800    bigL = 0;
801    bigR = TWO_TO_THE ( smallB - 1 );
802    bitsOutstanding = 0;
803 }
804 
805 
806 /*---------------------------------------------*/
arithCodeDoneEncoding(BitStream * bs)807 void arithCodeDoneEncoding ( BitStream *bs )
808 {
809     Int32 i;
810 
811     for (i = smallB; i >= 1; i--)
812        arithCodeBitPlusFollow ( bs, (bigL >> (i-1)) & 0x1 );
813 }
814 
815 
816 /*---------------------------------------------*/
arithCodeStartDecoding(BitStream * bs)817 void arithCodeStartDecoding ( BitStream *bs )
818 {
819    Int32 i;
820 
821    bigL = 0;
822    bigR = TWO_TO_THE ( smallB - 1 );
823    bigD = 0;
824    for (i = 1; i <= smallB; i++)
825       bigD = (bigD << 1) + bsGetBit ( bs );
826 }
827 
828 
829 /*---------------------------------------------*/
arithCodeDoneDecoding(BitStream * bs)830 void arithCodeDoneDecoding ( BitStream *bs )
831 {
832    /*--- No action necessary. ---*/
833 }
834 
835 
836 /*---------------------------------------------*/
arithCodeRenormalise_Encode(BitStream * bs)837 INLINE void arithCodeRenormalise_Encode ( BitStream* bs )
838 {
839    while (bigR <= TWO_TO_THE ( smallB-2 ) ) {
840       if ( (bigL + bigR) <= TWO_TO_THE ( smallB-1 ) ) {
841          arithCodeBitPlusFollow ( bs, 0 );
842       } else
843       if ( TWO_TO_THE ( smallB-1 ) <= bigL ) {
844          arithCodeBitPlusFollow ( bs, 1 );
845          bigL = bigL - TWO_TO_THE ( smallB-1 );
846       } else {
847          bitsOutstanding++;
848          bigL = bigL - TWO_TO_THE ( smallB-2 );
849       }
850       bigL = 2 * bigL;
851       bigR = 2 * bigR;
852    }
853 }
854 
855 
856 /*---------------------------------------------*/
arithCodeSymbol(BitStream * bs,Model * m,Int32 symbol)857 void arithCodeSymbol ( BitStream *bs, Model *m, Int32 symbol )
858 {
859    UInt32 smallL, smallH, smallT, smallR, smallR_x_smallL;
860    Int32  i;
861 
862    #if DEBUG
863       assert ( TWO_TO_THE ( smallB-2 ) < bigR );
864       assert ( bigR <= TWO_TO_THE ( smallB-1 ) );
865       assert ( 0 <= bigL );
866       assert ( bigL < TWO_TO_THE ( smallB ) - TWO_TO_THE ( smallB-2 ) );
867       assert ( (bigL + bigR) <= TWO_TO_THE ( smallB ) );
868    #endif
869 
870    /*--- Set smallL and smallH to the cumfreq values
871          respectively prior to and including symbol.
872    ---*/
873    smallT = m->totFreq;
874    smallL = 0;
875    for (i = 1; i < symbol; i++) smallL += m->freq[i];
876    smallH = smallL + m->freq[symbol];
877 
878    smallR = bigR / smallT;
879 
880    smallR_x_smallL = smallR * smallL;
881    bigL = bigL + smallR_x_smallL;
882 
883    if (smallH < smallT)
884       bigR = smallR * (smallH - smallL); else
885       bigR = bigR - smallR_x_smallL;
886 
887    arithCodeRenormalise_Encode ( bs );
888 
889    if (bitsOutstanding > MAX_BITS_OUTSTANDING)
890       panic ( "arithCodeSymbol: too many bits outstanding" );
891 }
892 
893 
894 /*---------------------------------------------*/
arithDecodeSymbol(BitStream * bs,Model * m)895 Int32 arithDecodeSymbol ( BitStream *bs, Model *m )
896 {
897    UInt32 smallL, smallH, smallT, smallR;
898    UInt32 smallR_x_smallL, target, symbol;
899 
900    smallT = m->totFreq;
901 
902    /*--- Get target value. ---*/
903    smallR = bigR / smallT;
904    target = minUInt32 ( smallT-1, bigD / smallR );
905 
906    symbol = 0;
907    smallH = 0;
908    while (smallH <= target) {
909       symbol++;
910       smallH += m->freq[symbol];
911    }
912    smallL = smallH - m->freq[symbol];
913 
914    smallR_x_smallL = smallR * smallL;
915    bigD = bigD - smallR_x_smallL;
916 
917    if (smallH < smallT)
918       bigR = smallR * (smallH - smallL); else
919       bigR = bigR - smallR_x_smallL;
920 
921    while ( bigR <= TWO_TO_THE ( smallB-2 ) ) {
922       bigR = 2 * bigR;
923       bigD = 2 * bigD + bsGetBit ( bs );
924    }
925 
926    return symbol;
927 }
928 
929 
930 /*---------------------------------------------------*/
931 /*--- Generic frequency-table stuff [fn defns]    ---*/
932 /*---------------------------------------------------*/
933 
934 /*---------------------------------------------*/
initModel(Model * m,Char * initName,Int32 initNumSymbols,Int32 initIncValue,Int32 initNoExceed)935 void initModel ( Model  *m,
936                  Char   *initName,
937                  Int32  initNumSymbols,
938                  Int32  initIncValue,
939                  Int32  initNoExceed
940                )
941 {
942    Int32 i;
943 
944    if (initIncValue == 0) {
945       m->totFreq = initNumSymbols;
946       for (i = 1; i <= initNumSymbols; i++)
947          m->freq[i] = 1;
948    } else {
949       m->totFreq = initNumSymbols * initIncValue;
950       for (i = 1; i <= initNumSymbols; i++)
951          m->freq[i] = initIncValue;
952    };
953 
954    m->numSymbols                = initNumSymbols;
955    m->incValue                  = initIncValue;
956    m->noExceed                  = initNoExceed;
957    m->name                      = initName;
958    m->freq[0]                   = 0;
959    m->freq[initNumSymbols + 1]  = 0;
960    m->numScalings               = 0;
961 }
962 
963 
964 /*---------------------------------------------*/
dumpModelStats(Model * m)965 void dumpModelStats ( Model *m )
966 {
967     fprintf (
968        stderr,
969        "model %s:\t scalings %d\n",
970        m->name,
971        m->numScalings
972     );
973 }
974 
975 
976 /*---------------------------------------------*/
updateModel(Model * m,Int32 symbol)977 INLINE void updateModel ( Model *m, Int32 symbol )
978 {
979    UInt32 i;
980 
981    m->totFreq      += m->incValue;
982    m->freq[symbol] += m->incValue;
983    if (m->totFreq > m->noExceed) {
984       m->totFreq = 0;
985       m->numScalings++;
986       for (i = 1; i <= m->numSymbols; i++) {
987          m->freq[i] = (m->freq[i] + 1) >> 1;
988          m->totFreq += m->freq[i];
989       }
990    }
991 }
992 
993 
994 /*---------------------------------------------*/
putSymbol(Model * m,Int32 symbol,BitStream * bs)995 INLINE void putSymbol ( Model *m, Int32 symbol, BitStream *bs )
996 {
997    #if DEBUG
998       if (! (symbol >= 1 && symbol <= m->numSymbols) )
999           fprintf ( stderr,
1000                     "BAD, mod = %s, sym = %d, max = %d\n",
1001                     m->name, symbol, m->numSymbols );
1002    #endif
1003 
1004    arithCodeSymbol ( bs, m, symbol );
1005    updateModel ( m, symbol );
1006 }
1007 
1008 
1009 /*---------------------------------------------*/
getSymbol(Model * m,BitStream * bs)1010 INLINE Int32 getSymbol ( Model *m, BitStream *bs )
1011 {
1012    Int32 symbol;
1013 
1014    symbol = arithDecodeSymbol ( bs, m );
1015    updateModel ( m, symbol );
1016 
1017    #if DEBUG
1018       assert (symbol >= 1 && symbol <= m->numSymbols);
1019    #endif
1020 
1021    return symbol;
1022 }
1023 
1024 
1025 /*---------------------------------------------------*/
1026 /*--- For sending bytes/words thru arith coder    ---*/
1027 /*---------------------------------------------------*/
1028 
1029 Model bogusModel;
1030 
1031 
1032 /*---------------------------------------------*/
initBogusModel(void)1033 void initBogusModel ( void )
1034 {
1035    initModel ( &bogusModel, "bogus", 256, 0, 256 );
1036 }
1037 
1038 
1039 /*---------------------------------------------*/
putUChar(BitStream * bs,UChar c)1040 void putUChar ( BitStream *bs, UChar c )
1041 {
1042    putSymbol ( &bogusModel, 1 + (UInt32)c, bs );
1043 }
1044 
1045 
1046 /*---------------------------------------------*/
putInt32(BitStream * bs,Int32 i)1047 void putInt32 ( BitStream *bs, Int32 i )
1048 {
1049    putUChar ( bs, (UChar) (((UInt32)i >> 24) & 0xFF) );
1050    putUChar ( bs, (UChar) (((UInt32)i >> 16) & 0xFF) );
1051    putUChar ( bs, (UChar) (((UInt32)i >>  8) & 0xFF) );
1052    putUChar ( bs, (UChar) ( (UInt32)i        & 0xFF) );
1053 }
1054 
1055 
1056 /*---------------------------------------------*/
putUInt32(BitStream * bs,UInt32 i)1057 void putUInt32 ( BitStream *bs, UInt32 i )
1058 {
1059    putUChar ( bs, (UChar) ((i >> 24) & 0xFF) );
1060    putUChar ( bs, (UChar) ((i >> 16) & 0xFF) );
1061    putUChar ( bs, (UChar) ((i >>  8) & 0xFF) );
1062    putUChar ( bs, (UChar) ( i        & 0xFF) );
1063 }
1064 
1065 
1066 /*---------------------------------------------*/
getUChar(BitStream * bs)1067 UChar getUChar ( BitStream *bs )
1068 {
1069    return (UChar) (getSymbol ( &bogusModel, bs ) - 1);
1070 }
1071 
1072 
1073 /*---------------------------------------------*/
getInt32(BitStream * bs)1074 Int32 getInt32 ( BitStream *bs )
1075 {
1076    UInt32 res = 0;
1077 
1078    res |= (getUChar ( bs ) << 24);
1079    res |= (getUChar ( bs ) << 16);
1080    res |= (getUChar ( bs ) <<  8);
1081    res |= (getUChar ( bs )      );
1082    return (Int32)res;
1083 }
1084 
1085 
1086 /*---------------------------------------------*/
getUInt32(BitStream * bs)1087 UInt32 getUInt32 ( BitStream *bs )
1088 {
1089    UInt32 res = 0;
1090 
1091    res |= (getUChar ( bs ) << 24);
1092    res |= (getUChar ( bs ) << 16);
1093    res |= (getUChar ( bs ) <<  8);
1094    res |= (getUChar ( bs )      );
1095    return res;
1096 }
1097 
1098 
1099 /*---------------------------------------------------*/
1100 /*--- The structured model proper                 ---*/
1101 /*---------------------------------------------------*/
1102 
1103 #define BASIS           0
1104 #define MODEL_2_3       1
1105 #define MODEL_4_7       2
1106 #define MODEL_8_15      3
1107 #define MODEL_16_31     4
1108 #define MODEL_32_63     5
1109 #define MODEL_64_127    6
1110 #define MODEL_128_255   7
1111 
1112 Model models[8];
1113 
1114 
1115 /*---------------------------------------------*/
1116 /*--
1117   The parameters in these models and bogusModel
1118   -- specifically, the value of 1000 for
1119   max-total-frequency -- determine the lowest
1120   acceptable values for smallF and indirectly smallB
1121   in the arithmetic coder above.
1122 --*/
initModels(void)1123 void initModels ( void )
1124 {
1125    initModel ( &models[BASIS],         "basis",   11,  12,  1000 );
1126    initModel ( &models[MODEL_2_3],     "2-3",     2,   4,   1000 );
1127    initModel ( &models[MODEL_4_7],     "4-7",     4,   3,   1000 );
1128    initModel ( &models[MODEL_8_15],    "8-15",    8,   3,   1000 );
1129    initModel ( &models[MODEL_16_31],   "16-31",   16,  3,   1000 );
1130    initModel ( &models[MODEL_32_63],   "32-63",   32,  3,   1000 );
1131    initModel ( &models[MODEL_64_127],  "64-127",  64,  2,   1000 );
1132    initModel ( &models[MODEL_128_255], "128-255", 128, 1,   1000 );
1133 }
1134 
1135 
1136 /*---------------------------------------------*/
dumpAllModelStats(void)1137 void dumpAllModelStats ( void )
1138 {
1139    dumpModelStats ( &bogusModel );
1140    dumpModelStats ( &models[BASIS] );
1141    dumpModelStats ( &models[MODEL_2_3] );
1142    dumpModelStats ( &models[MODEL_4_7] );
1143    dumpModelStats ( &models[MODEL_8_15] );
1144    dumpModelStats ( &models[MODEL_16_31] );
1145    dumpModelStats ( &models[MODEL_32_63] );
1146    dumpModelStats ( &models[MODEL_64_127] );
1147    dumpModelStats ( &models[MODEL_128_255] );
1148 }
1149 
1150 
1151 
1152 #define VAL_RUNA     1
1153 #define VAL_RUNB     2
1154 #define VAL_ONE      3
1155 #define VAL_2_3      4
1156 #define VAL_4_7      5
1157 #define VAL_8_15     6
1158 #define VAL_16_31    7
1159 #define VAL_32_63    8
1160 #define VAL_64_127   9
1161 #define VAL_128_255  10
1162 #define VAL_EOB      11
1163 
1164 #define RUNA    257
1165 #define RUNB    258
1166 #define EOB     259
1167 #define INVALID 260
1168 
1169 
1170 /*---------------------------------------------*/
getMTFVal(BitStream * bs)1171 Int32 getMTFVal ( BitStream *bs )
1172 {
1173    Int32 retVal;
1174 
1175    switch ( getSymbol ( &models[BASIS], bs ) ) {
1176       case VAL_EOB:
1177          retVal = EOB; break;
1178       case VAL_RUNA:
1179          retVal = RUNA; break;
1180       case VAL_RUNB:
1181          retVal = RUNB; break;
1182       case VAL_ONE:
1183          retVal = 1; break;
1184       case VAL_2_3:
1185          retVal = getSymbol ( &models[MODEL_2_3], bs ) + 2 - 1; break;
1186       case VAL_4_7:
1187          retVal = getSymbol ( &models[MODEL_4_7], bs ) + 4 - 1; break;
1188       case VAL_8_15:
1189          retVal = getSymbol ( &models[MODEL_8_15], bs ) + 8 - 1; break;
1190       case VAL_16_31:
1191          retVal = getSymbol ( &models[MODEL_16_31], bs ) + 16 - 1; break;
1192       case VAL_32_63:
1193          retVal = getSymbol ( &models[MODEL_32_63], bs ) + 32 - 1; break;
1194       case VAL_64_127:
1195          retVal = getSymbol ( &models[MODEL_64_127], bs ) + 64 - 1; break;
1196       default:
1197          retVal = getSymbol ( &models[MODEL_128_255], bs ) + 128 - 1; break;
1198    }
1199    return retVal;
1200 }
1201 
1202 
1203 /*---------------------------------------------*/
sendMTFVal(BitStream * bs,Int32 n)1204 void sendMTFVal ( BitStream *bs, Int32 n )
1205 {
1206    if (n == RUNA) putSymbol ( &models[BASIS], VAL_RUNA, bs ); else
1207    if (n == RUNB) putSymbol ( &models[BASIS], VAL_RUNB, bs ); else
1208    if (n == EOB ) putSymbol ( &models[BASIS], VAL_EOB,  bs ); else
1209 
1210    if (n == 1) putSymbol ( &models[BASIS], VAL_ONE, bs ); else
1211 
1212    if (n >= 2 && n <= 3) {
1213       putSymbol ( &models[BASIS], VAL_2_3, bs );
1214       putSymbol ( &models[MODEL_2_3], n - 2 + 1, bs );
1215    } else
1216 
1217    if (n >= 4 && n <= 7) {
1218       putSymbol ( &models[BASIS], VAL_4_7, bs );
1219       putSymbol ( &models[MODEL_4_7], n - 4 + 1, bs );
1220    } else
1221 
1222    if (n >= 8 && n <= 15) {
1223       putSymbol ( &models[BASIS], VAL_8_15, bs );
1224       putSymbol ( &models[MODEL_8_15], n - 8 + 1, bs );
1225    } else
1226 
1227    if (n >= 16 && n <= 31) {
1228       putSymbol ( &models[BASIS], VAL_16_31, bs );
1229       putSymbol ( &models[MODEL_16_31], n - 16 + 1, bs );
1230    } else
1231 
1232    if (n >= 32 && n <= 63) {
1233       putSymbol ( &models[BASIS], VAL_32_63, bs );
1234       putSymbol ( &models[MODEL_32_63], n - 32 + 1, bs );
1235    } else
1236 
1237    if (n >= 64 && n <= 127) {
1238       putSymbol ( &models[BASIS], VAL_64_127, bs );
1239       putSymbol ( &models[MODEL_64_127], n - 64 + 1, bs );
1240    } else
1241 
1242    if (n >= 128 && n <= 255) {
1243       putSymbol ( &models[BASIS], VAL_128_255, bs );
1244       putSymbol ( &models[MODEL_128_255], n - 128 + 1, bs );
1245    } else {
1246 
1247       panic ( "sendMTFVal: bad value!" );
1248    }
1249 }
1250 
1251 
1252 /*---------------------------------------------------*/
1253 /*--- Move-to-front encoding/decoding             ---*/
1254 /*---------------------------------------------------*/
1255 
1256 /*--
1257   These are the main data structures for
1258   the Burrows-Wheeler transform.
1259 --*/
1260 
1261 /*--
1262   For good performance, fullGt() allows pointers
1263   to get partially denormalised.  As a consequence,
1264   we have to copy some small quantity of data
1265   from the beginning of a block to the end of it
1266   so things still work right.  These constants control
1267   that.
1268 --*/
1269 #define NUM_FULLGT_UNROLLINGS 4
1270 #define MAX_DENORM_OFFSET (4 * NUM_FULLGT_UNROLLINGS)
1271 
1272 
1273 /*--
1274   Pointers to compression and decompression
1275   structures.  Set by
1276      allocateCompressStructures   and
1277      setDecompressStructureSizes
1278 
1279   The structures are always set to be suitable
1280   for a block of size 100000 * blockSize100k.
1281 --*/
1282 UInt32   *words;    /*-- compress              --*/
1283 Int32    *zptr;     /*-- compress & uncompress --*/
1284 Int32    *ftab;     /*-- compress             --*/
1285 
1286 UChar    *block;    /*-- uncompress --*/
1287 UChar    *ll;       /*-- uncompress --*/
1288 
1289 
1290 
1291 /*--
1292   always: lastPP == last+1.
1293   See discussion in sortIt().
1294 --*/
1295 Int32  last;
1296 Int32  lastPP;
1297 
1298 
1299 /*--
1300   index in ptr[] of original string after sorting.
1301 --*/
1302 Int32  origPtr;
1303 
1304 
1305 /*--
1306   always: in the range 0 .. 9.
1307   The current block size is 100000 * this number.
1308 --*/
1309 Int32  blockSize100k;
1310 
1311 
1312 /*---------------------------------------------*/
1313 /*--
1314   Manage memory for compression/decompression.
1315   When compressing, a single block size applies to
1316   all files processed, and that's set when the
1317   program starts.  But when decompressing, each file
1318   processed could have been compressed with a
1319   different block size, so we may have to free
1320   and reallocate on a per-file basis.
1321 
1322   A call with argument of zero means
1323   `free up everything.'  And a value of zero for
1324   blockSize100k means no memory is currently allocated.
1325 --*/
1326 
1327 
1328 /*---------------------------------------------*/
allocateCompressStructures(void)1329 void allocateCompressStructures ( void )
1330 {
1331    Int32 n = 100000 * blockSize100k;
1332    words   = malloc ( (n + MAX_DENORM_OFFSET) * sizeof(Int32) );
1333    zptr    = malloc ( n                       * sizeof(Int32) );
1334    ftab    = malloc ( 65537                   * sizeof(Int32) );
1335 
1336    if (words == NULL || zptr == NULL || ftab == NULL) {
1337       Int32 totalDraw = (n + MAX_DENORM_OFFSET) * sizeof(Int32) +
1338                       n * sizeof(Int32) +
1339                       65537 * sizeof(Int32);
1340 
1341       compressOutOfMemory ( totalDraw, n );
1342    }
1343 }
1344 
1345 
1346 /*---------------------------------------------*/
setDecompressStructureSizes(Int32 newSize100k)1347 void setDecompressStructureSizes ( Int32 newSize100k )
1348 {
1349    assert (0 <= newSize100k   && newSize100k   <= 9);
1350    assert (0 <= blockSize100k && blockSize100k <= 9);
1351 
1352    if (newSize100k == blockSize100k)
1353       return;
1354 
1355    blockSize100k = newSize100k;
1356 
1357    if (block != NULL) free ( block );
1358    if (ll    != NULL) free ( ll    );
1359    if (zptr  != NULL) free ( zptr  );
1360 
1361    if (newSize100k == 0) {
1362       block = NULL;
1363       ll    = NULL;
1364       zptr  = NULL;
1365    } else {
1366       Int32 n = 100000 * newSize100k;
1367       block   = malloc ( n * sizeof(UChar) );
1368       ll      = malloc ( n * sizeof(UChar) );
1369       zptr    = malloc ( n * sizeof(Int32) );
1370 
1371       if (block == NULL || ll == NULL || zptr == NULL) {
1372          Int32 totalDraw = 6 * n * sizeof(UChar);
1373          uncompressOutOfMemory ( totalDraw, n );
1374       }
1375    }
1376 }
1377 
1378 
1379 /*---------------------------------------------*/
1380 #define IF_THEN_ELSE(c,t,e) ((c) ? (t) : (e))
1381 
1382 #define GETFIRST(a)    ((UChar)(words[a] >> 24))
1383 #define GETREST(a)     (words[a] & 0x00ffffff)
1384 #define SETALL(a,w)    words[a] = (w)
1385 #define GETFIRST16(a)  ((UInt32)(words[a] >> 16))
1386 #define GETREST16(a)   (words[a] & 0x0000ffff)
1387 
GETALL(Int32 a)1388 INLINE UInt32 GETALL ( Int32 a )
1389 {
1390    #if DEBUG
1391       assert (a >= 0 && a < lastPP + 4 * NUM_FULLGT_UNROLLINGS);
1392       if (a >= lastPP) assert (words[a] == words[a-lastPP]);
1393    #endif
1394    return words[a];
1395 }
1396 
SETREST16(Int32 a,UInt32 w)1397 INLINE void SETREST16 ( Int32 a, UInt32 w )
1398 {
1399    words[a] = (words[a] & 0xffff0000) | (((UInt32)(w)) & 0x0000ffff);
1400 }
1401 
SETFIRST16(Int32 a,UInt32 w)1402 INLINE void SETFIRST16 ( Int32 a, UInt32 w )
1403 {
1404    words[a] = (words[a] & 0x0000ffff) | (((UInt32)(w)) << 16);
1405 }
1406 
SETREST(Int32 a,UInt32 w)1407 INLINE void SETREST ( Int32 a, UInt32 w )
1408 {
1409    words[a] = (words[a] & 0xff000000) | (((UInt32)(w)) & 0x00ffffff);
1410 }
1411 
SETFIRST(Int32 a,UChar c)1412 INLINE void SETFIRST ( Int32 a, UChar c )
1413 {
1414    words[a] = (words[a] & 0x00ffffff) | (((UInt32)(c)) << 24);
1415 }
1416 
SETSECOND(Int32 a,UChar c)1417 INLINE void SETSECOND ( Int32 a, UChar c )
1418 {
1419    words[a] = (words[a] & 0xff00ffff) | (((UInt32)(c)) << 16);
1420 }
1421 
SETTHIRD(Int32 a,UChar c)1422 INLINE void SETTHIRD ( Int32 a, UChar c )
1423 {
1424    words[a] = (words[a] & 0xffff00ff) | (((UInt32)(c)) << 8);
1425 }
1426 
SETFOURTH(Int32 a,UChar c)1427 INLINE void SETFOURTH ( Int32 a, UChar c )
1428 {
1429    words[a] = (words[a] & 0xffffff00) | (((UInt32)(c)));
1430 }
1431 
1432 
1433 /*---------------------------------------------*/
NORMALISE(Int32 p)1434 INLINE Int32 NORMALISE ( Int32 p )
1435 {
1436    return
1437    IF_THEN_ELSE ( ((p) < 0),
1438                   ((p)+lastPP),
1439                   IF_THEN_ELSE ( ((p)>=lastPP),
1440                                  ((p)-lastPP),
1441                                  (p)
1442                                )
1443                 );
1444 }
1445 
1446 
1447 /*---------------------------------------------*/
NORMALISEHI(Int32 p)1448 INLINE Int32 NORMALISEHI ( Int32 p )
1449 {
1450    return
1451    IF_THEN_ELSE ( ((p)>=lastPP),
1452                   ((p)-lastPP),
1453                   (p)
1454                 );
1455 }
1456 
1457 
1458 /*---------------------------------------------*/
NORMALISELO(Int32 p)1459 INLINE Int32 NORMALISELO ( Int32 p )
1460 {
1461    return
1462    IF_THEN_ELSE ( ((p) < 0),
1463                   ((p)+lastPP),
1464                   (p)
1465                 );
1466 }
1467 
1468 
1469 /*---------------------------------------------*/
1470 /*  The above normalisers are quick but only work when
1471 *   p exceeds the block by less than lastPP, since
1472 *   they renormalise merely by adding or subtracting
1473 *   lastPP.  This one always works, although slowly.
1474 */
STRONG_NORMALISE(Int32 p)1475 INLINE Int32 STRONG_NORMALISE ( Int32 p )
1476 {
1477    /* -ve number MOD +ve number always
1478    *  was one of life's little mysteries ...
1479    */
1480    while (p < 0) { p += lastPP; };
1481    return
1482       p % lastPP;
1483 }
1484 
1485 
1486 /*---------------------------------------------*/
sendZeroes(BitStream * outStream,Int32 zeroesPending)1487 void sendZeroes ( BitStream *outStream, Int32 zeroesPending )
1488 {
1489    UInt32 bitsToSend;
1490    Int32  numBits;
1491 
1492    if (zeroesPending == 0)
1493       return;
1494 
1495    bitsToSend = 0;
1496    numBits = 0;
1497    while (zeroesPending != 0) {
1498       numBits++;
1499       bitsToSend <<= 1;
1500       zeroesPending--;
1501       if ((zeroesPending & 0x1) == 1) bitsToSend |= 1;
1502       zeroesPending >>= 1;
1503    }
1504    while (numBits > 0) {
1505       if ((bitsToSend & 0x1) == 1)
1506          sendMTFVal ( outStream, RUNA ); else
1507          sendMTFVal ( outStream, RUNB );
1508       bitsToSend >>= 1;
1509       numBits--;
1510    }
1511 }
1512 
1513 
1514 /*---------------------------------------------*/
moveToFrontCodeAndSend(BitStream * outStream,Bool thisIsTheLastBlock)1515 void moveToFrontCodeAndSend ( BitStream *outStream,
1516                               Bool      thisIsTheLastBlock
1517                             )
1518 {
1519    UChar  yy[256];
1520    Int32  i, j;
1521    UChar  tmp;
1522    UChar  tmp2;
1523    Int32  zeroesPending;
1524 
1525    zeroesPending = 0;
1526    if (thisIsTheLastBlock)
1527       putInt32 ( outStream, - ( origPtr+1 ) ); else
1528       putInt32 ( outStream,   ( origPtr+1 ) );
1529 
1530    initModels ();
1531 
1532    for (i = 0; i <= 255; i++)
1533       yy[i] = (UChar) i;
1534 
1535    for (i = 0; i <= last; i++) {
1536       UChar ll_i;
1537 
1538       ll_i = GETFIRST ( NORMALISELO ( zptr[i] - 1 ) );
1539 
1540       j = 0;
1541       tmp = yy[j];
1542       while ( ll_i != tmp ) {
1543          j++;
1544          tmp2 = tmp;
1545          tmp = yy[j];
1546          yy[j] = tmp2;
1547       };
1548       yy[0] = tmp;
1549 
1550       if (j == 0) {
1551          zeroesPending++;
1552       } else {
1553          sendZeroes ( outStream, zeroesPending );
1554          zeroesPending = 0;
1555          sendMTFVal ( outStream, j );
1556       }
1557 
1558    }
1559    sendZeroes ( outStream, zeroesPending );
1560    sendMTFVal ( outStream, EOB );
1561 }
1562 
1563 
1564 /*---------------------------------------------*/
getAndMoveToFrontDecode(BitStream * inStream)1565 Bool getAndMoveToFrontDecode ( BitStream *inStream )
1566 {
1567    UChar  yy[256];
1568    Int32  i, j, tmpOrigPtr, nextSym, limit;
1569 
1570    limit = 100000 * blockSize100k;
1571 
1572    tmpOrigPtr = getInt32 ( inStream );
1573    if (tmpOrigPtr < 0)
1574       origPtr = ( -tmpOrigPtr ) - 1; else
1575       origPtr =    tmpOrigPtr   - 1;
1576 
1577    initModels ();
1578 
1579    for (i = 0; i <= 255; i++)
1580       yy[i] = (UChar) i;
1581 
1582    last = -1;
1583 
1584    nextSym = getMTFVal ( inStream );
1585 
1586    LOOPSTART:
1587 
1588    if (nextSym == EOB)
1589       return (tmpOrigPtr < 0);
1590 
1591    /*--- acquire run-length bits, most significant first ---*/
1592    if (nextSym == RUNA || nextSym == RUNB) {
1593       Int32 n = 0;
1594       do {
1595          n <<= 1;
1596          if (nextSym == RUNA) n |= 1;
1597          n++;
1598          nextSym = getMTFVal ( inStream );
1599       }
1600          while (nextSym == RUNA || nextSym == RUNB);
1601       while (n > 0) {
1602          last++; if (last >= limit) blockOverrun();
1603          ll[last] = yy[0];
1604          n--;
1605       }
1606       goto LOOPSTART;
1607    }
1608 
1609    if (nextSym >= 1 && nextSym <= 255) {
1610       last++; if (last >= limit) blockOverrun();
1611       ll[last] = yy[nextSym];
1612 
1613       /*--
1614          This loop is hammered during decompression,
1615          hence the unrolling.
1616 
1617          for (j = nextSym; j > 0; j--) yy[j] = yy[j-1];
1618       --*/
1619 
1620       j = nextSym;
1621       for (; j > 3; j -= 4) {
1622          yy[j]   = yy[j-1];
1623          yy[j-1] = yy[j-2];
1624          yy[j-2] = yy[j-3];
1625          yy[j-3] = yy[j-4];
1626       }
1627       for (; j > 0; j--) yy[j] = yy[j-1];
1628 
1629       yy[0] = ll[last];
1630       nextSym = getMTFVal ( inStream );
1631       goto LOOPSTART;
1632    }
1633 
1634    fprintf ( stderr, "bad MTF value %d\n", nextSym );
1635    panic ( "getAndMoveToFrontDecode\n" );
1636    /*--- panic never returns ---*/
1637    return True;
1638 }
1639 
1640 
1641 /*---------------------------------------------------*/
1642 /*--- Block-sorting machinery                     ---*/
1643 /*---------------------------------------------------*/
1644 
1645 /*---------------------------------------------*/
1646 /*  Doesn't work when lastPP < 4.
1647 */
stripe(void)1648 void stripe ( void )
1649 {
1650    Int32 i;
1651 
1652    for (i = 0; i < lastPP; i++) {
1653       UChar c = GETFIRST(i);
1654       SETSECOND ( NORMALISELO(i-1), c );
1655       SETTHIRD  ( NORMALISELO(i-2), c );
1656       SETFOURTH ( NORMALISELO(i-3), c );
1657    }
1658 }
1659 
1660 
1661 /*---------------------------------------------*/
1662 /*  Doesn't work when
1663 *      lastPP < 4 * NUM_FULLGT_UNROLLINGS.
1664 */
copyOffsetWords(void)1665 void copyOffsetWords ( void )
1666 {
1667    Int32 i;
1668 
1669    for (i = 0; i < 4 * NUM_FULLGT_UNROLLINGS; i++)
1670       words[lastPP+i] = words[i];
1671 }
1672 
1673 
1674 
1675 /*---------------------------------------------*/
1676 /*  Doesn't work when
1677 *      lastPP < 4 * NUM_FULLGT_UNROLLINGS.
1678 */
fullGt(Int32 i1,Int32 i2)1679 INLINE Bool fullGt ( Int32 i1, Int32 i2 )
1680 {
1681    Int32 i1orig = i1;
1682 
1683    if (i1 == i2) return False;
1684 
1685    do {
1686       UInt32 w1;
1687       UInt32 w2;
1688 
1689       #if DEBUG
1690          assert (i1 >= 0);
1691          assert (i2 >= 0);
1692          assert (i1 != i2);
1693          assert (i1 < lastPP);
1694          assert (i2 < lastPP);
1695       #endif
1696 
1697       w1 = GETALL(i1);
1698       w2 = GETALL(i2);
1699       if (w1 != w2) return (w1 > w2);
1700       i1 += 4;
1701       i2 += 4;
1702 
1703       #if DEBUG
1704          assert (i1 < lastPP + 1 * NUM_FULLGT_UNROLLINGS);
1705          assert (i2 < lastPP + 1 * NUM_FULLGT_UNROLLINGS);
1706       #endif
1707 
1708       w1 = GETALL(i1);
1709       w2 = GETALL(i2);
1710       if (w1 != w2) return (w1 > w2);
1711       i1 += 4;
1712       i2 += 4;
1713 
1714       #if DEBUG
1715          assert (i1 < lastPP + 2 * NUM_FULLGT_UNROLLINGS);
1716          assert (i2 < lastPP + 2 * NUM_FULLGT_UNROLLINGS);
1717       #endif
1718 
1719       w1 = GETALL(i1);
1720       w2 = GETALL(i2);
1721       if (w1 != w2) return (w1 > w2);
1722       i1 += 4;
1723       i2 += 4;
1724 
1725       #if DEBUG
1726          assert (i1 < lastPP + 3 * NUM_FULLGT_UNROLLINGS);
1727          assert (i2 < lastPP + 3 * NUM_FULLGT_UNROLLINGS);
1728       #endif
1729 
1730       w1 = GETALL(i1);
1731       w2 = GETALL(i2);
1732       if (w1 != w2) return (w1 > w2);
1733       i1 += 4;
1734       i2 += 4;
1735 
1736       #if DEBUG
1737          assert (i1 < lastPP + 4 * NUM_FULLGT_UNROLLINGS);
1738          assert (i2 < lastPP + 4 * NUM_FULLGT_UNROLLINGS);
1739       #endif
1740 
1741       i1 = NORMALISEHI(i1);
1742       i2 = NORMALISEHI(i2);
1743 
1744       #if DEBUG
1745          assert (i1 >= 0);
1746          assert (i2 >= 0);
1747          assert (i1 != i2);
1748          assert (i1 < lastPP);
1749          assert (i2 < lastPP);
1750       #endif
1751 
1752    }
1753       while (i1 != i1orig);
1754    return False;
1755 }
1756 
1757 
1758 /*---------------------------------------------*/
1759 /*  Requires striping, and therefore doesn't
1760     work when lastPP < 4.  This qsort is
1761     derived from Weiss' book "Data Structures and
1762     Algorithm Analysis in C", Section 7.7.
1763 */
1764 
1765 #define ISORT_BELOW 10
1766 
1767 #if DEBUG
rangeErr(Int32 x,Int32 wuL,Int32 wuR)1768    Int32 rangeErr ( Int32 x, Int32 wuL, Int32 wuR )
1769    {
1770       fprintf ( stderr, "\nRANGE ERROR: %d (%d, %d)\n",
1771                 x, wuL, wuR);
1772       exit(2);
1773    }
1774    #define RC(x) \
1775       ( ((x) >= wuL && (x) <= wuR) ? (x) : rangeErr((x),wuL, wuR) )
1776 #else
1777    #define RC(x) (x)
1778 #endif
1779 
1780 #define SWAP(za,zb)                                           \
1781    { Int32 zl = (za); Int32 zr = (zb);                        \
1782      Int32 zt = zptr[RC(zl)]; zptr[RC(zl)] = zptr[RC(zr)];    \
1783      zptr[RC(zr)] = zt;                                       \
1784    }
1785 
1786 
qsortFull(Int32 left,Int32 right)1787 void qsortFull ( Int32 left, Int32 right )
1788 {
1789    Int32 pivot, v;
1790    Int32 i, j;
1791    Int32 wuC;
1792 
1793    Int32 stackL[40];
1794    Int32 stackR[40];
1795    Int32 sp = 0;
1796 
1797    Int32 wuL = left;
1798    Int32 wuR = right;
1799 
1800    while (True) {
1801 
1802       /*--
1803          At the beginning of this loop, wuL and wuR hold the
1804          bounds of the next work-unit.
1805       --*/
1806       if (wuR - wuL > ISORT_BELOW) {
1807 
1808          /*--
1809             a large Work Unit; partition-exchange
1810          --*/
1811          wuC = (wuL + wuR) >> 1;
1812          if (fullGt ( zptr[RC(wuL)], zptr[RC(wuC)] )) SWAP ( wuL, wuC );
1813          if (fullGt ( zptr[RC(wuL)], zptr[RC(wuR)] )) SWAP ( wuL, wuR );
1814          if (fullGt ( zptr[RC(wuC)], zptr[RC(wuR)] )) SWAP ( wuC, wuR );
1815 
1816          SWAP ( wuC, wuR-1 );
1817          pivot = zptr[RC(wuR-1)];
1818 
1819          i = wuL;
1820          j = wuR - 1;
1821          for (;;) {
1822             do i++; while (fullGt ( pivot, zptr[RC(i)] ));
1823             do j--; while (fullGt ( zptr[RC(j)], pivot ));
1824             if (i < j) SWAP ( i, j ) else break;
1825          }
1826          SWAP ( i, wuR-1 );
1827 
1828          if ((i - wuL) > (wuR - i)) {
1829             stackL[sp] = wuL; stackR[sp] = i-1; sp++; wuL = i+1;
1830          } else {
1831             stackL[sp] = i+1; stackR[sp] = wuR; sp++; wuR = i-1;
1832          }
1833          #if DEBUG
1834             assert (sp <= 14);
1835          #endif
1836 
1837       } else {
1838 
1839          /*--
1840             a small Work-Unit; insertion-sort it
1841          --*/
1842          for (i = wuL + 1; i <= wuR; i++) {
1843             v = zptr[RC(i)];
1844             j = i;
1845             while ( fullGt ( zptr[RC(j-1)], v )) {
1846                zptr[RC(j)] = zptr[RC(j-1)];
1847                j = j - 1;
1848                if (j <= wuL) goto zero;
1849             }
1850             zero:
1851             zptr[RC(j)] = v;
1852          }
1853          if (sp == 0) return;
1854          sp--; wuL = stackL[sp]; wuR = stackR[sp];
1855          #if DEBUG
1856             assert (sp >= 0);
1857          #endif
1858 
1859       } /* if this is a small work-unit */
1860    }
1861 }
1862 
1863 #undef RC
1864 #undef SWAP
1865 #undef ISORT_BELOW
1866 
1867 
1868 /*---------------------------------------------*/
1869 /*  Use of NORMALISEHI is safe here, for any
1870 *   lastPP >= 1 (which is guaranteed), since
1871 *   the max denormalisation is 1.
1872 */
trivialGt(Int32 i1,Int32 i2)1873 INLINE Bool trivialGt ( Int32 i1, Int32 i2 )
1874 {
1875    Int32 k;
1876 
1877    for (k = 0; k <= last; k++) {
1878       UChar c1 = GETFIRST(i1);
1879       UChar c2 = GETFIRST(i2);
1880       if (c1 == c2) {
1881          i1++; i1 = NORMALISEHI(i1);
1882          i2++; i2 = NORMALISEHI(i2);
1883       } else
1884       if (c1 > c2) return True; else return False;
1885    };
1886    return False;
1887 }
1888 
1889 
1890 /*---------------------------------------------*/
1891 /*  Always works.
1892 */
shellTrivial(void)1893 void shellTrivial ( void )
1894 {
1895    Int32 i, j, h, bigN;
1896    Int32 v;
1897 
1898    Int32 ptrLo = 0;
1899    Int32 ptrHi = last;
1900    bigN = ptrHi - ptrLo + 1;
1901    h = 1;
1902    do { h = 3 * h + 1; } while ( ! (h > bigN) );
1903    do {
1904       h = h / 3;
1905       for (i = ptrLo + h; i <= ptrHi; i++) {
1906          v = zptr[i];
1907          j = i;
1908          while ( trivialGt ( zptr[j-h], v ) ) {
1909             zptr[j] = zptr[j-h];
1910             j = j - h;
1911             if (j <= (ptrLo + h - 1)) goto zero;
1912          }
1913          zero:
1914          zptr[j] = v;
1915       }
1916    } while (h != 1);
1917 }
1918 
1919 
1920 /*---------------------------------------------*/
1921 /*  We have to be pretty careful for small block
1922 *   sizes; the usual mechanism won't work properly,
1923 *   all, ultimately, because the pointer normalisation
1924 *   machinery doesn't work right whenever the amount
1925 *   of denormalisation exceeds lastPP.  And the
1926 *   greatest possible amount of denormalisation here
1927 *   is generated in fullGt, as
1928 *      4 * NUM_FULLGT_UNROLLINGS.
1929 *   To make blocks smaller than 4 * NUM_... sort
1930 *   correctly, it seems easiest simply
1931 *   to forget about striping, &c, and just do a simple
1932 *   shellsort on the un-striped block.  The performance
1933 *   loss has to be inconsequential, since 4 * NUM_... is
1934 *   tiny (16 at present).
1935 */
sortIt(void)1936 void sortIt ( void )
1937 {
1938    /*--
1939       lastPP is `last++', ie, is always == last+1.
1940       The two (lastPP and last+1) should be interchangeable.
1941       lastPP is more convenient for fast renormalisation,
1942       that's all.
1943 
1944       In the various block-sized structures, live data runs
1945       from 0 to last inclusive, so lastPP is the number
1946       of live data items.
1947    --*/
1948    lastPP = last + 1;
1949 
1950    if (lastPP <= 1024) {
1951 
1952       /*--
1953          shellTrivial *must* be used for
1954          lastPP <= 4 * NUM_FULLGT_UNROLLINGS.
1955          The 1024 limit is much higher; the purpose is to avoid
1956          lumbering sorting of small blocks with the
1957          fixed overhead of the full counting-sort mechanism.
1958       --*/
1959 
1960       Int32 i;
1961 
1962       if (veryVerbose) fprintf ( stderr, "trivialSort ...\n" );
1963       for (i = 0; i <= last; i++) zptr[i] = i;
1964       shellTrivial();
1965       if (veryVerbose) fprintf ( stderr, "trivialSort done.\n" );
1966 
1967    } else {
1968       Int32 i;
1969       Int32 grade;
1970       Int32 notDone;
1971 
1972       stripe ();
1973 
1974       if (veryVerbose) fprintf ( stderr, "bucket sorting ...\n" );
1975 
1976       for (i = 0; i <= 65536; i++)
1977          ftab[i] = 0;
1978       for (i = 0; i <= last; i++)
1979          ftab[GETFIRST16(i)]++;
1980       for (i = 1; i <= 65536; i++)
1981          ftab[i] += ftab[i-1];
1982 
1983       for (i = 0; i <= last; i++) {
1984          UInt32 j = GETFIRST16(i);
1985          ftab[j]--;
1986          zptr[ftab[j]] = i;
1987       }
1988 
1989       copyOffsetWords();
1990 
1991       notDone = lastPP;
1992       for (grade = 1; grade <= 5; grade++) {
1993          Int32 candNo;
1994          Int32 loBound;
1995          Int32 hiBound;
1996 
1997          switch (grade) {
1998             case 1:  loBound = 2;     hiBound = 15;     break;
1999             case 2:  loBound = 16;    hiBound = 255;    break;
2000             case 3:  loBound = 256;   hiBound = 4095;   break;
2001             case 4:  loBound = 4096;  hiBound = 65535;  break;
2002             case 5:  loBound = 65536; hiBound = 900000; break;
2003             default: panic ( "gradedSort" );            break;
2004          }
2005          if (loBound > lastPP) continue;
2006 
2007          candNo = 0;
2008          for (i = 0; i <= 65535; i++) {
2009 
2010             Int32 freqHere = ftab[i+1] - ftab[i];
2011 
2012             if (freqHere >= loBound && freqHere <= hiBound) {
2013                Int32 j, k;
2014                Int32 lower = ftab[i];
2015                Int32 upper = ftab[i+1] - 1;
2016 
2017                candNo++;
2018                notDone -= freqHere;
2019 
2020                if (veryVerbose) {
2021                   fprintf ( stderr,
2022                             "   %d -> %d:  cand %5d,   freq = %6d,   notdone = %6d",
2023                             loBound, hiBound, candNo, freqHere, notDone );
2024                   fflush ( stderr );
2025                }
2026 
2027                qsortFull ( lower, upper );
2028 
2029                if (freqHere < 65535) {
2030                   for (j = lower, k = 0; j <= upper; j++, k++) {
2031                      Int32 a2update = zptr[j];
2032                      SETREST16(a2update, k);
2033                        if (a2update < (4 * NUM_FULLGT_UNROLLINGS))
2034                           SETREST16(a2update + lastPP, k);
2035                   }
2036                }
2037                if (veryVerbose)
2038                   fprintf ( stderr, "\n" );
2039 
2040             }
2041          }
2042       }
2043    }
2044 }
2045 
2046 
2047 /*---------------------------------------------------*/
2048 /*--- The Reversible Transformation (tm)          ---*/
2049 /*---------------------------------------------------*/
2050 
2051 /*---------------------------------------------*/
2052 /*--- Use: block [0 .. last]
2053       Def: origPtr.  ll [0 .. last] is synthesised later.
2054 ---*/
doReversibleTransformation(void)2055 void doReversibleTransformation ( void )
2056 {
2057    Int32 i;
2058 
2059    if (veryVerbose) fprintf ( stderr, "\n" );
2060 
2061    sortIt ();
2062 
2063    origPtr = -1;
2064    for (i = 0; i <= last; i++)
2065        if (zptr[i] == 0)
2066           { origPtr = i; break; };
2067 
2068    if (origPtr == -1) panic ( "doReversibleTransformation" );
2069 
2070 }
2071 
2072 
2073 /*---------------------------------------------*/
2074 /*--- Use: ll[0 .. last] and origPtr
2075       Def: block[0 .. last]
2076 ---*/
undoReversibleTransformation(void)2077 void undoReversibleTransformation ( void )
2078 {
2079    Int32  cc[256];
2080    Int32  i, j, ch, sum;
2081 
2082    for (i = 0; i <= 255; i++) cc[i] = 0;
2083 
2084    for (i = 0; i <= last; i++) {
2085       UChar ll_i = ll[i];
2086       zptr[i] = cc[ll_i];
2087       cc[ll_i] ++;
2088    };
2089 
2090    sum = 0;
2091    for (ch = 0; ch <= 255; ch++) {
2092       sum = sum + cc[ch];
2093       cc[ch] = sum - cc[ch];
2094    };
2095 
2096    i = origPtr;
2097    for (j = last; j >= 0; j--) {
2098       UChar ll_i = ll[i];
2099       block[j] = ll_i;
2100       i = zptr[i] + cc[ll_i];
2101    };
2102 }
2103 
2104 
2105 /*---------------------------------------------------*/
2106 /*--- The block loader and RLEr                   ---*/
2107 /*---------------------------------------------------*/
2108 
2109 #define SPOT_BASIS_STEP 8000
2110 
2111 /*---------------------------------------------*/
spotBlock(Bool weAreCompressing)2112 void spotBlock ( Bool weAreCompressing )
2113 {
2114    Int32 pos, delta, newdelta;
2115 
2116    pos   = SPOT_BASIS_STEP;
2117    delta = 1;
2118 
2119    while (pos < last) {
2120 
2121       Int32 n;
2122 
2123       if (weAreCompressing)
2124          n = (Int32)GETFIRST(pos) + 1; else
2125          n = (Int32)block[pos]    - 1;
2126 
2127       if (n == 256) n = 0; else if (n == -1)  n = 255;
2128 
2129       if (! (n >= 0 && n <= 255) ) panic ( "spotBlock" );
2130 
2131       if (weAreCompressing)
2132          SETFIRST(pos, (UChar)n); else
2133          block[pos] = (UChar)n;
2134 
2135       switch (delta) {
2136          case 3:  newdelta = 1; break;
2137          case 1:  newdelta = 4; break;
2138          case 4:  newdelta = 5; break;
2139          case 5:  newdelta = 9; break;
2140          case 9:  newdelta = 2; break;
2141          case 2:  newdelta = 6; break;
2142          case 6:  newdelta = 7; break;
2143          case 8:  newdelta = 8; break;
2144          case 7:  newdelta = 3; break;
2145          default: newdelta = 1; break;
2146       }
2147       delta = newdelta;
2148 
2149       pos = pos + SPOT_BASIS_STEP + 17 * (newdelta - 5);
2150    }
2151 }
2152 
2153 
2154 
2155 /*---------------------------------------------*/
2156 /*  Top 16:   run length, 1 to 255.
2157 *   Lower 16: the char, or MY_EOF for EOF.
2158 */
2159 
2160 #define MY_EOF 257
2161 
getRLEpair(FILE * src)2162 INLINE Int32 getRLEpair ( FILE* src )
2163 {
2164    Int32     runLength;
2165    IntNative ch, chLatest;
2166 
2167    ch = getc ( src );
2168 
2169    /*--- Because I have no idea what kind of a value EOF is. ---*/
2170    if (ch == EOF) {
2171       ERROR_IF_NOT_ZERO ( errno );
2172       return (1 << 16) | MY_EOF;
2173    }
2174 
2175    runLength = 0;
2176    do {
2177       chLatest = getc ( src );
2178       runLength++;
2179       bytesIn++;
2180    }
2181       while (ch == chLatest && runLength < 255);
2182 
2183    if ( chLatest != EOF ) {
2184       if ( ungetc ( chLatest, src ) == EOF )
2185          panic ( "getRLEpair: ungetc failed" );
2186    } else {
2187       ERROR_IF_NOT_ZERO ( errno );
2188    }
2189 
2190    /*--- Conditional is just a speedup hack. ---*/
2191    if (runLength == 1) {
2192       UPDATE_CRC ( globalCrc, (UChar)ch );
2193       return (1 << 16) | ch;
2194    } else {
2195       Int32 i;
2196       for (i = 1; i <= runLength; i++)
2197          UPDATE_CRC ( globalCrc, (UChar)ch );
2198       return (runLength << 16) | ch;
2199    }
2200 }
2201 
2202 
2203 /*---------------------------------------------*/
loadAndRLEsource(FILE * src)2204 Bool loadAndRLEsource ( FILE* src )
2205 {
2206    Int32 ch, allowableBlockSize;
2207 
2208    last = -1;
2209    ch   = 0;
2210 
2211    /*--- 20 is just a paranoia constant ---*/
2212    allowableBlockSize = 100000 * blockSize100k - 20;
2213 
2214    while (last < allowableBlockSize && ch != MY_EOF) {
2215       Int32 rlePair, runLen;
2216       rlePair = getRLEpair ( src );
2217       ch      = rlePair & 0xFFFF;
2218       runLen  = (UInt32)rlePair >> 16;
2219 
2220       #if DEBUG
2221          assert (runLen >= 1 && runLen <= 255);
2222       #endif
2223 
2224       if (ch == MY_EOF)
2225          { last++; SETFIRST(last, ((UChar)42)); }
2226          else
2227          switch (runLen) {
2228             case 1:
2229                last++; SETFIRST(last, ((UChar)ch)); break;
2230             case 2:
2231                last++; SETFIRST(last, ((UChar)ch));
2232                last++; SETFIRST(last, ((UChar)ch)); break;
2233             case 3:
2234                last++; SETFIRST(last, ((UChar)ch));
2235                last++; SETFIRST(last, ((UChar)ch));
2236                last++; SETFIRST(last, ((UChar)ch)); break;
2237             default:
2238                last++; SETFIRST(last, ((UChar)ch));
2239                last++; SETFIRST(last, ((UChar)ch));
2240                last++; SETFIRST(last, ((UChar)ch));
2241                last++; SETFIRST(last, ((UChar)ch));
2242                last++; SETFIRST(last, ((UChar)(runLen-4))); break;
2243          }
2244    }
2245    return (ch == MY_EOF);
2246 }
2247 
2248 
2249 /*---------------------------------------------*/
2250 /*--
2251   This new version is derived from some code
2252   sent to me Christian von Roques.
2253 --*/
unRLEandDump(FILE * dst,Bool thisIsTheLastBlock)2254 void unRLEandDump ( FILE* dst, Bool thisIsTheLastBlock )
2255 {
2256    IntNative retVal;
2257    Int32     lastCharToSpew, i, count, chPrev, ch;
2258    UInt32    localCrc;
2259 
2260    if (thisIsTheLastBlock)
2261       lastCharToSpew = last - 1; else
2262       lastCharToSpew = last;
2263 
2264    count    = 0;
2265    i        = 0;
2266    ch       = 256;   /*-- not a char and not EOF --*/
2267    localCrc = getGlobalCRC();
2268 
2269    while ( i <= lastCharToSpew ) {
2270       chPrev = ch;
2271       ch = block[i];
2272       i++;
2273 
2274       retVal = putc ( ch, dst );
2275       ERROR_IF_EOF ( retVal );
2276       UPDATE_CRC ( localCrc, (UChar)ch );
2277 
2278       if (ch != chPrev) {
2279          count = 1;
2280       } else {
2281          count++;
2282          if (count >= 4) {
2283             Int32 j;
2284             for (j = 0;  j < (Int32)block[i];  j++) {
2285                retVal = putc (ch, dst);
2286                ERROR_IF_EOF ( retVal );
2287                UPDATE_CRC ( localCrc, (UChar)ch );
2288             }
2289             i++;
2290             count = 0;
2291          }
2292       }
2293    }
2294 
2295    setGlobalCRC ( localCrc );
2296 
2297    if (thisIsTheLastBlock && block[last] != 42) unblockError ();
2298 }
2299 
2300 
2301 /*---------------------------------------------------*/
2302 /*--- Processing of complete files and streams    ---*/
2303 /*---------------------------------------------------*/
2304 
2305 /*---------------------------------------------*/
compressStream(FILE * stream,FILE * zStream)2306 void compressStream ( FILE *stream, FILE *zStream )
2307 {
2308    IntNative  retVal;
2309    Bool       thisIsTheLastBlock;
2310    BitStream  *zbs;
2311    UInt32     crcToSend;
2312    Int32      blockNo = 1;
2313 
2314    bytesIn  = 0;
2315    bytesOut = 0;
2316 
2317    SET_BINARY_MODE(stream);
2318    SET_BINARY_MODE(zStream);
2319 
2320    zbs = bsOpenWriteStream ( zStream );
2321 
2322    /*--- Write `magic' bytes B and Z,
2323          then 0 indicating file-format revision zero,
2324          followed by a digit indicating blockSize100k.
2325    ---*/
2326    bsPutUChar ( zbs, 'B' );
2327    bsPutUChar ( zbs, 'Z' );
2328    bsPutUChar ( zbs, '0' );
2329    bsPutUChar ( zbs, '0' + blockSize100k );
2330 
2331    initialiseCRC ();
2332    initBogusModel ();
2333    arithCodeStartEncoding ( zbs );
2334 
2335    do {
2336       if (veryVerbose)
2337          fprintf ( stderr, "\nBEGIN block %d\n", blockNo );
2338       blockNo++;
2339       thisIsTheLastBlock = loadAndRLEsource ( stream );
2340       spotBlock ( True );
2341       doReversibleTransformation ();
2342       moveToFrontCodeAndSend ( zbs, thisIsTheLastBlock );
2343    }
2344       while ( ! thisIsTheLastBlock );
2345 
2346    crcToSend = getFinalCRC ();
2347    putUInt32 ( zbs, crcToSend );
2348    if (veryVerbose)
2349       fprintf ( stderr, "\nCRC = 0x%x\n", crcToSend );
2350 
2351    arithCodeDoneEncoding ( zbs );
2352    bsClose ( zbs );
2353    ERROR_IF_NOT_ZERO ( ferror(stream) );
2354    retVal = fclose ( stream );
2355    ERROR_IF_EOF ( retVal );
2356 
2357    if (veryVerbose) {
2358        fprintf ( stderr, "\n" );
2359        dumpAllModelStats ();
2360        fprintf ( stderr, "\n" );
2361    }
2362 
2363    if (bytesIn == 0) bytesIn = 1;
2364    if (bytesOut == 0) bytesOut = 1;
2365 
2366    if (verbose)
2367       fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, "
2368                         "%5.2f%% saved, %d in, %d out.\n",
2369                 (float)bytesIn / (float)bytesOut,
2370                 (8.0 * (float)bytesOut) / (float)bytesIn,
2371                 100.0 * (1.0 - (float)bytesOut / (float)bytesIn),
2372                 bytesIn,
2373                 bytesOut
2374               );
2375 
2376    if (veryVerbose)
2377       fprintf ( stderr, "\n" );
2378 }
2379 
2380 
2381 /*---------------------------------------------*/
uncompressStream(FILE * zStream,FILE * stream)2382 Bool uncompressStream ( FILE *zStream, FILE *stream )
2383 {
2384    Bool       thisIsTheLastBlock;
2385    BitStream  *zbs;
2386    Int32      magic1, magic2, magic3, magic4;
2387    UInt32     crcStored, crcComputed;
2388    Int32      currBlockNo;
2389    IntNative  retVal;
2390 
2391    SET_BINARY_MODE(stream);
2392    SET_BINARY_MODE(zStream);
2393 
2394    zbs = ( bsOpenReadStream ( zStream ) );
2395 
2396    /*--
2397       A bad magic number is `recoverable from';
2398       return with False so the caller skips the file.
2399    --*/
2400    magic1 = (Int32)bsGetUChar ( zbs );
2401    magic2 = (Int32)bsGetUChar ( zbs );
2402    magic3 = (Int32)bsGetUChar ( zbs );
2403    magic4 = (Int32)bsGetUChar ( zbs );
2404    if (magic1 != 'B' ||
2405        magic2 != 'Z' ||
2406        magic3 != '0' ||
2407        magic4 < '1'  ||
2408        magic4 > '9') {
2409      bsClose ( zbs );
2410      retVal = fclose ( stream );
2411      ERROR_IF_EOF ( retVal );
2412      return False;
2413    }
2414 
2415    setDecompressStructureSizes ( magic4 - '0' );
2416    initialiseCRC ();
2417    initBogusModel ();
2418    arithCodeStartDecoding ( zbs );
2419 
2420    if (veryVerbose) fprintf ( stderr, "\n  " );
2421    currBlockNo = 0;
2422    do {
2423       currBlockNo++;
2424       if (veryVerbose)
2425          fprintf ( stderr, "[%d: ac+mtf ", currBlockNo );
2426       thisIsTheLastBlock = getAndMoveToFrontDecode ( zbs );
2427       if (veryVerbose) fprintf ( stderr, "rt " );
2428       undoReversibleTransformation ();
2429       spotBlock ( False );
2430       if (veryVerbose) fprintf ( stderr, "rld" );
2431       unRLEandDump ( stream, thisIsTheLastBlock );
2432       if (veryVerbose) fprintf ( stderr, "] " );
2433    }
2434       while ( ! thisIsTheLastBlock );
2435 
2436    if (veryVerbose) fprintf ( stderr, "\n  " );
2437 
2438    /*-- A bad CRC is considered a fatal error. --*/
2439    crcStored   = getUInt32 ( zbs );
2440    crcComputed = getFinalCRC ();
2441    if (veryVerbose)
2442       fprintf ( stderr,
2443                 "CRCs: stored = 0x%x, computed = 0x%x\n  ",
2444                 crcStored, crcComputed );
2445    if (crcStored != crcComputed)
2446       crcError ( crcStored, crcComputed );
2447 
2448    arithCodeDoneDecoding ( zbs );
2449    bsClose ( zbs );
2450    ERROR_IF_NOT_ZERO ( ferror(stream) );
2451    retVal = fclose ( stream );
2452    ERROR_IF_EOF ( retVal );
2453    return True;
2454 }
2455 
2456 
2457 
2458 /*---------------------------------------------------*/
2459 /*--- Error [non-] handling grunge                ---*/
2460 /*---------------------------------------------------*/
2461 
2462 /*---------------------------------------------*/
showFileNames(void)2463 void showFileNames ( void )
2464 {
2465    fprintf (
2466       stderr,
2467       "\tInput file = %s, output file = %s\n",
2468       (opMode == OM_STDIN_TO_STDOUT) ? "(stdin)" : inName,
2469       (opMode != OM_FILES_TO_FILES) ? "(stdout)" : outName
2470    );
2471 }
2472 
2473 
2474 /*---------------------------------------------*/
cleanUpAndFail(void)2475 void cleanUpAndFail ( void )
2476 {
2477    IntNative retVal;
2478 
2479    if ( opMode == OM_FILES_TO_FILES ) {
2480       fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n",
2481                 progName, outName );
2482       if (outputHandleJustInCase != NULL)
2483          fclose ( outputHandleJustInCase );
2484       retVal = remove ( outName );
2485       if (retVal != 0)
2486          fprintf ( stderr,
2487                    "%s: WARNING: deletion of output file (apparently) failed.\n",
2488                    progName );
2489    }
2490    exit ( 1 );
2491 }
2492 
2493 
2494 /*---------------------------------------------*/
panic(Char * s)2495 void panic ( Char* s )
2496 {
2497    fprintf ( stderr,
2498              "\n%s: PANIC -- internal consistency error:\n"
2499              "\t%s\n"
2500              "\tThis is a BUG.  Please report it to me at:\n"
2501              "\tsewardj@cs.man.ac.uk.\n",
2502              progName, s );
2503    showFileNames();
2504    cleanUpAndFail();
2505 }
2506 
2507 
2508 /*---------------------------------------------*/
crcError(UInt32 crcStored,UInt32 crcComputed)2509 void crcError ( UInt32 crcStored, UInt32 crcComputed )
2510 {
2511    fprintf ( stderr,
2512              "\n%s: Data integrity error when decompressing.\n"
2513              "\tStored CRC = 0x%x, computed CRC = 0x%x\n"
2514              "\tThis could be a bug -- please report it to me at:\n"
2515              "\tsewardj@cs.man.ac.uk.\n",
2516              progName, crcStored, crcComputed );
2517    showFileNames();
2518    cleanUpAndFail();
2519 }
2520 
2521 
2522 /*---------------------------------------------*/
compressedStreamEOF(void)2523 void compressedStreamEOF ( void )
2524 {
2525    fprintf ( stderr,
2526              "\n%s: Compressed file ends unexpectedly;\n\t"
2527              "perhaps it is corrupted?  *Possible* reason follows.\n",
2528              progName );
2529    perror ( progName );
2530    showFileNames();
2531    cleanUpAndFail();
2532 }
2533 
2534 
2535 /*---------------------------------------------*/
ioError()2536 void ioError ( )
2537 {
2538    fprintf ( stderr,
2539              "\n%s: I/O or other error, bailing out.  Possible reason follows.\n",
2540              progName );
2541    perror ( progName );
2542    showFileNames();
2543    cleanUpAndFail();
2544 }
2545 
2546 
2547 /*---------------------------------------------*/
blockOverrun()2548 void blockOverrun ()
2549 {
2550    fprintf ( stderr,
2551              "\n%s: block overrun during decompression,\n"
2552              "\twhich probably means the compressed file\n"
2553              "\tis corrupted.\n",
2554              progName );
2555    showFileNames();
2556    cleanUpAndFail();
2557 }
2558 
2559 
2560 /*---------------------------------------------*/
unblockError()2561 void unblockError ()
2562 {
2563    fprintf ( stderr,
2564              "\n%s: compressed file didn't unblock correctly,\n"
2565              "\twhich probably means it is corrupted.\n",
2566              progName );
2567    showFileNames();
2568    cleanUpAndFail();
2569 }
2570 
2571 
2572 /*---------------------------------------------*/
bitStreamEOF()2573 void bitStreamEOF ()
2574 {
2575    fprintf ( stderr,
2576              "\n%s: read past the end of compressed data,\n"
2577              "\twhich probably means it is corrupted.\n",
2578              progName );
2579    showFileNames();
2580    cleanUpAndFail();
2581 }
2582 
2583 
2584 /*---------------------------------------------*/
mySignalCatcher(int n __unused)2585 void mySignalCatcher (int n __unused)
2586 {
2587    fprintf ( stderr,
2588              "\n%s: Control-C (or similar) caught, quitting.\n",
2589              progName );
2590    cleanUpAndFail();
2591 }
2592 
2593 
2594 /*---------------------------------------------*/
mySIGSEGVorSIGBUScatcher(int n __unused)2595 void mySIGSEGVorSIGBUScatcher (int n __unused)
2596 {
2597    if (compressing)
2598       fprintf ( stderr,
2599                 "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing,\n"
2600                 "\twhich probably indicates a bug in BZIP.  Please\n"
2601                 "\treport it to me at: sewardj@cs.man.ac.uk\n",
2602                 progName );
2603       else
2604       fprintf ( stderr,
2605                 "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing,\n"
2606                 "\twhich probably indicates that the compressed data\n"
2607                 "\tis corrupted.\n",
2608                 progName );
2609 
2610    showFileNames();
2611    cleanUpAndFail();
2612 }
2613 
2614 
2615 /*---------------------------------------------*/
uncompressOutOfMemory(Int32 draw,Int32 blockSize)2616 void uncompressOutOfMemory ( Int32 draw, Int32 blockSize )
2617 {
2618    fprintf ( stderr,
2619              "\n%s: Can't allocate enough memory for decompression.\n"
2620              "\tRequested %d bytes for a block size of %d.\n"
2621              "\tFind a machine with more memory, perhaps?\n",
2622              progName, draw, blockSize );
2623    showFileNames();
2624    cleanUpAndFail();
2625 }
2626 
2627 
2628 /*---------------------------------------------*/
compressOutOfMemory(Int32 draw,Int32 blockSize)2629 void compressOutOfMemory ( Int32 draw, Int32 blockSize )
2630 {
2631    fprintf ( stderr,
2632              "\n%s: Can't allocate enough memory for compression.\n"
2633              "\tRequested %d bytes for a block size of %d.\n"
2634              "\tReduce the block size, and/or use the -e flag.\n",
2635              progName, draw, blockSize );
2636    showFileNames();
2637    cleanUpAndFail();
2638 }
2639 
2640 
2641 /*---------------------------------------------------*/
2642 /*--- The main driver machinery                   ---*/
2643 /*---------------------------------------------------*/
2644 
2645 /*---------------------------------------------*/
pad(Char * s)2646 void pad ( Char *s )
2647 {
2648    Int32 i;
2649    if ( (Int32)strlen(s) >= longestFileName ) return;
2650    for (i = 1; i <= longestFileName - (Int32)strlen(s); i++)
2651       fprintf ( stderr, " " );
2652 }
2653 
2654 
2655 /*---------------------------------------------*/
fileExists(Char * name)2656 Bool fileExists ( Char* name )
2657 {
2658    FILE *tmp   = fopen ( name, "rb" );
2659    Bool exists = (tmp != NULL);
2660    if (tmp != NULL) fclose ( tmp );
2661    return exists;
2662 }
2663 
2664 
2665 /*---------------------------------------------*/
2666 /*--
2667   if in doubt, return True
2668 --*/
notABogStandardFile(Char * name)2669 Bool notABogStandardFile ( Char* name )
2670 {
2671    IntNative      i;
2672    struct MY_STAT statBuf;
2673 
2674    i = MY_LSTAT ( name, &statBuf );
2675    if (i != 0) return True;
2676    if (MY_S_IFREG(statBuf.st_mode)) return False;
2677    return True;
2678 }
2679 
2680 
2681 /*---------------------------------------------*/
copyDateAndPermissions(Char * srcName,Char * dstName)2682 void copyDateAndPermissions ( Char *srcName, Char *dstName )
2683 {
2684    IntNative      retVal;
2685    struct MY_STAT statBuf;
2686    struct utimbuf uTimBuf;
2687 
2688    retVal = MY_LSTAT ( srcName, &statBuf );
2689    ERROR_IF_NOT_ZERO ( retVal );
2690    uTimBuf.actime = statBuf.st_atime;
2691    uTimBuf.modtime = statBuf.st_mtime;
2692 
2693    retVal = chmod ( dstName, statBuf.st_mode );
2694    ERROR_IF_NOT_ZERO ( retVal );
2695    retVal = utime ( dstName, &uTimBuf );
2696    ERROR_IF_NOT_ZERO ( retVal );
2697 }
2698 
2699 
2700 /*---------------------------------------------*/
endsInBz(Char * name)2701 Bool endsInBz ( Char* name )
2702 {
2703    Int32 n = strlen ( name );
2704    if (n <= 3) return False;
2705    return
2706       (name[n-3] == '.' &&
2707        name[n-2] == 'b' &&
2708        name[n-1] == 'z');
2709 }
2710 
2711 
2712 /*---------------------------------------------*/
containsDubiousChars(Char * name)2713 Bool containsDubiousChars ( Char* name )
2714 {
2715    Bool cdc = False;
2716    for (; *name != '\0'; name++)
2717       if (*name == '?' || *name == '*') cdc = True;
2718    return cdc;
2719 }
2720 
2721 
2722 /*---------------------------------------------*/
compress(Char * name)2723 void compress ( Char *name )
2724 {
2725    FILE *inStr;
2726    FILE *outStr;
2727 
2728    strcpy ( inName, name );
2729    strcpy ( outName, name );
2730    strcat ( outName, ".bz" );
2731 
2732    if ( opMode != OM_STDIN_TO_STDOUT && containsDubiousChars ( inName ) ) {
2733       fprintf ( stderr, "%s: There are no files matching `%s'.\n",
2734       progName, inName );
2735       return;
2736    }
2737    if ( opMode != OM_STDIN_TO_STDOUT && !fileExists ( inName ) ) {
2738       fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
2739                 progName, inName );
2740       return;
2741    }
2742    if ( opMode != OM_STDIN_TO_STDOUT && endsInBz ( inName )) {
2743       fprintf ( stderr, "%s: Input file name %s ends in `.bz', skipping.\n",
2744                 progName, inName );
2745       return;
2746    }
2747    if ( opMode != OM_STDIN_TO_STDOUT && notABogStandardFile ( inName )) {
2748       fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
2749                 progName, inName );
2750       return;
2751    }
2752    if ( opMode == OM_FILES_TO_FILES && fileExists ( outName ) ) {
2753       fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
2754                 progName, outName );
2755       return;
2756    }
2757 
2758    switch ( opMode ) {
2759 
2760       case OM_STDIN_TO_STDOUT:
2761          inStr = stdin;
2762          outStr = stdout;
2763          if ( isatty ( fileno ( stdout ) ) ) {
2764             fprintf ( stderr,
2765                       "%s: I won't write compressed data to a terminal.\n",
2766                       progName );
2767             fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
2768                               progName, progName );
2769             return;
2770          };
2771          break;
2772 
2773       case OM_FILE_TO_STDOUT:
2774          inStr = fopen ( inName, "rb" );
2775          outStr = stdout;
2776          if ( isatty ( fileno ( stdout ) ) ) {
2777             fprintf ( stderr,
2778                       "%s: I won't write compressed data to a terminal.\n",
2779                       progName );
2780             fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
2781                               progName, progName );
2782             return;
2783          };
2784          if ( inStr == NULL ) {
2785             fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
2786                       progName, inName );
2787             return;
2788          };
2789          break;
2790 
2791       case OM_FILES_TO_FILES:
2792          inStr = fopen ( inName, "rb" );
2793          outStr = fopen ( outName, "wb" );
2794          if ( outStr == NULL) {
2795             fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
2796                       progName, outName );
2797             return;
2798          }
2799          if ( inStr == NULL ) {
2800             fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
2801                       progName, inName );
2802             return;
2803          };
2804          break;
2805 
2806       default:
2807          panic ( "compress: bad opMode" );
2808          break;
2809    }
2810 
2811    if (verbose) {
2812       fprintf ( stderr,
2813                 "  %s: ",
2814                 opMode == OM_STDIN_TO_STDOUT ? "(stdin)" : inName );
2815       pad ( opMode == OM_STDIN_TO_STDOUT ? "(stdin)" : inName );
2816       fflush ( stderr );
2817    }
2818 
2819    /*--- Now the input and output handles are sane.  Do the Biz. ---*/
2820    errno = 0;
2821    outputHandleJustInCase = outStr;
2822    compressStream ( inStr, outStr );
2823    outputHandleJustInCase = NULL;
2824 
2825    /*--- If there was an I/O error, we won't get here. ---*/
2826    if ( opMode == OM_FILES_TO_FILES ) {
2827       copyDateAndPermissions ( inName, outName );
2828       if ( !keepInputFiles ) {
2829          IntNative retVal = remove ( inName );
2830          ERROR_IF_NOT_ZERO ( retVal );
2831       }
2832    }
2833 }
2834 
2835 
2836 /*---------------------------------------------*/
uncompress(Char * name)2837 void uncompress ( Char *name )
2838 {
2839    FILE *inStr;
2840    FILE *outStr;
2841    Bool magicNumberOK;
2842 
2843    strcpy ( inName, name );
2844    strcpy ( outName, name );
2845    if ( endsInBz ( inName ) )
2846       outName [ strlen ( outName ) - 3 ] = '\0';
2847 
2848    if ( opMode != OM_STDIN_TO_STDOUT && containsDubiousChars ( inName ) ) {
2849       fprintf ( stderr, "%s: There are no files matching `%s'.\n",
2850                 progName, inName );
2851       return;
2852    }
2853    if ( opMode != OM_STDIN_TO_STDOUT && !fileExists ( inName ) ) {
2854       fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
2855                 progName, inName );
2856       return;
2857    }
2858    if ( opMode != OM_STDIN_TO_STDOUT && !endsInBz ( inName )) {
2859       fprintf ( stderr,
2860                 "%s: Input file name %s doesn't end in `.bz', skipping.\n",
2861                 progName, inName );
2862       return;
2863    }
2864    if ( opMode != OM_STDIN_TO_STDOUT && notABogStandardFile ( inName )) {
2865       fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
2866                 progName, inName );
2867       return;
2868    }
2869    if ( opMode == OM_FILES_TO_FILES && fileExists ( outName ) ) {
2870       fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
2871                 progName, outName );
2872       return;
2873    }
2874 
2875    switch ( opMode ) {
2876 
2877       case OM_STDIN_TO_STDOUT:
2878          inStr = stdin;
2879          outStr = stdout;
2880          if ( isatty ( fileno ( stdin ) ) ) {
2881             fprintf ( stderr,
2882                       "%s: I won't read compressed data from a terminal.\n",
2883                       progName );
2884             fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
2885                               progName, progName );
2886             return;
2887          };
2888          break;
2889 
2890       case OM_FILE_TO_STDOUT:
2891          inStr = fopen ( inName, "rb" );
2892          outStr = stdout;
2893          if ( inStr == NULL ) {
2894             fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
2895                       progName, inName );
2896             return;
2897          };
2898          break;
2899 
2900       case OM_FILES_TO_FILES:
2901          inStr = fopen ( inName, "rb" );
2902          outStr = fopen ( outName, "wb" );
2903          if ( outStr == NULL) {
2904             fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
2905                       progName, outName );
2906             return;
2907          }
2908          if ( inStr == NULL ) {
2909             fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
2910                       progName, inName );
2911             return;
2912          };
2913          break;
2914 
2915       default:
2916          panic ( "uncompress: bad opMode" );
2917          break;
2918    }
2919 
2920    if (verbose) {
2921       fprintf ( stderr,
2922                 "  %s: ",
2923                 opMode == OM_STDIN_TO_STDOUT ? "(stdin)" : inName );
2924       pad ( opMode == OM_STDIN_TO_STDOUT ? "(stdin)" : inName );
2925       fflush ( stderr );
2926    }
2927 
2928    /*--- Now the input and output handles are sane.  Do the Biz. ---*/
2929    errno = 0;
2930    outputHandleJustInCase = outStr;
2931    magicNumberOK = uncompressStream ( inStr, outStr );
2932    outputHandleJustInCase = NULL;
2933 
2934    /*--- If there was an I/O error, we won't get here. ---*/
2935    if ( magicNumberOK ) {
2936       if ( opMode == OM_FILES_TO_FILES ) {
2937          copyDateAndPermissions ( inName, outName );
2938          if ( !keepInputFiles ) {
2939             IntNative retVal = remove ( inName );
2940             ERROR_IF_NOT_ZERO ( retVal );
2941          }
2942       }
2943    } else {
2944       if ( opMode == OM_FILES_TO_FILES ) {
2945          IntNative retVal = remove ( outName );
2946          ERROR_IF_NOT_ZERO ( retVal );
2947       }
2948    }
2949 
2950    if ( magicNumberOK ) {
2951       if (verbose)
2952          fprintf ( stderr, "done\n" );
2953    } else {
2954       if (verbose)
2955          fprintf ( stderr, "not a BZIP file, skipping.\n" ); else
2956          fprintf ( stderr,
2957                    "%s: %s is not a BZIP file, skipping.\n",
2958                    progName,
2959                    opMode == OM_STDIN_TO_STDOUT ? "(stdin)" : inName );
2960    }
2961 
2962 }
2963 
2964 
2965 /*---------------------------------------------*/
license(void)2966 void license ( void )
2967 {
2968    fprintf ( stderr,
2969 
2970     "  \n"
2971     "  Copyright (C) 1996 by Julian Seward.\n"
2972     "  \n"
2973     "  This program is free software; you can redistribute it and/or modify\n"
2974     "  it under the terms of the GNU General Public License as published by\n"
2975     "  the Free Software Foundation; either version 2 of the License, or\n"
2976     "  (at your option) any later version.\n"
2977     "  \n"
2978     "  This program is distributed in the hope that it will be useful,\n"
2979     "  but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
2980     "  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
2981     "  GNU General Public License for more details.\n"
2982     "  \n"
2983     "  You should have received a copy of the GNU General Public License\n"
2984     "  along with this program; if not, write to the Free Software\n"
2985     "  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n"
2986     "  \n"
2987     "  The GNU General Public License is contained in the file LICENSE.\n"
2988     "  \n"
2989    );
2990 }
2991 
2992 
2993 /*---------------------------------------------*/
usage(Char * fullProgName)2994 void usage ( Char *fullProgName )
2995 {
2996    fprintf (
2997       stderr,
2998       "\nusage: %s [flags and input files in any order]\n"
2999       "\n"
3000       "   Flags:  -d          force decompression\n"
3001       "           -f          force compression\n"
3002       "           -c          output to standard out\n"
3003       "           -v, -V      be verbose, or very verbose\n"
3004       "           -k          keep (don't delete) input files\n"
3005       "           -L          display software license\n"
3006       "           -1 .. -9    set block size of 100k .. 900k\n"
3007       "\n"
3008       "   If invoked as `bzip', the default action is to compress.\n"
3009       "              as `bunzip', the default action is to decompress.\n"
3010       "\n"
3011       "   If no file names are given, bzip compresses or decompresses\n"
3012       "   from standard input to standard output.  You can combine\n"
3013       "   flags, so `-v -e -4' means the same as -ve4 or -4ev, &c.\n"
3014       "\n"
3015       "   The default block size is 900k, which soaks up a lot of\n"
3016       "   memory for compression (7700k) and decompression (4500k).\n"
3017       "   You may want to select a smaller block size; see the manual\n"
3018       "   for details.  Smaller sizes give slightly less compression.\n"
3019       "   -e also saves memory during compression, at some speed cost.\n"
3020       "\n",
3021 
3022       fullProgName
3023    );
3024 }
3025 
3026 
3027 /*---------------------------------------------*/
3028 /*--
3029   All the garbage from here to main() is purely to
3030   implement a linked list of command-line arguments,
3031   into which main() copies argv[1 .. argc-1].
3032 
3033   The purpose of this ridiculous exercise is to
3034   facilitate the expansion of wildcard characters
3035   * and ? in filenames for halfwitted OSs like
3036   MSDOS, Windows 95 and NT ... yawn.
3037 
3038   The actual Dirty Work is done by the platform-specific
3039   macro APPEND_FILESPEC.
3040 --*/
3041 
3042 typedef
3043    struct zzzz {
3044       Char        *name;
3045       struct zzzz *link;
3046    }
3047    Cell;
3048 
3049 
3050 /*---------------------------------------------*/
myMalloc(size_t n)3051 void *myMalloc ( size_t n )
3052 {
3053    void* p;
3054 
3055    p = malloc ( n );
3056    if (p == NULL) {
3057       fprintf (
3058          stderr,
3059          "%s: `malloc' failed during processing of command-line args.\n",
3060          progName
3061       );
3062       exit ( 1 );
3063    }
3064    return p;
3065 }
3066 
3067 
3068 /*---------------------------------------------*/
mkCell(void)3069 Cell *mkCell ( void )
3070 {
3071    Cell *c;
3072 
3073    c = (Cell*) myMalloc ( sizeof ( Cell ) );
3074    c->name = NULL;
3075    c->link = NULL;
3076    return c;
3077 }
3078 
3079 
3080 /*---------------------------------------------*/
snocString(Cell * root,Char * name)3081 Cell *snocString ( Cell *root, Char *name )
3082 {
3083    if (root == NULL) {
3084       Cell *tmp = mkCell();
3085       tmp->name = (Char*) myMalloc ( 5 + strlen(name) );
3086       strcpy ( tmp->name, name );
3087       return tmp;
3088    } else {
3089       Cell *tmp = root;
3090       while (tmp->link != NULL) tmp = tmp->link;
3091       tmp->link = snocString ( tmp->link, name );
3092       return root;
3093    }
3094 }
3095 
3096 
3097 
3098 /*---------------------------------------------*/
main(IntNative argc,Char * argv[])3099 IntNative main ( IntNative argc, Char *argv[] )
3100 {
3101    Int32  numFileNames;
3102    Int32  i, j;
3103    Char   *tmp;
3104    Cell   *argList;
3105    Cell   *aa;
3106 
3107    outputHandleJustInCase  = NULL;
3108    ftab                    = NULL;
3109    block                   = NULL;
3110    ll                      = NULL;
3111    words                   = NULL;
3112    zptr                    = NULL;
3113    bsInUse                 = False;
3114    errno                   = 0;
3115 
3116    strcpy ( progNameReally, argv[0] );
3117    progName = &progNameReally[0];
3118    for (tmp = &progNameReally[0]; *tmp != '\0'; tmp++)
3119       if (*tmp == PATH_SEP) progName = tmp + 1;
3120 
3121    argList = NULL;
3122    for (i = 1; i <= argc-1; i++)
3123       APPEND_FILESPEC(argList, argv[i]);
3124 
3125    strcpy ( inName, "-" );
3126    strcpy ( outName, "-" );
3127 
3128 
3129    signal (SIGINT,  mySignalCatcher);
3130    signal (SIGTERM, mySignalCatcher);
3131    signal (SIGSEGV, mySIGSEGVorSIGBUScatcher);
3132    #if BZ_UNIX
3133    signal (SIGHUP,  mySignalCatcher);
3134    signal (SIGBUS,  mySIGSEGVorSIGBUScatcher);
3135    #endif
3136 
3137    #if DEBUG
3138       if ( ! (argc > 1 && strcmp ( "-Q", argv[1] ) == 0) )
3139          fprintf ( stderr, "BZIP: *** compiled with debugging ON ***\n" );
3140    #endif
3141 
3142    if (sizeof(Int32) != 4 || sizeof(UInt32) != 4    ||
3143        sizeof(Char)  != 1 || sizeof(UChar)  != 1) {
3144       fprintf ( stderr,
3145                 "BZIP: I require sizeof(Int32) == 4 bytes and\n"
3146                 "\tsizeof(Char) == 1 byte to run properly, sorry!\n"
3147                 "\tProbably you can fix this by defining them correctly,\n"
3148                 "\tand recompiling.\n" );
3149       exit(1);
3150    }
3151 
3152    longestFileName = 7;
3153    numFileNames    = 0;
3154    for (aa = argList; aa != NULL; aa = aa->link)
3155       if (aa->name[0] != '-') {
3156          numFileNames++;
3157          if (longestFileName < (Int32)strlen(aa->name) )
3158             longestFileName = (Int32)strlen(aa->name);
3159       }
3160 
3161    keepInputFiles  = False;
3162    compressing     = True;
3163    verbose         = False;
3164    veryVerbose     = False;
3165 
3166    if (numFileNames == 0)
3167       opMode = OM_STDIN_TO_STDOUT; else
3168       opMode = OM_FILES_TO_FILES;
3169 
3170    if ( (strcmp ( "bunzip",     progName ) == 0) ||
3171         (strcmp ( "BUNZIP",     progName ) == 0) ||
3172         (strcmp ( "bunzip.exe", progName ) == 0) ||
3173         (strcmp ( "BUNZIP.EXE", progName ) == 0) )
3174       compressing = False;
3175 
3176    if (compressing) blockSize100k = 9;
3177 
3178    for (aa = argList; aa != NULL; aa = aa->link)
3179       if (aa->name[0] == '-')
3180          for (j = 1; aa->name[j] != '\0'; j++)
3181             switch (aa->name[j]) {
3182                case 'Q': break;
3183                case 'c': opMode         = OM_FILE_TO_STDOUT; break;
3184                case 'd': compressing    = False; break;
3185                case 'f': compressing    = True; break;
3186                case 'v': verbose        = True; break;
3187                case 'k': keepInputFiles = True; break;
3188                case '1': blockSize100k  = 1; break;
3189                case '2': blockSize100k  = 2; break;
3190                case '3': blockSize100k  = 3; break;
3191                case '4': blockSize100k  = 4; break;
3192                case '5': blockSize100k  = 5; break;
3193                case '6': blockSize100k  = 6; break;
3194                case '7': blockSize100k  = 7; break;
3195                case '8': blockSize100k  = 8; break;
3196                case '9': blockSize100k  = 9; break;
3197                case 'L': license();          break;
3198                case 'V': verbose        = True;
3199                          veryVerbose    = True; break;
3200                default:  fprintf ( stderr, "%s: Bad flag `%s'\n",
3201                                    progName, aa->name );
3202                          usage ( progName );
3203                          exit ( 1 );
3204                          break;
3205          }
3206 
3207    if (verbose) {
3208       fprintf ( stderr,
3209                 "BZIP, a block-sorting file compressor.  "
3210                 "Version 0.21, 25-August-96.\n" );
3211    }
3212 
3213    if ( opMode == OM_FILE_TO_STDOUT && numFileNames != 1) {
3214       fprintf ( stderr, "%s: Option -c requires you to supply exactly one filename.\n",
3215                 progName );
3216       exit ( 1 );
3217    }
3218 
3219    if ( !compressing ) blockSize100k = 0;
3220 
3221    if (compressing) {
3222       allocateCompressStructures();
3223       if (opMode == OM_STDIN_TO_STDOUT)
3224          compress ( "-" );
3225          else
3226          for (aa = argList; aa != NULL; aa = aa->link)
3227             if (aa->name[0] != '-') compress ( aa->name );
3228    } else {
3229       if (opMode == OM_STDIN_TO_STDOUT)
3230          uncompress ( "-" );
3231          else
3232          for (aa = argList; aa != NULL; aa = aa->link)
3233             if (aa->name[0] != '-') uncompress ( aa->name );
3234    }
3235 
3236    exit ( 0 );
3237 }
3238 
3239 
3240 /*-----------------------------------------------------------*/
3241 /*--- end                                          bzip.c ---*/
3242 /*-----------------------------------------------------------*/
3243