1 
2 /*-----------------------------------------------------------*/
3 /*--- A block-sorting, lossless compressor        bzip2.c ---*/
4 /*-----------------------------------------------------------*/
5 
6 /*--
7   This program is bzip2, a lossless, block-sorting data compressor,
8   version 0.1pl2, dated 29-Aug-1997.
9 
10   Copyright (C) 1996, 1997 by Julian Seward.
11      Guildford, Surrey, UK
12      email: jseward@acm.org
13 
14   This program is free software; you can redistribute it and/or modify
15   it under the terms of the GNU General Public License as published by
16   the Free Software Foundation; either version 2 of the License, or
17   (at your option) any later version.
18 
19   This program is distributed in the hope that it will be useful,
20   but WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22   GNU General Public License for more details.
23 
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27 
28   The GNU General Public License is contained in the file LICENSE.
29 
30   This program is based on (at least) the work of:
31      Mike Burrows
32      David Wheeler
33      Peter Fenwick
34      Alistair Moffat
35      Radford Neal
36      Ian H. Witten
37      Robert Sedgewick
38      Jon L. Bentley
39 
40   For more information on these sources, see the file ALGORITHMS.
41 --*/
42 
43 /*----------------------------------------------------*/
44 /*--- IMPORTANT                                    ---*/
45 /*----------------------------------------------------*/
46 
47 /*--
48    WARNING:
49       This program (attempts to) compress data by performing several
50       non-trivial transformations on it.  Unless you are 100% familiar
51       with *all* the algorithms contained herein, and with the
52       consequences of modifying them, you should NOT meddle with the
53       compression or decompression machinery.  Incorrect changes can
54       and very likely *will* lead to disasterous loss of data.
55 
56    DISCLAIMER:
57       I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE
58       USE OF THIS PROGRAM, HOWSOEVER CAUSED.
59 
60       Every compression of a file implies an assumption that the
61       compressed file can be decompressed to reproduce the original.
62       Great efforts in design, coding and testing have been made to
63       ensure that this program works correctly.  However, the
64       complexity of the algorithms, and, in particular, the presence
65       of various special cases in the code which occur with very low
66       but non-zero probability make it impossible to rule out the
67       possibility of bugs remaining in the program.  DO NOT COMPRESS
68       ANY DATA WITH THIS PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE
69       POSSIBILITY, HOWEVER SMALL, THAT THE DATA WILL NOT BE RECOVERABLE.
70 
71       That is not to say this program is inherently unreliable.
72       Indeed, I very much hope the opposite is true.  bzip2 has been
73       carefully constructed and extensively tested.
74 
75    PATENTS:
76       To the best of my knowledge, bzip2 does not use any patented
77       algorithms.  However, I do not have the resources available to
78       carry out a full patent search.  Therefore I cannot give any
79       guarantee of the above statement.
80 --*/
81 
82 
83 
84 /*----------------------------------------------------*/
85 /*--- and now for something much more pleasant :-) ---*/
86 /*----------------------------------------------------*/
87 
88 /*---------------------------------------------*/
89 /*--
90   Place a 1 beside your platform, and 0 elsewhere.
91 --*/
92 
93 /*--
94   Generic 32-bit Unix.
95   Also works on 64-bit Unix boxes.
96 --*/
97 #define BZ_UNIX      1
98 
99 /*--
100   Win32, as seen by Jacob Navia's excellent
101   port of (Chris Fraser & David Hanson)'s excellent
102   lcc compiler.
103 --*/
104 #define BZ_LCCWIN32  0
105 
106 
107 
108 /*---------------------------------------------*/
109 /*--
110   Some stuff for all platforms.
111 --*/
112 
113 #include <stdio.h>
114 #include <stdlib.h>
115 #if DEBUG
116   #include <assert.h>
117 #endif
118 #include <string.h>
119 #include <signal.h>
120 #include <math.h>
121 
122 #define ERROR_IF_EOF(i)       { if ((i) == EOF)  ioError(); }
123 #define ERROR_IF_NOT_ZERO(i)  { if ((i) != 0)    ioError(); }
124 #define ERROR_IF_MINUS_ONE(i) { if ((i) == (-1)) ioError(); }
125 
126 
127 /*---------------------------------------------*/
128 /*--
129    Platform-specific stuff.
130 --*/
131 
132 #if BZ_UNIX
133    #include <sys/types.h>
134    #include <utime.h>
135    #include <unistd.h>
136    #include <malloc.h>
137    #include <sys/stat.h>
138    #include <sys/times.h>
139 
140    #define Int32   int
141    #define UInt32  unsigned int
142    #define Char    char
143    #define UChar   unsigned char
144    #define Int16   short
145    #define UInt16  unsigned short
146 
147    #define PATH_SEP    '/'
148    #define MY_LSTAT    lstat
149    #define MY_S_IFREG  S_ISREG
150    #define MY_STAT     stat
151 
152    #define APPEND_FILESPEC(root, name) \
153       root=snocString((root), (name))
154 
155    #define SET_BINARY_MODE(fd) /**/
156 
157    /*--
158       You should try very hard to persuade your C compiler
159       to inline the bits marked INLINE.  Otherwise bzip2 will
160       run rather slowly.  gcc version 2.x is recommended.
161    --*/
162    #ifdef __GNUC__
163       #define INLINE   inline
164       #define NORETURN __attribute__ ((noreturn))
165    #else
166       #define INLINE   /**/
167       #define NORETURN /**/
168    #endif
169 #endif
170 
171 
172 
173 #if BZ_LCCWIN32
174    #include <io.h>
175    #include <fcntl.h>
176    #include <sys\stat.h>
177 
178    #define Int32   int
179    #define UInt32  unsigned int
180    #define Int16   short
181    #define UInt16  unsigned short
182    #define Char    char
183    #define UChar   unsigned char
184 
185    #define INLINE         /**/
186    #define NORETURN       /**/
187    #define PATH_SEP       '\\'
188    #define MY_LSTAT       _stat
189    #define MY_STAT        _stat
190    #define MY_S_IFREG(x)  ((x) & _S_IFREG)
191 
192    #if 0
193    /*-- lcc-win32 seems to expand wildcards itself --*/
194    #define APPEND_FILESPEC(root, spec)                \
195       do {                                            \
196          if ((spec)[0] == '-') {                      \
197             root = snocString((root), (spec));        \
198          } else {                                     \
199             struct _finddata_t c_file;                \
200             long hFile;                               \
201             hFile = _findfirst((spec), &c_file);      \
202             if ( hFile == -1L ) {                     \
203                root = snocString ((root), (spec));    \
204             } else {                                  \
205                int anInt = 0;                         \
206                while ( anInt == 0 ) {                 \
207                   root = snocString((root),           \
208                             &c_file.name[0]);         \
209                   anInt = _findnext(hFile, &c_file);  \
210                }                                      \
211             }                                         \
212          }                                            \
213       } while ( 0 )
214    #else
215    #define APPEND_FILESPEC(root, name)                \
216       root = snocString ((root), (name))
217    #endif
218 
219    #define SET_BINARY_MODE(fd)                        \
220       do {                                            \
221          int retVal = setmode ( fileno ( fd ),        \
222                                O_BINARY );            \
223          ERROR_IF_MINUS_ONE ( retVal );               \
224       } while ( 0 )
225 
226 #endif
227 
228 
229 /*---------------------------------------------*/
230 /*--
231   Some more stuff for all platforms :-)
232 --*/
233 
234 #define Bool   unsigned char
235 #define True   1
236 #define False  0
237 
238 /*--
239   IntNative is your platform's `native' int size.
240   Only here to avoid probs with 64-bit platforms.
241 --*/
242 #define IntNative int
243 
244 
245 /*--
246    change to 1, or compile with -DDEBUG=1 to debug
247 --*/
248 #ifndef DEBUG
249 #define DEBUG 0
250 #endif
251 
252 
253 /*---------------------------------------------------*/
254 /*---                                             ---*/
255 /*---------------------------------------------------*/
256 
257 /*--
258    Implementation notes, July 1997
259    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
260 
261    Memory allocation
262    ~~~~~~~~~~~~~~~~~
263    All large data structures are allocated on the C heap,
264    for better or for worse.  That includes the various
265    arrays of pointers, striped words, bytes, frequency
266    tables and buffers for compression and decompression.
267 
268    bzip2 can operate at various block-sizes, ranging from
269    100k to 900k in 100k steps, and it allocates only as
270    much as it needs to.  When compressing, we know from the
271    command-line options what the block-size is going to be,
272    so all allocation can be done at start-up; if that
273    succeeds, there can be no further allocation problems.
274 
275    Decompression is more complicated.  Each compressed file
276    contains, in its header, a byte indicating the block
277    size used for compression.  This means bzip2 potentially
278    needs to reallocate memory for each file it deals with,
279    which in turn opens the possibility for a memory allocation
280    failure part way through a run of files, by encountering
281    a file requiring a much larger block size than all the
282    ones preceding it.
283 
284    The policy is to simply give up if a memory allocation
285    failure occurs.  During decompression, it would be
286    possible to move on to subsequent files in the hope that
287    some might ask for a smaller block size, but the
288    complications for doing this seem more trouble than they
289    are worth.
290 
291 
292    Compressed file formats
293    ~~~~~~~~~~~~~~~~~~~~~~~
294    [This is now entirely different from both 0.21, and from
295     any previous Huffman-coded variant of bzip.
296     See the associated file bzip2.txt for details.]
297 
298 
299    Error conditions
300    ~~~~~~~~~~~~~~~~
301    Dealing with error conditions is the least satisfactory
302    aspect of bzip2.  The policy is to try and leave the
303    filesystem in a consistent state, then quit, even if it
304    means not processing some of the files mentioned in the
305    command line.  `A consistent state' means that a file
306    exists either in its compressed or uncompressed form,
307    but not both.  This boils down to the rule `delete the
308    output file if an error condition occurs, leaving the
309    input intact'.  Input files are only deleted when we can
310    be pretty sure the output file has been written and
311    closed successfully.
312 
313    Errors are a dog because there's so many things to
314    deal with.  The following can happen mid-file, and
315    require cleaning up.
316 
317      internal `panics' -- indicating a bug
318      corrupted or inconsistent compressed file
319      can't allocate enough memory to decompress this file
320      I/O error reading/writing/opening/closing
321      signal catches -- Control-C, SIGTERM, SIGHUP.
322 
323    Other conditions, primarily pertaining to file names,
324    can be checked in-between files, which makes dealing
325    with them easier.
326 --*/
327 
328 
329 
330 /*---------------------------------------------------*/
331 /*--- Misc (file handling) data decls             ---*/
332 /*---------------------------------------------------*/
333 
334 UInt32  bytesIn, bytesOut;
335 Int32   verbosity;
336 Bool    keepInputFiles, smallMode, testFailsExist;
337 UInt32  globalCrc;
338 Int32   numFileNames, numFilesProcessed;
339 
340 
341 /*-- source modes; F==file, I==stdin, O==stdout --*/
342 #define SM_I2O           1
343 #define SM_F2O           2
344 #define SM_F2F           3
345 
346 /*-- operation modes --*/
347 #define OM_Z             1
348 #define OM_UNZ           2
349 #define OM_TEST          3
350 
351 Int32   opMode;
352 Int32   srcMode;
353 
354 
355 Int32   longestFileName;
356 Char    inName[1024];
357 Char    outName[1024];
358 Char    *progName;
359 Char    progNameReally[1024];
360 FILE    *outputHandleJustInCase;
361 
362 void    panic                 ( Char* )          NORETURN;
363 void    ioError               ( void )           NORETURN;
364 void    compressOutOfMemory   ( Int32, Int32 )   NORETURN;
365 void    uncompressOutOfMemory ( Int32, Int32 )   NORETURN;
366 void    blockOverrun          ( void )           NORETURN;
367 void    badBlockHeader        ( void )           NORETURN;
368 void    badBGLengths          ( void )           NORETURN;
369 void    crcError              ( UInt32, UInt32 ) NORETURN;
370 void    bitStreamEOF          ( void )           NORETURN;
371 void    cleanUpAndFail        ( Int32 )          NORETURN;
372 void    compressedStreamEOF   ( void )           NORETURN;
373 
374 void*   myMalloc ( Int32 );
375 
376 
377 
378 /*---------------------------------------------------*/
379 /*--- Data decls for the front end                ---*/
380 /*---------------------------------------------------*/
381 
382 /*--
383    The overshoot bytes allow us to avoid most of
384    the cost of pointer renormalisation during
385    comparison of rotations in sorting.
386    The figure of 20 is derived as follows:
387       qSort3 allows an overshoot of up to 10.
388       It then calls simpleSort, which calls
389       fullGtU, also with max overshoot 10.
390       fullGtU does up to 10 comparisons without
391       renormalising, giving 10+10 == 20.
392 --*/
393 #define NUM_OVERSHOOT_BYTES 20
394 
395 /*--
396   These are the main data structures for
397   the Burrows-Wheeler transform.
398 --*/
399 
400 /*--
401   Pointers to compression and decompression
402   structures.  Set by
403      allocateCompressStructures   and
404      setDecompressStructureSizes
405 
406   The structures are always set to be suitable
407   for a block of size 100000 * blockSize100k.
408 --*/
409 UChar    *block;    /*-- compress   --*/
410 UInt16   *quadrant; /*-- compress   --*/
411 Int32    *zptr;     /*-- compress   --*/
412 UInt16   *szptr;    /*-- overlays zptr ---*/
413 Int32    *ftab;     /*-- compress   --*/
414 
415 UInt16   *ll16;     /*-- small decompress --*/
416 UChar    *ll4;      /*-- small decompress --*/
417 
418 Int32    *tt;       /*-- fast decompress  --*/
419 UChar    *ll8;      /*-- fast decompress  --*/
420 
421 
422 /*--
423   freq table collected to save a pass over the data
424   during decompression.
425 --*/
426 Int32   unzftab[256];
427 
428 
429 /*--
430    index of the last char in the block, so
431    the block size == last + 1.
432 --*/
433 Int32  last;
434 
435 
436 /*--
437   index in zptr[] of original string after sorting.
438 --*/
439 Int32  origPtr;
440 
441 
442 /*--
443   always: in the range 0 .. 9.
444   The current block size is 100000 * this number.
445 --*/
446 Int32  blockSize100k;
447 
448 
449 /*--
450   Used when sorting.  If too many long comparisons
451   happen, we stop sorting, randomise the block
452   slightly, and try again.
453 --*/
454 
455 Int32  workFactor;
456 Int32  workDone;
457 Int32  workLimit;
458 Bool   blockRandomised;
459 Bool   firstAttempt;
460 Int32  nBlocksRandomised;
461 
462 
463 
464 /*---------------------------------------------------*/
465 /*--- Data decls for the back end                 ---*/
466 /*---------------------------------------------------*/
467 
468 #define MAX_ALPHA_SIZE 258
469 #define MAX_CODE_LEN    23
470 
471 #define RUNA 0
472 #define RUNB 1
473 
474 #define N_GROUPS 6
475 #define G_SIZE   50
476 #define N_ITERS  4
477 
478 #define MAX_SELECTORS (2 + (900000 / G_SIZE))
479 
480 Bool  inUse[256];
481 Int32 nInUse;
482 
483 UChar seqToUnseq[256];
484 UChar unseqToSeq[256];
485 
486 UChar selector   [MAX_SELECTORS];
487 UChar selectorMtf[MAX_SELECTORS];
488 
489 Int32 nMTF;
490 
491 Int32 mtfFreq[MAX_ALPHA_SIZE];
492 
493 UChar len  [N_GROUPS][MAX_ALPHA_SIZE];
494 
495 /*-- decompress only --*/
496 Int32 limit  [N_GROUPS][MAX_ALPHA_SIZE];
497 Int32 base   [N_GROUPS][MAX_ALPHA_SIZE];
498 Int32 perm   [N_GROUPS][MAX_ALPHA_SIZE];
499 Int32 minLens[N_GROUPS];
500 
501 /*-- compress only --*/
502 Int32  code [N_GROUPS][MAX_ALPHA_SIZE];
503 Int32  rfreq[N_GROUPS][MAX_ALPHA_SIZE];
504 
505 
506 /*---------------------------------------------------*/
507 /*--- 32-bit CRC grunge                           ---*/
508 /*---------------------------------------------------*/
509 
510 /*--
511   I think this is an implementation of the AUTODIN-II,
512   Ethernet & FDDI 32-bit CRC standard.  Vaguely derived
513   from code by Rob Warnock, in Section 51 of the
514   comp.compression FAQ.
515 --*/
516 
517 UInt32 crc32Table[256] = {
518 
519    /*-- Ugly, innit? --*/
520 
521    0x00000000UL, 0x04c11db7UL, 0x09823b6eUL, 0x0d4326d9UL,
522    0x130476dcUL, 0x17c56b6bUL, 0x1a864db2UL, 0x1e475005UL,
523    0x2608edb8UL, 0x22c9f00fUL, 0x2f8ad6d6UL, 0x2b4bcb61UL,
524    0x350c9b64UL, 0x31cd86d3UL, 0x3c8ea00aUL, 0x384fbdbdUL,
525    0x4c11db70UL, 0x48d0c6c7UL, 0x4593e01eUL, 0x4152fda9UL,
526    0x5f15adacUL, 0x5bd4b01bUL, 0x569796c2UL, 0x52568b75UL,
527    0x6a1936c8UL, 0x6ed82b7fUL, 0x639b0da6UL, 0x675a1011UL,
528    0x791d4014UL, 0x7ddc5da3UL, 0x709f7b7aUL, 0x745e66cdUL,
529    0x9823b6e0UL, 0x9ce2ab57UL, 0x91a18d8eUL, 0x95609039UL,
530    0x8b27c03cUL, 0x8fe6dd8bUL, 0x82a5fb52UL, 0x8664e6e5UL,
531    0xbe2b5b58UL, 0xbaea46efUL, 0xb7a96036UL, 0xb3687d81UL,
532    0xad2f2d84UL, 0xa9ee3033UL, 0xa4ad16eaUL, 0xa06c0b5dUL,
533    0xd4326d90UL, 0xd0f37027UL, 0xddb056feUL, 0xd9714b49UL,
534    0xc7361b4cUL, 0xc3f706fbUL, 0xceb42022UL, 0xca753d95UL,
535    0xf23a8028UL, 0xf6fb9d9fUL, 0xfbb8bb46UL, 0xff79a6f1UL,
536    0xe13ef6f4UL, 0xe5ffeb43UL, 0xe8bccd9aUL, 0xec7dd02dUL,
537    0x34867077UL, 0x30476dc0UL, 0x3d044b19UL, 0x39c556aeUL,
538    0x278206abUL, 0x23431b1cUL, 0x2e003dc5UL, 0x2ac12072UL,
539    0x128e9dcfUL, 0x164f8078UL, 0x1b0ca6a1UL, 0x1fcdbb16UL,
540    0x018aeb13UL, 0x054bf6a4UL, 0x0808d07dUL, 0x0cc9cdcaUL,
541    0x7897ab07UL, 0x7c56b6b0UL, 0x71159069UL, 0x75d48ddeUL,
542    0x6b93dddbUL, 0x6f52c06cUL, 0x6211e6b5UL, 0x66d0fb02UL,
543    0x5e9f46bfUL, 0x5a5e5b08UL, 0x571d7dd1UL, 0x53dc6066UL,
544    0x4d9b3063UL, 0x495a2dd4UL, 0x44190b0dUL, 0x40d816baUL,
545    0xaca5c697UL, 0xa864db20UL, 0xa527fdf9UL, 0xa1e6e04eUL,
546    0xbfa1b04bUL, 0xbb60adfcUL, 0xb6238b25UL, 0xb2e29692UL,
547    0x8aad2b2fUL, 0x8e6c3698UL, 0x832f1041UL, 0x87ee0df6UL,
548    0x99a95df3UL, 0x9d684044UL, 0x902b669dUL, 0x94ea7b2aUL,
549    0xe0b41de7UL, 0xe4750050UL, 0xe9362689UL, 0xedf73b3eUL,
550    0xf3b06b3bUL, 0xf771768cUL, 0xfa325055UL, 0xfef34de2UL,
551    0xc6bcf05fUL, 0xc27dede8UL, 0xcf3ecb31UL, 0xcbffd686UL,
552    0xd5b88683UL, 0xd1799b34UL, 0xdc3abdedUL, 0xd8fba05aUL,
553    0x690ce0eeUL, 0x6dcdfd59UL, 0x608edb80UL, 0x644fc637UL,
554    0x7a089632UL, 0x7ec98b85UL, 0x738aad5cUL, 0x774bb0ebUL,
555    0x4f040d56UL, 0x4bc510e1UL, 0x46863638UL, 0x42472b8fUL,
556    0x5c007b8aUL, 0x58c1663dUL, 0x558240e4UL, 0x51435d53UL,
557    0x251d3b9eUL, 0x21dc2629UL, 0x2c9f00f0UL, 0x285e1d47UL,
558    0x36194d42UL, 0x32d850f5UL, 0x3f9b762cUL, 0x3b5a6b9bUL,
559    0x0315d626UL, 0x07d4cb91UL, 0x0a97ed48UL, 0x0e56f0ffUL,
560    0x1011a0faUL, 0x14d0bd4dUL, 0x19939b94UL, 0x1d528623UL,
561    0xf12f560eUL, 0xf5ee4bb9UL, 0xf8ad6d60UL, 0xfc6c70d7UL,
562    0xe22b20d2UL, 0xe6ea3d65UL, 0xeba91bbcUL, 0xef68060bUL,
563    0xd727bbb6UL, 0xd3e6a601UL, 0xdea580d8UL, 0xda649d6fUL,
564    0xc423cd6aUL, 0xc0e2d0ddUL, 0xcda1f604UL, 0xc960ebb3UL,
565    0xbd3e8d7eUL, 0xb9ff90c9UL, 0xb4bcb610UL, 0xb07daba7UL,
566    0xae3afba2UL, 0xaafbe615UL, 0xa7b8c0ccUL, 0xa379dd7bUL,
567    0x9b3660c6UL, 0x9ff77d71UL, 0x92b45ba8UL, 0x9675461fUL,
568    0x8832161aUL, 0x8cf30badUL, 0x81b02d74UL, 0x857130c3UL,
569    0x5d8a9099UL, 0x594b8d2eUL, 0x5408abf7UL, 0x50c9b640UL,
570    0x4e8ee645UL, 0x4a4ffbf2UL, 0x470cdd2bUL, 0x43cdc09cUL,
571    0x7b827d21UL, 0x7f436096UL, 0x7200464fUL, 0x76c15bf8UL,
572    0x68860bfdUL, 0x6c47164aUL, 0x61043093UL, 0x65c52d24UL,
573    0x119b4be9UL, 0x155a565eUL, 0x18197087UL, 0x1cd86d30UL,
574    0x029f3d35UL, 0x065e2082UL, 0x0b1d065bUL, 0x0fdc1becUL,
575    0x3793a651UL, 0x3352bbe6UL, 0x3e119d3fUL, 0x3ad08088UL,
576    0x2497d08dUL, 0x2056cd3aUL, 0x2d15ebe3UL, 0x29d4f654UL,
577    0xc5a92679UL, 0xc1683bceUL, 0xcc2b1d17UL, 0xc8ea00a0UL,
578    0xd6ad50a5UL, 0xd26c4d12UL, 0xdf2f6bcbUL, 0xdbee767cUL,
579    0xe3a1cbc1UL, 0xe760d676UL, 0xea23f0afUL, 0xeee2ed18UL,
580    0xf0a5bd1dUL, 0xf464a0aaUL, 0xf9278673UL, 0xfde69bc4UL,
581    0x89b8fd09UL, 0x8d79e0beUL, 0x803ac667UL, 0x84fbdbd0UL,
582    0x9abc8bd5UL, 0x9e7d9662UL, 0x933eb0bbUL, 0x97ffad0cUL,
583    0xafb010b1UL, 0xab710d06UL, 0xa6322bdfUL, 0xa2f33668UL,
584    0xbcb4666dUL, 0xb8757bdaUL, 0xb5365d03UL, 0xb1f740b4UL
585 };
586 
587 
588 /*---------------------------------------------*/
initialiseCRC(void)589 void initialiseCRC ( void )
590 {
591    globalCrc = 0xffffffffUL;
592 }
593 
594 
595 /*---------------------------------------------*/
getFinalCRC(void)596 UInt32 getFinalCRC ( void )
597 {
598    return ~globalCrc;
599 }
600 
601 
602 /*---------------------------------------------*/
getGlobalCRC(void)603 UInt32 getGlobalCRC ( void )
604 {
605    return globalCrc;
606 }
607 
608 
609 /*---------------------------------------------*/
setGlobalCRC(UInt32 newCrc)610 void setGlobalCRC ( UInt32 newCrc )
611 {
612    globalCrc = newCrc;
613 }
614 
615 
616 /*---------------------------------------------*/
617 #define UPDATE_CRC(crcVar,cha)              \
618 {                                           \
619    crcVar = (crcVar << 8) ^                 \
620             crc32Table[(crcVar >> 24) ^     \
621                        ((UChar)cha)];       \
622 }
623 
624 
625 /*---------------------------------------------------*/
626 /*--- Bit stream I/O                              ---*/
627 /*---------------------------------------------------*/
628 
629 
630 UInt32 bsBuff;
631 Int32  bsLive;
632 FILE*  bsStream;
633 Bool   bsWriting;
634 
635 
636 /*---------------------------------------------*/
bsSetStream(FILE * f,Bool wr)637 void bsSetStream ( FILE* f, Bool wr )
638 {
639    if (bsStream != NULL) panic ( "bsSetStream" );
640    bsStream = f;
641    bsLive = 0;
642    bsBuff = 0;
643    bytesOut = 0;
644    bytesIn = 0;
645    bsWriting = wr;
646 }
647 
648 
649 /*---------------------------------------------*/
bsFinishedWithStream(void)650 void bsFinishedWithStream ( void )
651 {
652    if (bsWriting)
653       while (bsLive > 0) {
654          fputc ( (UChar)(bsBuff >> 24), bsStream );
655          bsBuff <<= 8;
656          bsLive -= 8;
657          bytesOut++;
658       }
659    bsStream = NULL;
660 }
661 
662 
663 /*---------------------------------------------*/
664 #define bsNEEDR(nz)                           \
665 {                                             \
666    while (bsLive < nz) {                      \
667       Int32 zzi = fgetc ( bsStream );         \
668       if (zzi == EOF) compressedStreamEOF();  \
669       bsBuff = (bsBuff << 8) | (zzi & 0xffL); \
670       bsLive += 8;                            \
671    }                                          \
672 }
673 
674 
675 /*---------------------------------------------*/
676 #define bsNEEDW(nz)                           \
677 {                                             \
678    while (bsLive >= 8) {                      \
679       fputc ( (UChar)(bsBuff >> 24),          \
680                bsStream );                    \
681       bsBuff <<= 8;                           \
682       bsLive -= 8;                            \
683       bytesOut++;                             \
684    }                                          \
685 }
686 
687 
688 /*---------------------------------------------*/
689 #define bsR1(vz)                              \
690 {                                             \
691    bsNEEDR(1);                                \
692    vz = (bsBuff >> (bsLive-1)) & 1;           \
693    bsLive--;                                  \
694 }
695 
696 
697 /*---------------------------------------------*/
bsR(Int32 n)698 INLINE UInt32 bsR ( Int32 n )
699 {
700    UInt32 v;
701    bsNEEDR ( n );
702    v = (bsBuff >> (bsLive-n)) & ((1 << n)-1);
703    bsLive -= n;
704    return v;
705 }
706 
707 
708 /*---------------------------------------------*/
bsW(Int32 n,UInt32 v)709 INLINE void bsW ( Int32 n, UInt32 v )
710 {
711    bsNEEDW ( n );
712    bsBuff |= (v << (32 - bsLive - n));
713    bsLive += n;
714 }
715 
716 
717 /*---------------------------------------------*/
bsGetUChar(void)718 UChar bsGetUChar ( void )
719 {
720    return (UChar)bsR(8);
721 }
722 
723 
724 /*---------------------------------------------*/
bsPutUChar(UChar c)725 void bsPutUChar ( UChar c )
726 {
727    bsW(8, (UInt32)c );
728 }
729 
730 
731 /*---------------------------------------------*/
bsGetUInt32(void)732 Int32 bsGetUInt32 ( void )
733 {
734    UInt32 u;
735    u = 0;
736    u = (u << 8) | bsR(8);
737    u = (u << 8) | bsR(8);
738    u = (u << 8) | bsR(8);
739    u = (u << 8) | bsR(8);
740    return u;
741 }
742 
743 
744 /*---------------------------------------------*/
bsGetIntVS(UInt32 numBits)745 UInt32 bsGetIntVS ( UInt32 numBits )
746 {
747    return (UInt32)bsR(numBits);
748 }
749 
750 
751 /*---------------------------------------------*/
bsGetInt32(void)752 UInt32 bsGetInt32 ( void )
753 {
754    return (Int32)bsGetUInt32();
755 }
756 
757 
758 /*---------------------------------------------*/
bsPutUInt32(UInt32 u)759 void bsPutUInt32 ( UInt32 u )
760 {
761    bsW ( 8, (u >> 24) & 0xffL );
762    bsW ( 8, (u >> 16) & 0xffL );
763    bsW ( 8, (u >>  8) & 0xffL );
764    bsW ( 8,  u        & 0xffL );
765 }
766 
767 
768 /*---------------------------------------------*/
bsPutInt32(Int32 c)769 void bsPutInt32 ( Int32 c )
770 {
771    bsPutUInt32 ( (UInt32)c );
772 }
773 
774 
775 /*---------------------------------------------*/
bsPutIntVS(Int32 numBits,UInt32 c)776 void bsPutIntVS ( Int32 numBits, UInt32 c )
777 {
778    bsW ( numBits, c );
779 }
780 
781 
782 /*---------------------------------------------------*/
783 /*--- Huffman coding low-level stuff              ---*/
784 /*---------------------------------------------------*/
785 
786 #define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
787 #define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
788 #define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
789 
790 #define ADDWEIGHTS(zw1,zw2)                           \
791    (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
792    (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
793 
794 #define UPHEAP(z)                                     \
795 {                                                     \
796    Int32 zz, tmp;                                     \
797    zz = z; tmp = heap[zz];                            \
798    while (weight[tmp] < weight[heap[zz >> 1]]) {      \
799       heap[zz] = heap[zz >> 1];                       \
800       zz >>= 1;                                       \
801    }                                                  \
802    heap[zz] = tmp;                                    \
803 }
804 
805 #define DOWNHEAP(z)                                   \
806 {                                                     \
807    Int32 zz, yy, tmp;                                 \
808    zz = z; tmp = heap[zz];                            \
809    while (True) {                                     \
810       yy = zz << 1;                                   \
811       if (yy > nHeap) break;                          \
812       if (yy < nHeap &&                               \
813           weight[heap[yy+1]] < weight[heap[yy]])      \
814          yy++;                                        \
815       if (weight[tmp] < weight[heap[yy]]) break;      \
816       heap[zz] = heap[yy];                            \
817       zz = yy;                                        \
818    }                                                  \
819    heap[zz] = tmp;                                    \
820 }
821 
822 
823 /*---------------------------------------------*/
hbMakeCodeLengths(UChar * len,Int32 * freq,Int32 alphaSize,Int32 maxLen)824 void hbMakeCodeLengths ( UChar *len,
825                          Int32 *freq,
826                          Int32 alphaSize,
827                          Int32 maxLen )
828 {
829    /*--
830       Nodes and heap entries run from 1.  Entry 0
831       for both the heap and nodes is a sentinel.
832    --*/
833    Int32 nNodes, nHeap, n1, n2, i, j, k;
834    Bool  tooLong;
835 
836    Int32 heap   [ MAX_ALPHA_SIZE + 2 ];
837    Int32 weight [ MAX_ALPHA_SIZE * 2 ];
838    Int32 parent [ MAX_ALPHA_SIZE * 2 ];
839 
840    for (i = 0; i < alphaSize; i++)
841       weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
842 
843    while (True) {
844 
845       nNodes = alphaSize;
846       nHeap = 0;
847 
848       heap[0] = 0;
849       weight[0] = 0;
850       parent[0] = -2;
851 
852       for (i = 1; i <= alphaSize; i++) {
853          parent[i] = -1;
854          nHeap++;
855          heap[nHeap] = i;
856          UPHEAP(nHeap);
857       }
858       if (!(nHeap < (MAX_ALPHA_SIZE+2)))
859          panic ( "hbMakeCodeLengths(1)" );
860 
861       while (nHeap > 1) {
862          n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
863          n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
864          nNodes++;
865          parent[n1] = parent[n2] = nNodes;
866          weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
867          parent[nNodes] = -1;
868          nHeap++;
869          heap[nHeap] = nNodes;
870          UPHEAP(nHeap);
871       }
872       if (!(nNodes < (MAX_ALPHA_SIZE * 2)))
873          panic ( "hbMakeCodeLengths(2)" );
874 
875       tooLong = False;
876       for (i = 1; i <= alphaSize; i++) {
877          j = 0;
878          k = i;
879          while (parent[k] >= 0) { k = parent[k]; j++; }
880          len[i-1] = j;
881          if (j > maxLen) tooLong = True;
882       }
883 
884       if (! tooLong) break;
885 
886       for (i = 1; i < alphaSize; i++) {
887          j = weight[i] >> 8;
888          j = 1 + (j / 2);
889          weight[i] = j << 8;
890       }
891    }
892 }
893 
894 
895 /*---------------------------------------------*/
hbAssignCodes(Int32 * code,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)896 void hbAssignCodes ( Int32 *code,
897                      UChar *length,
898                      Int32 minLen,
899                      Int32 maxLen,
900                      Int32 alphaSize )
901 {
902    Int32 n, vec, i;
903 
904    vec = 0;
905    for (n = minLen; n <= maxLen; n++) {
906       for (i = 0; i < alphaSize; i++)
907          if (length[i] == n) { code[i] = vec; vec++; };
908       vec <<= 1;
909    }
910 }
911 
912 
913 /*---------------------------------------------*/
hbCreateDecodeTables(Int32 * limit,Int32 * base,Int32 * perm,UChar * length,Int32 minLen,Int32 maxLen,Int32 alphaSize)914 void hbCreateDecodeTables ( Int32 *limit,
915                             Int32 *base,
916                             Int32 *perm,
917                             UChar *length,
918                             Int32 minLen,
919                             Int32 maxLen,
920                             Int32 alphaSize )
921 {
922    Int32 pp, i, j, vec;
923 
924    pp = 0;
925    for (i = minLen; i <= maxLen; i++)
926       for (j = 0; j < alphaSize; j++)
927          if (length[j] == i) { perm[pp] = j; pp++; };
928 
929    for (i = 0; i < MAX_CODE_LEN; i++) base[i] = 0;
930    for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
931 
932    for (i = 1; i < MAX_CODE_LEN; i++) base[i] += base[i-1];
933 
934    for (i = 0; i < MAX_CODE_LEN; i++) limit[i] = 0;
935    vec = 0;
936 
937    for (i = minLen; i <= maxLen; i++) {
938       vec += (base[i+1] - base[i]);
939       limit[i] = vec-1;
940       vec <<= 1;
941    }
942    for (i = minLen + 1; i <= maxLen; i++)
943       base[i] = ((limit[i-1] + 1) << 1) - base[i];
944 }
945 
946 
947 
948 /*---------------------------------------------------*/
949 /*--- Undoing the reversible transformation       ---*/
950 /*---------------------------------------------------*/
951 
952 /*---------------------------------------------*/
953 #define SET_LL4(i,n)                                          \
954    { if (((i) & 0x1) == 0)                                    \
955         ll4[(i) >> 1] = (ll4[(i) >> 1] & 0xf0) | (n); else    \
956         ll4[(i) >> 1] = (ll4[(i) >> 1] & 0x0f) | ((n) << 4);  \
957    }
958 
959 #define GET_LL4(i)                             \
960     (((UInt32)(ll4[(i) >> 1])) >> (((i) << 2) & 0x4) & 0xF)
961 
962 #define SET_LL(i,n)                       \
963    { ll16[i] = (UInt16)(n & 0x0000ffff);  \
964      SET_LL4(i, n >> 16);                 \
965    }
966 
967 #define GET_LL(i) \
968    (((UInt32)ll16[i]) | (GET_LL4(i) << 16))
969 
970 
971 /*---------------------------------------------*/
972 /*--
973   Manage memory for compression/decompression.
974   When compressing, a single block size applies to
975   all files processed, and that's set when the
976   program starts.  But when decompressing, each file
977   processed could have been compressed with a
978   different block size, so we may have to free
979   and reallocate on a per-file basis.
980 
981   A call with argument of zero means
982   `free up everything.'  And a value of zero for
983   blockSize100k means no memory is currently allocated.
984 --*/
985 
986 
987 /*---------------------------------------------*/
allocateCompressStructures(void)988 void allocateCompressStructures ( void )
989 {
990    Int32 n  = 100000 * blockSize100k;
991    block    = malloc ( (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) );
992    quadrant = malloc ( (n     + NUM_OVERSHOOT_BYTES) * sizeof(Int16) );
993    zptr     = malloc ( n                             * sizeof(Int32) );
994    ftab     = malloc ( 65537                         * sizeof(Int32) );
995 
996    if (block == NULL || quadrant == NULL ||
997        zptr == NULL  || ftab == NULL) {
998       Int32 totalDraw
999          = (n + 1 + NUM_OVERSHOOT_BYTES) * sizeof(UChar) +
1000            (n     + NUM_OVERSHOOT_BYTES) * sizeof(Int16) +
1001            n                             * sizeof(Int32) +
1002            65537                         * sizeof(Int32);
1003 
1004       compressOutOfMemory ( totalDraw, n );
1005    }
1006 
1007    /*--
1008       Since we want valid indexes for block of
1009       -1 to n + NUM_OVERSHOOT_BYTES - 1
1010       inclusive.
1011    --*/
1012    block++;
1013 
1014    /*--
1015       The back end needs a place to store the MTF values
1016       whilst it calculates the coding tables.  We could
1017       put them in the zptr array.  However, these values
1018       will fit in a short, so we overlay szptr at the
1019       start of zptr, in the hope of reducing the number
1020       of cache misses induced by the multiple traversals
1021       of the MTF values when calculating coding tables.
1022       Seems to improve compression speed by about 1%.
1023    --*/
1024    szptr = (UInt16*)zptr;
1025 }
1026 
1027 
1028 /*---------------------------------------------*/
setDecompressStructureSizes(Int32 newSize100k)1029 void setDecompressStructureSizes ( Int32 newSize100k )
1030 {
1031    if (! (0 <= newSize100k   && newSize100k   <= 9 &&
1032           0 <= blockSize100k && blockSize100k <= 9))
1033       panic ( "setDecompressStructureSizes" );
1034 
1035    if (newSize100k == blockSize100k) return;
1036 
1037    blockSize100k = newSize100k;
1038 
1039    if (ll16  != NULL) free ( ll16  );
1040    if (ll4   != NULL) free ( ll4   );
1041    if (ll8   != NULL) free ( ll8   );
1042    if (tt    != NULL) free ( tt    );
1043 
1044    if (newSize100k == 0) return;
1045 
1046    if (smallMode) {
1047 
1048       Int32 n = 100000 * newSize100k;
1049       ll16    = malloc ( n * sizeof(UInt16) );
1050       ll4     = malloc ( ((n+1) >> 1) * sizeof(UChar) );
1051 
1052       if (ll4 == NULL || ll16 == NULL) {
1053          Int32 totalDraw
1054             = n * sizeof(Int16) + ((n+1) >> 1) * sizeof(UChar);
1055          uncompressOutOfMemory ( totalDraw, n );
1056       }
1057 
1058    } else {
1059 
1060       Int32 n = 100000 * newSize100k;
1061       ll8     = malloc ( n * sizeof(UChar) );
1062       tt      = malloc ( n * sizeof(Int32) );
1063 
1064       if (ll8 == NULL || tt == NULL) {
1065          Int32 totalDraw
1066             = n * sizeof(UChar) + n * sizeof(UInt32);
1067          uncompressOutOfMemory ( totalDraw, n );
1068       }
1069 
1070    }
1071 }
1072 
1073 
1074 
1075 /*---------------------------------------------------*/
1076 /*--- The new back end                            ---*/
1077 /*---------------------------------------------------*/
1078 
1079 /*---------------------------------------------*/
makeMaps(void)1080 void makeMaps ( void )
1081 {
1082    Int32 i;
1083    nInUse = 0;
1084    for (i = 0; i < 256; i++)
1085       if (inUse[i]) {
1086          seqToUnseq[nInUse] = i;
1087          unseqToSeq[i] = nInUse;
1088          nInUse++;
1089       }
1090 }
1091 
1092 
1093 /*---------------------------------------------*/
generateMTFValues(void)1094 void generateMTFValues ( void )
1095 {
1096    UChar  yy[256];
1097    Int32  i, j;
1098    UChar  tmp;
1099    UChar  tmp2;
1100    Int32  zPend;
1101    Int32  wr;
1102    Int32  EOB;
1103 
1104    makeMaps();
1105    EOB = nInUse+1;
1106 
1107    for (i = 0; i <= EOB; i++) mtfFreq[i] = 0;
1108 
1109    wr = 0;
1110    zPend = 0;
1111    for (i = 0; i < nInUse; i++) yy[i] = (UChar) i;
1112 
1113 
1114    for (i = 0; i <= last; i++) {
1115       UChar ll_i;
1116 
1117       #if DEBUG
1118          assert (wr <= i);
1119       #endif
1120 
1121       ll_i = unseqToSeq[block[zptr[i] - 1]];
1122       #if DEBUG
1123          assert (ll_i < nInUse);
1124       #endif
1125 
1126       j = 0;
1127       tmp = yy[j];
1128       while ( ll_i != tmp ) {
1129          j++;
1130          tmp2 = tmp;
1131          tmp = yy[j];
1132          yy[j] = tmp2;
1133       };
1134       yy[0] = tmp;
1135 
1136       if (j == 0) {
1137          zPend++;
1138       } else {
1139          if (zPend > 0) {
1140             zPend--;
1141             while (True) {
1142                switch (zPend % 2) {
1143                   case 0: szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break;
1144                   case 1: szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break;
1145                };
1146                if (zPend < 2) break;
1147                zPend = (zPend - 2) / 2;
1148             };
1149             zPend = 0;
1150          }
1151          szptr[wr] = j+1; wr++; mtfFreq[j+1]++;
1152       }
1153    }
1154 
1155    if (zPend > 0) {
1156       zPend--;
1157       while (True) {
1158          switch (zPend % 2) {
1159             case 0:  szptr[wr] = RUNA; wr++; mtfFreq[RUNA]++; break;
1160             case 1:  szptr[wr] = RUNB; wr++; mtfFreq[RUNB]++; break;
1161          };
1162          if (zPend < 2) break;
1163          zPend = (zPend - 2) / 2;
1164       };
1165    }
1166 
1167    szptr[wr] = EOB; wr++; mtfFreq[EOB]++;
1168 
1169    nMTF = wr;
1170 }
1171 
1172 
1173 /*---------------------------------------------*/
1174 #define LESSER_ICOST  0
1175 #define GREATER_ICOST 15
1176 
sendMTFValues(void)1177 void sendMTFValues ( void )
1178 {
1179    Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
1180    Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
1181    Int32 nGroups, nBytes;
1182 
1183    /*--
1184    UChar  len [N_GROUPS][MAX_ALPHA_SIZE];
1185    is a global since the decoder also needs it.
1186 
1187    Int32  code[N_GROUPS][MAX_ALPHA_SIZE];
1188    Int32  rfreq[N_GROUPS][MAX_ALPHA_SIZE];
1189    are also globals only used in this proc.
1190    Made global to keep stack frame size small.
1191    --*/
1192 
1193 
1194    UInt16 cost[N_GROUPS];
1195    Int32  fave[N_GROUPS];
1196 
1197    if (verbosity >= 3)
1198       fprintf ( stderr,
1199                 "      %d in block, %d after MTF & 1-2 coding, %d+2 syms in use\n",
1200                 last+1, nMTF, nInUse );
1201 
1202    alphaSize = nInUse+2;
1203    for (t = 0; t < N_GROUPS; t++)
1204       for (v = 0; v < alphaSize; v++)
1205          len[t][v] = GREATER_ICOST;
1206 
1207    /*--- Decide how many coding tables to use ---*/
1208    if (nMTF <= 0) panic ( "sendMTFValues(0)" );
1209    if (nMTF < 200) nGroups = 2; else
1210    if (nMTF < 800) nGroups = 4; else
1211                    nGroups = 6;
1212 
1213    /*--- Generate an initial set of coding tables ---*/
1214    {
1215       Int32 nPart, remF, tFreq, aFreq;
1216 
1217       nPart = nGroups;
1218       remF  = nMTF;
1219       gs = 0;
1220       while (nPart > 0) {
1221          tFreq = remF / nPart;
1222          ge = gs-1;
1223          aFreq = 0;
1224          while (aFreq < tFreq && ge < alphaSize-1) {
1225             ge++;
1226             aFreq += mtfFreq[ge];
1227          }
1228 
1229          if (ge > gs
1230              && nPart != nGroups && nPart != 1
1231              && ((nGroups-nPart) % 2 == 1)) {
1232             aFreq -= mtfFreq[ge];
1233             ge--;
1234          }
1235 
1236          if (verbosity >= 3)
1237             fprintf ( stderr,
1238                       "      initial group %d, [%d .. %d], has %d syms (%4.1f%%)\n",
1239                               nPart, gs, ge, aFreq,
1240                               (100.0 * (float)aFreq) / (float)nMTF );
1241 
1242          for (v = 0; v < alphaSize; v++)
1243             if (v >= gs && v <= ge)
1244                len[nPart-1][v] = LESSER_ICOST; else
1245                len[nPart-1][v] = GREATER_ICOST;
1246 
1247          nPart--;
1248          gs = ge+1;
1249          remF -= aFreq;
1250       }
1251    }
1252 
1253    /*---
1254       Iterate up to N_ITERS times to improve the tables.
1255    ---*/
1256    for (iter = 0; iter < N_ITERS; iter++) {
1257 
1258       for (t = 0; t < nGroups; t++) fave[t] = 0;
1259 
1260       for (t = 0; t < nGroups; t++)
1261          for (v = 0; v < alphaSize; v++)
1262             rfreq[t][v] = 0;
1263 
1264       nSelectors = 0;
1265       totc = 0;
1266       gs = 0;
1267       while (True) {
1268 
1269          /*--- Set group start & end marks. --*/
1270          if (gs >= nMTF) break;
1271          ge = gs + G_SIZE - 1;
1272          if (ge >= nMTF) ge = nMTF-1;
1273 
1274          /*--
1275             Calculate the cost of this group as coded
1276             by each of the coding tables.
1277          --*/
1278          for (t = 0; t < nGroups; t++) cost[t] = 0;
1279 
1280          if (nGroups == 6) {
1281             register UInt16 cost0, cost1, cost2, cost3, cost4, cost5;
1282             cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
1283             for (i = gs; i <= ge; i++) {
1284                UInt16 icv = szptr[i];
1285                cost0 += len[0][icv];
1286                cost1 += len[1][icv];
1287                cost2 += len[2][icv];
1288                cost3 += len[3][icv];
1289                cost4 += len[4][icv];
1290                cost5 += len[5][icv];
1291             }
1292             cost[0] = cost0; cost[1] = cost1; cost[2] = cost2;
1293             cost[3] = cost3; cost[4] = cost4; cost[5] = cost5;
1294          } else {
1295             for (i = gs; i <= ge; i++) {
1296                UInt16 icv = szptr[i];
1297                for (t = 0; t < nGroups; t++) cost[t] += len[t][icv];
1298             }
1299          }
1300 
1301          /*--
1302             Find the coding table which is best for this group,
1303             and record its identity in the selector table.
1304          --*/
1305          bc = 999999999; bt = -1;
1306          for (t = 0; t < nGroups; t++)
1307             if (cost[t] < bc) { bc = cost[t]; bt = t; };
1308          totc += bc;
1309          fave[bt]++;
1310          selector[nSelectors] = bt;
1311          nSelectors++;
1312 
1313          /*--
1314             Increment the symbol frequencies for the selected table.
1315           --*/
1316          for (i = gs; i <= ge; i++)
1317             rfreq[bt][ szptr[i] ]++;
1318 
1319          gs = ge+1;
1320       }
1321       if (verbosity >= 3) {
1322          fprintf ( stderr,
1323                    "      pass %d: size is %d, grp uses are ",
1324                    iter+1, totc/8 );
1325          for (t = 0; t < nGroups; t++)
1326             fprintf ( stderr, "%d ", fave[t] );
1327          fprintf ( stderr, "\n" );
1328       }
1329 
1330       /*--
1331         Recompute the tables based on the accumulated frequencies.
1332       --*/
1333       for (t = 0; t < nGroups; t++)
1334          hbMakeCodeLengths ( &len[t][0], &rfreq[t][0], alphaSize, 20 );
1335    }
1336 
1337 
1338    if (!(nGroups < 8)) panic ( "sendMTFValues(1)" );
1339    if (!(nSelectors < 32768 &&
1340          nSelectors <= (2 + (900000 / G_SIZE))))
1341                        panic ( "sendMTFValues(2)" );
1342 
1343 
1344    /*--- Compute MTF values for the selectors. ---*/
1345    {
1346       UChar pos[N_GROUPS], ll_i, tmp2, tmp;
1347       for (i = 0; i < nGroups; i++) pos[i] = i;
1348       for (i = 0; i < nSelectors; i++) {
1349          ll_i = selector[i];
1350          j = 0;
1351          tmp = pos[j];
1352          while ( ll_i != tmp ) {
1353             j++;
1354             tmp2 = tmp;
1355             tmp = pos[j];
1356             pos[j] = tmp2;
1357          };
1358          pos[0] = tmp;
1359          selectorMtf[i] = j;
1360       }
1361    };
1362 
1363    /*--- Assign actual codes for the tables. --*/
1364    for (t = 0; t < nGroups; t++) {
1365       minLen = 32;
1366       maxLen = 0;
1367       for (i = 0; i < alphaSize; i++) {
1368          if (len[t][i] > maxLen) maxLen = len[t][i];
1369          if (len[t][i] < minLen) minLen = len[t][i];
1370       }
1371       if (maxLen > 20) panic ( "sendMTFValues(3)" );
1372       if (minLen < 1)  panic ( "sendMTFValues(4)" );
1373       hbAssignCodes ( &code[t][0], &len[t][0],
1374                       minLen, maxLen, alphaSize );
1375    }
1376 
1377    /*--- Transmit the mapping table. ---*/
1378    {
1379       Bool inUse16[16];
1380       for (i = 0; i < 16; i++) {
1381           inUse16[i] = False;
1382           for (j = 0; j < 16; j++)
1383              if (inUse[i * 16 + j]) inUse16[i] = True;
1384       }
1385 
1386       nBytes = bytesOut;
1387       for (i = 0; i < 16; i++)
1388          if (inUse16[i]) bsW(1,1); else bsW(1,0);
1389 
1390       for (i = 0; i < 16; i++)
1391          if (inUse16[i])
1392             for (j = 0; j < 16; j++)
1393                if (inUse[i * 16 + j]) bsW(1,1); else bsW(1,0);
1394 
1395       if (verbosity >= 3)
1396          fprintf ( stderr, "      bytes: mapping %d, ", bytesOut-nBytes );
1397    }
1398 
1399    /*--- Now the selectors. ---*/
1400    nBytes = bytesOut;
1401    bsW ( 3, nGroups );
1402    bsW ( 15, nSelectors );
1403    for (i = 0; i < nSelectors; i++) {
1404       for (j = 0; j < selectorMtf[i]; j++) bsW(1,1);
1405       bsW(1,0);
1406    }
1407    if (verbosity >= 3)
1408       fprintf ( stderr, "selectors %d, ", bytesOut-nBytes );
1409 
1410    /*--- Now the coding tables. ---*/
1411    nBytes = bytesOut;
1412 
1413    for (t = 0; t < nGroups; t++) {
1414       Int32 curr = len[t][0];
1415       bsW ( 5, curr );
1416       for (i = 0; i < alphaSize; i++) {
1417          while (curr < len[t][i]) { bsW(2,2); curr++; /* 10 */ };
1418          while (curr > len[t][i]) { bsW(2,3); curr--; /* 11 */ };
1419          bsW ( 1, 0 );
1420       }
1421    }
1422 
1423    if (verbosity >= 3)
1424       fprintf ( stderr, "code lengths %d, ", bytesOut-nBytes );
1425 
1426    /*--- And finally, the block data proper ---*/
1427    nBytes = bytesOut;
1428    selCtr = 0;
1429    gs = 0;
1430    while (True) {
1431       if (gs >= nMTF) break;
1432       ge = gs + G_SIZE - 1;
1433       if (ge >= nMTF) ge = nMTF-1;
1434       for (i = gs; i <= ge; i++) {
1435          #if DEBUG
1436             assert (selector[selCtr] < nGroups);
1437          #endif
1438          bsW ( len  [selector[selCtr]] [szptr[i]],
1439                code [selector[selCtr]] [szptr[i]] );
1440       }
1441 
1442       gs = ge+1;
1443       selCtr++;
1444    }
1445    if (!(selCtr == nSelectors)) panic ( "sendMTFValues(5)" );
1446 
1447    if (verbosity >= 3)
1448       fprintf ( stderr, "codes %d\n", bytesOut-nBytes );
1449 }
1450 
1451 
1452 /*---------------------------------------------*/
moveToFrontCodeAndSend(void)1453 void moveToFrontCodeAndSend ( void )
1454 {
1455    bsPutIntVS ( 24, origPtr );
1456    generateMTFValues();
1457    sendMTFValues();
1458 }
1459 
1460 
1461 /*---------------------------------------------*/
recvDecodingTables(void)1462 void recvDecodingTables ( void )
1463 {
1464    Int32 i, j, t, nGroups, nSelectors, alphaSize;
1465    Int32 minLen, maxLen;
1466    Bool inUse16[16];
1467 
1468    /*--- Receive the mapping table ---*/
1469    for (i = 0; i < 16; i++)
1470       if (bsR(1) == 1)
1471          inUse16[i] = True; else
1472          inUse16[i] = False;
1473 
1474    for (i = 0; i < 256; i++) inUse[i] = False;
1475 
1476    for (i = 0; i < 16; i++)
1477       if (inUse16[i])
1478          for (j = 0; j < 16; j++)
1479             if (bsR(1) == 1) inUse[i * 16 + j] = True;
1480 
1481    makeMaps();
1482    alphaSize = nInUse+2;
1483 
1484    /*--- Now the selectors ---*/
1485    nGroups = bsR ( 3 );
1486    nSelectors = bsR ( 15 );
1487    for (i = 0; i < nSelectors; i++) {
1488       j = 0;
1489       while (bsR(1) == 1) j++;
1490       selectorMtf[i] = j;
1491    }
1492 
1493    /*--- Undo the MTF values for the selectors. ---*/
1494    {
1495       UChar pos[N_GROUPS], tmp, v;
1496       for (v = 0; v < nGroups; v++) pos[v] = v;
1497 
1498       for (i = 0; i < nSelectors; i++) {
1499          v = selectorMtf[i];
1500          tmp = pos[v];
1501          while (v > 0) { pos[v] = pos[v-1]; v--; }
1502          pos[0] = tmp;
1503          selector[i] = tmp;
1504       }
1505    }
1506 
1507    /*--- Now the coding tables ---*/
1508    for (t = 0; t < nGroups; t++) {
1509       Int32 curr = bsR ( 5 );
1510       for (i = 0; i < alphaSize; i++) {
1511          while (bsR(1) == 1) {
1512             if (bsR(1) == 0) curr++; else curr--;
1513          }
1514          len[t][i] = curr;
1515       }
1516    }
1517 
1518    /*--- Create the Huffman decoding tables ---*/
1519    for (t = 0; t < nGroups; t++) {
1520       minLen = 32;
1521       maxLen = 0;
1522       for (i = 0; i < alphaSize; i++) {
1523          if (len[t][i] > maxLen) maxLen = len[t][i];
1524          if (len[t][i] < minLen) minLen = len[t][i];
1525       }
1526       hbCreateDecodeTables (
1527          &limit[t][0], &base[t][0], &perm[t][0], &len[t][0],
1528          minLen, maxLen, alphaSize
1529       );
1530       minLens[t] = minLen;
1531    }
1532 }
1533 
1534 
1535 /*---------------------------------------------*/
1536 #define GET_MTF_VAL(lval)                 \
1537 {                                         \
1538    Int32 zt, zn, zvec, zj;                \
1539    if (groupPos == 0) {                   \
1540       groupNo++;                          \
1541       groupPos = G_SIZE;                  \
1542    }                                      \
1543    groupPos--;                            \
1544    zt = selector[groupNo];                \
1545    zn = minLens[zt];                      \
1546    zvec = bsR ( zn );                     \
1547    while (zvec > limit[zt][zn]) {         \
1548       zn++; bsR1(zj);                     \
1549       zvec = (zvec << 1) | zj;            \
1550    };                                     \
1551    lval = perm[zt][zvec - base[zt][zn]];  \
1552 }
1553 
1554 
1555 /*---------------------------------------------*/
getAndMoveToFrontDecode(void)1556 void getAndMoveToFrontDecode ( void )
1557 {
1558    UChar  yy[256];
1559    Int32  i, j, nextSym, limitLast;
1560    Int32  EOB, groupNo, groupPos;
1561 
1562    limitLast = 100000 * blockSize100k;
1563    origPtr   = bsGetIntVS ( 24 );
1564 
1565    recvDecodingTables();
1566    EOB      = nInUse+1;
1567    groupNo  = -1;
1568    groupPos = 0;
1569 
1570    /*--
1571       Setting up the unzftab entries here is not strictly
1572       necessary, but it does save having to do it later
1573       in a separate pass, and so saves a block's worth of
1574       cache misses.
1575    --*/
1576    for (i = 0; i <= 255; i++) unzftab[i] = 0;
1577 
1578    for (i = 0; i <= 255; i++) yy[i] = (UChar) i;
1579 
1580    last = -1;
1581 
1582    GET_MTF_VAL(nextSym);
1583 
1584    while (True) {
1585 
1586       if (nextSym == EOB) break;
1587 
1588       if (nextSym == RUNA || nextSym == RUNB) {
1589          UChar ch;
1590          Int32 s = -1;
1591          Int32 N = 1;
1592          do {
1593             if (nextSym == RUNA) s = s + (0+1) * N; else
1594             if (nextSym == RUNB) s = s + (1+1) * N;
1595             N = N * 2;
1596             GET_MTF_VAL(nextSym);
1597          }
1598             while (nextSym == RUNA || nextSym == RUNB);
1599 
1600          s++;
1601          ch = seqToUnseq[yy[0]];
1602          unzftab[ch] += s;
1603 
1604          if (smallMode)
1605             while (s > 0) {
1606                last++;
1607                ll16[last] = ch;
1608                s--;
1609             }
1610          else
1611             while (s > 0) {
1612                last++;
1613                ll8[last] = ch;
1614                s--;
1615             };
1616 
1617          if (last >= limitLast) blockOverrun();
1618          continue;
1619 
1620       } else {
1621 
1622          UChar tmp;
1623          last++; if (last >= limitLast) blockOverrun();
1624 
1625          tmp = yy[nextSym-1];
1626          unzftab[seqToUnseq[tmp]]++;
1627          if (smallMode)
1628             ll16[last] = seqToUnseq[tmp]; else
1629             ll8[last]  = seqToUnseq[tmp];
1630 
1631          /*--
1632             This loop is hammered during decompression,
1633             hence the unrolling.
1634 
1635             for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
1636          --*/
1637 
1638          j = nextSym-1;
1639          for (; j > 3; j -= 4) {
1640             yy[j]   = yy[j-1];
1641             yy[j-1] = yy[j-2];
1642             yy[j-2] = yy[j-3];
1643             yy[j-3] = yy[j-4];
1644          }
1645          for (; j > 0; j--) yy[j] = yy[j-1];
1646 
1647          yy[0] = tmp;
1648          GET_MTF_VAL(nextSym);
1649          continue;
1650       }
1651    }
1652 }
1653 
1654 
1655 /*---------------------------------------------------*/
1656 /*--- Block-sorting machinery                     ---*/
1657 /*---------------------------------------------------*/
1658 
1659 /*---------------------------------------------*/
1660 /*--
1661   Compare two strings in block.  We assume (see
1662   discussion above) that i1 and i2 have a max
1663   offset of 10 on entry, and that the first
1664   bytes of both block and quadrant have been
1665   copied into the "overshoot area", ie
1666   into the subscript range
1667   [last+1 .. last+NUM_OVERSHOOT_BYTES].
1668 --*/
fullGtU(Int32 i1,Int32 i2)1669 INLINE Bool fullGtU ( Int32 i1, Int32 i2 )
1670 {
1671    Int32 k;
1672    UChar c1, c2;
1673    UInt16 s1, s2;
1674 
1675    #if DEBUG
1676       /*--
1677         shellsort shouldn't ask to compare
1678         something with itself.
1679       --*/
1680       assert (i1 != i2);
1681    #endif
1682 
1683    c1 = block[i1];
1684    c2 = block[i2];
1685    if (c1 != c2) return (c1 > c2);
1686    i1++; i2++;
1687 
1688    c1 = block[i1];
1689    c2 = block[i2];
1690    if (c1 != c2) return (c1 > c2);
1691    i1++; i2++;
1692 
1693    c1 = block[i1];
1694    c2 = block[i2];
1695    if (c1 != c2) return (c1 > c2);
1696    i1++; i2++;
1697 
1698    c1 = block[i1];
1699    c2 = block[i2];
1700    if (c1 != c2) return (c1 > c2);
1701    i1++; i2++;
1702 
1703    c1 = block[i1];
1704    c2 = block[i2];
1705    if (c1 != c2) return (c1 > c2);
1706    i1++; i2++;
1707 
1708    c1 = block[i1];
1709    c2 = block[i2];
1710    if (c1 != c2) return (c1 > c2);
1711    i1++; i2++;
1712 
1713    k = last + 1;
1714 
1715    do {
1716 
1717       c1 = block[i1];
1718       c2 = block[i2];
1719       if (c1 != c2) return (c1 > c2);
1720       s1 = quadrant[i1];
1721       s2 = quadrant[i2];
1722       if (s1 != s2) return (s1 > s2);
1723       i1++; i2++;
1724 
1725       c1 = block[i1];
1726       c2 = block[i2];
1727       if (c1 != c2) return (c1 > c2);
1728       s1 = quadrant[i1];
1729       s2 = quadrant[i2];
1730       if (s1 != s2) return (s1 > s2);
1731       i1++; i2++;
1732 
1733       c1 = block[i1];
1734       c2 = block[i2];
1735       if (c1 != c2) return (c1 > c2);
1736       s1 = quadrant[i1];
1737       s2 = quadrant[i2];
1738       if (s1 != s2) return (s1 > s2);
1739       i1++; i2++;
1740 
1741       c1 = block[i1];
1742       c2 = block[i2];
1743       if (c1 != c2) return (c1 > c2);
1744       s1 = quadrant[i1];
1745       s2 = quadrant[i2];
1746       if (s1 != s2) return (s1 > s2);
1747       i1++; i2++;
1748 
1749       if (i1 > last) { i1 -= last; i1--; };
1750       if (i2 > last) { i2 -= last; i2--; };
1751 
1752       k -= 4;
1753       workDone++;
1754    }
1755       while (k >= 0);
1756 
1757    return False;
1758 }
1759 
1760 /*---------------------------------------------*/
1761 /*--
1762    Knuth's increments seem to work better
1763    than Incerpi-Sedgewick here.  Possibly
1764    because the number of elems to sort is
1765    usually small, typically <= 20.
1766 --*/
1767 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
1768                    9841, 29524, 88573, 265720,
1769                    797161, 2391484 };
1770 
simpleSort(Int32 lo,Int32 hi,Int32 d)1771 void simpleSort ( Int32 lo, Int32 hi, Int32 d )
1772 {
1773    Int32 i, j, h, bigN, hp;
1774    Int32 v;
1775 
1776    bigN = hi - lo + 1;
1777    if (bigN < 2) return;
1778 
1779    hp = 0;
1780    while (incs[hp] < bigN) hp++;
1781    hp--;
1782 
1783    for (; hp >= 0; hp--) {
1784       h = incs[hp];
1785       if (verbosity >= 5)
1786          fprintf ( stderr, "          shell increment %d\n", h );
1787 
1788       i = lo + h;
1789       while (True) {
1790 
1791          /*-- copy 1 --*/
1792          if (i > hi) break;
1793          v = zptr[i];
1794          j = i;
1795          while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
1796             zptr[j] = zptr[j-h];
1797             j = j - h;
1798             if (j <= (lo + h - 1)) break;
1799          }
1800          zptr[j] = v;
1801          i++;
1802 
1803          /*-- copy 2 --*/
1804          if (i > hi) break;
1805          v = zptr[i];
1806          j = i;
1807          while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
1808             zptr[j] = zptr[j-h];
1809             j = j - h;
1810             if (j <= (lo + h - 1)) break;
1811          }
1812          zptr[j] = v;
1813          i++;
1814 
1815          /*-- copy 3 --*/
1816          if (i > hi) break;
1817          v = zptr[i];
1818          j = i;
1819          while ( fullGtU ( zptr[j-h]+d, v+d ) ) {
1820             zptr[j] = zptr[j-h];
1821             j = j - h;
1822             if (j <= (lo + h - 1)) break;
1823          }
1824          zptr[j] = v;
1825          i++;
1826 
1827          if (workDone > workLimit && firstAttempt) return;
1828       }
1829    }
1830 }
1831 
1832 
1833 /*---------------------------------------------*/
1834 /*--
1835    The following is an implementation of
1836    an elegant 3-way quicksort for strings,
1837    described in a paper "Fast Algorithms for
1838    Sorting and Searching Strings", by Robert
1839    Sedgewick and Jon L. Bentley.
1840 --*/
1841 
1842 #define swap(lv1, lv2) \
1843    { Int32 tmp = lv1; lv1 = lv2; lv2 = tmp; }
1844 
vswap(Int32 p1,Int32 p2,Int32 n)1845 INLINE void vswap ( Int32 p1, Int32 p2, Int32 n )
1846 {
1847    while (n > 0) {
1848       swap(zptr[p1], zptr[p2]);
1849       p1++; p2++; n--;
1850    }
1851 }
1852 
med3(UChar a,UChar b,UChar c)1853 INLINE UChar med3 ( UChar a, UChar b, UChar c )
1854 {
1855    UChar t;
1856    if (a > b) { t = a; a = b; b = t; };
1857    if (b > c) { t = b; b = c; c = t; };
1858    if (a > b)          b = a;
1859    return b;
1860 }
1861 
1862 
1863 #define min(a,b) ((a) < (b)) ? (a) : (b)
1864 
1865 typedef
1866    struct { Int32 ll; Int32 hh; Int32 dd; }
1867    StackElem;
1868 
1869 #define push(lz,hz,dz) { stack[sp].ll = lz; \
1870                          stack[sp].hh = hz; \
1871                          stack[sp].dd = dz; \
1872                          sp++; }
1873 
1874 #define pop(lz,hz,dz) { sp--;               \
1875                         lz = stack[sp].ll;  \
1876                         hz = stack[sp].hh;  \
1877                         dz = stack[sp].dd; }
1878 
1879 #define SMALL_THRESH 20
1880 #define DEPTH_THRESH 10
1881 
1882 /*--
1883    If you are ever unlucky/improbable enough
1884    to get a stack overflow whilst sorting,
1885    increase the following constant and try
1886    again.  In practice I have never seen the
1887    stack go above 27 elems, so the following
1888    limit seems very generous.
1889 --*/
1890 #define QSORT_STACK_SIZE 1000
1891 
1892 
qSort3(Int32 loSt,Int32 hiSt,Int32 dSt)1893 void qSort3 ( Int32 loSt, Int32 hiSt, Int32 dSt )
1894 {
1895    Int32 unLo, unHi, ltLo, gtHi, med, n, m;
1896    Int32 sp, lo, hi, d;
1897    StackElem stack[QSORT_STACK_SIZE];
1898 
1899    sp = 0;
1900    push ( loSt, hiSt, dSt );
1901 
1902    while (sp > 0) {
1903 
1904       if (sp >= QSORT_STACK_SIZE) panic ( "stack overflow in qSort3" );
1905 
1906       pop ( lo, hi, d );
1907 
1908       if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
1909          simpleSort ( lo, hi, d );
1910          if (workDone > workLimit && firstAttempt) return;
1911          continue;
1912       }
1913 
1914       med = med3 ( block[zptr[ lo         ]+d],
1915                    block[zptr[ hi         ]+d],
1916                    block[zptr[ (lo+hi)>>1 ]+d] );
1917 
1918       unLo = ltLo = lo;
1919       unHi = gtHi = hi;
1920 
1921       while (True) {
1922          while (True) {
1923             if (unLo > unHi) break;
1924             n = ((Int32)block[zptr[unLo]+d]) - med;
1925             if (n == 0) { swap(zptr[unLo], zptr[ltLo]); ltLo++; unLo++; continue; };
1926             if (n >  0) break;
1927             unLo++;
1928          }
1929          while (True) {
1930             if (unLo > unHi) break;
1931             n = ((Int32)block[zptr[unHi]+d]) - med;
1932             if (n == 0) { swap(zptr[unHi], zptr[gtHi]); gtHi--; unHi--; continue; };
1933             if (n <  0) break;
1934             unHi--;
1935          }
1936          if (unLo > unHi) break;
1937          swap(zptr[unLo], zptr[unHi]); unLo++; unHi--;
1938       }
1939       #if DEBUG
1940          assert (unHi == unLo-1);
1941       #endif
1942 
1943       if (gtHi < ltLo) {
1944          push(lo, hi, d+1 );
1945          continue;
1946       }
1947 
1948       n = min(ltLo-lo, unLo-ltLo); vswap(lo, unLo-n, n);
1949       m = min(hi-gtHi, gtHi-unHi); vswap(unLo, hi-m+1, m);
1950 
1951       n = lo + unLo - ltLo - 1;
1952       m = hi - (gtHi - unHi) + 1;
1953 
1954       push ( lo, n, d );
1955       push ( n+1, m-1, d+1 );
1956       push ( m, hi, d );
1957    }
1958 }
1959 
1960 
1961 /*---------------------------------------------*/
1962 
1963 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
1964 
1965 #define SETMASK (1 << 21)
1966 #define CLEARMASK (~(SETMASK))
1967 
sortIt(void)1968 void sortIt ( void )
1969 {
1970    Int32 i, j, ss, sb;
1971    Int32 runningOrder[256];
1972    Int32 copy[256];
1973    Bool bigDone[256];
1974    UChar c1, c2;
1975    Int32 numQSorted;
1976 
1977    /*--
1978       In the various block-sized structures, live data runs
1979       from 0 to last+NUM_OVERSHOOT_BYTES inclusive.  First,
1980       set up the overshoot area for block.
1981    --*/
1982 
1983    if (verbosity >= 4) fprintf ( stderr, "        sort initialise ...\n" );
1984    for (i = 0; i < NUM_OVERSHOOT_BYTES; i++)
1985        block[last+i+1] = block[i % (last+1)];
1986    for (i = 0; i <= last+NUM_OVERSHOOT_BYTES; i++)
1987        quadrant[i] = 0;
1988 
1989    block[-1] = block[last];
1990 
1991    if (last < 4000) {
1992 
1993       /*--
1994          Use simpleSort(), since the full sorting mechanism
1995          has quite a large constant overhead.
1996       --*/
1997       if (verbosity >= 4) fprintf ( stderr, "        simpleSort ...\n" );
1998       for (i = 0; i <= last; i++) zptr[i] = i;
1999       firstAttempt = False;
2000       workDone = workLimit = 0;
2001       simpleSort ( 0, last, 0 );
2002       if (verbosity >= 4) fprintf ( stderr, "        simpleSort done.\n" );
2003 
2004    } else {
2005 
2006       numQSorted = 0;
2007       for (i = 0; i <= 255; i++) bigDone[i] = False;
2008 
2009       if (verbosity >= 4) fprintf ( stderr, "        bucket sorting ...\n" );
2010 
2011       for (i = 0; i <= 65536; i++) ftab[i] = 0;
2012 
2013       c1 = block[-1];
2014       for (i = 0; i <= last; i++) {
2015          c2 = block[i];
2016          ftab[(c1 << 8) + c2]++;
2017          c1 = c2;
2018       }
2019 
2020       for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
2021 
2022       c1 = block[0];
2023       for (i = 0; i < last; i++) {
2024          c2 = block[i+1];
2025          j = (c1 << 8) + c2;
2026          c1 = c2;
2027          ftab[j]--;
2028          zptr[ftab[j]] = i;
2029       }
2030       j = (block[last] << 8) + block[0];
2031       ftab[j]--;
2032       zptr[ftab[j]] = last;
2033 
2034       /*--
2035          Now ftab contains the first loc of every small bucket.
2036          Calculate the running order, from smallest to largest
2037          big bucket.
2038       --*/
2039 
2040       for (i = 0; i <= 255; i++) runningOrder[i] = i;
2041 
2042       {
2043          Int32 vv;
2044          Int32 h = 1;
2045          do h = 3 * h + 1; while (h <= 256);
2046          do {
2047             h = h / 3;
2048             for (i = h; i <= 255; i++) {
2049                vv = runningOrder[i];
2050                j = i;
2051                while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
2052                   runningOrder[j] = runningOrder[j-h];
2053                   j = j - h;
2054                   if (j <= (h - 1)) goto zero;
2055                }
2056                zero:
2057                runningOrder[j] = vv;
2058             }
2059          } while (h != 1);
2060       }
2061 
2062       /*--
2063          The main sorting loop.
2064       --*/
2065 
2066       for (i = 0; i <= 255; i++) {
2067 
2068          /*--
2069             Process big buckets, starting with the least full.
2070          --*/
2071          ss = runningOrder[i];
2072 
2073          /*--
2074             Complete the big bucket [ss] by quicksorting
2075             any unsorted small buckets [ss, j].  Hopefully
2076             previous pointer-scanning phases have already
2077             completed many of the small buckets [ss, j], so
2078             we don't have to sort them at all.
2079          --*/
2080          for (j = 0; j <= 255; j++) {
2081             sb = (ss << 8) + j;
2082             if ( ! (ftab[sb] & SETMASK) ) {
2083                Int32 lo = ftab[sb]   & CLEARMASK;
2084                Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
2085                if (hi > lo) {
2086                   if (verbosity >= 4)
2087                      fprintf ( stderr,
2088                                "        qsort [0x%x, 0x%x]   done %d   this %d\n",
2089                                ss, j, numQSorted, hi - lo + 1 );
2090                   qSort3 ( lo, hi, 2 );
2091                   numQSorted += ( hi - lo + 1 );
2092                   if (workDone > workLimit && firstAttempt) return;
2093                }
2094                ftab[sb] |= SETMASK;
2095             }
2096          }
2097 
2098          /*--
2099             The ss big bucket is now done.  Record this fact,
2100             and update the quadrant descriptors.  Remember to
2101             update quadrants in the overshoot area too, if
2102             necessary.  The "if (i < 255)" test merely skips
2103             this updating for the last bucket processed, since
2104             updating for the last bucket is pointless.
2105          --*/
2106          bigDone[ss] = True;
2107 
2108          if (i < 255) {
2109             Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
2110             Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
2111             Int32 shifts   = 0;
2112 
2113             while ((bbSize >> shifts) > 65534) shifts++;
2114 
2115             for (j = 0; j < bbSize; j++) {
2116                Int32 a2update     = zptr[bbStart + j];
2117                UInt16 qVal        = (UInt16)(j >> shifts);
2118                quadrant[a2update] = qVal;
2119                if (a2update < NUM_OVERSHOOT_BYTES)
2120                   quadrant[a2update + last + 1] = qVal;
2121             }
2122 
2123             if (! ( ((bbSize-1) >> shifts) <= 65535 )) panic ( "sortIt" );
2124          }
2125 
2126          /*--
2127             Now scan this big bucket so as to synthesise the
2128             sorted order for small buckets [t, ss] for all t != ss.
2129          --*/
2130          for (j = 0; j <= 255; j++)
2131             copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
2132 
2133          for (j = ftab[ss << 8] & CLEARMASK;
2134               j < (ftab[(ss+1) << 8] & CLEARMASK);
2135               j++) {
2136             c1 = block[zptr[j]-1];
2137             if ( ! bigDone[c1] ) {
2138                zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
2139                copy[c1] ++;
2140             }
2141          }
2142 
2143          for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
2144       }
2145       if (verbosity >= 4)
2146          fprintf ( stderr, "        %d pointers, %d sorted, %d scanned\n",
2147                            last+1, numQSorted, (last+1) - numQSorted );
2148    }
2149 }
2150 
2151 
2152 /*---------------------------------------------------*/
2153 /*--- Stuff for randomising repetitive blocks     ---*/
2154 /*---------------------------------------------------*/
2155 
2156 /*---------------------------------------------*/
2157 Int32 rNums[512] = {
2158    619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
2159    985, 724, 205, 454, 863, 491, 741, 242, 949, 214,
2160    733, 859, 335, 708, 621, 574, 73, 654, 730, 472,
2161    419, 436, 278, 496, 867, 210, 399, 680, 480, 51,
2162    878, 465, 811, 169, 869, 675, 611, 697, 867, 561,
2163    862, 687, 507, 283, 482, 129, 807, 591, 733, 623,
2164    150, 238, 59, 379, 684, 877, 625, 169, 643, 105,
2165    170, 607, 520, 932, 727, 476, 693, 425, 174, 647,
2166    73, 122, 335, 530, 442, 853, 695, 249, 445, 515,
2167    909, 545, 703, 919, 874, 474, 882, 500, 594, 612,
2168    641, 801, 220, 162, 819, 984, 589, 513, 495, 799,
2169    161, 604, 958, 533, 221, 400, 386, 867, 600, 782,
2170    382, 596, 414, 171, 516, 375, 682, 485, 911, 276,
2171    98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
2172    227, 730, 475, 186, 263, 647, 537, 686, 600, 224,
2173    469, 68, 770, 919, 190, 373, 294, 822, 808, 206,
2174    184, 943, 795, 384, 383, 461, 404, 758, 839, 887,
2175    715, 67, 618, 276, 204, 918, 873, 777, 604, 560,
2176    951, 160, 578, 722, 79, 804, 96, 409, 713, 940,
2177    652, 934, 970, 447, 318, 353, 859, 672, 112, 785,
2178    645, 863, 803, 350, 139, 93, 354, 99, 820, 908,
2179    609, 772, 154, 274, 580, 184, 79, 626, 630, 742,
2180    653, 282, 762, 623, 680, 81, 927, 626, 789, 125,
2181    411, 521, 938, 300, 821, 78, 343, 175, 128, 250,
2182    170, 774, 972, 275, 999, 639, 495, 78, 352, 126,
2183    857, 956, 358, 619, 580, 124, 737, 594, 701, 612,
2184    669, 112, 134, 694, 363, 992, 809, 743, 168, 974,
2185    944, 375, 748, 52, 600, 747, 642, 182, 862, 81,
2186    344, 805, 988, 739, 511, 655, 814, 334, 249, 515,
2187    897, 955, 664, 981, 649, 113, 974, 459, 893, 228,
2188    433, 837, 553, 268, 926, 240, 102, 654, 459, 51,
2189    686, 754, 806, 760, 493, 403, 415, 394, 687, 700,
2190    946, 670, 656, 610, 738, 392, 760, 799, 887, 653,
2191    978, 321, 576, 617, 626, 502, 894, 679, 243, 440,
2192    680, 879, 194, 572, 640, 724, 926, 56, 204, 700,
2193    707, 151, 457, 449, 797, 195, 791, 558, 945, 679,
2194    297, 59, 87, 824, 713, 663, 412, 693, 342, 606,
2195    134, 108, 571, 364, 631, 212, 174, 643, 304, 329,
2196    343, 97, 430, 751, 497, 314, 983, 374, 822, 928,
2197    140, 206, 73, 263, 980, 736, 876, 478, 430, 305,
2198    170, 514, 364, 692, 829, 82, 855, 953, 676, 246,
2199    369, 970, 294, 750, 807, 827, 150, 790, 288, 923,
2200    804, 378, 215, 828, 592, 281, 565, 555, 710, 82,
2201    896, 831, 547, 261, 524, 462, 293, 465, 502, 56,
2202    661, 821, 976, 991, 658, 869, 905, 758, 745, 193,
2203    768, 550, 608, 933, 378, 286, 215, 979, 792, 961,
2204    61, 688, 793, 644, 986, 403, 106, 366, 905, 644,
2205    372, 567, 466, 434, 645, 210, 389, 550, 919, 135,
2206    780, 773, 635, 389, 707, 100, 626, 958, 165, 504,
2207    920, 176, 193, 713, 857, 265, 203, 50, 668, 108,
2208    645, 990, 626, 197, 510, 357, 358, 850, 858, 364,
2209    936, 638
2210 };
2211 
2212 
2213 #define RAND_DECLS                                \
2214    Int32 rNToGo = 0;                              \
2215    Int32 rTPos  = 0;                              \
2216 
2217 #define RAND_MASK ((rNToGo == 1) ? 1 : 0)
2218 
2219 #define RAND_UPD_MASK                             \
2220    if (rNToGo == 0) {                             \
2221       rNToGo = rNums[rTPos];                      \
2222       rTPos++; if (rTPos == 512) rTPos = 0;       \
2223    }                                              \
2224    rNToGo--;
2225 
2226 
2227 
2228 /*---------------------------------------------------*/
2229 /*--- The Reversible Transformation (tm)          ---*/
2230 /*---------------------------------------------------*/
2231 
2232 /*---------------------------------------------*/
randomiseBlock(void)2233 void randomiseBlock ( void )
2234 {
2235    Int32 i;
2236    RAND_DECLS;
2237    for (i = 0; i < 256; i++) inUse[i] = False;
2238 
2239    for (i = 0; i <= last; i++) {
2240       RAND_UPD_MASK;
2241       block[i] ^= RAND_MASK;
2242       inUse[block[i]] = True;
2243    }
2244 }
2245 
2246 
2247 /*---------------------------------------------*/
doReversibleTransformation(void)2248 void doReversibleTransformation ( void )
2249 {
2250    Int32 i;
2251 
2252    if (verbosity >= 2) fprintf ( stderr, "\n" );
2253 
2254    workLimit       = workFactor * last;
2255    workDone        = 0;
2256    blockRandomised = False;
2257    firstAttempt    = True;
2258 
2259    sortIt ();
2260 
2261    if (verbosity >= 3)
2262       fprintf ( stderr, "      %d work, %d block, ratio %5.2f\n",
2263                         workDone, last, (float)workDone / (float)(last) );
2264 
2265    if (workDone > workLimit && firstAttempt) {
2266       if (verbosity >= 2)
2267          fprintf ( stderr, "    sorting aborted; randomising block\n" );
2268       randomiseBlock ();
2269       workLimit = workDone = 0;
2270       blockRandomised = True;
2271       firstAttempt = False;
2272       sortIt();
2273       if (verbosity >= 3)
2274          fprintf ( stderr, "      %d work, %d block, ratio %f\n",
2275                            workDone, last, (float)workDone / (float)(last) );
2276    }
2277 
2278    origPtr = -1;
2279    for (i = 0; i <= last; i++)
2280        if (zptr[i] == 0)
2281           { origPtr = i; break; };
2282 
2283    if (origPtr == -1) panic ( "doReversibleTransformation" );
2284 }
2285 
2286 
2287 /*---------------------------------------------*/
2288 
indexIntoF(Int32 indx,Int32 * cftab)2289 INLINE Int32 indexIntoF ( Int32 indx, Int32 *cftab )
2290 {
2291    Int32 nb, na, mid;
2292    nb = 0;
2293    na = 256;
2294    do {
2295       mid = (nb + na) >> 1;
2296       if (indx >= cftab[mid]) nb = mid; else na = mid;
2297    }
2298    while (na - nb != 1);
2299    return nb;
2300 }
2301 
2302 
2303 #define GET_SMALL(cccc)                     \
2304                                             \
2305       cccc = indexIntoF ( tPos, cftab );    \
2306       tPos = GET_LL(tPos);
2307 
2308 
undoReversibleTransformation_small(FILE * dst)2309 void undoReversibleTransformation_small ( FILE* dst )
2310 {
2311    Int32  cftab[257], cftabAlso[257];
2312    Int32  i, j, tmp, tPos;
2313    UChar  ch;
2314 
2315    /*--
2316       We assume here that the global array unzftab will
2317       already be holding the frequency counts for
2318       ll8[0 .. last].
2319    --*/
2320 
2321    /*-- Set up cftab to facilitate generation of indexIntoF --*/
2322    cftab[0] = 0;
2323    for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1];
2324    for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1];
2325 
2326    /*-- Make a copy of it, used in generation of T --*/
2327    for (i = 0; i <= 256; i++) cftabAlso[i] = cftab[i];
2328 
2329    /*-- compute the T vector --*/
2330    for (i = 0; i <= last; i++) {
2331       ch = (UChar)ll16[i];
2332       SET_LL(i, cftabAlso[ch]);
2333       cftabAlso[ch]++;
2334    }
2335 
2336    /*--
2337       Compute T^(-1) by pointer reversal on T.  This is rather
2338       subtle, in that, if the original block was two or more
2339       (in general, N) concatenated copies of the same thing,
2340       the T vector will consist of N cycles, each of length
2341       blocksize / N, and decoding will involve traversing one
2342       of these cycles N times.  Which particular cycle doesn't
2343       matter -- they are all equivalent.  The tricky part is to
2344       make sure that the pointer reversal creates a correct
2345       reversed cycle for us to traverse.  So, the code below
2346       simply reverses whatever cycle origPtr happens to fall into,
2347       without regard to the cycle length.  That gives one reversed
2348       cycle, which for normal blocks, is the entire block-size long.
2349       For repeated blocks, it will be interspersed with the other
2350       N-1 non-reversed cycles.  Providing that the F-subscripting
2351       phase which follows starts at origPtr, all then works ok.
2352    --*/
2353    i = origPtr;
2354    j = GET_LL(i);
2355    do {
2356       tmp = GET_LL(j);
2357       SET_LL(j, i);
2358       i = j;
2359       j = tmp;
2360    }
2361       while (i != origPtr);
2362 
2363    /*--
2364       We recreate the original by subscripting F through T^(-1).
2365       The run-length-decoder below requires characters incrementally,
2366       so tPos is set to a starting value, and is updated by
2367       the GET_SMALL macro.
2368    --*/
2369    tPos   = origPtr;
2370 
2371    /*-------------------------------------------------*/
2372    /*--
2373       This is pretty much a verbatim copy of the
2374       run-length decoder present in the distribution
2375       bzip-0.21; it has to be here to avoid creating
2376       block[] as an intermediary structure.  As in 0.21,
2377       this code derives from some sent to me by
2378       Christian von Roques.
2379 
2380       It allows dst==NULL, so as to support the test (-t)
2381       option without slowing down the fast decompression
2382       code.
2383    --*/
2384    {
2385       IntNative retVal;
2386       Int32     i2, count, chPrev, ch2;
2387       UInt32    localCrc;
2388 
2389       count    = 0;
2390       i2       = 0;
2391       ch2      = 256;   /*-- not a char and not EOF --*/
2392       localCrc = getGlobalCRC();
2393 
2394       {
2395          RAND_DECLS;
2396          while ( i2 <= last ) {
2397             chPrev = ch2;
2398             GET_SMALL(ch2);
2399             if (blockRandomised) {
2400                RAND_UPD_MASK;
2401                ch2 ^= (UInt32)RAND_MASK;
2402             }
2403             i2++;
2404 
2405             if (dst)
2406                retVal = putc ( ch2, dst );
2407 
2408             UPDATE_CRC ( localCrc, (UChar)ch2 );
2409 
2410             if (ch2 != chPrev) {
2411                count = 1;
2412             } else {
2413                count++;
2414                if (count >= 4) {
2415                   Int32 j2;
2416                   UChar z;
2417                   GET_SMALL(z);
2418                   if (blockRandomised) {
2419                      RAND_UPD_MASK;
2420                      z ^= RAND_MASK;
2421                   }
2422                   for (j2 = 0;  j2 < (Int32)z;  j2++) {
2423                      if (dst) retVal = putc (ch2, dst);
2424                      UPDATE_CRC ( localCrc, (UChar)ch2 );
2425                   }
2426                   i2++;
2427                   count = 0;
2428                }
2429             }
2430          }
2431       }
2432 
2433       setGlobalCRC ( localCrc );
2434    }
2435    /*-- end of the in-line run-length-decoder. --*/
2436 }
2437 #undef GET_SMALL
2438 
2439 
2440 /*---------------------------------------------*/
2441 
2442 #define GET_FAST(cccc)                       \
2443                                              \
2444       cccc = ll8[tPos];                      \
2445       tPos = tt[tPos];
2446 
2447 
undoReversibleTransformation_fast(FILE * dst)2448 void undoReversibleTransformation_fast ( FILE* dst )
2449 {
2450    Int32  cftab[257];
2451    Int32  i, tPos;
2452    UChar  ch;
2453 
2454    /*--
2455       We assume here that the global array unzftab will
2456       already be holding the frequency counts for
2457       ll8[0 .. last].
2458    --*/
2459 
2460    /*-- Set up cftab to facilitate generation of T^(-1) --*/
2461    cftab[0] = 0;
2462    for (i = 1; i <= 256; i++) cftab[i] = unzftab[i-1];
2463    for (i = 1; i <= 256; i++) cftab[i] += cftab[i-1];
2464 
2465    /*-- compute the T^(-1) vector --*/
2466    for (i = 0; i <= last; i++) {
2467       ch = (UChar)ll8[i];
2468       tt[cftab[ch]] = i;
2469       cftab[ch]++;
2470    }
2471 
2472    /*--
2473       We recreate the original by subscripting L through T^(-1).
2474       The run-length-decoder below requires characters incrementally,
2475       so tPos is set to a starting value, and is updated by
2476       the GET_FAST macro.
2477    --*/
2478    tPos   = tt[origPtr];
2479 
2480    /*-------------------------------------------------*/
2481    /*--
2482       This is pretty much a verbatim copy of the
2483       run-length decoder present in the distribution
2484       bzip-0.21; it has to be here to avoid creating
2485       block[] as an intermediary structure.  As in 0.21,
2486       this code derives from some sent to me by
2487       Christian von Roques.
2488    --*/
2489    {
2490       IntNative retVal;
2491       Int32     i2, count, chPrev, ch2;
2492       UInt32    localCrc;
2493 
2494       count    = 0;
2495       i2       = 0;
2496       ch2      = 256;   /*-- not a char and not EOF --*/
2497       localCrc = getGlobalCRC();
2498 
2499       if (blockRandomised) {
2500          RAND_DECLS;
2501          while ( i2 <= last ) {
2502             chPrev = ch2;
2503             GET_FAST(ch2);
2504             RAND_UPD_MASK;
2505             ch2 ^= (UInt32)RAND_MASK;
2506             i2++;
2507 
2508             retVal = putc ( ch2, dst );
2509             UPDATE_CRC ( localCrc, (UChar)ch2 );
2510 
2511             if (ch2 != chPrev) {
2512                count = 1;
2513             } else {
2514                count++;
2515                if (count >= 4) {
2516                   Int32 j2;
2517                   UChar z;
2518                   GET_FAST(z);
2519                   RAND_UPD_MASK;
2520                   z ^= RAND_MASK;
2521                   for (j2 = 0;  j2 < (Int32)z;  j2++) {
2522                      retVal = putc (ch2, dst);
2523                      UPDATE_CRC ( localCrc, (UChar)ch2 );
2524                   }
2525                   i2++;
2526                   count = 0;
2527                }
2528             }
2529          }
2530 
2531       } else {
2532 
2533          while ( i2 <= last ) {
2534             chPrev = ch2;
2535             GET_FAST(ch2);
2536             i2++;
2537 
2538             retVal = putc ( ch2, dst );
2539             UPDATE_CRC ( localCrc, (UChar)ch2 );
2540 
2541             if (ch2 != chPrev) {
2542                count = 1;
2543             } else {
2544                count++;
2545                if (count >= 4) {
2546                   Int32 j2;
2547                   UChar z;
2548                   GET_FAST(z);
2549                   for (j2 = 0;  j2 < (Int32)z;  j2++) {
2550                      retVal = putc (ch2, dst);
2551                      UPDATE_CRC ( localCrc, (UChar)ch2 );
2552                   }
2553                   i2++;
2554                   count = 0;
2555                }
2556             }
2557          }
2558 
2559       }   /*-- if (blockRandomised) --*/
2560 
2561       setGlobalCRC ( localCrc );
2562    }
2563    /*-- end of the in-line run-length-decoder. --*/
2564 }
2565 #undef GET_FAST
2566 
2567 
2568 /*---------------------------------------------------*/
2569 /*--- The block loader and RLEr                   ---*/
2570 /*---------------------------------------------------*/
2571 
2572 /*---------------------------------------------*/
2573 /*  Top 16:   run length, 1 to 255.
2574 *   Lower 16: the char, or MY_EOF for EOF.
2575 */
2576 
2577 #define MY_EOF 257
2578 
getRLEpair(FILE * src)2579 INLINE Int32 getRLEpair ( FILE* src )
2580 {
2581    Int32     runLength;
2582    IntNative ch, chLatest;
2583 
2584    ch = getc ( src );
2585 
2586    /*--- Because I have no idea what kind of a value EOF is. ---*/
2587    if (ch == EOF) {
2588       ERROR_IF_NOT_ZERO ( ferror(src));
2589       return (1 << 16) | MY_EOF;
2590    }
2591 
2592    runLength = 0;
2593    do {
2594       chLatest = getc ( src );
2595       runLength++;
2596       bytesIn++;
2597    }
2598       while (ch == chLatest && runLength < 255);
2599 
2600    if ( chLatest != EOF ) {
2601       if ( ungetc ( chLatest, src ) == EOF )
2602          panic ( "getRLEpair: ungetc failed" );
2603    } else {
2604       ERROR_IF_NOT_ZERO ( ferror(src) );
2605    }
2606 
2607    /*--- Conditional is just a speedup hack. ---*/
2608    if (runLength == 1) {
2609       UPDATE_CRC ( globalCrc, (UChar)ch );
2610       return (1 << 16) | ch;
2611    } else {
2612       Int32 i;
2613       for (i = 1; i <= runLength; i++)
2614          UPDATE_CRC ( globalCrc, (UChar)ch );
2615       return (runLength << 16) | ch;
2616    }
2617 }
2618 
2619 
2620 /*---------------------------------------------*/
loadAndRLEsource(FILE * src)2621 void loadAndRLEsource ( FILE* src )
2622 {
2623    Int32 ch, allowableBlockSize, i;
2624 
2625    last = -1;
2626    ch   = 0;
2627 
2628    for (i = 0; i < 256; i++) inUse[i] = False;
2629 
2630    /*--- 20 is just a paranoia constant ---*/
2631    allowableBlockSize = 100000 * blockSize100k - 20;
2632 
2633    while (last < allowableBlockSize && ch != MY_EOF) {
2634       Int32 rlePair, runLen;
2635       rlePair = getRLEpair ( src );
2636       ch      = rlePair & 0xFFFF;
2637       runLen  = (UInt32)rlePair >> 16;
2638 
2639       #if DEBUG
2640          assert (runLen >= 1 && runLen <= 255);
2641       #endif
2642 
2643       if (ch != MY_EOF) {
2644          inUse[ch] = True;
2645          switch (runLen) {
2646             case 1:
2647                last++; block[last] = (UChar)ch; break;
2648             case 2:
2649                last++; block[last] = (UChar)ch;
2650                last++; block[last] = (UChar)ch; break;
2651             case 3:
2652                last++; block[last] = (UChar)ch;
2653                last++; block[last] = (UChar)ch;
2654                last++; block[last] = (UChar)ch; break;
2655             default:
2656                inUse[runLen-4] = True;
2657                last++; block[last] = (UChar)ch;
2658                last++; block[last] = (UChar)ch;
2659                last++; block[last] = (UChar)ch;
2660                last++; block[last] = (UChar)ch;
2661                last++; block[last] = (UChar)(runLen-4); break;
2662          }
2663       }
2664    }
2665 }
2666 
2667 
2668 /*---------------------------------------------------*/
2669 /*--- Processing of complete files and streams    ---*/
2670 /*---------------------------------------------------*/
2671 
2672 /*---------------------------------------------*/
compressStream(FILE * stream,FILE * zStream)2673 void compressStream ( FILE *stream, FILE *zStream )
2674 {
2675    IntNative  retVal;
2676    UInt32     blockCRC, combinedCRC;
2677    Int32      blockNo;
2678 
2679    blockNo  = 0;
2680    bytesIn  = 0;
2681    bytesOut = 0;
2682    nBlocksRandomised = 0;
2683 
2684    SET_BINARY_MODE(stream);
2685    SET_BINARY_MODE(zStream);
2686 
2687    ERROR_IF_NOT_ZERO ( ferror(stream) );
2688    ERROR_IF_NOT_ZERO ( ferror(zStream) );
2689 
2690    bsSetStream ( zStream, True );
2691 
2692    /*--- Write `magic' bytes B and Z,
2693          then h indicating file-format == huffmanised,
2694          followed by a digit indicating blockSize100k.
2695    ---*/
2696    bsPutUChar ( 'B' );
2697    bsPutUChar ( 'Z' );
2698    bsPutUChar ( 'h' );
2699    bsPutUChar ( '0' + blockSize100k );
2700 
2701    combinedCRC = 0;
2702 
2703    if (verbosity >= 2) fprintf ( stderr, "\n" );
2704 
2705    while (True) {
2706 
2707       blockNo++;
2708       initialiseCRC ();
2709       loadAndRLEsource ( stream );
2710       ERROR_IF_NOT_ZERO ( ferror(stream) );
2711       if (last == -1) break;
2712 
2713       blockCRC = getFinalCRC ();
2714       combinedCRC = (combinedCRC << 1) | (combinedCRC >> 31);
2715       combinedCRC ^= blockCRC;
2716 
2717       if (verbosity >= 2)
2718          fprintf ( stderr, "    block %d: crc = 0x%8x, combined CRC = 0x%8x, size = %d",
2719                            blockNo, blockCRC, combinedCRC, last+1 );
2720 
2721       /*-- sort the block and establish posn of original string --*/
2722       doReversibleTransformation ();
2723 
2724       /*--
2725         A 6-byte block header, the value chosen arbitrarily
2726         as 0x314159265359 :-).  A 32 bit value does not really
2727         give a strong enough guarantee that the value will not
2728         appear by chance in the compressed datastream.  Worst-case
2729         probability of this event, for a 900k block, is about
2730         2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
2731         For a compressed file of size 100Gb -- about 100000 blocks --
2732         only a 48-bit marker will do.  NB: normal compression/
2733         decompression do *not* rely on these statistical properties.
2734         They are only important when trying to recover blocks from
2735         damaged files.
2736       --*/
2737       bsPutUChar ( 0x31 ); bsPutUChar ( 0x41 );
2738       bsPutUChar ( 0x59 ); bsPutUChar ( 0x26 );
2739       bsPutUChar ( 0x53 ); bsPutUChar ( 0x59 );
2740 
2741       /*-- Now the block's CRC, so it is in a known place. --*/
2742       bsPutUInt32 ( blockCRC );
2743 
2744       /*-- Now a single bit indicating randomisation. --*/
2745       if (blockRandomised) {
2746          bsW(1,1); nBlocksRandomised++;
2747       } else
2748          bsW(1,0);
2749 
2750       /*-- Finally, block's contents proper. --*/
2751       moveToFrontCodeAndSend ();
2752 
2753       ERROR_IF_NOT_ZERO ( ferror(zStream) );
2754    }
2755 
2756    if (verbosity >= 2 && nBlocksRandomised > 0)
2757       fprintf ( stderr, "    %d block%s needed randomisation\n",
2758                         nBlocksRandomised,
2759                         nBlocksRandomised == 1 ? "" : "s" );
2760 
2761    /*--
2762       Now another magic 48-bit number, 0x177245385090, to
2763       indicate the end of the last block.  (sqrt(pi), if
2764       you want to know.  I did want to use e, but it contains
2765       too much repetition -- 27 18 28 18 28 46 -- for me
2766       to feel statistically comfortable.  Call me paranoid.)
2767    --*/
2768 
2769    bsPutUChar ( 0x17 ); bsPutUChar ( 0x72 );
2770    bsPutUChar ( 0x45 ); bsPutUChar ( 0x38 );
2771    bsPutUChar ( 0x50 ); bsPutUChar ( 0x90 );
2772 
2773    bsPutUInt32 ( combinedCRC );
2774    if (verbosity >= 2)
2775       fprintf ( stderr, "    final combined CRC = 0x%x\n   ", combinedCRC );
2776 
2777    /*-- Close the files in an utterly paranoid way. --*/
2778    bsFinishedWithStream ();
2779 
2780    ERROR_IF_NOT_ZERO ( ferror(zStream) );
2781    retVal = fflush ( zStream );
2782    ERROR_IF_EOF ( retVal );
2783    retVal = fclose ( zStream );
2784    ERROR_IF_EOF ( retVal );
2785 
2786    ERROR_IF_NOT_ZERO ( ferror(stream) );
2787    retVal = fclose ( stream );
2788    ERROR_IF_EOF ( retVal );
2789 
2790    if (bytesIn == 0) bytesIn = 1;
2791    if (bytesOut == 0) bytesOut = 1;
2792 
2793    if (verbosity >= 1)
2794       fprintf ( stderr, "%6.3f:1, %6.3f bits/byte, "
2795                         "%5.2f%% saved, %d in, %d out.\n",
2796                 (float)bytesIn / (float)bytesOut,
2797                 (8.0 * (float)bytesOut) / (float)bytesIn,
2798                 100.0 * (1.0 - (float)bytesOut / (float)bytesIn),
2799                 bytesIn,
2800                 bytesOut
2801               );
2802 }
2803 
2804 
2805 /*---------------------------------------------*/
uncompressStream(FILE * zStream,FILE * stream)2806 Bool uncompressStream ( FILE *zStream, FILE *stream )
2807 {
2808    UChar      magic1, magic2, magic3, magic4;
2809    UChar      magic5, magic6;
2810    UInt32     storedBlockCRC, storedCombinedCRC;
2811    UInt32     computedBlockCRC, computedCombinedCRC;
2812    Int32      currBlockNo;
2813    IntNative  retVal;
2814 
2815    SET_BINARY_MODE(stream);
2816    SET_BINARY_MODE(zStream);
2817 
2818    ERROR_IF_NOT_ZERO ( ferror(stream) );
2819    ERROR_IF_NOT_ZERO ( ferror(zStream) );
2820 
2821    bsSetStream ( zStream, False );
2822 
2823    /*--
2824       A bad magic number is `recoverable from';
2825       return with False so the caller skips the file.
2826    --*/
2827    magic1 = bsGetUChar ();
2828    magic2 = bsGetUChar ();
2829    magic3 = bsGetUChar ();
2830    magic4 = bsGetUChar ();
2831    if (magic1 != 'B' ||
2832        magic2 != 'Z' ||
2833        magic3 != 'h' ||
2834        magic4 < '1'  ||
2835        magic4 > '9') {
2836      bsFinishedWithStream();
2837      retVal = fclose ( stream );
2838      ERROR_IF_EOF ( retVal );
2839      return False;
2840    }
2841 
2842    setDecompressStructureSizes ( magic4 - '0' );
2843    computedCombinedCRC = 0;
2844 
2845    if (verbosity >= 2) fprintf ( stderr, "\n    " );
2846    currBlockNo = 0;
2847 
2848    while (True) {
2849       magic1 = bsGetUChar ();
2850       magic2 = bsGetUChar ();
2851       magic3 = bsGetUChar ();
2852       magic4 = bsGetUChar ();
2853       magic5 = bsGetUChar ();
2854       magic6 = bsGetUChar ();
2855       if (magic1 == 0x17 && magic2 == 0x72 &&
2856           magic3 == 0x45 && magic4 == 0x38 &&
2857           magic5 == 0x50 && magic6 == 0x90) break;
2858 
2859       if (magic1 != 0x31 || magic2 != 0x41 ||
2860           magic3 != 0x59 || magic4 != 0x26 ||
2861           magic5 != 0x53 || magic6 != 0x59) badBlockHeader();
2862 
2863       storedBlockCRC = bsGetUInt32 ();
2864 
2865       if (bsR(1) == 1)
2866          blockRandomised = True; else
2867          blockRandomised = False;
2868 
2869       currBlockNo++;
2870       if (verbosity >= 2)
2871          fprintf ( stderr, "[%d: huff+mtf ", currBlockNo );
2872       getAndMoveToFrontDecode ();
2873       ERROR_IF_NOT_ZERO ( ferror(zStream) );
2874 
2875       initialiseCRC();
2876       if (verbosity >= 2) fprintf ( stderr, "rt+rld" );
2877       if (smallMode)
2878          undoReversibleTransformation_small ( stream );
2879          else
2880          undoReversibleTransformation_fast  ( stream );
2881 
2882       ERROR_IF_NOT_ZERO ( ferror(stream) );
2883 
2884       computedBlockCRC = getFinalCRC();
2885       if (verbosity >= 3)
2886          fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC );
2887       if (verbosity >= 2) fprintf ( stderr, "] " );
2888 
2889       /*-- A bad CRC is considered a fatal error. --*/
2890       if (storedBlockCRC != computedBlockCRC)
2891          crcError ( storedBlockCRC, computedBlockCRC );
2892 
2893       computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31);
2894       computedCombinedCRC ^= computedBlockCRC;
2895    };
2896 
2897    if (verbosity >= 2) fprintf ( stderr, "\n    " );
2898 
2899    storedCombinedCRC  = bsGetUInt32 ();
2900    if (verbosity >= 2)
2901       fprintf ( stderr,
2902                 "combined CRCs: stored = 0x%x, computed = 0x%x\n    ",
2903                 storedCombinedCRC, computedCombinedCRC );
2904    if (storedCombinedCRC != computedCombinedCRC)
2905       crcError ( storedCombinedCRC, computedCombinedCRC );
2906 
2907 
2908    bsFinishedWithStream ();
2909    ERROR_IF_NOT_ZERO ( ferror(zStream) );
2910    retVal = fclose ( zStream );
2911    ERROR_IF_EOF ( retVal );
2912 
2913    ERROR_IF_NOT_ZERO ( ferror(stream) );
2914    retVal = fflush ( stream );
2915    ERROR_IF_NOT_ZERO ( retVal );
2916    if (stream != stdout) {
2917       retVal = fclose ( stream );
2918       ERROR_IF_EOF ( retVal );
2919    }
2920    return True;
2921 }
2922 
2923 
2924 /*---------------------------------------------*/
testStream(FILE * zStream)2925 Bool testStream ( FILE *zStream )
2926 {
2927    UChar      magic1, magic2, magic3, magic4;
2928    UChar      magic5, magic6;
2929    UInt32     storedBlockCRC, storedCombinedCRC;
2930    UInt32     computedBlockCRC, computedCombinedCRC;
2931    Int32      currBlockNo;
2932    IntNative  retVal;
2933 
2934    SET_BINARY_MODE(zStream);
2935    ERROR_IF_NOT_ZERO ( ferror(zStream) );
2936 
2937    bsSetStream ( zStream, False );
2938 
2939    magic1 = bsGetUChar ();
2940    magic2 = bsGetUChar ();
2941    magic3 = bsGetUChar ();
2942    magic4 = bsGetUChar ();
2943    if (magic1 != 'B' ||
2944        magic2 != 'Z' ||
2945        magic3 != 'h' ||
2946        magic4 < '1'  ||
2947        magic4 > '9') {
2948      bsFinishedWithStream();
2949      fclose ( zStream );
2950      fprintf ( stderr, "\n%s: bad magic number (ie, not created by bzip2)\n",
2951                        inName );
2952      return False;
2953    }
2954 
2955    smallMode = True;
2956    setDecompressStructureSizes ( magic4 - '0' );
2957    computedCombinedCRC = 0;
2958 
2959    if (verbosity >= 2) fprintf ( stderr, "\n" );
2960    currBlockNo = 0;
2961 
2962    while (True) {
2963       magic1 = bsGetUChar ();
2964       magic2 = bsGetUChar ();
2965       magic3 = bsGetUChar ();
2966       magic4 = bsGetUChar ();
2967       magic5 = bsGetUChar ();
2968       magic6 = bsGetUChar ();
2969       if (magic1 == 0x17 && magic2 == 0x72 &&
2970           magic3 == 0x45 && magic4 == 0x38 &&
2971           magic5 == 0x50 && magic6 == 0x90) break;
2972 
2973       currBlockNo++;
2974       if (magic1 != 0x31 || magic2 != 0x41 ||
2975           magic3 != 0x59 || magic4 != 0x26 ||
2976           magic5 != 0x53 || magic6 != 0x59) {
2977          bsFinishedWithStream();
2978          fclose ( zStream );
2979          fprintf ( stderr,
2980                    "\n%s, block %d: bad header (not == 0x314159265359)\n",
2981                    inName, currBlockNo );
2982          return False;
2983       }
2984       storedBlockCRC = bsGetUInt32 ();
2985 
2986       if (bsR(1) == 1)
2987          blockRandomised = True; else
2988          blockRandomised = False;
2989 
2990       if (verbosity >= 2)
2991          fprintf ( stderr, "    block [%d: huff+mtf ", currBlockNo );
2992       getAndMoveToFrontDecode ();
2993       ERROR_IF_NOT_ZERO ( ferror(zStream) );
2994 
2995       initialiseCRC();
2996       if (verbosity >= 2) fprintf ( stderr, "rt+rld" );
2997       undoReversibleTransformation_small ( NULL );
2998 
2999       computedBlockCRC = getFinalCRC();
3000       if (verbosity >= 3)
3001          fprintf ( stderr, " {0x%x, 0x%x}", storedBlockCRC, computedBlockCRC );
3002       if (verbosity >= 2) fprintf ( stderr, "] " );
3003 
3004       if (storedBlockCRC != computedBlockCRC) {
3005          bsFinishedWithStream();
3006          fclose ( zStream );
3007          fprintf ( stderr, "\n%s, block %d: computed CRC does not match stored one\n",
3008                            inName, currBlockNo );
3009          return False;
3010       }
3011 
3012       if (verbosity >= 2) fprintf ( stderr, "ok\n" );
3013       computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >> 31);
3014       computedCombinedCRC ^= computedBlockCRC;
3015    };
3016 
3017    storedCombinedCRC  = bsGetUInt32 ();
3018    if (verbosity >= 2)
3019       fprintf ( stderr,
3020                 "    combined CRCs: stored = 0x%x, computed = 0x%x\n    ",
3021                 storedCombinedCRC, computedCombinedCRC );
3022    if (storedCombinedCRC != computedCombinedCRC) {
3023       bsFinishedWithStream();
3024       fclose ( zStream );
3025       fprintf ( stderr, "\n%s: computed CRC does not match stored one\n",
3026                         inName );
3027       return False;
3028    }
3029 
3030    bsFinishedWithStream ();
3031    ERROR_IF_NOT_ZERO ( ferror(zStream) );
3032    retVal = fclose ( zStream );
3033    ERROR_IF_EOF ( retVal );
3034    return True;
3035 }
3036 
3037 
3038 
3039 /*---------------------------------------------------*/
3040 /*--- Error [non-] handling grunge                ---*/
3041 /*---------------------------------------------------*/
3042 
3043 /*---------------------------------------------*/
cadvise(void)3044 void cadvise ( void )
3045 {
3046    fprintf (
3047       stderr,
3048       "\nIt is possible that the compressed file(s) have become corrupted.\n"
3049         "You can use the -tvv option to test integrity of such files.\n\n"
3050         "You can use the `bzip2recover' program to *attempt* to recover\n"
3051         "data from undamaged sections of corrupted files.\n\n"
3052     );
3053 }
3054 
3055 
3056 /*---------------------------------------------*/
showFileNames(void)3057 void showFileNames ( void )
3058 {
3059    fprintf (
3060       stderr,
3061       "\tInput file = %s, output file = %s\n",
3062       inName==NULL  ? "(null)" : inName,
3063       outName==NULL ? "(null)" : outName
3064    );
3065 }
3066 
3067 
3068 /*---------------------------------------------*/
cleanUpAndFail(Int32 ec)3069 void cleanUpAndFail ( Int32 ec )
3070 {
3071    IntNative retVal;
3072 
3073    if ( srcMode == SM_F2F && opMode != OM_TEST ) {
3074       fprintf ( stderr, "%s: Deleting output file %s, if it exists.\n",
3075                 progName,
3076                 outName==NULL ? "(null)" : outName );
3077       if (outputHandleJustInCase != NULL)
3078          fclose ( outputHandleJustInCase );
3079       retVal = remove ( outName );
3080       if (retVal != 0)
3081          fprintf ( stderr,
3082                    "%s: WARNING: deletion of output file (apparently) failed.\n",
3083                    progName );
3084    }
3085    if (numFileNames > 0 && numFilesProcessed < numFileNames) {
3086       fprintf ( stderr,
3087                 "%s: WARNING: some files have not been processed:\n"
3088                 "\t%d specified on command line, %d not processed yet.\n\n",
3089                 progName, numFileNames,
3090                           numFileNames - numFilesProcessed );
3091    }
3092    exit ( ec );
3093 }
3094 
3095 
3096 /*---------------------------------------------*/
panic(Char * s)3097 void panic ( Char* s )
3098 {
3099    fprintf ( stderr,
3100              "\n%s: PANIC -- internal consistency error:\n"
3101              "\t%s\n"
3102              "\tThis is a BUG.  Please report it to me at:\n"
3103              "\tjseward@acm.org\n",
3104              progName, s );
3105    showFileNames();
3106    cleanUpAndFail( 3 );
3107 }
3108 
3109 
3110 /*---------------------------------------------*/
badBGLengths(void)3111 void badBGLengths ( void )
3112 {
3113    fprintf ( stderr,
3114              "\n%s: error when reading background model code lengths,\n"
3115              "\twhich probably means the compressed file is corrupted.\n",
3116              progName );
3117    showFileNames();
3118    cadvise();
3119    cleanUpAndFail( 2 );
3120 }
3121 
3122 
3123 /*---------------------------------------------*/
crcError(UInt32 crcStored,UInt32 crcComputed)3124 void crcError ( UInt32 crcStored, UInt32 crcComputed )
3125 {
3126    fprintf ( stderr,
3127              "\n%s: Data integrity error when decompressing.\n"
3128              "\tStored CRC = 0x%x, computed CRC = 0x%x\n",
3129              progName, crcStored, crcComputed );
3130    showFileNames();
3131    cadvise();
3132    cleanUpAndFail( 2 );
3133 }
3134 
3135 
3136 /*---------------------------------------------*/
compressedStreamEOF(void)3137 void compressedStreamEOF ( void )
3138 {
3139    fprintf ( stderr,
3140              "\n%s: Compressed file ends unexpectedly;\n\t"
3141              "perhaps it is corrupted?  *Possible* reason follows.\n",
3142              progName );
3143    perror ( progName );
3144    showFileNames();
3145    cadvise();
3146    cleanUpAndFail( 2 );
3147 }
3148 
3149 
3150 /*---------------------------------------------*/
ioError()3151 void ioError ( )
3152 {
3153    fprintf ( stderr,
3154              "\n%s: I/O or other error, bailing out.  Possible reason follows.\n",
3155              progName );
3156    perror ( progName );
3157    showFileNames();
3158    cleanUpAndFail( 1 );
3159 }
3160 
3161 
3162 /*---------------------------------------------*/
blockOverrun()3163 void blockOverrun ()
3164 {
3165    fprintf ( stderr,
3166              "\n%s: block overrun during decompression,\n"
3167              "\twhich probably means the compressed file\n"
3168              "\tis corrupted.\n",
3169              progName );
3170    showFileNames();
3171    cadvise();
3172    cleanUpAndFail( 2 );
3173 }
3174 
3175 
3176 /*---------------------------------------------*/
badBlockHeader()3177 void badBlockHeader ()
3178 {
3179    fprintf ( stderr,
3180              "\n%s: bad block header in the compressed file,\n"
3181              "\twhich probably means it is corrupted.\n",
3182              progName );
3183    showFileNames();
3184    cadvise();
3185    cleanUpAndFail( 2 );
3186 }
3187 
3188 
3189 /*---------------------------------------------*/
bitStreamEOF()3190 void bitStreamEOF ()
3191 {
3192    fprintf ( stderr,
3193              "\n%s: read past the end of compressed data,\n"
3194              "\twhich probably means it is corrupted.\n",
3195              progName );
3196    showFileNames();
3197    cadvise();
3198    cleanUpAndFail( 2 );
3199 }
3200 
3201 
3202 /*---------------------------------------------*/
mySignalCatcher(IntNative n)3203 void mySignalCatcher ( IntNative n )
3204 {
3205    fprintf ( stderr,
3206              "\n%s: Control-C (or similar) caught, quitting.\n",
3207              progName );
3208    cleanUpAndFail(1);
3209 }
3210 
3211 
3212 /*---------------------------------------------*/
mySIGSEGVorSIGBUScatcher(IntNative n)3213 void mySIGSEGVorSIGBUScatcher ( IntNative n )
3214 {
3215    if (opMode == OM_Z)
3216       fprintf ( stderr,
3217                 "\n%s: Caught a SIGSEGV or SIGBUS whilst compressing,\n"
3218                 "\twhich probably indicates a bug in bzip2.  Please\n"
3219                 "\treport it to me at: jseward@acm.org\n",
3220                 progName );
3221       else
3222       fprintf ( stderr,
3223                 "\n%s: Caught a SIGSEGV or SIGBUS whilst decompressing,\n"
3224                 "\twhich probably indicates that the compressed data\n"
3225                 "\tis corrupted.\n",
3226                 progName );
3227 
3228    showFileNames();
3229    if (opMode == OM_Z)
3230       cleanUpAndFail( 3 ); else
3231       { cadvise(); cleanUpAndFail( 2 ); }
3232 }
3233 
3234 
3235 /*---------------------------------------------*/
uncompressOutOfMemory(Int32 draw,Int32 blockSize)3236 void uncompressOutOfMemory ( Int32 draw, Int32 blockSize )
3237 {
3238    fprintf ( stderr,
3239              "\n%s: Can't allocate enough memory for decompression.\n"
3240              "\tRequested %d bytes for a block size of %d.\n"
3241              "\tTry selecting space-economic decompress (with flag -s)\n"
3242              "\tand failing that, find a machine with more memory.\n",
3243              progName, draw, blockSize );
3244    showFileNames();
3245    cleanUpAndFail(1);
3246 }
3247 
3248 
3249 /*---------------------------------------------*/
compressOutOfMemory(Int32 draw,Int32 blockSize)3250 void compressOutOfMemory ( Int32 draw, Int32 blockSize )
3251 {
3252    fprintf ( stderr,
3253              "\n%s: Can't allocate enough memory for compression.\n"
3254              "\tRequested %d bytes for a block size of %d.\n"
3255              "\tTry selecting a small block size (with flag -s).\n",
3256              progName, draw, blockSize );
3257    showFileNames();
3258    cleanUpAndFail(1);
3259 }
3260 
3261 
3262 /*---------------------------------------------------*/
3263 /*--- The main driver machinery                   ---*/
3264 /*---------------------------------------------------*/
3265 
3266 /*---------------------------------------------*/
pad(Char * s)3267 void pad ( Char *s )
3268 {
3269    Int32 i;
3270    if ( (Int32)strlen(s) >= longestFileName ) return;
3271    for (i = 1; i <= longestFileName - (Int32)strlen(s); i++)
3272       fprintf ( stderr, " " );
3273 }
3274 
3275 
3276 /*---------------------------------------------*/
fileExists(Char * name)3277 Bool fileExists ( Char* name )
3278 {
3279    FILE *tmp   = fopen ( name, "rb" );
3280    Bool exists = (tmp != NULL);
3281    if (tmp != NULL) fclose ( tmp );
3282    return exists;
3283 }
3284 
3285 
3286 /*---------------------------------------------*/
3287 /*--
3288   if in doubt, return True
3289 --*/
notABogStandardFile(Char * name)3290 Bool notABogStandardFile ( Char* name )
3291 {
3292    IntNative      i;
3293    struct MY_STAT statBuf;
3294 
3295    i = MY_LSTAT ( name, &statBuf );
3296    if (i != 0) return True;
3297    if (MY_S_IFREG(statBuf.st_mode)) return False;
3298    return True;
3299 }
3300 
3301 
3302 /*---------------------------------------------*/
copyDateAndPermissions(Char * srcName,Char * dstName)3303 void copyDateAndPermissions ( Char *srcName, Char *dstName )
3304 {
3305    #if BZ_UNIX
3306    IntNative      retVal;
3307    struct MY_STAT statBuf;
3308    struct utimbuf uTimBuf;
3309 
3310    retVal = MY_LSTAT ( srcName, &statBuf );
3311    ERROR_IF_NOT_ZERO ( retVal );
3312    uTimBuf.actime = statBuf.st_atime;
3313    uTimBuf.modtime = statBuf.st_mtime;
3314 
3315    retVal = chmod ( dstName, statBuf.st_mode );
3316    ERROR_IF_NOT_ZERO ( retVal );
3317    retVal = utime ( dstName, &uTimBuf );
3318    ERROR_IF_NOT_ZERO ( retVal );
3319    #endif
3320 }
3321 
3322 
3323 /*---------------------------------------------*/
endsInBz2(Char * name)3324 Bool endsInBz2 ( Char* name )
3325 {
3326    Int32 n = strlen ( name );
3327    if (n <= 4) return False;
3328    return
3329       (name[n-4] == '.' &&
3330        name[n-3] == 'b' &&
3331        name[n-2] == 'z' &&
3332        name[n-1] == '2');
3333 }
3334 
3335 
3336 /*---------------------------------------------*/
containsDubiousChars(Char * name)3337 Bool containsDubiousChars ( Char* name )
3338 {
3339    Bool cdc = False;
3340    for (; *name != '\0'; name++)
3341       if (*name == '?' || *name == '*') cdc = True;
3342    return cdc;
3343 }
3344 
3345 
3346 /*---------------------------------------------*/
compress(Char * name)3347 void compress ( Char *name )
3348 {
3349    FILE *inStr;
3350    FILE *outStr;
3351 
3352    if (name == NULL && srcMode != SM_I2O)
3353       panic ( "compress: bad modes\n" );
3354 
3355    switch (srcMode) {
3356       case SM_I2O: strcpy ( inName, "(stdin)" );
3357                    strcpy ( outName, "(stdout)" ); break;
3358       case SM_F2F: strcpy ( inName, name );
3359                    strcpy ( outName, name );
3360                    strcat ( outName, ".bz2" ); break;
3361       case SM_F2O: strcpy ( inName, name );
3362                    strcpy ( outName, "(stdout)" ); break;
3363    }
3364 
3365    if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
3366       fprintf ( stderr, "%s: There are no files matching `%s'.\n",
3367       progName, inName );
3368       return;
3369    }
3370    if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
3371       fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
3372                 progName, inName );
3373       return;
3374    }
3375    if ( srcMode != SM_I2O && endsInBz2 ( inName )) {
3376       fprintf ( stderr, "%s: Input file name %s ends in `.bz2', skipping.\n",
3377                 progName, inName );
3378       return;
3379    }
3380    if ( srcMode != SM_I2O && notABogStandardFile ( inName )) {
3381       fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
3382                 progName, inName );
3383       return;
3384    }
3385    if ( srcMode == SM_F2F && fileExists ( outName ) ) {
3386       fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
3387                 progName, outName );
3388       return;
3389    }
3390 
3391    switch ( srcMode ) {
3392 
3393       case SM_I2O:
3394          inStr = stdin;
3395          outStr = stdout;
3396          if ( isatty ( fileno ( stdout ) ) ) {
3397             fprintf ( stderr,
3398                       "%s: I won't write compressed data to a terminal.\n",
3399                       progName );
3400             fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
3401                               progName, progName );
3402             return;
3403          };
3404          break;
3405 
3406       case SM_F2O:
3407          inStr = fopen ( inName, "rb" );
3408          outStr = stdout;
3409          if ( isatty ( fileno ( stdout ) ) ) {
3410             fprintf ( stderr,
3411                       "%s: I won't write compressed data to a terminal.\n",
3412                       progName );
3413             fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
3414                               progName, progName );
3415             return;
3416          };
3417          if ( inStr == NULL ) {
3418             fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
3419                       progName, inName );
3420             return;
3421          };
3422          break;
3423 
3424       case SM_F2F:
3425          inStr = fopen ( inName, "rb" );
3426          outStr = fopen ( outName, "wb" );
3427          if ( outStr == NULL) {
3428             fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
3429                       progName, outName );
3430             return;
3431          }
3432          if ( inStr == NULL ) {
3433             fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
3434                       progName, inName );
3435             return;
3436          };
3437          break;
3438 
3439       default:
3440          panic ( "compress: bad srcMode" );
3441          break;
3442    }
3443 
3444    if (verbosity >= 1) {
3445       fprintf ( stderr,  "  %s: ", inName );
3446       pad ( inName );
3447       fflush ( stderr );
3448    }
3449 
3450    /*--- Now the input and output handles are sane.  Do the Biz. ---*/
3451    outputHandleJustInCase = outStr;
3452    compressStream ( inStr, outStr );
3453    outputHandleJustInCase = NULL;
3454 
3455    /*--- If there was an I/O error, we won't get here. ---*/
3456    if ( srcMode == SM_F2F ) {
3457       copyDateAndPermissions ( inName, outName );
3458       if ( !keepInputFiles ) {
3459          IntNative retVal = remove ( inName );
3460          ERROR_IF_NOT_ZERO ( retVal );
3461       }
3462    }
3463 }
3464 
3465 
3466 /*---------------------------------------------*/
uncompress(Char * name)3467 void uncompress ( Char *name )
3468 {
3469    FILE *inStr;
3470    FILE *outStr;
3471    Bool magicNumberOK;
3472 
3473    if (name == NULL && srcMode != SM_I2O)
3474       panic ( "uncompress: bad modes\n" );
3475 
3476    switch (srcMode) {
3477       case SM_I2O: strcpy ( inName, "(stdin)" );
3478                    strcpy ( outName, "(stdout)" ); break;
3479       case SM_F2F: strcpy ( inName, name );
3480                    strcpy ( outName, name );
3481                    if (endsInBz2 ( outName ))
3482                       outName [ strlen ( outName ) - 4 ] = '\0';
3483                    break;
3484       case SM_F2O: strcpy ( inName, name );
3485                    strcpy ( outName, "(stdout)" ); break;
3486    }
3487 
3488    if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
3489       fprintf ( stderr, "%s: There are no files matching `%s'.\n",
3490                 progName, inName );
3491       return;
3492    }
3493    if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
3494       fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
3495                 progName, inName );
3496       return;
3497    }
3498    if ( srcMode != SM_I2O && !endsInBz2 ( inName )) {
3499       fprintf ( stderr,
3500                 "%s: Input file name %s doesn't end in `.bz2', skipping.\n",
3501                 progName, inName );
3502       return;
3503    }
3504    if ( srcMode != SM_I2O && notABogStandardFile ( inName )) {
3505       fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
3506                 progName, inName );
3507       return;
3508    }
3509    if ( srcMode == SM_F2F && fileExists ( outName ) ) {
3510       fprintf ( stderr, "%s: Output file %s already exists, skipping.\n",
3511                 progName, outName );
3512       return;
3513    }
3514 
3515    switch ( srcMode ) {
3516 
3517       case SM_I2O:
3518          inStr = stdin;
3519          outStr = stdout;
3520          if ( isatty ( fileno ( stdin ) ) ) {
3521             fprintf ( stderr,
3522                       "%s: I won't read compressed data from a terminal.\n",
3523                       progName );
3524             fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
3525                               progName, progName );
3526             return;
3527          };
3528          break;
3529 
3530       case SM_F2O:
3531          inStr = fopen ( inName, "rb" );
3532          outStr = stdout;
3533          if ( inStr == NULL ) {
3534             fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
3535                       progName, inName );
3536             return;
3537          };
3538          break;
3539 
3540       case SM_F2F:
3541          inStr = fopen ( inName, "rb" );
3542          outStr = fopen ( outName, "wb" );
3543          if ( outStr == NULL) {
3544             fprintf ( stderr, "%s: Can't create output file %s, skipping.\n",
3545                       progName, outName );
3546             return;
3547          }
3548          if ( inStr == NULL ) {
3549             fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
3550                       progName, inName );
3551             return;
3552          };
3553          break;
3554 
3555       default:
3556          panic ( "uncompress: bad srcMode" );
3557          break;
3558    }
3559 
3560    if (verbosity >= 1) {
3561       fprintf ( stderr, "  %s: ", inName );
3562       pad ( inName );
3563       fflush ( stderr );
3564    }
3565 
3566    /*--- Now the input and output handles are sane.  Do the Biz. ---*/
3567    outputHandleJustInCase = outStr;
3568    magicNumberOK = uncompressStream ( inStr, outStr );
3569    outputHandleJustInCase = NULL;
3570 
3571    /*--- If there was an I/O error, we won't get here. ---*/
3572    if ( magicNumberOK ) {
3573       if ( srcMode == SM_F2F ) {
3574          copyDateAndPermissions ( inName, outName );
3575          if ( !keepInputFiles ) {
3576             IntNative retVal = remove ( inName );
3577             ERROR_IF_NOT_ZERO ( retVal );
3578          }
3579       }
3580    } else {
3581       if ( srcMode == SM_F2F ) {
3582          IntNative retVal = remove ( outName );
3583          ERROR_IF_NOT_ZERO ( retVal );
3584       }
3585    }
3586 
3587    if ( magicNumberOK ) {
3588       if (verbosity >= 1)
3589          fprintf ( stderr, "done\n" );
3590    } else {
3591       if (verbosity >= 1)
3592          fprintf ( stderr, "not a bzip2 file, skipping.\n" ); else
3593          fprintf ( stderr,
3594                    "%s: %s is not a bzip2 file, skipping.\n",
3595                    progName, inName );
3596    }
3597 
3598 }
3599 
3600 
3601 /*---------------------------------------------*/
testf(Char * name)3602 void testf ( Char *name )
3603 {
3604    FILE *inStr;
3605    Bool allOK;
3606 
3607    if (name == NULL && srcMode != SM_I2O)
3608       panic ( "testf: bad modes\n" );
3609 
3610    strcpy ( outName, "(none)" );
3611    switch (srcMode) {
3612       case SM_I2O: strcpy ( inName, "(stdin)" ); break;
3613       case SM_F2F: strcpy ( inName, name ); break;
3614       case SM_F2O: strcpy ( inName, name ); break;
3615    }
3616 
3617    if ( srcMode != SM_I2O && containsDubiousChars ( inName ) ) {
3618       fprintf ( stderr, "%s: There are no files matching `%s'.\n",
3619                 progName, inName );
3620       return;
3621    }
3622    if ( srcMode != SM_I2O && !fileExists ( inName ) ) {
3623       fprintf ( stderr, "%s: Input file %s doesn't exist, skipping.\n",
3624                 progName, inName );
3625       return;
3626    }
3627    if ( srcMode != SM_I2O && !endsInBz2 ( inName )) {
3628       fprintf ( stderr,
3629                 "%s: Input file name %s doesn't end in `.bz2', skipping.\n",
3630                 progName, inName );
3631       return;
3632    }
3633    if ( srcMode != SM_I2O && notABogStandardFile ( inName )) {
3634       fprintf ( stderr, "%s: Input file %s is not a normal file, skipping.\n",
3635                 progName, inName );
3636       return;
3637    }
3638 
3639    switch ( srcMode ) {
3640 
3641       case SM_I2O:
3642          if ( isatty ( fileno ( stdin ) ) ) {
3643             fprintf ( stderr,
3644                       "%s: I won't read compressed data from a terminal.\n",
3645                       progName );
3646             fprintf ( stderr, "%s: For help, type: `%s --help'.\n",
3647                               progName, progName );
3648             return;
3649          };
3650          inStr = stdin;
3651          break;
3652 
3653       case SM_F2O: case SM_F2F:
3654          inStr = fopen ( inName, "rb" );
3655          if ( inStr == NULL ) {
3656             fprintf ( stderr, "%s: Can't open input file %s, skipping.\n",
3657                       progName, inName );
3658             return;
3659          };
3660          break;
3661 
3662       default:
3663          panic ( "testf: bad srcMode" );
3664          break;
3665    }
3666 
3667    if (verbosity >= 1) {
3668       fprintf ( stderr, "  %s: ", inName );
3669       pad ( inName );
3670       fflush ( stderr );
3671    }
3672 
3673    /*--- Now the input handle is sane.  Do the Biz. ---*/
3674    allOK = testStream ( inStr );
3675 
3676    if (allOK && verbosity >= 1) fprintf ( stderr, "ok\n" );
3677    if (!allOK) testFailsExist = True;
3678 }
3679 
3680 
3681 /*---------------------------------------------*/
license(void)3682 void license ( void )
3683 {
3684    fprintf ( stderr,
3685 
3686     "bzip2, a block-sorting file compressor.  "
3687     "Version 0.1pl2, 29-Aug-97.\n"
3688     "   \n"
3689     "   Copyright (C) 1996, 1997 by Julian Seward.\n"
3690     "   \n"
3691     "   This program is free software; you can redistribute it and/or modify\n"
3692     "   it under the terms of the GNU General Public License as published by\n"
3693     "   the Free Software Foundation; either version 2 of the License, or\n"
3694     "   (at your option) any later version.\n"
3695     "   \n"
3696     "   This program is distributed in the hope that it will be useful,\n"
3697     "   but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
3698     "   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n"
3699     "   GNU General Public License for more details.\n"
3700     "   \n"
3701     "   You should have received a copy of the GNU General Public License\n"
3702     "   along with this program; if not, write to the Free Software\n"
3703     "   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n"
3704     "   \n"
3705     "   The GNU General Public License is contained in the file LICENSE.\n"
3706     "   \n"
3707    );
3708 }
3709 
3710 
3711 /*---------------------------------------------*/
usage(Char * fullProgName)3712 void usage ( Char *fullProgName )
3713 {
3714    fprintf (
3715       stderr,
3716       "bzip2, a block-sorting file compressor.  "
3717       "Version 0.1pl2, 29-Aug-97.\n"
3718       "\n   usage: %s [flags and input files in any order]\n"
3719       "\n"
3720       "   -h --help           print this message\n"
3721       "   -d --decompress     force decompression\n"
3722       "   -f --compress       force compression\n"
3723       "   -t --test           test compressed file integrity\n"
3724       "   -c --stdout         output to standard out\n"
3725       "   -v --verbose        be verbose (a 2nd -v gives more)\n"
3726       "   -k --keep           keep (don't delete) input files\n"
3727       "   -L --license        display software version & license\n"
3728       "   -V --version        display software version & license\n"
3729       "   -s --small          use less memory (at most 2500k)\n"
3730       "   -1 .. -9            set block size to 100k .. 900k\n"
3731       "   --repetitive-fast   compress repetitive blocks faster\n"
3732       "   --repetitive-best   compress repetitive blocks better\n"
3733       "\n"
3734       "   If invoked as `bzip2', the default action is to compress.\n"
3735       "              as `bunzip2', the default action is to decompress.\n"
3736       "\n"
3737       "   If no file names are given, bzip2 compresses or decompresses\n"
3738       "   from standard input to standard output.  You can combine\n"
3739       "   flags, so `-v -4' means the same as -v4 or -4v, &c.\n"
3740       #if BZ_UNIX
3741       "\n"
3742       #endif
3743       ,
3744 
3745       fullProgName
3746    );
3747 }
3748 
3749 
3750 /*---------------------------------------------*/
3751 /*--
3752   All the garbage from here to main() is purely to
3753   implement a linked list of command-line arguments,
3754   into which main() copies argv[1 .. argc-1].
3755 
3756   The purpose of this ridiculous exercise is to
3757   facilitate the expansion of wildcard characters
3758   * and ? in filenames for halfwitted OSs like
3759   MSDOS, Windows 95 and NT.
3760 
3761   The actual Dirty Work is done by the platform-specific
3762   macro APPEND_FILESPEC.
3763 --*/
3764 
3765 typedef
3766    struct zzzz {
3767       Char        *name;
3768       struct zzzz *link;
3769    }
3770    Cell;
3771 
3772 
3773 /*---------------------------------------------*/
myMalloc(Int32 n)3774 void *myMalloc ( Int32 n )
3775 {
3776    void* p;
3777 
3778    p = malloc ( (size_t)n );
3779    if (p == NULL) {
3780       fprintf (
3781          stderr,
3782          "%s: `malloc' failed on request for %d bytes.\n",
3783          progName, n
3784       );
3785       exit ( 1 );
3786    }
3787    return p;
3788 }
3789 
3790 
3791 /*---------------------------------------------*/
mkCell(void)3792 Cell *mkCell ( void )
3793 {
3794    Cell *c;
3795 
3796    c = (Cell*) myMalloc ( sizeof ( Cell ) );
3797    c->name = NULL;
3798    c->link = NULL;
3799    return c;
3800 }
3801 
3802 
3803 /*---------------------------------------------*/
snocString(Cell * root,Char * name)3804 Cell *snocString ( Cell *root, Char *name )
3805 {
3806    if (root == NULL) {
3807       Cell *tmp = mkCell();
3808       tmp->name = (Char*) myMalloc ( 5 + strlen(name) );
3809       strcpy ( tmp->name, name );
3810       return tmp;
3811    } else {
3812       Cell *tmp = root;
3813       while (tmp->link != NULL) tmp = tmp->link;
3814       tmp->link = snocString ( tmp->link, name );
3815       return root;
3816    }
3817 }
3818 
3819 
3820 
3821 /*---------------------------------------------*/
3822 #define ISFLAG(s) (strcmp(aa->name, (s))==0)
3823 
3824 
main(IntNative argc,Char * argv[])3825 IntNative main ( IntNative argc, Char *argv[] )
3826 {
3827    Int32  i, j;
3828    Char   *tmp;
3829    Cell   *argList;
3830    Cell   *aa;
3831 
3832 
3833    #if DEBUG
3834       fprintf ( stderr, "bzip2: *** compiled with debugging ON ***\n" );
3835    #endif
3836 
3837    /*-- Be really really really paranoid :-) --*/
3838    if (sizeof(Int32) != 4 || sizeof(UInt32) != 4  ||
3839        sizeof(Int16) != 2 || sizeof(UInt16) != 2  ||
3840        sizeof(Char)  != 1 || sizeof(UChar)  != 1) {
3841       fprintf ( stderr,
3842                 "bzip2: I'm not configured correctly for this platform!\n"
3843                 "\tI require Int32, Int16 and Char to have sizes\n"
3844                 "\tof 4, 2 and 1 bytes to run properly, and they don't.\n"
3845                 "\tProbably you can fix this by defining them correctly,\n"
3846                 "\tand recompiling.  Bye!\n" );
3847       exit(1);
3848    }
3849 
3850 
3851    /*-- Set up signal handlers --*/
3852    signal (SIGINT,  mySignalCatcher);
3853    signal (SIGTERM, mySignalCatcher);
3854    signal (SIGSEGV, mySIGSEGVorSIGBUScatcher);
3855    #if BZ_UNIX
3856    signal (SIGHUP,  mySignalCatcher);
3857    signal (SIGBUS,  mySIGSEGVorSIGBUScatcher);
3858    #endif
3859 
3860 
3861    /*-- Initialise --*/
3862    outputHandleJustInCase  = NULL;
3863    ftab                    = NULL;
3864    ll4                     = NULL;
3865    ll16                    = NULL;
3866    ll8                     = NULL;
3867    tt                      = NULL;
3868    block                   = NULL;
3869    zptr                    = NULL;
3870    smallMode               = False;
3871    keepInputFiles          = False;
3872    verbosity               = 0;
3873    blockSize100k           = 9;
3874    testFailsExist          = False;
3875    bsStream                = NULL;
3876    numFileNames            = 0;
3877    numFilesProcessed       = 0;
3878    workFactor              = 30;
3879 
3880    strcpy ( inName,  "(none)" );
3881    strcpy ( outName, "(none)" );
3882 
3883    strcpy ( progNameReally, argv[0] );
3884    progName = &progNameReally[0];
3885    for (tmp = &progNameReally[0]; *tmp != '\0'; tmp++)
3886       if (*tmp == PATH_SEP) progName = tmp + 1;
3887 
3888 
3889    /*-- Expand filename wildcards in arg list --*/
3890    argList = NULL;
3891    for (i = 1; i <= argc-1; i++)
3892       APPEND_FILESPEC(argList, argv[i]);
3893 
3894 
3895    /*-- Find the length of the longest filename --*/
3896    longestFileName = 7;
3897    numFileNames    = 0;
3898    for (aa = argList; aa != NULL; aa = aa->link)
3899       if (aa->name[0] != '-') {
3900          numFileNames++;
3901          if (longestFileName < (Int32)strlen(aa->name) )
3902             longestFileName = (Int32)strlen(aa->name);
3903       }
3904 
3905 
3906    /*-- Determine what to do (compress/uncompress/test). --*/
3907    /*-- Note that subsequent flag handling may change this. --*/
3908    opMode = OM_Z;
3909 
3910    if ( (strcmp ( "bunzip2",     progName ) == 0) ||
3911         (strcmp ( "BUNZIP2",     progName ) == 0) ||
3912         (strcmp ( "bunzip2.exe", progName ) == 0) ||
3913         (strcmp ( "BUNZIP2.EXE", progName ) == 0) )
3914       opMode = OM_UNZ;
3915 
3916 
3917    /*-- Determine source modes; flag handling may change this too. --*/
3918    if (numFileNames == 0)
3919       srcMode = SM_I2O; else srcMode = SM_F2F;
3920 
3921 
3922    /*-- Look at the flags. --*/
3923    for (aa = argList; aa != NULL; aa = aa->link)
3924       if (aa->name[0] == '-' && aa->name[1] != '-')
3925          for (j = 1; aa->name[j] != '\0'; j++)
3926             switch (aa->name[j]) {
3927                case 'c': srcMode          = SM_F2O; break;
3928                case 'd': opMode           = OM_UNZ; break;
3929                case 'f': opMode           = OM_Z; break;
3930                case 't': opMode           = OM_TEST; break;
3931                case 'k': keepInputFiles   = True; break;
3932                case 's': smallMode        = True; break;
3933                case '1': blockSize100k    = 1; break;
3934                case '2': blockSize100k    = 2; break;
3935                case '3': blockSize100k    = 3; break;
3936                case '4': blockSize100k    = 4; break;
3937                case '5': blockSize100k    = 5; break;
3938                case '6': blockSize100k    = 6; break;
3939                case '7': blockSize100k    = 7; break;
3940                case '8': blockSize100k    = 8; break;
3941                case '9': blockSize100k    = 9; break;
3942                case 'V':
3943                case 'L': license();            break;
3944                case 'v': verbosity++; break;
3945                case 'h': usage ( progName );
3946                          exit ( 1 );
3947                          break;
3948                default:  fprintf ( stderr, "%s: Bad flag `%s'\n",
3949                                    progName, aa->name );
3950                          usage ( progName );
3951                          exit ( 1 );
3952                          break;
3953          }
3954 
3955    /*-- And again ... --*/
3956    for (aa = argList; aa != NULL; aa = aa->link) {
3957       if (ISFLAG("--stdout"))            srcMode          = SM_F2O;  else
3958       if (ISFLAG("--decompress"))        opMode           = OM_UNZ;  else
3959       if (ISFLAG("--compress"))          opMode           = OM_Z;    else
3960       if (ISFLAG("--test"))              opMode           = OM_TEST; else
3961       if (ISFLAG("--keep"))              keepInputFiles   = True;    else
3962       if (ISFLAG("--small"))             smallMode        = True;    else
3963       if (ISFLAG("--version"))           license();                  else
3964       if (ISFLAG("--license"))           license();                  else
3965       if (ISFLAG("--repetitive-fast"))   workFactor = 5;             else
3966       if (ISFLAG("--repetitive-best"))   workFactor = 150;           else
3967       if (ISFLAG("--verbose"))           verbosity++;                else
3968       if (ISFLAG("--help"))              { usage ( progName ); exit ( 1 ); }
3969          else
3970          if (strncmp ( aa->name, "--", 2) == 0) {
3971             fprintf ( stderr, "%s: Bad flag `%s'\n", progName, aa->name );
3972             usage ( progName );
3973             exit ( 1 );
3974          }
3975    }
3976 
3977    if (opMode == OM_Z && smallMode) blockSize100k = 2;
3978 
3979    if (opMode == OM_Z && srcMode == SM_F2O && numFileNames > 1) {
3980       fprintf ( stderr, "%s: I won't compress multiple files to stdout.\n",
3981                 progName );
3982       exit ( 1 );
3983    }
3984 
3985    if (srcMode == SM_F2O && numFileNames == 0) {
3986       fprintf ( stderr, "%s: -c expects at least one filename.\n",
3987                 progName );
3988       exit ( 1 );
3989    }
3990 
3991    if (opMode == OM_TEST && srcMode == SM_F2O) {
3992       fprintf ( stderr, "%s: -c and -t cannot be used together.\n",
3993                 progName );
3994       exit ( 1 );
3995    }
3996 
3997    if (opMode != OM_Z) blockSize100k = 0;
3998 
3999    if (opMode == OM_Z) {
4000       allocateCompressStructures();
4001       if (srcMode == SM_I2O)
4002          compress ( NULL );
4003          else
4004          for (aa = argList; aa != NULL; aa = aa->link)
4005             if (aa->name[0] != '-') {
4006                numFilesProcessed++;
4007                compress ( aa->name );
4008             }
4009    } else
4010    if (opMode == OM_UNZ) {
4011       if (srcMode == SM_I2O)
4012          uncompress ( NULL );
4013          else
4014          for (aa = argList; aa != NULL; aa = aa->link)
4015             if (aa->name[0] != '-') {
4016                numFilesProcessed++;
4017                uncompress ( aa->name );
4018             }
4019    } else {
4020       testFailsExist = False;
4021       if (srcMode == SM_I2O)
4022          testf ( NULL );
4023          else
4024          for (aa = argList; aa != NULL; aa = aa->link)
4025             if (aa->name[0] != '-') {
4026                numFilesProcessed++;
4027                testf ( aa->name );
4028             }
4029       if (testFailsExist) {
4030          fprintf ( stderr,
4031            "\n"
4032            "You can use the `bzip2recover' program to *attempt* to recover\n"
4033            "data from undamaged sections of corrupted files.\n\n"
4034          );
4035          exit(2);
4036       }
4037    }
4038    return 0;
4039 }
4040 
4041 
4042 /*-----------------------------------------------------------*/
4043 /*--- end                                         bzip2.c ---*/
4044 /*-----------------------------------------------------------*/
4045