1 #pragma prototyped
2 
3 /*-----------------------------------------------------------*/
4 /*--- Block recoverer program for bzip2                   ---*/
5 /*---                                      bzip2recover.c ---*/
6 /*-----------------------------------------------------------*/
7 
8 /*--
9   This program is bzip2recover, a program to attempt data
10   salvage from damaged files created by the accompanying
11   bzip2-0.9.0c program.
12 
13   Copyright (C) 1996-1998 Julian R Seward.  All rights reserved.
14 
15   Redistribution and use in source and binary forms, with or without
16   modification, are permitted provided that the following conditions
17   are met:
18 
19   1. Redistributions of source code must retain the above copyright
20      notice, this list of conditions and the following disclaimer.
21 
22   2. The origin of this software must not be misrepresented; you must
23      not claim that you wrote the original software.  If you use this
24      software in a product, an acknowledgment in the product
25      documentation would be appreciated but is not required.
26 
27   3. Altered source versions must be plainly marked as such, and must
28      not be misrepresented as being the original software.
29 
30   4. The name of the author may not be used to endorse or promote
31      products derived from this software without specific prior written
32      permission.
33 
34   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
35   OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
36   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37   ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
38   DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
40   GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
41   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
42   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 
46   Julian Seward, Guildford, Surrey, UK.
47   jseward@acm.org
48   bzip2/libbzip2 version 0.9.0c of 18 October 1998
49 --*/
50 
51 /*--
52   This program is a complete hack and should be rewritten
53   properly.  It isn't very complicated.
54 --*/
55 
56 #include <stdio.h>
57 #include <errno.h>
58 #include <stdlib.h>
59 #include <string.h>
60 
/*--
  Short names for the basic types used throughout this file.
  UInt32 is plain unsigned int; the code assumes it holds at
  least 32 bits (main shifts such values by up to 31 places).
  Bool is a one-byte flag taking the values True and False.
--*/
61 typedef  unsigned int   UInt32;
62 typedef  int            Int32;
63 typedef  unsigned char  UChar;
64 typedef  char           Char;
65 typedef  unsigned char  Bool;
66 #define True    ((Bool)1)
67 #define False   ((Bool)0)
68 
69 
/*--
  File-scope state shared by main and the error reporters.
  inFileName  : copied from argv[1] in main.
  outFileName : rebuilt by main for every recovered block.
  progName    : copied from argv[0] in main; prefixes all messages.
--*/
70 Char inFileName[2000];
71 Char outFileName[2000];
72 Char progName[2000];
73 
/*--
  bytesOut is incremented for every byte handed to putc by
  bsPutBit and bsClose; nothing in this file reads it back.
  bytesIn is only initialised here and is not referenced
  anywhere else in this file.
--*/
74 UInt32 bytesOut = 0;
75 UInt32 bytesIn  = 0;
76 
77 
78 /*---------------------------------------------------*/
79 /*--- I/O errors                                  ---*/
80 /*---------------------------------------------------*/
81 
82 /*---------------------------------------------*/
readError(void)83 void readError ( void )
84 {
85    fprintf ( stderr,
86              "%s: I/O error reading `%s', possible reason follows.\n",
87             progName, inFileName );
88    perror ( progName );
89    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
90              progName );
91    exit ( 1 );
92 }
93 
94 
95 /*---------------------------------------------*/
writeError(void)96 void writeError ( void )
97 {
98    fprintf ( stderr,
99              "%s: I/O error reading `%s', possible reason follows.\n",
100             progName, inFileName );
101    perror ( progName );
102    fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
103              progName );
104    exit ( 1 );
105 }
106 
107 
108 /*---------------------------------------------*/
/*--
   Report on stderr that an allocation request for n bytes could
   not be satisfied, then terminate the program with exit
   status 1.  Never returns.
--*/
void mallocFail ( Int32 n )
{
   fprintf ( stderr, "%s: malloc failed on request for %d bytes.\n",
             progName, n );

   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );

   exit ( 1 );
}
118 
119 
120 /*---------------------------------------------------*/
121 /*--- Bit stream I/O                              ---*/
122 /*---------------------------------------------------*/
123 
/*--
  State of one bit-level stream layered over a stdio FILE.
  Bits are transferred most significant bit of each byte first.
--*/
124 typedef
125    struct {
126       FILE*  handle;     /* underlying stdio stream; closed by bsClose */
127       Int32  buffer;     /* write: bits gathered so far; read: last byte fetched */
128       Int32  buffLive;   /* write: number of bits held in buffer; read: bits not yet returned */
129       Char   mode;       /* 'r' for a read stream, 'w' for a write stream */
130    }
131    BitStream;
132 
133 
134 /*---------------------------------------------*/
bsOpenReadStream(FILE * stream)135 BitStream* bsOpenReadStream ( FILE* stream )
136 {
137    BitStream *bs = malloc ( sizeof(BitStream) );
138    if (bs == NULL) mallocFail ( sizeof(BitStream) );
139    bs->handle = stream;
140    bs->buffer = 0;
141    bs->buffLive = 0;
142    bs->mode = 'r';
143    return bs;
144 }
145 
146 
147 /*---------------------------------------------*/
bsOpenWriteStream(FILE * stream)148 BitStream* bsOpenWriteStream ( FILE* stream )
149 {
150    BitStream *bs = malloc ( sizeof(BitStream) );
151    if (bs == NULL) mallocFail ( sizeof(BitStream) );
152    bs->handle = stream;
153    bs->buffer = 0;
154    bs->buffLive = 0;
155    bs->mode = 'w';
156    return bs;
157 }
158 
159 
160 /*---------------------------------------------*/
/*--
   Append the lowest bit of `bit' to a write stream.  Bits are
   gathered most significant first; a completed byte is only sent
   to the file when the next bit arrives (or from bsClose), and
   each byte sent is counted in bytesOut.  A failed write ends the
   program through writeError.
--*/
void bsPutBit ( BitStream* bs, Int32 bit )
{
   /*-- buffer already holds a whole byte: emit it and start over --*/
   if (bs->buffLive == 8) {
      if (putc ( (UChar) bs->buffer, bs->handle ) == EOF)
         writeError();
      bytesOut++;
      bs->buffer   = 0;
      bs->buffLive = 0;
   }

   /*-- shift the new bit in underneath the ones gathered so far --*/
   bs->buffer = (bs->buffer << 1) | (bit & 0x1);
   bs->buffLive++;
}
174 
175 
176 /*---------------------------------------------*/
177 /*--
178    Returns 0 or 1, or 2 to indicate EOF.
179 --*/
bsGetBit(BitStream * bs)180 Int32 bsGetBit ( BitStream* bs )
181 {
182    if (bs->buffLive > 0) {
183       bs->buffLive --;
184       return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
185    } else {
186       Int32 retVal = getc ( bs->handle );
187       if ( retVal == EOF ) {
188          if (errno != 0) readError();
189          return 2;
190       }
191       bs->buffLive = 7;
192       bs->buffer = retVal;
193       return ( ((bs->buffer) >> 7) & 0x1 );
194    }
195 }
196 
197 
198 /*---------------------------------------------*/
/*--
   Shut down a bit stream and release it.
   For a write stream the pending bits are first padded on the
   right with zero bits up to a whole byte, that byte is written
   (and counted in bytesOut) and the file is flushed.  In either
   mode the underlying file is then closed and the BitStream
   freed.  Any failure ends the program through writeError (write
   streams) or readError (read streams).
--*/
void bsClose ( BitStream* bs )
{
   Bool writing = (bs->mode == 'w');

   if (writing) {
      /*-- left-align whatever is pending inside the final byte --*/
      if (bs->buffLive < 8) {
         bs->buffer <<= (8 - bs->buffLive);
         bs->buffLive = 8;
      }
      if (putc ( (UChar) (bs->buffer), bs->handle ) == EOF)
         writeError();
      bytesOut++;
      if (fflush ( bs->handle ) == EOF)
         writeError();
   }

   if (fclose ( bs->handle ) == EOF) {
      if (writing)
         writeError();
      else
         readError();
   }

   free ( bs );
}
220 
221 
222 /*---------------------------------------------*/
/*--
   Write the eight bits of c to a write stream, most significant
   bit first, one bsPutBit call per bit.
--*/
void bsPutUChar ( BitStream* bs, UChar c )
{
   UInt32 value = (UInt32) c;
   UInt32 mask;

   /*-- walk a single-bit mask from bit 7 down to bit 0 --*/
   for (mask = 0x80; mask != 0; mask >>= 1)
      bsPutBit ( bs, (value & mask) != 0 );
}
229 
230 
231 /*---------------------------------------------*/
/*--
   Write the low 32 bits of c to a write stream, bit 31 first and
   bit 0 last, one bsPutBit call per bit.
--*/
void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   Int32 remaining = 32;

   /*-- remaining-1 is the index of the next bit to send --*/
   while (remaining > 0) {
      remaining--;
      bsPutBit ( bs, (c >> remaining) & 0x1 );
   }
}
239 
240 
241 /*---------------------------------------------*/
endsInBz2(Char * name)242 Bool endsInBz2 ( Char* name )
243 {
244    Int32 n = strlen ( name );
245    if (n <= 4) return False;
246    return
247       (name[n-4] == '.' &&
248        name[n-3] == 'b' &&
249        name[n-2] == 'z' &&
250        name[n-1] == '2');
251 }
252 
253 
254 /*---------------------------------------------------*/
255 /*---                                             ---*/
256 /*---------------------------------------------------*/
257 
/*--
  The two 48-bit patterns main searches the input for, each split
  into its upper 16 bits (_HI) and lower 32 bits (_LO) so it can
  be compared against the buffHi/buffLo pair holding the most
  recently read bits.  BLOCK_HEADER is the pattern main writes in
  front of every recovered block; BLOCK_ENDMARK is the pattern it
  writes after one.
--*/
258 #define BLOCK_HEADER_HI  0x00003141UL
259 #define BLOCK_HEADER_LO  0x59265359UL
260 
261 #define BLOCK_ENDMARK_HI 0x00001772UL
262 #define BLOCK_ENDMARK_LO 0x45385090UL
263 
/*--
   Program entry point:  bzip2recover damaged_file_name

   Pass 1 reads the input one bit at a time and records the bit
   positions lying between consecutive 48-bit block header /
   end-of-stream markers.  Pass 2 re-reads the input and copies
   each recorded block into its own file, named "rec" + 4-digit
   block number + the input name (+ ".bz2" unless already there),
   wrapped in a fresh stream header and trailer.

   Returns 0 on success.  Every failure (bad usage, over-long
   file name, unopenable file, no blocks found, too many blocks,
   I/O error) prints a message on stderr and exits with status 1.

   NOTE(review): bit positions are kept in UInt32 counters, so
   inputs of 2^32 bits (512MB) or more are not handled correctly.
   NOTE(review): the output name is formed by prefixing the whole
   input name, so an input name containing a directory part gives
   an output path that most likely cannot be opened.
--*/
Int32 main ( Int32 argc, Char** argv )
{
   /*-- capacity of the block position tables --*/
   enum { MaxBlocks = 20000 };
   /*-- room reserved in outFileName around the input name:
        "rec", the block number, ".bz2" and the terminator --*/
   enum { NameOverhead = 16 };

   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn, *bsWr;
   Int32       currBlock, b, wrBlock;
   UInt32      bitsRead;
   UInt32      bStart[MaxBlocks];
   UInt32      bEnd[MaxBlocks];

   UInt32      rbStart[MaxBlocks];
   UInt32      rbEnd[MaxBlocks];
   Int32       rbCtr;

   UInt32      buffHi, buffLo, blockCRC;

   /*-- bounded copy: argv[0] may be arbitrarily long, or absent --*/
   progName[0] = 0;
   if (argc > 0 && argv[0] != NULL)
      strncat ( progName, argv[0], sizeof(progName) - 1 );
   else
      strcpy ( progName, "bzip2recover" );
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr, "bzip2recover v0.9.0c: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                        progName, progName );
      exit(1);
   }

   /*-- the name must fit inFileName, and outFileName once the
        prefix and suffix have been added --*/
   if (strlen ( argv[1] ) >= sizeof(inFileName) ||
       strlen ( argv[1] ) + NameOverhead > sizeof(outFileName)) {
      fprintf ( stderr, "%s: file name `%s' is too long.\n",
                        progName, argv[1] );
      exit(1);
   }
   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   /*-- pass 1: locate the block boundaries --*/

   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         /*-- end of input: the tail counts as an (incomplete)
              block only if it is at least 40 bits long --*/
         if (bitsRead >= bStart[currBlock] &&
            (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, "   block %d runs from %u to %u (incomplete)\n",
                         currBlock,  bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }

      /*-- buffHi:buffLo always holds the most recent 64 bits --*/
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         /*-- the marker occupies the last 48 bits read, so the
              block before it ends one bit earlier --*/
         if (bitsRead > 49)
            bEnd[currBlock] = bitsRead-49; else
            bEnd[currBlock] = 0;

         /*-- compare before subtracting: with unsigned values an
              end lying before the start would otherwise wrap
              round and pass the minimum-length test --*/
         if (currBlock > 0 &&
             bEnd[currBlock] >= bStart[currBlock] &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, "   block %d runs from %u to %u\n",
                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }
         currBlock++;

         /*-- never index past the end of the tables --*/
         if (currBlock >= MaxBlocks) {
            fprintf ( stderr,
                      "%s: `%s' contains more than %d blocks, which is too many to handle.\n",
                      progName, inFileName, MaxBlocks - 1 );
            exit(1);
         }

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks run from 1 to rbCtr inclusive. --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   }

   /*-- pass 2: copy every identified block to its own file --*/

   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   blockCRC = 0; bsWr = NULL;
   buffHi = buffLo = 0;

   bitsRead = 0;
   outFile = NULL;
   wrBlock = 0;
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);

      /*-- 48 bits into the block, bits 16..47 of the window hold
           the block's first 32 bits; they are re-emitted after
           the end marker as the stream CRC --*/
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         if (outFile != NULL) {
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            /*-- bsClose freed the stream: forget it, so that no
                 later bit can be written through it --*/
            bsWr = NULL;
            outFile = NULL;
         }
         /*-- stop after the last identified block rather than
              stepping onto table entries that were never set --*/
         if (wrBlock + 1 >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         /*-- the length check on argv[1] above guarantees that
              the assembled name fits outFileName --*/
         sprintf ( outFileName, "rec%04d", wrBlock+1 );
         strcat ( outFileName, inFileName );
         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

         fprintf ( stderr, "   writing block %d to `%s' ...\n",
                           wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         bsWr = bsOpenWriteStream ( outFile );
         bsPutUChar ( bsWr, 'B' ); bsPutUChar ( bsWr, 'Z' );
         bsPutUChar ( bsWr, 'h' ); bsPutUChar ( bsWr, '9' );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   bsClose ( bsIn );

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}
430 
431 
432 
433 /*-----------------------------------------------------------*/
434 /*--- end                                  bzip2recover.c ---*/
435 /*-----------------------------------------------------------*/
436