1 #pragma prototyped
2
3 /*-----------------------------------------------------------*/
4 /*--- Block recoverer program for bzip2 ---*/
5 /*--- bzip2recover.c ---*/
6 /*-----------------------------------------------------------*/
7
8 /*--
9 This program is bzip2recover, a program to attempt data
10 salvage from damaged files created by the accompanying
11 bzip2-0.9.0c program.
12
13 Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
14
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions
17 are met:
18
19 1. Redistributions of source code must retain the above copyright
20 notice, this list of conditions and the following disclaimer.
21
22 2. The origin of this software must not be misrepresented; you must
23 not claim that you wrote the original software. If you use this
24 software in a product, an acknowledgment in the product
25 documentation would be appreciated but is not required.
26
27 3. Altered source versions must be plainly marked as such, and must
28 not be misrepresented as being the original software.
29
30 4. The name of the author may not be used to endorse or promote
31 products derived from this software without specific prior written
32 permission.
33
34 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
35 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
36 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
38 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
40 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
41 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
42 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
43 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45
46 Julian Seward, Guildford, Surrey, UK.
47 jseward@acm.org
48 bzip2/libbzip2 version 0.9.0c of 18 October 1998
49 --*/
50
51 /*--
52 This program is a complete hack and should be rewritten
53 properly. It isn't very complicated.
54 --*/
55
56 #include <stdio.h>
57 #include <errno.h>
58 #include <stdlib.h>
59 #include <string.h>
60
/* Short aliases for the basic integer and character types used below. */
typedef unsigned int  UInt32;
typedef int           Int32;
typedef unsigned char UChar;
typedef char          Char;

/* Boolean kept in an unsigned char, plus its two canonical values. */
typedef unsigned char Bool;
#define True  ((Bool)1)
#define False ((Bool)0)


/* Name buffers: the damaged input file, the output file currently being
   produced, and the program name used as a prefix in diagnostics. */
Char inFileName[2000];
Char outFileName[2000];
Char progName[2000];

/* Byte counters, both starting at zero. */
UInt32 bytesOut = 0;
UInt32 bytesIn  = 0;
76
77
78 /*---------------------------------------------------*/
79 /*--- I/O errors ---*/
80 /*---------------------------------------------------*/
81
82 /*---------------------------------------------*/
readError(void)83 void readError ( void )
84 {
85 fprintf ( stderr,
86 "%s: I/O error reading `%s', possible reason follows.\n",
87 progName, inFileName );
88 perror ( progName );
89 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
90 progName );
91 exit ( 1 );
92 }
93
94
95 /*---------------------------------------------*/
writeError(void)96 void writeError ( void )
97 {
98 fprintf ( stderr,
99 "%s: I/O error reading `%s', possible reason follows.\n",
100 progName, inFileName );
101 perror ( progName );
102 fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
103 progName );
104 exit ( 1 );
105 }
106
107
108 /*---------------------------------------------*/
/*--
   Report a failed allocation of n bytes and terminate.

   Prints the requested size, warns that the output may be incomplete,
   and exits with status 1.  Does not return.
--*/
void mallocFail ( Int32 n )
{
   const Char* me = progName;

   fprintf ( stderr, "%s: malloc failed on request for %d bytes.\n", me, n );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n", me );
   exit ( 1 );
}
118
119
120 /*---------------------------------------------------*/
121 /*--- Bit stream I/O ---*/
122 /*---------------------------------------------------*/
123
124 typedef
125 struct {
126 FILE* handle;
127 Int32 buffer;
128 Int32 buffLive;
129 Char mode;
130 }
131 BitStream;
132
133
134 /*---------------------------------------------*/
bsOpenReadStream(FILE * stream)135 BitStream* bsOpenReadStream ( FILE* stream )
136 {
137 BitStream *bs = malloc ( sizeof(BitStream) );
138 if (bs == NULL) mallocFail ( sizeof(BitStream) );
139 bs->handle = stream;
140 bs->buffer = 0;
141 bs->buffLive = 0;
142 bs->mode = 'r';
143 return bs;
144 }
145
146
147 /*---------------------------------------------*/
bsOpenWriteStream(FILE * stream)148 BitStream* bsOpenWriteStream ( FILE* stream )
149 {
150 BitStream *bs = malloc ( sizeof(BitStream) );
151 if (bs == NULL) mallocFail ( sizeof(BitStream) );
152 bs->handle = stream;
153 bs->buffer = 0;
154 bs->buffLive = 0;
155 bs->mode = 'w';
156 return bs;
157 }
158
159
160 /*---------------------------------------------*/
/*--
   Append the low-order bit of `bit' to a write stream.

   Bits are collected most-significant first.  A full byte is only
   emitted when the next bit arrives: at that point the completed byte
   is written with putc (writeError() on failure), bytesOut is
   incremented, and the new bit starts the next byte.
--*/
void bsPutBit ( BitStream* bs, Int32 bit )
{
   Int32 lowBit = bit & 0x1;

   if (bs->buffLive != 8) {
      /* Still room in the current byte: shift the new bit in. */
      bs->buffer = ( (bs->buffer << 1) | lowBit );
      bs->buffLive++;
      return;
   }

   /* Current byte is complete: flush it, then start afresh. */
   if (putc ( (UChar) bs->buffer, bs->handle ) == EOF) writeError();
   bytesOut++;
   bs->buffer   = lowBit;
   bs->buffLive = 1;
}
174
175
176 /*---------------------------------------------*/
177 /*--
178 Returns 0 or 1, or 2 to indicate EOF.
179 --*/
bsGetBit(BitStream * bs)180 Int32 bsGetBit ( BitStream* bs )
181 {
182 if (bs->buffLive > 0) {
183 bs->buffLive --;
184 return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
185 } else {
186 Int32 retVal = getc ( bs->handle );
187 if ( retVal == EOF ) {
188 if (errno != 0) readError();
189 return 2;
190 }
191 bs->buffLive = 7;
192 bs->buffer = retVal;
193 return ( ((bs->buffer) >> 7) & 0x1 );
194 }
195 }
196
197
198 /*---------------------------------------------*/
/*--
   Finish with a bit stream: close its stdio handle and free the
   BitStream object.

   For a write stream the pending bits are first left-justified in
   their byte (zero-filled on the right), written out, counted in
   bytesOut, and the handle is flushed.  Any failure is routed to
   writeError() for write streams or readError() for read streams.
--*/
void bsClose ( BitStream* bs )
{
   Bool writing = (Bool) (bs->mode == 'w');

   if (writing) {
      /* Pad the last byte out to 8 bits. */
      for ( ; bs->buffLive < 8; bs->buffLive++ )
         bs->buffer <<= 1;

      if (putc ( (UChar) (bs->buffer), bs->handle ) == EOF) writeError();
      bytesOut++;
      if (fflush ( bs->handle ) == EOF) writeError();
   }

   if (fclose ( bs->handle ) == EOF) {
      if (writing) writeError();
      else         readError();
   }
   free ( bs );
}
220
221
222 /*---------------------------------------------*/
/*--
   Write the 8 bits of c to a write stream, most significant bit first.
--*/
void bsPutUChar ( BitStream* bs, UChar c )
{
   UInt32 mask;

   /* Walk a single-bit mask from bit 7 down to bit 0. */
   for (mask = 0x80; mask != 0; mask >>= 1)
      bsPutBit ( bs, (c & mask) != 0 );
}
229
230
231 /*---------------------------------------------*/
/*--
   Write the low 32 bits of c to a write stream, bit 31 first and
   bit 0 last.
--*/
void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   Int32 shift = 32;

   /* shift takes the values 31, 30, ..., 0 inside the loop body. */
   while (shift-- > 0)
      bsPutBit ( bs, (c >> shift) & 0x1 );
}
239
240
241 /*---------------------------------------------*/
endsInBz2(Char * name)242 Bool endsInBz2 ( Char* name )
243 {
244 Int32 n = strlen ( name );
245 if (n <= 4) return False;
246 return
247 (name[n-4] == '.' &&
248 name[n-3] == 'b' &&
249 name[n-2] == 'z' &&
250 name[n-1] == '2');
251 }
252
253
254 /*---------------------------------------------------*/
255 /*--- ---*/
256 /*---------------------------------------------------*/
257
/* 48-bit pattern that main() scans for as a block start, split into its
   top 16 bits (HI) and bottom 32 bits (LO). */
#define BLOCK_HEADER_HI   0x3141UL
#define BLOCK_HEADER_LO   0x59265359UL

/* 48-bit pattern that main() scans for as the end-of-stream marker,
   split the same way. */
#define BLOCK_ENDMARK_HI  0x1772UL
#define BLOCK_ENDMARK_LO  0x45385090UL
263
/*--
   Entry point: split a damaged .bz2 file into one small .bz2 file per
   block that can be located.

   Usage: progName damaged_file_name

   Pass 1 reads the input bit by bit through a 64-bit sliding window
   (buffHi:buffLo) and records the bit positions between consecutive
   48-bit block-header / end-marker patterns.  Spans of at least 130
   bits become recoverable blocks rbStart[i]..rbEnd[i].

   Pass 2 re-reads the input and, for each recorded block, writes a
   file named "rec" + 4-digit block number + input name (with ".bz2"
   appended if missing).  Each file holds a "BZh9" stream header, the
   block header, the block's bits, the end marker and the block's CRC.

   Returns 0 on success; exits with status 1 on any error.
--*/
Int32 main ( Int32 argc, Char** argv )
{
   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn, *bsWr;
   Int32       currBlock, b, wrBlock;
   UInt32      bitsRead;
   UInt32      bStart[20000];
   UInt32      bEnd[20000];

   UInt32      rbStart[20000];
   UInt32      rbEnd[20000];
   Int32       rbCtr;

   /* Number of entries in each of the four tables above. */
   const Int32 maxBlocks = (Int32) (sizeof(bStart) / sizeof(bStart[0]));

   UInt32      buffHi, buffLo, blockCRC;
   Char*       p;

   /* Bounded copy: strncat always NUL-terminates, and argv[0] is not
      guaranteed to be present or to fit in progName. */
   progName[0] = 0;
   strncat ( progName,
             (argc > 0 && argv[0] != NULL) ? argv[0] : "bzip2recover",
             sizeof(progName) - 1 );
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr, "bzip2recover v0.9.0c: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                        progName, progName );
      exit(1);
   }

   /* Output names are "rec" + block number + input name + ".bz2", so
      keep 20 bytes of headroom to guarantee that both inFileName and
      outFileName can hold their contents. */
   if (strlen ( argv[1] ) >= sizeof(inFileName) - 20) {
      fprintf ( stderr,
                "%s: supplied file name is too long (%d or more characters).\n",
                progName, (Int32) (sizeof(inFileName) - 20) );
      exit(1);
   }
   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   /*-- Pass 1: locate block boundaries. --*/

   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         /* End of input: whatever follows the last marker is at best an
            incomplete block, which is reported but not recovered. */
         if (bitsRead >= bStart[currBlock] &&
             (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, " block %d runs from %u to %u (incomplete)\n",
                         currBlock, bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         /* The previous block ended just before this 48-bit marker. */
         if (bitsRead > 49)
            bEnd[currBlock] = bitsRead-49; else
            bEnd[currBlock] = 0;

         /* The explicit end >= start test stops the unsigned subtraction
            from wrapping round and admitting a nonsensical block. */
         if (currBlock > 0 &&
             bEnd[currBlock] >= bStart[currBlock] &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, " block %d runs from %u to %u\n",
                      rbCtr+1, bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }

         /* rbCtr never exceeds currBlock, so this one check keeps all
            four tables within bounds. */
         if (currBlock + 1 >= maxBlocks) {
            fprintf ( stderr,
                      "%s: `%s' contains more than %d blocks; cannot handle it.\n",
                      progName, inFileName, maxBlocks - 1 );
            exit(1);
         }
         currBlock++;

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks are numbered 1 to rbCtr inclusive
        (table indices 0 to rbCtr-1). --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   };

   /*-- Pass 2: copy each identified block into its own file. --*/

   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   /*-- placate gcc's dataflow analyser --*/
   blockCRC = 0; bsWr = NULL;

   bitsRead = 0;
   outFile = NULL;
   wrBlock = 0;
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);

      /* 48 bits into the block, the window holds the block's first
         32 bits (its CRC) followed by 16 further bits. */
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         if (outFile != NULL) {
            /* Terminate the one-block stream: end marker, then the
               block CRC. */
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            /* bsClose freed bsWr and closed outFile; forget both so the
               freed stream can never be written to or closed again. */
            bsWr = NULL;
            outFile = NULL;
         }
         /* Stop right after the last recorded block instead of moving
            on to a table entry that was never filled in. */
         if (wrBlock + 1 >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         /* Start of the next recoverable block: open its output file. */
         outFileName[0] = 0;
         sprintf ( outFileName, "rec%4d", wrBlock+1 );
         for (p = outFileName; *p != 0; p++) if (*p == ' ') *p = '0';
         /* Safe: the length of inFileName was checked at startup. */
         strcat ( outFileName, inFileName );
         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

         fprintf ( stderr, " writing block %d to `%s' ...\n",
                           wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         bsWr = bsOpenWriteStream ( outFile );
         /* Stream header "BZh9" followed by the block header magic. */
         bsPutUChar ( bsWr, 'B' );  bsPutUChar ( bsWr, 'Z' );
         bsPutUChar ( bsWr, 'h' );  bsPutUChar ( bsWr, '9' );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   /* Release the second-pass input stream as well. */
   bsClose ( bsIn );

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}
430
431
432
433 /*-----------------------------------------------------------*/
434 /*--- end bzip2recover.c ---*/
435 /*-----------------------------------------------------------*/
436