1 /* Lziprecover - Data recovery tool for the lzip format
2 Copyright (C) 2009-2021 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #define _FILE_OFFSET_BITS 64
19
20 #include <algorithm>
21 #include <cerrno>
22 #include <climits>
23 #include <csignal>
24 #include <cstdio>
25 #include <cstring>
26 #include <string>
27 #include <vector>
28 #include <stdint.h>
29 #include <unistd.h>
30 #include <sys/mman.h>
31 #include <sys/stat.h>
32 #include <sys/wait.h>
33
34 #include "lzip.h"
35 #include "md5.h"
36 #include "mtester.h"
37 #include "lzip_index.h"
38
39
40 namespace {
41
// Explanation of a failed reproduction; set by try_reproduce and printed
// by reproduce_file. Stays 0 while there is nothing to report.
const char * final_msg = 0;

// Set after a progress message ending in 'terminator' is written to stdout.
bool pending_newline = false;

// Writes a newline to stdout if a progress message is pending and the
// terminator in use is not already a newline. Always clears the flag.
void print_pending_newline( const char terminator )
  {
  const bool needs_newline = ( terminator != '\n' ) && pending_newline;
  pending_newline = false;
  if( needs_newline ) std::fputc( '\n', stdout );
  }
49
// First nonzero status passed to fatal(); 0 while none has been recorded.
int fatal_retval = 0;

// Records 'retval' as the fatal status unless one is already recorded.
// Returns 'retval' unchanged so it can be used directly in a return.
int fatal( const int retval )
  {
  if( !fatal_retval ) fatal_retval = retval;
  return retval;
  }
54
55 // Returns the position of the damaged area in the member, or -1 if error.
zeroed_sector_pos(const char * const input_filename,const uint8_t * const mbuffer,const long long msize,long long * const sizep,uint8_t * const valuep)56 long long zeroed_sector_pos( const char * const input_filename,
57 const uint8_t * const mbuffer, const long long msize,
58 long long * const sizep, uint8_t * const valuep )
59 {
60 enum { minlen = 8 }; // min number of consecutive identical bytes
61 long long i = Lzip_header::size;
62 const long long end = msize - minlen;
63 long long begin = -1;
64 long long size = 0;
65 uint8_t value = 0;
66 while( i < end ) // leave i pointing to the first differing byte
67 {
68 const uint8_t byte = mbuffer[i++];
69 if( mbuffer[i] == byte )
70 {
71 const long long pos = i - 1;
72 ++i;
73 while( i < msize && mbuffer[i] == byte ) ++i;
74 if( i - pos >= minlen )
75 {
76 if( size > 0 )
77 { show_file_error( input_filename,
78 "Member contains more than one damaged area." );
79 return -1; }
80 begin = pos;
81 size = i - pos;
82 value = byte;
83 break;
84 }
85 }
86 }
87 if( begin < 0 || size <= 0 )
88 { show_file_error( input_filename, "Can't locate damaged area." );
89 return -1; }
90 *sizep = size;
91 *valuep = value;
92 return begin;
93 }
94
95
prepare_master2(const uint8_t * const mbuffer,const long long msize,const long long begin,const unsigned dictionary_size)96 const LZ_mtester * prepare_master2( const uint8_t * const mbuffer,
97 const long long msize,
98 const long long begin,
99 const unsigned dictionary_size )
100 {
101 long long pos_limit = std::max( begin - 16, (long long)Lzip_header::size );
102 LZ_mtester * master = new LZ_mtester( mbuffer, msize, dictionary_size );
103 if( master->test_member( pos_limit ) != -1 ||
104 master->member_position() > (unsigned long long)begin )
105 { delete master; return 0; }
106 // decompress as much data as possible without surpassing begin
107 while( pos_limit < begin && master->test_member( pos_limit + 1 ) == -1 &&
108 master->member_position() <= (unsigned long long)begin )
109 ++pos_limit;
110 delete master;
111 master = new LZ_mtester( mbuffer, msize, dictionary_size );
112 if( master->test_member( pos_limit ) == -1 &&
113 master->member_position() <= (unsigned long long)begin ) return master;
114 delete master;
115 return 0;
116 }
117
118
119 /* Locate in the reference file (rbuf) the truncated data in the dictionary.
120 The reference file must match from the last byte decoded back to the
121 beginning of the file or to the beginning of the dictionary.
122 Choose the match nearest to the beginning of the file.
123 As a fallback, locate the longest partial match at least 512 bytes long.
124 Returns the offset in file of the first undecoded byte, or -1 if no match. */
match_file(const LZ_mtester & master,const uint8_t * const rbuf,const long long rsize,const char * const reference_filename)125 long long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
126 const long long rsize,
127 const char * const reference_filename )
128 {
129 const uint8_t * prev_buffer;
130 int dec_size, prev_size;
131 const uint8_t * const dec_buffer =
132 master.get_buffers( &prev_buffer, &dec_size, &prev_size );
133 if( dec_size < 4 )
134 { if( verbosity >= 1 )
135 { std::printf( "'%s' can't match: not enough data in dictionary.\n",
136 reference_filename ); pending_newline = false; }
137 return -1; }
138 long long offset = -1; // offset in file of the first undecoded byte
139 bool multiple = false;
140 const uint8_t last_byte = dec_buffer[dec_size-1];
141 for( long long i = rsize - 1; i >= 3; --i ) // match at least 4 bytes at bof
142 if( rbuf[i] == last_byte )
143 {
144 // compare file with the two parts of the dictionary
145 int len = std::min( (long long)dec_size - 1, i );
146 if( std::memcmp( rbuf + i - len, dec_buffer + dec_size - 1 - len, len ) == 0 )
147 {
148 int len2 = std::min( (long long)prev_size, i - len );
149 if( len2 <= 0 || !prev_buffer ||
150 std::memcmp( rbuf + i - len - len2,
151 prev_buffer + prev_size - len2, len2 ) == 0 )
152 {
153 if( offset >= 0 ) multiple = true;
154 offset = i + 1;
155 i -= len + len2;
156 }
157 }
158 }
159 if( offset >= 0 )
160 {
161 if( multiple && verbosity >= 1 )
162 { std::printf( "warning: %s: Multiple matches. Using match at offset %lld\n",
163 reference_filename, offset ); std::fflush( stdout ); }
164 if( !multiple && verbosity >= 2 )
165 { std::printf( "%s: Match found at offset %lld\n",
166 reference_filename, offset ); std::fflush( stdout ); }
167 return offset;
168 }
169 int maxlen = 0; // choose longest match in reference file
170 for( long long i = rsize - 1; i >= 0; --i )
171 if( rbuf[i] == last_byte )
172 {
173 // compare file with the two parts of the dictionary
174 const int size1 = std::min( (long long)dec_size, i + 1 );
175 int len = 1;
176 while( len < size1 && rbuf[i-len] == dec_buffer[dec_size-len-1] ) ++len;
177 if( len == size1 )
178 {
179 int size2 = std::min( (long long)prev_size, i + 1 - size1 );
180 while( len < size1 + size2 &&
181 rbuf[i-len] == prev_buffer[prev_size+size1-len] ) ++len;
182 }
183 if( len > maxlen ) { maxlen = len; offset = i + 1; i -= len; }
184 }
185 if( maxlen >= 512 && offset >= 0 )
186 {
187 if( verbosity >= 1 )
188 { std::printf( "warning: %s: Partial match found at offset %lld, len %d."
189 " Reference data may be mixed with other data.\n",
190 reference_filename, offset, maxlen );
191 std::fflush( stdout ); }
192 return offset;
193 }
194 if( verbosity >= 1 )
195 { std::printf( "'%s' does not match with decoded data.\n",
196 reference_filename ); pending_newline = false; }
197 return -1;
198 }
199
200
show_close_error(const char * const prog_name="data feeder")201 void show_close_error( const char * const prog_name = "data feeder" )
202 {
203 if( verbosity >= 0 )
204 std::fprintf( stderr, "%s: Error closing output of %s: %s\n",
205 program_name, prog_name, std::strerror( errno ) );
206 }
207
208
show_exec_error(const char * const prog_name)209 void show_exec_error( const char * const prog_name )
210 {
211 if( verbosity >= 0 )
212 std::fprintf( stderr, "%s: Can't exec '%s': %s\n",
213 program_name, prog_name, std::strerror( errno ) );
214 }
215
216
show_fork_error(const char * const prog_name)217 void show_fork_error( const char * const prog_name )
218 {
219 if( verbosity >= 0 )
220 std::fprintf( stderr, "%s: Can't fork '%s': %s\n",
221 program_name, prog_name, std::strerror( errno ) );
222 }
223
224
225 /* Returns -1 if child not terminated, 1 in case of error, or exit status of
226 child process 'pid'. */
child_status(const pid_t pid,const char * const name)227 int child_status( const pid_t pid, const char * const name )
228 {
229 int status;
230 while( true )
231 {
232 const int tmp = waitpid( pid, &status, WNOHANG );
233 if( tmp == -1 && errno != EINTR )
234 {
235 if( verbosity >= 0 )
236 std::fprintf( stderr, "%s: Error checking status of '%s': %s\n",
237 program_name, name, std::strerror( errno ) );
238 return 1;
239 }
240 if( tmp == 0 ) return -1; // child not terminated
241 if( tmp == pid ) break; // child terminated
242 }
243 if( WIFEXITED( status ) ) return WEXITSTATUS( status );
244 return 1;
245 }
246
247
248 // Returns exit status of child process 'pid', or 1 in case of error.
249 //
wait_for_child(const pid_t pid,const char * const name)250 int wait_for_child( const pid_t pid, const char * const name )
251 {
252 int status;
253 while( waitpid( pid, &status, 0 ) == -1 )
254 {
255 if( errno != EINTR )
256 {
257 if( verbosity >= 0 )
258 std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n",
259 program_name, name, std::strerror( errno ) );
260 return 1;
261 }
262 }
263 if( WIFEXITED( status ) ) return WEXITSTATUS( status );
264 return 1;
265 }
266
267
good_status(const pid_t pid,const char * const name,const bool finished)268 bool good_status( const pid_t pid, const char * const name, const bool finished )
269 {
270 bool error = false;
271 if( pid )
272 {
273 if( !finished )
274 {
275 const int tmp = child_status( pid, name );
276 if( tmp < 0 ) // child not terminated
277 { kill( pid, SIGTERM ); wait_for_child( pid, name ); }
278 else if( tmp != 0 ) error = true; // child status != 0
279 }
280 else
281 if( wait_for_child( pid, name ) != 0 ) error = true;
282 if( error )
283 {
284 if( verbosity >= 0 )
285 std::fprintf( stderr, "%s: %s: Child terminated with error status.\n",
286 program_name, name );
287 return false;
288 }
289 }
290 return !error;
291 }
292
293
294 /* Feed to lzip through 'ofd' the data decompressed up to 'good_dsize'
295 (master->data_position) followed by the reference data from byte at
296 offset 'offset' of reference file, up to a total of 'dsize' bytes. */
feed_data(uint8_t * const mbuffer,const long long msize,const long long dsize,const unsigned long long good_dsize,const uint8_t * const rbuf,const long long rsize,const long long offset,const unsigned dictionary_size,const int ofd)297 bool feed_data( uint8_t * const mbuffer, const long long msize,
298 const long long dsize, const unsigned long long good_dsize,
299 const uint8_t * const rbuf, const long long rsize,
300 const long long offset, const unsigned dictionary_size,
301 const int ofd )
302 {
303 LZ_mtester mtester( mbuffer, msize, dictionary_size, ofd );
304 if( mtester.test_member( LLONG_MAX, good_dsize ) != -1 ||
305 good_dsize != mtester.data_position() )
306 { show_error( "Error decompressing prefix data for compressor." );
307 return false; }
308 // limit reference data to remaining decompressed data in member
309 const long long end =
310 std::min( (unsigned long long)rsize, dsize - good_dsize + offset );
311 for( long long i = offset; i < end; )
312 {
313 const int size = std::min( end - i, 65536LL );
314 if( writeblock( ofd, rbuf + i, size ) != size )
315 { show_error( "Error writing reference data to compressor", errno );
316 return false; }
317 i += size;
318 }
319 return true;
320 }
321
322
323 /* Try to reproduce the zeroed sector.
324 Return value: -1 = failure, 0 = success, > 0 = fatal error. */
try_reproduce(uint8_t * const mbuffer,const long long msize,const long long dsize,const unsigned long long good_dsize,const long long begin,const long long end,const uint8_t * const rbuf,const long long rsize,const long long offset,const unsigned dictionary_size,const char ** const lzip_argv,MD5SUM * const md5sump,const char terminator,const bool auto0=false)325 int try_reproduce( uint8_t * const mbuffer, const long long msize,
326 const long long dsize, const unsigned long long good_dsize,
327 const long long begin, const long long end,
328 const uint8_t * const rbuf, const long long rsize,
329 const long long offset, const unsigned dictionary_size,
330 const char ** const lzip_argv, MD5SUM * const md5sump,
331 const char terminator, const bool auto0 = false )
332 {
333 int fda[2]; // pipe to compressor
334 int fda2[2]; // pipe from compressor
335 if( pipe( fda ) < 0 || pipe( fda2 ) < 0 )
336 { show_error( "Can't create pipe", errno ); return fatal( 1 ); }
337 const pid_t pid = fork();
338 if( pid == 0 ) // child 1 (compressor feeder)
339 {
340 if( close( fda[0] ) != 0 ||
341 close( fda2[0] ) != 0 || close( fda2[1] ) != 0 ||
342 !feed_data( mbuffer, msize, dsize, good_dsize, rbuf, rsize, offset,
343 dictionary_size, fda[1] ) )
344 { close( fda[1] ); _exit( 2 ); }
345 if( close( fda[1] ) != 0 )
346 { show_close_error(); _exit( 2 ); }
347 _exit( 0 );
348 }
349 if( pid < 0 ) // parent
350 { show_fork_error( "data feeder" ); return fatal( 1 ); }
351
352 const pid_t pid2 = fork();
353 if( pid2 == 0 ) // child 2 (compressor)
354 {
355 if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
356 dup2( fda2[1], STDOUT_FILENO ) >= 0 &&
357 close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
358 close( fda2[0] ) == 0 && close( fda2[1] ) == 0 )
359 execvp( lzip_argv[0], (char **)lzip_argv );
360 show_exec_error( lzip_argv[0] );
361 _exit( 2 );
362 }
363 if( pid2 < 0 ) // parent
364 { show_fork_error( lzip_argv[0] ); return fatal( 1 ); }
365
366 close( fda[0] ); close( fda[1] ); close( fda2[1] );
367 const long long xend = std::min( end + 4, msize );
368 int retval = 0; // -1 = mismatch
369 bool first_post = true;
370 bool same_ds = true; // reproduced DS == header DS
371 bool tail_mismatch = false; // mismatch after end
372 for( long long i = 0; i < xend; )
373 {
374 enum { buffer_size = 16384 }; // 65536 makes it slower
375 uint8_t buffer[buffer_size];
376 if( verbosity >= 2 && i >= 65536 && terminator )
377 {
378 if( first_post )
379 { first_post = false; print_pending_newline( terminator ); }
380 std::printf( " Reproducing position %lld %c", i, terminator );
381 std::fflush( stdout ); pending_newline = true;
382 }
383 const int rd = readblock( fda2[0], buffer, buffer_size );
384 // not enough reference data to fill zeroed sector at this level
385 if( rd <= 0 ) { if( i < end ) retval = -1; break; }
386 int j = 0;
387 /* Compare reproduced bytes with data in mbuffer.
388 Do not fail because of a mismatch beyond the end of the zeroed sector
389 to prevent the reproduction from failing because of the reference file
390 just covering the zeroed sector. */
391 for( ; j < rd && i < begin; ++j, ++i )
392 if( mbuffer[i] != buffer[j] ) // mismatch
393 {
394 if( i != 5 ) { retval = -1; goto done; } // ignore different DS
395 const Lzip_header * header = (const Lzip_header *)buffer;
396 if( header->dictionary_size() != dictionary_size ) same_ds = false;
397 }
398 // copy reproduced bytes into zeroed sector of mbuffer
399 for( ; j < rd && i < end; ++j, ++i ) mbuffer[i] = buffer[j];
400 for( ; j < rd && i < xend; ++j, ++i )
401 if( mbuffer[i] != buffer[j] ) { tail_mismatch = true; goto done; }
402 }
403 done:
404 if( !first_post && terminator ) print_pending_newline( terminator );
405 if( close( fda2[0] ) != 0 ) { show_close_error( "compressor" ); retval = 1; }
406 if( !good_status( pid, "data feeder", false ) ||
407 !good_status( pid2, lzip_argv[0], false ) ) retval = auto0 ? -1 : 1;
408 if( !retval ) // test whole member after reproduction
409 {
410 if( md5sump ) md5sump->reset();
411 LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, md5sump );
412 if( mtester.test_member() != 0 || !mtester.finished() )
413 {
414 if( verbosity >= 2 && same_ds && begin >= 4096 && terminator )
415 {
416 if( !tail_mismatch )
417 final_msg = " Zeroed sector reproduced, but CRC does not match."
418 " (Multiple damages in file?).\n";
419 else if( !final_msg )
420 final_msg = " Zeroed sector reproduced, but data after it does not"
421 " match. (Maybe wrong reference data or lzip version).\n";
422 }
423 retval = -1; // incorrect reproduction of zeroed sector
424 }
425 }
426 return retval;
427 }
428
429
430 // Return value: -1 = master failed, 0 = success, > 0 = failure
reproduce_member(uint8_t * const mbuffer,const long long msize,const long long dsize,const char * const lzip_name,const char * const reference_filename,const long long begin,const long long size,const int lzip_level,MD5SUM * const md5sump,const char terminator)431 int reproduce_member( uint8_t * const mbuffer, const long long msize,
432 const long long dsize, const char * const lzip_name,
433 const char * const reference_filename,
434 const long long begin, const long long size,
435 const int lzip_level, MD5SUM * const md5sump,
436 const char terminator )
437 {
438 struct stat st;
439 const int rfd = open_instream( reference_filename, &st, false, true );
440 if( rfd < 0 ) return fatal( 1 );
441 if( st.st_size > LLONG_MAX )
442 { show_file_error( reference_filename, "File too large." ); close( rfd );
443 return fatal( 2 ); }
444 const long long rsize = st.st_size;
445 const uint8_t * const rbuf =
446 (const uint8_t *)mmap( 0, rsize, PROT_READ, MAP_PRIVATE, rfd, 0 );
447 close( rfd );
448 if( rbuf == MAP_FAILED )
449 { show_file_error( reference_filename, "Can't mmap", errno );
450 return fatal( 1 ); }
451
452 const Lzip_header & header = *(const Lzip_header *)mbuffer;
453 const unsigned dictionary_size = header.dictionary_size();
454 const LZ_mtester * const master =
455 prepare_master2( mbuffer, msize, begin, dictionary_size );
456 if( !master ) return -1;
457 if( verbosity >= 2 )
458 {
459 std::printf( " (master mpos = %llu, dpos = %llu)\n",
460 master->member_position(), master->data_position() );
461 std::fflush( stdout );
462 }
463
464 const long long offset = match_file( *master, rbuf, rsize, reference_filename );
465 if( offset < 0 ) { delete master; return 2; } // no match
466 // Reference data from offset must be at least as large as zeroed sector
467 // minus member trailer if trailer is inside the zeroed sector.
468 const int t = ( begin + size >= msize ) ? 16 + Lzip_trailer::size : 0;
469 if( rsize - offset < size - t )
470 { show_file_error( reference_filename, "Not enough reference data after match." );
471 delete master; return 2; }
472
473 const unsigned long long good_dsize = master->data_position();
474 const long long end = begin + size;
475 char level_str[8] = "-0"; // compression level or match length limit
476 char dict_str[16];
477 snprintf( dict_str, sizeof dict_str, "-s%u", dictionary_size );
478 const char * lzip0_argv[3] = { lzip_name, "-0", 0 };
479 const char * lzip_argv[4] = { lzip_name, level_str, dict_str, 0 };
480 if( lzip_level >= 0 )
481 for( unsigned char level = '0'; level <= '9'; ++level )
482 {
483 if( std::isdigit( lzip_level ) && level != lzip_level ) continue;
484 level_str[1] = level;
485 if( verbosity >= 1 && terminator )
486 {
487 std::printf( "Trying level %s %c", level_str, terminator );
488 std::fflush( stdout ); pending_newline = true;
489 }
490 const bool level0 = level == '0';
491 const bool auto0 = ( level0 && lzip_level != '0' );
492 int ret = try_reproduce( mbuffer, msize, dsize, good_dsize, begin, end,
493 rbuf, rsize, offset, dictionary_size,
494 level0 ? lzip0_argv : lzip_argv, md5sump, terminator, auto0 );
495 if( ret >= 0 )
496 { delete master; munmap( (void *)rbuf, rsize ); return ret; }
497 }
498 if( lzip_level <= 0 )
499 {
500 for( int len = min_match_len_limit; len <= max_match_len; ++len )
501 {
502 if( lzip_level < -1 && -lzip_level != len ) continue;
503 snprintf( level_str, sizeof level_str, "-m%u", len );
504 if( verbosity >= 1 && terminator )
505 {
506 std::printf( "Trying match length limit %d %c", len, terminator );
507 std::fflush( stdout ); pending_newline = true;
508 }
509 int ret = try_reproduce( mbuffer, msize, dsize, good_dsize, begin, end,
510 rbuf, rsize, offset, dictionary_size,
511 lzip_argv, md5sump, terminator );
512 if( ret >= 0 )
513 { delete master; munmap( (void *)rbuf, rsize ); return ret; }
514 }
515 }
516 delete master;
517 munmap( (void *)rbuf, rsize );
518 return 2;
519 }
520
521 } // end namespace
522
523
reproduce_file(const std::string & input_filename,const std::string & default_output_filename,const char * const lzip_name,const char * const reference_filename,const int lzip_level,const char terminator,const bool force)524 int reproduce_file( const std::string & input_filename,
525 const std::string & default_output_filename,
526 const char * const lzip_name,
527 const char * const reference_filename,
528 const int lzip_level, const char terminator,
529 const bool force )
530 {
531 struct stat in_stats;
532 const int infd =
533 open_instream( input_filename.c_str(), &in_stats, false, true );
534 if( infd < 0 ) return 1;
535
536 const Lzip_index lzip_index( infd, true, true, true );
537 if( lzip_index.retval() != 0 )
538 { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
539 return lzip_index.retval(); }
540
541 output_filename = default_output_filename.empty() ?
542 insert_fixed( input_filename ) : default_output_filename;
543 if( !force && file_exists( output_filename ) ) return 1;
544 outfd = -1;
545 int errors = 0;
546 const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
547 for( long i = 0; i < lzip_index.members(); ++i )
548 {
549 const long long dsize = lzip_index.dblock( i ).size();
550 const long long mpos = lzip_index.mblock( i ).pos();
551 const long long msize = lzip_index.mblock( i ).size();
552 if( verbosity >= 1 && lzip_index.members() > 1 )
553 {
554 std::printf( "Testing member %ld of %ld %c",
555 i + 1, lzip_index.members(), terminator );
556 std::fflush( stdout ); pending_newline = true;
557 }
558 if( !safe_seek( infd, mpos ) ) return 1;
559 long long failure_pos = 0;
560 if( test_member_from_file( infd, msize, &failure_pos ) == 0 )
561 continue; // member is not damaged
562 print_pending_newline( terminator );
563 if( ++errors > 1 ) break; // only one member can be reproduced
564 if( failure_pos < Lzip_header::size ) // End Of File
565 { show_file_error( input_filename.c_str(), "Unexpected end of file." );
566 return 2; }
567
568 // without mmap, 3 times more memory are required because of fork
569 const long mpos_rem = mpos % page_size;
570 uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
571 PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
572 if( mbuffer_base == MAP_FAILED )
573 { show_file_error( input_filename.c_str(), "Can't mmap", errno ); return 1; }
574 uint8_t * const mbuffer = mbuffer_base + mpos_rem;
575 long long size = 0;
576 uint8_t value = 0;
577 const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer,
578 msize, &size, &value );
579 if( begin < 0 ) return 2;
580 if( failure_pos < begin )
581 { show_file_error( input_filename.c_str(),
582 "Data error found before damaged area." ); return 2; }
583 if( verbosity >= 1 )
584 {
585 std::printf( "Reproducing bad area in member %ld of %ld\n"
586 " (begin = %lld, size = %lld, value = 0x%02X)\n",
587 i + 1, lzip_index.members(), begin, size, value );
588 std::fflush( stdout );
589 }
590 const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
591 reference_filename, begin, size, lzip_level, 0, terminator );
592 if( ret <= 0 ) print_pending_newline( terminator );
593 if( ret < 0 ) { show_error( "Can't prepare master." ); return 1; }
594 if( ret == 0 )
595 {
596 if( outfd < 0 ) // first damaged member reproduced
597 {
598 if( !safe_seek( infd, 0 ) ) return 1;
599 set_signal_handler();
600 if( !open_outstream( true, true ) ) return 1;
601 if( !copy_file( infd, outfd ) ) // copy whole file
602 cleanup_and_fail( 1 );
603 }
604 if( seek_write( outfd, mbuffer + begin, size, mpos + begin ) != size )
605 { show_file_error( output_filename.c_str(), "Error writing file", errno );
606 cleanup_and_fail( 1 ); }
607 if( verbosity >= 1 )
608 std::fputs( "Member reproduced successfully.\n", stdout );
609 }
610 munmap( mbuffer_base, msize + mpos_rem );
611 if( ret > 0 )
612 {
613 if( final_msg )
614 { std::fputs( final_msg, stdout ); std::fflush( stdout ); }
615 show_file_error( input_filename.c_str(),
616 "Unable to reproduce member." ); return ret;
617 }
618 }
619
620 if( outfd < 0 )
621 {
622 if( verbosity >= 1 )
623 std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout );
624 return 0;
625 }
626 if( close_outstream( &in_stats ) != 0 ) return 1;
627 if( verbosity >= 0 )
628 {
629 if( errors > 1 )
630 std::fputs( "One member reproduced."
631 " Copy of input file still contains errors.\n", stdout );
632 else
633 std::fputs( "Copy of input file reproduced successfully.\n", stdout );
634 }
635 return 0;
636 }
637
638
639 /* Passes a 0 terminator to other functions to prevent intramember feedback.
640 Exits only in case of fatal error. (reference file too large, etc). */
debug_reproduce_file(const std::string & input_filename,const char * const lzip_name,const char * const reference_filename,const Block & range,const int sector_size,const int lzip_level)641 int debug_reproduce_file( const std::string & input_filename,
642 const char * const lzip_name,
643 const char * const reference_filename,
644 const Block & range, const int sector_size,
645 const int lzip_level )
646 {
647 struct stat in_stats; // not used
648 const int infd =
649 open_instream( input_filename.c_str(), &in_stats, false, true );
650 if( infd < 0 ) return 1;
651
652 const Lzip_index lzip_index( infd, true, true );
653 if( lzip_index.retval() != 0 )
654 { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
655 return lzip_index.retval(); }
656
657 const long long cdata_size = lzip_index.cdata_size();
658 if( range.pos() >= cdata_size )
659 { show_file_error( input_filename.c_str(),
660 "Range is beyond end of last member." ); return 1; }
661
662 const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
663 const long long positions_to_test =
664 ( ( std::min( range.end(), cdata_size ) - range.pos() ) +
665 sector_size - 9 ) / sector_size;
666 long positions = 0, successes = 0, failed_comparisons = 0;
667 long alternative_reproductions = 0;
668 const bool pct_enabled = cdata_size > sector_size &&
669 isatty( STDERR_FILENO ) && !isatty( STDOUT_FILENO );
670 for( long i = 0; i < lzip_index.members(); ++i )
671 {
672 const long long mpos = lzip_index.mblock( i ).pos();
673 const long long msize = lzip_index.mblock( i ).size();
674 if( !range.overlaps( mpos, msize ) ) continue;
675 const long long dsize = lzip_index.dblock( i ).size();
676 const unsigned dictionary_size = lzip_index.dictionary_size( i );
677
678 // md5sums of original not damaged member (compressed and decompressed)
679 uint8_t md5_digest_c[16], md5_digest_d[16];
680 bool md5_valid = false;
681 const long long rm_end = std::min( range.end(), mpos + msize );
682 for( long long sector_pos = std::max( range.pos(), mpos );
683 sector_pos + 8 <= rm_end; sector_pos += sector_size )
684 {
685 // without mmap, 3 times more memory are required because of fork
686 const long mpos_rem = mpos % page_size;
687 uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
688 PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
689 if( mbuffer_base == MAP_FAILED )
690 { show_file_error( input_filename.c_str(), "Can't mmap", errno );
691 return 1; }
692 uint8_t * const mbuffer = mbuffer_base + mpos_rem;
693 if( !md5_valid )
694 {
695 if( verbosity >= 0 ) // give a clue of the range being tested
696 { std::printf( "Reproducing: %s\nReference file: %s\nTesting "
697 "sectors of size %llu at file positions %llu to %llu\n",
698 input_filename.c_str(), reference_filename,
699 std::min( (long long)sector_size, rm_end - sector_pos ),
700 sector_pos, rm_end - 1 ); std::fflush( stdout ); }
701 md5_valid = true; compute_md5( mbuffer, msize, md5_digest_c );
702 MD5SUM md5sum;
703 LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
704 if( mtester.test_member() != 0 || !mtester.finished() )
705 {
706 if( verbosity >= 0 )
707 { std::printf( "Member %ld of %ld already damaged (failure pos "
708 "= %llu)\n", i + 1, lzip_index.members(),
709 mpos + mtester.member_position() );
710 std::fflush( stdout ); }
711 munmap( mbuffer_base, msize + mpos_rem ); break;
712 }
713 md5sum.md5_finish( md5_digest_d );
714 }
715 ++positions;
716 const int sector_sz =
717 std::min( rm_end - sector_pos, (long long)sector_size );
718 // set mbuffer[sector] to 0
719 std::memset( mbuffer + ( sector_pos - mpos ), 0, sector_sz );
720 long long size = 0;
721 uint8_t value = 0;
722 const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer,
723 msize, &size, &value );
724 if( begin < 0 ) return 2;
725 MD5SUM md5sum;
726 const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
727 reference_filename, begin, size, lzip_level, &md5sum, 0 );
728 if( ret < 0 ) { show_error( "Can't prepare master." ); return 1; }
729 if( ret == 0 )
730 {
731 ++successes;
732 uint8_t new_digest[16];
733 md5sum.md5_finish( new_digest );
734 if( std::memcmp( md5_digest_d, new_digest, 16 ) != 0 )
735 {
736 ++failed_comparisons;
737 if( verbosity >= 0 )
738 std::printf( "Comparison failed at pos %llu\n", sector_pos );
739 }
740 else if( !check_md5( mbuffer, msize, md5_digest_c ) )
741 {
742 ++alternative_reproductions;
743 if( verbosity >= 0 )
744 std::printf( "Alternative reproduction at pos %llu\n", sector_pos );
745 }
746 else if( verbosity >= 0 )
747 std::printf( "Reproduction succeeded at pos %llu\n", sector_pos );
748 }
749 else if( verbosity >= 0 ) // ret > 0
750 std::printf( "Unable to reproduce at pos %llu\n", sector_pos );
751 if( verbosity >= 0 )
752 {
753 std::fflush( stdout ); // flush result line
754 if( pct_enabled ) // show feedback
755 std::fprintf( stderr, "\r%ld sectors %ld successes %ld failcomp "
756 "%ld altrep %3u%% done\r", positions, successes,
757 failed_comparisons, alternative_reproductions,
758 (unsigned)( ( positions * 100.0 ) / positions_to_test ) );
759 }
760 munmap( mbuffer_base, msize + mpos_rem );
761 if( fatal_retval ) goto done;
762 }
763 }
764 done:
765 if( verbosity >= 0 )
766 {
767 std::printf( "\n%8ld sectors tested"
768 "\n%8ld reproductions returned with zero status",
769 positions, successes );
770 if( successes > 0 )
771 {
772 if( failed_comparisons > 0 )
773 std::printf( ", of which\n%8ld comparisons failed\n",
774 failed_comparisons );
775 else std::fputs( "\n all comparisons passed\n", stdout );
776 if( alternative_reproductions > 0 )
777 std::printf( "%8ld alternative reproductions found\n",
778 alternative_reproductions );
779 }
780 else std::fputc( '\n', stdout );
781 if( fatal_retval )
782 std::fputs( "Exiting because of a fatal error\n", stdout );
783 }
784 return fatal_retval;
785 }
786