1 /* Lziprecover - Data recovery tool for the lzip format
2    Copyright (C) 2009-2021 Antonio Diaz Diaz.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17 
18 #define _FILE_OFFSET_BITS 64
19 
20 #include <algorithm>
21 #include <cerrno>
22 #include <climits>
23 #include <csignal>
24 #include <cstdio>
25 #include <cstring>
26 #include <string>
27 #include <vector>
28 #include <stdint.h>
29 #include <unistd.h>
30 #include <sys/mman.h>
31 #include <sys/stat.h>
32 #include <sys/wait.h>
33 
34 #include "lzip.h"
35 #include "md5.h"
36 #include "mtester.h"
37 #include "lzip_index.h"
38 
39 
40 namespace {
41 
// Explanatory message set by try_reproduce when a reproduction attempt fails
// the final test; printed by reproduce_file if the member can't be reproduced.
const char * final_msg = 0;

// True while a progress line without a trailing newline is pending on stdout.
bool pending_newline = false;

/* Finishes the pending progress line, if there is one.
   Nothing is written if 'terminator' is itself a newline.
   'pending_newline' is always cleared. */
void print_pending_newline( const char terminator )
  {
  const bool needs_newline = pending_newline && terminator != '\n';
  pending_newline = false;
  if( needs_newline ) std::fputc( '\n', stdout );
  }
49 
// First nonzero status passed to fatal(), or 0 if there has been none.
int fatal_retval = 0;

/* Remembers 'retval' as the fatal status unless one is already recorded.
   Returns 'retval' unchanged, so callers can write 'return fatal( n );'. */
int fatal( const int retval )
  {
  if( !fatal_retval ) fatal_retval = retval;
  return retval;
  }
54 
55 // Returns the position of the damaged area in the member, or -1 if error.
zeroed_sector_pos(const char * const input_filename,const uint8_t * const mbuffer,const long long msize,long long * const sizep,uint8_t * const valuep)56 long long zeroed_sector_pos( const char * const input_filename,
57                         const uint8_t * const mbuffer, const long long msize,
58                         long long * const sizep, uint8_t * const valuep )
59   {
60   enum { minlen = 8 };		// min number of consecutive identical bytes
61   long long i = Lzip_header::size;
62   const long long end = msize - minlen;
63   long long begin = -1;
64   long long size = 0;
65   uint8_t value = 0;
66   while( i < end )	// leave i pointing to the first differing byte
67     {
68     const uint8_t byte = mbuffer[i++];
69     if( mbuffer[i] == byte )
70       {
71       const long long pos = i - 1;
72       ++i;
73       while( i < msize && mbuffer[i] == byte ) ++i;
74       if( i - pos >= minlen )
75         {
76         if( size > 0 )
77           { show_file_error( input_filename,
78                              "Member contains more than one damaged area." );
79             return -1; }
80         begin = pos;
81         size = i - pos;
82         value = byte;
83         break;
84         }
85       }
86     }
87   if( begin < 0 || size <= 0 )
88     { show_file_error( input_filename, "Can't locate damaged area." );
89       return -1; }
90   *sizep = size;
91   *valuep = value;
92   return begin;
93   }
94 
95 
prepare_master2(const uint8_t * const mbuffer,const long long msize,const long long begin,const unsigned dictionary_size)96 const LZ_mtester * prepare_master2( const uint8_t * const mbuffer,
97                                     const long long msize,
98                                     const long long begin,
99                                     const unsigned dictionary_size )
100   {
101   long long pos_limit = std::max( begin - 16, (long long)Lzip_header::size );
102   LZ_mtester * master = new LZ_mtester( mbuffer, msize, dictionary_size );
103   if( master->test_member( pos_limit ) != -1 ||
104       master->member_position() > (unsigned long long)begin )
105     { delete master; return 0; }
106   // decompress as much data as possible without surpassing begin
107   while( pos_limit < begin && master->test_member( pos_limit + 1 ) == -1 &&
108          master->member_position() <= (unsigned long long)begin )
109     ++pos_limit;
110   delete master;
111   master = new LZ_mtester( mbuffer, msize, dictionary_size );
112   if( master->test_member( pos_limit ) == -1 &&
113       master->member_position() <= (unsigned long long)begin ) return master;
114   delete master;
115   return 0;
116   }
117 
118 
119 /* Locate in the reference file (rbuf) the truncated data in the dictionary.
120    The reference file must match from the last byte decoded back to the
121    beginning of the file or to the beginning of the dictionary.
122    Choose the match nearest to the beginning of the file.
123    As a fallback, locate the longest partial match at least 512 bytes long.
124    Returns the offset in file of the first undecoded byte, or -1 if no match. */
match_file(const LZ_mtester & master,const uint8_t * const rbuf,const long long rsize,const char * const reference_filename)125 long long match_file( const LZ_mtester & master, const uint8_t * const rbuf,
126                       const long long rsize,
127                       const char * const reference_filename )
128   {
129   const uint8_t * prev_buffer;
130   int dec_size, prev_size;
131   const uint8_t * const dec_buffer =
132     master.get_buffers( &prev_buffer, &dec_size, &prev_size );
133   if( dec_size < 4 )
134     { if( verbosity >= 1 )
135         { std::printf( "'%s' can't match: not enough data in dictionary.\n",
136                        reference_filename ); pending_newline = false; }
137       return -1; }
138   long long offset = -1;	// offset in file of the first undecoded byte
139   bool multiple = false;
140   const uint8_t last_byte = dec_buffer[dec_size-1];
141   for( long long i = rsize - 1; i >= 3; --i )	// match at least 4 bytes at bof
142     if( rbuf[i] == last_byte )
143       {
144       // compare file with the two parts of the dictionary
145       int len = std::min( (long long)dec_size - 1, i );
146       if( std::memcmp( rbuf + i - len, dec_buffer + dec_size - 1 - len, len ) == 0 )
147         {
148         int len2 = std::min( (long long)prev_size, i - len );
149         if( len2 <= 0 || !prev_buffer ||
150             std::memcmp( rbuf + i - len - len2,
151                          prev_buffer + prev_size - len2, len2 ) == 0 )
152           {
153           if( offset >= 0 ) multiple = true;
154           offset = i + 1;
155           i -= len + len2;
156           }
157         }
158       }
159   if( offset >= 0 )
160     {
161     if( multiple && verbosity >= 1 )
162       { std::printf( "warning: %s: Multiple matches. Using match at offset %lld\n",
163                      reference_filename, offset ); std::fflush( stdout ); }
164     if( !multiple && verbosity >= 2 )
165       { std::printf( "%s: Match found at offset %lld\n",
166                      reference_filename, offset ); std::fflush( stdout ); }
167     return offset;
168     }
169   int maxlen = 0;		// choose longest match in reference file
170   for( long long i = rsize - 1; i >= 0; --i )
171     if( rbuf[i] == last_byte )
172       {
173       // compare file with the two parts of the dictionary
174       const int size1 = std::min( (long long)dec_size, i + 1 );
175       int len = 1;
176       while( len < size1 && rbuf[i-len] == dec_buffer[dec_size-len-1] ) ++len;
177       if( len == size1 )
178         {
179         int size2 = std::min( (long long)prev_size, i + 1 - size1 );
180         while( len < size1 + size2 &&
181                rbuf[i-len] == prev_buffer[prev_size+size1-len] ) ++len;
182         }
183       if( len > maxlen ) { maxlen = len; offset = i + 1; i -= len; }
184       }
185   if( maxlen >= 512 && offset >= 0 )
186     {
187     if( verbosity >= 1 )
188       { std::printf( "warning: %s: Partial match found at offset %lld, len %d."
189                      " Reference data may be mixed with other data.\n",
190                      reference_filename, offset, maxlen );
191         std::fflush( stdout ); }
192     return offset;
193     }
194   if( verbosity >= 1 )
195     { std::printf( "'%s' does not match with decoded data.\n",
196                    reference_filename ); pending_newline = false; }
197   return -1;
198   }
199 
200 
show_close_error(const char * const prog_name="data feeder")201 void show_close_error( const char * const prog_name = "data feeder" )
202   {
203   if( verbosity >= 0 )
204     std::fprintf( stderr, "%s: Error closing output of %s: %s\n",
205                   program_name, prog_name, std::strerror( errno ) );
206   }
207 
208 
show_exec_error(const char * const prog_name)209 void show_exec_error( const char * const prog_name )
210   {
211   if( verbosity >= 0 )
212     std::fprintf( stderr, "%s: Can't exec '%s': %s\n",
213                   program_name, prog_name, std::strerror( errno ) );
214   }
215 
216 
show_fork_error(const char * const prog_name)217 void show_fork_error( const char * const prog_name )
218   {
219   if( verbosity >= 0 )
220     std::fprintf( stderr, "%s: Can't fork '%s': %s\n",
221                   program_name, prog_name, std::strerror( errno ) );
222   }
223 
224 
225 /* Returns -1 if child not terminated, 1 in case of error, or exit status of
226    child process 'pid'. */
child_status(const pid_t pid,const char * const name)227 int child_status( const pid_t pid, const char * const name )
228   {
229   int status;
230   while( true )
231     {
232     const int tmp = waitpid( pid, &status, WNOHANG );
233     if( tmp == -1 && errno != EINTR )
234       {
235       if( verbosity >= 0 )
236         std::fprintf( stderr, "%s: Error checking status of '%s': %s\n",
237                       program_name, name, std::strerror( errno ) );
238       return 1;
239       }
240     if( tmp == 0 ) return -1;			// child not terminated
241     if( tmp == pid ) break;			// child terminated
242     }
243   if( WIFEXITED( status ) ) return WEXITSTATUS( status );
244   return 1;
245   }
246 
247 
248 // Returns exit status of child process 'pid', or 1 in case of error.
249 //
wait_for_child(const pid_t pid,const char * const name)250 int wait_for_child( const pid_t pid, const char * const name )
251   {
252   int status;
253   while( waitpid( pid, &status, 0 ) == -1 )
254     {
255     if( errno != EINTR )
256       {
257       if( verbosity >= 0 )
258         std::fprintf( stderr, "%s: Error waiting termination of '%s': %s\n",
259                       program_name, name, std::strerror( errno ) );
260       return 1;
261       }
262     }
263   if( WIFEXITED( status ) ) return WEXITSTATUS( status );
264   return 1;
265   }
266 
267 
good_status(const pid_t pid,const char * const name,const bool finished)268 bool good_status( const pid_t pid, const char * const name, const bool finished )
269   {
270   bool error = false;
271   if( pid )
272     {
273     if( !finished )
274       {
275       const int tmp = child_status( pid, name );
276       if( tmp < 0 )				// child not terminated
277         { kill( pid, SIGTERM ); wait_for_child( pid, name ); }
278       else if( tmp != 0 ) error = true;		// child status != 0
279       }
280     else
281       if( wait_for_child( pid, name ) != 0 ) error = true;
282     if( error )
283       {
284       if( verbosity >= 0 )
285         std::fprintf( stderr, "%s: %s: Child terminated with error status.\n",
286                       program_name, name );
287       return false;
288       }
289     }
290   return !error;
291   }
292 
293 
294 /* Feed to lzip through 'ofd' the data decompressed up to 'good_dsize'
295    (master->data_position) followed by the reference data from byte at
296    offset 'offset' of reference file, up to a total of 'dsize' bytes. */
feed_data(uint8_t * const mbuffer,const long long msize,const long long dsize,const unsigned long long good_dsize,const uint8_t * const rbuf,const long long rsize,const long long offset,const unsigned dictionary_size,const int ofd)297 bool feed_data( uint8_t * const mbuffer, const long long msize,
298                 const long long dsize, const unsigned long long good_dsize,
299                 const uint8_t * const rbuf, const long long rsize,
300                 const long long offset, const unsigned dictionary_size,
301                 const int ofd )
302   {
303   LZ_mtester mtester( mbuffer, msize, dictionary_size, ofd );
304   if( mtester.test_member( LLONG_MAX, good_dsize ) != -1 ||
305       good_dsize != mtester.data_position() )
306     { show_error( "Error decompressing prefix data for compressor." );
307       return false; }
308   // limit reference data to remaining decompressed data in member
309   const long long end =
310     std::min( (unsigned long long)rsize, dsize - good_dsize + offset );
311   for( long long i = offset; i < end; )
312     {
313     const int size = std::min( end - i, 65536LL );
314     if( writeblock( ofd, rbuf + i, size ) != size )
315       { show_error( "Error writing reference data to compressor", errno );
316         return false; }
317     i += size;
318     }
319   return true;
320   }
321 
322 
323 /* Try to reproduce the zeroed sector.
324    Return value: -1 = failure, 0 = success, > 0 = fatal error. */
try_reproduce(uint8_t * const mbuffer,const long long msize,const long long dsize,const unsigned long long good_dsize,const long long begin,const long long end,const uint8_t * const rbuf,const long long rsize,const long long offset,const unsigned dictionary_size,const char ** const lzip_argv,MD5SUM * const md5sump,const char terminator,const bool auto0=false)325 int try_reproduce( uint8_t * const mbuffer, const long long msize,
326                    const long long dsize, const unsigned long long good_dsize,
327                    const long long begin, const long long end,
328                    const uint8_t * const rbuf, const long long rsize,
329                    const long long offset, const unsigned dictionary_size,
330                    const char ** const lzip_argv, MD5SUM * const md5sump,
331                    const char terminator, const bool auto0 = false )
332   {
333   int fda[2];				// pipe to compressor
334   int fda2[2];				// pipe from compressor
335   if( pipe( fda ) < 0 || pipe( fda2 ) < 0 )
336     { show_error( "Can't create pipe", errno ); return fatal( 1 ); }
337   const pid_t pid = fork();
338   if( pid == 0 )			// child 1 (compressor feeder)
339     {
340     if( close( fda[0] ) != 0 ||
341         close( fda2[0] ) != 0 || close( fda2[1] ) != 0 ||
342         !feed_data( mbuffer, msize, dsize, good_dsize, rbuf, rsize, offset,
343                     dictionary_size, fda[1] ) )
344       { close( fda[1] ); _exit( 2 ); }
345     if( close( fda[1] ) != 0 )
346       { show_close_error(); _exit( 2 ); }
347     _exit( 0 );
348     }
349   if( pid < 0 )			// parent
350     { show_fork_error( "data feeder" ); return fatal( 1 ); }
351 
352   const pid_t pid2 = fork();
353   if( pid2 == 0 )			// child 2 (compressor)
354     {
355     if( dup2( fda[0], STDIN_FILENO ) >= 0 &&
356         dup2( fda2[1], STDOUT_FILENO ) >= 0 &&
357         close( fda[0] ) == 0 && close( fda[1] ) == 0 &&
358         close( fda2[0] ) == 0 && close( fda2[1] ) == 0 )
359       execvp( lzip_argv[0], (char **)lzip_argv );
360     show_exec_error( lzip_argv[0] );
361     _exit( 2 );
362     }
363   if( pid2 < 0 )			// parent
364     { show_fork_error( lzip_argv[0] ); return fatal( 1 ); }
365 
366   close( fda[0] ); close( fda[1] ); close( fda2[1] );
367   const long long xend = std::min( end + 4, msize );
368   int retval = 0;				// -1 = mismatch
369   bool first_post = true;
370   bool same_ds = true;				// reproduced DS == header DS
371   bool tail_mismatch = false;			// mismatch after end
372   for( long long i = 0; i < xend; )
373     {
374     enum { buffer_size = 16384 };		// 65536 makes it slower
375     uint8_t buffer[buffer_size];
376     if( verbosity >= 2 && i >= 65536 && terminator )
377       {
378       if( first_post )
379         { first_post = false; print_pending_newline( terminator ); }
380       std::printf( "  Reproducing position %lld %c", i, terminator );
381       std::fflush( stdout ); pending_newline = true;
382       }
383     const int rd = readblock( fda2[0], buffer, buffer_size );
384     // not enough reference data to fill zeroed sector at this level
385     if( rd <= 0 ) { if( i < end ) retval = -1; break; }
386     int j = 0;
387     /* Compare reproduced bytes with data in mbuffer.
388        Do not fail because of a mismatch beyond the end of the zeroed sector
389        to prevent the reproduction from failing because of the reference file
390        just covering the zeroed sector. */
391     for( ; j < rd && i < begin; ++j, ++i )
392       if( mbuffer[i] != buffer[j] )			// mismatch
393         {
394         if( i != 5 ) { retval = -1; goto done; }	// ignore different DS
395         const Lzip_header * header = (const Lzip_header *)buffer;
396         if( header->dictionary_size() != dictionary_size ) same_ds = false;
397         }
398     // copy reproduced bytes into zeroed sector of mbuffer
399     for( ; j < rd && i < end; ++j, ++i ) mbuffer[i] = buffer[j];
400     for( ; j < rd && i < xend; ++j, ++i )
401       if( mbuffer[i] != buffer[j] ) { tail_mismatch = true; goto done; }
402     }
403 done:
404   if( !first_post && terminator ) print_pending_newline( terminator );
405   if( close( fda2[0] ) != 0 ) { show_close_error( "compressor" ); retval = 1; }
406   if( !good_status( pid, "data feeder", false ) ||
407       !good_status( pid2, lzip_argv[0], false ) ) retval = auto0 ? -1 : 1;
408   if( !retval )		// test whole member after reproduction
409     {
410     if( md5sump ) md5sump->reset();
411     LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, md5sump );
412     if( mtester.test_member() != 0 || !mtester.finished() )
413       {
414       if( verbosity >= 2 && same_ds && begin >= 4096 && terminator )
415         {
416         if( !tail_mismatch )
417           final_msg = "  Zeroed sector reproduced, but CRC does not match."
418                       " (Multiple damages in file?).\n";
419         else if( !final_msg )
420           final_msg = "  Zeroed sector reproduced, but data after it does not"
421                       " match. (Maybe wrong reference data or lzip version).\n";
422         }
423       retval = -1;		// incorrect reproduction of zeroed sector
424       }
425     }
426   return retval;
427   }
428 
429 
430 // Return value: -1 = master failed, 0 = success, > 0 = failure
reproduce_member(uint8_t * const mbuffer,const long long msize,const long long dsize,const char * const lzip_name,const char * const reference_filename,const long long begin,const long long size,const int lzip_level,MD5SUM * const md5sump,const char terminator)431 int reproduce_member( uint8_t * const mbuffer, const long long msize,
432                       const long long dsize, const char * const lzip_name,
433                       const char * const reference_filename,
434                       const long long begin, const long long size,
435                       const int lzip_level, MD5SUM * const md5sump,
436                       const char terminator )
437   {
438   struct stat st;
439   const int rfd = open_instream( reference_filename, &st, false, true );
440   if( rfd < 0 ) return fatal( 1 );
441   if( st.st_size > LLONG_MAX )
442     { show_file_error( reference_filename, "File too large." ); close( rfd );
443       return fatal( 2 ); }
444   const long long rsize = st.st_size;
445   const uint8_t * const rbuf =
446     (const uint8_t *)mmap( 0, rsize, PROT_READ, MAP_PRIVATE, rfd, 0 );
447   close( rfd );
448   if( rbuf == MAP_FAILED )
449     { show_file_error( reference_filename, "Can't mmap", errno );
450       return fatal( 1 ); }
451 
452   const Lzip_header & header = *(const Lzip_header *)mbuffer;
453   const unsigned dictionary_size = header.dictionary_size();
454   const LZ_mtester * const master =
455     prepare_master2( mbuffer, msize, begin, dictionary_size );
456   if( !master ) return -1;
457   if( verbosity >= 2 )
458     {
459     std::printf( "  (master mpos = %llu, dpos = %llu)\n",
460                  master->member_position(), master->data_position() );
461     std::fflush( stdout );
462     }
463 
464   const long long offset = match_file( *master, rbuf, rsize, reference_filename );
465   if( offset < 0 ) { delete master; return 2; }		// no match
466   // Reference data from offset must be at least as large as zeroed sector
467   // minus member trailer if trailer is inside the zeroed sector.
468   const int t = ( begin + size >= msize ) ? 16 + Lzip_trailer::size : 0;
469   if( rsize - offset < size - t )
470     { show_file_error( reference_filename, "Not enough reference data after match." );
471       delete master; return 2; }
472 
473   const unsigned long long good_dsize = master->data_position();
474   const long long end = begin + size;
475   char level_str[8] = "-0";	// compression level or match length limit
476   char dict_str[16];
477   snprintf( dict_str, sizeof dict_str, "-s%u", dictionary_size );
478   const char * lzip0_argv[3] = { lzip_name, "-0", 0 };
479   const char * lzip_argv[4] = { lzip_name, level_str, dict_str, 0 };
480   if( lzip_level >= 0 )
481     for( unsigned char level = '0'; level <= '9'; ++level )
482       {
483       if( std::isdigit( lzip_level ) && level != lzip_level ) continue;
484       level_str[1] = level;
485       if( verbosity >= 1 && terminator )
486         {
487         std::printf( "Trying level %s %c", level_str, terminator );
488         std::fflush( stdout ); pending_newline = true;
489         }
490       const bool level0 = level == '0';
491       const bool auto0 = ( level0 && lzip_level != '0' );
492       int ret = try_reproduce( mbuffer, msize, dsize, good_dsize, begin, end,
493                                rbuf, rsize, offset, dictionary_size,
494                 level0 ? lzip0_argv : lzip_argv, md5sump, terminator, auto0 );
495       if( ret >= 0 )
496         { delete master; munmap( (void *)rbuf, rsize ); return ret; }
497       }
498   if( lzip_level <= 0 )
499     {
500     for( int len = min_match_len_limit; len <= max_match_len; ++len )
501       {
502       if( lzip_level < -1 && -lzip_level != len ) continue;
503       snprintf( level_str, sizeof level_str, "-m%u", len );
504       if( verbosity >= 1 && terminator )
505         {
506         std::printf( "Trying match length limit %d %c", len, terminator );
507         std::fflush( stdout ); pending_newline = true;
508         }
509       int ret = try_reproduce( mbuffer, msize, dsize, good_dsize, begin, end,
510                                rbuf, rsize, offset, dictionary_size,
511                                lzip_argv, md5sump, terminator );
512       if( ret >= 0 )
513         { delete master; munmap( (void *)rbuf, rsize ); return ret; }
514       }
515     }
516   delete master;
517   munmap( (void *)rbuf, rsize );
518   return 2;
519   }
520 
521 } // end namespace
522 
523 
reproduce_file(const std::string & input_filename,const std::string & default_output_filename,const char * const lzip_name,const char * const reference_filename,const int lzip_level,const char terminator,const bool force)524 int reproduce_file( const std::string & input_filename,
525                     const std::string & default_output_filename,
526                     const char * const lzip_name,
527                     const char * const reference_filename,
528                     const int lzip_level, const char terminator,
529                     const bool force )
530   {
531   struct stat in_stats;
532   const int infd =
533     open_instream( input_filename.c_str(), &in_stats, false, true );
534   if( infd < 0 ) return 1;
535 
536   const Lzip_index lzip_index( infd, true, true, true );
537   if( lzip_index.retval() != 0 )
538     { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
539       return lzip_index.retval(); }
540 
541   output_filename = default_output_filename.empty() ?
542                     insert_fixed( input_filename ) : default_output_filename;
543   if( !force && file_exists( output_filename ) ) return 1;
544   outfd = -1;
545   int errors = 0;
546   const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
547   for( long i = 0; i < lzip_index.members(); ++i )
548     {
549     const long long dsize = lzip_index.dblock( i ).size();
550     const long long mpos = lzip_index.mblock( i ).pos();
551     const long long msize = lzip_index.mblock( i ).size();
552     if( verbosity >= 1 && lzip_index.members() > 1 )
553       {
554       std::printf( "Testing member %ld of %ld %c",
555                    i + 1, lzip_index.members(), terminator );
556       std::fflush( stdout ); pending_newline = true;
557       }
558     if( !safe_seek( infd, mpos ) ) return 1;
559     long long failure_pos = 0;
560     if( test_member_from_file( infd, msize, &failure_pos ) == 0 )
561       continue;				// member is not damaged
562     print_pending_newline( terminator );
563     if( ++errors > 1 ) break;	// only one member can be reproduced
564     if( failure_pos < Lzip_header::size )		// End Of File
565       { show_file_error( input_filename.c_str(), "Unexpected end of file." );
566         return 2; }
567 
568     // without mmap, 3 times more memory are required because of fork
569     const long mpos_rem = mpos % page_size;
570     uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
571               PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
572     if( mbuffer_base == MAP_FAILED )
573       { show_file_error( input_filename.c_str(), "Can't mmap", errno ); return 1; }
574     uint8_t * const mbuffer = mbuffer_base + mpos_rem;
575     long long size = 0;
576     uint8_t value = 0;
577     const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer,
578                                                msize, &size, &value );
579     if( begin < 0 ) return 2;
580     if( failure_pos < begin )
581       { show_file_error( input_filename.c_str(),
582                          "Data error found before damaged area." ); return 2; }
583     if( verbosity >= 1 )
584       {
585       std::printf( "Reproducing bad area in member %ld of %ld\n"
586                    "  (begin = %lld, size = %lld, value = 0x%02X)\n",
587                    i + 1, lzip_index.members(), begin, size, value );
588       std::fflush( stdout );
589       }
590     const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
591                   reference_filename, begin, size, lzip_level, 0, terminator );
592     if( ret <= 0 ) print_pending_newline( terminator );
593     if( ret < 0 ) { show_error( "Can't prepare master." ); return 1; }
594     if( ret == 0 )
595       {
596       if( outfd < 0 )			// first damaged member reproduced
597         {
598         if( !safe_seek( infd, 0 ) ) return 1;
599         set_signal_handler();
600         if( !open_outstream( true, true ) ) return 1;
601         if( !copy_file( infd, outfd ) )		// copy whole file
602           cleanup_and_fail( 1 );
603         }
604       if( seek_write( outfd, mbuffer + begin, size, mpos + begin ) != size )
605         { show_file_error( output_filename.c_str(), "Error writing file", errno );
606           cleanup_and_fail( 1 ); }
607       if( verbosity >= 1 )
608         std::fputs( "Member reproduced successfully.\n", stdout );
609       }
610     munmap( mbuffer_base, msize + mpos_rem );
611     if( ret > 0 )
612       {
613       if( final_msg )
614         { std::fputs( final_msg, stdout ); std::fflush( stdout ); }
615       show_file_error( input_filename.c_str(),
616                        "Unable to reproduce member." ); return ret;
617       }
618     }
619 
620   if( outfd < 0 )
621     {
622     if( verbosity >= 1 )
623       std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout );
624     return 0;
625     }
626   if( close_outstream( &in_stats ) != 0 ) return 1;
627   if( verbosity >= 0 )
628     {
629     if( errors > 1 )
630       std::fputs( "One member reproduced."
631                   " Copy of input file still contains errors.\n", stdout );
632     else
633       std::fputs( "Copy of input file reproduced successfully.\n", stdout );
634     }
635   return 0;
636   }
637 
638 
639 /* Passes a 0 terminator to other functions to prevent intramember feedback.
640    Exits only in case of fatal error. (reference file too large, etc). */
debug_reproduce_file(const std::string & input_filename,const char * const lzip_name,const char * const reference_filename,const Block & range,const int sector_size,const int lzip_level)641 int debug_reproduce_file( const std::string & input_filename,
642                           const char * const lzip_name,
643                           const char * const reference_filename,
644                           const Block & range, const int sector_size,
645                           const int lzip_level )
646   {
647   struct stat in_stats;				// not used
648   const int infd =
649     open_instream( input_filename.c_str(), &in_stats, false, true );
650   if( infd < 0 ) return 1;
651 
652   const Lzip_index lzip_index( infd, true, true );
653   if( lzip_index.retval() != 0 )
654     { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
655       return lzip_index.retval(); }
656 
657   const long long cdata_size = lzip_index.cdata_size();
658   if( range.pos() >= cdata_size )
659     { show_file_error( input_filename.c_str(),
660                        "Range is beyond end of last member." ); return 1; }
661 
662   const long page_size = std::max( 1L, sysconf( _SC_PAGESIZE ) );
663   const long long positions_to_test =
664     ( ( std::min( range.end(), cdata_size ) - range.pos() ) +
665       sector_size - 9 ) / sector_size;
666   long positions = 0, successes = 0, failed_comparisons = 0;
667   long alternative_reproductions = 0;
668   const bool pct_enabled = cdata_size > sector_size &&
669                            isatty( STDERR_FILENO ) && !isatty( STDOUT_FILENO );
670   for( long i = 0; i < lzip_index.members(); ++i )
671     {
672     const long long mpos = lzip_index.mblock( i ).pos();
673     const long long msize = lzip_index.mblock( i ).size();
674     if( !range.overlaps( mpos, msize ) ) continue;
675     const long long dsize = lzip_index.dblock( i ).size();
676     const unsigned dictionary_size = lzip_index.dictionary_size( i );
677 
678     // md5sums of original not damaged member (compressed and decompressed)
679     uint8_t md5_digest_c[16], md5_digest_d[16];
680     bool md5_valid = false;
681     const long long rm_end = std::min( range.end(), mpos + msize );
682     for( long long sector_pos = std::max( range.pos(), mpos );
683          sector_pos + 8 <= rm_end; sector_pos += sector_size )
684       {
685       // without mmap, 3 times more memory are required because of fork
686       const long mpos_rem = mpos % page_size;
687       uint8_t * const mbuffer_base = (uint8_t *)mmap( 0, msize + mpos_rem,
688                 PROT_READ | PROT_WRITE, MAP_PRIVATE, infd, mpos - mpos_rem );
689       if( mbuffer_base == MAP_FAILED )
690         { show_file_error( input_filename.c_str(), "Can't mmap", errno );
691           return 1; }
692       uint8_t * const mbuffer = mbuffer_base + mpos_rem;
693       if( !md5_valid )
694         {
695         if( verbosity >= 0 )	// give a clue of the range being tested
696           { std::printf( "Reproducing:    %s\nReference file: %s\nTesting "
697                          "sectors of size %llu at file positions %llu to %llu\n",
698                          input_filename.c_str(), reference_filename,
699                          std::min( (long long)sector_size, rm_end - sector_pos ),
700                          sector_pos, rm_end - 1 ); std::fflush( stdout ); }
701         md5_valid = true; compute_md5( mbuffer, msize, md5_digest_c );
702         MD5SUM md5sum;
703         LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
704         if( mtester.test_member() != 0 || !mtester.finished() )
705           {
706           if( verbosity >= 0 )
707             { std::printf( "Member %ld of %ld already damaged (failure pos "
708                            "= %llu)\n", i + 1, lzip_index.members(),
709                            mpos + mtester.member_position() );
710               std::fflush( stdout ); }
711           munmap( mbuffer_base, msize + mpos_rem ); break;
712           }
713         md5sum.md5_finish( md5_digest_d );
714         }
715       ++positions;
716       const int sector_sz =
717         std::min( rm_end - sector_pos, (long long)sector_size );
718       // set mbuffer[sector] to 0
719       std::memset( mbuffer + ( sector_pos - mpos ), 0, sector_sz );
720       long long size = 0;
721       uint8_t value = 0;
722       const long long begin = zeroed_sector_pos( input_filename.c_str(), mbuffer,
723                                                  msize, &size, &value );
724       if( begin < 0 ) return 2;
725       MD5SUM md5sum;
726       const int ret = reproduce_member( mbuffer, msize, dsize, lzip_name,
727                     reference_filename, begin, size, lzip_level, &md5sum, 0 );
728       if( ret < 0 ) { show_error( "Can't prepare master." ); return 1; }
729       if( ret == 0 )
730         {
731         ++successes;
732         uint8_t new_digest[16];
733         md5sum.md5_finish( new_digest );
734         if( std::memcmp( md5_digest_d, new_digest, 16 ) != 0 )
735           {
736           ++failed_comparisons;
737           if( verbosity >= 0 )
738             std::printf( "Comparison failed at pos %llu\n", sector_pos );
739           }
740         else if( !check_md5( mbuffer, msize, md5_digest_c ) )
741           {
742           ++alternative_reproductions;
743           if( verbosity >= 0 )
744             std::printf( "Alternative reproduction at pos %llu\n", sector_pos );
745           }
746         else if( verbosity >= 0 )
747           std::printf( "Reproduction succeeded at pos %llu\n", sector_pos );
748         }
749       else if( verbosity >= 0 )				// ret > 0
750         std::printf( "Unable to reproduce at pos %llu\n", sector_pos );
751       if( verbosity >= 0 )
752         {
753         std::fflush( stdout );				// flush result line
754         if( pct_enabled )				// show feedback
755           std::fprintf( stderr, "\r%ld sectors  %ld successes  %ld failcomp  "
756                         "%ld altrep  %3u%% done\r", positions, successes,
757                         failed_comparisons, alternative_reproductions,
758                         (unsigned)( ( positions * 100.0 ) / positions_to_test ) );
759         }
760       munmap( mbuffer_base, msize + mpos_rem );
761       if( fatal_retval ) goto done;
762       }
763     }
764 done:
765   if( verbosity >= 0 )
766     {
767     std::printf( "\n%8ld sectors tested"
768                  "\n%8ld reproductions returned with zero status",
769                  positions, successes );
770     if( successes > 0 )
771       {
772       if( failed_comparisons > 0 )
773         std::printf( ", of which\n%8ld comparisons failed\n",
774                      failed_comparisons );
775       else std::fputs( "\n         all comparisons passed\n", stdout );
776       if( alternative_reproductions > 0 )
777         std::printf( "%8ld alternative reproductions found\n",
778                      alternative_reproductions );
779       }
780     else std::fputc( '\n', stdout );
781     if( fatal_retval )
782       std::fputs( "Exiting because of a fatal error\n", stdout );
783     }
784   return fatal_retval;
785   }
786