1 /* Lziprecover - Data recovery tool for the lzip format
2    Copyright (C) 2009-2021 Antonio Diaz Diaz.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17 
18 #define _FILE_OFFSET_BITS 64
19 
20 #include <algorithm>
21 #include <cerrno>
22 #include <climits>
23 #include <cstdio>
24 #include <cstdlib>
25 #include <cstring>
26 #include <string>
27 #include <vector>
28 #include <stdint.h>
29 #include <unistd.h>
30 #include <sys/stat.h>
31 
32 #include "lzip.h"
33 #include "mtester.h"
34 #include "lzip_index.h"
35 
36 
37 namespace {
38 
// True while a progress line has been printed whose terminator may not
// have been a newline.
bool pending_newline = false;

/* Write the newline still owed to stdout, unless 'terminator' is itself a
   newline, in which case nothing is written.
   The pending flag is cleared in every case. */
void print_pending_newline( const char terminator )
  {
  const bool owed = pending_newline;
  pending_newline = false;
  if( owed && terminator != '\n' ) std::fputc( '\n', stdout );
  }
44 
45 
gross_damage(const long long msize,const uint8_t * const mbuffer)46 bool gross_damage( const long long msize, const uint8_t * const mbuffer )
47   {
48   enum { maxlen = 7 };		// max number of consecutive identical bytes
49   long i = Lzip_header::size;
50   const long end = msize - Lzip_trailer::size - maxlen;
51   while( i < end )
52     {
53     const uint8_t byte = mbuffer[i];
54     int len = 0;			// does not count the first byte
55     while( mbuffer[++i] == byte ) if( ++len >= maxlen ) return true;
56     }
57   return false;
58   }
59 
60 
61 // Return value: 0 = no change, 5 = repaired pos
repair_dictionary_size(const long long msize,uint8_t * const mbuffer)62 int repair_dictionary_size( const long long msize, uint8_t * const mbuffer )
63   {
64   const unsigned long long dictionary_size_9 = 1 << 25;	// dict size of opt -9
65   Lzip_header & header = *(Lzip_header *)mbuffer;
66   unsigned dictionary_size = header.dictionary_size();
67   const Lzip_trailer & trailer =
68     *(const Lzip_trailer *)( mbuffer + msize - Lzip_trailer::size );
69   const unsigned long long data_size = trailer.data_size();
70   const bool valid_ds = isvalid_ds( dictionary_size );
71   if( valid_ds && dictionary_size >= data_size ) return 0;	// can't be bad
72 
73   if( !valid_ds || dictionary_size < dictionary_size_9 )
74     {
75     dictionary_size = std::min( data_size, dictionary_size_9 );
76     if( dictionary_size < min_dictionary_size )
77       dictionary_size = min_dictionary_size;
78     LZ_mtester mtester( mbuffer, msize, dictionary_size );
79     const int result = mtester.test_member();
80     if( result == 0 )
81       { header.dictionary_size( dictionary_size ); return 5; }	// fix DS
82     if( result != 1 || mtester.max_distance() <= dictionary_size ||
83         mtester.max_distance() > max_dictionary_size ) return 0;
84     }
85   if( data_size > dictionary_size_9 )
86     {
87     dictionary_size =
88       std::min( data_size, (unsigned long long)max_dictionary_size );
89     LZ_mtester mtester( mbuffer, msize, dictionary_size );
90     if( mtester.test_member() == 0 )
91       { header.dictionary_size( dictionary_size ); return 5; }	// fix DS
92     }
93   return 0;
94   }
95 
96 
prepare_master(const uint8_t * const buffer,const long buffer_size,const unsigned long pos_limit,const unsigned dictionary_size)97 const LZ_mtester * prepare_master( const uint8_t * const buffer,
98                                    const long buffer_size,
99                                    const unsigned long pos_limit,
100                                    const unsigned dictionary_size )
101   {
102   LZ_mtester * const master =
103     new LZ_mtester( buffer, buffer_size, dictionary_size );
104   if( master->test_member( pos_limit ) == -1 ) return master;
105   delete master;
106   return 0;
107   }
108 
109 
test_member_rest(const LZ_mtester & master,long * const failure_posp=0)110 bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 )
111   {
112   LZ_mtester mtester( master );
113   mtester.duplicate_buffer();
114   if( mtester.test_member() == 0 && mtester.finished() ) return true;
115   if( failure_posp ) *failure_posp = mtester.member_position();
116   return false;
117   }
118 
119 
120 // Return value: -1 = master failed, 0 = begin reached, >0 = repaired pos
repair_member(const long long mpos,const long long msize,uint8_t * const mbuffer,const long begin,const long end,const unsigned dictionary_size,const char terminator)121 long repair_member( const long long mpos, const long long msize,
122                     uint8_t * const mbuffer, const long begin, const long end,
123                     const unsigned dictionary_size, const char terminator )
124   {
125   for( long pos = end; pos >= begin && pos > end - 50000; )
126     {
127     const long min_pos = std::max( begin, pos - 100 );
128     const unsigned long pos_limit = std::max( min_pos - 16, 0L );
129     const LZ_mtester * master =
130       prepare_master( mbuffer, msize, pos_limit, dictionary_size );
131     if( !master ) return -1;
132     for( ; pos >= min_pos; --pos )
133       {
134       if( verbosity >= 2 )
135         {
136         std::printf( "  Trying position %llu %c", mpos + pos, terminator );
137         std::fflush( stdout ); pending_newline = true;
138         }
139       for( int j = 0; j < 255; ++j )
140         {
141         ++mbuffer[pos];
142         if( test_member_rest( *master ) ) { delete master; return pos; }
143         }
144       ++mbuffer[pos];
145       }
146     delete master;
147     }
148   return 0;
149   }
150 
151 } // end namespace
152 
153 
seek_write(const int fd,const uint8_t * const buf,const long long size,const long long pos)154 long long seek_write( const int fd, const uint8_t * const buf,
155                       const long long size, const long long pos )
156   {
157   if( lseek( fd, pos, SEEK_SET ) == pos )
158     return writeblock( fd, buf, size );
159   return 0;
160   }
161 
162 
read_member(const int infd,const long long mpos,const long long msize)163 uint8_t * read_member( const int infd, const long long mpos,
164                        const long long msize )
165   {
166   if( msize <= 0 || msize > LONG_MAX )
167     { show_error( "Member is larger than LONG_MAX." ); return 0; }
168   if( !safe_seek( infd, mpos ) ) return 0;
169   uint8_t * const buffer = new uint8_t[msize];
170 
171   if( readblock( infd, buffer, msize ) != msize )
172     { show_error( "Error reading input file", errno );
173       delete[] buffer; return 0; }
174   return buffer;
175   }
176 
177 
repair_file(const std::string & input_filename,const std::string & default_output_filename,const char terminator,const bool force)178 int repair_file( const std::string & input_filename,
179                  const std::string & default_output_filename,
180                  const char terminator, const bool force )
181   {
182   struct stat in_stats;
183   const int infd =
184     open_instream( input_filename.c_str(), &in_stats, false, true );
185   if( infd < 0 ) return 1;
186 
187   const Lzip_index lzip_index( infd, true, true, true );
188   if( lzip_index.retval() != 0 )
189     { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
190       return lzip_index.retval(); }
191 
192   output_filename = default_output_filename.empty() ?
193                     insert_fixed( input_filename ) : default_output_filename;
194   if( !force && file_exists( output_filename ) ) return 1;
195   outfd = -1;
196   for( long i = 0; i < lzip_index.members(); ++i )
197     {
198     const long long mpos = lzip_index.mblock( i ).pos();
199     const long long msize = lzip_index.mblock( i ).size();
200     if( !safe_seek( infd, mpos ) ) cleanup_and_fail( 1 );
201     long long failure_pos = 0;
202     if( test_member_from_file( infd, msize, &failure_pos ) == 0 ) continue;
203     if( failure_pos < Lzip_header::size )		// End Of File
204       { show_error( "Can't repair error in input file." );
205         cleanup_and_fail( 2 ); }
206     if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1;
207 
208     if( verbosity >= 2 )		// damaged member found
209       {
210       std::printf( "Repairing member %ld of %ld  (failure pos = %llu)\n",
211                    i + 1, lzip_index.members(), mpos + failure_pos );
212       std::fflush( stdout );
213       }
214     uint8_t * const mbuffer = read_member( infd, mpos, msize );
215     if( !mbuffer ) cleanup_and_fail( 1 );
216     const Lzip_header & header = *(const Lzip_header *)mbuffer;
217     const unsigned dictionary_size = header.dictionary_size();
218     long pos = 0;
219     if( !gross_damage( msize, mbuffer ) )
220       {
221       pos = repair_dictionary_size( msize, mbuffer );
222       if( pos == 0 )
223         pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1,
224                              Lzip_header::size + 6, dictionary_size, terminator );
225       if( pos == 0 )
226         pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 7,
227                              failure_pos, dictionary_size, terminator );
228       print_pending_newline( terminator );
229       }
230     if( pos < 0 )
231       { show_error( "Can't prepare master." ); cleanup_and_fail( 1 ); }
232     if( pos > 0 )
233       {
234       if( outfd < 0 )		// first damaged member repaired
235         {
236         if( !safe_seek( infd, 0 ) ) return 1;
237         set_signal_handler();
238         if( !open_outstream( true, true ) ) return 1;
239         if( !copy_file( infd, outfd ) )		// copy whole file
240           cleanup_and_fail( 1 );
241         }
242       if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 )
243         { show_error( "Error writing output file", errno );
244           cleanup_and_fail( 1 ); }
245       }
246     delete[] mbuffer;
247     if( pos == 0 )
248       {
249       show_error( "Can't repair input file. Error is probably larger than 1 byte." );
250       cleanup_and_fail( 2 );
251       }
252     }
253 
254   if( outfd < 0 )
255     {
256     if( verbosity >= 1 )
257       std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout );
258     return 0;
259     }
260   if( close_outstream( &in_stats ) != 0 ) return 1;
261   if( verbosity >= 1 )
262     std::fputs( "Copy of input file repaired successfully.\n", stdout );
263   return 0;
264   }
265 
266 
debug_delay(const std::string & input_filename,Block range,const char terminator)267 int debug_delay( const std::string & input_filename, Block range,
268                  const char terminator )
269   {
270   struct stat in_stats;				// not used
271   const int infd =
272     open_instream( input_filename.c_str(), &in_stats, false, true );
273   if( infd < 0 ) return 1;
274 
275   const Lzip_index lzip_index( infd, true, true );
276   if( lzip_index.retval() != 0 )
277     { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
278       return lzip_index.retval(); }
279 
280   if( range.end() > lzip_index.cdata_size() )
281     range.size( std::max( 0LL, lzip_index.cdata_size() - range.pos() ) );
282   if( range.size() <= 0 )
283     { show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; }
284 
285   for( long i = 0; i < lzip_index.members(); ++i )
286     {
287     const Block & mb = lzip_index.mblock( i );
288     if( !range.overlaps( mb ) ) continue;
289     const long long mpos = lzip_index.mblock( i ).pos();
290     const long long msize = lzip_index.mblock( i ).size();
291     const unsigned dictionary_size = lzip_index.dictionary_size( i );
292     if( verbosity >= 2 )
293       {
294       std::printf( "Finding max delay in member %ld of %ld  (mpos = %llu, msize = %llu)\n",
295                    i + 1, lzip_index.members(), mpos, msize );
296       std::fflush( stdout );
297       }
298     uint8_t * const mbuffer = read_member( infd, mpos, msize );
299     if( !mbuffer ) return 1;
300     long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL );
301     const long end = std::min( range.end() - mpos, msize );
302     long max_delay = 0;
303     while( pos < end )
304       {
305       const unsigned long pos_limit = std::max( pos - 16, 0L );
306       const LZ_mtester * master =
307         prepare_master( mbuffer, msize, pos_limit, dictionary_size );
308       if( !master )
309         { show_error( "Can't prepare master." ); return 1; }
310       const long partial_end = std::min( pos + 100, end );
311       for( ; pos < partial_end; ++pos )
312         {
313         if( verbosity >= 2 )
314           {
315           std::printf( "  Delays at position %llu %c", mpos + pos, terminator );
316           std::fflush( stdout ); pending_newline = true;
317           }
318         int value = -1;
319         for( int j = 0; j < 256; ++j )
320           {
321           ++mbuffer[pos];
322           if( j == 255 ) break;
323           long failure_pos = 0;
324           if( test_member_rest( *master, &failure_pos ) ) continue;
325           const long delay = failure_pos - pos;
326           if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; }
327           }
328         if( value >= 0 && verbosity >= 2 )
329           {
330           std::printf( "  New max delay %lu at position %llu (0x%02X)\n",
331                        max_delay, mpos + pos, value );
332           std::fflush( stdout ); pending_newline = false;
333           }
334         if( pos + max_delay >= msize ) { pos = end; break; }
335         }
336       delete master;
337       }
338     delete[] mbuffer;
339     print_pending_newline( terminator );
340     }
341 
342   if( verbosity >= 1 ) std::fputs( "Done.\n", stdout );
343   return 0;
344   }
345 
346 
debug_repair(const std::string & input_filename,const Bad_byte & bad_byte,const char terminator)347 int debug_repair( const std::string & input_filename,
348                   const Bad_byte & bad_byte, const char terminator )
349   {
350   struct stat in_stats;				// not used
351   const int infd =
352     open_instream( input_filename.c_str(), &in_stats, false, true );
353   if( infd < 0 ) return 1;
354 
355   const Lzip_index lzip_index( infd, true, true );
356   if( lzip_index.retval() != 0 )
357     { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
358       return lzip_index.retval(); }
359 
360   long idx = 0;
361   for( ; idx < lzip_index.members(); ++idx )
362     if( lzip_index.mblock( idx ).includes( bad_byte.pos ) ) break;
363   if( idx >= lzip_index.members() )
364     { show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; }
365 
366   const long long mpos = lzip_index.mblock( idx ).pos();
367   const long long msize = lzip_index.mblock( idx ).size();
368   {
369   long long failure_pos = 0;
370   if( !safe_seek( infd, mpos ) ) return 1;
371   if( test_member_from_file( infd, msize, &failure_pos ) != 0 )
372     {
373     if( verbosity >= 0 )
374       std::fprintf( stderr, "Member %ld of %ld already damaged  (failure pos = %llu)\n",
375                     idx + 1, lzip_index.members(), mpos + failure_pos );
376     return 2;
377     }
378   }
379   uint8_t * const mbuffer = read_member( infd, mpos, msize );
380   if( !mbuffer ) return 1;
381   const Lzip_header & header = *(const Lzip_header *)mbuffer;
382   const unsigned dictionary_size = header.dictionary_size();
383   const uint8_t good_value = mbuffer[bad_byte.pos-mpos];
384   const uint8_t bad_value = bad_byte( good_value );
385   mbuffer[bad_byte.pos-mpos] = bad_value;
386   long failure_pos = 0;
387   if( bad_byte.pos != 5 || isvalid_ds( header.dictionary_size() ) )
388     {
389     const LZ_mtester * master =
390       prepare_master( mbuffer, msize, 0, header.dictionary_size() );
391     if( !master )
392       { show_error( "Can't prepare master." ); delete[] mbuffer; return 1; }
393     if( test_member_rest( *master, &failure_pos ) )
394       {
395       if( verbosity >= 1 )
396         std::fputs( "Member decompressed with no errors.\n", stdout );
397       delete master;
398       delete[] mbuffer;
399       return 0;
400       }
401     delete master;
402     }
403   if( verbosity >= 2 )
404     {
405     std::printf( "Test repairing member %ld of %ld  (mpos = %llu, msize = %llu)\n"
406                  "  (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu, delay = %lld )\n",
407                  idx + 1, lzip_index.members(), mpos, msize,
408                  bad_byte.pos, good_value, bad_value, mpos + failure_pos,
409                  mpos + failure_pos - bad_byte.pos );
410     std::fflush( stdout );
411     }
412   if( failure_pos >= msize ) failure_pos = msize - 1;
413   long pos = repair_dictionary_size( msize, mbuffer );
414   if( pos == 0 )
415     pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1,
416                          Lzip_header::size + 6, dictionary_size, terminator );
417   if( pos == 0 )
418     pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 7,
419                          failure_pos, dictionary_size, terminator );
420   print_pending_newline( terminator );
421   delete[] mbuffer;
422   if( pos < 0 ) { show_error( "Can't prepare master." ); return 1; }
423   if( pos == 0 ) internal_error( "can't repair input file." );
424   if( verbosity >= 1 ) std::fputs( "Member repaired successfully.\n", stdout );
425   return 0;
426   }
427 
428 
429 /* If show_packets is true, print to stdout descriptions of the decoded LZMA
430    packets. Print also some global values; total number of packets in
431    member, max distance (rep0) and its file position, max LZMA packet size
432    in each member and the file position of these packets.
433    (Packet sizes are a fractionary number of bytes. The packet and marker
434    sizes shown by option -X are the number of extra bytes required to decode
435    the packet, not counting the data present in the range decoder before and
436    after the decoding. The max marker size of a 'Sync Flush marker' does not
437    include the 5 bytes read by rdec.load).
438 */
debug_decompress(const std::string & input_filename,const Bad_byte & bad_byte,const bool show_packets)439 int debug_decompress( const std::string & input_filename,
440                       const Bad_byte & bad_byte, const bool show_packets )
441   {
442   struct stat in_stats;
443   const int infd =
444     open_instream( input_filename.c_str(), &in_stats, false, true );
445   if( infd < 0 ) return 1;
446 
447   const Lzip_index lzip_index( infd, true, true );
448   if( lzip_index.retval() != 0 )
449     { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
450       return lzip_index.retval(); }
451 
452   outfd = show_packets ? -1 : STDOUT_FILENO;
453   int retval = 0;
454   for( long i = 0; i < lzip_index.members(); ++i )
455     {
456     const long long dpos = lzip_index.dblock( i ).pos();
457     const long long mpos = lzip_index.mblock( i ).pos();
458     const long long msize = lzip_index.mblock( i ).size();
459     const unsigned dictionary_size = lzip_index.dictionary_size( i );
460     if( verbosity >= 1 && show_packets )
461       std::printf( "Decoding LZMA packets in member %ld of %ld  (mpos = %llu, msize = %llu)\n"
462                    "  mpos   dpos\n",
463                    i + 1, lzip_index.members(), mpos, msize );
464     if( !isvalid_ds( dictionary_size ) )
465       { show_error( bad_dict_msg ); retval = 2; break; }
466     uint8_t * const mbuffer = read_member( infd, mpos, msize );
467     if( !mbuffer ) { retval = 1; break; }
468     if( bad_byte.pos >= 0 && lzip_index.mblock( i ).includes( bad_byte.pos ) )
469       {
470       const uint8_t good_value = mbuffer[bad_byte.pos-mpos];
471       const uint8_t bad_value = bad_byte( good_value );
472       mbuffer[bad_byte.pos-mpos] = bad_value;
473       if( verbosity >= 1 && show_packets )
474         std::printf( "Byte at pos %llu changed from 0x%02X to 0x%02X\n",
475                      bad_byte.pos, good_value, bad_value );
476       }
477     LZ_mtester mtester( mbuffer, msize, dictionary_size, outfd );
478     const int result = mtester.debug_decode_member( dpos, mpos, show_packets );
479     delete[] mbuffer;
480     if( show_packets )
481       {
482       const std::vector< unsigned long long > & mppv = mtester.max_packet_posv();
483       const unsigned mpackets = mppv.size();
484       std::printf( "Total packets in member   = %llu\n"
485                    "Max distance in any match = %u at file position %llu\n"
486                    "Max marker size found = %u\n"
487                    "Max packet size found = %u (%u packets)%s",
488                     mtester.total_packets(), mtester.max_distance(),
489                     mtester.max_distance_pos(), mtester.max_marker_size(),
490                     mtester.max_packet_size(), mpackets,
491                     mpackets ? " at file positions" : "" );
492       for( unsigned i = 0; i < mpackets; ++i )
493         std::printf( " %llu", mppv[i] );
494       std::fputc( '\n', stdout );
495       }
496     if( result != 0 )
497       {
498       if( verbosity >= 0 && result <= 2 && show_packets )
499         std::printf( "%s at pos %llu\n", ( result == 2 ) ?
500                      "File ends unexpectedly" : "Decoder error",
501                      mpos + mtester.member_position() );
502       retval = 2; break;
503       }
504     if( i + 1 < lzip_index.members() && show_packets )
505       std::fputc( '\n', stdout );
506     }
507 
508   retval = std::max( retval, close_outstream( &in_stats ) );
509   if( verbosity >= 1 && show_packets && retval == 0 )
510     std::fputs( "Done.\n", stdout );
511   return retval;
512   }
513