/* Lziprecover - Data recovery tool for the lzip format
   Copyright (C) 2009-2021 Antonio Diaz Diaz.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
17
18 #define _FILE_OFFSET_BITS 64
19
20 #include <algorithm>
21 #include <cerrno>
22 #include <climits>
23 #include <cstdio>
24 #include <cstdlib>
25 #include <cstring>
26 #include <string>
27 #include <vector>
28 #include <stdint.h>
29 #include <unistd.h>
30 #include <sys/stat.h>
31
32 #include "lzip.h"
33 #include "mtester.h"
34 #include "lzip_index.h"
35
36
37 namespace {
38
// True while a progress line has been printed to stdout without being
// terminated by a newline (set by the "Trying position"/"Delays at
// position" messages in this file).
bool pending_newline = false;

/* Finish a pending progress line, if any.
   A newline is written to stdout only when a progress line is pending and
   'terminator' (the character the progress messages end with) is not
   itself a newline. The pending flag is always cleared.
*/
void print_pending_newline( const char terminator )
  {
  if( pending_newline && terminator != '\n' ) std::fputc( '\n', stdout );
  pending_newline = false;
  }
44
45
gross_damage(const long long msize,const uint8_t * const mbuffer)46 bool gross_damage( const long long msize, const uint8_t * const mbuffer )
47 {
48 enum { maxlen = 7 }; // max number of consecutive identical bytes
49 long i = Lzip_header::size;
50 const long end = msize - Lzip_trailer::size - maxlen;
51 while( i < end )
52 {
53 const uint8_t byte = mbuffer[i];
54 int len = 0; // does not count the first byte
55 while( mbuffer[++i] == byte ) if( ++len >= maxlen ) return true;
56 }
57 return false;
58 }
59
60
61 // Return value: 0 = no change, 5 = repaired pos
repair_dictionary_size(const long long msize,uint8_t * const mbuffer)62 int repair_dictionary_size( const long long msize, uint8_t * const mbuffer )
63 {
64 const unsigned long long dictionary_size_9 = 1 << 25; // dict size of opt -9
65 Lzip_header & header = *(Lzip_header *)mbuffer;
66 unsigned dictionary_size = header.dictionary_size();
67 const Lzip_trailer & trailer =
68 *(const Lzip_trailer *)( mbuffer + msize - Lzip_trailer::size );
69 const unsigned long long data_size = trailer.data_size();
70 const bool valid_ds = isvalid_ds( dictionary_size );
71 if( valid_ds && dictionary_size >= data_size ) return 0; // can't be bad
72
73 if( !valid_ds || dictionary_size < dictionary_size_9 )
74 {
75 dictionary_size = std::min( data_size, dictionary_size_9 );
76 if( dictionary_size < min_dictionary_size )
77 dictionary_size = min_dictionary_size;
78 LZ_mtester mtester( mbuffer, msize, dictionary_size );
79 const int result = mtester.test_member();
80 if( result == 0 )
81 { header.dictionary_size( dictionary_size ); return 5; } // fix DS
82 if( result != 1 || mtester.max_distance() <= dictionary_size ||
83 mtester.max_distance() > max_dictionary_size ) return 0;
84 }
85 if( data_size > dictionary_size_9 )
86 {
87 dictionary_size =
88 std::min( data_size, (unsigned long long)max_dictionary_size );
89 LZ_mtester mtester( mbuffer, msize, dictionary_size );
90 if( mtester.test_member() == 0 )
91 { header.dictionary_size( dictionary_size ); return 5; } // fix DS
92 }
93 return 0;
94 }
95
96
prepare_master(const uint8_t * const buffer,const long buffer_size,const unsigned long pos_limit,const unsigned dictionary_size)97 const LZ_mtester * prepare_master( const uint8_t * const buffer,
98 const long buffer_size,
99 const unsigned long pos_limit,
100 const unsigned dictionary_size )
101 {
102 LZ_mtester * const master =
103 new LZ_mtester( buffer, buffer_size, dictionary_size );
104 if( master->test_member( pos_limit ) == -1 ) return master;
105 delete master;
106 return 0;
107 }
108
109
test_member_rest(const LZ_mtester & master,long * const failure_posp=0)110 bool test_member_rest( const LZ_mtester & master, long * const failure_posp = 0 )
111 {
112 LZ_mtester mtester( master );
113 mtester.duplicate_buffer();
114 if( mtester.test_member() == 0 && mtester.finished() ) return true;
115 if( failure_posp ) *failure_posp = mtester.member_position();
116 return false;
117 }
118
119
120 // Return value: -1 = master failed, 0 = begin reached, >0 = repaired pos
repair_member(const long long mpos,const long long msize,uint8_t * const mbuffer,const long begin,const long end,const unsigned dictionary_size,const char terminator)121 long repair_member( const long long mpos, const long long msize,
122 uint8_t * const mbuffer, const long begin, const long end,
123 const unsigned dictionary_size, const char terminator )
124 {
125 for( long pos = end; pos >= begin && pos > end - 50000; )
126 {
127 const long min_pos = std::max( begin, pos - 100 );
128 const unsigned long pos_limit = std::max( min_pos - 16, 0L );
129 const LZ_mtester * master =
130 prepare_master( mbuffer, msize, pos_limit, dictionary_size );
131 if( !master ) return -1;
132 for( ; pos >= min_pos; --pos )
133 {
134 if( verbosity >= 2 )
135 {
136 std::printf( " Trying position %llu %c", mpos + pos, terminator );
137 std::fflush( stdout ); pending_newline = true;
138 }
139 for( int j = 0; j < 255; ++j )
140 {
141 ++mbuffer[pos];
142 if( test_member_rest( *master ) ) { delete master; return pos; }
143 }
144 ++mbuffer[pos];
145 }
146 delete master;
147 }
148 return 0;
149 }
150
151 } // end namespace
152
153
seek_write(const int fd,const uint8_t * const buf,const long long size,const long long pos)154 long long seek_write( const int fd, const uint8_t * const buf,
155 const long long size, const long long pos )
156 {
157 if( lseek( fd, pos, SEEK_SET ) == pos )
158 return writeblock( fd, buf, size );
159 return 0;
160 }
161
162
read_member(const int infd,const long long mpos,const long long msize)163 uint8_t * read_member( const int infd, const long long mpos,
164 const long long msize )
165 {
166 if( msize <= 0 || msize > LONG_MAX )
167 { show_error( "Member is larger than LONG_MAX." ); return 0; }
168 if( !safe_seek( infd, mpos ) ) return 0;
169 uint8_t * const buffer = new uint8_t[msize];
170
171 if( readblock( infd, buffer, msize ) != msize )
172 { show_error( "Error reading input file", errno );
173 delete[] buffer; return 0; }
174 return buffer;
175 }
176
177
repair_file(const std::string & input_filename,const std::string & default_output_filename,const char terminator,const bool force)178 int repair_file( const std::string & input_filename,
179 const std::string & default_output_filename,
180 const char terminator, const bool force )
181 {
182 struct stat in_stats;
183 const int infd =
184 open_instream( input_filename.c_str(), &in_stats, false, true );
185 if( infd < 0 ) return 1;
186
187 const Lzip_index lzip_index( infd, true, true, true );
188 if( lzip_index.retval() != 0 )
189 { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
190 return lzip_index.retval(); }
191
192 output_filename = default_output_filename.empty() ?
193 insert_fixed( input_filename ) : default_output_filename;
194 if( !force && file_exists( output_filename ) ) return 1;
195 outfd = -1;
196 for( long i = 0; i < lzip_index.members(); ++i )
197 {
198 const long long mpos = lzip_index.mblock( i ).pos();
199 const long long msize = lzip_index.mblock( i ).size();
200 if( !safe_seek( infd, mpos ) ) cleanup_and_fail( 1 );
201 long long failure_pos = 0;
202 if( test_member_from_file( infd, msize, &failure_pos ) == 0 ) continue;
203 if( failure_pos < Lzip_header::size ) // End Of File
204 { show_error( "Can't repair error in input file." );
205 cleanup_and_fail( 2 ); }
206 if( failure_pos >= msize - 8 ) failure_pos = msize - 8 - 1;
207
208 if( verbosity >= 2 ) // damaged member found
209 {
210 std::printf( "Repairing member %ld of %ld (failure pos = %llu)\n",
211 i + 1, lzip_index.members(), mpos + failure_pos );
212 std::fflush( stdout );
213 }
214 uint8_t * const mbuffer = read_member( infd, mpos, msize );
215 if( !mbuffer ) cleanup_and_fail( 1 );
216 const Lzip_header & header = *(const Lzip_header *)mbuffer;
217 const unsigned dictionary_size = header.dictionary_size();
218 long pos = 0;
219 if( !gross_damage( msize, mbuffer ) )
220 {
221 pos = repair_dictionary_size( msize, mbuffer );
222 if( pos == 0 )
223 pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1,
224 Lzip_header::size + 6, dictionary_size, terminator );
225 if( pos == 0 )
226 pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 7,
227 failure_pos, dictionary_size, terminator );
228 print_pending_newline( terminator );
229 }
230 if( pos < 0 )
231 { show_error( "Can't prepare master." ); cleanup_and_fail( 1 ); }
232 if( pos > 0 )
233 {
234 if( outfd < 0 ) // first damaged member repaired
235 {
236 if( !safe_seek( infd, 0 ) ) return 1;
237 set_signal_handler();
238 if( !open_outstream( true, true ) ) return 1;
239 if( !copy_file( infd, outfd ) ) // copy whole file
240 cleanup_and_fail( 1 );
241 }
242 if( seek_write( outfd, mbuffer + pos, 1, mpos + pos ) != 1 )
243 { show_error( "Error writing output file", errno );
244 cleanup_and_fail( 1 ); }
245 }
246 delete[] mbuffer;
247 if( pos == 0 )
248 {
249 show_error( "Can't repair input file. Error is probably larger than 1 byte." );
250 cleanup_and_fail( 2 );
251 }
252 }
253
254 if( outfd < 0 )
255 {
256 if( verbosity >= 1 )
257 std::fputs( "Input file has no errors. Recovery is not needed.\n", stdout );
258 return 0;
259 }
260 if( close_outstream( &in_stats ) != 0 ) return 1;
261 if( verbosity >= 1 )
262 std::fputs( "Copy of input file repaired successfully.\n", stdout );
263 return 0;
264 }
265
266
debug_delay(const std::string & input_filename,Block range,const char terminator)267 int debug_delay( const std::string & input_filename, Block range,
268 const char terminator )
269 {
270 struct stat in_stats; // not used
271 const int infd =
272 open_instream( input_filename.c_str(), &in_stats, false, true );
273 if( infd < 0 ) return 1;
274
275 const Lzip_index lzip_index( infd, true, true );
276 if( lzip_index.retval() != 0 )
277 { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
278 return lzip_index.retval(); }
279
280 if( range.end() > lzip_index.cdata_size() )
281 range.size( std::max( 0LL, lzip_index.cdata_size() - range.pos() ) );
282 if( range.size() <= 0 )
283 { show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; }
284
285 for( long i = 0; i < lzip_index.members(); ++i )
286 {
287 const Block & mb = lzip_index.mblock( i );
288 if( !range.overlaps( mb ) ) continue;
289 const long long mpos = lzip_index.mblock( i ).pos();
290 const long long msize = lzip_index.mblock( i ).size();
291 const unsigned dictionary_size = lzip_index.dictionary_size( i );
292 if( verbosity >= 2 )
293 {
294 std::printf( "Finding max delay in member %ld of %ld (mpos = %llu, msize = %llu)\n",
295 i + 1, lzip_index.members(), mpos, msize );
296 std::fflush( stdout );
297 }
298 uint8_t * const mbuffer = read_member( infd, mpos, msize );
299 if( !mbuffer ) return 1;
300 long pos = std::max( range.pos() - mpos, Lzip_header::size + 1LL );
301 const long end = std::min( range.end() - mpos, msize );
302 long max_delay = 0;
303 while( pos < end )
304 {
305 const unsigned long pos_limit = std::max( pos - 16, 0L );
306 const LZ_mtester * master =
307 prepare_master( mbuffer, msize, pos_limit, dictionary_size );
308 if( !master )
309 { show_error( "Can't prepare master." ); return 1; }
310 const long partial_end = std::min( pos + 100, end );
311 for( ; pos < partial_end; ++pos )
312 {
313 if( verbosity >= 2 )
314 {
315 std::printf( " Delays at position %llu %c", mpos + pos, terminator );
316 std::fflush( stdout ); pending_newline = true;
317 }
318 int value = -1;
319 for( int j = 0; j < 256; ++j )
320 {
321 ++mbuffer[pos];
322 if( j == 255 ) break;
323 long failure_pos = 0;
324 if( test_member_rest( *master, &failure_pos ) ) continue;
325 const long delay = failure_pos - pos;
326 if( delay > max_delay ) { max_delay = delay; value = mbuffer[pos]; }
327 }
328 if( value >= 0 && verbosity >= 2 )
329 {
330 std::printf( " New max delay %lu at position %llu (0x%02X)\n",
331 max_delay, mpos + pos, value );
332 std::fflush( stdout ); pending_newline = false;
333 }
334 if( pos + max_delay >= msize ) { pos = end; break; }
335 }
336 delete master;
337 }
338 delete[] mbuffer;
339 print_pending_newline( terminator );
340 }
341
342 if( verbosity >= 1 ) std::fputs( "Done.\n", stdout );
343 return 0;
344 }
345
346
debug_repair(const std::string & input_filename,const Bad_byte & bad_byte,const char terminator)347 int debug_repair( const std::string & input_filename,
348 const Bad_byte & bad_byte, const char terminator )
349 {
350 struct stat in_stats; // not used
351 const int infd =
352 open_instream( input_filename.c_str(), &in_stats, false, true );
353 if( infd < 0 ) return 1;
354
355 const Lzip_index lzip_index( infd, true, true );
356 if( lzip_index.retval() != 0 )
357 { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
358 return lzip_index.retval(); }
359
360 long idx = 0;
361 for( ; idx < lzip_index.members(); ++idx )
362 if( lzip_index.mblock( idx ).includes( bad_byte.pos ) ) break;
363 if( idx >= lzip_index.members() )
364 { show_file_error( input_filename.c_str(), "Nothing to do." ); return 0; }
365
366 const long long mpos = lzip_index.mblock( idx ).pos();
367 const long long msize = lzip_index.mblock( idx ).size();
368 {
369 long long failure_pos = 0;
370 if( !safe_seek( infd, mpos ) ) return 1;
371 if( test_member_from_file( infd, msize, &failure_pos ) != 0 )
372 {
373 if( verbosity >= 0 )
374 std::fprintf( stderr, "Member %ld of %ld already damaged (failure pos = %llu)\n",
375 idx + 1, lzip_index.members(), mpos + failure_pos );
376 return 2;
377 }
378 }
379 uint8_t * const mbuffer = read_member( infd, mpos, msize );
380 if( !mbuffer ) return 1;
381 const Lzip_header & header = *(const Lzip_header *)mbuffer;
382 const unsigned dictionary_size = header.dictionary_size();
383 const uint8_t good_value = mbuffer[bad_byte.pos-mpos];
384 const uint8_t bad_value = bad_byte( good_value );
385 mbuffer[bad_byte.pos-mpos] = bad_value;
386 long failure_pos = 0;
387 if( bad_byte.pos != 5 || isvalid_ds( header.dictionary_size() ) )
388 {
389 const LZ_mtester * master =
390 prepare_master( mbuffer, msize, 0, header.dictionary_size() );
391 if( !master )
392 { show_error( "Can't prepare master." ); delete[] mbuffer; return 1; }
393 if( test_member_rest( *master, &failure_pos ) )
394 {
395 if( verbosity >= 1 )
396 std::fputs( "Member decompressed with no errors.\n", stdout );
397 delete master;
398 delete[] mbuffer;
399 return 0;
400 }
401 delete master;
402 }
403 if( verbosity >= 2 )
404 {
405 std::printf( "Test repairing member %ld of %ld (mpos = %llu, msize = %llu)\n"
406 " (damage pos = %llu (0x%02X->0x%02X), failure pos = %llu, delay = %lld )\n",
407 idx + 1, lzip_index.members(), mpos, msize,
408 bad_byte.pos, good_value, bad_value, mpos + failure_pos,
409 mpos + failure_pos - bad_byte.pos );
410 std::fflush( stdout );
411 }
412 if( failure_pos >= msize ) failure_pos = msize - 1;
413 long pos = repair_dictionary_size( msize, mbuffer );
414 if( pos == 0 )
415 pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 1,
416 Lzip_header::size + 6, dictionary_size, terminator );
417 if( pos == 0 )
418 pos = repair_member( mpos, msize, mbuffer, Lzip_header::size + 7,
419 failure_pos, dictionary_size, terminator );
420 print_pending_newline( terminator );
421 delete[] mbuffer;
422 if( pos < 0 ) { show_error( "Can't prepare master." ); return 1; }
423 if( pos == 0 ) internal_error( "can't repair input file." );
424 if( verbosity >= 1 ) std::fputs( "Member repaired successfully.\n", stdout );
425 return 0;
426 }
427
428
429 /* If show_packets is true, print to stdout descriptions of the decoded LZMA
430 packets. Print also some global values; total number of packets in
431 member, max distance (rep0) and its file position, max LZMA packet size
432 in each member and the file position of these packets.
433 (Packet sizes are a fractionary number of bytes. The packet and marker
434 sizes shown by option -X are the number of extra bytes required to decode
435 the packet, not counting the data present in the range decoder before and
436 after the decoding. The max marker size of a 'Sync Flush marker' does not
437 include the 5 bytes read by rdec.load).
438 */
debug_decompress(const std::string & input_filename,const Bad_byte & bad_byte,const bool show_packets)439 int debug_decompress( const std::string & input_filename,
440 const Bad_byte & bad_byte, const bool show_packets )
441 {
442 struct stat in_stats;
443 const int infd =
444 open_instream( input_filename.c_str(), &in_stats, false, true );
445 if( infd < 0 ) return 1;
446
447 const Lzip_index lzip_index( infd, true, true );
448 if( lzip_index.retval() != 0 )
449 { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
450 return lzip_index.retval(); }
451
452 outfd = show_packets ? -1 : STDOUT_FILENO;
453 int retval = 0;
454 for( long i = 0; i < lzip_index.members(); ++i )
455 {
456 const long long dpos = lzip_index.dblock( i ).pos();
457 const long long mpos = lzip_index.mblock( i ).pos();
458 const long long msize = lzip_index.mblock( i ).size();
459 const unsigned dictionary_size = lzip_index.dictionary_size( i );
460 if( verbosity >= 1 && show_packets )
461 std::printf( "Decoding LZMA packets in member %ld of %ld (mpos = %llu, msize = %llu)\n"
462 " mpos dpos\n",
463 i + 1, lzip_index.members(), mpos, msize );
464 if( !isvalid_ds( dictionary_size ) )
465 { show_error( bad_dict_msg ); retval = 2; break; }
466 uint8_t * const mbuffer = read_member( infd, mpos, msize );
467 if( !mbuffer ) { retval = 1; break; }
468 if( bad_byte.pos >= 0 && lzip_index.mblock( i ).includes( bad_byte.pos ) )
469 {
470 const uint8_t good_value = mbuffer[bad_byte.pos-mpos];
471 const uint8_t bad_value = bad_byte( good_value );
472 mbuffer[bad_byte.pos-mpos] = bad_value;
473 if( verbosity >= 1 && show_packets )
474 std::printf( "Byte at pos %llu changed from 0x%02X to 0x%02X\n",
475 bad_byte.pos, good_value, bad_value );
476 }
477 LZ_mtester mtester( mbuffer, msize, dictionary_size, outfd );
478 const int result = mtester.debug_decode_member( dpos, mpos, show_packets );
479 delete[] mbuffer;
480 if( show_packets )
481 {
482 const std::vector< unsigned long long > & mppv = mtester.max_packet_posv();
483 const unsigned mpackets = mppv.size();
484 std::printf( "Total packets in member = %llu\n"
485 "Max distance in any match = %u at file position %llu\n"
486 "Max marker size found = %u\n"
487 "Max packet size found = %u (%u packets)%s",
488 mtester.total_packets(), mtester.max_distance(),
489 mtester.max_distance_pos(), mtester.max_marker_size(),
490 mtester.max_packet_size(), mpackets,
491 mpackets ? " at file positions" : "" );
492 for( unsigned i = 0; i < mpackets; ++i )
493 std::printf( " %llu", mppv[i] );
494 std::fputc( '\n', stdout );
495 }
496 if( result != 0 )
497 {
498 if( verbosity >= 0 && result <= 2 && show_packets )
499 std::printf( "%s at pos %llu\n", ( result == 2 ) ?
500 "File ends unexpectedly" : "Decoder error",
501 mpos + mtester.member_position() );
502 retval = 2; break;
503 }
504 if( i + 1 < lzip_index.members() && show_packets )
505 std::fputc( '\n', stdout );
506 }
507
508 retval = std::max( retval, close_outstream( &in_stats ) );
509 if( verbosity >= 1 && show_packets && retval == 0 )
510 std::fputs( "Done.\n", stdout );
511 return retval;
512 }
513