/* Lziprecover - Data recovery tool for the lzip format
   Copyright (C) 2009-2021 Antonio Diaz Diaz.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
17 
18 #define _FILE_OFFSET_BITS 64
19 
20 #include <algorithm>
21 #include <cerrno>
22 #include <climits>
23 #include <cstdio>
24 #include <cstdlib>
25 #include <cstring>
26 #include <string>
27 #include <vector>
28 #include <stdint.h>
29 #include <unistd.h>
30 #include <sys/stat.h>
31 
32 #include "lzip.h"
33 #include "md5.h"
34 #include "mtester.h"
35 #include "lzip_index.h"
36 
37 
38 namespace {
39 
verify_member(const uint8_t * const mbuffer,const long long msize,const unsigned dictionary_size,const char * const name,uint8_t digest[16])40 bool verify_member( const uint8_t * const mbuffer, const long long msize,
41                     const unsigned dictionary_size, const char * const name,
42                     uint8_t digest[16] )
43   {
44   MD5SUM md5sum;
45   LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
46   if( mtester.test_member() != 0 || !mtester.finished() )
47     { show_file_error( name, "Error verifying input file." ); return false; }
48   md5sum.md5_finish( digest );
49   return true;
50   }
51 
52 
compare_member(const uint8_t * const mbuffer,const long long msize,const unsigned dictionary_size,const long long byte_pos,const uint8_t digest[16])53 bool compare_member( const uint8_t * const mbuffer, const long long msize,
54                      const unsigned dictionary_size,
55                      const long long byte_pos, const uint8_t digest[16] )
56   {
57   MD5SUM md5sum;
58   LZ_mtester mtester( mbuffer, msize, dictionary_size, -1, &md5sum );
59   bool error = ( mtester.test_member() != 0 || !mtester.finished() );
60   if( !error )
61     {
62     uint8_t new_digest[16];
63     md5sum.md5_finish( new_digest );
64     if( std::memcmp( digest, new_digest, 16 ) != 0 ) error = true;
65     }
66   if( error && verbosity >= 0 )
67     std::printf( "byte %llu comparison failed\n", byte_pos );
68   return !error;
69   }
70 
71 
test_member_rest(const LZ_mtester & master,long * const failure_posp,const unsigned long long byte_pos)72 int test_member_rest( const LZ_mtester & master, long * const failure_posp,
73                       const unsigned long long byte_pos )
74   {
75   LZ_mtester mtester( master );
76   mtester.duplicate_buffer();
77   int result = mtester.test_member( LLONG_MAX, LLONG_MAX, stdout, byte_pos );
78   if( result == 0 && !mtester.finished() ) result = -1;
79   if( result != 0 ) *failure_posp = mtester.member_position();
80   return result;
81   }
82 
83 
next_pct_pos(const Lzip_index & lzip_index,const int i,const int pct)84 long next_pct_pos( const Lzip_index & lzip_index, const int i, const int pct )
85   {
86   if( pct <= 0 ) return 0;
87   const long long cdata_size = lzip_index.cdata_size();
88   const long long mpos = lzip_index.mblock( i ).pos();
89   const long long msize = lzip_index.mblock( i ).size();
90   long long pct_pos = (long long)( cdata_size / ( 100.0 / pct ) );
91 
92   if( pct_pos <= mpos ) pct_pos = 0;
93   else if( pct_pos == cdata_size ) pct_pos = msize - 21;	// 100%
94   else if( pct_pos >= mpos + msize ) pct_pos = msize;
95   else pct_pos -= mpos;
96   return pct_pos;
97   }
98 
99 } // end namespace
100 
101 
102 /* Test 1-bit errors in LZMA streams in file.
103    Unless verbosity >= 1, print only the bytes with interesting results. */
lunzcrash(const std::string & input_filename)104 int lunzcrash( const std::string & input_filename )
105   {
106   struct stat in_stats;				// not used
107   const int infd =
108     open_instream( input_filename.c_str(), &in_stats, false, true );
109   if( infd < 0 ) return 1;
110 
111   const Lzip_index lzip_index( infd, true, true );
112   if( lzip_index.retval() != 0 )
113     { show_file_error( input_filename.c_str(), lzip_index.error().c_str() );
114       return lzip_index.retval(); }
115   if( verbosity >= 2 ) printf( "Testing file '%s'\n", input_filename.c_str() );
116 
117   const long long cdata_size = lzip_index.cdata_size();
118   long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
119   int pct = ( cdata_size >= 1000 && isatty( STDERR_FILENO ) ) ? 0 : 100;
120   for( long i = 0; i < lzip_index.members(); ++i )
121     {
122     const long long mpos = lzip_index.mblock( i ).pos();
123     const long long msize = lzip_index.mblock( i ).size();
124     const unsigned dictionary_size = lzip_index.dictionary_size( i );
125     uint8_t * const mbuffer = read_member( infd, mpos, msize );
126     if( !mbuffer ) return 1;
127     uint8_t md5_orig[16];
128     if( !verify_member( mbuffer, msize, dictionary_size,
129                         input_filename.c_str(), md5_orig ) ) return 2;
130     long pct_pos = next_pct_pos( lzip_index, i, pct );
131     long pos = Lzip_header::size + 1, printed = 0;	// last pos printed
132     const long end = msize - 20;
133     if( verbosity == 0 )	// give a clue of the range being tested
134       std::printf( "Testing bytes %llu to %llu\n", mpos + pos, mpos + end - 1 );
135     LZ_mtester master( mbuffer, msize, dictionary_size );
136     for( ; pos < end; ++pos )
137       {
138       const long pos_limit = pos - 16;
139       if( pos_limit > 0 && master.test_member( pos_limit ) != -1 )
140         { show_error( "Can't advance master." ); return 1; }
141       if( verbosity >= 0 && pos >= pct_pos )
142         { std::fprintf( stderr, "\r%3u%% done\r", pct ); ++pct;
143           pct_pos = next_pct_pos( lzip_index, i, pct ); }
144       if( verbosity >= 1 )
145         { std::printf( "byte %llu\n", mpos + pos ); printed = pos; }
146       ++positions;
147       const uint8_t byte = mbuffer[pos];
148       for( uint8_t mask = 1; mask != 0; mask <<= 1 )
149         {
150         ++decompressions;
151         mbuffer[pos] ^= mask;
152         long failure_pos = 0;
153         const int result = test_member_rest( master, &failure_pos,
154                            ( printed < pos ) ? mpos + pos : 0 );
155         if( result == 0 )
156           {
157           ++successes;
158           if( verbosity >= 0 )
159             {
160             if( printed < pos )
161               { std::printf( "byte %llu\n", mpos + pos ); printed = pos; }
162             std::printf( "0x%02X (0x%02X^0x%02X) passed the test\n",
163                          mbuffer[pos], byte, mask );
164             }
165           if( !compare_member( mbuffer, msize, dictionary_size, mpos + pos,
166                                md5_orig ) ) ++failed_comparisons;
167           }
168         else if( result == 1 )
169           {
170           if( verbosity >= 2 ||
171               ( verbosity >= 1 && failure_pos - pos >= 10000 ) ||
172               ( verbosity >= 0 && failure_pos - pos >= 50000 ) )
173             {
174             if( printed < pos )
175               { std::printf( "byte %llu\n", mpos + pos ); printed = pos; }
176             std::printf( "Decoder error at pos %llu\n", mpos + failure_pos );
177             }
178           }
179         else if( result == 3 || result == 4 )	// test_member printed the error
180           { if( verbosity >= 0 && printed < pos ) printed = pos; }
181         else if( verbosity >= 0 )
182           {
183           if( printed < pos )
184             { std::printf( "byte %llu\n", mpos + pos ); printed = pos; }
185           if( result == 2 )
186             std::printf( "File ends unexpectedly at pos %llu\n",
187                          mpos + failure_pos );
188           else
189             std::printf( "Unknown error code '%d'\n", result );
190           }
191         mbuffer[pos] ^= mask;
192         }
193       }
194     delete[] mbuffer;
195     }
196 
197   if( verbosity >= 0 )
198     {
199     std::printf( "\n%8ld bytes tested\n%8ld total decompressions"
200                  "\n%8ld decompressions returned with zero status",
201                  positions, decompressions, successes );
202     if( successes > 0 )
203       {
204       if( failed_comparisons > 0 )
205         std::printf( ", of which\n%8ld comparisons failed\n",
206                      failed_comparisons );
207       else std::fputs( "\n         all comparisons passed\n", stdout );
208       }
209     else std::fputc( '\n', stdout );
210     }
211   return 0;
212   }
213 
214 
md5sum_files(const std::vector<std::string> & filenames)215 int md5sum_files( const std::vector< std::string > & filenames )
216   {
217   int retval = 0;
218   bool stdin_used = false;
219 
220   for( unsigned i = 0; i < filenames.size(); ++i )
221     {
222     const bool from_stdin = ( filenames[i] == "-" );
223     if( from_stdin ) { if( stdin_used ) continue; else stdin_used = true; }
224     const char * const input_filename = filenames[i].c_str();
225     struct stat in_stats;				// not used
226     const int infd = from_stdin ? STDIN_FILENO :
227       open_instream( input_filename, &in_stats, false );
228     if( infd < 0 ) { set_retval( retval, 1 ); continue; }
229 
230     enum { buffer_size = 16384 };
231     uint8_t buffer[buffer_size], md5_digest[16];
232     MD5SUM md5sum;
233     while( true )
234       {
235       const int len = readblock( infd, buffer, buffer_size );
236       if( len != buffer_size && errno ) throw Error( "Read error" );
237       if( len > 0 ) md5sum.md5_update( buffer, len );
238       if( len < buffer_size ) break;
239       }
240     md5sum.md5_finish( md5_digest );
241     if( close( infd ) != 0 )
242       { show_file_error( input_filename, "Error closing input file", errno );
243         return 1; }
244 
245     for( int i = 0; i < 16; ++i ) std::printf( "%02x", md5_digest[i] );
246     std::printf( "  %s\n", input_filename );
247     std::fflush( stdout );
248     }
249   return retval;
250   }
251