1 /* Unzcrash - Tests robustness of decompressors to corrupted data.
2    Inspired by unzcrash.c from Julian Seward's bzip2.
3    Copyright (C) 2008-2021 Antonio Diaz Diaz.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 2 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 */
18 /*
19    Exit status: 0 for a normal exit, 1 for environmental problems
20    (file not found, invalid flags, I/O errors, etc), 2 to indicate a
21    corrupt or invalid input file, 3 for an internal consistency error
22    (eg, bug) which caused unzcrash to panic.
23 */
24 
25 #define _FILE_OFFSET_BITS 64
26 
27 #include <algorithm>
28 #include <cerrno>
29 #include <climits>
30 #include <csignal>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34 #include <string>
35 #include <vector>
36 #include <stdint.h>
37 #include <unistd.h>
38 
39 #include "arg_parser.h"
40 
41 #if CHAR_BIT != 8
42 #error "Environments where CHAR_BIT != 8 are not supported."
43 #endif
44 
45 #ifndef INT64_MAX
46 #define INT64_MAX  0x7FFFFFFFFFFFFFFFLL
47 #endif
48 
49 void show_error( const char * const msg, const int errcode = 0,
50                  const bool help = false );
51 
52 namespace {
53 
// Fixed program name used as prefix in diagnostics.
54 const char * const program_name = "unzcrash";
// Name shown in help and usage messages; main replaces it with argv[0]
// when argc > 0.
55 const char * invocation_name = program_name;		// default value
56 
// Message level: -1 when '-q' is given, raised by one per '-v' (up to 4).
// Messages gated on 'verbosity >= 0' are suppressed in quiet mode.
57 int verbosity = 0;
58 
59 
show_help()60 void show_help()
61   {
62   std::printf( "Unzcrash tests the robustness of decompressors to corrupted data.\n"
63                "\nBy default, unzcrash reads the file specified and then repeatedly\n"
64                "decompresses it, increasing 256 times each byte of the compressed data, so\n"
65                "as to test all possible one-byte errors. Note that it may take years or even\n"
66                "centuries to test all possible one-byte errors in a large file (tens of MB).\n"
67                "\nIf the option '--block' is given, unzcrash reads the file specified and\n"
68                "then repeatedly decompresses it, setting all bytes in each successive block\n"
69                "to the value given, so as to test all possible full sector errors.\n"
70                "\nIf the option '--truncate' is given, unzcrash reads the file specified\n"
71                "and then repeatedly decompresses it, truncating the file to increasing\n"
72                "lengths, so as to test all possible truncation points.\n"
73                "\nNone of the three test modes described above should cause any invalid memory\n"
74                "accesses. If any of them does, please, report it as a bug to the maintainers\n"
75                "of the decompressor being tested.\n"
76                "\nIf the decompressor returns with zero status, unzcrash compares the output\n"
77                "of the decompressor for the original and corrupt files. If the outputs\n"
78                "differ, it means that the decompressor returned a false negative; it failed\n"
79                "to recognize the corruption and produced garbage output. The only exception\n"
80                "is when a multimember file is truncated just after the last byte of a\n"
81                "member, producing a shorter but valid compressed file. Except in this latter\n"
82                "case, please, report any false negative as a bug.\n"
83                "\nIn order to compare the outputs, unzcrash needs a 'zcmp' program able to\n"
84                "understand the format being tested. For example the zcmp provided by zutils.\n"
85                "Use '--zcmp=false' to disable comparisons.\n"
86                "\nUsage: %s [options] 'lzip -t' file.lz\n", invocation_name );
87   std::printf( "\nOptions:\n"
88                "  -h, --help                    display this help and exit\n"
89                "  -V, --version                 output version information and exit\n"
90                "  -b, --bits=<range>            test N-bit errors instead of full byte\n"
91                "  -B, --block[=<size>][,<val>]  test blocks of given size [512,0]\n"
92                "  -d, --delta=<n>               test one byte/block/truncation every n bytes\n"
93                "  -e, --set-byte=<pos>,<val>    set byte at position <pos> to value <val>\n"
94                "  -n, --no-verify               skip initial verification of file.lz\n"
95                "  -p, --position=<bytes>        first byte position to test [default 0]\n"
96                "  -q, --quiet                   suppress all messages\n"
97                "  -s, --size=<bytes>            number of byte positions to test [all]\n"
98                "  -t, --truncate                test decompression of truncated file\n"
99                "  -v, --verbose                 be verbose (a 2nd -v gives more)\n"
100                "  -z, --zcmp=<command>          set zcmp command name and options [zcmp]\n"
101                "Examples of <range>:  1  1,2,3  1-4  1,3-5,8  1-3,5-8\n"
102                "A negative position is relative to the end of file.\n"
103                "A negative size is relative to the rest of the file.\n"
104                "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
105                "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
106                "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
107                "caused unzcrash to panic.\n"
108                "\nReport bugs to lzip-bug@nongnu.org\n"
109                "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
110   }
111 
112 } // end namespace
113 
114 
115 #include "main_common.cc"
116 
117 
118 namespace {
119 
parse_block(const char * const ptr,long & size,uint8_t & value)120 void parse_block( const char * const ptr, long & size, uint8_t & value )
121   {
122   const char * tail = ptr;
123 
124   if( tail[0] != ',' )
125     size = getnum( ptr, 0, 1, INT_MAX, &tail );
126   if( tail[0] == ',' )
127     value = getnum( tail + 1, 0, 0, 255 );
128   else if( tail[0] )
129     {
130     show_error( "Bad separator in argument of '--block'", 0, true );
131     std::exit( 1 );
132     }
133   }
134 
135 
// Describes how one byte of the file is to be replaced:
// 'literal' stores 'value', 'delta' adds 'value' to the old byte (the
// result is reduced to 8 bits), and 'flip' XORs the old byte with 'value'.
struct Bad_byte
  {
  enum Mode { literal, delta, flip };
  long long pos;		// position of the byte; -1 by default
  Mode mode;
  uint8_t value;

  Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {}

  // Returns the new value for a byte whose current value is 'old_value'.
  uint8_t operator()( const uint8_t old_value ) const
    {
    switch( mode )
      {
      case delta: return old_value + value;
      case flip:  return old_value ^ value;
      default:    return value;
      }
    }
  };
151 
152 
153 // Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
154 //
parse_pos_value(const char * const ptr,Bad_byte & bad_byte)155 void parse_pos_value( const char * const ptr, Bad_byte & bad_byte )
156   {
157   const char * tail;
158   bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &tail );
159   if( tail[0] != ',' )
160     {
161     show_error( "Bad separator between <pos> and <val>.", 0, true );
162     std::exit( 1 );
163     }
164   if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; }
165   else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; }
166   else bad_byte.mode = Bad_byte::literal;
167   bad_byte.value = getnum( tail + 1, 0, 0, 255 );
168   }
169 
170 
171 /* Returns the address of a malloc'd buffer containing the file data and
172    the file size in '*size'.
173    In case of error, returns 0 and does not modify '*size'.
174 */
read_file(const char * const name,long * const size)175 uint8_t * read_file( const char * const name, long * const size )
176   {
177   FILE * const f = std::fopen( name, "rb" );
178   if( !f )
179     {
180     if( verbosity >= 0 )
181       std::fprintf( stderr, "%s: Can't open input file '%s': %s\n",
182                     program_name, name, std::strerror( errno ) );
183     return 0;
184     }
185 
186   long buffer_size = 1 << 20;
187   uint8_t * buffer = (uint8_t *)std::malloc( buffer_size );
188   if( !buffer ) { show_error( mem_msg ); return 0; }
189   long file_size = std::fread( buffer, 1, buffer_size, f );
190   while( file_size >= buffer_size )
191     {
192     if( buffer_size >= LONG_MAX )
193       {
194       if( verbosity >= 0 )
195         std::fprintf( stderr, "%s: Input file '%s' is too large.\n",
196                       program_name, name );
197       std::free( buffer ); return 0;
198       }
199     buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX;
200     uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
201     if( !tmp ) { show_error( mem_msg ); std::free( buffer ); return 0; }
202     buffer = tmp;
203     file_size += std::fread( buffer + file_size, 1, buffer_size - file_size, f );
204     }
205   if( std::ferror( f ) || !std::feof( f ) )
206     {
207     if( verbosity >= 0 )
208       std::fprintf( stderr, "%s: Error reading file '%s': %s\n",
209                     program_name, name, std::strerror( errno ) );
210     std::free( buffer ); return 0;
211     }
212   std::fclose( f );
213   *size = file_size;
214   return buffer;
215   }
216 
217 
218 class Bitset8			// 8 value bitset (1 to 8)
219   {
220   bool data[8];
valid_digit(const unsigned char ch)221   static bool valid_digit( const unsigned char ch )
222     { return ( ch >= '1' && ch <= '8' ); }
223 
224 public:
Bitset8()225   Bitset8() { for( int i = 0; i < 8; ++i ) data[i] = true; }
226 
includes(const int i) const227   bool includes( const int i ) const
228     { return ( i >= 1 && i <= 8 && data[i-1] ); }
229 
230   // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8
parse(const char * p)231   bool parse( const char * p )
232     {
233     for( int i = 0; i < 8; ++i ) data[i] = false;
234     while( true )
235       {
236       const unsigned char ch1 = *p++;
237       if( !valid_digit( ch1 ) ) break;
238       if( *p != '-' ) data[ch1-'1'] = true;
239       else
240         {
241         ++p;
242         if( !valid_digit( *p ) || ch1 > *p ) break;
243         for( int c = ch1; c <= *p; ++c ) data[c-'1'] = true;
244         ++p;
245         }
246       if( *p == 0 ) return true;
247       if( *p == ',' ) ++p; else break;
248       }
249     show_error( "Invalid value or range." );
250     return false;
251     }
252 
253   // number of N-bit errors per byte (N=0 to 8): 1 8 28 56 70 56 28 8 1
print() const254   void print() const
255     {
256     std::fflush( stderr );
257     int c = 0;
258     for( int i = 0; i < 8; ++i ) if( data[i] ) ++c;
259     if( c == 8 ) std::fputs( "Testing full byte.\n", stdout );
260     else if( c == 0 ) std::fputs( "Nothing to test.\n", stdout );
261     else
262       {
263       std::fputs( "Testing ", stdout );
264       for( int i = 0; i < 8; ++i )
265         if( data[i] )
266           {
267           std::printf( "%d", i + 1 );
268           if( --c ) std::fputc( ',', stdout );
269           }
270       std::fputs( " bit errors.\n", stdout );
271       }
272     std::fflush( stdout );
273     }
274   };
275 
276 
// Returns the number of bit positions (0 to 8) in which 'byte1' and
// 'byte2' differ.
int differing_bits( const uint8_t byte1, const uint8_t byte2 )
  {
  int count = 0;
  // Each iteration clears the lowest set bit of the XOR of the two bytes.
  for( unsigned dif = byte1 ^ byte2; dif != 0; dif &= dif - 1 ) ++count;
  return count;
  }
285 
286 } // end namespace
287 
288 
main(const int argc,const char * const argv[])289 int main( const int argc, const char * const argv[] )
290   {
291   enum Mode { m_block, m_byte, m_truncate };
292   const char * mode_str[3] = { "block", "byte", "size" };
293   Bitset8 bits;			// if Bitset8::parse not called test full byte
294   Bad_byte bad_byte;
295   const char * zcmp_program = "zcmp";
296   long pos = 0;
297   long max_size = LONG_MAX;
298   long delta = 0;		// to be set later
299   long block_size = 512;
300   Mode program_mode = m_byte;
301   uint8_t block_value = 0;
302   bool verify = true;
303   if( argc > 0 ) invocation_name = argv[0];
304 
305   const Arg_parser::Option options[] =
306     {
307     { 'h', "help",      Arg_parser::no  },
308     { 'b', "bits",      Arg_parser::yes },
309     { 'B', "block",     Arg_parser::maybe },
310     { 'd', "delta",     Arg_parser::yes },
311     { 'e', "set-byte",  Arg_parser::yes },
312     { 'n', "no-verify", Arg_parser::no  },
313     { 'p', "position",  Arg_parser::yes },
314     { 'q', "quiet",     Arg_parser::no  },
315     { 's', "size",      Arg_parser::yes },
316     { 't', "truncate",  Arg_parser::no  },
317     { 'v', "verbose",   Arg_parser::no  },
318     { 'V', "version",   Arg_parser::no  },
319     { 'z', "zcmp",      Arg_parser::yes },
320     {  0 , 0,           Arg_parser::no  } };
321 
322   const Arg_parser parser( argc, argv, options );
323   if( parser.error().size() )				// bad option
324     { show_error( parser.error().c_str(), 0, true ); return 1; }
325 
326   int argind = 0;
327   for( ; argind < parser.arguments(); ++argind )
328     {
329     const int code = parser.code( argind );
330     if( !code ) break;					// no more options
331     const char * const arg = parser.argument( argind ).c_str();
332     switch( code )
333       {
334       case 'h': show_help(); return 0;
335       case 'b': if( !bits.parse( arg ) ) return 1; program_mode = m_byte; break;
336       case 'B': if( arg[0] ) parse_block( arg, block_size, block_value );
337                 program_mode = m_block; break;
338       case 'd': delta = getnum( arg, block_size, 1, INT_MAX ); break;
339       case 'e': parse_pos_value( arg, bad_byte ); break;
340       case 'n': verify = false; break;
341       case 'p': pos = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break;
342       case 'q': verbosity = -1; break;
343       case 's': max_size = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break;
344       case 't': program_mode = m_truncate; break;
345       case 'v': if( verbosity < 4 ) ++verbosity; break;
346       case 'V': show_version(); return 0;
347       case 'z': zcmp_program = arg; break;
348       default : internal_error( "uncaught option." );
349       }
350     } // end process options
351 
352   if( argind + 2 != parser.arguments() )
353     {
354     if( verbosity >= 0 )
355       std::fprintf( stderr, "Usage: %s 'lzip -t' file.lz\n", invocation_name );
356     return 1;
357     }
358 
359   if( delta <= 0 ) delta = ( program_mode == m_block ) ? block_size : 1;
360 
361   const char * const filename = parser.argument( argind + 1 ).c_str();
362   long file_size = 0;
363   uint8_t * const buffer = read_file( filename, &file_size );
364   if( !buffer ) return 1;
365   const char * const command = parser.argument( argind ).c_str();
366   char zcmp_command[1024] = { 0 };
367   if( std::strcmp( zcmp_program, "false" ) != 0 )
368     snprintf( zcmp_command, sizeof zcmp_command, "%s '%s' -",
369               zcmp_program, filename );
370 
371   // verify original file
372   if( verbosity >= 1 ) fprintf( stderr, "Testing file '%s'\n", filename );
373   if( verify )
374     {
375     FILE * f = popen( command, "w" );
376     if( !f )
377       { show_error( "Can't open pipe to decompressor", errno ); return 1; }
378     if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size )
379       { show_error( "Can't write to decompressor", errno ); return 1; }
380     if( pclose( f ) != 0 )
381       {
382       if( verbosity >= 0 )
383         std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command );
384       return 1;
385       }
386     if( zcmp_command[0] )
387       {
388       f = popen( zcmp_command, "w" );
389       if( !f )
390         { show_error( "Can't open pipe to zcmp command", errno ); return 1; }
391       if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size )
392         { show_error( "Can't write to zcmp command", errno ); return 1; }
393       if( pclose( f ) != 0 )
394         {
395         show_error( "zcmp command failed. Disabling comparisons" );
396         zcmp_command[0] = 0;
397         }
398       }
399     }
400 
401   std::signal( SIGPIPE, SIG_IGN );
402 
403   if( pos < 0 ) pos = std::max( 0L, file_size + pos );
404   if( pos >= file_size || max_size == 0 ||
405       ( max_size < 0 && -max_size >= file_size - pos ) )
406     { show_error( "Nothing to do; domain is empty." ); return 0; }
407   if( max_size < 0 ) max_size += file_size - pos;
408   const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size );
409   if( bad_byte.pos >= file_size )
410     { show_error( "Position of '--set-byte' is beyond end of file." );
411       return 1; }
412   if( bad_byte.pos >= 0 )
413     buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] );
414   long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
415   if( program_mode == m_truncate )
416     for( long i = pos; i < end; i += std::min( delta, end - i ) )
417       {
418       if( verbosity >= 0 )
419         std::fprintf( stderr, "length %ld\n", i );
420       ++positions; ++decompressions;
421       FILE * f = popen( command, "w" );
422       if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
423       std::fwrite( buffer, 1, i, f );
424       if( pclose( f ) == 0 )
425         {
426         ++successes;
427         if( verbosity >= 0 )
428           std::fputs( "passed the test\n", stderr );
429         if( zcmp_command[0] )
430           {
431           f = popen( zcmp_command, "w" );
432           if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
433           std::fwrite( buffer, 1, i, f );
434           if( pclose( f ) != 0 )
435             {
436             ++failed_comparisons;
437             if( verbosity >= 0 )
438               std::fprintf( stderr, "length %ld comparison failed\n", i );
439             }
440           }
441         }
442       }
443   else if( program_mode == m_block )
444     {
445     uint8_t * block = (uint8_t *)std::malloc( block_size );
446     if( !block ) { show_error( mem_msg ); return 1; }
447     for( long i = pos; i < end; i += std::min( delta, end - i ) )
448       {
449       const long size = std::min( block_size, file_size - i );
450       if( verbosity >= 0 )
451         std::fprintf( stderr, "block %ld,%ld\n", i, size );
452       ++positions; ++decompressions;
453       FILE * f = popen( command, "w" );
454       if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
455       std::memcpy( block , buffer + i, size );
456       std::memset( buffer + i, block_value, size );
457       std::fwrite( buffer, 1, file_size, f );
458       if( pclose( f ) == 0 )
459         {
460         ++successes;
461         if( verbosity >= 0 )
462           std::fputs( "passed the test\n", stderr );
463         if( zcmp_command[0] )
464           {
465           f = popen( zcmp_command, "w" );
466           if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
467           std::fwrite( buffer, 1, file_size, f );
468           if( pclose( f ) != 0 )
469             {
470             ++failed_comparisons;
471             if( verbosity >= 0 )
472               std::fprintf( stderr, "block %ld,%ld comparison failed\n", i, size );
473             }
474           }
475         }
476       std::memcpy( buffer + i, block, size );
477       }
478     std::free( block );
479     }
480   else
481     {
482     if( verbosity >= 1 ) bits.print();
483     for( long i = pos; i < end; i += std::min( delta, end - i ) )
484       {
485       if( verbosity >= 0 )
486         std::fprintf( stderr, "byte %ld\n", i );
487       ++positions;
488       const uint8_t byte = buffer[i];
489       for( int j = 1; j < 256; ++j )
490         {
491         ++buffer[i];
492         if( bits.includes( differing_bits( byte, buffer[i] ) ) )
493           {
494           ++decompressions;
495           if( verbosity >= 2 )
496             std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
497                           buffer[i], byte, j );
498           FILE * f = popen( command, "w" );
499           if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
500           std::fwrite( buffer, 1, file_size, f );
501           if( pclose( f ) == 0 )
502             {
503             ++successes;
504             if( verbosity >= 0 )
505               { if( verbosity < 2 )	// else already printed above
506                   std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
507                                 buffer[i], byte, j );
508                 std::fputs( "passed the test\n", stderr ); }
509             if( zcmp_command[0] )
510               {
511               f = popen( zcmp_command, "w" );
512               if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
513               std::fwrite( buffer, 1, file_size, f );
514               if( pclose( f ) != 0 )
515                 {
516                 ++failed_comparisons;
517                 if( verbosity >= 0 )
518                   std::fprintf( stderr, "byte %ld comparison failed\n", i );
519                 }
520               }
521             }
522           }
523         }
524       buffer[i] = byte;
525       }
526     }
527 
528   if( verbosity >= 0 )
529     {
530     std::fprintf( stderr, "\n%8ld %ss tested\n%8ld total decompressions"
531                           "\n%8ld decompressions returned with zero status",
532                   positions, mode_str[program_mode], decompressions, successes );
533     if( successes > 0 )
534       {
535       if( zcmp_command[0] == 0 )
536         std::fputs( "\n         comparisons disabled\n", stderr );
537       else if( failed_comparisons > 0 )
538         std::fprintf( stderr, ", of which\n%8ld comparisons failed\n",
539                       failed_comparisons );
540       else std::fputs( "\n         all comparisons passed\n", stderr );
541       }
542     else std::fputc( '\n', stderr );
543     }
544 
545   std::free( buffer );
546   return 0;
547   }
548