1 /* Unzcrash - Tests robustness of decompressors to corrupted data.
2 Inspired by unzcrash.c from Julian Seward's bzip2.
3 Copyright (C) 2008-2021 Antonio Diaz Diaz.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 /*
19 Exit status: 0 for a normal exit, 1 for environmental problems
20 (file not found, invalid flags, I/O errors, etc), 2 to indicate a
21 corrupt or invalid input file, 3 for an internal consistency error
22 (eg, bug) which caused unzcrash to panic.
23 */
24
25 #define _FILE_OFFSET_BITS 64
26
27 #include <algorithm>
28 #include <cerrno>
29 #include <climits>
30 #include <csignal>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34 #include <string>
35 #include <vector>
36 #include <stdint.h>
37 #include <unistd.h>
38
39 #include "arg_parser.h"
40
41 #if CHAR_BIT != 8
42 #error "Environments where CHAR_BIT != 8 are not supported."
43 #endif
44
45 #ifndef INT64_MAX
46 #define INT64_MAX 0x7FFFFFFFFFFFFFFFLL
47 #endif
48
/* Reports the error message 'msg'. The definition is not in this part of
   the file. NOTE(review): callers pass 'errno' as 'errcode' and pass
   'help = true' for command-line mistakes, so 'errcode' is presumably an
   errno value and 'help' presumably adds a hint about '--help'; confirm
   against the definition. */
void show_error( const char * const msg, const int errcode = 0,
                 const bool help = false );
51
52 namespace {
53
// Fixed program name used as prefix in diagnostics.
const char * const program_name = "unzcrash";
// Name shown in the usage messages; main replaces it with argv[0].
const char * invocation_name = program_name;		// default value

// Message level: set to -1 by '--quiet', incremented by '--verbose' up to 4.
int verbosity = 0;
58
59
show_help()60 void show_help()
61 {
62 std::printf( "Unzcrash tests the robustness of decompressors to corrupted data.\n"
63 "\nBy default, unzcrash reads the file specified and then repeatedly\n"
64 "decompresses it, increasing 256 times each byte of the compressed data, so\n"
65 "as to test all possible one-byte errors. Note that it may take years or even\n"
66 "centuries to test all possible one-byte errors in a large file (tens of MB).\n"
67 "\nIf the option '--block' is given, unzcrash reads the file specified and\n"
68 "then repeatedly decompresses it, setting all bytes in each successive block\n"
69 "to the value given, so as to test all possible full sector errors.\n"
70 "\nIf the option '--truncate' is given, unzcrash reads the file specified\n"
71 "and then repeatedly decompresses it, truncating the file to increasing\n"
72 "lengths, so as to test all possible truncation points.\n"
73 "\nNone of the three test modes described above should cause any invalid memory\n"
74 "accesses. If any of them does, please, report it as a bug to the maintainers\n"
75 "of the decompressor being tested.\n"
76 "\nIf the decompressor returns with zero status, unzcrash compares the output\n"
77 "of the decompressor for the original and corrupt files. If the outputs\n"
78 "differ, it means that the decompressor returned a false negative; it failed\n"
79 "to recognize the corruption and produced garbage output. The only exception\n"
80 "is when a multimember file is truncated just after the last byte of a\n"
81 "member, producing a shorter but valid compressed file. Except in this latter\n"
82 "case, please, report any false negative as a bug.\n"
83 "\nIn order to compare the outputs, unzcrash needs a 'zcmp' program able to\n"
84 "understand the format being tested. For example the zcmp provided by zutils.\n"
85 "Use '--zcmp=false' to disable comparisons.\n"
86 "\nUsage: %s [options] 'lzip -t' file.lz\n", invocation_name );
87 std::printf( "\nOptions:\n"
88 " -h, --help display this help and exit\n"
89 " -V, --version output version information and exit\n"
90 " -b, --bits=<range> test N-bit errors instead of full byte\n"
91 " -B, --block[=<size>][,<val>] test blocks of given size [512,0]\n"
92 " -d, --delta=<n> test one byte/block/truncation every n bytes\n"
93 " -e, --set-byte=<pos>,<val> set byte at position <pos> to value <val>\n"
94 " -n, --no-verify skip initial verification of file.lz\n"
95 " -p, --position=<bytes> first byte position to test [default 0]\n"
96 " -q, --quiet suppress all messages\n"
97 " -s, --size=<bytes> number of byte positions to test [all]\n"
98 " -t, --truncate test decompression of truncated file\n"
99 " -v, --verbose be verbose (a 2nd -v gives more)\n"
100 " -z, --zcmp=<command> set zcmp command name and options [zcmp]\n"
101 "Examples of <range>: 1 1,2,3 1-4 1,3-5,8 1-3,5-8\n"
102 "A negative position is relative to the end of file.\n"
103 "A negative size is relative to the rest of the file.\n"
104 "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
105 "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
106 "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
107 "caused unzcrash to panic.\n"
108 "\nReport bugs to lzip-bug@nongnu.org\n"
109 "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
110 }
111
112 } // end namespace
113
114
115 #include "main_common.cc"
116
117
118 namespace {
119
parse_block(const char * const ptr,long & size,uint8_t & value)120 void parse_block( const char * const ptr, long & size, uint8_t & value )
121 {
122 const char * tail = ptr;
123
124 if( tail[0] != ',' )
125 size = getnum( ptr, 0, 1, INT_MAX, &tail );
126 if( tail[0] == ',' )
127 value = getnum( tail + 1, 0, 0, 255 );
128 else if( tail[0] )
129 {
130 show_error( "Bad separator in argument of '--block'", 0, true );
131 std::exit( 1 );
132 }
133 }
134
135
// Single-byte modification requested with '--set-byte'.
struct Bad_byte
  {
  enum Mode { literal, delta, flip };
  long long pos;		// position of the byte; negative if not requested
  Mode mode;			// how 'value' is combined with the old byte
  uint8_t value;

  Bad_byte() : pos( -1 ), mode( literal ), value( 0 ) {}

  // Returns the byte that replaces 'old_value': 'value' itself (literal),
  // the sum modulo 256 (delta), or the exclusive or (flip).
  uint8_t operator()( const uint8_t old_value ) const
    {
    switch( mode )
      {
      case delta: return old_value + value;
      case flip:  return old_value ^ value;
      default:    return value;
      }
    }
  };
151
152
153 // Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
154 //
parse_pos_value(const char * const ptr,Bad_byte & bad_byte)155 void parse_pos_value( const char * const ptr, Bad_byte & bad_byte )
156 {
157 const char * tail;
158 bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &tail );
159 if( tail[0] != ',' )
160 {
161 show_error( "Bad separator between <pos> and <val>.", 0, true );
162 std::exit( 1 );
163 }
164 if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; }
165 else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; }
166 else bad_byte.mode = Bad_byte::literal;
167 bad_byte.value = getnum( tail + 1, 0, 0, 255 );
168 }
169
170
171 /* Returns the address of a malloc'd buffer containing the file data and
172 the file size in '*size'.
173 In case of error, returns 0 and does not modify '*size'.
174 */
read_file(const char * const name,long * const size)175 uint8_t * read_file( const char * const name, long * const size )
176 {
177 FILE * const f = std::fopen( name, "rb" );
178 if( !f )
179 {
180 if( verbosity >= 0 )
181 std::fprintf( stderr, "%s: Can't open input file '%s': %s\n",
182 program_name, name, std::strerror( errno ) );
183 return 0;
184 }
185
186 long buffer_size = 1 << 20;
187 uint8_t * buffer = (uint8_t *)std::malloc( buffer_size );
188 if( !buffer ) { show_error( mem_msg ); return 0; }
189 long file_size = std::fread( buffer, 1, buffer_size, f );
190 while( file_size >= buffer_size )
191 {
192 if( buffer_size >= LONG_MAX )
193 {
194 if( verbosity >= 0 )
195 std::fprintf( stderr, "%s: Input file '%s' is too large.\n",
196 program_name, name );
197 std::free( buffer ); return 0;
198 }
199 buffer_size = ( buffer_size <= LONG_MAX / 2 ) ? 2 * buffer_size : LONG_MAX;
200 uint8_t * const tmp = (uint8_t *)std::realloc( buffer, buffer_size );
201 if( !tmp ) { show_error( mem_msg ); std::free( buffer ); return 0; }
202 buffer = tmp;
203 file_size += std::fread( buffer + file_size, 1, buffer_size - file_size, f );
204 }
205 if( std::ferror( f ) || !std::feof( f ) )
206 {
207 if( verbosity >= 0 )
208 std::fprintf( stderr, "%s: Error reading file '%s': %s\n",
209 program_name, name, std::strerror( errno ) );
210 std::free( buffer ); return 0;
211 }
212 std::fclose( f );
213 *size = file_size;
214 return buffer;
215 }
216
217
218 class Bitset8 // 8 value bitset (1 to 8)
219 {
220 bool data[8];
valid_digit(const unsigned char ch)221 static bool valid_digit( const unsigned char ch )
222 { return ( ch >= '1' && ch <= '8' ); }
223
224 public:
Bitset8()225 Bitset8() { for( int i = 0; i < 8; ++i ) data[i] = true; }
226
includes(const int i) const227 bool includes( const int i ) const
228 { return ( i >= 1 && i <= 8 && data[i-1] ); }
229
230 // Recognized formats: 1 1,2,3 1-4 1,3-5,8 1-3,5-8
parse(const char * p)231 bool parse( const char * p )
232 {
233 for( int i = 0; i < 8; ++i ) data[i] = false;
234 while( true )
235 {
236 const unsigned char ch1 = *p++;
237 if( !valid_digit( ch1 ) ) break;
238 if( *p != '-' ) data[ch1-'1'] = true;
239 else
240 {
241 ++p;
242 if( !valid_digit( *p ) || ch1 > *p ) break;
243 for( int c = ch1; c <= *p; ++c ) data[c-'1'] = true;
244 ++p;
245 }
246 if( *p == 0 ) return true;
247 if( *p == ',' ) ++p; else break;
248 }
249 show_error( "Invalid value or range." );
250 return false;
251 }
252
253 // number of N-bit errors per byte (N=0 to 8): 1 8 28 56 70 56 28 8 1
print() const254 void print() const
255 {
256 std::fflush( stderr );
257 int c = 0;
258 for( int i = 0; i < 8; ++i ) if( data[i] ) ++c;
259 if( c == 8 ) std::fputs( "Testing full byte.\n", stdout );
260 else if( c == 0 ) std::fputs( "Nothing to test.\n", stdout );
261 else
262 {
263 std::fputs( "Testing ", stdout );
264 for( int i = 0; i < 8; ++i )
265 if( data[i] )
266 {
267 std::printf( "%d", i + 1 );
268 if( --c ) std::fputc( ',', stdout );
269 }
270 std::fputs( " bit errors.\n", stdout );
271 }
272 std::fflush( stdout );
273 }
274 };
275
276
// Returns the number of bit positions (0 to 8) where 'byte1' and 'byte2'
// differ.
int differing_bits( const uint8_t byte1, const uint8_t byte2 )
  {
  unsigned dif = byte1 ^ byte2;		// a bit is set where the bytes differ
  int count = 0;
  for( ; dif != 0; dif &= dif - 1 )	// each step clears the lowest set bit
    ++count;
  return count;
  }
285
286 } // end namespace
287
288
main(const int argc,const char * const argv[])289 int main( const int argc, const char * const argv[] )
290 {
291 enum Mode { m_block, m_byte, m_truncate };
292 const char * mode_str[3] = { "block", "byte", "size" };
293 Bitset8 bits; // if Bitset8::parse not called test full byte
294 Bad_byte bad_byte;
295 const char * zcmp_program = "zcmp";
296 long pos = 0;
297 long max_size = LONG_MAX;
298 long delta = 0; // to be set later
299 long block_size = 512;
300 Mode program_mode = m_byte;
301 uint8_t block_value = 0;
302 bool verify = true;
303 if( argc > 0 ) invocation_name = argv[0];
304
305 const Arg_parser::Option options[] =
306 {
307 { 'h', "help", Arg_parser::no },
308 { 'b', "bits", Arg_parser::yes },
309 { 'B', "block", Arg_parser::maybe },
310 { 'd', "delta", Arg_parser::yes },
311 { 'e', "set-byte", Arg_parser::yes },
312 { 'n', "no-verify", Arg_parser::no },
313 { 'p', "position", Arg_parser::yes },
314 { 'q', "quiet", Arg_parser::no },
315 { 's', "size", Arg_parser::yes },
316 { 't', "truncate", Arg_parser::no },
317 { 'v', "verbose", Arg_parser::no },
318 { 'V', "version", Arg_parser::no },
319 { 'z', "zcmp", Arg_parser::yes },
320 { 0 , 0, Arg_parser::no } };
321
322 const Arg_parser parser( argc, argv, options );
323 if( parser.error().size() ) // bad option
324 { show_error( parser.error().c_str(), 0, true ); return 1; }
325
326 int argind = 0;
327 for( ; argind < parser.arguments(); ++argind )
328 {
329 const int code = parser.code( argind );
330 if( !code ) break; // no more options
331 const char * const arg = parser.argument( argind ).c_str();
332 switch( code )
333 {
334 case 'h': show_help(); return 0;
335 case 'b': if( !bits.parse( arg ) ) return 1; program_mode = m_byte; break;
336 case 'B': if( arg[0] ) parse_block( arg, block_size, block_value );
337 program_mode = m_block; break;
338 case 'd': delta = getnum( arg, block_size, 1, INT_MAX ); break;
339 case 'e': parse_pos_value( arg, bad_byte ); break;
340 case 'n': verify = false; break;
341 case 'p': pos = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break;
342 case 'q': verbosity = -1; break;
343 case 's': max_size = getnum( arg, block_size, -LONG_MAX, LONG_MAX ); break;
344 case 't': program_mode = m_truncate; break;
345 case 'v': if( verbosity < 4 ) ++verbosity; break;
346 case 'V': show_version(); return 0;
347 case 'z': zcmp_program = arg; break;
348 default : internal_error( "uncaught option." );
349 }
350 } // end process options
351
352 if( argind + 2 != parser.arguments() )
353 {
354 if( verbosity >= 0 )
355 std::fprintf( stderr, "Usage: %s 'lzip -t' file.lz\n", invocation_name );
356 return 1;
357 }
358
359 if( delta <= 0 ) delta = ( program_mode == m_block ) ? block_size : 1;
360
361 const char * const filename = parser.argument( argind + 1 ).c_str();
362 long file_size = 0;
363 uint8_t * const buffer = read_file( filename, &file_size );
364 if( !buffer ) return 1;
365 const char * const command = parser.argument( argind ).c_str();
366 char zcmp_command[1024] = { 0 };
367 if( std::strcmp( zcmp_program, "false" ) != 0 )
368 snprintf( zcmp_command, sizeof zcmp_command, "%s '%s' -",
369 zcmp_program, filename );
370
371 // verify original file
372 if( verbosity >= 1 ) fprintf( stderr, "Testing file '%s'\n", filename );
373 if( verify )
374 {
375 FILE * f = popen( command, "w" );
376 if( !f )
377 { show_error( "Can't open pipe to decompressor", errno ); return 1; }
378 if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size )
379 { show_error( "Can't write to decompressor", errno ); return 1; }
380 if( pclose( f ) != 0 )
381 {
382 if( verbosity >= 0 )
383 std::fprintf( stderr, "%s: Can't run '%s'.\n", program_name, command );
384 return 1;
385 }
386 if( zcmp_command[0] )
387 {
388 f = popen( zcmp_command, "w" );
389 if( !f )
390 { show_error( "Can't open pipe to zcmp command", errno ); return 1; }
391 if( (long)std::fwrite( buffer, 1, file_size, f ) != file_size )
392 { show_error( "Can't write to zcmp command", errno ); return 1; }
393 if( pclose( f ) != 0 )
394 {
395 show_error( "zcmp command failed. Disabling comparisons" );
396 zcmp_command[0] = 0;
397 }
398 }
399 }
400
401 std::signal( SIGPIPE, SIG_IGN );
402
403 if( pos < 0 ) pos = std::max( 0L, file_size + pos );
404 if( pos >= file_size || max_size == 0 ||
405 ( max_size < 0 && -max_size >= file_size - pos ) )
406 { show_error( "Nothing to do; domain is empty." ); return 0; }
407 if( max_size < 0 ) max_size += file_size - pos;
408 const long end = ( ( max_size < file_size - pos ) ? pos + max_size : file_size );
409 if( bad_byte.pos >= file_size )
410 { show_error( "Position of '--set-byte' is beyond end of file." );
411 return 1; }
412 if( bad_byte.pos >= 0 )
413 buffer[bad_byte.pos] = bad_byte( buffer[bad_byte.pos] );
414 long positions = 0, decompressions = 0, successes = 0, failed_comparisons = 0;
415 if( program_mode == m_truncate )
416 for( long i = pos; i < end; i += std::min( delta, end - i ) )
417 {
418 if( verbosity >= 0 )
419 std::fprintf( stderr, "length %ld\n", i );
420 ++positions; ++decompressions;
421 FILE * f = popen( command, "w" );
422 if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
423 std::fwrite( buffer, 1, i, f );
424 if( pclose( f ) == 0 )
425 {
426 ++successes;
427 if( verbosity >= 0 )
428 std::fputs( "passed the test\n", stderr );
429 if( zcmp_command[0] )
430 {
431 f = popen( zcmp_command, "w" );
432 if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
433 std::fwrite( buffer, 1, i, f );
434 if( pclose( f ) != 0 )
435 {
436 ++failed_comparisons;
437 if( verbosity >= 0 )
438 std::fprintf( stderr, "length %ld comparison failed\n", i );
439 }
440 }
441 }
442 }
443 else if( program_mode == m_block )
444 {
445 uint8_t * block = (uint8_t *)std::malloc( block_size );
446 if( !block ) { show_error( mem_msg ); return 1; }
447 for( long i = pos; i < end; i += std::min( delta, end - i ) )
448 {
449 const long size = std::min( block_size, file_size - i );
450 if( verbosity >= 0 )
451 std::fprintf( stderr, "block %ld,%ld\n", i, size );
452 ++positions; ++decompressions;
453 FILE * f = popen( command, "w" );
454 if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
455 std::memcpy( block , buffer + i, size );
456 std::memset( buffer + i, block_value, size );
457 std::fwrite( buffer, 1, file_size, f );
458 if( pclose( f ) == 0 )
459 {
460 ++successes;
461 if( verbosity >= 0 )
462 std::fputs( "passed the test\n", stderr );
463 if( zcmp_command[0] )
464 {
465 f = popen( zcmp_command, "w" );
466 if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
467 std::fwrite( buffer, 1, file_size, f );
468 if( pclose( f ) != 0 )
469 {
470 ++failed_comparisons;
471 if( verbosity >= 0 )
472 std::fprintf( stderr, "block %ld,%ld comparison failed\n", i, size );
473 }
474 }
475 }
476 std::memcpy( buffer + i, block, size );
477 }
478 std::free( block );
479 }
480 else
481 {
482 if( verbosity >= 1 ) bits.print();
483 for( long i = pos; i < end; i += std::min( delta, end - i ) )
484 {
485 if( verbosity >= 0 )
486 std::fprintf( stderr, "byte %ld\n", i );
487 ++positions;
488 const uint8_t byte = buffer[i];
489 for( int j = 1; j < 256; ++j )
490 {
491 ++buffer[i];
492 if( bits.includes( differing_bits( byte, buffer[i] ) ) )
493 {
494 ++decompressions;
495 if( verbosity >= 2 )
496 std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
497 buffer[i], byte, j );
498 FILE * f = popen( command, "w" );
499 if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
500 std::fwrite( buffer, 1, file_size, f );
501 if( pclose( f ) == 0 )
502 {
503 ++successes;
504 if( verbosity >= 0 )
505 { if( verbosity < 2 ) // else already printed above
506 std::fprintf( stderr, "0x%02X (0x%02X+0x%02X) ",
507 buffer[i], byte, j );
508 std::fputs( "passed the test\n", stderr ); }
509 if( zcmp_command[0] )
510 {
511 f = popen( zcmp_command, "w" );
512 if( !f ) { show_error( "Can't open pipe", errno ); return 1; }
513 std::fwrite( buffer, 1, file_size, f );
514 if( pclose( f ) != 0 )
515 {
516 ++failed_comparisons;
517 if( verbosity >= 0 )
518 std::fprintf( stderr, "byte %ld comparison failed\n", i );
519 }
520 }
521 }
522 }
523 }
524 buffer[i] = byte;
525 }
526 }
527
528 if( verbosity >= 0 )
529 {
530 std::fprintf( stderr, "\n%8ld %ss tested\n%8ld total decompressions"
531 "\n%8ld decompressions returned with zero status",
532 positions, mode_str[program_mode], decompressions, successes );
533 if( successes > 0 )
534 {
535 if( zcmp_command[0] == 0 )
536 std::fputs( "\n comparisons disabled\n", stderr );
537 else if( failed_comparisons > 0 )
538 std::fprintf( stderr, ", of which\n%8ld comparisons failed\n",
539 failed_comparisons );
540 else std::fputs( "\n all comparisons passed\n", stderr );
541 }
542 else std::fputc( '\n', stderr );
543 }
544
545 std::free( buffer );
546 return 0;
547 }
548