1 /* Lziprecover - Data recovery tool for the lzip format
2 Copyright (C) 2009-2021 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17 /*
18 Exit status: 0 for a normal exit, 1 for environmental problems
19 (file not found, invalid flags, I/O errors, etc), 2 to indicate a
20 corrupt or invalid input file, 3 for an internal consistency error
21 (eg, bug) which caused lziprecover to panic.
22 */
23
24 #define _FILE_OFFSET_BITS 64
25
26 #include <algorithm>
27 #include <cctype>
28 #include <cerrno>
29 #include <climits>
30 #include <csignal>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34 #include <new>
35 #include <string>
36 #include <vector>
37 #include <fcntl.h>
38 #include <stdint.h>
39 #include <unistd.h>
40 #include <utime.h>
41 #include <sys/stat.h>
42 #if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
43 #include <io.h>
44 #if defined(__MSVCRT__)
45 #define fchmod(x,y) 0
46 #define fchown(x,y,z) 0
47 #define SIGHUP SIGTERM
48 #define S_ISSOCK(x) 0
49 #ifndef S_IRGRP
50 #define S_IRGRP 0
51 #define S_IWGRP 0
52 #define S_IROTH 0
53 #define S_IWOTH 0
54 #endif
55 #endif
56 #if defined(__DJGPP__)
57 #define S_ISSOCK(x) 0
58 #define S_ISVTX 0
59 #endif
60 #endif
61
62 #include "arg_parser.h"
63 #include "lzip.h"
64 #include "decoder.h"
65
66 #ifndef O_BINARY
67 #define O_BINARY 0
68 #endif
69
70 #if CHAR_BIT != 8
71 #error "Environments where CHAR_BIT != 8 are not supported."
72 #endif
73
// Message level. Option '-q' sets it to -1 and each '-v' raises it by one
// up to a maximum of 4. Most diagnostics are printed only when it is >= 0.
int verbosity = 0;

// Name printed as a prefix of the diagnostics written to stderr.
const char * const program_name = "lziprecover";
std::string output_filename;	// global vars for output file
int outfd = -1;			// see 'delete_output_on_interrupt' below
79
80 namespace {
81
// Name shown in the usage line of the help text; main() replaces it with
// argv[0] when argc > 0.
const char * invocation_name = program_name;		// default value
83
/* Table of recognized file name extensions. 'from' is the extension looked
   for at the end of a file name and 'to' is the text that replaces it when
   an output name is derived. The entry with a null 'from' ends the table. */
const struct { const char * from; const char * to; } known_extensions[] = {
  { ".lz", "" },
  { ".tlz", ".tar" },
  { 0, 0 } };
88
/* Operation selected on the command line. 'm_none' means that no operation
   has been chosen yet; main() rejects it after option parsing. */
enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay,
            m_debug_repair, m_decompress, m_dump, m_list, m_md5sum, m_merge,
            m_nrep_stats, m_range_dec, m_remove, m_repair, m_reproduce,
            m_show_packets, m_split, m_strip, m_test, m_unzcrash };
93
/* Variable used in signal handler context.
   It is not declared volatile because the handler never returns.
   While true, 'cleanup_and_fail' closes 'outfd' and removes the file named
   by 'output_filename' before exiting. It is set by 'open_outstream' on
   success and cleared once the output file has been closed. */
bool delete_output_on_interrupt = false;
97
98
/* Print the help text to standard output: program description, usage line
   (using 'invocation_name'), option summary, and closing notes.
   The "Debug options for experts" section is printed only when
   'verbosity' is at least 1. */
void show_help()
  {
  // Description and usage line.
  std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n"
               "compressed data format (.lz). Lziprecover is able to repair slightly damaged\n"
               "files, produce a correct file by merging the good parts of two or more\n"
               "damaged copies, reproduce a missing (zeroed) sector using a reference file,\n"
               "extract data from damaged files, decompress files, and test integrity of\n"
               "files.\n"
               "\nLziprecover can repair perfectly most files with small errors (up to one\n"
               "single-byte error per member), without the need of any extra redundance\n"
               "at all. Losing an entire archive just because of a corrupt byte near the\n"
               "beginning is a thing of the past.\n"
               "\nLziprecover can remove the damaged members from multimember files, for\n"
               "example multimember tar.lz archives.\n"
               "\nLziprecover provides random access to the data in multimember files; it only\n"
               "decompresses the members containing the desired data.\n"
               "\nLziprecover facilitates the management of metadata stored as trailing data\n"
               "in lzip files.\n"
               "\nLziprecover is not a replacement for regular backups, but a last line of\n"
               "defense for the case where the backups are also damaged.\n"
               "\nUsage: %s [options] [files]\n", invocation_name );
  // Regular options.
  std::printf( "\nOptions:\n"
               " -h, --help display this help and exit\n"
               " -V, --version output version information and exit\n"
               " -a, --trailing-error exit with error status if trailing data\n"
               " -A, --alone-to-lz convert lzma-alone files to lzip format\n"
               " -c, --stdout write to standard output, keep input files\n"
               " -d, --decompress decompress\n"
               " -D, --range-decompress=<n-m> decompress a range of bytes to stdout\n"
               " -e, --reproduce try to reproduce a zeroed sector in file\n"
               " --lzip-level=N|a|m[N] reproduce one level, all, or match length\n"
               " --lzip-name=<name> name of lzip executable for --reproduce\n"
               " --reference-file=<file> reference file for --reproduce\n"
               " -f, --force overwrite existing output files\n"
               " -i, --ignore-errors ignore some errors in -d, -D, -l, -t, --dump\n"
               " -k, --keep keep (don't delete) input files\n"
               " -l, --list print (un)compressed file sizes\n"
               " -m, --merge correct errors in file using several copies\n"
               " -o, --output=<file> place the output into <file>\n"
               " -q, --quiet suppress all messages\n"
               " -R, --repair try to repair a small error in file\n"
               " -s, --split split multimember file in single-member files\n"
               " -t, --test test compressed file integrity\n"
               " -v, --verbose be verbose (a 2nd -v gives more)\n"
               " --loose-trailing allow trailing data seeming corrupt header\n"
               " --dump=<list>:d:t dump members listed/damaged, tdata to stdout\n"
               " --remove=<list>:d:t remove members, tdata from files in place\n"
               " --strip=<list>:d:t copy files to stdout stripping members given\n" );
  // Debug options are hidden unless the level of verbosity has been raised.
  if( verbosity >= 1 )
    {
    std::printf( "\nDebug options for experts:\n"
                 " -E, --debug-reproduce=<range>[,ss] set range to 0 and try to reproduce file\n"
                 " -M, --md5sum print the MD5 digests of the input files\n"
                 " -S, --nrep-stats[=<val>] print stats of N-byte repeated sequences\n"
                 " -U, --unzcrash test 1-bit errors in the input file\n"
                 " -W, --debug-decompress=<pos>,<val> set pos to val and decompress to stdout\n"
                 " -X, --show-packets[=<pos>,<val>] show in stdout the decoded LZMA packets\n"
                 " -Y, --debug-delay=<range> find max error detection delay in <range>\n"
                 " -Z, --debug-repair=<pos>,<val> test repair one-byte error at <pos>\n" );
    }
  // Closing notes: defaults, multipliers, example, exit status, and contact.
  std::printf( "\nIf no file names are given, or if a file is '-', lziprecover decompresses\n"
               "from standard input to standard output.\n"
               "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
               "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
               "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
               "'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n"
               "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
               "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
               "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
               "caused lziprecover to panic.\n"
               "\nReport bugs to lzip-bug@nongnu.org\n"
               "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
  }
172
173 } // end namespace
174
operator ()(const char * const msg,FILE * const f) const175 void Pretty_print::operator()( const char * const msg, FILE * const f ) const
176 {
177 if( verbosity >= 0 )
178 {
179 if( first_post )
180 {
181 first_post = false;
182 std::fputs( padded_name.c_str(), f );
183 if( !msg ) std::fflush( f );
184 }
185 if( msg ) std::fprintf( f, "%s\n", msg );
186 }
187 }
188
189
/* Return a message stating that member format 'version' is not supported.
   The message lives in a static buffer that is overwritten by the next
   call. */
const char * bad_version( const unsigned version )
  {
  enum { message_size = 80 };
  static char message[message_size];

  snprintf( message, message_size, "Version %u member format not supported.",
            version );
  return message;
  }
197
198
/* Format 'dictionary_size' as a number followed by a binary unit
   ("B", "KiB", "MiB", ...). The value is divided by 1024 while it has more
   than four digits, or while it is an exact multiple of 1024.
   The result lives in a static buffer overwritten by the next call. */
const char * format_ds( const unsigned dictionary_size )
  {
  enum { result_size = 16, step = 1024, units = 8 };
  static char result[result_size];
  const char * const unit_names[units] =
    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
  const char * unit = "";
  const char * padding = " ";		// used only when no unit is printed
  unsigned value = dictionary_size;
  bool exact = ( value % step == 0 );
  int i = 0;

  while( i < units && ( value > 9999 || ( exact && value >= step ) ) )
    {
    value /= step;
    exact = exact && ( value % step == 0 );
    unit = unit_names[i];
    padding = "";
    ++i;
    }
  snprintf( result, result_size, "%s%4u %sB", padding, value, unit );
  return result;
  }
216
217
show_header(const unsigned dictionary_size)218 void show_header( const unsigned dictionary_size )
219 {
220 std::fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) );
221 }
222
223
224 #include "main_common.cc"
225
226
// Colon-separated list of "damaged", "tdata", [r][^]<list> (1 1,3-5,8)
/* Parse the argument 'p' one colon-separated field at a time and record the
   result in this Member_list:
     - a field that is a prefix of "damaged" or "tdata" sets 'damaged' or
       'tdata',
     - any other field is a list of member numbers and ranges, optionally
       preceded by 'r' (store in 'rrange_vector' instead of 'range_vector')
       and by '^' (clear 'rin' or 'in' respectively).
   Numbers are 1-based in the argument and stored 0-based. Each new block
   must not start before the end of the previous one in the same vector.
   On any malformed field an error is shown and the program exits with
   status 1.
   NOTE(review): 'getnum' is defined elsewhere; it is presumably what
   rejects numbers outside the given limits -- confirm. */
void Member_list::parse( const char * p )
  {
  while( true )
    {
    const char * tp = p;		// points to terminator; ':' or null
    while( *tp && *tp != ':' ) ++tp;
    const unsigned len = tp - p;	// length of the current field
    if( std::isalpha( *(const unsigned char *)p ) )
      {
      // Keywords may be abbreviated to any non-empty prefix.
      if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 )
        { damaged = true; goto next; }
      if( len <= 5 && std::strncmp( "tdata", p, len ) == 0 )
        { tdata = true; goto next; }
      }
      {
      const bool reverse = ( *p == 'r' );
      if( reverse ) ++p;
      if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; }
      std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector;
      while( std::isdigit( *(const unsigned char *)p ) )
        {
        const char * tail;
        const int pos = getnum( p, 0, 1, INT_MAX, &tail ) - 1;	// 0-based
        // Blocks must be given in increasing order without overlap.
        if( rvp->size() && pos < rvp->back().end() ) break;
        // "<first>-<last>" gives a range; a lone number is a block of size 1.
        const int size = (*tail == '-') ?
          getnum( tail + 1, 0, pos + 1, INT_MAX, &tail ) - pos : 1;
        rvp->push_back( Block( pos, size ) );
        if( tail == tp ) goto next;	// field fully consumed
        if( *tail == ',' ) p = tail + 1; else break;
        }
      }
    // Reached only when the field could not be parsed completely.
    show_error( "Invalid list of members." );
    std::exit( 1 );
next:
    // Skip the ':' and continue, or stop at the end of the argument.
    if( *(p = tp) != 0 ) ++p; else return;
    }
  }
265
266
267 namespace {
268
269 // Recognized formats: <digit> 'a' m[<match_length>]
270 //
parse_lzip_level(const char * const p)271 int parse_lzip_level( const char * const p )
272 {
273 if( *p == 'a' || std::isdigit( *(const unsigned char *)p ) ) return *p;
274 if( *p != 'm' )
275 {
276 show_error( "Bad argument in option '--lzip-level'.", 0, true );
277 std::exit( 1 );
278 }
279 if( p[1] == 0 ) return -1;
280 return -getnum( p + 1, 0, min_match_len_limit, max_match_len );
281 }
282
283
284 /* Recognized format: <range>[,<sector_size>]
285 range formats: <begin> <begin>-<end> <begin>,<size> ,<size>
286 */
parse_range(const char * const ptr,Block & range,int * const sector_sizep=0)287 void parse_range( const char * const ptr, Block & range,
288 int * const sector_sizep = 0 )
289 {
290 const char * tail = ptr;
291 long long value =
292 ( ptr[0] == ',' ) ? 0 : getnum( ptr, 0, 0, INT64_MAX - 1, &tail );
293 if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' )
294 {
295 range.pos( value );
296 if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; }
297 const bool is_size = ( tail[0] == ',' );
298 if( sector_sizep && tail[1] == ',' ) { value = INT64_MAX - value; ++tail; }
299 else value = getnum( tail + 1, 0, 1, INT64_MAX, &tail ); // size
300 if( is_size || value > range.pos() )
301 {
302 if( !is_size ) value -= range.pos();
303 if( INT64_MAX - range.pos() >= value )
304 {
305 range.size( value );
306 if( sector_sizep && tail[0] == ',' )
307 *sector_sizep = getnum( tail + 1, 0, 8, INT_MAX );
308 return;
309 }
310 }
311 }
312 show_error( "Bad decompression range.", 0, true );
313 std::exit( 1 );
314 }
315
316
317 // Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
318 //
parse_pos_value(const char * const ptr,Bad_byte & bad_byte)319 void parse_pos_value( const char * const ptr, Bad_byte & bad_byte )
320 {
321 const char * tail;
322 bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &tail );
323 if( tail[0] != ',' )
324 {
325 show_error( "Bad separator between <pos> and <val>.", 0, true );
326 std::exit( 1 );
327 }
328 if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; }
329 else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; }
330 else bad_byte.mode = Bad_byte::literal;
331 bad_byte.value = getnum( tail + 1, 0, 0, 255 );
332 }
333
334
one_file(const int files)335 void one_file( const int files )
336 {
337 if( files != 1 )
338 {
339 show_error( "You must specify exactly 1 file.", 0, true );
340 std::exit( 1 );
341 }
342 }
343
344
set_mode(Mode & program_mode,const Mode new_mode)345 void set_mode( Mode & program_mode, const Mode new_mode )
346 {
347 if( program_mode != m_none && program_mode != new_mode )
348 {
349 show_error( "Only one operation can be specified.", 0, true );
350 std::exit( 1 );
351 }
352 program_mode = new_mode;
353 }
354
355
extension_index(const std::string & name)356 int extension_index( const std::string & name )
357 {
358 for( int eindex = 0; known_extensions[eindex].from; ++eindex )
359 {
360 const std::string ext( known_extensions[eindex].from );
361 if( name.size() > ext.size() &&
362 name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 )
363 return eindex;
364 }
365 return -1;
366 }
367
368
set_a_outname(const std::string & name)369 void set_a_outname( const std::string & name )
370 {
371 output_filename = name;
372 if( name.size() > 5 && name.compare( name.size() - 5, 5, ".lzma" ) == 0 )
373 output_filename.erase( name.size() - 2 );
374 else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 )
375 output_filename.insert( name.size() - 2, "ar." );
376 else if( name.size() <= 3 || name.compare( name.size() - 3, 3, ".lz" ) != 0 )
377 output_filename += known_extensions[0].from;
378 }
379
380
set_d_outname(const std::string & name,const int eindex)381 void set_d_outname( const std::string & name, const int eindex )
382 {
383 if( eindex >= 0 )
384 {
385 const std::string from( known_extensions[eindex].from );
386 if( name.size() > from.size() )
387 {
388 output_filename.assign( name, 0, name.size() - from.size() );
389 output_filename += known_extensions[eindex].to;
390 return;
391 }
392 }
393 output_filename = name; output_filename += ".out";
394 if( verbosity >= 1 )
395 std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
396 program_name, name.c_str(), output_filename.c_str() );
397 }
398
399 } // end namespace
400
open_instream(const char * const name,struct stat * const in_statsp,const bool one_to_one,const bool reg_only)401 int open_instream( const char * const name, struct stat * const in_statsp,
402 const bool one_to_one, const bool reg_only )
403 {
404 int infd = open( name, O_RDONLY | O_BINARY );
405 if( infd < 0 )
406 show_file_error( name, "Can't open input file", errno );
407 else
408 {
409 const int i = fstat( infd, in_statsp );
410 const mode_t mode = in_statsp->st_mode;
411 const bool can_read = ( i == 0 && !reg_only &&
412 ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
413 S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
414 if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
415 {
416 if( verbosity >= 0 )
417 std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
418 program_name, name, ( can_read && one_to_one ) ?
419 ",\n and neither '-c' nor '-o' were specified" : "" );
420 close( infd );
421 infd = -1;
422 }
423 }
424 return infd;
425 }
426
427
open_truncable_stream(const char * const name,struct stat * const in_statsp)428 int open_truncable_stream( const char * const name,
429 struct stat * const in_statsp )
430 {
431 int fd = open( name, O_RDWR | O_BINARY );
432 if( fd < 0 )
433 show_file_error( name, "Can't open input file", errno );
434 else
435 {
436 const int i = fstat( fd, in_statsp );
437 const mode_t mode = in_statsp->st_mode;
438 if( i != 0 || !S_ISREG( mode ) )
439 { show_file_error( name, "Not a regular file." ); close( fd ); fd = -1; }
440 }
441 return fd;
442 }
443
444
open_outstream(const bool force,const bool protect,const bool rw,const bool skipping)445 bool open_outstream( const bool force, const bool protect,
446 const bool rw, const bool skipping )
447 {
448 const mode_t usr_rw = S_IRUSR | S_IWUSR;
449 const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
450 const mode_t outfd_mode = protect ? usr_rw : all_rw;
451 int flags = O_CREAT | ( rw ? O_RDWR : O_WRONLY ) | O_BINARY;
452 if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
453
454 outfd = open( output_filename.c_str(), flags, outfd_mode );
455 if( outfd >= 0 ) delete_output_on_interrupt = true;
456 else if( verbosity >= 0 )
457 {
458 if( errno == EEXIST )
459 std::fprintf( stderr, "%s: Output file '%s' already exists%s.\n",
460 program_name, output_filename.c_str(), skipping ?
461 ", skipping" : ". Use '--force' to overwrite it" );
462 else
463 std::fprintf( stderr, "%s: Can't create output file '%s': %s\n",
464 program_name, output_filename.c_str(), std::strerror( errno ) );
465 }
466 return ( outfd >= 0 );
467 }
468
469
file_exists(const std::string & filename)470 bool file_exists( const std::string & filename )
471 {
472 struct stat st;
473 if( stat( filename.c_str(), &st ) == 0 )
474 {
475 if( verbosity >= 0 )
476 std::fprintf( stderr, "%s: Output file '%s' already exists."
477 " Use '--force' to overwrite it.\n",
478 program_name, filename.c_str() );
479 return true;
480 }
481 return false;
482 }
483
484
// Install 'action' as the handler of SIGHUP, SIGINT, and SIGTERM.
void set_signals( void (*action)(int) )
  {
  const int handled[] = { SIGHUP, SIGINT, SIGTERM };
  const int count = sizeof handled / sizeof handled[0];

  for( int i = 0; i < count; ++i ) std::signal( handled[i], action );
  }
491
492
cleanup_and_fail(const int retval)493 void cleanup_and_fail( const int retval )
494 {
495 set_signals( SIG_IGN ); // ignore signals
496 if( delete_output_on_interrupt )
497 {
498 delete_output_on_interrupt = false;
499 if( verbosity >= 0 )
500 std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
501 program_name, output_filename.c_str() );
502 if( outfd >= 0 ) { close( outfd ); outfd = -1; }
503 if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
504 show_error( "WARNING: deletion of output file (apparently) failed." );
505 }
506 std::exit( retval );
507 }
508
509 namespace {
510
signal_handler(int)511 extern "C" void signal_handler( int )
512 {
513 show_error( "Control-C or similar caught, quitting." );
514 cleanup_and_fail( 1 );
515 }
516
517
check_tty_in(const char * const input_filename,const int infd,const Mode program_mode,int & retval)518 bool check_tty_in( const char * const input_filename, const int infd,
519 const Mode program_mode, int & retval )
520 {
521 if( isatty( infd ) ) // all modes read compressed data
522 { show_file_error( input_filename,
523 "I won't read compressed data from a terminal." );
524 close( infd ); set_retval( retval, 1 );
525 if( program_mode != m_test ) cleanup_and_fail( retval );
526 return false; }
527 return true;
528 }
529
check_tty_out(const Mode program_mode)530 bool check_tty_out( const Mode program_mode )
531 {
532 if( program_mode == m_alone_to_lz && isatty( outfd ) )
533 { show_file_error( output_filename.size() ?
534 output_filename.c_str() : "(stdout)",
535 "I won't write compressed data to a terminal." );
536 return false; }
537 return true;
538 }
539
540
541 // Set permissions, owner, and times.
close_and_set_permissions(const struct stat * const in_statsp)542 void close_and_set_permissions( const struct stat * const in_statsp )
543 {
544 bool warning = false;
545 if( in_statsp )
546 {
547 const mode_t mode = in_statsp->st_mode;
548 // fchown will in many cases return with EPERM, which can be safely ignored.
549 if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
550 { if( fchmod( outfd, mode ) != 0 ) warning = true; }
551 else
552 if( errno != EPERM ||
553 fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
554 warning = true;
555 }
556 if( close( outfd ) != 0 )
557 {
558 show_error( "Error closing output file", errno );
559 cleanup_and_fail( 1 );
560 }
561 outfd = -1;
562 delete_output_on_interrupt = false;
563 if( in_statsp )
564 {
565 struct utimbuf t;
566 t.actime = in_statsp->st_atime;
567 t.modtime = in_statsp->st_mtime;
568 if( utime( output_filename.c_str(), &t ) != 0 ) warning = true;
569 }
570 if( warning && verbosity >= 1 )
571 show_error( "Can't change output file attributes." );
572 }
573
574
/* Return the uppercase hexadecimal digit for 'value' (0 to 15), or 0 if
   'value' is out of range. */
unsigned char xdigit( const unsigned value )
  {
  static const char digits[] = "0123456789ABCDEF";

  if( value >= sizeof digits - 1 ) return 0;
  return digits[value];
  }
581
582
show_trailing_data(const uint8_t * const data,const int size,const Pretty_print & pp,const bool all,const int ignore_trailing)583 bool show_trailing_data( const uint8_t * const data, const int size,
584 const Pretty_print & pp, const bool all,
585 const int ignore_trailing ) // -1 = show
586 {
587 if( verbosity >= 4 || ignore_trailing <= 0 )
588 {
589 std::string msg;
590 if( !all ) msg = "first bytes of ";
591 msg += "trailing data = ";
592 for( int i = 0; i < size; ++i )
593 {
594 msg += xdigit( data[i] >> 4 );
595 msg += xdigit( data[i] & 0x0F );
596 msg += ' ';
597 }
598 msg += '\'';
599 for( int i = 0; i < size; ++i )
600 { if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; }
601 msg += '\'';
602 pp( msg.c_str() );
603 if( ignore_trailing == 0 ) show_file_error( pp.name(), trailing_msg );
604 }
605 return ( ignore_trailing > 0 );
606 }
607
608
/* Decode, one member after another, the data read from 'infd' through a
   Range_decoder, passing each member to an LZ_decoder built with the
   global 'outfd'.
   'cfile_size' is forwarded to 'show_dprogress'. 'pp' prints the
   diagnostics. 'ignore_errors' makes the loop continue with the next
   member after an error, where it would otherwise stop.
   'ignore_trailing' and 'loose_trailing' control how data that is not a
   valid header is treated after the first member. 'testing' selects "ok"
   instead of "done" as the success message.
   Returns 0 on success and 2 on error; with 'ignore_errors' set, a
   result of 2 is turned into 0 before returning.
   NOTE(review): the exact semantics of read_header_carefully,
   find_header, verify_prefix, and verify_corrupt are defined elsewhere --
   confirm before relying on the comments below. */
int decompress( const unsigned long long cfile_size, const int infd,
                const Pretty_print & pp, const bool ignore_errors,
                const bool ignore_trailing, const bool loose_trailing,
                const bool testing )
  {
  int retval = 0;
  unsigned long long partial_file_pos = 0;	// sum of member positions so far
  Range_decoder rdec( infd );
  for( bool first_member = true; ; first_member = false )
    {
    Lzip_header header;
    rdec.reset_member_position();
    const int size = rdec.read_header_carefully( header, ignore_errors );
    if( rdec.finished() ||			// End Of File
        ( size < Lzip_header::size && !rdec.find_header( header ) ) )
      {
      // No complete header is available; this always ends the loop.
      if( first_member )
        { show_file_error( pp.name(), "File ends unexpectedly at member header." );
          retval = 2; }
      else if( header.verify_prefix( size ) )
        { pp( "Truncated header in multimember file." );
          show_trailing_data( header.data, size, pp, true, -1 );
          retval = 2; }
      else if( size > 0 && !show_trailing_data( header.data, size, pp,
                                                true, ignore_trailing ) )
        retval = 2;
      break;
      }
    if( !header.verify_magic() )
      {
      // After the first member, a bad magic may be just trailing data.
      if( first_member )
        { show_file_error( pp.name(), bad_magic_msg ); retval = 2; }
      else if( !loose_trailing && header.verify_corrupt() )
        { pp( corrupt_mm_msg );
          show_trailing_data( header.data, size, pp, false, -1 );
          retval = 2; }
      else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
        retval = 2;
      if( ignore_errors ) { pp.reset(); continue; } else break;
      }
    if( !header.verify_version() )
      { pp( bad_version( header.version() ) ); retval = 2;
        if( ignore_errors ) { pp.reset(); continue; } else break; }
    const unsigned dictionary_size = header.dictionary_size();
    if( !isvalid_ds( dictionary_size ) )
      { pp( bad_dict_msg ); retval = 2;
        if( ignore_errors ) { pp.reset(); continue; } else break; }

    // Print the file name: for every member at verbosity >= 2, else once.
    if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) pp();

    LZ_decoder decoder( rdec, dictionary_size, outfd );
    show_dprogress( cfile_size, partial_file_pos, &rdec, &pp );	// init
    const int result = decoder.decode_member( pp );
    partial_file_pos += rdec.member_position();
    if( result != 0 )
      {
      // Results 1 and 2 are reported here; other values print nothing here.
      if( verbosity >= 0 && result <= 2 )
        {
        pp();
        std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
                      "File ends unexpectedly" : "Decoder error",
                      partial_file_pos );
        }
      retval = 2; if( ignore_errors ) { pp.reset(); continue; } else break;
      }
    if( verbosity >= 2 )
      { std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
    }
  // At verbosity 1 a single success message is printed for the whole file.
  if( verbosity == 1 && retval == 0 )
    std::fputs( testing ? "ok\n" : "done\n", stderr );
  // Errors already reported are not propagated when they are being ignored.
  if( retval == 2 && ignore_errors ) retval = 0;
  return retval;
  }
682
683 } // end namespace
684
set_signal_handler()685 void set_signal_handler() { set_signals( signal_handler ); }
686
close_outstream(const struct stat * const in_statsp)687 int close_outstream( const struct stat * const in_statsp )
688 {
689 if( delete_output_on_interrupt )
690 close_and_set_permissions( in_statsp );
691 if( outfd >= 0 && close( outfd ) != 0 )
692 { show_error( "Error closing stdout", errno ); return 1; }
693 outfd = -1;
694 return 0;
695 }
696
697
/* Return 'name' with "_fixed" inserted before a trailing ".tar.lz", ".lz",
   or ".tlz" (tried in that order). If 'name' has none of them, or consists
   only of the extension, "_fixed.lz" is appended instead. */
std::string insert_fixed( std::string name )
  {
  static const char * const suffixes[] = { ".tar.lz", ".lz", ".tlz" };
  const int count = sizeof suffixes / sizeof suffixes[0];

  for( int i = 0; i < count; ++i )
    {
    const std::string suffix( suffixes[i] );
    if( name.size() <= suffix.size() ) continue;
    const std::string::size_type at = name.size() - suffix.size();
    if( name.compare( at, suffix.size(), suffix ) == 0 )
      { name.insert( at, "_fixed" ); return name; }
    }
  name += "_fixed.lz";
  return name;
  }
709
710
show_file_error(const char * const filename,const char * const msg,const int errcode)711 void show_file_error( const char * const filename, const char * const msg,
712 const int errcode )
713 {
714 if( verbosity >= 0 )
715 std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
716 ( errcode > 0 ) ? ": " : "",
717 ( errcode > 0 ) ? std::strerror( errcode ) : "" );
718 }
719
720
show_2file_error(const char * const msg1,const char * const name1,const char * const name2,const char * const msg2)721 void show_2file_error( const char * const msg1, const char * const name1,
722 const char * const name2, const char * const msg2 )
723 {
724 if( verbosity >= 0 )
725 std::fprintf( stderr, "%s: %s '%s' and '%s' %s\n",
726 program_name, msg1, name1, name2, msg2 );
727 }
728
729
show_dprogress(const unsigned long long cfile_size,const unsigned long long partial_size,const Range_decoder * const d,const Pretty_print * const p)730 void show_dprogress( const unsigned long long cfile_size,
731 const unsigned long long partial_size,
732 const Range_decoder * const d,
733 const Pretty_print * const p )
734 {
735 static unsigned long long csize = 0; // file_size / 100
736 static unsigned long long psize = 0;
737 static const Range_decoder * rdec = 0;
738 static const Pretty_print * pp = 0;
739 static int counter = 0;
740 static bool enabled = true;
741
742 if( !enabled ) return;
743 if( p ) // initialize static vars
744 {
745 if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
746 csize = cfile_size; psize = partial_size; rdec = d; pp = p; counter = 0;
747 }
748 if( rdec && pp && --counter <= 0 )
749 {
750 const unsigned long long pos = psize + rdec->member_position();
751 counter = 7; // update display every 114688 bytes
752 if( csize > 0 )
753 std::fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 );
754 else
755 std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
756 pp->reset(); (*pp)(); // restore cursor position
757 }
758 }
759
760
main(const int argc,const char * const argv[])761 int main( const int argc, const char * const argv[] )
762 {
763 Block range( 0, 0 );
764 int sector_size = INT_MAX; // default larger than practical range
765 Bad_byte bad_byte;
766 Member_list member_list;
767 std::string default_output_filename;
768 std::vector< std::string > filenames;
769 const char * lzip_name = "lzip"; // default is lzip
770 const char * reference_filename = 0;
771 Mode program_mode = m_none;
772 int lzip_level = 0; // 0 = test all levels and match lengths
773 // '0'..'9' = level, 'a' = all levels
774 // -5..-273 = match length, -1 = all lengths
775 int repeated_byte = -1; // 0 to 255, or -1 for all values
776 bool force = false;
777 bool ignore_errors = false;
778 bool ignore_trailing = true;
779 bool keep_input_files = false;
780 bool loose_trailing = false;
781 bool to_stdout = false;
782 if( argc > 0 ) invocation_name = argv[0];
783
784 enum { opt_du = 256, opt_lt, opt_lzl, opt_lzn, opt_ref, opt_re, opt_st };
785 const Arg_parser::Option options[] =
786 {
787 { 'a', "trailing-error", Arg_parser::no },
788 { 'A', "alone-to-lz", Arg_parser::no },
789 { 'c', "stdout", Arg_parser::no },
790 { 'd', "decompress", Arg_parser::no },
791 { 'D', "range-decompress", Arg_parser::yes },
792 { 'e', "reproduce", Arg_parser::no },
793 { 'E', "debug-reproduce", Arg_parser::yes },
794 { 'f', "force", Arg_parser::no },
795 { 'h', "help", Arg_parser::no },
796 { 'i', "ignore-errors", Arg_parser::no },
797 { 'k', "keep", Arg_parser::no },
798 { 'l', "list", Arg_parser::no },
799 { 'm', "merge", Arg_parser::no },
800 { 'M', "md5sum", Arg_parser::no },
801 { 'n', "threads", Arg_parser::yes },
802 { 'o', "output", Arg_parser::yes },
803 { 'q', "quiet", Arg_parser::no },
804 { 'R', "repair", Arg_parser::no },
805 { 's', "split", Arg_parser::no },
806 { 'S', "nrep-stats", Arg_parser::maybe },
807 { 't', "test", Arg_parser::no },
808 { 'U', "unzcrash", Arg_parser::no },
809 { 'v', "verbose", Arg_parser::no },
810 { 'V', "version", Arg_parser::no },
811 { 'W', "debug-decompress", Arg_parser::yes },
812 { 'X', "show-packets", Arg_parser::maybe },
813 { 'Y', "debug-delay", Arg_parser::yes },
814 { 'Z', "debug-repair", Arg_parser::yes },
815 { opt_du, "dump", Arg_parser::yes },
816 { opt_lt, "loose-trailing", Arg_parser::no },
817 { opt_lzl, "lzip-level", Arg_parser::yes },
818 { opt_lzn, "lzip-name", Arg_parser::yes },
819 { opt_ref, "reference-file", Arg_parser::yes },
820 { opt_re, "remove", Arg_parser::yes },
821 { opt_st, "strip", Arg_parser::yes },
822 { 0 , 0, Arg_parser::no } };
823
824 const Arg_parser parser( argc, argv, options );
825 if( parser.error().size() ) // bad option
826 { show_error( parser.error().c_str(), 0, true ); return 1; }
827
828 int argind = 0;
829 for( ; argind < parser.arguments(); ++argind )
830 {
831 const int code = parser.code( argind );
832 if( !code ) break; // no more options
833 const std::string & sarg = parser.argument( argind );
834 const char * const arg = sarg.c_str();
835 switch( code )
836 {
837 case 'a': ignore_trailing = false; break;
838 case 'A': set_mode( program_mode, m_alone_to_lz ); break;
839 case 'c': to_stdout = true; break;
840 case 'd': set_mode( program_mode, m_decompress ); break;
841 case 'D': set_mode( program_mode, m_range_dec );
842 parse_range( arg, range ); break;
843 case 'e': set_mode( program_mode, m_reproduce ); break;
844 case 'E': set_mode( program_mode, m_reproduce );
845 parse_range( arg, range, §or_size ); break;
846 case 'f': force = true; break;
847 case 'h': show_help(); return 0;
848 case 'i': ignore_errors = true; break;
849 case 'k': keep_input_files = true; break;
850 case 'l': set_mode( program_mode, m_list ); break;
851 case 'm': set_mode( program_mode, m_merge ); break;
852 case 'M': set_mode( program_mode, m_md5sum ); break;
853 case 'n': break;
854 case 'o': if( sarg == "-" ) to_stdout = true;
855 else { default_output_filename = sarg; } break;
856 case 'q': verbosity = -1; break;
857 case 'R': set_mode( program_mode, m_repair ); break;
858 case 's': set_mode( program_mode, m_split ); break;
859 case 'S': if( arg[0] ) repeated_byte = getnum( arg, 0, 0, 255 );
860 set_mode( program_mode, m_nrep_stats ); break;
861 case 't': set_mode( program_mode, m_test ); break;
862 case 'U': set_mode( program_mode, m_unzcrash ); break;
863 case 'v': if( verbosity < 4 ) ++verbosity; break;
864 case 'V': show_version(); return 0;
865 case 'W': set_mode( program_mode, m_debug_decompress );
866 parse_pos_value( arg, bad_byte ); break;
867 case 'X': set_mode( program_mode, m_show_packets );
868 if( arg[0] ) { parse_pos_value( arg, bad_byte ); } break;
869 case 'Y': set_mode( program_mode, m_debug_delay );
870 parse_range( arg, range ); break;
871 case 'Z': set_mode( program_mode, m_debug_repair );
872 parse_pos_value( arg, bad_byte ); break;
873 case opt_du: set_mode( program_mode, m_dump );
874 member_list.parse( arg ); break;
875 case opt_lt: loose_trailing = true; break;
876 case opt_lzl: lzip_level = parse_lzip_level( arg ); break;
877 case opt_lzn: lzip_name = arg; break;
878 case opt_ref: reference_filename = arg; break;
879 case opt_re: set_mode( program_mode, m_remove );
880 member_list.parse( arg ); break;
881 case opt_st: set_mode( program_mode, m_strip );
882 member_list.parse( arg ); break;
883 default : internal_error( "uncaught option." );
884 }
885 } // end process options
886
887 #if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
888 setmode( STDIN_FILENO, O_BINARY );
889 setmode( STDOUT_FILENO, O_BINARY );
890 #endif
891
892 if( program_mode == m_none )
893 {
894 show_error( "You must specify the operation to be performed.", 0, true );
895 return 1;
896 }
897
898 bool filenames_given = false;
899 for( ; argind < parser.arguments(); ++argind )
900 {
901 filenames.push_back( parser.argument( argind ) );
902 if( filenames.back() != "-" ) filenames_given = true;
903 }
904
905 const char terminator = isatty( STDOUT_FILENO ) ? '\r' : '\n';
906 try {
907 switch( program_mode )
908 {
909 case m_none: internal_error( "invalid operation." ); break;
910 case m_alone_to_lz: break;
911 case m_debug_decompress:
912 one_file( filenames.size() );
913 return debug_decompress( filenames[0], bad_byte, false );
914 case m_debug_delay:
915 one_file( filenames.size() );
916 return debug_delay( filenames[0], range, terminator );
917 case m_debug_repair:
918 one_file( filenames.size() );
919 return debug_repair( filenames[0], bad_byte, terminator );
920 case m_decompress: break;
921 case m_dump:
922 case m_strip:
923 if( filenames.size() < 1 )
924 { show_error( "You must specify at least 1 file.", 0, true ); return 1; }
925 return dump_members( filenames, default_output_filename, member_list,
926 force, ignore_errors, ignore_trailing,
927 loose_trailing, program_mode == m_strip, to_stdout );
928 case m_list: break;
929 case m_md5sum: break;
930 case m_merge:
931 if( filenames.size() < 2 )
932 { show_error( "You must specify at least 2 files.", 0, true ); return 1; }
933 return merge_files( filenames, default_output_filename, terminator, force );
934 case m_nrep_stats: return print_nrep_stats( filenames, repeated_byte,
935 ignore_errors, ignore_trailing, loose_trailing );
936 case m_range_dec:
937 one_file( filenames.size() );
938 return range_decompress( filenames[0], default_output_filename, range,
939 force, ignore_errors, ignore_trailing,
940 loose_trailing, to_stdout );
941 case m_remove:
942 if( filenames.size() < 1 )
943 { show_error( "You must specify at least 1 file.", 0, true ); return 1; }
944 return remove_members( filenames, member_list, ignore_errors,
945 ignore_trailing, loose_trailing );
946 case m_repair:
947 one_file( filenames.size() );
948 return repair_file( filenames[0], default_output_filename, terminator, force );
949 case m_reproduce:
950 one_file( filenames.size() );
951 if( !reference_filename || !reference_filename[0] )
952 { show_error( "You must specify a reference file.", 0, true ); return 1; }
953 if( range.size() > 0 )
954 return debug_reproduce_file( filenames[0], lzip_name,
955 reference_filename, range, sector_size, lzip_level );
956 else
957 return reproduce_file( filenames[0], default_output_filename,
958 lzip_name, reference_filename, lzip_level, terminator, force );
959 case m_show_packets:
960 one_file( filenames.size() );
961 return debug_decompress( filenames[0], bad_byte, true );
962 case m_split:
963 one_file( filenames.size() );
964 return split_file( filenames[0], default_output_filename, force );
965 case m_test: break;
966 case m_unzcrash:
967 one_file( filenames.size() );
968 return lunzcrash( filenames[0] );
969 }
970 }
971 catch( std::bad_alloc & ) { show_error( mem_msg ); cleanup_and_fail( 1 ); }
972 catch( Error & e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); }
973
974 if( filenames.empty() ) filenames.push_back("-");
975
976 if( program_mode == m_list )
977 return list_files( filenames, ignore_errors, ignore_trailing, loose_trailing );
978 if( program_mode == m_md5sum )
979 return md5sum_files( filenames );
980
981 if( program_mode != m_alone_to_lz && program_mode != m_decompress &&
982 program_mode != m_test )
983 internal_error( "invalid decompressor operation." );
984
985 if( program_mode == m_test ) to_stdout = false; // apply overrides
986 if( program_mode == m_test || to_stdout ) default_output_filename.clear();
987
988 if( to_stdout && program_mode != m_test ) // check tty only once
989 { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; }
990 else outfd = -1;
991
992 const bool to_file = !to_stdout && program_mode != m_test &&
993 default_output_filename.size();
994 if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
995 set_signals( signal_handler );
996
997 Pretty_print pp( filenames );
998
999 int failed_tests = 0;
1000 int retval = 0;
1001 const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
1002 bool stdin_used = false;
1003 for( unsigned i = 0; i < filenames.size(); ++i )
1004 {
1005 std::string input_filename;
1006 int infd;
1007 struct stat in_stats;
1008
1009 pp.set_name( filenames[i] );
1010 if( filenames[i] == "-" )
1011 {
1012 if( stdin_used ) continue; else stdin_used = true;
1013 infd = STDIN_FILENO;
1014 if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
1015 if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); }
1016 }
1017 else
1018 {
1019 input_filename = filenames[i];
1020 infd = open_instream( input_filename.c_str(), &in_stats, one_to_one );
1021 if( infd < 0 ) { set_retval( retval, 1 ); continue; }
1022 if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
1023 if( one_to_one ) // open outfd after verifying infd
1024 {
1025 if( program_mode == m_alone_to_lz ) set_a_outname( input_filename );
1026 else set_d_outname( input_filename, extension_index( input_filename ) );
1027 if( !open_outstream( force, true ) )
1028 { close( infd ); set_retval( retval, 1 ); continue; }
1029 }
1030 }
1031
1032 if( one_to_one && !check_tty_out( program_mode ) )
1033 { set_retval( retval, 1 ); return retval; } // don't delete a tty
1034
1035 if( to_file && outfd < 0 ) // open outfd after verifying infd
1036 {
1037 output_filename = default_output_filename;
1038 if( !open_outstream( force, false ) || !check_tty_out( program_mode ) )
1039 return 1; // check tty only once and don't try to delete a tty
1040 }
1041
1042 const struct stat * const in_statsp =
1043 ( input_filename.size() && one_to_one ) ? &in_stats : 0;
1044 const unsigned long long cfile_size =
1045 ( input_filename.size() && S_ISREG( in_stats.st_mode ) ) ?
1046 ( in_stats.st_size + 99 ) / 100 : 0;
1047 int tmp;
1048 try {
1049 if( program_mode == m_alone_to_lz )
1050 tmp = alone_to_lz( infd, pp );
1051 else
1052 tmp = decompress( cfile_size, infd, pp, ignore_errors, ignore_trailing,
1053 loose_trailing, program_mode == m_test );
1054 }
1055 catch( std::bad_alloc & ) { pp( mem_msg ); tmp = 1; }
1056 catch( Error & e ) { pp(); show_error( e.msg, errno ); tmp = 1; }
1057 if( close( infd ) != 0 )
1058 { show_file_error( pp.name(), "Error closing input file", errno );
1059 set_retval( tmp, 1 ); }
1060 set_retval( retval, tmp );
1061 if( tmp )
1062 { if( program_mode != m_test ) cleanup_and_fail( retval );
1063 else ++failed_tests; }
1064
1065 if( delete_output_on_interrupt && one_to_one )
1066 close_and_set_permissions( in_statsp );
1067 if( input_filename.size() && !keep_input_files && one_to_one &&
1068 ( program_mode != m_decompress || !ignore_errors ) )
1069 std::remove( input_filename.c_str() );
1070 }
1071 if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); // -o
1072 else if( outfd >= 0 && close( outfd ) != 0 ) // -c
1073 {
1074 show_error( "Error closing stdout", errno );
1075 set_retval( retval, 1 );
1076 }
1077 if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 )
1078 std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",
1079 program_name, failed_tests,
1080 ( failed_tests == 1 ) ? "file" : "files" );
1081 return retval;
1082 }
1083