1 /* Lziprecover - Data recovery tool for the lzip format
2    Copyright (C) 2009-2021 Antonio Diaz Diaz.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17 /*
18    Exit status: 0 for a normal exit, 1 for environmental problems
19    (file not found, invalid flags, I/O errors, etc), 2 to indicate a
20    corrupt or invalid input file, 3 for an internal consistency error
21    (eg, bug) which caused lziprecover to panic.
22 */
23 
24 #define _FILE_OFFSET_BITS 64
25 
26 #include <algorithm>
27 #include <cctype>
28 #include <cerrno>
29 #include <climits>
30 #include <csignal>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34 #include <new>
35 #include <string>
36 #include <vector>
37 #include <fcntl.h>
38 #include <stdint.h>
39 #include <unistd.h>
40 #include <utime.h>
41 #include <sys/stat.h>
42 #if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
43 #include <io.h>
44 #if defined(__MSVCRT__)
45 #define fchmod(x,y) 0
46 #define fchown(x,y,z) 0
47 #define SIGHUP SIGTERM
48 #define S_ISSOCK(x) 0
49 #ifndef S_IRGRP
50 #define S_IRGRP 0
51 #define S_IWGRP 0
52 #define S_IROTH 0
53 #define S_IWOTH 0
54 #endif
55 #endif
56 #if defined(__DJGPP__)
57 #define S_ISSOCK(x) 0
58 #define S_ISVTX 0
59 #endif
60 #endif
61 
62 #include "arg_parser.h"
63 #include "lzip.h"
64 #include "decoder.h"
65 
66 #ifndef O_BINARY
67 #define O_BINARY 0
68 #endif
69 
70 #if CHAR_BIT != 8
71 #error "Environments where CHAR_BIT != 8 are not supported."
72 #endif
73 
/* Verbosity level. Functions in this file print diagnostics when it is >= 0
   and additional information at higher values. */
int verbosity = 0;

// Program name used as prefix of the diagnostics printed by this file.
const char * const program_name = "lziprecover";
std::string output_filename;	// global vars for output file
int outfd = -1;			// see 'delete_output_on_interrupt' below
79 
80 namespace {
81 
// Name shown in the usage line printed by 'show_help'.
const char * invocation_name = program_name;		// default value

/* Table of recognized file name extensions. 'from' is the extension looked
   for at the end of a name and 'to' is the extension that replaces it.
   The table is terminated by an entry with null pointers. */
const struct { const char * from; const char * to; } known_extensions[] = {
  { ".lz",  ""     },
  { ".tlz", ".tar" },
  { 0,      0      } };

/* Operation to be performed by the program.
   'm_none' means that no operation has been selected yet. */
enum Mode { m_none, m_alone_to_lz, m_debug_decompress, m_debug_delay,
            m_debug_repair, m_decompress, m_dump, m_list, m_md5sum, m_merge,
            m_nrep_stats, m_range_dec, m_remove, m_repair, m_reproduce,
            m_show_packets, m_split, m_strip, m_test, m_unzcrash };

/* Variable used in signal handler context.
   It is not declared volatile because the handler never returns.
   While true, 'cleanup_and_fail' closes and removes the output file. */
bool delete_output_on_interrupt = false;
97 
98 
show_help()99 void show_help()
100   {
101   std::printf( "Lziprecover is a data recovery tool and decompressor for files in the lzip\n"
102                "compressed data format (.lz). Lziprecover is able to repair slightly damaged\n"
103                "files, produce a correct file by merging the good parts of two or more\n"
104                "damaged copies, reproduce a missing (zeroed) sector using a reference file,\n"
105                "extract data from damaged files, decompress files, and test integrity of\n"
106                "files.\n"
107                "\nLziprecover can repair perfectly most files with small errors (up to one\n"
108                "single-byte error per member), without the need of any extra redundance\n"
109                "at all. Losing an entire archive just because of a corrupt byte near the\n"
110                "beginning is a thing of the past.\n"
111                "\nLziprecover can remove the damaged members from multimember files, for\n"
112                "example multimember tar.lz archives.\n"
113                "\nLziprecover provides random access to the data in multimember files; it only\n"
114                "decompresses the members containing the desired data.\n"
115                "\nLziprecover facilitates the management of metadata stored as trailing data\n"
116                "in lzip files.\n"
117                "\nLziprecover is not a replacement for regular backups, but a last line of\n"
118                "defense for the case where the backups are also damaged.\n"
119                "\nUsage: %s [options] [files]\n", invocation_name );
120   std::printf( "\nOptions:\n"
121                "  -h, --help                    display this help and exit\n"
122                "  -V, --version                 output version information and exit\n"
123                "  -a, --trailing-error          exit with error status if trailing data\n"
124                "  -A, --alone-to-lz             convert lzma-alone files to lzip format\n"
125                "  -c, --stdout                  write to standard output, keep input files\n"
126                "  -d, --decompress              decompress\n"
127                "  -D, --range-decompress=<n-m>  decompress a range of bytes to stdout\n"
128                "  -e, --reproduce               try to reproduce a zeroed sector in file\n"
129                "      --lzip-level=N|a|m[N]     reproduce one level, all, or match length\n"
130                "      --lzip-name=<name>        name of lzip executable for --reproduce\n"
131                "      --reference-file=<file>   reference file for --reproduce\n"
132                "  -f, --force                   overwrite existing output files\n"
133                "  -i, --ignore-errors           ignore some errors in -d, -D, -l, -t, --dump\n"
134                "  -k, --keep                    keep (don't delete) input files\n"
135                "  -l, --list                    print (un)compressed file sizes\n"
136                "  -m, --merge                   correct errors in file using several copies\n"
137                "  -o, --output=<file>           place the output into <file>\n"
138                "  -q, --quiet                   suppress all messages\n"
139                "  -R, --repair                  try to repair a small error in file\n"
140                "  -s, --split                   split multimember file in single-member files\n"
141                "  -t, --test                    test compressed file integrity\n"
142                "  -v, --verbose                 be verbose (a 2nd -v gives more)\n"
143                "      --loose-trailing          allow trailing data seeming corrupt header\n"
144                "      --dump=<list>:d:t         dump members listed/damaged, tdata to stdout\n"
145                "      --remove=<list>:d:t       remove members, tdata from files in place\n"
146                "      --strip=<list>:d:t        copy files to stdout stripping members given\n" );
147   if( verbosity >= 1 )
148     {
149     std::printf( "\nDebug options for experts:\n"
150                  "  -E, --debug-reproduce=<range>[,ss]  set range to 0 and try to reproduce file\n"
151                  "  -M, --md5sum                      print the MD5 digests of the input files\n"
152                  "  -S, --nrep-stats[=<val>]          print stats of N-byte repeated sequences\n"
153                  "  -U, --unzcrash                    test 1-bit errors in the input file\n"
154                  "  -W, --debug-decompress=<pos>,<val>  set pos to val and decompress to stdout\n"
155                  "  -X, --show-packets[=<pos>,<val>]  show in stdout the decoded LZMA packets\n"
156                  "  -Y, --debug-delay=<range>         find max error detection delay in <range>\n"
157                  "  -Z, --debug-repair=<pos>,<val>    test repair one-byte error at <pos>\n" );
158     }
159   std::printf( "\nIf no file names are given, or if a file is '-', lziprecover decompresses\n"
160                "from standard input to standard output.\n"
161                "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
162                "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
163                "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
164                "'tar -xf foo.tar.lz' or 'lziprecover -cd foo.tar.lz | tar -xf -'.\n"
165                "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
166                "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
167                "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
168                "caused lziprecover to panic.\n"
169                "\nReport bugs to lzip-bug@nongnu.org\n"
170                "Lziprecover home page: http://www.nongnu.org/lzip/lziprecover.html\n" );
171   }
172 
173 } // end namespace
174 
operator ()(const char * const msg,FILE * const f) const175 void Pretty_print::operator()( const char * const msg, FILE * const f ) const
176   {
177   if( verbosity >= 0 )
178     {
179     if( first_post )
180       {
181       first_post = false;
182       std::fputs( padded_name.c_str(), f );
183       if( !msg ) std::fflush( f );
184       }
185     if( msg ) std::fprintf( f, "%s\n", msg );
186     }
187   }
188 
189 
/* Return a message saying that member format 'version' is not supported.
   The message is stored in a static buffer that is overwritten by the
   next call. */
const char * bad_version( const unsigned version )
  {
  enum { bufsize = 80 };
  static char message[bufsize];

  snprintf( message, bufsize, "Version %u member format not supported.",
            version );
  return message;
  }
197 
198 
/* Format 'dictionary_size' as a number of at most 4 digits followed by a
   binary unit (B, KiB, MiB, ...). The value is divided by 1024 while it is
   larger than 9999 or while it is an exact multiple of 1024.
   Returns a pointer to a static buffer overwritten by the next call. */
const char * format_ds( const unsigned dictionary_size )
  {
  enum { bufsize = 16, factor = 1024 };
  static char buf[bufsize];
  static const char * const units[8] =
    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
  const char * unit = "";
  const char * padding = "  ";		// keeps width when there is no prefix
  unsigned value = dictionary_size;
  bool exact = ( value % factor == 0 );
  int level = 0;

  while( level < 8 && ( value > 9999 || ( exact && value >= factor ) ) )
    {
    value /= factor;
    exact = exact && ( value % factor == 0 );
    unit = units[level++];
    padding = "";
    }
  snprintf( buf, bufsize, "%s%4u %sB", padding, value, unit );
  return buf;
  }
216 
217 
show_header(const unsigned dictionary_size)218 void show_header( const unsigned dictionary_size )
219   {
220   std::fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) );
221   }
222 
223 
224 #include "main_common.cc"
225 
226 
227 // Colon-separated list of "damaged", "tdata", [r][^]<list> (1 1,3-5,8)
parse(const char * p)228 void Member_list::parse( const char * p )
229   {
230   while( true )
231     {
232     const char * tp = p;	// points to terminator; ':' or null
233     while( *tp && *tp != ':' ) ++tp;
234     const unsigned len = tp - p;
235     if( std::isalpha( *(const unsigned char *)p ) )
236       {
237       if( len <= 7 && std::strncmp( "damaged", p, len ) == 0 )
238         { damaged = true; goto next; }
239       if( len <= 5 && std::strncmp( "tdata", p, len ) == 0 )
240         { tdata = true; goto next; }
241       }
242     {
243     const bool reverse = ( *p == 'r' );
244     if( reverse ) ++p;
245     if( *p == '^' ) { ++p; if( reverse ) rin = false; else in = false; }
246     std::vector< Block > * rvp = reverse ? &rrange_vector : &range_vector;
247     while( std::isdigit( *(const unsigned char *)p ) )
248       {
249       const char * tail;
250       const int pos = getnum( p, 0, 1, INT_MAX, &tail ) - 1;
251       if( rvp->size() && pos < rvp->back().end() ) break;
252       const int size = (*tail == '-') ?
253         getnum( tail + 1, 0, pos + 1, INT_MAX, &tail ) - pos : 1;
254       rvp->push_back( Block( pos, size ) );
255       if( tail == tp ) goto next;
256       if( *tail == ',' ) p = tail + 1; else break;
257       }
258     }
259     show_error( "Invalid list of members." );
260     std::exit( 1 );
261 next:
262     if( *(p = tp) != 0 ) ++p; else return;
263     }
264   }
265 
266 
267 namespace {
268 
269 // Recognized formats: <digit> 'a' m[<match_length>]
270 //
parse_lzip_level(const char * const p)271 int parse_lzip_level( const char * const p )
272   {
273   if( *p == 'a' || std::isdigit( *(const unsigned char *)p ) ) return *p;
274   if( *p != 'm' )
275     {
276     show_error( "Bad argument in option '--lzip-level'.", 0, true );
277     std::exit( 1 );
278     }
279   if( p[1] == 0 ) return -1;
280   return -getnum( p + 1, 0, min_match_len_limit, max_match_len );
281   }
282 
283 
284 /* Recognized format: <range>[,<sector_size>]
285    range formats: <begin> <begin>-<end> <begin>,<size> ,<size>
286 */
parse_range(const char * const ptr,Block & range,int * const sector_sizep=0)287 void parse_range( const char * const ptr, Block & range,
288                   int * const sector_sizep = 0 )
289   {
290   const char * tail = ptr;
291   long long value =
292     ( ptr[0] == ',' ) ? 0 : getnum( ptr, 0, 0, INT64_MAX - 1, &tail );
293   if( tail[0] == 0 || tail[0] == ',' || tail[0] == '-' )
294     {
295     range.pos( value );
296     if( tail[0] == 0 ) { range.size( INT64_MAX - value ); return; }
297     const bool is_size = ( tail[0] == ',' );
298     if( sector_sizep && tail[1] == ',' ) { value = INT64_MAX - value; ++tail; }
299     else value = getnum( tail + 1, 0, 1, INT64_MAX, &tail );	// size
300     if( is_size || value > range.pos() )
301       {
302       if( !is_size ) value -= range.pos();
303       if( INT64_MAX - range.pos() >= value )
304         {
305         range.size( value );
306         if( sector_sizep && tail[0] == ',' )
307           *sector_sizep = getnum( tail + 1, 0, 8, INT_MAX );
308         return;
309         }
310       }
311     }
312   show_error( "Bad decompression range.", 0, true );
313   std::exit( 1 );
314   }
315 
316 
317 // Recognized formats: <pos>,<value> <pos>,+<value> <pos>,f<value>
318 //
parse_pos_value(const char * const ptr,Bad_byte & bad_byte)319 void parse_pos_value( const char * const ptr, Bad_byte & bad_byte )
320   {
321   const char * tail;
322   bad_byte.pos = getnum( ptr, 0, 0, INT64_MAX, &tail );
323   if( tail[0] != ',' )
324     {
325     show_error( "Bad separator between <pos> and <val>.", 0, true );
326     std::exit( 1 );
327     }
328   if( tail[1] == '+' ) { ++tail; bad_byte.mode = Bad_byte::delta; }
329   else if( tail[1] == 'f' ) { ++tail; bad_byte.mode = Bad_byte::flip; }
330   else bad_byte.mode = Bad_byte::literal;
331   bad_byte.value = getnum( tail + 1, 0, 0, 255 );
332   }
333 
334 
one_file(const int files)335 void one_file( const int files )
336   {
337   if( files != 1 )
338     {
339     show_error( "You must specify exactly 1 file.", 0, true );
340     std::exit( 1 );
341     }
342   }
343 
344 
set_mode(Mode & program_mode,const Mode new_mode)345 void set_mode( Mode & program_mode, const Mode new_mode )
346   {
347   if( program_mode != m_none && program_mode != new_mode )
348     {
349     show_error( "Only one operation can be specified.", 0, true );
350     std::exit( 1 );
351     }
352   program_mode = new_mode;
353   }
354 
355 
extension_index(const std::string & name)356 int extension_index( const std::string & name )
357   {
358   for( int eindex = 0; known_extensions[eindex].from; ++eindex )
359     {
360     const std::string ext( known_extensions[eindex].from );
361     if( name.size() > ext.size() &&
362         name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 )
363       return eindex;
364     }
365   return -1;
366   }
367 
368 
set_a_outname(const std::string & name)369 void set_a_outname( const std::string & name )
370   {
371   output_filename = name;
372   if( name.size() > 5 && name.compare( name.size() - 5, 5, ".lzma" ) == 0 )
373     output_filename.erase( name.size() - 2 );
374   else if( name.size() > 4 && name.compare( name.size() - 4, 4, ".tlz" ) == 0 )
375     output_filename.insert( name.size() - 2, "ar." );
376   else if( name.size() <= 3 || name.compare( name.size() - 3, 3, ".lz" ) != 0 )
377     output_filename += known_extensions[0].from;
378   }
379 
380 
set_d_outname(const std::string & name,const int eindex)381 void set_d_outname( const std::string & name, const int eindex )
382   {
383   if( eindex >= 0 )
384     {
385     const std::string from( known_extensions[eindex].from );
386     if( name.size() > from.size() )
387       {
388       output_filename.assign( name, 0, name.size() - from.size() );
389       output_filename += known_extensions[eindex].to;
390       return;
391       }
392     }
393   output_filename = name; output_filename += ".out";
394   if( verbosity >= 1 )
395     std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
396                   program_name, name.c_str(), output_filename.c_str() );
397   }
398 
399 } // end namespace
400 
open_instream(const char * const name,struct stat * const in_statsp,const bool one_to_one,const bool reg_only)401 int open_instream( const char * const name, struct stat * const in_statsp,
402                    const bool one_to_one, const bool reg_only )
403   {
404   int infd = open( name, O_RDONLY | O_BINARY );
405   if( infd < 0 )
406     show_file_error( name, "Can't open input file", errno );
407   else
408     {
409     const int i = fstat( infd, in_statsp );
410     const mode_t mode = in_statsp->st_mode;
411     const bool can_read = ( i == 0 && !reg_only &&
412                             ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
413                               S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
414     if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
415       {
416       if( verbosity >= 0 )
417         std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
418                       program_name, name, ( can_read && one_to_one ) ?
419                       ",\n             and neither '-c' nor '-o' were specified" : "" );
420       close( infd );
421       infd = -1;
422       }
423     }
424   return infd;
425   }
426 
427 
open_truncable_stream(const char * const name,struct stat * const in_statsp)428 int open_truncable_stream( const char * const name,
429                            struct stat * const in_statsp )
430   {
431   int fd = open( name, O_RDWR | O_BINARY );
432   if( fd < 0 )
433     show_file_error( name, "Can't open input file", errno );
434   else
435     {
436     const int i = fstat( fd, in_statsp );
437     const mode_t mode = in_statsp->st_mode;
438     if( i != 0 || !S_ISREG( mode ) )
439       { show_file_error( name, "Not a regular file." ); close( fd ); fd = -1; }
440     }
441   return fd;
442   }
443 
444 
open_outstream(const bool force,const bool protect,const bool rw,const bool skipping)445 bool open_outstream( const bool force, const bool protect,
446                      const bool rw, const bool skipping )
447   {
448   const mode_t usr_rw = S_IRUSR | S_IWUSR;
449   const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
450   const mode_t outfd_mode = protect ? usr_rw : all_rw;
451   int flags = O_CREAT | ( rw ? O_RDWR : O_WRONLY ) | O_BINARY;
452   if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
453 
454   outfd = open( output_filename.c_str(), flags, outfd_mode );
455   if( outfd >= 0 ) delete_output_on_interrupt = true;
456   else if( verbosity >= 0 )
457     {
458     if( errno == EEXIST )
459       std::fprintf( stderr, "%s: Output file '%s' already exists%s.\n",
460                     program_name, output_filename.c_str(), skipping ?
461                     ", skipping" : ". Use '--force' to overwrite it" );
462     else
463       std::fprintf( stderr, "%s: Can't create output file '%s': %s\n",
464                     program_name, output_filename.c_str(), std::strerror( errno ) );
465     }
466   return ( outfd >= 0 );
467   }
468 
469 
file_exists(const std::string & filename)470 bool file_exists( const std::string & filename )
471   {
472   struct stat st;
473   if( stat( filename.c_str(), &st ) == 0 )
474     {
475     if( verbosity >= 0 )
476       std::fprintf( stderr, "%s: Output file '%s' already exists."
477                             " Use '--force' to overwrite it.\n",
478                     program_name, filename.c_str() );
479     return true;
480     }
481   return false;
482   }
483 
484 
// Install 'action' as the handler of SIGHUP, SIGINT, and SIGTERM.
void set_signals( void (*action)(int) )
  {
  const int handled[] = { SIGHUP, SIGINT, SIGTERM };
  const int count = sizeof handled / sizeof handled[0];

  for( int i = 0; i < count; ++i )
    std::signal( handled[i], action );
  }
491 
492 
cleanup_and_fail(const int retval)493 void cleanup_and_fail( const int retval )
494   {
495   set_signals( SIG_IGN );			// ignore signals
496   if( delete_output_on_interrupt )
497     {
498     delete_output_on_interrupt = false;
499     if( verbosity >= 0 )
500       std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
501                     program_name, output_filename.c_str() );
502     if( outfd >= 0 ) { close( outfd ); outfd = -1; }
503     if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
504       show_error( "WARNING: deletion of output file (apparently) failed." );
505     }
506   std::exit( retval );
507   }
508 
509 namespace {
510 
signal_handler(int)511 extern "C" void signal_handler( int )
512   {
513   show_error( "Control-C or similar caught, quitting." );
514   cleanup_and_fail( 1 );
515   }
516 
517 
check_tty_in(const char * const input_filename,const int infd,const Mode program_mode,int & retval)518 bool check_tty_in( const char * const input_filename, const int infd,
519                    const Mode program_mode, int & retval )
520   {
521   if( isatty( infd ) )			// all modes read compressed data
522     { show_file_error( input_filename,
523                        "I won't read compressed data from a terminal." );
524       close( infd ); set_retval( retval, 1 );
525       if( program_mode != m_test ) cleanup_and_fail( retval );
526       return false; }
527   return true;
528   }
529 
check_tty_out(const Mode program_mode)530 bool check_tty_out( const Mode program_mode )
531   {
532   if( program_mode == m_alone_to_lz && isatty( outfd ) )
533     { show_file_error( output_filename.size() ?
534                        output_filename.c_str() : "(stdout)",
535                        "I won't write compressed data to a terminal." );
536       return false; }
537   return true;
538   }
539 
540 
541 // Set permissions, owner, and times.
close_and_set_permissions(const struct stat * const in_statsp)542 void close_and_set_permissions( const struct stat * const in_statsp )
543   {
544   bool warning = false;
545   if( in_statsp )
546     {
547     const mode_t mode = in_statsp->st_mode;
548     // fchown will in many cases return with EPERM, which can be safely ignored.
549     if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
550       { if( fchmod( outfd, mode ) != 0 ) warning = true; }
551     else
552       if( errno != EPERM ||
553           fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
554         warning = true;
555     }
556   if( close( outfd ) != 0 )
557     {
558     show_error( "Error closing output file", errno );
559     cleanup_and_fail( 1 );
560     }
561   outfd = -1;
562   delete_output_on_interrupt = false;
563   if( in_statsp )
564     {
565     struct utimbuf t;
566     t.actime = in_statsp->st_atime;
567     t.modtime = in_statsp->st_mtime;
568     if( utime( output_filename.c_str(), &t ) != 0 ) warning = true;
569     }
570   if( warning && verbosity >= 1 )
571     show_error( "Can't change output file attributes." );
572   }
573 
574 
/* Return the uppercase hexadecimal digit corresponding to 'value',
   or 0 if 'value' is larger than 15. */
unsigned char xdigit( const unsigned value )
  {
  static const char digits[] = "0123456789ABCDEF";

  if( value > 15 ) return 0;
  return digits[value];
  }
581 
582 
show_trailing_data(const uint8_t * const data,const int size,const Pretty_print & pp,const bool all,const int ignore_trailing)583 bool show_trailing_data( const uint8_t * const data, const int size,
584                          const Pretty_print & pp, const bool all,
585                          const int ignore_trailing )	// -1 = show
586   {
587   if( verbosity >= 4 || ignore_trailing <= 0 )
588     {
589     std::string msg;
590     if( !all ) msg = "first bytes of ";
591     msg += "trailing data = ";
592     for( int i = 0; i < size; ++i )
593       {
594       msg += xdigit( data[i] >> 4 );
595       msg += xdigit( data[i] & 0x0F );
596       msg += ' ';
597       }
598     msg += '\'';
599     for( int i = 0; i < size; ++i )
600       { if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; }
601     msg += '\'';
602     pp( msg.c_str() );
603     if( ignore_trailing == 0 ) show_file_error( pp.name(), trailing_msg );
604     }
605   return ( ignore_trailing > 0 );
606   }
607 
608 
/* Decode, one after another, the members read from 'infd', writing the
   decoded data through an LZ_decoder created on the global 'outfd'.
   cfile_size:      passed to 'show_dprogress' when each member starts.
   pp:              used to print the messages about the file.
   ignore_errors:   after an error, continue with the next header instead
                    of stopping; a final status of 2 is then turned into 0.
   ignore_trailing: passed to 'show_trailing_data' to decide whether
                    trailing data is an error.
   loose_trailing:  if true, data after the first member that does not have
                    a valid magic is not reported as a corrupt header.
   testing:         selects "ok" instead of "done" as the success message.
   Returns 0 on success, or 2 if an error was found in the input. */
int decompress( const unsigned long long cfile_size, const int infd,
                const Pretty_print & pp, const bool ignore_errors,
                const bool ignore_trailing, const bool loose_trailing,
                const bool testing )
  {
  int retval = 0;
  // sum of the member positions reported by 'rdec' for the members decoded
  unsigned long long partial_file_pos = 0;
  Range_decoder rdec( infd );
  for( bool first_member = true; ; first_member = false )
    {
    Lzip_header header;
    rdec.reset_member_position();
    const int size = rdec.read_header_carefully( header, ignore_errors );
    // NOTE(review): 'find_header' presumably searches for the next header
    // when the one just read is incomplete -- confirm in decoder.h
    if( rdec.finished() ||			// End Of File
        ( size < Lzip_header::size && !rdec.find_header( header ) ) )
      {
      // no complete header available; decide if what was read is an error
      if( first_member )
        { show_file_error( pp.name(), "File ends unexpectedly at member header." );
          retval = 2; }
      else if( header.verify_prefix( size ) )
        { pp( "Truncated header in multimember file." );
          show_trailing_data( header.data, size, pp, true, -1 );
          retval = 2; }
      else if( size > 0 && !show_trailing_data( header.data, size, pp,
                                                true, ignore_trailing ) )
        retval = 2;
      break;
      }
    if( !header.verify_magic() )
      {
      // after the first member this is either a corrupt header or
      // trailing data
      if( first_member )
        { show_file_error( pp.name(), bad_magic_msg ); retval = 2; }
      else if( !loose_trailing && header.verify_corrupt() )
        { pp( corrupt_mm_msg );
          show_trailing_data( header.data, size, pp, false, -1 );
          retval = 2; }
      else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
        retval = 2;
      if( ignore_errors ) { pp.reset(); continue; } else break;
      }
    if( !header.verify_version() )
      { pp( bad_version( header.version() ) ); retval = 2;
        if( ignore_errors ) { pp.reset(); continue; } else break; }
    const unsigned dictionary_size = header.dictionary_size();
    if( !isvalid_ds( dictionary_size ) )
      { pp( bad_dict_msg ); retval = 2;
        if( ignore_errors ) { pp.reset(); continue; } else break; }

    // show the file name once per file, or once per member if verbosity >= 2
    if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) pp();

    LZ_decoder decoder( rdec, dictionary_size, outfd );
    show_dprogress( cfile_size, partial_file_pos, &rdec, &pp );	// init
    const int result = decoder.decode_member( pp );
    partial_file_pos += rdec.member_position();
    if( result != 0 )
      {
      // results 1 and 2 are reported here; other nonzero results print
      // nothing here
      if( verbosity >= 0 && result <= 2 )
        {
        pp();
        std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
                      "File ends unexpectedly" : "Decoder error",
                      partial_file_pos );
        }
      retval = 2; if( ignore_errors ) { pp.reset(); continue; } else break;
      }
    if( verbosity >= 2 )
      { std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
    }
  // at verbosity 1 a single message is shown for the whole file
  if( verbosity == 1 && retval == 0 )
    std::fputs( testing ? "ok\n" : "done\n", stderr );
  // errors in the input are not reported in the status when ignoring errors
  if( retval == 2 && ignore_errors ) retval = 0;
  return retval;
  }
682 
683 } // end namespace
684 
set_signal_handler()685 void set_signal_handler() { set_signals( signal_handler ); }
686 
close_outstream(const struct stat * const in_statsp)687 int close_outstream( const struct stat * const in_statsp )
688   {
689   if( delete_output_on_interrupt )
690     close_and_set_permissions( in_statsp );
691   if( outfd >= 0 && close( outfd ) != 0 )
692     { show_error( "Error closing stdout", errno ); return 1; }
693   outfd = -1;
694   return 0;
695   }
696 
697 
/* Return 'name' with "_fixed" inserted before a final ".tar.lz", ".lz",
   or ".tlz" (tried in that order, and only if the name is longer than the
   extension). If no extension matches, "_fixed.lz" is appended. */
std::string insert_fixed( std::string name )
  {
  static const char * const extensions[] = { ".tar.lz", ".lz", ".tlz" };
  const int count = sizeof extensions / sizeof extensions[0];

  for( int i = 0; i < count; ++i )
    {
    const std::size_t ext_len = std::strlen( extensions[i] );
    if( name.size() > ext_len &&
        name.compare( name.size() - ext_len, ext_len, extensions[i] ) == 0 )
      {
      name.insert( name.size() - ext_len, "_fixed" );
      return name;
      }
    }
  name += "_fixed.lz";
  return name;
  }
709 
710 
show_file_error(const char * const filename,const char * const msg,const int errcode)711 void show_file_error( const char * const filename, const char * const msg,
712                       const int errcode )
713   {
714   if( verbosity >= 0 )
715     std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
716                   ( errcode > 0 ) ? ": " : "",
717                   ( errcode > 0 ) ? std::strerror( errcode ) : "" );
718   }
719 
720 
show_2file_error(const char * const msg1,const char * const name1,const char * const name2,const char * const msg2)721 void show_2file_error( const char * const msg1, const char * const name1,
722                        const char * const name2, const char * const msg2 )
723   {
724   if( verbosity >= 0 )
725     std::fprintf( stderr, "%s: %s '%s' and '%s' %s\n",
726                   program_name, msg1, name1, name2, msg2 );
727   }
728 
729 
show_dprogress(const unsigned long long cfile_size,const unsigned long long partial_size,const Range_decoder * const d,const Pretty_print * const p)730 void show_dprogress( const unsigned long long cfile_size,
731                      const unsigned long long partial_size,
732                      const Range_decoder * const d,
733                      const Pretty_print * const p )
734   {
735   static unsigned long long csize = 0;		// file_size / 100
736   static unsigned long long psize = 0;
737   static const Range_decoder * rdec = 0;
738   static const Pretty_print * pp = 0;
739   static int counter = 0;
740   static bool enabled = true;
741 
742   if( !enabled ) return;
743   if( p )					// initialize static vars
744     {
745     if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
746     csize = cfile_size; psize = partial_size; rdec = d; pp = p; counter = 0;
747     }
748   if( rdec && pp && --counter <= 0 )
749     {
750     const unsigned long long pos = psize + rdec->member_position();
751     counter = 7;		// update display every 114688 bytes
752     if( csize > 0 )
753       std::fprintf( stderr, "%4llu%%  %.1f MB\r", pos / csize, pos / 1000000.0 );
754     else
755       std::fprintf( stderr, "  %.1f MB\r", pos / 1000000.0 );
756     pp->reset(); (*pp)();			// restore cursor position
757     }
758   }
759 
760 
/* Program entry point.
   Parses the command line, records the requested operation in
   'program_mode', and then either hands control to the function that
   implements that operation (returning its result as the exit status) or,
   for the alone-to-lz, decompress and test modes, processes every input
   file in the loop at the end of this function.
   Returns the exit status of the program; 1 is returned directly here for
   command line problems such as a missing operation or too few files.
*/
int main( const int argc, const char * const argv[] )
  {
  Block range( 0, 0 );
  int sector_size = INT_MAX;		// default larger than practical range
  Bad_byte bad_byte;
  Member_list member_list;
  std::string default_output_filename;	// set by -o unless the name is "-"
  std::vector< std::string > filenames;	// non-option arguments
  const char * lzip_name = "lzip";		// default is lzip
  const char * reference_filename = 0;
  Mode program_mode = m_none;
  int lzip_level = 0;		//  0 = test all levels and match lengths
				// '0'..'9' = level, 'a' = all levels
				// -5..-273 = match length, -1 = all lengths
  int repeated_byte = -1;	// 0 to 255, or -1 for all values
  bool force = false;
  bool ignore_errors = false;
  bool ignore_trailing = true;
  bool keep_input_files = false;
  bool loose_trailing = false;
  bool to_stdout = false;
  if( argc > 0 ) invocation_name = argv[0];

  // Option codes starting at 256 cannot collide with any single-character
  // code; they identify the options that are listed without a letter below.
  enum { opt_du = 256, opt_lt, opt_lzl, opt_lzn, opt_ref, opt_re, opt_st };
  const Arg_parser::Option options[] =
    {
    { 'a', "trailing-error",     Arg_parser::no  },
    { 'A', "alone-to-lz",        Arg_parser::no  },
    { 'c', "stdout",             Arg_parser::no  },
    { 'd', "decompress",         Arg_parser::no  },
    { 'D', "range-decompress",   Arg_parser::yes },
    { 'e', "reproduce",          Arg_parser::no  },
    { 'E', "debug-reproduce",    Arg_parser::yes },
    { 'f', "force",              Arg_parser::no  },
    { 'h', "help",               Arg_parser::no  },
    { 'i', "ignore-errors",      Arg_parser::no  },
    { 'k', "keep",               Arg_parser::no  },
    { 'l', "list",               Arg_parser::no  },
    { 'm', "merge",              Arg_parser::no  },
    { 'M', "md5sum",             Arg_parser::no  },
    { 'n', "threads",            Arg_parser::yes },
    { 'o', "output",             Arg_parser::yes },
    { 'q', "quiet",              Arg_parser::no  },
    { 'R', "repair",             Arg_parser::no  },
    { 's', "split",              Arg_parser::no  },
    { 'S', "nrep-stats",         Arg_parser::maybe },
    { 't', "test",               Arg_parser::no  },
    { 'U', "unzcrash",           Arg_parser::no  },
    { 'v', "verbose",            Arg_parser::no  },
    { 'V', "version",            Arg_parser::no  },
    { 'W', "debug-decompress",   Arg_parser::yes },
    { 'X', "show-packets",       Arg_parser::maybe },
    { 'Y', "debug-delay",        Arg_parser::yes },
    { 'Z', "debug-repair",       Arg_parser::yes },
    { opt_du,  "dump",           Arg_parser::yes },
    { opt_lt,  "loose-trailing", Arg_parser::no  },
    { opt_lzl, "lzip-level",     Arg_parser::yes },
    { opt_lzn, "lzip-name",      Arg_parser::yes },
    { opt_ref, "reference-file", Arg_parser::yes },
    { opt_re,  "remove",         Arg_parser::yes },
    { opt_st,  "strip",          Arg_parser::yes },
    {  0 , 0,                    Arg_parser::no  } };

  const Arg_parser parser( argc, argv, options );
  if( parser.error().size() )				// bad option
    { show_error( parser.error().c_str(), 0, true ); return 1; }

  // Process the options; the loop stops at the first parsed argument whose
  // code is 0, and 'argind' is left pointing at it for the file name loop.
  int argind = 0;
  for( ; argind < parser.arguments(); ++argind )
    {
    const int code = parser.code( argind );
    if( !code ) break;					// no more options
    const std::string & sarg = parser.argument( argind );
    const char * const arg = sarg.c_str();
    switch( code )
      {
      case 'a': ignore_trailing = false; break;
      case 'A': set_mode( program_mode, m_alone_to_lz ); break;
      case 'c': to_stdout = true; break;
      case 'd': set_mode( program_mode, m_decompress ); break;
      case 'D': set_mode( program_mode, m_range_dec );
                parse_range( arg, range ); break;
      case 'e': set_mode( program_mode, m_reproduce ); break;
      // -E selects the same mode as -e but also sets 'range' (and possibly
      // 'sector_size'); a non-empty range later selects the debug variant.
      case 'E': set_mode( program_mode, m_reproduce );
                parse_range( arg, range, &sector_size ); break;
      case 'f': force = true; break;
      case 'h': show_help(); return 0;
      case 'i': ignore_errors = true; break;
      case 'k': keep_input_files = true; break;
      case 'l': set_mode( program_mode, m_list ); break;
      case 'm': set_mode( program_mode, m_merge ); break;
      case 'M': set_mode( program_mode, m_md5sum ); break;
      case 'n': break;			// argument is accepted but not used
      // "-o -" selects standard output instead of naming an output file
      case 'o': if( sarg == "-" ) to_stdout = true;
                else { default_output_filename = sarg; } break;
      case 'q': verbosity = -1; break;
      case 'R': set_mode( program_mode, m_repair ); break;
      case 's': set_mode( program_mode, m_split ); break;
      // the argument of -S is optional; without it repeated_byte stays -1
      case 'S': if( arg[0] ) repeated_byte = getnum( arg, 0, 0, 255 );
                set_mode( program_mode, m_nrep_stats ); break;
      case 't': set_mode( program_mode, m_test ); break;
      case 'U': set_mode( program_mode, m_unzcrash ); break;
      case 'v': if( verbosity < 4 ) ++verbosity; break;	// capped at 4
      case 'V': show_version(); return 0;
      case 'W': set_mode( program_mode, m_debug_decompress );
                parse_pos_value( arg, bad_byte ); break;
      // the argument of -X is optional; bad_byte is parsed only if present
      case 'X': set_mode( program_mode, m_show_packets );
                if( arg[0] ) { parse_pos_value( arg, bad_byte ); } break;
      case 'Y': set_mode( program_mode, m_debug_delay );
                parse_range( arg, range ); break;
      case 'Z': set_mode( program_mode, m_debug_repair );
                parse_pos_value( arg, bad_byte ); break;
      case opt_du: set_mode( program_mode, m_dump );
                   member_list.parse( arg ); break;
      case opt_lt: loose_trailing = true; break;
      case opt_lzl: lzip_level = parse_lzip_level( arg ); break;
      case opt_lzn: lzip_name = arg; break;
      case opt_ref: reference_filename = arg; break;
      case opt_re: set_mode( program_mode, m_remove );
                   member_list.parse( arg ); break;
      case opt_st: set_mode( program_mode, m_strip );
                   member_list.parse( arg ); break;
      default : internal_error( "uncaught option." );
      }
    } // end process options

  // put standard input and output in binary mode on these platforms
#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
  setmode( STDIN_FILENO, O_BINARY );
  setmode( STDOUT_FILENO, O_BINARY );
#endif

  if( program_mode == m_none )
    {
    show_error( "You must specify the operation to be performed.", 0, true );
    return 1;
    }

  // Collect the remaining arguments as file names. 'filenames_given'
  // becomes true if at least one of them is not "-".
  bool filenames_given = false;
  for( ; argind < parser.arguments(); ++argind )
    {
    filenames.push_back( parser.argument( argind ) );
    if( filenames.back() != "-" ) filenames_given = true;
    }

  // '\r' if standard output is a terminal, else '\n'; passed to some of
  // the functions called below
  const char terminator = isatty( STDOUT_FILENO ) ? '\r' : '\n';
  // Dispatch on the operation. The cases that just 'break' (alone-to-lz,
  // decompress, list, md5sum, test) are handled after the switch; every
  // other valid mode returns from inside it.
  // NOTE(review): one_file() is defined elsewhere; presumably it rejects
  // any file count other than 1, which the filenames[0] accesses rely on.
  try {
  switch( program_mode )
    {
    case m_none: internal_error( "invalid operation." ); break;
    case m_alone_to_lz: break;
    case m_debug_decompress:
      one_file( filenames.size() );
      return debug_decompress( filenames[0], bad_byte, false );
    case m_debug_delay:
      one_file( filenames.size() );
      return debug_delay( filenames[0], range, terminator );
    case m_debug_repair:
      one_file( filenames.size() );
      return debug_repair( filenames[0], bad_byte, terminator );
    case m_decompress: break;
    case m_dump:			// dump and strip share dump_members
    case m_strip:
      if( filenames.size() < 1 )
        { show_error( "You must specify at least 1 file.", 0, true ); return 1; }
      return dump_members( filenames, default_output_filename, member_list,
                           force, ignore_errors, ignore_trailing,
                           loose_trailing, program_mode == m_strip, to_stdout );
    case m_list: break;
    case m_md5sum: break;
    case m_merge:
      if( filenames.size() < 2 )
        { show_error( "You must specify at least 2 files.", 0, true ); return 1; }
      return merge_files( filenames, default_output_filename, terminator, force );
    case m_nrep_stats: return print_nrep_stats( filenames, repeated_byte,
                              ignore_errors, ignore_trailing, loose_trailing );
    case m_range_dec:
      one_file( filenames.size() );
      return range_decompress( filenames[0], default_output_filename, range,
                               force, ignore_errors, ignore_trailing,
                               loose_trailing, to_stdout );
    case m_remove:
      if( filenames.size() < 1 )
        { show_error( "You must specify at least 1 file.", 0, true ); return 1; }
      return remove_members( filenames, member_list, ignore_errors,
                             ignore_trailing, loose_trailing );
    case m_repair:
      one_file( filenames.size() );
      return repair_file( filenames[0], default_output_filename, terminator, force );
    case m_reproduce:
      one_file( filenames.size() );
      if( !reference_filename || !reference_filename[0] )
        { show_error( "You must specify a reference file.", 0, true ); return 1; }
      // a non-empty range (set only by -E) selects the debug variant
      if( range.size() > 0 )
        return debug_reproduce_file( filenames[0], lzip_name,
          reference_filename, range, sector_size, lzip_level );
      else
        return reproduce_file( filenames[0], default_output_filename,
          lzip_name, reference_filename, lzip_level, terminator, force );
    case m_show_packets:
      one_file( filenames.size() );
      return debug_decompress( filenames[0], bad_byte, true );
    case m_split:
      one_file( filenames.size() );
      return split_file( filenames[0], default_output_filename, force );
    case m_test: break;
    case m_unzcrash:
      one_file( filenames.size() );
      return lunzcrash( filenames[0] );
    }
    }
  catch( std::bad_alloc & ) { show_error( mem_msg ); cleanup_and_fail( 1 ); }
  catch( Error & e ) { show_error( e.msg, errno ); cleanup_and_fail( 1 ); }

  // with no file names, process "-" (handled as standard input below)
  if( filenames.empty() ) filenames.push_back("-");

  if( program_mode == m_list )
    return list_files( filenames, ignore_errors, ignore_trailing, loose_trailing );
  if( program_mode == m_md5sum )
    return md5sum_files( filenames );

  // only the three modes handled by the file loop below may reach this point
  if( program_mode != m_alone_to_lz && program_mode != m_decompress &&
      program_mode != m_test )
    internal_error( "invalid decompressor operation." );

  if( program_mode == m_test ) to_stdout = false;	// apply overrides
  if( program_mode == m_test || to_stdout ) default_output_filename.clear();

  if( to_stdout && program_mode != m_test )	// check tty only once
    { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; }
  else outfd = -1;

  // to_file: all output goes to the single file named with -o
  const bool to_file = !to_stdout && program_mode != m_test &&
                       default_output_filename.size();
  // install signal_handler only when output files may be created
  if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
    set_signals( signal_handler );

  Pretty_print pp( filenames );

  int failed_tests = 0;		// number of inputs that failed in test mode
  int retval = 0;		// exit status accumulated through set_retval
  // one_to_one: each named input produces its own output file
  const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
  bool stdin_used = false;	// standard input is read at most once
  for( unsigned i = 0; i < filenames.size(); ++i )
    {
    std::string input_filename;		// stays empty for standard input
    int infd;
    struct stat in_stats;		// filled only for named input files

    pp.set_name( filenames[i] );
    if( filenames[i] == "-" )
      {
      if( stdin_used ) continue; else stdin_used = true;
      infd = STDIN_FILENO;
      if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
      // in one-to-one mode, standard input is written to standard output
      if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); }
      }
    else
      {
      input_filename = filenames[i];
      infd = open_instream( input_filename.c_str(), &in_stats, one_to_one );
      if( infd < 0 ) { set_retval( retval, 1 ); continue; }
      if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
      if( one_to_one )			// open outfd after verifying infd
        {
        // NOTE(review): set_a_outname/set_d_outname are defined elsewhere;
        // presumably they derive output_filename from the input name.
        if( program_mode == m_alone_to_lz ) set_a_outname( input_filename );
        else set_d_outname( input_filename, extension_index( input_filename ) );
        if( !open_outstream( force, true ) )
          { close( infd ); set_retval( retval, 1 ); continue; }
        }
      }

    if( one_to_one && !check_tty_out( program_mode ) )
      { set_retval( retval, 1 ); return retval; }	// don't delete a tty

    if( to_file && outfd < 0 )		// open outfd after verifying infd
      {
      output_filename = default_output_filename;
      if( !open_outstream( force, false ) || !check_tty_out( program_mode ) )
        return 1;	// check tty only once and don't try to delete a tty
      }

    // input file status is passed on only for one-to-one output from a
    // named input file
    const struct stat * const in_statsp =
      ( input_filename.size() && one_to_one ) ? &in_stats : 0;
    // size of a named regular input file divided by 100, rounded up;
    // 0 for standard input or a non-regular file
    const unsigned long long cfile_size =
      ( input_filename.size() && S_ISREG( in_stats.st_mode ) ) ?
        ( in_stats.st_size + 99 ) / 100 : 0;
    int tmp;				// status of this input file
    try {
      if( program_mode == m_alone_to_lz )
        tmp = alone_to_lz( infd, pp );
      else
        tmp = decompress( cfile_size, infd, pp, ignore_errors, ignore_trailing,
                          loose_trailing, program_mode == m_test );
      }
    catch( std::bad_alloc & ) { pp( mem_msg ); tmp = 1; }
    catch( Error & e ) { pp(); show_error( e.msg, errno ); tmp = 1; }
    if( close( infd ) != 0 )
      { show_file_error( pp.name(), "Error closing input file", errno );
        set_retval( tmp, 1 ); }
    set_retval( retval, tmp );
    // A failure is only counted in test mode; in the other modes
    // cleanup_and_fail is called (presumably it does not return).
    if( tmp )
      { if( program_mode != m_test ) cleanup_and_fail( retval );
        else ++failed_tests; }

    if( delete_output_on_interrupt && one_to_one )
      close_and_set_permissions( in_statsp );
    // Remove a named input file in one-to-one mode unless -k was given;
    // it is also kept when decompressing with --ignore-errors.
    if( input_filename.size() && !keep_input_files && one_to_one &&
        ( program_mode != m_decompress || !ignore_errors ) )
      std::remove( input_filename.c_str() );
    }
  if( delete_output_on_interrupt ) close_and_set_permissions( 0 );	// -o
  else if( outfd >= 0 && close( outfd ) != 0 )				// -c
    {
    show_error( "Error closing stdout", errno );
    set_retval( retval, 1 );
    }
  // summarize test failures only when more than one file was given
  if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 )
    std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",
                  program_name, failed_tests,
                  ( failed_tests == 1 ) ? "file" : "files" );
  return retval;
  }
1083