1 /* Lzip - LZMA lossless data compressor
2    Copyright (C) 2008-2021 Antonio Diaz Diaz.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17 /*
18    Exit status: 0 for a normal exit, 1 for environmental problems
19    (file not found, invalid flags, I/O errors, etc), 2 to indicate a
20    corrupt or invalid input file, 3 for an internal consistency error
21    (eg, bug) which caused lzip to panic.
22 */
23 
24 #define _FILE_OFFSET_BITS 64
25 
26 #include <algorithm>
27 #include <cctype>
28 #include <cerrno>
29 #include <climits>
30 #include <csignal>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34 #include <new>
35 #include <string>
36 #include <vector>
37 #include <fcntl.h>
38 #include <stdint.h>
39 #include <unistd.h>
40 #include <utime.h>
41 #include <sys/stat.h>
42 #if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
43 #include <io.h>
44 #if defined(__MSVCRT__)
45 #define fchmod(x,y) 0
46 #define fchown(x,y,z) 0
47 #define strtoull std::strtoul
48 #define SIGHUP SIGTERM
49 #define S_ISSOCK(x) 0
50 #ifndef S_IRGRP
51 #define S_IRGRP 0
52 #define S_IWGRP 0
53 #define S_IROTH 0
54 #define S_IWOTH 0
55 #endif
56 #endif
57 #if defined(__DJGPP__)
58 #define S_ISSOCK(x) 0
59 #define S_ISVTX 0
60 #endif
61 #endif
62 
63 #include "arg_parser.h"
64 #include "lzip.h"
65 #include "decoder.h"
66 #include "encoder_base.h"
67 #include "encoder.h"
68 #include "fast_encoder.h"
69 
70 #ifndef O_BINARY
71 #define O_BINARY 0
72 #endif
73 
74 #if CHAR_BIT != 8
75 #error "Environments where CHAR_BIT != 8 are not supported."
76 #endif
77 
/* Global verbosity level. The message helpers in this file print nothing
   when it is negative; values of 1 and above enable progressively more
   informational output. */
int verbosity = 0;
79 
80 namespace {
81 
// Name used as prefix in diagnostics and printed by show_version.
const char * const program_name = "lzip";
// Copyright year printed by show_version.
const char * const program_year = "2021";
// Name shown in the usage line and in the "Try '... --help'" hint.
const char * invocation_name = program_name;		// default value

/* Recognized compressed-file suffixes ('from') and the suffix that replaces
   each of them in the decompressed file name ('to').
   An entry with a null 'from' terminates the table. */
const struct { const char * from; const char * to; } known_extensions[] = {
  { ".lz",  ""     },
  { ".tlz", ".tar" },
  { 0,      0      } };
90 
/* Pair of encoder parameters. compress() verifies both values before
   creating the (non-fast) encoder. */
struct Lzma_options
  {
  int dictionary_size;		// 4 KiB .. 512 MiB
  int match_len_limit;		// 5 .. 273
  };
96 
// Operation requested by the user; m_compress doubles as the initial value.
enum Mode { m_compress, m_decompress, m_list, m_test };

/* Variables used in signal handler context.
   They are not declared volatile because the handler never returns. */
std::string output_filename;	// name of the output file currently being written
int outfd = -1;			// output file descriptor, -1 when not open
bool delete_output_on_interrupt = false;	// true while a partial output file exists
104 
105 
show_help()106 void show_help()
107   {
108   std::printf( "Lzip is a lossless data compressor with a user interface similar to the one\n"
109                "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n"
110                "chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n"
111                "interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n"
112                "compress most files more than bzip2 (lzip -9). Decompression speed is\n"
113                "intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n"
114                "a data recovery perspective. Lzip has been designed, written, and tested\n"
115                "with great care to replace gzip and bzip2 as the standard general-purpose\n"
116                "compressed format for unix-like systems.\n"
117                "\nUsage: %s [options] [files]\n", invocation_name );
118   std::printf( "\nOptions:\n"
119                "  -h, --help                     display this help and exit\n"
120                "  -V, --version                  output version information and exit\n"
121                "  -a, --trailing-error           exit with error status if trailing data\n"
122                "  -b, --member-size=<bytes>      set member size limit in bytes\n"
123                "  -c, --stdout                   write to standard output, keep input files\n"
124                "  -d, --decompress               decompress\n"
125                "  -f, --force                    overwrite existing output files\n"
126                "  -F, --recompress               force re-compression of compressed files\n"
127                "  -k, --keep                     keep (don't delete) input files\n"
128                "  -l, --list                     print (un)compressed file sizes\n"
129                "  -m, --match-length=<bytes>     set match length limit in bytes [36]\n"
130                "  -o, --output=<file>            write to <file>, keep input files\n"
131                "  -q, --quiet                    suppress all messages\n"
132                "  -s, --dictionary-size=<bytes>  set dictionary size limit in bytes [8 MiB]\n"
133                "  -S, --volume-size=<bytes>      set volume size limit in bytes\n"
134                "  -t, --test                     test compressed file integrity\n"
135                "  -v, --verbose                  be verbose (a 2nd -v gives more)\n"
136                "  -0 .. -9                       set compression level [default 6]\n"
137                "      --fast                     alias for -0\n"
138                "      --best                     alias for -9\n"
139                "      --loose-trailing           allow trailing data seeming corrupt header\n"
140                "\nIf no file names are given, or if a file is '-', lzip compresses or\n"
141                "decompresses from standard input to standard output.\n"
142                "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
143                "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
144                "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n"
145                "to 2^29 bytes.\n"
146                "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n"
147                "scale optimal for all files. If your files are large, very repetitive,\n"
148                "etc, you may need to use the options --dictionary-size and --match-length\n"
149                "directly to achieve optimal performance.\n"
150                "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
151                "'tar -xf foo.tar.lz' or 'lzip -cd foo.tar.lz | tar -xf -'.\n"
152                "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
153                "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
154                "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
155                "caused lzip to panic.\n"
156                "\nThe ideas embodied in lzip are due to (at least) the following people:\n"
157                "Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the\n"
158                "definition of Markov chains), G.N.N. Martin (for the definition of range\n"
159                "encoding), Igor Pavlov (for putting all the above together in LZMA), and\n"
160                "Julian Seward (for bzip2's CLI).\n"
161                "\nReport bugs to lzip-bug@nongnu.org\n"
162                "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
163   }
164 
165 
show_version()166 void show_version()
167   {
168   std::printf( "%s %s\n", program_name, PROGVERSION );
169   std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
170   std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n"
171                "This is free software: you are free to change and redistribute it.\n"
172                "There is NO WARRANTY, to the extent permitted by law.\n" );
173   }
174 
175 } // end namespace
176 
operator ()(const char * const msg) const177 void Pretty_print::operator()( const char * const msg ) const
178   {
179   if( verbosity >= 0 )
180     {
181     if( first_post )
182       {
183       first_post = false;
184       std::fputs( padded_name.c_str(), stderr );
185       if( !msg ) std::fflush( stderr );
186       }
187     if( msg ) std::fprintf( stderr, "%s\n", msg );
188     }
189   }
190 
191 
/* Return a message stating that member format 'version' is not supported.
   The text lives in a static buffer that is overwritten by the next call. */
const char * bad_version( const unsigned version )
  {
  enum { bufsize = 80 };
  static char text[bufsize];
  snprintf( text, bufsize, "Version %u member format not supported.",
            version );
  return text;
  }
199 
200 
/* Format 'dictionary_size' as a short human-readable string using binary
   prefixes (for example "   8 MiB"). The value is scaled down by 1024 while
   it has more than 4 digits or is still an exact multiple of 1024.
   The result lives in a static buffer overwritten by the next call. */
const char * format_ds( const unsigned dictionary_size )
  {
  enum { bufsize = 16, factor = 1024, n_prefixes = 8 };
  static char buf[bufsize];
  static const char * const prefix[n_prefixes] =
    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
  unsigned value = dictionary_size;
  bool exact = ( value % factor == 0 );
  int steps = 0;			// number of divisions by 'factor' done

  while( steps < n_prefixes &&
         ( value > 9999 || ( exact && value >= factor ) ) )
    {
    value /= factor;
    exact = exact && ( value % factor == 0 );
    ++steps;
    }
  // an unscaled value gets two spaces in place of the missing prefix
  const char * const unit = ( steps > 0 ) ? prefix[steps-1] : "";
  const char * const pad = ( steps > 0 ) ? "" : "  ";
  snprintf( buf, bufsize, "%s%4u %sB", pad, value, unit );
  return buf;
  }
218 
219 
show_header(const unsigned dictionary_size)220 void show_header( const unsigned dictionary_size )
221   {
222   std::fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) );
223   }
224 
225 namespace {
226 
getnum(const char * const ptr,const unsigned long long llimit,const unsigned long long ulimit)227 unsigned long long getnum( const char * const ptr,
228                            const unsigned long long llimit,
229                            const unsigned long long ulimit )
230   {
231   char * tail;
232   errno = 0;
233   unsigned long long result = strtoull( ptr, &tail, 0 );
234   if( tail == ptr )
235     {
236     show_error( "Bad or missing numerical argument.", 0, true );
237     std::exit( 1 );
238     }
239 
240   if( !errno && tail[0] )
241     {
242     const unsigned factor = ( tail[1] == 'i' ) ? 1024 : 1000;
243     int exponent = 0;				// 0 = bad multiplier
244     switch( tail[0] )
245       {
246       case 'Y': exponent = 8; break;
247       case 'Z': exponent = 7; break;
248       case 'E': exponent = 6; break;
249       case 'P': exponent = 5; break;
250       case 'T': exponent = 4; break;
251       case 'G': exponent = 3; break;
252       case 'M': exponent = 2; break;
253       case 'K': if( factor == 1024 ) exponent = 1; break;
254       case 'k': if( factor == 1000 ) exponent = 1; break;
255       }
256     if( exponent <= 0 )
257       {
258       show_error( "Bad multiplier in numerical argument.", 0, true );
259       std::exit( 1 );
260       }
261     for( int i = 0; i < exponent; ++i )
262       {
263       if( ulimit / factor >= result ) result *= factor;
264       else { errno = ERANGE; break; }
265       }
266     }
267   if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
268   if( errno )
269     {
270     show_error( "Numerical argument out of limits." );
271     std::exit( 1 );
272     }
273   return result;
274   }
275 
276 
get_dict_size(const char * const arg)277 int get_dict_size( const char * const arg )
278   {
279   char * tail;
280   const long bits = std::strtol( arg, &tail, 0 );
281   if( bits >= min_dictionary_bits &&
282       bits <= max_dictionary_bits && *tail == 0 )
283     return 1 << bits;
284   return getnum( arg, min_dictionary_size, max_dictionary_size );
285   }
286 
287 
set_mode(Mode & program_mode,const Mode new_mode)288 void set_mode( Mode & program_mode, const Mode new_mode )
289   {
290   if( program_mode != m_compress && program_mode != new_mode )
291     {
292     show_error( "Only one operation can be specified.", 0, true );
293     std::exit( 1 );
294     }
295   program_mode = new_mode;
296   }
297 
298 
extension_index(const std::string & name)299 int extension_index( const std::string & name )
300   {
301   for( int eindex = 0; known_extensions[eindex].from; ++eindex )
302     {
303     const std::string ext( known_extensions[eindex].from );
304     if( name.size() > ext.size() &&
305         name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 )
306       return eindex;
307     }
308   return -1;
309   }
310 
311 
set_c_outname(const std::string & name,const bool filenames_given,const bool force_ext,const bool multifile)312 void set_c_outname( const std::string & name, const bool filenames_given,
313                     const bool force_ext, const bool multifile )
314   {
315   /* zupdate < 1.9 depends on lzip adding the extension '.lz' to name when
316      reading from standard input. */
317   output_filename = name;
318   if( multifile ) output_filename += "00001";
319   if( force_ext || multifile ||
320       ( !filenames_given && extension_index( output_filename ) < 0 ) )
321     output_filename += known_extensions[0].from;
322   }
323 
324 
set_d_outname(const std::string & name,const int eindex)325 void set_d_outname( const std::string & name, const int eindex )
326   {
327   if( eindex >= 0 )
328     {
329     const std::string from( known_extensions[eindex].from );
330     if( name.size() > from.size() )
331       {
332       output_filename.assign( name, 0, name.size() - from.size() );
333       output_filename += known_extensions[eindex].to;
334       return;
335       }
336     }
337   output_filename = name; output_filename += ".out";
338   if( verbosity >= 1 )
339     std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
340                   program_name, name.c_str(), output_filename.c_str() );
341   }
342 
343 } // end namespace
344 
open_instream(const char * const name,struct stat * const in_statsp,const bool one_to_one,const bool reg_only)345 int open_instream( const char * const name, struct stat * const in_statsp,
346                    const bool one_to_one, const bool reg_only )
347   {
348   int infd = open( name, O_RDONLY | O_BINARY );
349   if( infd < 0 )
350     show_file_error( name, "Can't open input file", errno );
351   else
352     {
353     const int i = fstat( infd, in_statsp );
354     const mode_t mode = in_statsp->st_mode;
355     const bool can_read = ( i == 0 && !reg_only &&
356                             ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
357                               S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
358     if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
359       {
360       if( verbosity >= 0 )
361         std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
362                       program_name, name, ( can_read && one_to_one ) ?
363                       ",\n      and neither '-c' nor '-o' were specified" : "" );
364       close( infd );
365       infd = -1;
366       }
367     }
368   return infd;
369   }
370 
371 namespace {
372 
open_instream2(const char * const name,struct stat * const in_statsp,const Mode program_mode,const int eindex,const bool one_to_one,const bool recompress)373 int open_instream2( const char * const name, struct stat * const in_statsp,
374                     const Mode program_mode, const int eindex,
375                     const bool one_to_one, const bool recompress )
376   {
377   if( program_mode == m_compress && !recompress && eindex >= 0 )
378     {
379     if( verbosity >= 0 )
380       std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
381                     program_name, name, known_extensions[eindex].from );
382     return -1;
383     }
384   return open_instream( name, in_statsp, one_to_one, false );
385   }
386 
387 
open_outstream(const bool force,const bool protect)388 bool open_outstream( const bool force, const bool protect )
389   {
390   const mode_t usr_rw = S_IRUSR | S_IWUSR;
391   const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
392   const mode_t outfd_mode = protect ? usr_rw : all_rw;
393   int flags = O_CREAT | O_WRONLY | O_BINARY;
394   if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
395 
396   outfd = open( output_filename.c_str(), flags, outfd_mode );
397   if( outfd >= 0 ) delete_output_on_interrupt = true;
398   else if( verbosity >= 0 )
399     {
400     if( errno == EEXIST )
401       std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
402                     program_name, output_filename.c_str() );
403     else
404       std::fprintf( stderr, "%s: Can't create output file '%s': %s\n",
405                     program_name, output_filename.c_str(), std::strerror( errno ) );
406     }
407   return ( outfd >= 0 );
408   }
409 
410 
// Install 'action' as the handler for SIGHUP, SIGINT, and SIGTERM.
void set_signals( void (*action)(int) )
  {
  const int signums[] = { SIGHUP, SIGINT, SIGTERM };
  const unsigned count = sizeof signums / sizeof signums[0];
  for( unsigned i = 0; i < count; ++i )
    std::signal( signums[i], action );
  }
417 
418 
cleanup_and_fail(const int retval)419 void cleanup_and_fail( const int retval )
420   {
421   set_signals( SIG_IGN );			// ignore signals
422   if( delete_output_on_interrupt )
423     {
424     delete_output_on_interrupt = false;
425     if( verbosity >= 0 )
426       std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
427                     program_name, output_filename.c_str() );
428     if( outfd >= 0 ) { close( outfd ); outfd = -1; }
429     if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
430       show_error( "WARNING: deletion of output file (apparently) failed." );
431     }
432   std::exit( retval );
433   }
434 
435 
signal_handler(int)436 extern "C" void signal_handler( int )
437   {
438   show_error( "Control-C or similar caught, quitting." );
439   cleanup_and_fail( 1 );
440   }
441 
442 
check_tty_in(const char * const input_filename,const int infd,const Mode program_mode,int & retval)443 bool check_tty_in( const char * const input_filename, const int infd,
444                    const Mode program_mode, int & retval )
445   {
446   if( ( program_mode == m_decompress || program_mode == m_test ) &&
447       isatty( infd ) )				// for example /dev/tty
448     { show_file_error( input_filename,
449                        "I won't read compressed data from a terminal." );
450       close( infd ); set_retval( retval, 1 );
451       if( program_mode != m_test ) cleanup_and_fail( retval );
452       return false; }
453   return true;
454   }
455 
check_tty_out(const Mode program_mode)456 bool check_tty_out( const Mode program_mode )
457   {
458   if( program_mode == m_compress && isatty( outfd ) )
459     { show_file_error( output_filename.size() ?
460                        output_filename.c_str() : "(stdout)",
461                        "I won't write compressed data to a terminal." );
462       return false; }
463   return true;
464   }
465 
466 
467 // Set permissions, owner, and times.
close_and_set_permissions(const struct stat * const in_statsp)468 void close_and_set_permissions( const struct stat * const in_statsp )
469   {
470   bool warning = false;
471   if( in_statsp )
472     {
473     const mode_t mode = in_statsp->st_mode;
474     // fchown will in many cases return with EPERM, which can be safely ignored.
475     if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
476       { if( fchmod( outfd, mode ) != 0 ) warning = true; }
477     else
478       if( errno != EPERM ||
479           fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
480         warning = true;
481     }
482   if( close( outfd ) != 0 )
483     {
484     show_error( "Error closing output file", errno );
485     cleanup_and_fail( 1 );
486     }
487   outfd = -1;
488   delete_output_on_interrupt = false;
489   if( in_statsp )
490     {
491     struct utimbuf t;
492     t.actime = in_statsp->st_atime;
493     t.modtime = in_statsp->st_mtime;
494     if( utime( output_filename.c_str(), &t ) != 0 ) warning = true;
495     }
496   if( warning && verbosity >= 1 )
497     show_error( "Can't change output file attributes." );
498   }
499 
500 
next_filename()501 bool next_filename()
502   {
503   const unsigned name_len = output_filename.size();
504   const unsigned ext_len = std::strlen( known_extensions[0].from );
505   if( name_len >= ext_len + 5 )				// "*00001.lz"
506     for( int i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j )
507       {
508       if( output_filename[i] < '9' ) { ++output_filename[i]; return true; }
509       else output_filename[i] = '0';
510       }
511   return false;
512   }
513 
514 
compress(const unsigned long long cfile_size,const unsigned long long member_size,const unsigned long long volume_size,const int infd,const Lzma_options & encoder_options,const Pretty_print & pp,const struct stat * const in_statsp,const bool zero)515 int compress( const unsigned long long cfile_size,
516               const unsigned long long member_size,
517               const unsigned long long volume_size, const int infd,
518               const Lzma_options & encoder_options, const Pretty_print & pp,
519               const struct stat * const in_statsp, const bool zero )
520   {
521   int retval = 0;
522   LZ_encoder_base * encoder = 0;		// polymorphic encoder
523   if( verbosity >= 1 ) pp();
524 
525   if( zero )
526     encoder = new FLZ_encoder( infd, outfd );
527   else
528     {
529     Lzip_header header;
530     if( header.dictionary_size( encoder_options.dictionary_size ) &&
531         encoder_options.match_len_limit >= min_match_len_limit &&
532         encoder_options.match_len_limit <= max_match_len )
533       encoder = new LZ_encoder( header.dictionary_size(),
534                                 encoder_options.match_len_limit, infd, outfd );
535     else internal_error( "invalid argument to encoder." );
536     }
537 
538   unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0;
539   while( true )		// encode one member per iteration
540     {
541     const unsigned long long size = ( volume_size > 0 ) ?
542       std::min( member_size, volume_size - partial_volume_size ) : member_size;
543     show_cprogress( cfile_size, in_size, encoder, &pp );	// init
544     if( !encoder->encode_member( size ) )
545       { pp( "Encoder error." ); retval = 1; break; }
546     in_size += encoder->data_position();
547     out_size += encoder->member_position();
548     if( encoder->data_finished() ) break;
549     if( volume_size > 0 )
550       {
551       partial_volume_size += encoder->member_position();
552       if( partial_volume_size >= volume_size - min_dictionary_size )
553         {
554         partial_volume_size = 0;
555         if( delete_output_on_interrupt )
556           {
557           close_and_set_permissions( in_statsp );
558           if( !next_filename() )
559             { pp( "Too many volume files." ); retval = 1; break; }
560           if( !open_outstream( true, in_statsp ) ) { retval = 1; break; }
561           }
562         }
563       }
564     encoder->reset();
565     }
566 
567   if( retval == 0 && verbosity >= 1 )
568     {
569     if( in_size == 0 || out_size == 0 )
570       std::fputs( " no data compressed.\n", stderr );
571     else
572       std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved, "
573                             "%llu in, %llu out.\n",
574                     (double)in_size / out_size,
575                     ( 100.0 * out_size ) / in_size,
576                     100.0 - ( ( 100.0 * out_size ) / in_size ),
577                     in_size, out_size );
578     }
579   delete encoder;
580   return retval;
581   }
582 
583 
/* Return the uppercase hexadecimal digit for 'value' (0 to 15),
   or 0 if 'value' is out of range. */
unsigned char xdigit( const unsigned value )
  {
  static const char digits[] = "0123456789ABCDEF";
  return ( value <= 15 ) ? digits[value] : 0;
  }
590 
591 
show_trailing_data(const uint8_t * const data,const int size,const Pretty_print & pp,const bool all,const int ignore_trailing)592 bool show_trailing_data( const uint8_t * const data, const int size,
593                          const Pretty_print & pp, const bool all,
594                          const int ignore_trailing )	// -1 = show
595   {
596   if( verbosity >= 4 || ignore_trailing <= 0 )
597     {
598     std::string msg;
599     if( !all ) msg = "first bytes of ";
600     msg += "trailing data = ";
601     for( int i = 0; i < size; ++i )
602       {
603       msg += xdigit( data[i] >> 4 );
604       msg += xdigit( data[i] & 0x0F );
605       msg += ' ';
606       }
607     msg += '\'';
608     for( int i = 0; i < size; ++i )
609       { if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; }
610     msg += '\'';
611     pp( msg.c_str() );
612     if( ignore_trailing == 0 ) show_file_error( pp.name(), trailing_msg );
613     }
614   return ( ignore_trailing > 0 );
615   }
616 
617 
decompress(const unsigned long long cfile_size,const int infd,const Pretty_print & pp,const bool ignore_trailing,const bool loose_trailing,const bool testing)618 int decompress( const unsigned long long cfile_size, const int infd,
619                 const Pretty_print & pp, const bool ignore_trailing,
620                 const bool loose_trailing, const bool testing )
621   {
622   int retval = 0;
623   unsigned long long partial_file_pos = 0;
624   Range_decoder rdec( infd );
625   for( bool first_member = true; ; first_member = false )
626     {
627     Lzip_header header;
628     rdec.reset_member_position();
629     const int size = rdec.read_data( header.data, Lzip_header::size );
630     if( rdec.finished() )			// End Of File
631       {
632       if( first_member )
633         { show_file_error( pp.name(), "File ends unexpectedly at member header." );
634           retval = 2; }
635       else if( header.verify_prefix( size ) )
636         { pp( "Truncated header in multimember file." );
637           show_trailing_data( header.data, size, pp, true, -1 );
638           retval = 2; }
639       else if( size > 0 && !show_trailing_data( header.data, size, pp,
640                                                 true, ignore_trailing ) )
641         retval = 2;
642       break;
643       }
644     if( !header.verify_magic() )
645       {
646       if( first_member )
647         { show_file_error( pp.name(), bad_magic_msg ); retval = 2; }
648       else if( !loose_trailing && header.verify_corrupt() )
649         { pp( corrupt_mm_msg );
650           show_trailing_data( header.data, size, pp, false, -1 );
651           retval = 2; }
652       else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
653         retval = 2;
654       break;
655       }
656     if( !header.verify_version() )
657       { pp( bad_version( header.version() ) ); retval = 2; break; }
658     const unsigned dictionary_size = header.dictionary_size();
659     if( !isvalid_ds( dictionary_size ) )
660       { pp( bad_dict_msg ); retval = 2; break; }
661 
662     if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) pp();
663 
664     LZ_decoder decoder( rdec, dictionary_size, outfd );
665     show_dprogress( cfile_size, partial_file_pos, &rdec, &pp );	// init
666     const int result = decoder.decode_member( pp );
667     partial_file_pos += rdec.member_position();
668     if( result != 0 )
669       {
670       if( verbosity >= 0 && result <= 2 )
671         {
672         pp();
673         std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
674                       "File ends unexpectedly" : "Decoder error",
675                       partial_file_pos );
676         }
677       retval = 2; break;
678       }
679     if( verbosity >= 2 )
680       { std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
681     }
682   if( verbosity == 1 && retval == 0 )
683     std::fputs( testing ? "ok\n" : "done\n", stderr );
684   return retval;
685   }
686 
687 } // end namespace
688 
689 
show_error(const char * const msg,const int errcode,const bool help)690 void show_error( const char * const msg, const int errcode, const bool help )
691   {
692   if( verbosity < 0 ) return;
693   if( msg && msg[0] )
694     std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg,
695                   ( errcode > 0 ) ? ": " : "",
696                   ( errcode > 0 ) ? std::strerror( errcode ) : "" );
697   if( help )
698     std::fprintf( stderr, "Try '%s --help' for more information.\n",
699                   invocation_name );
700   }
701 
702 
show_file_error(const char * const filename,const char * const msg,const int errcode)703 void show_file_error( const char * const filename, const char * const msg,
704                       const int errcode )
705   {
706   if( verbosity >= 0 )
707     std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
708                   ( errcode > 0 ) ? ": " : "",
709                   ( errcode > 0 ) ? std::strerror( errcode ) : "" );
710   }
711 
712 
internal_error(const char * const msg)713 void internal_error( const char * const msg )
714   {
715   if( verbosity >= 0 )
716     std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
717   std::exit( 3 );
718   }
719 
720 
show_cprogress(const unsigned long long cfile_size,const unsigned long long partial_size,const Matchfinder_base * const m,const Pretty_print * const p)721 void show_cprogress( const unsigned long long cfile_size,
722                      const unsigned long long partial_size,
723                      const Matchfinder_base * const m,
724                      const Pretty_print * const p )
725   {
726   static unsigned long long csize = 0;		// file_size / 100
727   static unsigned long long psize = 0;
728   static const Matchfinder_base * mb = 0;
729   static const Pretty_print * pp = 0;
730   static bool enabled = true;
731 
732   if( !enabled ) return;
733   if( p )					// initialize static vars
734     {
735     if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
736     csize = cfile_size; psize = partial_size; mb = m; pp = p;
737     }
738   if( mb && pp )
739     {
740     const unsigned long long pos = psize + mb->data_position();
741     if( csize > 0 )
742       std::fprintf( stderr, "%4llu%%  %.1f MB\r", pos / csize, pos / 1000000.0 );
743     else
744       std::fprintf( stderr, "  %.1f MB\r", pos / 1000000.0 );
745     pp->reset(); (*pp)();			// restore cursor position
746     }
747   }
748 
749 
show_dprogress(const unsigned long long cfile_size,const unsigned long long partial_size,const Range_decoder * const d,const Pretty_print * const p)750 void show_dprogress( const unsigned long long cfile_size,
751                      const unsigned long long partial_size,
752                      const Range_decoder * const d,
753                      const Pretty_print * const p )
754   {
755   static unsigned long long csize = 0;		// file_size / 100
756   static unsigned long long psize = 0;
757   static const Range_decoder * rdec = 0;
758   static const Pretty_print * pp = 0;
759   static int counter = 0;
760   static bool enabled = true;
761 
762   if( !enabled ) return;
763   if( p )					// initialize static vars
764     {
765     if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
766     csize = cfile_size; psize = partial_size; rdec = d; pp = p; counter = 0;
767     }
768   if( rdec && pp && --counter <= 0 )
769     {
770     const unsigned long long pos = psize + rdec->member_position();
771     counter = 7;		// update display every 114688 bytes
772     if( csize > 0 )
773       std::fprintf( stderr, "%4llu%%  %.1f MB\r", pos / csize, pos / 1000000.0 );
774     else
775       std::fprintf( stderr, "  %.1f MB\r", pos / 1000000.0 );
776     pp->reset(); (*pp)();			// restore cursor position
777     }
778   }
779 
780 
/* Program entry point.
   Parses the command line, then either lists the named files (list mode)
   or compresses, decompresses, or tests each of them in turn.
   Returns 0 after --help/--version, 1 on a bad option or when the output
   cannot be set up, the result of list_files in list mode, and otherwise
   the status accumulated in 'retval' through set_retval (see the
   "Exit status" comment at the top of this file for the meaning of the
   values). */
int main( const int argc, const char * const argv[] )
  {
  /* Mapping from gzip/bzip2 style 1..9 compression modes
     to the corresponding LZMA compression modes. */
  // NOTE(review): each entry is presumably { dictionary_size,
  // match_len_limit }; confirm against the declaration of Lzma_options.
  const Lzma_options option_mapping[] =
    {
    { 1 << 16,  16 },		// -0
    { 1 << 20,   5 },		// -1
    { 3 << 19,   6 },		// -2
    { 1 << 21,   8 },		// -3
    { 3 << 20,  12 },		// -4
    { 1 << 22,  20 },		// -5
    { 1 << 23,  36 },		// -6
    { 1 << 24,  68 },		// -7
    { 3 << 23, 132 },		// -8
    { 1 << 25, 273 } };		// -9
  Lzma_options encoder_options = option_mapping[6];	// default = "-6"
  const unsigned long long max_member_size = 0x0008000000000000ULL; /* 2 PiB */
  const unsigned long long max_volume_size = 0x4000000000000000ULL; /* 4 EiB */
  unsigned long long member_size = max_member_size;
  unsigned long long volume_size = 0;
  std::string default_output_filename;
  std::vector< std::string > filenames;
  Mode program_mode = m_compress;
  bool force = false;
  bool ignore_trailing = true;
  bool keep_input_files = false;
  bool loose_trailing = false;
  bool recompress = false;
  bool to_stdout = false;
  // 'zero' is true only while '-0' is the most recent of the level, '-m'
  // and '-s' options; it is passed to compress() below.
  bool zero = false;
  if( argc > 0 ) invocation_name = argv[0];

  // Code for the long-only option '--loose-trailing'; 256 lies outside the
  // range of the single-character option codes used below.
  enum { opt_lt = 256 };
  const Arg_parser::Option options[] =
    {
    { '0', "fast",              Arg_parser::no  },
    { '1', 0,                   Arg_parser::no  },
    { '2', 0,                   Arg_parser::no  },
    { '3', 0,                   Arg_parser::no  },
    { '4', 0,                   Arg_parser::no  },
    { '5', 0,                   Arg_parser::no  },
    { '6', 0,                   Arg_parser::no  },
    { '7', 0,                   Arg_parser::no  },
    { '8', 0,                   Arg_parser::no  },
    { '9', "best",              Arg_parser::no  },
    { 'a', "trailing-error",    Arg_parser::no  },
    { 'b', "member-size",       Arg_parser::yes },
    { 'c', "stdout",            Arg_parser::no  },
    { 'd', "decompress",        Arg_parser::no  },
    { 'f', "force",             Arg_parser::no  },
    { 'F', "recompress",        Arg_parser::no  },
    { 'h', "help",              Arg_parser::no  },
    { 'k', "keep",              Arg_parser::no  },
    { 'l', "list",              Arg_parser::no  },
    { 'm', "match-length",      Arg_parser::yes },
    { 'n', "threads",           Arg_parser::yes },
    { 'o', "output",            Arg_parser::yes },
    { 'q', "quiet",             Arg_parser::no  },
    { 's', "dictionary-size",   Arg_parser::yes },
    { 'S', "volume-size",       Arg_parser::yes },
    { 't', "test",              Arg_parser::no  },
    { 'v', "verbose",           Arg_parser::no  },
    { 'V', "version",           Arg_parser::no  },
    { opt_lt, "loose-trailing", Arg_parser::no  },
    {  0, 0,                    Arg_parser::no  } };

  const Arg_parser parser( argc, argv, options );
  if( parser.error().size() )				// bad option
    { show_error( parser.error().c_str(), 0, true ); return 1; }

  // Process the options in order. The loop stops at the first parsed
  // argument whose code is 0; the remaining arguments are file names.
  int argind = 0;
  for( ; argind < parser.arguments(); ++argind )
    {
    const int code = parser.code( argind );
    if( !code ) break;					// no more options
    const std::string & sarg = parser.argument( argind );
    const char * const arg = sarg.c_str();
    switch( code )
      {
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
                zero = ( code == '0' );
                encoder_options = option_mapping[code-'0']; break;
      case 'a': ignore_trailing = false; break;
      case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
      case 'c': to_stdout = true; break;
      case 'd': set_mode( program_mode, m_decompress ); break;
      case 'f': force = true; break;
      case 'F': recompress = true; break;
      case 'h': show_help(); return 0;
      case 'k': keep_input_files = true; break;
      case 'l': set_mode( program_mode, m_list ); break;
      case 'm': encoder_options.match_len_limit =
                  getnum( arg, min_match_len_limit, max_match_len );
                zero = false; break;
      // '-n' takes an argument but the value is discarded here.
      // NOTE(review): presumably accepted for command-line compatibility
      // with multi-threaded tools; confirm.
      case 'n': break;
      // '-o -' is handled exactly like '-c'.
      case 'o': if( sarg == "-" ) to_stdout = true;
                else { default_output_filename = sarg; } break;
      case 'q': verbosity = -1; break;
      case 's': encoder_options.dictionary_size = get_dict_size( arg );
                zero = false; break;
      case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break;
      case 't': set_mode( program_mode, m_test ); break;
      case 'v': if( verbosity < 4 ) ++verbosity; break;
      case 'V': show_version(); return 0;
      case opt_lt: loose_trailing = true; break;
      default : internal_error( "uncaught option." );
      }
    } // end process options

#if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
  // Put the standard streams in binary mode on these platforms.
  setmode( STDIN_FILENO, O_BINARY );
  setmode( STDOUT_FILENO, O_BINARY );
#endif

  // Collect the file names. 'filenames_given' becomes true as soon as one
  // name differs from "-"; with no names at all, "-" (standard input) is
  // used.
  bool filenames_given = false;
  for( ; argind < parser.arguments(); ++argind )
    {
    filenames.push_back( parser.argument( argind ) );
    if( filenames.back() != "-" ) filenames_given = true;
    }
  if( filenames.empty() ) filenames.push_back("-");

  if( program_mode == m_list )
    return list_files( filenames, ignore_trailing, loose_trailing );

  if( program_mode == m_compress )
    {
    // '-o file' combined with '-S' accepts a single input file only.
    if( volume_size > 0 && !to_stdout && default_output_filename.size() &&
        filenames.size() > 1 )
      { show_error( "Only can compress one file when using '-o' and '-S'.",
                    0, true ); return 1; }
    // NOTE(review): dis_slots and prob_prices are presumably encoder
    // lookup tables; they are initialized only when compressing.
    dis_slots.init();
    prob_prices.init();
    }
  else volume_size = 0;			// volumes apply only when compressing
  if( program_mode == m_test ) to_stdout = false;	// apply overrides
  if( program_mode == m_test || to_stdout ) default_output_filename.clear();

  // 'outfd' is not declared in this function; it is set to stdout here
  // when writing to stdout, else marked as not yet open (-1).
  if( to_stdout && program_mode != m_test )	// check tty only once
    { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; }
  else outfd = -1;

  // 'to_file': all output goes to the single file named with '-o'.
  const bool to_file = !to_stdout && program_mode != m_test &&
                       default_output_filename.size();
  // Install the signal handler only when output may go to a file.
  if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
    set_signals( signal_handler );

  Pretty_print pp( filenames );

  int failed_tests = 0;			// files that failed in test mode
  int retval = 0;
  // 'one_to_one': every input file gets an output file of its own.
  const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
  bool stdin_used = false;
  for( unsigned i = 0; i < filenames.size(); ++i )
    {
    std::string input_filename;		// stays empty when reading stdin
    int infd;
    // 'in_stats' is read below only when input_filename is non-empty.
    // NOTE(review): presumably filled in by open_instream2; confirm.
    struct stat in_stats;

    pp.set_name( filenames[i] );
    if( filenames[i] == "-" )
      {
      // Standard input is processed at most once, however often "-" is
      // given.
      if( stdin_used ) continue; else stdin_used = true;
      infd = STDIN_FILENO;
      if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
      // In one-to-one mode the output for stdin is stdout.
      if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); }
      }
    else
      {
      const int eindex = extension_index( input_filename = filenames[i] );
      infd = open_instream2( input_filename.c_str(), &in_stats, program_mode,
                             eindex, one_to_one, recompress );
      if( infd < 0 ) { set_retval( retval, 1 ); continue; }
      if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
      if( one_to_one )			// open outfd after verifying infd
        {
        if( program_mode == m_compress )
          set_c_outname( input_filename, true, true, volume_size > 0 );
        else set_d_outname( input_filename, eindex );
        if( !open_outstream( force, true ) )
          { close( infd ); set_retval( retval, 1 ); continue; }
        }
      }

    if( one_to_one && !check_tty_out( program_mode ) )
      { set_retval( retval, 1 ); return retval; }	// don't delete a tty

    // With '-o file' the output is opened once, on the first input that
    // gets this far (outfd is still negative), and is then reused.
    if( to_file && outfd < 0 )		// open outfd after verifying infd
      {
      if( program_mode == m_compress ) set_c_outname( default_output_filename,
                                       filenames_given, false, volume_size > 0 );
      else output_filename = default_output_filename;
      if( !open_outstream( force, false ) || !check_tty_out( program_mode ) )
        return 1;	// check tty only once and don't try to delete a tty
      }

    // Input file attributes are handed on only in one-to-one mode with a
    // named input file; otherwise a null pointer is passed.
    const struct stat * const in_statsp =
      ( input_filename.size() && one_to_one ) ? &in_stats : 0;
    // Size of a regular input file divided by 100, rounded up; 0 for
    // stdin and for non-regular files.
    const unsigned long long cfile_size =
      ( input_filename.size() && S_ISREG( in_stats.st_mode ) ) ?
        ( in_stats.st_size + 99 ) / 100 : 0;
    int tmp;				// status of this file
    try {
      if( program_mode == m_compress )
        tmp = compress( cfile_size, member_size, volume_size, infd,
                        encoder_options, pp, in_statsp, zero );
      else
        tmp = decompress( cfile_size, infd, pp, ignore_trailing,
                          loose_trailing, program_mode == m_test );
      }
    catch( std::bad_alloc & )
      { pp( ( program_mode == m_compress ) ?
            "Not enough memory. Try a smaller dictionary size." :
            "Not enough memory." ); tmp = 1; }
    catch( Error & e ) { pp(); show_error( e.msg, errno ); tmp = 1; }
    if( close( infd ) != 0 )
      { show_file_error( pp.name(), "Error closing input file", errno );
        set_retval( tmp, 1 ); }
    set_retval( retval, tmp );
    // A failure is only counted in test mode; in the other modes
    // cleanup_and_fail is called.
    // NOTE(review): cleanup_and_fail presumably does not return; confirm.
    if( tmp )
      { if( program_mode != m_test ) cleanup_and_fail( retval );
        else ++failed_tests; }

    if( delete_output_on_interrupt && one_to_one )
      close_and_set_permissions( in_statsp );
    // Remove the input file only in one-to-one mode, when '-k' was not
    // given, and not when compressing into volumes.
    if( input_filename.size() && !keep_input_files && one_to_one &&
        ( program_mode != m_compress || volume_size == 0 ) )
      std::remove( input_filename.c_str() );
    }
  if( delete_output_on_interrupt ) close_and_set_permissions( 0 );	// -o
  else if( outfd >= 0 && close( outfd ) != 0 )				// -c
    {
    show_error( "Error closing stdout", errno );
    set_retval( retval, 1 );
    }
  // Summarize test failures only when several files were given and the
  // verbosity is at least 1.
  if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 )
    std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",
                  program_name, failed_tests,
                  ( failed_tests == 1 ) ? "file" : "files" );
  return retval;
  }
1024