1 /* Plzip - Massively parallel implementation of lzip
2    Copyright (C) 2009 Laszlo Ersek.
3    Copyright (C) 2009-2021 Antonio Diaz Diaz.
4 
5    This program is free software: you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation, either version 2 of the License, or
8    (at your option) any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 */
18 /*
19    Exit status: 0 for a normal exit, 1 for environmental problems
20    (file not found, invalid flags, I/O errors, etc), 2 to indicate a
21    corrupt or invalid input file, 3 for an internal consistency error
22    (eg, bug) which caused plzip to panic.
23 */
24 
25 #define _FILE_OFFSET_BITS 64
26 
27 #include <algorithm>
28 #include <cerrno>
29 #include <climits>
30 #include <csignal>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34 #include <string>
35 #include <vector>
36 #include <fcntl.h>
37 #include <stdint.h>
38 #include <unistd.h>
39 #include <utime.h>
40 #include <sys/stat.h>
41 #include <lzlib.h>
42 #if defined(__MSVCRT__) || defined(__OS2__)
43 #include <io.h>
44 #if defined(__MSVCRT__)
45 #define fchmod(x,y) 0
46 #define fchown(x,y,z) 0
47 #define strtoull std::strtoul
48 #define SIGHUP SIGTERM
49 #define S_ISSOCK(x) 0
50 #ifndef S_IRGRP
51 #define S_IRGRP 0
52 #define S_IWGRP 0
53 #define S_IROTH 0
54 #define S_IWOTH 0
55 #endif
56 #endif
57 #endif
58 
59 #include "arg_parser.h"
60 #include "lzip.h"
61 
62 #ifndef O_BINARY
63 #define O_BINARY 0
64 #endif
65 
66 #if CHAR_BIT != 8
67 #error "Environments where CHAR_BIT != 8 are not supported."
68 #endif
69 
/* Global message level. Diagnostics are printed when verbosity >= 0.
   Option '-q' sets it to -1 and each '-v' increments it (up to 4). */
int verbosity = 0;
71 
72 namespace {
73 
// Name used as prefix of the diagnostics and in the version message.
const char * const program_name = "plzip";
// Year shown in the copyright line printed by show_version.
const char * const program_year = "2021";
// Name shown in the usage and "Try '... --help'" messages.
// main replaces it with argv[0] when argc > 0.
const char * invocation_name = program_name;		// default value

/* Known file name extensions. 'from' is the suffix of the compressed
   name and 'to' is the suffix replacing it in the decompressed name.
   The list is terminated by an entry with null pointers. */
const struct { const char * from; const char * to; } known_extensions[] = {
  { ".lz",  ""     },
  { ".tlz", ".tar" },
  { 0,      0      } };

// Compression parameters selected by the options -0..-9, -s, and -m.
struct Lzma_options
  {
  int dictionary_size;		// 4 KiB .. 512 MiB
  int match_len_limit;		// 5 .. 273
  };

// Operation to perform. m_compress is the default set in main.
enum Mode { m_compress, m_decompress, m_list, m_test };

/* Variables used in signal handler context.
   They are not declared volatile because the handler never returns. */
std::string output_filename;	// name of the output file being written
int outfd = -1;			// descriptor of the output; -1 if none is open
// If true, cleanup_and_fail removes 'output_filename' before exiting.
bool delete_output_on_interrupt = false;
96 
97 
show_help(const long num_online)98 void show_help( const long num_online )
99   {
100   std::printf( "Plzip is a massively parallel (multi-threaded) implementation of lzip, fully\n"
101                "compatible with lzip 1.4 or newer. Plzip uses the compression library lzlib.\n"
102                "\nLzip is a lossless data compressor with a user interface similar to the one\n"
103                "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n"
104                "chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n"
105                "interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n"
106                "compress most files more than bzip2 (lzip -9). Decompression speed is\n"
107                "intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n"
108                "a data recovery perspective. Lzip has been designed, written, and tested\n"
109                "with great care to replace gzip and bzip2 as the standard general-purpose\n"
110                "compressed format for unix-like systems.\n"
111                "\nPlzip can compress/decompress large files on multiprocessor machines much\n"
112                "faster than lzip, at the cost of a slightly reduced compression ratio (0.4\n"
113                "to 2 percent larger compressed files). Note that the number of usable\n"
114                "threads is limited by file size; on files larger than a few GB plzip can use\n"
115                "hundreds of processors, but on files of only a few MB plzip is no faster\n"
116                "than lzip.\n"
117                "\nUsage: %s [options] [files]\n", invocation_name );
118   std::printf( "\nOptions:\n"
119                "  -h, --help                     display this help and exit\n"
120                "  -V, --version                  output version information and exit\n"
121                "  -a, --trailing-error           exit with error status if trailing data\n"
122                "  -B, --data-size=<bytes>        set size of input data blocks [2x8=16 MiB]\n"
123                "  -c, --stdout                   write to standard output, keep input files\n"
124                "  -d, --decompress               decompress\n"
125                "  -f, --force                    overwrite existing output files\n"
126                "  -F, --recompress               force re-compression of compressed files\n"
127                "  -k, --keep                     keep (don't delete) input files\n"
128                "  -l, --list                     print (un)compressed file sizes\n"
129                "  -m, --match-length=<bytes>     set match length limit in bytes [36]\n"
130                "  -n, --threads=<n>              set number of (de)compression threads [%ld]\n"
131                "  -o, --output=<file>            write to <file>, keep input files\n"
132                "  -q, --quiet                    suppress all messages\n"
133                "  -s, --dictionary-size=<bytes>  set dictionary size limit in bytes [8 MiB]\n"
134                "  -t, --test                     test compressed file integrity\n"
135                "  -v, --verbose                  be verbose (a 2nd -v gives more)\n"
136                "  -0 .. -9                       set compression level [default 6]\n"
137                "      --fast                     alias for -0\n"
138                "      --best                     alias for -9\n"
139                "      --loose-trailing           allow trailing data seeming corrupt header\n"
140                "      --in-slots=<n>             number of 1 MiB input packets buffered [4]\n"
141                "      --out-slots=<n>            number of 1 MiB output packets buffered [64]\n"
142                "      --check-lib                compare version of lzlib.h with liblz.{a,so}\n",
143                num_online );
144   if( verbosity >= 1 )
145     {
146     std::printf( "      --debug=<level>        print mode(2), debug statistics(1) to stderr\n" );
147     }
148   std::printf( "\nIf no file names are given, or if a file is '-', plzip compresses or\n"
149                "decompresses from standard input to standard output.\n"
150                "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
151                "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
152                "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n"
153                "to 2^29 bytes.\n"
154                "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n"
155                "scale optimal for all files. If your files are large, very repetitive,\n"
156                "etc, you may need to use the options --dictionary-size and --match-length\n"
157                "directly to achieve optimal performance.\n"
158                "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
159                "'tar -xf foo.tar.lz' or 'plzip -cd foo.tar.lz | tar -xf -'.\n"
160                "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
161                "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
162                "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
163                "caused plzip to panic.\n"
164                "\nReport bugs to lzip-bug@nongnu.org\n"
165                "Plzip home page: http://www.nongnu.org/lzip/plzip.html\n" );
166   }
167 
168 
show_version()169 void show_version()
170   {
171   std::printf( "%s %s\n", program_name, PROGVERSION );
172   std::printf( "Copyright (C) 2009 Laszlo Ersek.\n" );
173   std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
174   std::printf( "Using lzlib %s\n", LZ_version() );
175   std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n"
176                "This is free software: you are free to change and redistribute it.\n"
177                "There is NO WARRANTY, to the extent permitted by law.\n" );
178   }
179 
180 
check_lib()181 int check_lib()
182   {
183   bool warning = false;
184   if( std::strcmp( LZ_version_string, LZ_version() ) != 0 )
185     { warning = true;
186       if( verbosity >= 0 )
187         std::printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n",
188                      LZ_version_string, LZ_version() ); }
189 #if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
190   if( LZ_API_VERSION != LZ_api_version() )
191     { warning = true;
192       if( verbosity >= 0 )
193         std::printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n",
194                      LZ_API_VERSION, LZ_api_version() ); }
195 #endif
196   if( verbosity >= 1 )
197     {
198     std::printf( "Using lzlib %s\n", LZ_version() );
199 #if !defined LZ_API_VERSION
200     std::fputs( "LZ_API_VERSION is not defined.\n", stdout );
201 #elif LZ_API_VERSION >= 1012
202     std::printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() );
203 #else
204     std::printf( "Compiled with LZ_API_VERSION = %u. "
205                  "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION );
206 #endif
207     }
208   return warning;
209   }
210 
211 } // end namespace
212 
operator ()(const char * const msg) const213 void Pretty_print::operator()( const char * const msg ) const
214   {
215   if( verbosity >= 0 )
216     {
217     if( first_post )
218       {
219       first_post = false;
220       std::fputs( padded_name.c_str(), stderr );
221       if( !msg ) std::fflush( stderr );
222       }
223     if( msg ) std::fprintf( stderr, "%s\n", msg );
224     }
225   }
226 
227 
/* Return the message for an unsupported member format 'version'.
   The message is stored in a static buffer that is overwritten by
   every call. */
const char * bad_version( const unsigned version )
  {
  enum { bufsize = 80 };
  static char message[bufsize];
  snprintf( message, bufsize, "Version %u member format not supported.",
            version );
  return message;
  }
235 
236 
/* Format 'dictionary_size' as a number of at most 4 digits followed by
   a binary unit ("B", "KiB", "MiB", ...).
   The size is divided by 1024 while it is larger than 9999 or while it
   is still an exact multiple of 1024. Sizes printed in plain bytes get
   two extra leading spaces.
   The result is stored in a static buffer overwritten by every call. */
const char * format_ds( const unsigned dictionary_size )
  {
  enum { bufsize = 16, factor = 1024, num_prefixes = 8 };
  static char buf[bufsize];
  const char * const prefixes[num_prefixes] =
    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };
  unsigned value = dictionary_size;
  bool exact = ( value % factor == 0 );
  int steps = 0;			// number of divisions by 'factor'

  while( steps < num_prefixes &&
         ( value > 9999 || ( exact && value >= factor ) ) )
    {
    value /= factor;
    if( value % factor != 0 ) exact = false;
    ++steps;
    }
  const bool scaled = ( steps > 0 );
  const char * const unit = scaled ? prefixes[steps-1] : "";
  const char * const padding = scaled ? "" : "  ";
  snprintf( buf, bufsize, "%s%4u %sB", padding, value, unit );
  return buf;
  }
254 
255 
show_header(const unsigned dictionary_size)256 void show_header( const unsigned dictionary_size )
257   {
258   std::fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) );
259   }
260 
261 namespace {
262 
getnum(const char * const ptr,const unsigned long long llimit,const unsigned long long ulimit)263 unsigned long long getnum( const char * const ptr,
264                            const unsigned long long llimit,
265                            const unsigned long long ulimit )
266   {
267   char * tail;
268   errno = 0;
269   unsigned long long result = strtoull( ptr, &tail, 0 );
270   if( tail == ptr )
271     {
272     show_error( "Bad or missing numerical argument.", 0, true );
273     std::exit( 1 );
274     }
275 
276   if( !errno && tail[0] )
277     {
278     const unsigned factor = ( tail[1] == 'i' ) ? 1024 : 1000;
279     int exponent = 0;				// 0 = bad multiplier
280     switch( tail[0] )
281       {
282       case 'Y': exponent = 8; break;
283       case 'Z': exponent = 7; break;
284       case 'E': exponent = 6; break;
285       case 'P': exponent = 5; break;
286       case 'T': exponent = 4; break;
287       case 'G': exponent = 3; break;
288       case 'M': exponent = 2; break;
289       case 'K': if( factor == 1024 ) exponent = 1; break;
290       case 'k': if( factor == 1000 ) exponent = 1; break;
291       }
292     if( exponent <= 0 )
293       {
294       show_error( "Bad multiplier in numerical argument.", 0, true );
295       std::exit( 1 );
296       }
297     for( int i = 0; i < exponent; ++i )
298       {
299       if( ulimit / factor >= result ) result *= factor;
300       else { errno = ERANGE; break; }
301       }
302     }
303   if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
304   if( errno )
305     {
306     show_error( "Numerical argument out of limits." );
307     std::exit( 1 );
308     }
309   return result;
310   }
311 
312 
get_dict_size(const char * const arg)313 int get_dict_size( const char * const arg )
314   {
315   char * tail;
316   const long bits = std::strtol( arg, &tail, 0 );
317   if( bits >= LZ_min_dictionary_bits() &&
318       bits <= LZ_max_dictionary_bits() && *tail == 0 )
319     return 1 << bits;
320   int dictionary_size = getnum( arg, LZ_min_dictionary_size(),
321                                      LZ_max_dictionary_size() );
322   if( dictionary_size == 65535 ) ++dictionary_size;	// no fast encoder
323   return dictionary_size;
324   }
325 
326 
set_mode(Mode & program_mode,const Mode new_mode)327 void set_mode( Mode & program_mode, const Mode new_mode )
328   {
329   if( program_mode != m_compress && program_mode != new_mode )
330     {
331     show_error( "Only one operation can be specified.", 0, true );
332     std::exit( 1 );
333     }
334   program_mode = new_mode;
335   }
336 
337 
extension_index(const std::string & name)338 int extension_index( const std::string & name )
339   {
340   for( int eindex = 0; known_extensions[eindex].from; ++eindex )
341     {
342     const std::string ext( known_extensions[eindex].from );
343     if( name.size() > ext.size() &&
344         name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 )
345       return eindex;
346     }
347   return -1;
348   }
349 
350 
set_c_outname(const std::string & name,const bool filenames_given,const bool force_ext)351 void set_c_outname( const std::string & name, const bool filenames_given,
352                     const bool force_ext )
353   {
354   /* zupdate < 1.9 depends on lzip adding the extension '.lz' to name when
355      reading from standard input. */
356   output_filename = name;
357   if( force_ext ||
358       ( !filenames_given && extension_index( output_filename ) < 0 ) )
359     output_filename += known_extensions[0].from;
360   }
361 
362 
set_d_outname(const std::string & name,const int eindex)363 void set_d_outname( const std::string & name, const int eindex )
364   {
365   if( eindex >= 0 )
366     {
367     const std::string from( known_extensions[eindex].from );
368     if( name.size() > from.size() )
369       {
370       output_filename.assign( name, 0, name.size() - from.size() );
371       output_filename += known_extensions[eindex].to;
372       return;
373       }
374     }
375   output_filename = name; output_filename += ".out";
376   if( verbosity >= 1 )
377     std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
378                   program_name, name.c_str(), output_filename.c_str() );
379   }
380 
381 } // end namespace
382 
open_instream(const char * const name,struct stat * const in_statsp,const bool one_to_one,const bool reg_only)383 int open_instream( const char * const name, struct stat * const in_statsp,
384                    const bool one_to_one, const bool reg_only )
385   {
386   int infd = open( name, O_RDONLY | O_BINARY );
387   if( infd < 0 )
388     show_file_error( name, "Can't open input file", errno );
389   else
390     {
391     const int i = fstat( infd, in_statsp );
392     const mode_t mode = in_statsp->st_mode;
393     const bool can_read = ( i == 0 && !reg_only &&
394                             ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
395                               S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
396     if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
397       {
398       if( verbosity >= 0 )
399         std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
400                       program_name, name, ( can_read && one_to_one ) ?
401                       ",\n       and neither '-c' nor '-o' were specified" : "" );
402       close( infd );
403       infd = -1;
404       }
405     }
406   return infd;
407   }
408 
409 namespace {
410 
open_instream2(const char * const name,struct stat * const in_statsp,const Mode program_mode,const int eindex,const bool one_to_one,const bool recompress)411 int open_instream2( const char * const name, struct stat * const in_statsp,
412                     const Mode program_mode, const int eindex,
413                     const bool one_to_one, const bool recompress )
414   {
415   if( program_mode == m_compress && !recompress && eindex >= 0 )
416     {
417     if( verbosity >= 0 )
418       std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
419                     program_name, name, known_extensions[eindex].from );
420     return -1;
421     }
422   return open_instream( name, in_statsp, one_to_one, false );
423   }
424 
425 
open_outstream(const bool force,const bool protect)426 bool open_outstream( const bool force, const bool protect )
427   {
428   const mode_t usr_rw = S_IRUSR | S_IWUSR;
429   const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
430   const mode_t outfd_mode = protect ? usr_rw : all_rw;
431   int flags = O_CREAT | O_WRONLY | O_BINARY;
432   if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
433 
434   outfd = open( output_filename.c_str(), flags, outfd_mode );
435   if( outfd >= 0 ) delete_output_on_interrupt = true;
436   else if( verbosity >= 0 )
437     {
438     if( errno == EEXIST )
439       std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
440                     program_name, output_filename.c_str() );
441     else
442       std::fprintf( stderr, "%s: Can't create output file '%s': %s\n",
443                     program_name, output_filename.c_str(), std::strerror( errno ) );
444     }
445   return ( outfd >= 0 );
446   }
447 
448 
// Install 'action' as the handler of SIGHUP, SIGINT, and SIGTERM.
void set_signals( void (*action)(int) )
  {
  const int signals[] = { SIGHUP, SIGINT, SIGTERM };
  const int count = sizeof signals / sizeof signals[0];
  for( int i = 0; i < count; ++i )
    std::signal( signals[i], action );
  }
455 
456 } // end namespace
457 
/* Delete the partial output file, if any, and exit with status 'retval'.
   This can be called from any thread, main thread or sub-threads alike,
   since they all call common helper functions like 'xlock' that call
   cleanup_and_fail() in case of an error.
   This function never returns.
*/
void cleanup_and_fail( const int retval )
  {
  // only one thread can delete and exit
  // (the mutex is never unlocked; the function ends calling std::exit)
  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

  set_signals( SIG_IGN );			// ignore signals
  pthread_mutex_lock( &mutex );		// ignore errors to avoid loop
  // keep the real level to decide whether to print the messages below
  const int saved_verbosity = verbosity;
  verbosity = -1;		// suppress messages from other threads
  if( delete_output_on_interrupt )
    {
    delete_output_on_interrupt = false;
    if( saved_verbosity >= 0 )
      std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
                    program_name, output_filename.c_str() );
    // close the output before removing it
    if( outfd >= 0 ) { close( outfd ); outfd = -1; }
    // a file that does not exist (ENOENT) is not reported as a failure
    if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT &&
        saved_verbosity >= 0 )
      std::fprintf( stderr, "%s: WARNING: deletion of output file "
                    "(apparently) failed.\n", program_name );
    }
  std::exit( retval );
  }
485 
486 namespace {
487 
signal_handler(int)488 extern "C" void signal_handler( int )
489   {
490   show_error( "Control-C or similar caught, quitting." );
491   cleanup_and_fail( 1 );
492   }
493 
494 
check_tty_in(const char * const input_filename,const int infd,const Mode program_mode,int & retval)495 bool check_tty_in( const char * const input_filename, const int infd,
496                    const Mode program_mode, int & retval )
497   {
498   if( ( program_mode == m_decompress || program_mode == m_test ) &&
499       isatty( infd ) )				// for example /dev/tty
500     { show_file_error( input_filename,
501                        "I won't read compressed data from a terminal." );
502       close( infd ); set_retval( retval, 1 );
503       if( program_mode != m_test ) cleanup_and_fail( retval );
504       return false; }
505   return true;
506   }
507 
check_tty_out(const Mode program_mode)508 bool check_tty_out( const Mode program_mode )
509   {
510   if( program_mode == m_compress && isatty( outfd ) )
511     { show_file_error( output_filename.size() ?
512                        output_filename.c_str() : "(stdout)",
513                        "I won't write compressed data to a terminal." );
514       return false; }
515   return true;
516   }
517 
518 
519 // Set permissions, owner, and times.
close_and_set_permissions(const struct stat * const in_statsp)520 void close_and_set_permissions( const struct stat * const in_statsp )
521   {
522   bool warning = false;
523   if( in_statsp )
524     {
525     const mode_t mode = in_statsp->st_mode;
526     // fchown will in many cases return with EPERM, which can be safely ignored.
527     if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
528       { if( fchmod( outfd, mode ) != 0 ) warning = true; }
529     else
530       if( errno != EPERM ||
531           fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
532         warning = true;
533     }
534   if( close( outfd ) != 0 )
535     {
536     show_error( "Error closing output file", errno );
537     cleanup_and_fail( 1 );
538     }
539   outfd = -1;
540   delete_output_on_interrupt = false;
541   if( in_statsp )
542     {
543     struct utimbuf t;
544     t.actime = in_statsp->st_atime;
545     t.modtime = in_statsp->st_mtime;
546     if( utime( output_filename.c_str(), &t ) != 0 ) warning = true;
547     }
548   if( warning && verbosity >= 1 )
549     show_error( "Can't change output file attributes." );
550   }
551 
552 } // end namespace
553 
554 
show_error(const char * const msg,const int errcode,const bool help)555 void show_error( const char * const msg, const int errcode, const bool help )
556   {
557   if( verbosity < 0 ) return;
558   if( msg && msg[0] )
559     std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg,
560                   ( errcode > 0 ) ? ": " : "",
561                   ( errcode > 0 ) ? std::strerror( errcode ) : "" );
562   if( help )
563     std::fprintf( stderr, "Try '%s --help' for more information.\n",
564                   invocation_name );
565   }
566 
567 
show_file_error(const char * const filename,const char * const msg,const int errcode)568 void show_file_error( const char * const filename, const char * const msg,
569                       const int errcode )
570   {
571   if( verbosity >= 0 )
572     std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
573                   ( errcode > 0 ) ? ": " : "",
574                   ( errcode > 0 ) ? std::strerror( errcode ) : "" );
575   }
576 
577 
internal_error(const char * const msg)578 void internal_error( const char * const msg )
579   {
580   if( verbosity >= 0 )
581     std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
582   std::exit( 3 );
583   }
584 
585 
/* Show the progress of the current file on standard error.
   A call with a non-null 'p' (re)initializes the state: 'cfile_size' is
   stored as the divisor used to compute the percentage and the position
   is reset to 0. Every call made once a Pretty_print is stored adds
   'packet_size' to the position and prints it.
   Progress is disabled for the rest of the run if, at initialization,
   verbosity < 2 or standard error is not a terminal. */
void show_progress( const unsigned long long packet_size,
                    const unsigned long long cfile_size,
                    const Pretty_print * const p )
  {
  static unsigned long long csize = 0;		// file_size / 100
  static unsigned long long pos = 0;		// sum of the packet sizes
  static const Pretty_print * pp = 0;		// prints the file name
  static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
  static bool enabled = true;			// false = never show progress

  if( !enabled ) return;
  if( p )					// initialize static vars
    {
    if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
    csize = cfile_size; pos = 0; pp = p;
    }
  if( pp )
    {
    // the position and the output are updated under the mutex
    xlock( &mutex );
    pos += packet_size;
    if( csize > 0 )				// percentage and megabytes
      std::fprintf( stderr, "%4llu%%  %.1f MB\r", pos / csize, pos / 1000000.0 );
    else					// megabytes only
      std::fprintf( stderr, "  %.1f MB\r", pos / 1000000.0 );
    pp->reset(); (*pp)();			// restore cursor position
    xunlock( &mutex );
    }
  }
614 
615 
616 #if defined(__MSVCRT__)
617 #include <windows.h>
618 #define _SC_NPROCESSORS_ONLN   1
619 #define _SC_THREAD_THREADS_MAX 2
620 
sysconf(int flag)621 long sysconf( int flag )
622   {
623   if( flag == _SC_NPROCESSORS_ONLN )
624     {
625     SYSTEM_INFO si;
626     GetSystemInfo( &si );
627     return si.dwNumberOfProcessors;
628     }
629   if( flag != _SC_THREAD_THREADS_MAX ) errno = EINVAL;
630   return -1;		// unlimited threads or error
631   }
632 
633 #endif	// __MSVCRT__
634 
635 
main(const int argc,const char * const argv[])636 int main( const int argc, const char * const argv[] )
637   {
638   /* Mapping from gzip/bzip2 style 1..9 compression modes
639      to the corresponding LZMA compression modes. */
640   const Lzma_options option_mapping[] =
641     {
642     {   65535,  16 },		// -0 (65535,16 chooses fast encoder)
643     { 1 << 20,   5 },		// -1
644     { 3 << 19,   6 },		// -2
645     { 1 << 21,   8 },		// -3
646     { 3 << 20,  12 },		// -4
647     { 1 << 22,  20 },		// -5
648     { 1 << 23,  36 },		// -6
649     { 1 << 24,  68 },		// -7
650     { 3 << 23, 132 },		// -8
651     { 1 << 25, 273 } };		// -9
652   Lzma_options encoder_options = option_mapping[6];	// default = "-6"
653   std::string default_output_filename;
654   std::vector< std::string > filenames;
655   int data_size = 0;
656   int debug_level = 0;
657   int num_workers = 0;		// start this many worker threads
658   int in_slots = 4;
659   int out_slots = 64;
660   Mode program_mode = m_compress;
661   bool force = false;
662   bool ignore_trailing = true;
663   bool keep_input_files = false;
664   bool loose_trailing = false;
665   bool recompress = false;
666   bool to_stdout = false;
667   if( argc > 0 ) invocation_name = argv[0];
668 
669   enum { opt_chk = 256, opt_dbg, opt_in, opt_lt, opt_out };
670   const Arg_parser::Option options[] =
671     {
672     { '0', "fast",              Arg_parser::no  },
673     { '1', 0,                   Arg_parser::no  },
674     { '2', 0,                   Arg_parser::no  },
675     { '3', 0,                   Arg_parser::no  },
676     { '4', 0,                   Arg_parser::no  },
677     { '5', 0,                   Arg_parser::no  },
678     { '6', 0,                   Arg_parser::no  },
679     { '7', 0,                   Arg_parser::no  },
680     { '8', 0,                   Arg_parser::no  },
681     { '9', "best",              Arg_parser::no  },
682     { 'a', "trailing-error",    Arg_parser::no  },
683     { 'b', "member-size",       Arg_parser::yes },
684     { 'B', "data-size",         Arg_parser::yes },
685     { 'c', "stdout",            Arg_parser::no  },
686     { 'd', "decompress",        Arg_parser::no  },
687     { 'f', "force",             Arg_parser::no  },
688     { 'F', "recompress",        Arg_parser::no  },
689     { 'h', "help",              Arg_parser::no  },
690     { 'k', "keep",              Arg_parser::no  },
691     { 'l', "list",              Arg_parser::no  },
692     { 'm', "match-length",      Arg_parser::yes },
693     { 'n', "threads",           Arg_parser::yes },
694     { 'o', "output",            Arg_parser::yes },
695     { 'q', "quiet",             Arg_parser::no  },
696     { 's', "dictionary-size",   Arg_parser::yes },
697     { 'S', "volume-size",       Arg_parser::yes },
698     { 't', "test",              Arg_parser::no  },
699     { 'v', "verbose",           Arg_parser::no  },
700     { 'V', "version",           Arg_parser::no  },
701     { opt_chk, "check-lib",     Arg_parser::no  },
702     { opt_dbg, "debug",         Arg_parser::yes },
703     { opt_in, "in-slots",       Arg_parser::yes },
704     { opt_lt, "loose-trailing", Arg_parser::no  },
705     { opt_out, "out-slots",     Arg_parser::yes },
706     {  0, 0,                    Arg_parser::no  } };
707 
708   const Arg_parser parser( argc, argv, options );
709   if( parser.error().size() )				// bad option
710     { show_error( parser.error().c_str(), 0, true ); return 1; }
711 
712   const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) );
713   long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
714   if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
715     max_workers = INT_MAX / sizeof (pthread_t);
716 
717   int argind = 0;
718   for( ; argind < parser.arguments(); ++argind )
719     {
720     const int code = parser.code( argind );
721     if( !code ) break;					// no more options
722     const std::string & sarg = parser.argument( argind );
723     const char * const arg = sarg.c_str();
724     switch( code )
725       {
726       case '0': case '1': case '2': case '3': case '4':
727       case '5': case '6': case '7': case '8': case '9':
728                 encoder_options = option_mapping[code-'0']; break;
729       case 'a': ignore_trailing = false; break;
730       case 'b': break;
731       case 'B': data_size = getnum( arg, 2 * LZ_min_dictionary_size(),
732                                     2 * LZ_max_dictionary_size() ); break;
733       case 'c': to_stdout = true; break;
734       case 'd': set_mode( program_mode, m_decompress ); break;
735       case 'f': force = true; break;
736       case 'F': recompress = true; break;
737       case 'h': show_help( num_online ); return 0;
738       case 'k': keep_input_files = true; break;
739       case 'l': set_mode( program_mode, m_list ); break;
740       case 'm': encoder_options.match_len_limit =
741                   getnum( arg, LZ_min_match_len_limit(),
742                                LZ_max_match_len_limit() ); break;
743       case 'n': num_workers = getnum( arg, 1, max_workers ); break;
744       case 'o': if( sarg == "-" ) to_stdout = true;
745                 else { default_output_filename = sarg; } break;
746       case 'q': verbosity = -1; break;
747       case 's': encoder_options.dictionary_size = get_dict_size( arg );
748                 break;
749       case 'S': break;
750       case 't': set_mode( program_mode, m_test ); break;
751       case 'v': if( verbosity < 4 ) ++verbosity; break;
752       case 'V': show_version(); return 0;
753       case opt_chk: return check_lib();
754       case opt_dbg: debug_level = getnum( arg, 0, 3 ); break;
755       case opt_in: in_slots = getnum( arg, 1, 64 ); break;
756       case opt_lt: loose_trailing = true; break;
757       case opt_out: out_slots = getnum( arg, 1, 1024 ); break;
758       default : internal_error( "uncaught option." );
759       }
760     } // end process options
761 
762   if( LZ_version()[0] < '1' )
763     { show_error( "Wrong library version. At least lzlib 1.0 is required." );
764       return 1; }
765 
766 #if defined(__MSVCRT__) || defined(__OS2__)
767   setmode( STDIN_FILENO, O_BINARY );
768   setmode( STDOUT_FILENO, O_BINARY );
769 #endif
770 
771   bool filenames_given = false;
772   for( ; argind < parser.arguments(); ++argind )
773     {
774     filenames.push_back( parser.argument( argind ) );
775     if( filenames.back() != "-" ) filenames_given = true;
776     }
777   if( filenames.empty() ) filenames.push_back("-");
778 
779   if( program_mode == m_list )
780     return list_files( filenames, ignore_trailing, loose_trailing );
781 
782   const bool fast = encoder_options.dictionary_size == 65535 &&
783                     encoder_options.match_len_limit == 16;
784   if( data_size <= 0 )
785     {
786     if( fast ) data_size = 1 << 20;
787     else data_size = 2 * std::max( 65536, encoder_options.dictionary_size );
788     }
789   else if( !fast && data_size < encoder_options.dictionary_size )
790     encoder_options.dictionary_size =
791       std::max( data_size, LZ_min_dictionary_size() );
792 
793   if( num_workers <= 0 )
794     {
795     if( program_mode == m_compress && sizeof (void *) <= 4 )
796       {
797       // use less than 2.22 GiB on 32 bit systems
798       const long long limit = ( 27LL << 25 ) + ( 11LL << 27 );	// 4 * 568 MiB
799       const long long mem = ( 27LL * data_size ) / 8 +
800         ( fast ? 3LL << 19 : 11LL * encoder_options.dictionary_size );
801       const int nmax32 = std::max( limit / mem, 1LL );
802       if( max_workers > nmax32 ) max_workers = nmax32;
803       }
804     num_workers = std::min( num_online, max_workers );
805     }
806 
807   if( program_mode == m_test ) to_stdout = false;	// apply overrides
808   if( program_mode == m_test || to_stdout ) default_output_filename.clear();
809 
810   if( to_stdout && program_mode != m_test )	// check tty only once
811     { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; }
812   else outfd = -1;
813 
814   const bool to_file = !to_stdout && program_mode != m_test &&
815                        default_output_filename.size();
816   if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
817     set_signals( signal_handler );
818 
819   Pretty_print pp( filenames );
820 
821   int failed_tests = 0;
822   int retval = 0;
823   const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
824   bool stdin_used = false;
825   for( unsigned i = 0; i < filenames.size(); ++i )
826     {
827     std::string input_filename;
828     int infd;
829     struct stat in_stats;
830 
831     pp.set_name( filenames[i] );
832     if( filenames[i] == "-" )
833       {
834       if( stdin_used ) continue; else stdin_used = true;
835       infd = STDIN_FILENO;
836       if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
837       if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); }
838       }
839     else
840       {
841       const int eindex = extension_index( input_filename = filenames[i] );
842       infd = open_instream2( input_filename.c_str(), &in_stats, program_mode,
843                              eindex, one_to_one, recompress );
844       if( infd < 0 ) { set_retval( retval, 1 ); continue; }
845       if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
846       if( one_to_one )			// open outfd after verifying infd
847         {
848         if( program_mode == m_compress )
849           set_c_outname( input_filename, true, true );
850         else set_d_outname( input_filename, eindex );
851         if( !open_outstream( force, true ) )
852           { close( infd ); set_retval( retval, 1 ); continue; }
853         }
854       }
855 
856     if( one_to_one && !check_tty_out( program_mode ) )
857       { set_retval( retval, 1 ); return retval; }	// don't delete a tty
858 
859     if( to_file && outfd < 0 )		// open outfd after verifying infd
860       {
861       if( program_mode == m_compress ) set_c_outname( default_output_filename,
862                                        filenames_given, false );
863       else output_filename = default_output_filename;
864       if( !open_outstream( force, false ) || !check_tty_out( program_mode ) )
865         return 1;	// check tty only once and don't try to delete a tty
866       }
867 
868     const struct stat * const in_statsp =
869       ( input_filename.size() && one_to_one ) ? &in_stats : 0;
870     const bool infd_isreg = input_filename.size() && S_ISREG( in_stats.st_mode );
871     const unsigned long long cfile_size =
872       infd_isreg ? ( in_stats.st_size + 99 ) / 100 : 0;
873     int tmp;
874     if( program_mode == m_compress )
875       tmp = compress( cfile_size, data_size, encoder_options.dictionary_size,
876                       encoder_options.match_len_limit, num_workers,
877                       infd, outfd, pp, debug_level );
878     else
879       tmp = decompress( cfile_size, num_workers, infd, outfd, pp,
880                         debug_level, in_slots, out_slots, ignore_trailing,
881                         loose_trailing, infd_isreg, one_to_one );
882     if( close( infd ) != 0 )
883       { show_file_error( pp.name(), "Error closing input file", errno );
884         set_retval( tmp, 1 ); }
885     set_retval( retval, tmp );
886     if( tmp )
887       { if( program_mode != m_test ) cleanup_and_fail( retval );
888         else ++failed_tests; }
889 
890     if( delete_output_on_interrupt && one_to_one )
891       close_and_set_permissions( in_statsp );
892     if( input_filename.size() && !keep_input_files && one_to_one )
893       std::remove( input_filename.c_str() );
894     }
895   if( delete_output_on_interrupt ) close_and_set_permissions( 0 );	// -o
896   else if( outfd >= 0 && close( outfd ) != 0 )				// -c
897     {
898     show_error( "Error closing stdout", errno );
899     set_retval( retval, 1 );
900     }
901   if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 )
902     std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",
903                   program_name, failed_tests,
904                   ( failed_tests == 1 ) ? "file" : "files" );
905   return retval;
906   }
907