1 /* Plzip - Massively parallel implementation of lzip
2 Copyright (C) 2009 Laszlo Ersek.
3 Copyright (C) 2009-2021 Antonio Diaz Diaz.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18 /*
19 Exit status: 0 for a normal exit, 1 for environmental problems
20 (file not found, invalid flags, I/O errors, etc), 2 to indicate a
21 corrupt or invalid input file, 3 for an internal consistency error
22 (eg, bug) which caused plzip to panic.
23 */
24
25 #define _FILE_OFFSET_BITS 64
26
27 #include <algorithm>
28 #include <cerrno>
29 #include <climits>
30 #include <csignal>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34 #include <string>
35 #include <vector>
36 #include <fcntl.h>
37 #include <stdint.h>
38 #include <unistd.h>
39 #include <utime.h>
40 #include <sys/stat.h>
41 #include <lzlib.h>
42 #if defined(__MSVCRT__) || defined(__OS2__)
43 #include <io.h>
44 #if defined(__MSVCRT__)
45 #define fchmod(x,y) 0
46 #define fchown(x,y,z) 0
47 #define strtoull std::strtoul
48 #define SIGHUP SIGTERM
49 #define S_ISSOCK(x) 0
50 #ifndef S_IRGRP
51 #define S_IRGRP 0
52 #define S_IWGRP 0
53 #define S_IROTH 0
54 #define S_IWOTH 0
55 #endif
56 #endif
57 #endif
58
59 #include "arg_parser.h"
60 #include "lzip.h"
61
62 #ifndef O_BINARY
63 #define O_BINARY 0
64 #endif
65
66 #if CHAR_BIT != 8
67 #error "Environments where CHAR_BIT != 8 are not supported."
68 #endif
69
/* Global verbosity level. Option '--quiet' sets it to -1, which silences
   the message functions in this file; each '--verbose' raises it by one,
   up to 4. */
int verbosity = 0;
71
72 namespace {
73
// Name used as prefix of diagnostics and in the version text.
const char * const program_name = "plzip";
// Year printed in the copyright line of the version text.
const char * const program_year = "2021";
// Name shown in the usage line and in the "Try '... --help'" hint.
const char * invocation_name = program_name;		// default value

/* Recognized file name extensions. 'from' is the suffix of a compressed
   file name; 'to' replaces it when the decompressed name is built.
   The first entry is the suffix appended when compressing.
   The list ends with an entry whose 'from' is null. */
const struct { const char * from; const char * to; } known_extensions[] = {
  { ".lz",  ""     },
  { ".tlz", ".tar" },
  { 0,      0      } };

// Encoder parameters selected by options -0 .. -9, -m, and -s.
struct Lzma_options
  {
  int dictionary_size;		// 4 KiB .. 512 MiB
  int match_len_limit;		// 5 .. 273
  };

// Operation requested on the command line; m_compress is the default.
enum Mode { m_compress, m_decompress, m_list, m_test };

/* Variables used in signal handler context.
   They are not declared volatile because the handler never returns. */
std::string output_filename;	// name of the output file; empty for stdout
int outfd = -1;			// output file descriptor, -1 if none is open
// true while an output file created by this process may need to be removed
bool delete_output_on_interrupt = false;
96
97
show_help(const long num_online)98 void show_help( const long num_online )
99 {
100 std::printf( "Plzip is a massively parallel (multi-threaded) implementation of lzip, fully\n"
101 "compatible with lzip 1.4 or newer. Plzip uses the compression library lzlib.\n"
102 "\nLzip is a lossless data compressor with a user interface similar to the one\n"
103 "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n"
104 "chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n"
105 "interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n"
106 "compress most files more than bzip2 (lzip -9). Decompression speed is\n"
107 "intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n"
108 "a data recovery perspective. Lzip has been designed, written, and tested\n"
109 "with great care to replace gzip and bzip2 as the standard general-purpose\n"
110 "compressed format for unix-like systems.\n"
111 "\nPlzip can compress/decompress large files on multiprocessor machines much\n"
112 "faster than lzip, at the cost of a slightly reduced compression ratio (0.4\n"
113 "to 2 percent larger compressed files). Note that the number of usable\n"
114 "threads is limited by file size; on files larger than a few GB plzip can use\n"
115 "hundreds of processors, but on files of only a few MB plzip is no faster\n"
116 "than lzip.\n"
117 "\nUsage: %s [options] [files]\n", invocation_name );
118 std::printf( "\nOptions:\n"
119 " -h, --help display this help and exit\n"
120 " -V, --version output version information and exit\n"
121 " -a, --trailing-error exit with error status if trailing data\n"
122 " -B, --data-size=<bytes> set size of input data blocks [2x8=16 MiB]\n"
123 " -c, --stdout write to standard output, keep input files\n"
124 " -d, --decompress decompress\n"
125 " -f, --force overwrite existing output files\n"
126 " -F, --recompress force re-compression of compressed files\n"
127 " -k, --keep keep (don't delete) input files\n"
128 " -l, --list print (un)compressed file sizes\n"
129 " -m, --match-length=<bytes> set match length limit in bytes [36]\n"
130 " -n, --threads=<n> set number of (de)compression threads [%ld]\n"
131 " -o, --output=<file> write to <file>, keep input files\n"
132 " -q, --quiet suppress all messages\n"
133 " -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n"
134 " -t, --test test compressed file integrity\n"
135 " -v, --verbose be verbose (a 2nd -v gives more)\n"
136 " -0 .. -9 set compression level [default 6]\n"
137 " --fast alias for -0\n"
138 " --best alias for -9\n"
139 " --loose-trailing allow trailing data seeming corrupt header\n"
140 " --in-slots=<n> number of 1 MiB input packets buffered [4]\n"
141 " --out-slots=<n> number of 1 MiB output packets buffered [64]\n"
142 " --check-lib compare version of lzlib.h with liblz.{a,so}\n",
143 num_online );
144 if( verbosity >= 1 )
145 {
146 std::printf( " --debug=<level> print mode(2), debug statistics(1) to stderr\n" );
147 }
148 std::printf( "\nIf no file names are given, or if a file is '-', plzip compresses or\n"
149 "decompresses from standard input to standard output.\n"
150 "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
151 "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
152 "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n"
153 "to 2^29 bytes.\n"
154 "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n"
155 "scale optimal for all files. If your files are large, very repetitive,\n"
156 "etc, you may need to use the options --dictionary-size and --match-length\n"
157 "directly to achieve optimal performance.\n"
158 "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
159 "'tar -xf foo.tar.lz' or 'plzip -cd foo.tar.lz | tar -xf -'.\n"
160 "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
161 "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
162 "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
163 "caused plzip to panic.\n"
164 "\nReport bugs to lzip-bug@nongnu.org\n"
165 "Plzip home page: http://www.nongnu.org/lzip/plzip.html\n" );
166 }
167
168
show_version()169 void show_version()
170 {
171 std::printf( "%s %s\n", program_name, PROGVERSION );
172 std::printf( "Copyright (C) 2009 Laszlo Ersek.\n" );
173 std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
174 std::printf( "Using lzlib %s\n", LZ_version() );
175 std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n"
176 "This is free software: you are free to change and redistribute it.\n"
177 "There is NO WARRANTY, to the extent permitted by law.\n" );
178 }
179
180
check_lib()181 int check_lib()
182 {
183 bool warning = false;
184 if( std::strcmp( LZ_version_string, LZ_version() ) != 0 )
185 { warning = true;
186 if( verbosity >= 0 )
187 std::printf( "warning: LZ_version_string != LZ_version() (%s vs %s)\n",
188 LZ_version_string, LZ_version() ); }
189 #if defined LZ_API_VERSION && LZ_API_VERSION >= 1012
190 if( LZ_API_VERSION != LZ_api_version() )
191 { warning = true;
192 if( verbosity >= 0 )
193 std::printf( "warning: LZ_API_VERSION != LZ_api_version() (%u vs %u)\n",
194 LZ_API_VERSION, LZ_api_version() ); }
195 #endif
196 if( verbosity >= 1 )
197 {
198 std::printf( "Using lzlib %s\n", LZ_version() );
199 #if !defined LZ_API_VERSION
200 std::fputs( "LZ_API_VERSION is not defined.\n", stdout );
201 #elif LZ_API_VERSION >= 1012
202 std::printf( "Using LZ_API_VERSION = %u\n", LZ_api_version() );
203 #else
204 std::printf( "Compiled with LZ_API_VERSION = %u. "
205 "Using an unknown LZ_API_VERSION\n", LZ_API_VERSION );
206 #endif
207 }
208 return warning;
209 }
210
211 } // end namespace
212
operator ()(const char * const msg) const213 void Pretty_print::operator()( const char * const msg ) const
214 {
215 if( verbosity >= 0 )
216 {
217 if( first_post )
218 {
219 first_post = false;
220 std::fputs( padded_name.c_str(), stderr );
221 if( !msg ) std::fflush( stderr );
222 }
223 if( msg ) std::fprintf( stderr, "%s\n", msg );
224 }
225 }
226
227
/* Return a message stating that member format 'version' is not supported.
   The message lives in a static buffer that is overwritten by the next
   call. */
const char * bad_version( const unsigned version )
  {
  enum { bufsize = 80 };
  static char message[bufsize];

  snprintf( message, bufsize, "Version %u member format not supported.",
            version );
  return message;
  }
235
236
/* Format 'dictionary_size' as a short human-readable size using binary
   prefixes ("Ki", "Mi", ...). The value is divided by 1024 while it is
   larger than 9999 or while it is still an exact multiple of 1024.
   The result lives in a static buffer that is overwritten by the next
   call. */
const char * format_ds( const unsigned dictionary_size )
  {
  enum { bufsize = 16, factor = 1024, num_prefixes = 8 };
  static char buf[bufsize];
  static const char * const prefix[num_prefixes] =
    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };

  unsigned value = dictionary_size;
  bool exact = ( value % factor == 0 );
  int steps = 0;			// number of divisions by 'factor'

  while( steps < num_prefixes &&
         ( value > 9999 || ( exact && value >= factor ) ) )
    {
    value /= factor;
    exact = exact && ( value % factor == 0 );
    ++steps;
    }

  // a plain byte count gets one extra leading space instead of a prefix
  const char * const unit = ( steps > 0 ) ? prefix[steps-1] : "";
  const char * const pad = ( steps > 0 ) ? "" : " ";
  snprintf( buf, bufsize, "%s%4u %sB", pad, value, unit );
  return buf;
  }
254
255
show_header(const unsigned dictionary_size)256 void show_header( const unsigned dictionary_size )
257 {
258 std::fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) );
259 }
260
261 namespace {
262
getnum(const char * const ptr,const unsigned long long llimit,const unsigned long long ulimit)263 unsigned long long getnum( const char * const ptr,
264 const unsigned long long llimit,
265 const unsigned long long ulimit )
266 {
267 char * tail;
268 errno = 0;
269 unsigned long long result = strtoull( ptr, &tail, 0 );
270 if( tail == ptr )
271 {
272 show_error( "Bad or missing numerical argument.", 0, true );
273 std::exit( 1 );
274 }
275
276 if( !errno && tail[0] )
277 {
278 const unsigned factor = ( tail[1] == 'i' ) ? 1024 : 1000;
279 int exponent = 0; // 0 = bad multiplier
280 switch( tail[0] )
281 {
282 case 'Y': exponent = 8; break;
283 case 'Z': exponent = 7; break;
284 case 'E': exponent = 6; break;
285 case 'P': exponent = 5; break;
286 case 'T': exponent = 4; break;
287 case 'G': exponent = 3; break;
288 case 'M': exponent = 2; break;
289 case 'K': if( factor == 1024 ) exponent = 1; break;
290 case 'k': if( factor == 1000 ) exponent = 1; break;
291 }
292 if( exponent <= 0 )
293 {
294 show_error( "Bad multiplier in numerical argument.", 0, true );
295 std::exit( 1 );
296 }
297 for( int i = 0; i < exponent; ++i )
298 {
299 if( ulimit / factor >= result ) result *= factor;
300 else { errno = ERANGE; break; }
301 }
302 }
303 if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
304 if( errno )
305 {
306 show_error( "Numerical argument out of limits." );
307 std::exit( 1 );
308 }
309 return result;
310 }
311
312
get_dict_size(const char * const arg)313 int get_dict_size( const char * const arg )
314 {
315 char * tail;
316 const long bits = std::strtol( arg, &tail, 0 );
317 if( bits >= LZ_min_dictionary_bits() &&
318 bits <= LZ_max_dictionary_bits() && *tail == 0 )
319 return 1 << bits;
320 int dictionary_size = getnum( arg, LZ_min_dictionary_size(),
321 LZ_max_dictionary_size() );
322 if( dictionary_size == 65535 ) ++dictionary_size; // no fast encoder
323 return dictionary_size;
324 }
325
326
set_mode(Mode & program_mode,const Mode new_mode)327 void set_mode( Mode & program_mode, const Mode new_mode )
328 {
329 if( program_mode != m_compress && program_mode != new_mode )
330 {
331 show_error( "Only one operation can be specified.", 0, true );
332 std::exit( 1 );
333 }
334 program_mode = new_mode;
335 }
336
337
extension_index(const std::string & name)338 int extension_index( const std::string & name )
339 {
340 for( int eindex = 0; known_extensions[eindex].from; ++eindex )
341 {
342 const std::string ext( known_extensions[eindex].from );
343 if( name.size() > ext.size() &&
344 name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 )
345 return eindex;
346 }
347 return -1;
348 }
349
350
set_c_outname(const std::string & name,const bool filenames_given,const bool force_ext)351 void set_c_outname( const std::string & name, const bool filenames_given,
352 const bool force_ext )
353 {
354 /* zupdate < 1.9 depends on lzip adding the extension '.lz' to name when
355 reading from standard input. */
356 output_filename = name;
357 if( force_ext ||
358 ( !filenames_given && extension_index( output_filename ) < 0 ) )
359 output_filename += known_extensions[0].from;
360 }
361
362
set_d_outname(const std::string & name,const int eindex)363 void set_d_outname( const std::string & name, const int eindex )
364 {
365 if( eindex >= 0 )
366 {
367 const std::string from( known_extensions[eindex].from );
368 if( name.size() > from.size() )
369 {
370 output_filename.assign( name, 0, name.size() - from.size() );
371 output_filename += known_extensions[eindex].to;
372 return;
373 }
374 }
375 output_filename = name; output_filename += ".out";
376 if( verbosity >= 1 )
377 std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
378 program_name, name.c_str(), output_filename.c_str() );
379 }
380
381 } // end namespace
382
open_instream(const char * const name,struct stat * const in_statsp,const bool one_to_one,const bool reg_only)383 int open_instream( const char * const name, struct stat * const in_statsp,
384 const bool one_to_one, const bool reg_only )
385 {
386 int infd = open( name, O_RDONLY | O_BINARY );
387 if( infd < 0 )
388 show_file_error( name, "Can't open input file", errno );
389 else
390 {
391 const int i = fstat( infd, in_statsp );
392 const mode_t mode = in_statsp->st_mode;
393 const bool can_read = ( i == 0 && !reg_only &&
394 ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
395 S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
396 if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
397 {
398 if( verbosity >= 0 )
399 std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
400 program_name, name, ( can_read && one_to_one ) ?
401 ",\n and neither '-c' nor '-o' were specified" : "" );
402 close( infd );
403 infd = -1;
404 }
405 }
406 return infd;
407 }
408
409 namespace {
410
open_instream2(const char * const name,struct stat * const in_statsp,const Mode program_mode,const int eindex,const bool one_to_one,const bool recompress)411 int open_instream2( const char * const name, struct stat * const in_statsp,
412 const Mode program_mode, const int eindex,
413 const bool one_to_one, const bool recompress )
414 {
415 if( program_mode == m_compress && !recompress && eindex >= 0 )
416 {
417 if( verbosity >= 0 )
418 std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
419 program_name, name, known_extensions[eindex].from );
420 return -1;
421 }
422 return open_instream( name, in_statsp, one_to_one, false );
423 }
424
425
open_outstream(const bool force,const bool protect)426 bool open_outstream( const bool force, const bool protect )
427 {
428 const mode_t usr_rw = S_IRUSR | S_IWUSR;
429 const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
430 const mode_t outfd_mode = protect ? usr_rw : all_rw;
431 int flags = O_CREAT | O_WRONLY | O_BINARY;
432 if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
433
434 outfd = open( output_filename.c_str(), flags, outfd_mode );
435 if( outfd >= 0 ) delete_output_on_interrupt = true;
436 else if( verbosity >= 0 )
437 {
438 if( errno == EEXIST )
439 std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
440 program_name, output_filename.c_str() );
441 else
442 std::fprintf( stderr, "%s: Can't create output file '%s': %s\n",
443 program_name, output_filename.c_str(), std::strerror( errno ) );
444 }
445 return ( outfd >= 0 );
446 }
447
448
// Install 'action' as the handler of SIGHUP, SIGINT, and SIGTERM.
void set_signals( void (*action)(int) )
  {
  const int handled[] = { SIGHUP, SIGINT, SIGTERM };
  const unsigned count = sizeof handled / sizeof handled[0];

  for( unsigned i = 0; i < count; ++i )
    std::signal( handled[i], action );
  }
455
456 } // end namespace
457
458 /* This can be called from any thread, main thread or sub-threads alike,
459 since they all call common helper functions like 'xlock' that call
460 cleanup_and_fail() in case of an error.
461 */
cleanup_and_fail(const int retval)462 void cleanup_and_fail( const int retval )
463 {
464 // only one thread can delete and exit
465 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
466
467 set_signals( SIG_IGN ); // ignore signals
468 pthread_mutex_lock( &mutex ); // ignore errors to avoid loop
469 const int saved_verbosity = verbosity;
470 verbosity = -1; // suppress messages from other threads
471 if( delete_output_on_interrupt )
472 {
473 delete_output_on_interrupt = false;
474 if( saved_verbosity >= 0 )
475 std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
476 program_name, output_filename.c_str() );
477 if( outfd >= 0 ) { close( outfd ); outfd = -1; }
478 if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT &&
479 saved_verbosity >= 0 )
480 std::fprintf( stderr, "%s: WARNING: deletion of output file "
481 "(apparently) failed.\n", program_name );
482 }
483 std::exit( retval );
484 }
485
486 namespace {
487
signal_handler(int)488 extern "C" void signal_handler( int )
489 {
490 show_error( "Control-C or similar caught, quitting." );
491 cleanup_and_fail( 1 );
492 }
493
494
check_tty_in(const char * const input_filename,const int infd,const Mode program_mode,int & retval)495 bool check_tty_in( const char * const input_filename, const int infd,
496 const Mode program_mode, int & retval )
497 {
498 if( ( program_mode == m_decompress || program_mode == m_test ) &&
499 isatty( infd ) ) // for example /dev/tty
500 { show_file_error( input_filename,
501 "I won't read compressed data from a terminal." );
502 close( infd ); set_retval( retval, 1 );
503 if( program_mode != m_test ) cleanup_and_fail( retval );
504 return false; }
505 return true;
506 }
507
check_tty_out(const Mode program_mode)508 bool check_tty_out( const Mode program_mode )
509 {
510 if( program_mode == m_compress && isatty( outfd ) )
511 { show_file_error( output_filename.size() ?
512 output_filename.c_str() : "(stdout)",
513 "I won't write compressed data to a terminal." );
514 return false; }
515 return true;
516 }
517
518
519 // Set permissions, owner, and times.
close_and_set_permissions(const struct stat * const in_statsp)520 void close_and_set_permissions( const struct stat * const in_statsp )
521 {
522 bool warning = false;
523 if( in_statsp )
524 {
525 const mode_t mode = in_statsp->st_mode;
526 // fchown will in many cases return with EPERM, which can be safely ignored.
527 if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
528 { if( fchmod( outfd, mode ) != 0 ) warning = true; }
529 else
530 if( errno != EPERM ||
531 fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
532 warning = true;
533 }
534 if( close( outfd ) != 0 )
535 {
536 show_error( "Error closing output file", errno );
537 cleanup_and_fail( 1 );
538 }
539 outfd = -1;
540 delete_output_on_interrupt = false;
541 if( in_statsp )
542 {
543 struct utimbuf t;
544 t.actime = in_statsp->st_atime;
545 t.modtime = in_statsp->st_mtime;
546 if( utime( output_filename.c_str(), &t ) != 0 ) warning = true;
547 }
548 if( warning && verbosity >= 1 )
549 show_error( "Can't change output file attributes." );
550 }
551
552 } // end namespace
553
554
show_error(const char * const msg,const int errcode,const bool help)555 void show_error( const char * const msg, const int errcode, const bool help )
556 {
557 if( verbosity < 0 ) return;
558 if( msg && msg[0] )
559 std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg,
560 ( errcode > 0 ) ? ": " : "",
561 ( errcode > 0 ) ? std::strerror( errcode ) : "" );
562 if( help )
563 std::fprintf( stderr, "Try '%s --help' for more information.\n",
564 invocation_name );
565 }
566
567
show_file_error(const char * const filename,const char * const msg,const int errcode)568 void show_file_error( const char * const filename, const char * const msg,
569 const int errcode )
570 {
571 if( verbosity >= 0 )
572 std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
573 ( errcode > 0 ) ? ": " : "",
574 ( errcode > 0 ) ? std::strerror( errcode ) : "" );
575 }
576
577
internal_error(const char * const msg)578 void internal_error( const char * const msg )
579 {
580 if( verbosity >= 0 )
581 std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
582 std::exit( 3 );
583 }
584
585
show_progress(const unsigned long long packet_size,const unsigned long long cfile_size,const Pretty_print * const p)586 void show_progress( const unsigned long long packet_size,
587 const unsigned long long cfile_size,
588 const Pretty_print * const p )
589 {
590 static unsigned long long csize = 0; // file_size / 100
591 static unsigned long long pos = 0;
592 static const Pretty_print * pp = 0;
593 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
594 static bool enabled = true;
595
596 if( !enabled ) return;
597 if( p ) // initialize static vars
598 {
599 if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
600 csize = cfile_size; pos = 0; pp = p;
601 }
602 if( pp )
603 {
604 xlock( &mutex );
605 pos += packet_size;
606 if( csize > 0 )
607 std::fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 );
608 else
609 std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
610 pp->reset(); (*pp)(); // restore cursor position
611 xunlock( &mutex );
612 }
613 }
614
615
#if defined(__MSVCRT__)
#include <windows.h>
#define _SC_NPROCESSORS_ONLN   1
#define _SC_THREAD_THREADS_MAX 2

/* Replacement for sysconf used when building with MSVCRT.
   Return the number of processors for _SC_NPROCESSORS_ONLN.
   Return -1 for _SC_THREAD_THREADS_MAX (no limit).
   Return -1 and set errno to EINVAL for any other 'flag'. */
long sysconf( int flag )
  {
  switch( flag )
    {
    case _SC_NPROCESSORS_ONLN:
      {
      SYSTEM_INFO info;
      GetSystemInfo( &info );
      return info.dwNumberOfProcessors;
      }
    case _SC_THREAD_THREADS_MAX: break;
    default: errno = EINVAL;
    }
  return -1;				// unlimited threads or error
  }

#endif	// __MSVCRT__
634
635
/* Program entry point.
   Parses the command line, then lists, compresses, decompresses, or tests
   each of the files given ("-" or no file means standard input).
   Returns the exit status: 0 if all went well, or the highest value set
   through set_retval by a failing step. */
int main( const int argc, const char * const argv[] )
  {
  /* Mapping from gzip/bzip2 style 1..9 compression modes
     to the corresponding LZMA compression modes. */
  const Lzma_options option_mapping[] =
    {
    { 65535,  16 },		// -0 (65535,16 chooses fast encoder)
    { 1 << 20,  5 },		// -1
    { 3 << 19,  6 },		// -2
    { 1 << 21,  8 },		// -3
    { 3 << 20,  12 },		// -4
    { 1 << 22,  20 },		// -5
    { 1 << 23,  36 },		// -6
    { 1 << 24,  68 },		// -7
    { 3 << 23,  132 },		// -8
    { 1 << 25,  273 } };		// -9
  Lzma_options encoder_options = option_mapping[6];	// default = "-6"
  std::string default_output_filename;	// argument of option '--output'
  std::vector< std::string > filenames;
  int data_size = 0;			// 0 = derive it from the dictionary size
  int debug_level = 0;
  int num_workers = 0;		// start this many worker threads
  int in_slots = 4;
  int out_slots = 64;
  Mode program_mode = m_compress;
  bool force = false;
  bool ignore_trailing = true;
  bool keep_input_files = false;
  bool loose_trailing = false;
  bool recompress = false;
  bool to_stdout = false;
  if( argc > 0 ) invocation_name = argv[0];

  // codes of the options that have no short name
  enum { opt_chk = 256, opt_dbg, opt_in, opt_lt, opt_out };
  const Arg_parser::Option options[] =
    {
    { '0', "fast",              Arg_parser::no  },
    { '1', 0,                   Arg_parser::no  },
    { '2', 0,                   Arg_parser::no  },
    { '3', 0,                   Arg_parser::no  },
    { '4', 0,                   Arg_parser::no  },
    { '5', 0,                   Arg_parser::no  },
    { '6', 0,                   Arg_parser::no  },
    { '7', 0,                   Arg_parser::no  },
    { '8', 0,                   Arg_parser::no  },
    { '9', "best",              Arg_parser::no  },
    { 'a', "trailing-error",    Arg_parser::no  },
    { 'b', "member-size",       Arg_parser::yes },
    { 'B', "data-size",         Arg_parser::yes },
    { 'c', "stdout",            Arg_parser::no  },
    { 'd', "decompress",        Arg_parser::no  },
    { 'f', "force",             Arg_parser::no  },
    { 'F', "recompress",        Arg_parser::no  },
    { 'h', "help",              Arg_parser::no  },
    { 'k', "keep",              Arg_parser::no  },
    { 'l', "list",              Arg_parser::no  },
    { 'm', "match-length",      Arg_parser::yes },
    { 'n', "threads",           Arg_parser::yes },
    { 'o', "output",            Arg_parser::yes },
    { 'q', "quiet",             Arg_parser::no  },
    { 's', "dictionary-size",   Arg_parser::yes },
    { 'S', "volume-size",       Arg_parser::yes },
    { 't', "test",              Arg_parser::no  },
    { 'v', "verbose",           Arg_parser::no  },
    { 'V', "version",           Arg_parser::no  },
    { opt_chk, "check-lib",     Arg_parser::no  },
    { opt_dbg, "debug",         Arg_parser::yes },
    { opt_in, "in-slots",       Arg_parser::yes },
    { opt_lt, "loose-trailing", Arg_parser::no  },
    { opt_out, "out-slots",     Arg_parser::yes },
    { 0, 0,                     Arg_parser::no  } };

  const Arg_parser parser( argc, argv, options );
  if( parser.error().size() )				// bad option
    { show_error( parser.error().c_str(), 0, true ); return 1; }

  // number of processors online (at least 1); default for '--threads'
  const long num_online = std::max( 1L, sysconf( _SC_NPROCESSORS_ONLN ) );
  // upper limit for '--threads', capped to INT_MAX / sizeof (pthread_t)
  long max_workers = sysconf( _SC_THREAD_THREADS_MAX );
  if( max_workers < 1 || max_workers > INT_MAX / (int)sizeof (pthread_t) )
    max_workers = INT_MAX / sizeof (pthread_t);

  int argind = 0;
  for( ; argind < parser.arguments(); ++argind )
    {
    const int code = parser.code( argind );
    if( !code ) break;					// no more options
    const std::string & sarg = parser.argument( argind );
    const char * const arg = sarg.c_str();
    switch( code )
      {
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
                encoder_options = option_mapping[code-'0']; break;
      case 'a': ignore_trailing = false; break;
      case 'b': break;			// accepted, but has no effect here
      case 'B': data_size = getnum( arg, 2 * LZ_min_dictionary_size(),
                                    2 * LZ_max_dictionary_size() ); break;
      case 'c': to_stdout = true; break;
      case 'd': set_mode( program_mode, m_decompress ); break;
      case 'f': force = true; break;
      case 'F': recompress = true; break;
      case 'h': show_help( num_online ); return 0;
      case 'k': keep_input_files = true; break;
      case 'l': set_mode( program_mode, m_list ); break;
      case 'm': encoder_options.match_len_limit =
                  getnum( arg, LZ_min_match_len_limit(),
                               LZ_max_match_len_limit() ); break;
      case 'n': num_workers = getnum( arg, 1, max_workers ); break;
      // '-o -' means the same as '-c'
      case 'o': if( sarg == "-" ) to_stdout = true;
                else { default_output_filename = sarg; } break;
      case 'q': verbosity = -1; break;
      case 's': encoder_options.dictionary_size = get_dict_size( arg );
                break;
      case 'S': break;			// accepted, but has no effect here
      case 't': set_mode( program_mode, m_test ); break;
      case 'v': if( verbosity < 4 ) ++verbosity; break;
      case 'V': show_version(); return 0;
      case opt_chk: return check_lib();
      case opt_dbg: debug_level = getnum( arg, 0, 3 ); break;
      case opt_in: in_slots = getnum( arg, 1, 64 ); break;
      case opt_lt: loose_trailing = true; break;
      case opt_out: out_slots = getnum( arg, 1, 1024 ); break;
      default : internal_error( "uncaught option." );
      }
    } // end process options

  if( LZ_version()[0] < '1' )
    { show_error( "Wrong library version. At least lzlib 1.0 is required." );
      return 1; }

#if defined(__MSVCRT__) || defined(__OS2__)
  setmode( STDIN_FILENO, O_BINARY );
  setmode( STDOUT_FILENO, O_BINARY );
#endif

  // the remaining arguments are file names; "-" does not count as a name
  bool filenames_given = false;
  for( ; argind < parser.arguments(); ++argind )
    {
    filenames.push_back( parser.argument( argind ) );
    if( filenames.back() != "-" ) filenames_given = true;
    }
  if( filenames.empty() ) filenames.push_back("-");

  if( program_mode == m_list )
    return list_files( filenames, ignore_trailing, loose_trailing );

  // choose the data block size, or adapt the dictionary size to it
  const bool fast = encoder_options.dictionary_size == 65535 &&
                    encoder_options.match_len_limit == 16;
  if( data_size <= 0 )
    {
    if( fast ) data_size = 1 << 20;
    else data_size = 2 * std::max( 65536, encoder_options.dictionary_size );
    }
  else if( !fast && data_size < encoder_options.dictionary_size )
    encoder_options.dictionary_size =
      std::max( data_size, LZ_min_dictionary_size() );

  // choose the number of worker threads if '--threads' was not given
  if( num_workers <= 0 )
    {
    if( program_mode == m_compress && sizeof (void *) <= 4 )
      {
      // use less than 2.22 GiB on 32 bit systems
      const long long limit = ( 27LL << 25 ) + ( 11LL << 27 );	// 4 * 568 MiB
      const long long mem = ( 27LL * data_size ) / 8 +
        ( fast ? 3LL << 19 : 11LL * encoder_options.dictionary_size );
      const int nmax32 = std::max( limit / mem, 1LL );
      if( max_workers > nmax32 ) max_workers = nmax32;
      }
    num_workers = std::min( num_online, max_workers );
    }

  if( program_mode == m_test ) to_stdout = false;	// apply overrides
  if( program_mode == m_test || to_stdout ) default_output_filename.clear();

  if( to_stdout && program_mode != m_test )	// check tty only once
    { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; }
  else outfd = -1;

  // true if all the output goes to the file named by '--output'
  const bool to_file = !to_stdout && program_mode != m_test &&
                       default_output_filename.size();
  // catch signals only if an output file may have to be deleted
  if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
    set_signals( signal_handler );

  Pretty_print pp( filenames );

  int failed_tests = 0;
  int retval = 0;
  // true if each input file produces its own output file
  const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
  bool stdin_used = false;
  for( unsigned i = 0; i < filenames.size(); ++i )
    {
    std::string input_filename;		// stays empty when reading from stdin
    int infd;
    struct stat in_stats;		// set only when a named file is opened

    pp.set_name( filenames[i] );
    if( filenames[i] == "-" )
      {
      if( stdin_used ) continue; else stdin_used = true;  // read stdin only once
      infd = STDIN_FILENO;
      if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
      if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); }
      }
    else
      {
      const int eindex = extension_index( input_filename = filenames[i] );
      infd = open_instream2( input_filename.c_str(), &in_stats, program_mode,
                             eindex, one_to_one, recompress );
      if( infd < 0 ) { set_retval( retval, 1 ); continue; }
      if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
      if( one_to_one )			// open outfd after verifying infd
        {
        if( program_mode == m_compress )
          set_c_outname( input_filename, true, true );
        else set_d_outname( input_filename, eindex );
        if( !open_outstream( force, true ) )
          { close( infd ); set_retval( retval, 1 ); continue; }
        }
      }

    if( one_to_one && !check_tty_out( program_mode ) )
      { set_retval( retval, 1 ); return retval; }	// don't delete a tty

    if( to_file && outfd < 0 )		// open outfd after verifying infd
      {
      if( program_mode == m_compress ) set_c_outname( default_output_filename,
                                       filenames_given, false );
      else output_filename = default_output_filename;
      if( !open_outstream( force, false ) || !check_tty_out( program_mode ) )
        return 1;	// check tty only once and don't try to delete a tty
      }

    // the input file attributes are copied only in one-to-one operation
    const struct stat * const in_statsp =
      ( input_filename.size() && one_to_one ) ? &in_stats : 0;
    const bool infd_isreg = input_filename.size() && S_ISREG( in_stats.st_mode );
    // size of the input file divided by 100 (rounded up), or 0 if not regular
    const unsigned long long cfile_size =
      infd_isreg ? ( in_stats.st_size + 99 ) / 100 : 0;
    int tmp;
    if( program_mode == m_compress )
      tmp = compress( cfile_size, data_size, encoder_options.dictionary_size,
                      encoder_options.match_len_limit, num_workers,
                      infd, outfd, pp, debug_level );
    else
      tmp = decompress( cfile_size, num_workers, infd, outfd, pp,
                        debug_level, in_slots, out_slots, ignore_trailing,
                        loose_trailing, infd_isreg, one_to_one );
    if( close( infd ) != 0 )
      { show_file_error( pp.name(), "Error closing input file", errno );
        set_retval( tmp, 1 ); }
    set_retval( retval, tmp );
    // a failure is fatal except when testing, where it is only counted
    if( tmp )
      { if( program_mode != m_test ) cleanup_and_fail( retval );
        else ++failed_tests; }

    if( delete_output_on_interrupt && one_to_one )
      close_and_set_permissions( in_statsp );
    // the input file is removed only after its output has been closed
    if( input_filename.size() && !keep_input_files && one_to_one )
      std::remove( input_filename.c_str() );
    }
  if( delete_output_on_interrupt ) close_and_set_permissions( 0 );	// -o
  else if( outfd >= 0 && close( outfd ) != 0 )				// -c
    {
    show_error( "Error closing stdout", errno );
    set_retval( retval, 1 );
    }
  if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 )
    std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",
                  program_name, failed_tests,
                  ( failed_tests == 1 ) ? "file" : "files" );
  return retval;
  }
907