1 /* Lzip - LZMA lossless data compressor
2 Copyright (C) 2008-2021 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17 /*
18 Exit status: 0 for a normal exit, 1 for environmental problems
19 (file not found, invalid flags, I/O errors, etc), 2 to indicate a
20 corrupt or invalid input file, 3 for an internal consistency error
21 (eg, bug) which caused lzip to panic.
22 */
23
24 #define _FILE_OFFSET_BITS 64
25
26 #include <algorithm>
27 #include <cctype>
28 #include <cerrno>
29 #include <climits>
30 #include <csignal>
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cstring>
34 #include <new>
35 #include <string>
36 #include <vector>
37 #include <fcntl.h>
38 #include <stdint.h>
39 #include <unistd.h>
40 #include <utime.h>
41 #include <sys/stat.h>
42 #if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
43 #include <io.h>
44 #if defined(__MSVCRT__)
45 #define fchmod(x,y) 0
46 #define fchown(x,y,z) 0
47 #define strtoull std::strtoul
48 #define SIGHUP SIGTERM
49 #define S_ISSOCK(x) 0
50 #ifndef S_IRGRP
51 #define S_IRGRP 0
52 #define S_IWGRP 0
53 #define S_IROTH 0
54 #define S_IWOTH 0
55 #endif
56 #endif
57 #if defined(__DJGPP__)
58 #define S_ISSOCK(x) 0
59 #define S_ISVTX 0
60 #endif
61 #endif
62
63 #include "arg_parser.h"
64 #include "lzip.h"
65 #include "decoder.h"
66 #include "encoder_base.h"
67 #include "encoder.h"
68 #include "fast_encoder.h"
69
70 #ifndef O_BINARY
71 #define O_BINARY 0
72 #endif
73
74 #if CHAR_BIT != 8
75 #error "Environments where CHAR_BIT != 8 are not supported."
76 #endif
77
78 int verbosity = 0;
79
80 namespace {
81
// Name used in diagnostics and the year shown by --version.
const char * const program_name = "lzip";
const char * const program_year = "2021";
// Name shown in usage hints; main() replaces it with argv[0] when available.
const char * invocation_name = program_name;

/* Table of recognized compressed-file suffixes ('from') and the suffix that
   replaces each one on decompression ('to'). A null 'from' ends the table. */
const struct
  {
  const char * from;
  const char * to;
  }
known_extensions[] =
  {
  { ".lz", "" },
  { ".tlz", ".tar" },
  { 0, 0 }
  };

// Encoder tuning parameters selected by -0 .. -9, -s and -m.
struct Lzma_options
  {
  int dictionary_size;		// 4 KiB .. 512 MiB
  int match_len_limit;		// 5 .. 273
  };

// Operation requested on the command line; compression is the default.
enum Mode { m_compress = 0, m_decompress = 1, m_list = 2, m_test = 3 };

/* State shared with the signal handler.
   Not declared volatile because the handler never returns. */
std::string output_filename;			// current output file name
int outfd = -1;					// current output descriptor, -1 if none
bool delete_output_on_interrupt = false;	// remove output_filename on failure
104
105
show_help()106 void show_help()
107 {
108 std::printf( "Lzip is a lossless data compressor with a user interface similar to the one\n"
109 "of gzip or bzip2. Lzip uses a simplified form of the 'Lempel-Ziv-Markov\n"
110 "chain-Algorithm' (LZMA) stream format, chosen to maximize safety and\n"
111 "interoperability. Lzip can compress about as fast as gzip (lzip -0) or\n"
112 "compress most files more than bzip2 (lzip -9). Decompression speed is\n"
113 "intermediate between gzip and bzip2. Lzip is better than gzip and bzip2 from\n"
114 "a data recovery perspective. Lzip has been designed, written, and tested\n"
115 "with great care to replace gzip and bzip2 as the standard general-purpose\n"
116 "compressed format for unix-like systems.\n"
117 "\nUsage: %s [options] [files]\n", invocation_name );
118 std::printf( "\nOptions:\n"
119 " -h, --help display this help and exit\n"
120 " -V, --version output version information and exit\n"
121 " -a, --trailing-error exit with error status if trailing data\n"
122 " -b, --member-size=<bytes> set member size limit in bytes\n"
123 " -c, --stdout write to standard output, keep input files\n"
124 " -d, --decompress decompress\n"
125 " -f, --force overwrite existing output files\n"
126 " -F, --recompress force re-compression of compressed files\n"
127 " -k, --keep keep (don't delete) input files\n"
128 " -l, --list print (un)compressed file sizes\n"
129 " -m, --match-length=<bytes> set match length limit in bytes [36]\n"
130 " -o, --output=<file> write to <file>, keep input files\n"
131 " -q, --quiet suppress all messages\n"
132 " -s, --dictionary-size=<bytes> set dictionary size limit in bytes [8 MiB]\n"
133 " -S, --volume-size=<bytes> set volume size limit in bytes\n"
134 " -t, --test test compressed file integrity\n"
135 " -v, --verbose be verbose (a 2nd -v gives more)\n"
136 " -0 .. -9 set compression level [default 6]\n"
137 " --fast alias for -0\n"
138 " --best alias for -9\n"
139 " --loose-trailing allow trailing data seeming corrupt header\n"
140 "\nIf no file names are given, or if a file is '-', lzip compresses or\n"
141 "decompresses from standard input to standard output.\n"
142 "Numbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
143 "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n"
144 "Dictionary sizes 12 to 29 are interpreted as powers of two, meaning 2^12\n"
145 "to 2^29 bytes.\n"
146 "\nThe bidimensional parameter space of LZMA can't be mapped to a linear\n"
147 "scale optimal for all files. If your files are large, very repetitive,\n"
148 "etc, you may need to use the options --dictionary-size and --match-length\n"
149 "directly to achieve optimal performance.\n"
150 "\nTo extract all the files from archive 'foo.tar.lz', use the commands\n"
151 "'tar -xf foo.tar.lz' or 'lzip -cd foo.tar.lz | tar -xf -'.\n"
152 "\nExit status: 0 for a normal exit, 1 for environmental problems (file\n"
153 "not found, invalid flags, I/O errors, etc), 2 to indicate a corrupt or\n"
154 "invalid input file, 3 for an internal consistency error (eg, bug) which\n"
155 "caused lzip to panic.\n"
156 "\nThe ideas embodied in lzip are due to (at least) the following people:\n"
157 "Abraham Lempel and Jacob Ziv (for the LZ algorithm), Andrey Markov (for the\n"
158 "definition of Markov chains), G.N.N. Martin (for the definition of range\n"
159 "encoding), Igor Pavlov (for putting all the above together in LZMA), and\n"
160 "Julian Seward (for bzip2's CLI).\n"
161 "\nReport bugs to lzip-bug@nongnu.org\n"
162 "Lzip home page: http://www.nongnu.org/lzip/lzip.html\n" );
163 }
164
165
show_version()166 void show_version()
167 {
168 std::printf( "%s %s\n", program_name, PROGVERSION );
169 std::printf( "Copyright (C) %s Antonio Diaz Diaz.\n", program_year );
170 std::printf( "License GPLv2+: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>\n"
171 "This is free software: you are free to change and redistribute it.\n"
172 "There is NO WARRANTY, to the extent permitted by law.\n" );
173 }
174
175 } // end namespace
176
operator ()(const char * const msg) const177 void Pretty_print::operator()( const char * const msg ) const
178 {
179 if( verbosity >= 0 )
180 {
181 if( first_post )
182 {
183 first_post = false;
184 std::fputs( padded_name.c_str(), stderr );
185 if( !msg ) std::fflush( stderr );
186 }
187 if( msg ) std::fprintf( stderr, "%s\n", msg );
188 }
189 }
190
191
/* Return a message saying that member format 'version' is not supported.
   The message lives in a static buffer that is overwritten by each call. */
const char * bad_version( const unsigned version )
  {
  enum { bufsize = 80 };
  static char message[bufsize];

  snprintf( message, bufsize, "Version %u member format not supported.",
            version );
  return message;
  }
199
200
/* Format 'dictionary_size' as a short human-readable size with a binary
   prefix (for example 8388608 becomes "8 MiB" padded to four digits).
   The value is divided by 1024 while it has more than four digits, or while
   the division is still exact. The result is a static buffer overwritten
   by each call. */
const char * format_ds( const unsigned dictionary_size )
  {
  enum { bufsize = 16, factor = 1024, nprefixes = 8 };
  static char buf[bufsize];
  static const char * const prefix[nprefixes] =
    { "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi" };

  const char * unit = "";		// binary prefix chosen so far
  const char * pad = " ";		// padding used only when no prefix applies
  unsigned value = dictionary_size;
  bool exact = ( value % factor == 0 );	// no remainder lost up to now
  int i = 0;

  while( i < nprefixes && ( value > 9999 || ( exact && value >= factor ) ) )
    {
    value /= factor;
    exact = exact && ( value % factor == 0 );
    unit = prefix[i++];
    pad = "";
    }
  snprintf( buf, bufsize, "%s%4u %sB", pad, value, unit );
  return buf;
  }
218
219
show_header(const unsigned dictionary_size)220 void show_header( const unsigned dictionary_size )
221 {
222 std::fprintf( stderr, "dict %s, ", format_ds( dictionary_size ) );
223 }
224
225 namespace {
226
getnum(const char * const ptr,const unsigned long long llimit,const unsigned long long ulimit)227 unsigned long long getnum( const char * const ptr,
228 const unsigned long long llimit,
229 const unsigned long long ulimit )
230 {
231 char * tail;
232 errno = 0;
233 unsigned long long result = strtoull( ptr, &tail, 0 );
234 if( tail == ptr )
235 {
236 show_error( "Bad or missing numerical argument.", 0, true );
237 std::exit( 1 );
238 }
239
240 if( !errno && tail[0] )
241 {
242 const unsigned factor = ( tail[1] == 'i' ) ? 1024 : 1000;
243 int exponent = 0; // 0 = bad multiplier
244 switch( tail[0] )
245 {
246 case 'Y': exponent = 8; break;
247 case 'Z': exponent = 7; break;
248 case 'E': exponent = 6; break;
249 case 'P': exponent = 5; break;
250 case 'T': exponent = 4; break;
251 case 'G': exponent = 3; break;
252 case 'M': exponent = 2; break;
253 case 'K': if( factor == 1024 ) exponent = 1; break;
254 case 'k': if( factor == 1000 ) exponent = 1; break;
255 }
256 if( exponent <= 0 )
257 {
258 show_error( "Bad multiplier in numerical argument.", 0, true );
259 std::exit( 1 );
260 }
261 for( int i = 0; i < exponent; ++i )
262 {
263 if( ulimit / factor >= result ) result *= factor;
264 else { errno = ERANGE; break; }
265 }
266 }
267 if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
268 if( errno )
269 {
270 show_error( "Numerical argument out of limits." );
271 std::exit( 1 );
272 }
273 return result;
274 }
275
276
get_dict_size(const char * const arg)277 int get_dict_size( const char * const arg )
278 {
279 char * tail;
280 const long bits = std::strtol( arg, &tail, 0 );
281 if( bits >= min_dictionary_bits &&
282 bits <= max_dictionary_bits && *tail == 0 )
283 return 1 << bits;
284 return getnum( arg, min_dictionary_size, max_dictionary_size );
285 }
286
287
set_mode(Mode & program_mode,const Mode new_mode)288 void set_mode( Mode & program_mode, const Mode new_mode )
289 {
290 if( program_mode != m_compress && program_mode != new_mode )
291 {
292 show_error( "Only one operation can be specified.", 0, true );
293 std::exit( 1 );
294 }
295 program_mode = new_mode;
296 }
297
298
extension_index(const std::string & name)299 int extension_index( const std::string & name )
300 {
301 for( int eindex = 0; known_extensions[eindex].from; ++eindex )
302 {
303 const std::string ext( known_extensions[eindex].from );
304 if( name.size() > ext.size() &&
305 name.compare( name.size() - ext.size(), ext.size(), ext ) == 0 )
306 return eindex;
307 }
308 return -1;
309 }
310
311
set_c_outname(const std::string & name,const bool filenames_given,const bool force_ext,const bool multifile)312 void set_c_outname( const std::string & name, const bool filenames_given,
313 const bool force_ext, const bool multifile )
314 {
315 /* zupdate < 1.9 depends on lzip adding the extension '.lz' to name when
316 reading from standard input. */
317 output_filename = name;
318 if( multifile ) output_filename += "00001";
319 if( force_ext || multifile ||
320 ( !filenames_given && extension_index( output_filename ) < 0 ) )
321 output_filename += known_extensions[0].from;
322 }
323
324
set_d_outname(const std::string & name,const int eindex)325 void set_d_outname( const std::string & name, const int eindex )
326 {
327 if( eindex >= 0 )
328 {
329 const std::string from( known_extensions[eindex].from );
330 if( name.size() > from.size() )
331 {
332 output_filename.assign( name, 0, name.size() - from.size() );
333 output_filename += known_extensions[eindex].to;
334 return;
335 }
336 }
337 output_filename = name; output_filename += ".out";
338 if( verbosity >= 1 )
339 std::fprintf( stderr, "%s: Can't guess original name for '%s' -- using '%s'\n",
340 program_name, name.c_str(), output_filename.c_str() );
341 }
342
343 } // end namespace
344
open_instream(const char * const name,struct stat * const in_statsp,const bool one_to_one,const bool reg_only)345 int open_instream( const char * const name, struct stat * const in_statsp,
346 const bool one_to_one, const bool reg_only )
347 {
348 int infd = open( name, O_RDONLY | O_BINARY );
349 if( infd < 0 )
350 show_file_error( name, "Can't open input file", errno );
351 else
352 {
353 const int i = fstat( infd, in_statsp );
354 const mode_t mode = in_statsp->st_mode;
355 const bool can_read = ( i == 0 && !reg_only &&
356 ( S_ISBLK( mode ) || S_ISCHR( mode ) ||
357 S_ISFIFO( mode ) || S_ISSOCK( mode ) ) );
358 if( i != 0 || ( !S_ISREG( mode ) && ( !can_read || one_to_one ) ) )
359 {
360 if( verbosity >= 0 )
361 std::fprintf( stderr, "%s: Input file '%s' is not a regular file%s.\n",
362 program_name, name, ( can_read && one_to_one ) ?
363 ",\n and neither '-c' nor '-o' were specified" : "" );
364 close( infd );
365 infd = -1;
366 }
367 }
368 return infd;
369 }
370
371 namespace {
372
open_instream2(const char * const name,struct stat * const in_statsp,const Mode program_mode,const int eindex,const bool one_to_one,const bool recompress)373 int open_instream2( const char * const name, struct stat * const in_statsp,
374 const Mode program_mode, const int eindex,
375 const bool one_to_one, const bool recompress )
376 {
377 if( program_mode == m_compress && !recompress && eindex >= 0 )
378 {
379 if( verbosity >= 0 )
380 std::fprintf( stderr, "%s: Input file '%s' already has '%s' suffix.\n",
381 program_name, name, known_extensions[eindex].from );
382 return -1;
383 }
384 return open_instream( name, in_statsp, one_to_one, false );
385 }
386
387
open_outstream(const bool force,const bool protect)388 bool open_outstream( const bool force, const bool protect )
389 {
390 const mode_t usr_rw = S_IRUSR | S_IWUSR;
391 const mode_t all_rw = usr_rw | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
392 const mode_t outfd_mode = protect ? usr_rw : all_rw;
393 int flags = O_CREAT | O_WRONLY | O_BINARY;
394 if( force ) flags |= O_TRUNC; else flags |= O_EXCL;
395
396 outfd = open( output_filename.c_str(), flags, outfd_mode );
397 if( outfd >= 0 ) delete_output_on_interrupt = true;
398 else if( verbosity >= 0 )
399 {
400 if( errno == EEXIST )
401 std::fprintf( stderr, "%s: Output file '%s' already exists, skipping.\n",
402 program_name, output_filename.c_str() );
403 else
404 std::fprintf( stderr, "%s: Can't create output file '%s': %s\n",
405 program_name, output_filename.c_str(), std::strerror( errno ) );
406 }
407 return ( outfd >= 0 );
408 }
409
410
// Install 'action' as the handler for SIGHUP, SIGINT and SIGTERM, in that order.
void set_signals( void (*action)(int) )
  {
  const int signums[] = { SIGHUP, SIGINT, SIGTERM };
  const int count = sizeof signums / sizeof signums[0];

  for( int i = 0; i < count; ++i )
    std::signal( signums[i], action );
  }
417
418
cleanup_and_fail(const int retval)419 void cleanup_and_fail( const int retval )
420 {
421 set_signals( SIG_IGN ); // ignore signals
422 if( delete_output_on_interrupt )
423 {
424 delete_output_on_interrupt = false;
425 if( verbosity >= 0 )
426 std::fprintf( stderr, "%s: Deleting output file '%s', if it exists.\n",
427 program_name, output_filename.c_str() );
428 if( outfd >= 0 ) { close( outfd ); outfd = -1; }
429 if( std::remove( output_filename.c_str() ) != 0 && errno != ENOENT )
430 show_error( "WARNING: deletion of output file (apparently) failed." );
431 }
432 std::exit( retval );
433 }
434
435
signal_handler(int)436 extern "C" void signal_handler( int )
437 {
438 show_error( "Control-C or similar caught, quitting." );
439 cleanup_and_fail( 1 );
440 }
441
442
check_tty_in(const char * const input_filename,const int infd,const Mode program_mode,int & retval)443 bool check_tty_in( const char * const input_filename, const int infd,
444 const Mode program_mode, int & retval )
445 {
446 if( ( program_mode == m_decompress || program_mode == m_test ) &&
447 isatty( infd ) ) // for example /dev/tty
448 { show_file_error( input_filename,
449 "I won't read compressed data from a terminal." );
450 close( infd ); set_retval( retval, 1 );
451 if( program_mode != m_test ) cleanup_and_fail( retval );
452 return false; }
453 return true;
454 }
455
check_tty_out(const Mode program_mode)456 bool check_tty_out( const Mode program_mode )
457 {
458 if( program_mode == m_compress && isatty( outfd ) )
459 { show_file_error( output_filename.size() ?
460 output_filename.c_str() : "(stdout)",
461 "I won't write compressed data to a terminal." );
462 return false; }
463 return true;
464 }
465
466
467 // Set permissions, owner, and times.
close_and_set_permissions(const struct stat * const in_statsp)468 void close_and_set_permissions( const struct stat * const in_statsp )
469 {
470 bool warning = false;
471 if( in_statsp )
472 {
473 const mode_t mode = in_statsp->st_mode;
474 // fchown will in many cases return with EPERM, which can be safely ignored.
475 if( fchown( outfd, in_statsp->st_uid, in_statsp->st_gid ) == 0 )
476 { if( fchmod( outfd, mode ) != 0 ) warning = true; }
477 else
478 if( errno != EPERM ||
479 fchmod( outfd, mode & ~( S_ISUID | S_ISGID | S_ISVTX ) ) != 0 )
480 warning = true;
481 }
482 if( close( outfd ) != 0 )
483 {
484 show_error( "Error closing output file", errno );
485 cleanup_and_fail( 1 );
486 }
487 outfd = -1;
488 delete_output_on_interrupt = false;
489 if( in_statsp )
490 {
491 struct utimbuf t;
492 t.actime = in_statsp->st_atime;
493 t.modtime = in_statsp->st_mtime;
494 if( utime( output_filename.c_str(), &t ) != 0 ) warning = true;
495 }
496 if( warning && verbosity >= 1 )
497 show_error( "Can't change output file attributes." );
498 }
499
500
next_filename()501 bool next_filename()
502 {
503 const unsigned name_len = output_filename.size();
504 const unsigned ext_len = std::strlen( known_extensions[0].from );
505 if( name_len >= ext_len + 5 ) // "*00001.lz"
506 for( int i = name_len - ext_len - 1, j = 0; j < 5; --i, ++j )
507 {
508 if( output_filename[i] < '9' ) { ++output_filename[i]; return true; }
509 else output_filename[i] = '0';
510 }
511 return false;
512 }
513
514
compress(const unsigned long long cfile_size,const unsigned long long member_size,const unsigned long long volume_size,const int infd,const Lzma_options & encoder_options,const Pretty_print & pp,const struct stat * const in_statsp,const bool zero)515 int compress( const unsigned long long cfile_size,
516 const unsigned long long member_size,
517 const unsigned long long volume_size, const int infd,
518 const Lzma_options & encoder_options, const Pretty_print & pp,
519 const struct stat * const in_statsp, const bool zero )
520 {
521 int retval = 0;
522 LZ_encoder_base * encoder = 0; // polymorphic encoder
523 if( verbosity >= 1 ) pp();
524
525 if( zero )
526 encoder = new FLZ_encoder( infd, outfd );
527 else
528 {
529 Lzip_header header;
530 if( header.dictionary_size( encoder_options.dictionary_size ) &&
531 encoder_options.match_len_limit >= min_match_len_limit &&
532 encoder_options.match_len_limit <= max_match_len )
533 encoder = new LZ_encoder( header.dictionary_size(),
534 encoder_options.match_len_limit, infd, outfd );
535 else internal_error( "invalid argument to encoder." );
536 }
537
538 unsigned long long in_size = 0, out_size = 0, partial_volume_size = 0;
539 while( true ) // encode one member per iteration
540 {
541 const unsigned long long size = ( volume_size > 0 ) ?
542 std::min( member_size, volume_size - partial_volume_size ) : member_size;
543 show_cprogress( cfile_size, in_size, encoder, &pp ); // init
544 if( !encoder->encode_member( size ) )
545 { pp( "Encoder error." ); retval = 1; break; }
546 in_size += encoder->data_position();
547 out_size += encoder->member_position();
548 if( encoder->data_finished() ) break;
549 if( volume_size > 0 )
550 {
551 partial_volume_size += encoder->member_position();
552 if( partial_volume_size >= volume_size - min_dictionary_size )
553 {
554 partial_volume_size = 0;
555 if( delete_output_on_interrupt )
556 {
557 close_and_set_permissions( in_statsp );
558 if( !next_filename() )
559 { pp( "Too many volume files." ); retval = 1; break; }
560 if( !open_outstream( true, in_statsp ) ) { retval = 1; break; }
561 }
562 }
563 }
564 encoder->reset();
565 }
566
567 if( retval == 0 && verbosity >= 1 )
568 {
569 if( in_size == 0 || out_size == 0 )
570 std::fputs( " no data compressed.\n", stderr );
571 else
572 std::fprintf( stderr, "%6.3f:1, %5.2f%% ratio, %5.2f%% saved, "
573 "%llu in, %llu out.\n",
574 (double)in_size / out_size,
575 ( 100.0 * out_size ) / in_size,
576 100.0 - ( ( 100.0 * out_size ) / in_size ),
577 in_size, out_size );
578 }
579 delete encoder;
580 return retval;
581 }
582
583
/* Return the uppercase hexadecimal digit for 'value' (0 to 15),
   or 0 if 'value' is out of range. */
unsigned char xdigit( const unsigned value )
  {
  static const char digits[] = "0123456789ABCDEF";

  if( value > 15 ) return 0;
  return digits[value];
  }
590
591
show_trailing_data(const uint8_t * const data,const int size,const Pretty_print & pp,const bool all,const int ignore_trailing)592 bool show_trailing_data( const uint8_t * const data, const int size,
593 const Pretty_print & pp, const bool all,
594 const int ignore_trailing ) // -1 = show
595 {
596 if( verbosity >= 4 || ignore_trailing <= 0 )
597 {
598 std::string msg;
599 if( !all ) msg = "first bytes of ";
600 msg += "trailing data = ";
601 for( int i = 0; i < size; ++i )
602 {
603 msg += xdigit( data[i] >> 4 );
604 msg += xdigit( data[i] & 0x0F );
605 msg += ' ';
606 }
607 msg += '\'';
608 for( int i = 0; i < size; ++i )
609 { if( std::isprint( data[i] ) ) msg += data[i]; else msg += '.'; }
610 msg += '\'';
611 pp( msg.c_str() );
612 if( ignore_trailing == 0 ) show_file_error( pp.name(), trailing_msg );
613 }
614 return ( ignore_trailing > 0 );
615 }
616
617
decompress(const unsigned long long cfile_size,const int infd,const Pretty_print & pp,const bool ignore_trailing,const bool loose_trailing,const bool testing)618 int decompress( const unsigned long long cfile_size, const int infd,
619 const Pretty_print & pp, const bool ignore_trailing,
620 const bool loose_trailing, const bool testing )
621 {
622 int retval = 0;
623 unsigned long long partial_file_pos = 0;
624 Range_decoder rdec( infd );
625 for( bool first_member = true; ; first_member = false )
626 {
627 Lzip_header header;
628 rdec.reset_member_position();
629 const int size = rdec.read_data( header.data, Lzip_header::size );
630 if( rdec.finished() ) // End Of File
631 {
632 if( first_member )
633 { show_file_error( pp.name(), "File ends unexpectedly at member header." );
634 retval = 2; }
635 else if( header.verify_prefix( size ) )
636 { pp( "Truncated header in multimember file." );
637 show_trailing_data( header.data, size, pp, true, -1 );
638 retval = 2; }
639 else if( size > 0 && !show_trailing_data( header.data, size, pp,
640 true, ignore_trailing ) )
641 retval = 2;
642 break;
643 }
644 if( !header.verify_magic() )
645 {
646 if( first_member )
647 { show_file_error( pp.name(), bad_magic_msg ); retval = 2; }
648 else if( !loose_trailing && header.verify_corrupt() )
649 { pp( corrupt_mm_msg );
650 show_trailing_data( header.data, size, pp, false, -1 );
651 retval = 2; }
652 else if( !show_trailing_data( header.data, size, pp, false, ignore_trailing ) )
653 retval = 2;
654 break;
655 }
656 if( !header.verify_version() )
657 { pp( bad_version( header.version() ) ); retval = 2; break; }
658 const unsigned dictionary_size = header.dictionary_size();
659 if( !isvalid_ds( dictionary_size ) )
660 { pp( bad_dict_msg ); retval = 2; break; }
661
662 if( verbosity >= 2 || ( verbosity == 1 && first_member ) ) pp();
663
664 LZ_decoder decoder( rdec, dictionary_size, outfd );
665 show_dprogress( cfile_size, partial_file_pos, &rdec, &pp ); // init
666 const int result = decoder.decode_member( pp );
667 partial_file_pos += rdec.member_position();
668 if( result != 0 )
669 {
670 if( verbosity >= 0 && result <= 2 )
671 {
672 pp();
673 std::fprintf( stderr, "%s at pos %llu\n", ( result == 2 ) ?
674 "File ends unexpectedly" : "Decoder error",
675 partial_file_pos );
676 }
677 retval = 2; break;
678 }
679 if( verbosity >= 2 )
680 { std::fputs( testing ? "ok\n" : "done\n", stderr ); pp.reset(); }
681 }
682 if( verbosity == 1 && retval == 0 )
683 std::fputs( testing ? "ok\n" : "done\n", stderr );
684 return retval;
685 }
686
687 } // end namespace
688
689
show_error(const char * const msg,const int errcode,const bool help)690 void show_error( const char * const msg, const int errcode, const bool help )
691 {
692 if( verbosity < 0 ) return;
693 if( msg && msg[0] )
694 std::fprintf( stderr, "%s: %s%s%s\n", program_name, msg,
695 ( errcode > 0 ) ? ": " : "",
696 ( errcode > 0 ) ? std::strerror( errcode ) : "" );
697 if( help )
698 std::fprintf( stderr, "Try '%s --help' for more information.\n",
699 invocation_name );
700 }
701
702
show_file_error(const char * const filename,const char * const msg,const int errcode)703 void show_file_error( const char * const filename, const char * const msg,
704 const int errcode )
705 {
706 if( verbosity >= 0 )
707 std::fprintf( stderr, "%s: %s: %s%s%s\n", program_name, filename, msg,
708 ( errcode > 0 ) ? ": " : "",
709 ( errcode > 0 ) ? std::strerror( errcode ) : "" );
710 }
711
712
internal_error(const char * const msg)713 void internal_error( const char * const msg )
714 {
715 if( verbosity >= 0 )
716 std::fprintf( stderr, "%s: internal error: %s\n", program_name, msg );
717 std::exit( 3 );
718 }
719
720
show_cprogress(const unsigned long long cfile_size,const unsigned long long partial_size,const Matchfinder_base * const m,const Pretty_print * const p)721 void show_cprogress( const unsigned long long cfile_size,
722 const unsigned long long partial_size,
723 const Matchfinder_base * const m,
724 const Pretty_print * const p )
725 {
726 static unsigned long long csize = 0; // file_size / 100
727 static unsigned long long psize = 0;
728 static const Matchfinder_base * mb = 0;
729 static const Pretty_print * pp = 0;
730 static bool enabled = true;
731
732 if( !enabled ) return;
733 if( p ) // initialize static vars
734 {
735 if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
736 csize = cfile_size; psize = partial_size; mb = m; pp = p;
737 }
738 if( mb && pp )
739 {
740 const unsigned long long pos = psize + mb->data_position();
741 if( csize > 0 )
742 std::fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 );
743 else
744 std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
745 pp->reset(); (*pp)(); // restore cursor position
746 }
747 }
748
749
show_dprogress(const unsigned long long cfile_size,const unsigned long long partial_size,const Range_decoder * const d,const Pretty_print * const p)750 void show_dprogress( const unsigned long long cfile_size,
751 const unsigned long long partial_size,
752 const Range_decoder * const d,
753 const Pretty_print * const p )
754 {
755 static unsigned long long csize = 0; // file_size / 100
756 static unsigned long long psize = 0;
757 static const Range_decoder * rdec = 0;
758 static const Pretty_print * pp = 0;
759 static int counter = 0;
760 static bool enabled = true;
761
762 if( !enabled ) return;
763 if( p ) // initialize static vars
764 {
765 if( verbosity < 2 || !isatty( STDERR_FILENO ) ) { enabled = false; return; }
766 csize = cfile_size; psize = partial_size; rdec = d; pp = p; counter = 0;
767 }
768 if( rdec && pp && --counter <= 0 )
769 {
770 const unsigned long long pos = psize + rdec->member_position();
771 counter = 7; // update display every 114688 bytes
772 if( csize > 0 )
773 std::fprintf( stderr, "%4llu%% %.1f MB\r", pos / csize, pos / 1000000.0 );
774 else
775 std::fprintf( stderr, " %.1f MB\r", pos / 1000000.0 );
776 pp->reset(); (*pp)(); // restore cursor position
777 }
778 }
779
780
main(const int argc,const char * const argv[])781 int main( const int argc, const char * const argv[] )
782 {
783 /* Mapping from gzip/bzip2 style 1..9 compression modes
784 to the corresponding LZMA compression modes. */
785 const Lzma_options option_mapping[] =
786 {
787 { 1 << 16, 16 }, // -0
788 { 1 << 20, 5 }, // -1
789 { 3 << 19, 6 }, // -2
790 { 1 << 21, 8 }, // -3
791 { 3 << 20, 12 }, // -4
792 { 1 << 22, 20 }, // -5
793 { 1 << 23, 36 }, // -6
794 { 1 << 24, 68 }, // -7
795 { 3 << 23, 132 }, // -8
796 { 1 << 25, 273 } }; // -9
797 Lzma_options encoder_options = option_mapping[6]; // default = "-6"
798 const unsigned long long max_member_size = 0x0008000000000000ULL; /* 2 PiB */
799 const unsigned long long max_volume_size = 0x4000000000000000ULL; /* 4 EiB */
800 unsigned long long member_size = max_member_size;
801 unsigned long long volume_size = 0;
802 std::string default_output_filename;
803 std::vector< std::string > filenames;
804 Mode program_mode = m_compress;
805 bool force = false;
806 bool ignore_trailing = true;
807 bool keep_input_files = false;
808 bool loose_trailing = false;
809 bool recompress = false;
810 bool to_stdout = false;
811 bool zero = false;
812 if( argc > 0 ) invocation_name = argv[0];
813
814 enum { opt_lt = 256 };
815 const Arg_parser::Option options[] =
816 {
817 { '0', "fast", Arg_parser::no },
818 { '1', 0, Arg_parser::no },
819 { '2', 0, Arg_parser::no },
820 { '3', 0, Arg_parser::no },
821 { '4', 0, Arg_parser::no },
822 { '5', 0, Arg_parser::no },
823 { '6', 0, Arg_parser::no },
824 { '7', 0, Arg_parser::no },
825 { '8', 0, Arg_parser::no },
826 { '9', "best", Arg_parser::no },
827 { 'a', "trailing-error", Arg_parser::no },
828 { 'b', "member-size", Arg_parser::yes },
829 { 'c', "stdout", Arg_parser::no },
830 { 'd', "decompress", Arg_parser::no },
831 { 'f', "force", Arg_parser::no },
832 { 'F', "recompress", Arg_parser::no },
833 { 'h', "help", Arg_parser::no },
834 { 'k', "keep", Arg_parser::no },
835 { 'l', "list", Arg_parser::no },
836 { 'm', "match-length", Arg_parser::yes },
837 { 'n', "threads", Arg_parser::yes },
838 { 'o', "output", Arg_parser::yes },
839 { 'q', "quiet", Arg_parser::no },
840 { 's', "dictionary-size", Arg_parser::yes },
841 { 'S', "volume-size", Arg_parser::yes },
842 { 't', "test", Arg_parser::no },
843 { 'v', "verbose", Arg_parser::no },
844 { 'V', "version", Arg_parser::no },
845 { opt_lt, "loose-trailing", Arg_parser::no },
846 { 0, 0, Arg_parser::no } };
847
848 const Arg_parser parser( argc, argv, options );
849 if( parser.error().size() ) // bad option
850 { show_error( parser.error().c_str(), 0, true ); return 1; }
851
852 int argind = 0;
853 for( ; argind < parser.arguments(); ++argind )
854 {
855 const int code = parser.code( argind );
856 if( !code ) break; // no more options
857 const std::string & sarg = parser.argument( argind );
858 const char * const arg = sarg.c_str();
859 switch( code )
860 {
861 case '0': case '1': case '2': case '3': case '4':
862 case '5': case '6': case '7': case '8': case '9':
863 zero = ( code == '0' );
864 encoder_options = option_mapping[code-'0']; break;
865 case 'a': ignore_trailing = false; break;
866 case 'b': member_size = getnum( arg, 100000, max_member_size ); break;
867 case 'c': to_stdout = true; break;
868 case 'd': set_mode( program_mode, m_decompress ); break;
869 case 'f': force = true; break;
870 case 'F': recompress = true; break;
871 case 'h': show_help(); return 0;
872 case 'k': keep_input_files = true; break;
873 case 'l': set_mode( program_mode, m_list ); break;
874 case 'm': encoder_options.match_len_limit =
875 getnum( arg, min_match_len_limit, max_match_len );
876 zero = false; break;
877 case 'n': break;
878 case 'o': if( sarg == "-" ) to_stdout = true;
879 else { default_output_filename = sarg; } break;
880 case 'q': verbosity = -1; break;
881 case 's': encoder_options.dictionary_size = get_dict_size( arg );
882 zero = false; break;
883 case 'S': volume_size = getnum( arg, 100000, max_volume_size ); break;
884 case 't': set_mode( program_mode, m_test ); break;
885 case 'v': if( verbosity < 4 ) ++verbosity; break;
886 case 'V': show_version(); return 0;
887 case opt_lt: loose_trailing = true; break;
888 default : internal_error( "uncaught option." );
889 }
890 } // end process options
891
892 #if defined(__MSVCRT__) || defined(__OS2__) || defined(__DJGPP__)
893 setmode( STDIN_FILENO, O_BINARY );
894 setmode( STDOUT_FILENO, O_BINARY );
895 #endif
896
897 bool filenames_given = false;
898 for( ; argind < parser.arguments(); ++argind )
899 {
900 filenames.push_back( parser.argument( argind ) );
901 if( filenames.back() != "-" ) filenames_given = true;
902 }
903 if( filenames.empty() ) filenames.push_back("-");
904
905 if( program_mode == m_list )
906 return list_files( filenames, ignore_trailing, loose_trailing );
907
908 if( program_mode == m_compress )
909 {
910 if( volume_size > 0 && !to_stdout && default_output_filename.size() &&
911 filenames.size() > 1 )
912 { show_error( "Only can compress one file when using '-o' and '-S'.",
913 0, true ); return 1; }
914 dis_slots.init();
915 prob_prices.init();
916 }
917 else volume_size = 0;
918 if( program_mode == m_test ) to_stdout = false; // apply overrides
919 if( program_mode == m_test || to_stdout ) default_output_filename.clear();
920
921 if( to_stdout && program_mode != m_test ) // check tty only once
922 { outfd = STDOUT_FILENO; if( !check_tty_out( program_mode ) ) return 1; }
923 else outfd = -1;
924
925 const bool to_file = !to_stdout && program_mode != m_test &&
926 default_output_filename.size();
927 if( !to_stdout && program_mode != m_test && ( filenames_given || to_file ) )
928 set_signals( signal_handler );
929
930 Pretty_print pp( filenames );
931
932 int failed_tests = 0;
933 int retval = 0;
934 const bool one_to_one = !to_stdout && program_mode != m_test && !to_file;
935 bool stdin_used = false;
936 for( unsigned i = 0; i < filenames.size(); ++i )
937 {
938 std::string input_filename;
939 int infd;
940 struct stat in_stats;
941
942 pp.set_name( filenames[i] );
943 if( filenames[i] == "-" )
944 {
945 if( stdin_used ) continue; else stdin_used = true;
946 infd = STDIN_FILENO;
947 if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
948 if( one_to_one ) { outfd = STDOUT_FILENO; output_filename.clear(); }
949 }
950 else
951 {
952 const int eindex = extension_index( input_filename = filenames[i] );
953 infd = open_instream2( input_filename.c_str(), &in_stats, program_mode,
954 eindex, one_to_one, recompress );
955 if( infd < 0 ) { set_retval( retval, 1 ); continue; }
956 if( !check_tty_in( pp.name(), infd, program_mode, retval ) ) continue;
957 if( one_to_one ) // open outfd after verifying infd
958 {
959 if( program_mode == m_compress )
960 set_c_outname( input_filename, true, true, volume_size > 0 );
961 else set_d_outname( input_filename, eindex );
962 if( !open_outstream( force, true ) )
963 { close( infd ); set_retval( retval, 1 ); continue; }
964 }
965 }
966
967 if( one_to_one && !check_tty_out( program_mode ) )
968 { set_retval( retval, 1 ); return retval; } // don't delete a tty
969
970 if( to_file && outfd < 0 ) // open outfd after verifying infd
971 {
972 if( program_mode == m_compress ) set_c_outname( default_output_filename,
973 filenames_given, false, volume_size > 0 );
974 else output_filename = default_output_filename;
975 if( !open_outstream( force, false ) || !check_tty_out( program_mode ) )
976 return 1; // check tty only once and don't try to delete a tty
977 }
978
979 const struct stat * const in_statsp =
980 ( input_filename.size() && one_to_one ) ? &in_stats : 0;
981 const unsigned long long cfile_size =
982 ( input_filename.size() && S_ISREG( in_stats.st_mode ) ) ?
983 ( in_stats.st_size + 99 ) / 100 : 0;
984 int tmp;
985 try {
986 if( program_mode == m_compress )
987 tmp = compress( cfile_size, member_size, volume_size, infd,
988 encoder_options, pp, in_statsp, zero );
989 else
990 tmp = decompress( cfile_size, infd, pp, ignore_trailing,
991 loose_trailing, program_mode == m_test );
992 }
993 catch( std::bad_alloc & )
994 { pp( ( program_mode == m_compress ) ?
995 "Not enough memory. Try a smaller dictionary size." :
996 "Not enough memory." ); tmp = 1; }
997 catch( Error & e ) { pp(); show_error( e.msg, errno ); tmp = 1; }
998 if( close( infd ) != 0 )
999 { show_file_error( pp.name(), "Error closing input file", errno );
1000 set_retval( tmp, 1 ); }
1001 set_retval( retval, tmp );
1002 if( tmp )
1003 { if( program_mode != m_test ) cleanup_and_fail( retval );
1004 else ++failed_tests; }
1005
1006 if( delete_output_on_interrupt && one_to_one )
1007 close_and_set_permissions( in_statsp );
1008 if( input_filename.size() && !keep_input_files && one_to_one &&
1009 ( program_mode != m_compress || volume_size == 0 ) )
1010 std::remove( input_filename.c_str() );
1011 }
1012 if( delete_output_on_interrupt ) close_and_set_permissions( 0 ); // -o
1013 else if( outfd >= 0 && close( outfd ) != 0 ) // -c
1014 {
1015 show_error( "Error closing stdout", errno );
1016 set_retval( retval, 1 );
1017 }
1018 if( failed_tests > 0 && verbosity >= 1 && filenames.size() > 1 )
1019 std::fprintf( stderr, "%s: warning: %d %s failed the test.\n",
1020 program_name, failed_tests,
1021 ( failed_tests == 1 ) ? "file" : "files" );
1022 return retval;
1023 }
1024