1 /* Zcat - decompress and concatenate files to standard output
2    Copyright (C) 2010-2021 Antonio Diaz Diaz.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 2 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 */
17 
18 #define _FILE_OFFSET_BITS 64
19 
20 #include <cerrno>
21 #include <climits>
22 #include <csignal>
23 #include <cstdio>
24 #include <cstdlib>
25 #include <cstring>
26 #include <list>
27 #include <string>
28 #include <vector>
29 #include <dirent.h>
30 #include <fcntl.h>
31 #include <stdint.h>
32 #include <unistd.h>
33 #include <sys/stat.h>
34 #if defined(__MSVCRT__) || defined(__OS2__)
35 #include <io.h>
36 #endif
37 
38 #include "arg_parser.h"
39 #include "rc.h"
40 #include "zutils.h"
41 
42 
43 namespace {
44 
45 #include "recursive.cc"
46 #include "zcatgrep.cc"
47 
// Output transformations selected from the command line.
// A default-constructed object selects plain copying (everything disabled).
struct Cat_options
  {
  int number_lines;		// 0 = no, 1 = nonblank, 2 = all
  bool show_ends;		// append '$' before each newline
  bool show_nonprinting;	// use '^' and 'M-' notation
  bool show_tabs;		// show TAB as '^I'
  bool squeeze_blank;		// drop repeated blank lines

  Cat_options()
    {
    number_lines = 0;
    show_ends = false;
    show_nonprinting = false;
    show_tabs = false;
    squeeze_blank = false;
    }
  };
60 
61 
// Line counter of unlimited size. The count is kept as the decimal text
// that gets copied in front of a numbered line: digits right-aligned in a
// field of (at least) six columns, followed by a tab.
class Line_number
  {
  std::string str;			// padding blanks + digits + '\t'
  unsigned first_digit_pos;		// index in str of the leftmost digit

public:
  Line_number() : str( "     0\t" ), first_digit_pos( 5 ) {}

  // Adds one to the counter, propagating the carry from right to left.
  void next()
    {
    unsigned pos = str.size() - 1;	// index of the trailing tab
    while( pos > first_digit_pos )
      {
      --pos;
      if( str[pos] < '9' ) { str[pos] += 1; return; }
      str[pos] = '0';			// 9 rolls over; carry to the left
      }
    // every digit was a 9; the number needs one more leading digit
    if( first_digit_pos == 0 )
      str.insert( str.begin(), '1' );	// no padding left; grow the string
    else
      {
      --first_digit_pos;		// take over one padding blank
      str[first_digit_pos] = '1';
      }
    }

  // Copies the current text (no terminating null) to 'buf' and returns the
  // number of bytes copied.
  int sprint( uint8_t * const buf )
    {
    const int len = str.size();
    std::memcpy( buf, str.data(), len );
    return len;
    }
  };

Line_number line_number;
89 
90 
show_help()91 void show_help()
92   {
93   std::printf( "zcat copies each file argument to standard output in sequence. If any\n"
94                "file given is compressed, its decompressed content is copied. If a file\n"
95                "given does not exist, and its name does not end with one of the known\n"
96                "extensions, zcat tries the compressed file names corresponding to the\n"
97                "formats supported. If a file fails to decompress, zcat continues copying the\n"
98                "rest of the files.\n"
99                "\nIf a file is specified as '-', data are read from standard input,\n"
100                "decompressed if needed, and sent to standard output. Data read from\n"
101                "standard input must be of the same type; all uncompressed or all in the\n"
102                "same compressed format.\n"
103                "\nIf no files are specified, recursive searches examine the current\n"
104                "working directory, and nonrecursive searches read standard input.\n"
105                "\nThe formats supported are bzip2, gzip, lzip, and xz.\n"
106                "\nUsage: zcat [options] [files]\n"
107                "\nExit status is 0 if no errors occurred, 1 otherwise.\n"
108                "\nOptions:\n"
109                "  -h, --help                   display this help and exit\n"
110                "  -V, --version                output version information and exit\n"
111                "  -A, --show-all               equivalent to '-vET'\n"
112                "  -b, --number-nonblank        number nonblank output lines\n"
113                "  -e                           equivalent to '-vE'\n"
114                "  -E, --show-ends              display '$' at end of each line\n"
115                "  -M, --format=<list>          process only the formats in <list>\n"
116                "  -n, --number                 number all output lines\n"
117                "  -N, --no-rcfile              don't read runtime configuration file\n"
118                "  -O, --force-format=<fmt>     force the format given (bz2, gz, lz, xz)\n"
119                "  -q, --quiet                  suppress all messages\n"
120                "  -r, --recursive              operate recursively on directories\n"
121                "  -R, --dereference-recursive  recursively follow symbolic links\n"
122                "  -s, --squeeze-blank          never more than one single blank line\n"
123                "  -t                           equivalent to '-vT'\n"
124                "  -T, --show-tabs              display TAB characters as '^I'\n"
125                "  -v, --show-nonprinting       use '^' and 'M-' notation, except for LF and TAB\n"
126                "      --verbose                verbose mode (show error messages)\n"
127                "      --bz2=<command>          set compressor and options for bzip2 format\n"
128                "      --gz=<command>           set compressor and options for gzip format\n"
129                "      --lz=<command>           set compressor and options for lzip format\n"
130                "      --xz=<command>           set compressor and options for xz format\n" );
131   show_help_addr();
132   }
133 
134 
// Copies the data read from 'infd' to standard output, applying the
// transformations selected in 'cat_options' (line numbering, '$' at end of
// line, '^' and 'M-' quoting, TAB shown as '^I', squeezing of blank lines).
//
//   infd            file descriptor handed to readblock.
//   buffer_size     number of bytes requested from readblock each time; also
//                   the fill level at which 'outbuf' is written out.
//   inbuf           input buffer. A sentinel newline is stored at inbuf[rd],
//                   so it must have room for buffer_size + 1 bytes.
//   outbuf          output buffer. It is only flushed when a newline (real
//                   or sentinel) is reached, so it must be large enough for
//                   the expansion of a whole input buffer; no size is
//                   checked here.
//   input_filename  name shown in the read error message.
//   cat_options     transformations to apply.
//
// Returns true once readblock returns 0 and the pending output has been
// written; returns false after reporting a read or write error.
bool do_cat( const int infd, const int buffer_size,
             uint8_t * const inbuf, uint8_t * const outbuf,
             const std::string & input_filename,
             const Cat_options & cat_options )
  {
  static int at_bol = 1;	// at begin of line. 0 = false, 1 = true,
				// 2 = at begin of second blank line.
				// Being static, the state carries over from
				// one call to the next.
  int inpos = 0;		// positions in buffers
  int outpos = 0;
  int rd = -1;			// bytes read by the last readblock
				// (-1 makes the first pass refill inbuf)
  unsigned char c;

  while( true )
    {
    // This loop is entered at the start of the data and after every newline
    // byte found by the copy loops below. It repeats while the next byte
    // is also a newline, i.e. across blank lines and buffer boundaries.
    do {
      // Flush point: outbuf is written out once it holds at least
      // buffer_size bytes.
      if( outpos >= buffer_size )
        {
        if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos )
          { show_error( "Write error", errno ); return false; }
        outpos = 0;
        }
      // The sentinel sits at inbuf[rd], so after consuming it inpos is
      // rd + 1. Any smaller inpos means the newline came from the data.
      if( inpos > rd )			// inbuf is empty
        {
        rd = readblock( infd, inbuf, buffer_size );
        // A short read with errno set is reported as an error
        // (presumably readblock clears errno before reading; verify).
        if( rd != buffer_size && errno )
          {
          show_file_error( input_filename.c_str(), "Read error", errno );
          return false;
          }
        if( rd == 0 )			// nothing more to read
          {
          if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos )
            { show_error( "Write error", errno ); return false; }
          outpos = 0;
          return true;
          }
        inpos = 0;
        inbuf[rd] = '\n';		// sentinel newline
        }
      else				// a real newline was found
        {
        // at_bol goes 0 -> 1 at the end of a nonblank line, 1 -> 2 on the
        // first blank line, and stays at 2 on further blank lines.
        if( at_bol > 1 )
          {
          // Repeated blank line: when squeezing, drop it. The 'continue'
          // jumps to the 'while( c == '\n' )' test with the next byte in c.
          if( cat_options.squeeze_blank ) { c = inbuf[inpos++]; continue; }
          }
        else ++at_bol;
        // Blank lines get a number only when all lines are numbered (2).
        if( at_bol > 1 && cat_options.number_lines == 2 )
          {
          line_number.next();
          outpos += line_number.sprint( &outbuf[outpos] );
          }
        if( cat_options.show_ends ) outbuf[outpos++] = '$';
        outbuf[outpos++] = '\n';		// output the newline
        }
      c = inbuf[inpos++];
      }
    while( c == '\n' );

    // Here c is the first byte of a nonblank line, or the continuation of
    // a line split by the end of the buffer (at_bol == 0; not renumbered).
    if( at_bol > 0 && cat_options.number_lines )
      {
      line_number.next();
      outpos += line_number.sprint( &outbuf[outpos] );
      }
    at_bol = 0;

    // the loops below continue until a newline (real or sentinel) is found

    if( cat_options.show_nonprinting )
      while( true )
        {
        if( c < 32 || c >= 127 )	// control, DEL or high-bit byte
          {
          if( c == '\n' ) break;
          // TAB is copied unchanged unless show_tabs is set
          if( c != '\t' || cat_options.show_tabs )
            {
            // high bit set: emit "M-" and quote the low seven bits
            if( c >= 128 )
              { c -= 128; outbuf[outpos++] = 'M'; outbuf[outpos++] = '-'; }
            // control byte -> '^' + letter; DEL -> "^?"
            if( c < 32 ) { c += 64; outbuf[outpos++] = '^'; }
            else if( c == 127 ) { c = '?'; outbuf[outpos++] = '^'; }
            }
          }
        outbuf[outpos++] = c;
        c = inbuf[inpos++];
        }
    else				// not quoting
      while( c != '\n' )
        {
        // '\t' + 64 is 'I', so a TAB is shown as "^I"
        if( c == '\t' && cat_options.show_tabs )
          { c += 64; outbuf[outpos++] = '^'; }
        outbuf[outpos++] = c;
        c = inbuf[inpos++];
        }
    }
  }
229 
230 
cat(int infd,const int format_index,const std::string & input_filename,const Cat_options & cat_options)231 bool cat( int infd, const int format_index, const std::string & input_filename,
232           const Cat_options & cat_options )
233   {
234   enum { buffer_size = 4096, outbuf_size = (5 * buffer_size) + 256 + 1 };
235     // input buffer with space for sentinel newline at the end
236   uint8_t * const inbuf = new uint8_t[buffer_size+1];
237     // output buffer with space for character quoting, 255-digit line number,
238     // worst case flushing respect to inbuf, and a canary byte.
239   uint8_t * const outbuf = new uint8_t[outbuf_size];
240   outbuf[outbuf_size-1] = 0;
241   Children children;
242   bool error = false;
243 
244   if( !set_data_feeder( input_filename, &infd, children, format_index ) ||
245       !do_cat( infd, buffer_size, inbuf, outbuf, input_filename, cat_options ) )
246     error = true;
247   if( !good_status( children, !error ) ) error = true;
248   if( !error && close( infd ) != 0 ) { show_close_error(); error = true; }
249   if( outbuf[outbuf_size-1] != 0 ) internal_error( "buffer overflow." );
250   delete[] outbuf; delete[] inbuf;
251   return !error;
252   }
253 
254 } // end namespace
255 
256 
main(const int argc,const char * const argv[])257 int main( const int argc, const char * const argv[] )
258   {
259   enum { verbose_opt = 256, bz2_opt, gz_opt, lz_opt, xz_opt };
260   int format_index = -1;
261   int recursive = 0;			// 1 = '-r', 2 = '-R'
262   std::list< std::string > filenames;
263   Cat_options cat_options;
264   program_name = "zcat";
265   invocation_name = ( argc > 0 ) ? argv[0] : program_name;
266 
267   const Arg_parser::Option options[] =
268     {
269     { 'A', "show-all",              Arg_parser::no  },	// cat
270     { 'b', "number-nonblank",       Arg_parser::no  },	// cat
271     { 'c', "stdout",                Arg_parser::no  },	// gzip
272     { 'd', "decompress",            Arg_parser::no  },	// gzip
273     { 'e',  0,                      Arg_parser::no  },	// cat
274     { 'E', "show-ends",             Arg_parser::no  },	// cat
275     { 'f', "force",                 Arg_parser::no  },	// gzip
276     { 'h', "help",                  Arg_parser::no  },
277     { 'l', "list",                  Arg_parser::no  },	// gzip
278     { 'L', "license",               Arg_parser::no  },	// gzip
279     { 'M', "format",                Arg_parser::yes },
280     { 'n', "number",                Arg_parser::no  },	// cat
281     { 'N', "no-rcfile",             Arg_parser::no  },
282     { 'O', "force-format",          Arg_parser::yes },
283     { 'q', "quiet",                 Arg_parser::no  },
284     { 'r', "recursive",             Arg_parser::no  },
285     { 'R', "dereference-recursive", Arg_parser::no  },
286     { 's', "squeeze-blank",         Arg_parser::no  },	// cat
287     { 't',  0,                      Arg_parser::no  },	// cat
288     { 'T', "show-tabs",             Arg_parser::no  },	// cat
289     { 'v', "show-nonprinting",      Arg_parser::no  },	// cat
290     { 'V', "version",               Arg_parser::no  },
291     { verbose_opt, "verbose",       Arg_parser::no  },
292     { bz2_opt,     "bz2",           Arg_parser::yes },
293     { gz_opt,      "gz",            Arg_parser::yes },
294     { lz_opt,      "lz",            Arg_parser::yes },
295     { xz_opt,      "xz",            Arg_parser::yes },
296     {  0 ,  0,                      Arg_parser::no  } };
297 
298   const Arg_parser parser( argc, argv, options );
299   if( parser.error().size() )				// bad option
300     { show_error( parser.error().c_str(), 0, true ); return 1; }
301 
302   maybe_process_config_file( parser );
303 
304   int argind = 0;
305   for( ; argind < parser.arguments(); ++argind )
306     {
307     const int code = parser.code( argind );
308     if( !code ) break;					// no more options
309     const std::string & arg = parser.argument( argind );
310     switch( code )
311       {
312       case 'A': cat_options.show_ends = true;
313                 cat_options.show_nonprinting = true;
314                 cat_options.show_tabs = true; break;
315       case 'b': cat_options.number_lines = 1; break;
316       case 'c': break;
317       case 'd': break;
318       case 'e': cat_options.show_nonprinting = true;	// fall through
319       case 'E': cat_options.show_ends = true; break;
320       case 'f': break;
321       case 'h': show_help(); return 0;
322       case 'l': break;
323       case 'L': break;
324       case 'M': parse_format_list( arg ); break;
325       case 'n': if( cat_options.number_lines == 0 )
326                   { cat_options.number_lines = 2; } break;
327       case 'N': break;
328       case 'O': format_index = parse_format_type( arg ); break;
329       case 'q': verbosity = -1; break;
330       case 'r': recursive = 1; break;
331       case 'R': recursive = 2; break;
332       case 's': cat_options.squeeze_blank = true; break;
333       case 't': cat_options.show_nonprinting = true;	// fall through
334       case 'T': cat_options.show_tabs = true; break;
335       case 'v': cat_options.show_nonprinting = true; break;
336       case 'V': show_version(); return 0;
337       case verbose_opt: if( verbosity < 4 ) ++verbosity; break;
338       case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break;
339       case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break;
340       case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break;
341       case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break;
342       default : internal_error( "uncaught option." );
343       }
344     } // end process options
345 
346 #if defined(__MSVCRT__) || defined(__OS2__)
347   setmode( STDIN_FILENO, O_BINARY );
348   setmode( STDOUT_FILENO, O_BINARY );
349 #endif
350 
351   for( ; argind < parser.arguments(); ++argind )
352     filenames.push_back( parser.argument( argind ) );
353 
354   if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" );
355 
356   std::string input_filename;
357   bool error = false;
358   bool stdin_used = false;
359   while( next_filename( filenames, input_filename, error, recursive ) )
360     {
361     int infd;
362     if( input_filename == "." )
363       {
364       if( stdin_used ) continue; else stdin_used = true;
365       infd = STDIN_FILENO; input_filename = "-";
366       }
367     else
368       {
369       infd = open_instream( input_filename, format_index < 0 );
370       if( infd < 0 ) { error = true; continue; }
371       }
372 
373     if( !cat( infd, format_index, input_filename, cat_options ) ) error = true;
374 
375     if( close( infd ) != 0 )
376       { show_file_error( input_filename.c_str(), "Error closing input file",
377                          errno ); error = true; }
378     }
379 
380   if( std::fclose( stdout ) != 0 )
381     {
382     show_error( "Error closing stdout", errno );
383     error = true;
384     }
385   return error;
386   }
387