1 /* Zcat - decompress and concatenate files to standard output
2 Copyright (C) 2010-2021 Antonio Diaz Diaz.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 2 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #define _FILE_OFFSET_BITS 64
19
20 #include <cerrno>
21 #include <climits>
22 #include <csignal>
23 #include <cstdio>
24 #include <cstdlib>
25 #include <cstring>
26 #include <list>
27 #include <string>
28 #include <vector>
29 #include <dirent.h>
30 #include <fcntl.h>
31 #include <stdint.h>
32 #include <unistd.h>
33 #include <sys/stat.h>
34 #if defined(__MSVCRT__) || defined(__OS2__)
35 #include <io.h>
36 #endif
37
38 #include "arg_parser.h"
39 #include "rc.h"
40 #include "zutils.h"
41
42
43 namespace {
44
45 #include "recursive.cc"
46 #include "zcatgrep.cc"
47
// Output formatting switches chosen on the command line.
// A default-constructed object has every transformation disabled.
struct Cat_options
  {
  int number_lines;       // 0 = no, 1 = nonblank, 2 = all
  bool show_ends;         // display '$' at end of each line
  bool show_nonprinting;  // use '^' and 'M-' notation
  bool show_tabs;         // display TAB characters as '^I'
  bool squeeze_blank;     // never more than one single blank line

  Cat_options()
    : number_lines( 0 ),
      show_ends( false ),
      show_nonprinting( false ),
      show_tabs( false ),
      squeeze_blank( false )
    {}
  };
60
61
// Unlimited size line counter.
// The counter is kept as the text that is copied to the output: a decimal
// number right-aligned in a field of at least 'min_width' characters,
// followed by a TAB. The field grows when the number needs more digits.
class Line_number
  {
  enum { min_width = 6 };       // characters (padding + digits) before the TAB
  std::string str;              // current text of the counter
  unsigned first_digit_pos;     // index in 'str' of the most significant digit

public:
  // The counter starts at 0. The padding is generated from 'min_width'
  // instead of being spelled out in a literal so that 'first_digit_pos'
  // always indexes the initial '0' and next() never writes outside 'str'.
  Line_number() : str( min_width - 1, ' ' ), first_digit_pos( min_width - 1 )
    { str += "0\t"; }

  // Adds one to the counter.
  void next()
    {
    // Walk from the least significant digit (just before the TAB) towards
    // the most significant one, propagating the carry.
    for( unsigned i = str.size() - 1; i > first_digit_pos; )
      {
      if( str[--i] < '9' ) { ++str[i]; return; }
      str[i] = '0';                     // 9 wraps to 0; carry to the left
      }
    // All the digits wrapped; a new leading '1' is needed.
    if( first_digit_pos > 0 ) str[--first_digit_pos] = '1';    // take a pad
    else str.insert( str.begin(), '1' );                       // widen field
    }

  // Copies the text of the counter (no terminating null byte) to 'buf' and
  // returns the number of bytes copied. 'buf' must be large enough.
  int sprint( uint8_t * const buf ) const
    {
    std::memcpy( buf, str.data(), str.size() );
    return str.size();
    }
  };

// The counter used by do_cat to number the output lines.
Line_number line_number;
89
90
show_help()91 void show_help()
92 {
93 std::printf( "zcat copies each file argument to standard output in sequence. If any\n"
94 "file given is compressed, its decompressed content is copied. If a file\n"
95 "given does not exist, and its name does not end with one of the known\n"
96 "extensions, zcat tries the compressed file names corresponding to the\n"
97 "formats supported. If a file fails to decompress, zcat continues copying the\n"
98 "rest of the files.\n"
99 "\nIf a file is specified as '-', data are read from standard input,\n"
100 "decompressed if needed, and sent to standard output. Data read from\n"
101 "standard input must be of the same type; all uncompressed or all in the\n"
102 "same compressed format.\n"
103 "\nIf no files are specified, recursive searches examine the current\n"
104 "working directory, and nonrecursive searches read standard input.\n"
105 "\nThe formats supported are bzip2, gzip, lzip, and xz.\n"
106 "\nUsage: zcat [options] [files]\n"
107 "\nExit status is 0 if no errors occurred, 1 otherwise.\n"
108 "\nOptions:\n"
109 " -h, --help display this help and exit\n"
110 " -V, --version output version information and exit\n"
111 " -A, --show-all equivalent to '-vET'\n"
112 " -b, --number-nonblank number nonblank output lines\n"
113 " -e equivalent to '-vE'\n"
114 " -E, --show-ends display '$' at end of each line\n"
115 " -M, --format=<list> process only the formats in <list>\n"
116 " -n, --number number all output lines\n"
117 " -N, --no-rcfile don't read runtime configuration file\n"
118 " -O, --force-format=<fmt> force the format given (bz2, gz, lz, xz)\n"
119 " -q, --quiet suppress all messages\n"
120 " -r, --recursive operate recursively on directories\n"
121 " -R, --dereference-recursive recursively follow symbolic links\n"
122 " -s, --squeeze-blank never more than one single blank line\n"
123 " -t equivalent to '-vT'\n"
124 " -T, --show-tabs display TAB characters as '^I'\n"
125 " -v, --show-nonprinting use '^' and 'M-' notation, except for LF and TAB\n"
126 " --verbose verbose mode (show error messages)\n"
127 " --bz2=<command> set compressor and options for bzip2 format\n"
128 " --gz=<command> set compressor and options for gzip format\n"
129 " --lz=<command> set compressor and options for lzip format\n"
130 " --xz=<command> set compressor and options for xz format\n" );
131 show_help_addr();
132 }
133
134
do_cat(const int infd,const int buffer_size,uint8_t * const inbuf,uint8_t * const outbuf,const std::string & input_filename,const Cat_options & cat_options)135 bool do_cat( const int infd, const int buffer_size,
136 uint8_t * const inbuf, uint8_t * const outbuf,
137 const std::string & input_filename,
138 const Cat_options & cat_options )
139 {
140 static int at_bol = 1; // at begin of line. 0 = false, 1 = true,
141 // 2 = at begin of second blank line.
142 int inpos = 0; // positions in buffers
143 int outpos = 0;
144 int rd = -1; // bytes read by the last readblock
145 unsigned char c;
146
147 while( true )
148 {
149 do {
150 if( outpos >= buffer_size )
151 {
152 if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos )
153 { show_error( "Write error", errno ); return false; }
154 outpos = 0;
155 }
156 if( inpos > rd ) // inbuf is empty
157 {
158 rd = readblock( infd, inbuf, buffer_size );
159 if( rd != buffer_size && errno )
160 {
161 show_file_error( input_filename.c_str(), "Read error", errno );
162 return false;
163 }
164 if( rd == 0 )
165 {
166 if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos )
167 { show_error( "Write error", errno ); return false; }
168 outpos = 0;
169 return true;
170 }
171 inpos = 0;
172 inbuf[rd] = '\n'; // sentinel newline
173 }
174 else // a real newline was found
175 {
176 if( at_bol > 1 )
177 {
178 if( cat_options.squeeze_blank ) { c = inbuf[inpos++]; continue; }
179 }
180 else ++at_bol;
181 if( at_bol > 1 && cat_options.number_lines == 2 )
182 {
183 line_number.next();
184 outpos += line_number.sprint( &outbuf[outpos] );
185 }
186 if( cat_options.show_ends ) outbuf[outpos++] = '$';
187 outbuf[outpos++] = '\n'; // output the newline
188 }
189 c = inbuf[inpos++];
190 }
191 while( c == '\n' );
192
193 if( at_bol > 0 && cat_options.number_lines )
194 {
195 line_number.next();
196 outpos += line_number.sprint( &outbuf[outpos] );
197 }
198 at_bol = 0;
199
200 // the loops below continue until a newline (real or sentinel) is found
201
202 if( cat_options.show_nonprinting )
203 while( true )
204 {
205 if( c < 32 || c >= 127 )
206 {
207 if( c == '\n' ) break;
208 if( c != '\t' || cat_options.show_tabs )
209 {
210 if( c >= 128 )
211 { c -= 128; outbuf[outpos++] = 'M'; outbuf[outpos++] = '-'; }
212 if( c < 32 ) { c += 64; outbuf[outpos++] = '^'; }
213 else if( c == 127 ) { c = '?'; outbuf[outpos++] = '^'; }
214 }
215 }
216 outbuf[outpos++] = c;
217 c = inbuf[inpos++];
218 }
219 else // not quoting
220 while( c != '\n' )
221 {
222 if( c == '\t' && cat_options.show_tabs )
223 { c += 64; outbuf[outpos++] = '^'; }
224 outbuf[outpos++] = c;
225 c = inbuf[inpos++];
226 }
227 }
228 }
229
230
cat(int infd,const int format_index,const std::string & input_filename,const Cat_options & cat_options)231 bool cat( int infd, const int format_index, const std::string & input_filename,
232 const Cat_options & cat_options )
233 {
234 enum { buffer_size = 4096, outbuf_size = (5 * buffer_size) + 256 + 1 };
235 // input buffer with space for sentinel newline at the end
236 uint8_t * const inbuf = new uint8_t[buffer_size+1];
237 // output buffer with space for character quoting, 255-digit line number,
238 // worst case flushing respect to inbuf, and a canary byte.
239 uint8_t * const outbuf = new uint8_t[outbuf_size];
240 outbuf[outbuf_size-1] = 0;
241 Children children;
242 bool error = false;
243
244 if( !set_data_feeder( input_filename, &infd, children, format_index ) ||
245 !do_cat( infd, buffer_size, inbuf, outbuf, input_filename, cat_options ) )
246 error = true;
247 if( !good_status( children, !error ) ) error = true;
248 if( !error && close( infd ) != 0 ) { show_close_error(); error = true; }
249 if( outbuf[outbuf_size-1] != 0 ) internal_error( "buffer overflow." );
250 delete[] outbuf; delete[] inbuf;
251 return !error;
252 }
253
254 } // end namespace
255
256
main(const int argc,const char * const argv[])257 int main( const int argc, const char * const argv[] )
258 {
259 enum { verbose_opt = 256, bz2_opt, gz_opt, lz_opt, xz_opt };
260 int format_index = -1;
261 int recursive = 0; // 1 = '-r', 2 = '-R'
262 std::list< std::string > filenames;
263 Cat_options cat_options;
264 program_name = "zcat";
265 invocation_name = ( argc > 0 ) ? argv[0] : program_name;
266
267 const Arg_parser::Option options[] =
268 {
269 { 'A', "show-all", Arg_parser::no }, // cat
270 { 'b', "number-nonblank", Arg_parser::no }, // cat
271 { 'c', "stdout", Arg_parser::no }, // gzip
272 { 'd', "decompress", Arg_parser::no }, // gzip
273 { 'e', 0, Arg_parser::no }, // cat
274 { 'E', "show-ends", Arg_parser::no }, // cat
275 { 'f', "force", Arg_parser::no }, // gzip
276 { 'h', "help", Arg_parser::no },
277 { 'l', "list", Arg_parser::no }, // gzip
278 { 'L', "license", Arg_parser::no }, // gzip
279 { 'M', "format", Arg_parser::yes },
280 { 'n', "number", Arg_parser::no }, // cat
281 { 'N', "no-rcfile", Arg_parser::no },
282 { 'O', "force-format", Arg_parser::yes },
283 { 'q', "quiet", Arg_parser::no },
284 { 'r', "recursive", Arg_parser::no },
285 { 'R', "dereference-recursive", Arg_parser::no },
286 { 's', "squeeze-blank", Arg_parser::no }, // cat
287 { 't', 0, Arg_parser::no }, // cat
288 { 'T', "show-tabs", Arg_parser::no }, // cat
289 { 'v', "show-nonprinting", Arg_parser::no }, // cat
290 { 'V', "version", Arg_parser::no },
291 { verbose_opt, "verbose", Arg_parser::no },
292 { bz2_opt, "bz2", Arg_parser::yes },
293 { gz_opt, "gz", Arg_parser::yes },
294 { lz_opt, "lz", Arg_parser::yes },
295 { xz_opt, "xz", Arg_parser::yes },
296 { 0 , 0, Arg_parser::no } };
297
298 const Arg_parser parser( argc, argv, options );
299 if( parser.error().size() ) // bad option
300 { show_error( parser.error().c_str(), 0, true ); return 1; }
301
302 maybe_process_config_file( parser );
303
304 int argind = 0;
305 for( ; argind < parser.arguments(); ++argind )
306 {
307 const int code = parser.code( argind );
308 if( !code ) break; // no more options
309 const std::string & arg = parser.argument( argind );
310 switch( code )
311 {
312 case 'A': cat_options.show_ends = true;
313 cat_options.show_nonprinting = true;
314 cat_options.show_tabs = true; break;
315 case 'b': cat_options.number_lines = 1; break;
316 case 'c': break;
317 case 'd': break;
318 case 'e': cat_options.show_nonprinting = true; // fall through
319 case 'E': cat_options.show_ends = true; break;
320 case 'f': break;
321 case 'h': show_help(); return 0;
322 case 'l': break;
323 case 'L': break;
324 case 'M': parse_format_list( arg ); break;
325 case 'n': if( cat_options.number_lines == 0 )
326 { cat_options.number_lines = 2; } break;
327 case 'N': break;
328 case 'O': format_index = parse_format_type( arg ); break;
329 case 'q': verbosity = -1; break;
330 case 'r': recursive = 1; break;
331 case 'R': recursive = 2; break;
332 case 's': cat_options.squeeze_blank = true; break;
333 case 't': cat_options.show_nonprinting = true; // fall through
334 case 'T': cat_options.show_tabs = true; break;
335 case 'v': cat_options.show_nonprinting = true; break;
336 case 'V': show_version(); return 0;
337 case verbose_opt: if( verbosity < 4 ) ++verbosity; break;
338 case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break;
339 case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break;
340 case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break;
341 case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break;
342 default : internal_error( "uncaught option." );
343 }
344 } // end process options
345
346 #if defined(__MSVCRT__) || defined(__OS2__)
347 setmode( STDIN_FILENO, O_BINARY );
348 setmode( STDOUT_FILENO, O_BINARY );
349 #endif
350
351 for( ; argind < parser.arguments(); ++argind )
352 filenames.push_back( parser.argument( argind ) );
353
354 if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" );
355
356 std::string input_filename;
357 bool error = false;
358 bool stdin_used = false;
359 while( next_filename( filenames, input_filename, error, recursive ) )
360 {
361 int infd;
362 if( input_filename == "." )
363 {
364 if( stdin_used ) continue; else stdin_used = true;
365 infd = STDIN_FILENO; input_filename = "-";
366 }
367 else
368 {
369 infd = open_instream( input_filename, format_index < 0 );
370 if( infd < 0 ) { error = true; continue; }
371 }
372
373 if( !cat( infd, format_index, input_filename, cat_options ) ) error = true;
374
375 if( close( infd ) != 0 )
376 { show_file_error( input_filename.c_str(), "Error closing input file",
377 errno ); error = true; }
378 }
379
380 if( std::fclose( stdout ) != 0 )
381 {
382 show_error( "Error closing stdout", errno );
383 error = true;
384 }
385 return error;
386 }
387