1 /* cat -- concatenate files and print on the standard output.
2    Copyright (C) 1988-2020 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
/* Differences from the Unix cat:
   * Always unbuffered; -u is accepted but ignored.
   * Usually much faster than other versions of cat; the difference
     is especially apparent when using the -v option.

   By tege@sics.se, Torbjorn Granlund, advised by rms, Richard Stallman.  */
23 
24 #include <config.h>
25 
26 #include <stdio.h>
27 #include <getopt.h>
28 #include <sys/types.h>
29 
30 #if HAVE_STROPTS_H
31 # include <stropts.h>
32 #endif
33 #include <sys/ioctl.h>
34 
35 #include "system.h"
36 #include "ioblksize.h"
37 #include "die.h"
38 #include "error.h"
39 #include "fadvise.h"
40 #include "full-write.h"
41 #include "safe-read.h"
42 #include "xbinary-io.h"
43 
/* The official name of this program (e.g., no 'g' prefix).  */
#define PROGRAM_NAME "cat"

#define AUTHORS \
  proper_name ("Torbjorn Granlund"), \
  proper_name ("Richard M. Stallman")

/* Name of the input file currently being processed; "-" is used for
   standard input.  */
static char const *infile;

/* File descriptor the current input file is read from.  */
static int input_desc;

/* Text of the current line number: blank padding, the decimal digits,
   a TAB and a terminating NUL.  The counter is kept as text and
   incremented in place by next_line_num.
   An 11 digit counter may overflow within an hour on a P2/466,
   an 18 digit counter needs about 1000y.  */
#define LINE_COUNTER_BUF_LEN 20
static char line_buf[LINE_COUNTER_BUF_LEN] =
  "     " "     " "     " "  "	/* 17 blanks of left padding */
  "0\t";			/* initial digit, TAB; NUL is implicit */

/* Where printing of 'line_buf' starts.  This leaves room for six
   digits and only moves left once the line number exceeds 999999.  */
static char *line_num_print = &line_buf[LINE_COUNTER_BUF_LEN - 8];

/* Most significant (leftmost) digit currently in 'line_buf'.  */
static char *line_num_start = &line_buf[LINE_COUNTER_BUF_LEN - 3];

/* Least significant (rightmost) digit in 'line_buf'.  */
static char *line_num_end = &line_buf[LINE_COUNTER_BUF_LEN - 3];

/* Carries the 'newlines' state of the 'cat' function from one
   invocation to the next.  As a static object it starts out as 0.  */
static int newlines2;
80 
81 void
usage(int status)82 usage (int status)
83 {
84   if (status != EXIT_SUCCESS)
85     emit_try_help ();
86   else
87     {
88       printf (_("\
89 Usage: %s [OPTION]... [FILE]...\n\
90 "),
91               program_name);
92       fputs (_("\
93 Concatenate FILE(s) to standard output.\n\
94 "), stdout);
95 
96       emit_stdin_note ();
97 
98       fputs (_("\
99 \n\
100   -A, --show-all           equivalent to -vET\n\
101   -b, --number-nonblank    number nonempty output lines, overrides -n\n\
102   -e                       equivalent to -vE\n\
103   -E, --show-ends          display $ at end of each line\n\
104   -n, --number             number all output lines\n\
105   -s, --squeeze-blank      suppress repeated empty output lines\n\
106 "), stdout);
107       fputs (_("\
108   -t                       equivalent to -vT\n\
109   -T, --show-tabs          display TAB characters as ^I\n\
110   -u                       (ignored)\n\
111   -v, --show-nonprinting   use ^ and M- notation, except for LFD and TAB\n\
112 "), stdout);
113       fputs (HELP_OPTION_DESCRIPTION, stdout);
114       fputs (VERSION_OPTION_DESCRIPTION, stdout);
115       printf (_("\
116 \n\
117 Examples:\n\
118   %s f - g  Output f's contents, then standard input, then g's contents.\n\
119   %s        Copy standard input to standard output.\n\
120 "),
121               program_name, program_name);
122       emit_ancillary_info (PROGRAM_NAME);
123     }
124   exit (status);
125 }
126 
127 /* Compute the next line number.  */
128 
129 static void
next_line_num(void)130 next_line_num (void)
131 {
132   char *endp = line_num_end;
133   do
134     {
135       if ((*endp)++ < '9')
136         return;
137       *endp-- = '0';
138     }
139   while (endp >= line_num_start);
140   if (line_num_start > line_buf)
141     *--line_num_start = '1';
142   else
143     *line_buf = '>';
144   if (line_num_start < line_num_print)
145     line_num_print--;
146 }
147 
148 /* Plain cat.  Copies the file behind 'input_desc' to STDOUT_FILENO.
149    Return true if successful.  */
150 
151 static bool
simple_cat(char * buf,size_t bufsize)152 simple_cat (
153      /* Pointer to the buffer, used by reads and writes.  */
154      char *buf,
155 
156      /* Number of characters preferably read or written by each read and write
157         call.  */
158      size_t bufsize)
159 {
160   /* Actual number of characters read, and therefore written.  */
161   size_t n_read;
162 
163   /* Loop until the end of the file.  */
164 
165   while (true)
166     {
167       /* Read a block of input.  */
168 
169       n_read = safe_read (input_desc, buf, bufsize);
170       if (n_read == SAFE_READ_ERROR)
171         {
172           error (0, errno, "%s", quotef (infile));
173           return false;
174         }
175 
176       /* End of this file?  */
177 
178       if (n_read == 0)
179         return true;
180 
181       /* Write this block out.  */
182 
183       {
184         /* The following is ok, since we know that 0 < n_read.  */
185         size_t n = n_read;
186         if (full_write (STDOUT_FILENO, buf, n) != n)
187           die (EXIT_FAILURE, errno, _("write error"));
188       }
189     }
190 }
191 
/* Flush pending output to STDOUT_FILENO.
   The pending data is the *BPOUT - OUTBUF bytes starting at OUTBUF.
   After a successful write, *BPOUT is reset to OUTBUF; when nothing
   is pending, nothing is written and *BPOUT is left alone (it already
   equals OUTBUF).  A write error terminates the program.  */

static inline void
write_pending (char *outbuf, char **bpout)
{
  size_t pending = *bpout - outbuf;

  if (pending == 0)
    return;

  if (full_write (STDOUT_FILENO, outbuf, pending) != pending)
    die (EXIT_FAILURE, errno, _("write error"));

  *bpout = outbuf;
}
207 
208 /* Cat the file behind INPUT_DESC to the file behind OUTPUT_DESC.
209    Return true if successful.
210    Called if any option more than -u was specified.
211 
212    A newline character is always put at the end of the buffer, to make
213    an explicit test for buffer end unnecessary.  */
214 
215 static bool
cat(char * inbuf,size_t insize,char * outbuf,size_t outsize,bool show_nonprinting,bool show_tabs,bool number,bool number_nonblank,bool show_ends,bool squeeze_blank)216 cat (
217      /* Pointer to the beginning of the input buffer.  */
218      char *inbuf,
219 
220      /* Number of characters read in each read call.  */
221      size_t insize,
222 
223      /* Pointer to the beginning of the output buffer.  */
224      char *outbuf,
225 
226      /* Number of characters written by each write call.  */
227      size_t outsize,
228 
229      /* Variables that have values according to the specified options.  */
230      bool show_nonprinting,
231      bool show_tabs,
232      bool number,
233      bool number_nonblank,
234      bool show_ends,
235      bool squeeze_blank)
236 {
237   /* Last character read from the input buffer.  */
238   unsigned char ch;
239 
240   /* Pointer to the next character in the input buffer.  */
241   char *bpin;
242 
243   /* Pointer to the first non-valid byte in the input buffer, i.e., the
244      current end of the buffer.  */
245   char *eob;
246 
247   /* Pointer to the position where the next character shall be written.  */
248   char *bpout;
249 
250   /* Number of characters read by the last read call.  */
251   size_t n_read;
252 
253   /* Determines how many consecutive newlines there have been in the
254      input.  0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 1,
255      etc.  Initially 0 to indicate that we are at the beginning of a
256      new line.  The "state" of the procedure is determined by
257      NEWLINES.  */
258   int newlines = newlines2;
259 
260 #ifdef FIONREAD
261   /* If nonzero, use the FIONREAD ioctl, as an optimization.
262      (On Ultrix, it is not supported on NFS file systems.)  */
263   bool use_fionread = true;
264 #endif
265 
266   /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input
267      is read immediately.  */
268 
269   eob = inbuf;
270   bpin = eob + 1;
271 
272   bpout = outbuf;
273 
274   while (true)
275     {
276       do
277         {
278           /* Write if there are at least OUTSIZE bytes in OUTBUF.  */
279 
280           if (outbuf + outsize <= bpout)
281             {
282               char *wp = outbuf;
283               size_t remaining_bytes;
284               do
285                 {
286                   if (full_write (STDOUT_FILENO, wp, outsize) != outsize)
287                     die (EXIT_FAILURE, errno, _("write error"));
288                   wp += outsize;
289                   remaining_bytes = bpout - wp;
290                 }
291               while (outsize <= remaining_bytes);
292 
293               /* Move the remaining bytes to the beginning of the
294                  buffer.  */
295 
296               memmove (outbuf, wp, remaining_bytes);
297               bpout = outbuf + remaining_bytes;
298             }
299 
300           /* Is INBUF empty?  */
301 
302           if (bpin > eob)
303             {
304               bool input_pending = false;
305 #ifdef FIONREAD
306               int n_to_read = 0;
307 
308               /* Is there any input to read immediately?
309                  If not, we are about to wait,
310                  so write all buffered output before waiting.  */
311 
312               if (use_fionread
313                   && ioctl (input_desc, FIONREAD, &n_to_read) < 0)
314                 {
315                   /* Ultrix returns EOPNOTSUPP on NFS;
316                      HP-UX returns ENOTTY on pipes.
317                      SunOS returns EINVAL and
318                      More/BSD returns ENODEV on special files
319                      like /dev/null.
320                      Irix-5 returns ENOSYS on pipes.  */
321                   if (errno == EOPNOTSUPP || errno == ENOTTY
322                       || errno == EINVAL || errno == ENODEV
323                       || errno == ENOSYS)
324                     use_fionread = false;
325                   else
326                     {
327                       error (0, errno, _("cannot do ioctl on %s"),
328                              quoteaf (infile));
329                       newlines2 = newlines;
330                       return false;
331                     }
332                 }
333               if (n_to_read != 0)
334                 input_pending = true;
335 #endif
336 
337               if (!input_pending)
338                 write_pending (outbuf, &bpout);
339 
340               /* Read more input into INBUF.  */
341 
342               n_read = safe_read (input_desc, inbuf, insize);
343               if (n_read == SAFE_READ_ERROR)
344                 {
345                   error (0, errno, "%s", quotef (infile));
346                   write_pending (outbuf, &bpout);
347                   newlines2 = newlines;
348                   return false;
349                 }
350               if (n_read == 0)
351                 {
352                   write_pending (outbuf, &bpout);
353                   newlines2 = newlines;
354                   return true;
355                 }
356 
357               /* Update the pointers and insert a sentinel at the buffer
358                  end.  */
359 
360               bpin = inbuf;
361               eob = bpin + n_read;
362               *eob = '\n';
363             }
364           else
365             {
366               /* It was a real (not a sentinel) newline.  */
367 
368               /* Was the last line empty?
369                  (i.e., have two or more consecutive newlines been read?)  */
370 
371               if (++newlines > 0)
372                 {
373                   if (newlines >= 2)
374                     {
375                       /* Limit this to 2 here.  Otherwise, with lots of
376                          consecutive newlines, the counter could wrap
377                          around at INT_MAX.  */
378                       newlines = 2;
379 
380                       /* Are multiple adjacent empty lines to be substituted
381                          by single ditto (-s), and this was the second empty
382                          line?  */
383                       if (squeeze_blank)
384                         {
385                           ch = *bpin++;
386                           continue;
387                         }
388                     }
389 
390                   /* Are line numbers to be written at empty lines (-n)?  */
391 
392                   if (number && !number_nonblank)
393                     {
394                       next_line_num ();
395                       bpout = stpcpy (bpout, line_num_print);
396                     }
397                 }
398 
399               /* Output a currency symbol if requested (-e).  */
400 
401               if (show_ends)
402                 *bpout++ = '$';
403 
404               /* Output the newline.  */
405 
406               *bpout++ = '\n';
407             }
408           ch = *bpin++;
409         }
410       while (ch == '\n');
411 
412       /* Are we at the beginning of a line, and line numbers are requested?  */
413 
414       if (newlines >= 0 && number)
415         {
416           next_line_num ();
417           bpout = stpcpy (bpout, line_num_print);
418         }
419 
420       /* Here CH cannot contain a newline character.  */
421 
422       /* The loops below continue until a newline character is found,
423          which means that the buffer is empty or that a proper newline
424          has been found.  */
425 
426       /* If quoting, i.e., at least one of -v, -e, or -t specified,
427          scan for chars that need conversion.  */
428       if (show_nonprinting)
429         {
430           while (true)
431             {
432               if (ch >= 32)
433                 {
434                   if (ch < 127)
435                     *bpout++ = ch;
436                   else if (ch == 127)
437                     {
438                       *bpout++ = '^';
439                       *bpout++ = '?';
440                     }
441                   else
442                     {
443                       *bpout++ = 'M';
444                       *bpout++ = '-';
445                       if (ch >= 128 + 32)
446                         {
447                           if (ch < 128 + 127)
448                             *bpout++ = ch - 128;
449                           else
450                             {
451                               *bpout++ = '^';
452                               *bpout++ = '?';
453                             }
454                         }
455                       else
456                         {
457                           *bpout++ = '^';
458                           *bpout++ = ch - 128 + 64;
459                         }
460                     }
461                 }
462               else if (ch == '\t' && !show_tabs)
463                 *bpout++ = '\t';
464               else if (ch == '\n')
465                 {
466                   newlines = -1;
467                   break;
468                 }
469               else
470                 {
471                   *bpout++ = '^';
472                   *bpout++ = ch + 64;
473                 }
474 
475               ch = *bpin++;
476             }
477         }
478       else
479         {
480           /* Not quoting, neither of -v, -e, or -t specified.  */
481           while (true)
482             {
483               if (ch == '\t' && show_tabs)
484                 {
485                   *bpout++ = '^';
486                   *bpout++ = ch + 64;
487                 }
488               else if (ch != '\n')
489                 *bpout++ = ch;
490               else
491                 {
492                   newlines = -1;
493                   break;
494                 }
495 
496               ch = *bpin++;
497             }
498         }
499     }
500 }
501 
502 int
main(int argc,char ** argv)503 main (int argc, char **argv)
504 {
505   /* Optimal size of i/o operations of output.  */
506   size_t outsize;
507 
508   /* Optimal size of i/o operations of input.  */
509   size_t insize;
510 
511   size_t page_size = getpagesize ();
512 
513   /* Pointer to the input buffer.  */
514   char *inbuf;
515 
516   /* Pointer to the output buffer.  */
517   char *outbuf;
518 
519   bool ok = true;
520   int c;
521 
522   /* Index in argv to processed argument.  */
523   int argind;
524 
525   /* Device number of the output (file or whatever).  */
526   dev_t out_dev;
527 
528   /* I-node number of the output.  */
529   ino_t out_ino;
530 
531   /* True if the output is a regular file.  */
532   bool out_isreg;
533 
534   /* Nonzero if we have ever read standard input.  */
535   bool have_read_stdin = false;
536 
537   struct stat stat_buf;
538 
539   /* Variables that are set according to the specified options.  */
540   bool number = false;
541   bool number_nonblank = false;
542   bool squeeze_blank = false;
543   bool show_ends = false;
544   bool show_nonprinting = false;
545   bool show_tabs = false;
546   int file_open_mode = O_RDONLY;
547 
548   static struct option const long_options[] =
549   {
550     {"number-nonblank", no_argument, NULL, 'b'},
551     {"number", no_argument, NULL, 'n'},
552     {"squeeze-blank", no_argument, NULL, 's'},
553     {"show-nonprinting", no_argument, NULL, 'v'},
554     {"show-ends", no_argument, NULL, 'E'},
555     {"show-tabs", no_argument, NULL, 'T'},
556     {"show-all", no_argument, NULL, 'A'},
557     {GETOPT_HELP_OPTION_DECL},
558     {GETOPT_VERSION_OPTION_DECL},
559     {NULL, 0, NULL, 0}
560   };
561 
562   initialize_main (&argc, &argv);
563   set_program_name (argv[0]);
564   setlocale (LC_ALL, "");
565   bindtextdomain (PACKAGE, LOCALEDIR);
566   textdomain (PACKAGE);
567 
568   /* Arrange to close stdout if we exit via the
569      case_GETOPT_HELP_CHAR or case_GETOPT_VERSION_CHAR code.
570      Normally STDOUT_FILENO is used rather than stdout, so
571      close_stdout does nothing.  */
572   atexit (close_stdout);
573 
574   /* Parse command line options.  */
575 
576   while ((c = getopt_long (argc, argv, "benstuvAET", long_options, NULL))
577          != -1)
578     {
579       switch (c)
580         {
581         case 'b':
582           number = true;
583           number_nonblank = true;
584           break;
585 
586         case 'e':
587           show_ends = true;
588           show_nonprinting = true;
589           break;
590 
591         case 'n':
592           number = true;
593           break;
594 
595         case 's':
596           squeeze_blank = true;
597           break;
598 
599         case 't':
600           show_tabs = true;
601           show_nonprinting = true;
602           break;
603 
604         case 'u':
605           /* We provide the -u feature unconditionally.  */
606           break;
607 
608         case 'v':
609           show_nonprinting = true;
610           break;
611 
612         case 'A':
613           show_nonprinting = true;
614           show_ends = true;
615           show_tabs = true;
616           break;
617 
618         case 'E':
619           show_ends = true;
620           break;
621 
622         case 'T':
623           show_tabs = true;
624           break;
625 
626         case_GETOPT_HELP_CHAR;
627 
628         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
629 
630         default:
631           usage (EXIT_FAILURE);
632         }
633     }
634 
635   /* Get device, i-node number, and optimal blocksize of output.  */
636 
637   if (fstat (STDOUT_FILENO, &stat_buf) < 0)
638     die (EXIT_FAILURE, errno, _("standard output"));
639 
640   outsize = io_blksize (stat_buf);
641   out_dev = stat_buf.st_dev;
642   out_ino = stat_buf.st_ino;
643   out_isreg = S_ISREG (stat_buf.st_mode) != 0;
644 
645   if (! (number || show_ends || squeeze_blank))
646     {
647       file_open_mode |= O_BINARY;
648       xset_binary_mode (STDOUT_FILENO, O_BINARY);
649     }
650 
651   /* Check if any of the input files are the same as the output file.  */
652 
653   /* Main loop.  */
654 
655   infile = "-";
656   argind = optind;
657 
658   do
659     {
660       if (argind < argc)
661         infile = argv[argind];
662 
663       if (STREQ (infile, "-"))
664         {
665           have_read_stdin = true;
666           input_desc = STDIN_FILENO;
667           if (file_open_mode & O_BINARY)
668             xset_binary_mode (STDIN_FILENO, O_BINARY);
669         }
670       else
671         {
672           input_desc = open (infile, file_open_mode);
673           if (input_desc < 0)
674             {
675               error (0, errno, "%s", quotef (infile));
676               ok = false;
677               continue;
678             }
679         }
680 
681       if (fstat (input_desc, &stat_buf) < 0)
682         {
683           error (0, errno, "%s", quotef (infile));
684           ok = false;
685           goto contin;
686         }
687       insize = io_blksize (stat_buf);
688 
689       fdadvise (input_desc, 0, 0, FADVISE_SEQUENTIAL);
690 
691       /* Don't copy a nonempty regular file to itself, as that would
692          merely exhaust the output device.  It's better to catch this
693          error earlier rather than later.  */
694 
695       if (out_isreg
696           && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino
697           && lseek (input_desc, 0, SEEK_CUR) < stat_buf.st_size)
698         {
699           error (0, 0, _("%s: input file is output file"), quotef (infile));
700           ok = false;
701           goto contin;
702         }
703 
704       /* Select which version of 'cat' to use.  If any format-oriented
705          options were given use 'cat'; otherwise use 'simple_cat'.  */
706 
707       if (! (number || show_ends || show_nonprinting
708              || show_tabs || squeeze_blank))
709         {
710           insize = MAX (insize, outsize);
711           inbuf = xmalloc (insize + page_size - 1);
712 
713           ok &= simple_cat (ptr_align (inbuf, page_size), insize);
714         }
715       else
716         {
717           inbuf = xmalloc (insize + 1 + page_size - 1);
718 
719           /* Why are
720              (OUTSIZE - 1 + INSIZE * 4 + LINE_COUNTER_BUF_LEN + PAGE_SIZE - 1)
721              bytes allocated for the output buffer?
722 
723              A test whether output needs to be written is done when the input
724              buffer empties or when a newline appears in the input.  After
725              output is written, at most (OUTSIZE - 1) bytes will remain in the
726              buffer.  Now INSIZE bytes of input is read.  Each input character
727              may grow by a factor of 4 (by the prepending of M-^).  If all
728              characters do, and no newlines appear in this block of input, we
729              will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer.
730              If the last character in the preceding block of input was a
731              newline, a line number may be written (according to the given
732              options) as the first thing in the output buffer. (Done after the
733              new input is read, but before processing of the input begins.)
734              A line number requires seldom more than LINE_COUNTER_BUF_LEN
735              positions.
736 
737              Align the output buffer to a page size boundary, for efficiency
738              on some paging implementations, so add PAGE_SIZE - 1 bytes to the
739              request to make room for the alignment.  */
740 
741           outbuf = xmalloc (outsize - 1 + insize * 4 + LINE_COUNTER_BUF_LEN
742                             + page_size - 1);
743 
744           ok &= cat (ptr_align (inbuf, page_size), insize,
745                      ptr_align (outbuf, page_size), outsize, show_nonprinting,
746                      show_tabs, number, number_nonblank, show_ends,
747                      squeeze_blank);
748 
749           free (outbuf);
750         }
751 
752       free (inbuf);
753 
754     contin:
755       if (!STREQ (infile, "-") && close (input_desc) < 0)
756         {
757           error (0, errno, "%s", quotef (infile));
758           ok = false;
759         }
760     }
761   while (++argind < argc);
762 
763   if (have_read_stdin && close (STDIN_FILENO) < 0)
764     die (EXIT_FAILURE, errno, _("closing standard input"));
765 
766   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
767 }
768