1 /* tac - concatenate and print files in reverse
2    Copyright (C) 1988-2018 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
16 
17 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18    GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
19 
20 /* Copy each FILE, or the standard input if none are given or when a
21    FILE name of "-" is encountered, to the standard output with the
22    order of the records reversed.  The records are separated by
23    instances of a string, or a newline if none is given.  By default, the
24    separator string is attached to the end of the record that it
25    follows in the file.
26 
27    Options:
28    -b, --before			The separator is attached to the beginning
29                                 of the record that it precedes in the file.
30    -r, --regex			The separator is a regular expression.
31    -s, --separator=separator	Use SEPARATOR as the record separator.
32 
33    To reverse a file byte by byte, use (in bash, ksh, or sh):
34 tac -r -s '.\|
35 ' file */
36 
37 #include <config.h>
38 
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
43 
44 #include <regex.h>
45 
46 #include "die.h"
47 #include "error.h"
48 #include "filenamecat.h"
49 #include "safe-read.h"
50 #include "stdlib--.h"
51 #include "xbinary-io.h"
52 
53 /* The official name of this program (e.g., no 'g' prefix).  */
54 #define PROGRAM_NAME "tac"
55 
56 #define AUTHORS \
57   proper_name ("Jay Lepreau"), \
58   proper_name ("David MacKenzie")
59 
60 #if defined __MSDOS__ || defined _WIN32
61 /* Define this to non-zero on systems for which the regular mechanism
62    (of unlinking an open file and expecting to be able to write, seek
63    back to the beginning, then reread it) doesn't work.  E.g., on Windows
64    and DOS systems.  */
65 # define DONT_UNLINK_WHILE_OPEN 1
66 #endif
67 
68 
69 #ifndef DEFAULT_TMPDIR
70 # define DEFAULT_TMPDIR "/tmp"
71 #endif
72 
73 /* The number of bytes per atomic read. */
74 #define INITIAL_READSIZE 8192
75 
76 /* The number of bytes per atomic write. */
77 #define WRITESIZE 8192
78 
79 /* The string that separates the records of the file. */
80 static char const *separator;
81 
82 /* True if we have ever read standard input.  */
83 static bool have_read_stdin = false;
84 
85 /* If true, print 'separator' along with the record preceding it
86    in the file; otherwise with the record following it. */
87 static bool separator_ends_record;
88 
89 /* 0 if 'separator' is to be matched as a regular expression;
90    otherwise, the length of 'separator', used as a sentinel to
91    stop the search. */
92 static size_t sentinel_length;
93 
94 /* The length of a match with 'separator'.  If 'sentinel_length' is 0,
95    'match_length' is computed every time a match succeeds;
96    otherwise, it is simply the length of 'separator'. */
97 static size_t match_length;
98 
99 /* The input buffer. */
100 static char *G_buffer;
101 
102 /* The number of bytes to read at once into 'buffer'. */
103 static size_t read_size;
104 
105 /* The size of 'buffer'.  This is read_size * 2 + sentinel_length + 2.
106    The extra 2 bytes allow 'past_end' to have a value beyond the
107    end of 'G_buffer' and 'match_start' to run off the front of 'G_buffer'. */
108 static size_t G_buffer_size;
109 
110 /* The compiled regular expression representing 'separator'. */
111 static struct re_pattern_buffer compiled_separator;
112 static char compiled_separator_fastmap[UCHAR_MAX + 1];
113 static struct re_registers regs;
114 
115 static struct option const longopts[] =
116 {
117   {"before", no_argument, NULL, 'b'},
118   {"regex", no_argument, NULL, 'r'},
119   {"separator", required_argument, NULL, 's'},
120   {GETOPT_HELP_OPTION_DECL},
121   {GETOPT_VERSION_OPTION_DECL},
122   {NULL, 0, NULL, 0}
123 };
124 
125 void
usage(int status)126 usage (int status)
127 {
128   if (status != EXIT_SUCCESS)
129     emit_try_help ();
130   else
131     {
132       printf (_("\
133 Usage: %s [OPTION]... [FILE]...\n\
134 "),
135               program_name);
136       fputs (_("\
137 Write each FILE to standard output, last line first.\n\
138 "), stdout);
139 
140       emit_stdin_note ();
141       emit_mandatory_arg_note ();
142 
143       fputs (_("\
144   -b, --before             attach the separator before instead of after\n\
145   -r, --regex              interpret the separator as a regular expression\n\
146   -s, --separator=STRING   use STRING as the separator instead of newline\n\
147 "), stdout);
148       fputs (HELP_OPTION_DESCRIPTION, stdout);
149       fputs (VERSION_OPTION_DESCRIPTION, stdout);
150       emit_ancillary_info (PROGRAM_NAME);
151     }
152   exit (status);
153 }
154 
155 /* Print the characters from START to PAST_END - 1.
156    If START is NULL, just flush the buffer. */
157 
158 static void
output(const char * start,const char * past_end)159 output (const char *start, const char *past_end)
160 {
161   static char buffer[WRITESIZE];
162   static size_t bytes_in_buffer = 0;
163   size_t bytes_to_add = past_end - start;
164   size_t bytes_available = WRITESIZE - bytes_in_buffer;
165 
166   if (start == 0)
167     {
168       fwrite (buffer, 1, bytes_in_buffer, stdout);
169       bytes_in_buffer = 0;
170       return;
171     }
172 
173   /* Write out as many full buffers as possible. */
174   while (bytes_to_add >= bytes_available)
175     {
176       memcpy (buffer + bytes_in_buffer, start, bytes_available);
177       bytes_to_add -= bytes_available;
178       start += bytes_available;
179       fwrite (buffer, 1, WRITESIZE, stdout);
180       bytes_in_buffer = 0;
181       bytes_available = WRITESIZE;
182     }
183 
184   memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
185   bytes_in_buffer += bytes_to_add;
186 }
187 
188 /* Print in reverse the file open on descriptor FD for reading FILE.
189    The file is already positioned at FILE_POS, which should be near its end.
190    Return true if successful.  */
191 
192 static bool
tac_seekable(int input_fd,const char * file,off_t file_pos)193 tac_seekable (int input_fd, const char *file, off_t file_pos)
194 {
195   /* Pointer to the location in 'G_buffer' where the search for
196      the next separator will begin. */
197   char *match_start;
198 
199   /* Pointer to one past the rightmost character in 'G_buffer' that
200      has not been printed yet. */
201   char *past_end;
202 
203   /* Length of the record growing in 'G_buffer'. */
204   size_t saved_record_size;
205 
206   /* True if 'output' has not been called yet for any file.
207      Only used when the separator is attached to the preceding record. */
208   bool first_time = true;
209   char first_char = *separator;	/* Speed optimization, non-regexp. */
210   char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
211   size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
212 
213   /* Arrange for the first read to lop off enough to leave the rest of the
214      file a multiple of 'read_size'.  Since 'read_size' can change, this may
215      not always hold during the program run, but since it usually will, leave
216      it here for i/o efficiency (page/sector boundaries and all that).
217      Note: the efficiency gain has not been verified. */
218   size_t remainder = file_pos % read_size;
219   if (remainder != 0)
220     {
221       file_pos -= remainder;
222       if (lseek (input_fd, file_pos, SEEK_SET) < 0)
223         error (0, errno, _("%s: seek failed"), quotef (file));
224     }
225 
226   /* Scan backward, looking for end of file.  This caters to proc-like
227      file systems where the file size is just an estimate.  */
228   while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0
229          && file_pos != 0)
230     {
231       off_t rsize = read_size;
232       if (lseek (input_fd, -rsize, SEEK_CUR) < 0)
233         error (0, errno, _("%s: seek failed"), quotef (file));
234       file_pos -= read_size;
235     }
236 
237   /* Now scan forward, looking for end of file.  */
238   while (saved_record_size == read_size)
239     {
240       size_t nread = safe_read (input_fd, G_buffer, read_size);
241       if (nread == 0)
242         break;
243       saved_record_size = nread;
244       if (saved_record_size == SAFE_READ_ERROR)
245         break;
246       file_pos += nread;
247     }
248 
249   if (saved_record_size == SAFE_READ_ERROR)
250     {
251       error (0, errno, _("%s: read error"), quotef (file));
252       return false;
253     }
254 
255   match_start = past_end = G_buffer + saved_record_size;
256   /* For non-regexp search, move past impossible positions for a match. */
257   if (sentinel_length)
258     match_start -= match_length1;
259 
260   while (true)
261     {
262       /* Search backward from 'match_start' - 1 to 'G_buffer' for a match
263          with 'separator'; for speed, use strncmp if 'separator' contains no
264          metacharacters.
265          If the match succeeds, set 'match_start' to point to the start of
266          the match and 'match_length' to the length of the match.
267          Otherwise, make 'match_start' < 'G_buffer'. */
268       if (sentinel_length == 0)
269         {
270           size_t i = match_start - G_buffer;
271           regoff_t ri = i;
272           regoff_t range = 1 - ri;
273           regoff_t ret;
274 
275           if (1 < range)
276             die (EXIT_FAILURE, 0, _("record too large"));
277 
278           if (range == 1
279               || ((ret = re_search (&compiled_separator, G_buffer,
280                                     i, i - 1, range, &regs))
281                   == -1))
282             match_start = G_buffer - 1;
283           else if (ret == -2)
284             {
285               die (EXIT_FAILURE, 0,
286                    _("error in regular expression search"));
287             }
288           else
289             {
290               match_start = G_buffer + regs.start[0];
291               match_length = regs.end[0] - regs.start[0];
292             }
293         }
294       else
295         {
296           /* 'match_length' is constant for non-regexp boundaries. */
297           while (*--match_start != first_char
298                  || (match_length1 && !STREQ_LEN (match_start + 1, separator1,
299                                                   match_length1)))
300             /* Do nothing. */ ;
301         }
302 
303       /* Check whether we backed off the front of 'G_buffer' without finding
304          a match for 'separator'. */
305       if (match_start < G_buffer)
306         {
307           if (file_pos == 0)
308             {
309               /* Hit the beginning of the file; print the remaining record. */
310               output (G_buffer, past_end);
311               return true;
312             }
313 
314           saved_record_size = past_end - G_buffer;
315           if (saved_record_size > read_size)
316             {
317               /* 'G_buffer_size' is about twice 'read_size', so since
318                  we want to read in another 'read_size' bytes before
319                  the data already in 'G_buffer', we need to increase
320                  'G_buffer_size'. */
321               char *newbuffer;
322               size_t offset = sentinel_length ? sentinel_length : 1;
323               size_t old_G_buffer_size = G_buffer_size;
324 
325               read_size *= 2;
326               G_buffer_size = read_size * 2 + sentinel_length + 2;
327               if (G_buffer_size < old_G_buffer_size)
328                 xalloc_die ();
329               newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
330               newbuffer += offset;
331               G_buffer = newbuffer;
332             }
333 
334           /* Back up to the start of the next bufferfull of the file.  */
335           if (file_pos >= read_size)
336             file_pos -= read_size;
337           else
338             {
339               read_size = file_pos;
340               file_pos = 0;
341             }
342           if (lseek (input_fd, file_pos, SEEK_SET) < 0)
343             error (0, errno, _("%s: seek failed"), quotef (file));
344 
345           /* Shift the pending record data right to make room for the new.
346              The source and destination regions probably overlap.  */
347           memmove (G_buffer + read_size, G_buffer, saved_record_size);
348           past_end = G_buffer + read_size + saved_record_size;
349           /* For non-regexp searches, avoid unnecessary scanning. */
350           if (sentinel_length)
351             match_start = G_buffer + read_size;
352           else
353             match_start = past_end;
354 
355           if (safe_read (input_fd, G_buffer, read_size) != read_size)
356             {
357               error (0, errno, _("%s: read error"), quotef (file));
358               return false;
359             }
360         }
361       else
362         {
363           /* Found a match of 'separator'. */
364           if (separator_ends_record)
365             {
366               char *match_end = match_start + match_length;
367 
368               /* If this match of 'separator' isn't at the end of the
369                  file, print the record. */
370               if (!first_time || match_end != past_end)
371                 output (match_end, past_end);
372               past_end = match_end;
373               first_time = false;
374             }
375           else
376             {
377               output (match_start, past_end);
378               past_end = match_start;
379             }
380 
381           /* For non-regex matching, we can back up.  */
382           if (sentinel_length > 0)
383             match_start -= match_length - 1;
384         }
385     }
386 }
387 
388 #if DONT_UNLINK_WHILE_OPEN
389 
390 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
391    Using atexit like this is wrong, since it can fail
392    when called e.g. 32 or more times.
393    But this isn't a big deal, since the code is used only on WOE/DOS
394    systems, and few people invoke tac on that many nonseekable files.  */
395 
396 static const char *file_to_remove;
397 static FILE *fp_to_close;
398 
399 static void
unlink_tempfile(void)400 unlink_tempfile (void)
401 {
402   fclose (fp_to_close);
403   unlink (file_to_remove);
404 }
405 
406 static void
record_or_unlink_tempfile(char const * fn,FILE * fp)407 record_or_unlink_tempfile (char const *fn, FILE *fp)
408 {
409   if (!file_to_remove)
410     {
411       file_to_remove = fn;
412       fp_to_close = fp;
413       atexit (unlink_tempfile);
414     }
415 }
416 
417 #else
418 
419 static void
record_or_unlink_tempfile(char const * fn,FILE * fp _GL_UNUSED)420 record_or_unlink_tempfile (char const *fn, FILE *fp _GL_UNUSED)
421 {
422   unlink (fn);
423 }
424 
425 #endif
426 
427 /* A wrapper around mkstemp that gives us both an open stream pointer,
428    FP, and the corresponding FILE_NAME.  Always return the same FP/name
429    pair, rewinding/truncating it upon each reuse.  */
430 static bool
temp_stream(FILE ** fp,char ** file_name)431 temp_stream (FILE **fp, char **file_name)
432 {
433   static char *tempfile = NULL;
434   static FILE *tmp_fp;
435   if (tempfile == NULL)
436     {
437       char const *t = getenv ("TMPDIR");
438       char const *tempdir = t ? t : DEFAULT_TMPDIR;
439       tempfile = mfile_name_concat (tempdir, "tacXXXXXX", NULL);
440       if (tempdir == NULL)
441         {
442           error (0, 0, _("memory exhausted"));
443           return false;
444         }
445 
446       /* FIXME: there's a small window between a successful mkstemp call
447          and the unlink that's performed by record_or_unlink_tempfile.
448          If we're interrupted in that interval, this code fails to remove
449          the temporary file.  On systems that define DONT_UNLINK_WHILE_OPEN,
450          the window is much larger -- it extends to the atexit-called
451          unlink_tempfile.
452          FIXME: clean up upon fatal signal.  Don't block them, in case
453          $TMPFILE is a remote file system.  */
454 
455       int fd = mkstemp (tempfile);
456       if (fd < 0)
457         {
458           error (0, errno, _("failed to create temporary file in %s"),
459                  quoteaf (tempdir));
460           goto Reset;
461         }
462 
463       tmp_fp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
464       if (! tmp_fp)
465         {
466           error (0, errno, _("failed to open %s for writing"),
467                  quoteaf (tempfile));
468           close (fd);
469           unlink (tempfile);
470         Reset:
471           free (tempfile);
472           tempfile = NULL;
473           return false;
474         }
475 
476       record_or_unlink_tempfile (tempfile, tmp_fp);
477     }
478   else
479     {
480       clearerr (tmp_fp);
481       if (fseeko (tmp_fp, 0, SEEK_SET) < 0
482           || ftruncate (fileno (tmp_fp), 0) < 0)
483         {
484           error (0, errno, _("failed to rewind stream for %s"),
485                  quoteaf (tempfile));
486           return false;
487         }
488     }
489 
490   *fp = tmp_fp;
491   *file_name = tempfile;
492   return true;
493 }
494 
495 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
496    a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
497    and file name.  Return the number of bytes copied, or -1 on error.  */
498 
499 static off_t
copy_to_temp(FILE ** g_tmp,char ** g_tempfile,int input_fd,char const * file)500 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
501 {
502   FILE *fp;
503   char *file_name;
504   uintmax_t bytes_copied = 0;
505   if (!temp_stream (&fp, &file_name))
506     return -1;
507 
508   while (1)
509     {
510       size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
511       if (bytes_read == 0)
512         break;
513       if (bytes_read == SAFE_READ_ERROR)
514         {
515           error (0, errno, _("%s: read error"), quotef (file));
516           return -1;
517         }
518 
519       if (fwrite (G_buffer, 1, bytes_read, fp) != bytes_read)
520         {
521           error (0, errno, _("%s: write error"), quotef (file_name));
522           return -1;
523         }
524 
525       /* Implicitly <= OFF_T_MAX due to preceding fwrite(),
526          but unsigned type used to avoid compiler warnings
527          not aware of this fact.  */
528       bytes_copied += bytes_read;
529     }
530 
531   if (fflush (fp) != 0)
532     {
533       error (0, errno, _("%s: write error"), quotef (file_name));
534       return -1;
535     }
536 
537   *g_tmp = fp;
538   *g_tempfile = file_name;
539   return bytes_copied;
540 }
541 
542 /* Copy INPUT_FD to a temporary, then tac that file.
543    Return true if successful.  */
544 
545 static bool
tac_nonseekable(int input_fd,const char * file)546 tac_nonseekable (int input_fd, const char *file)
547 {
548   FILE *tmp_stream;
549   char *tmp_file;
550   off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file);
551   if (bytes_copied < 0)
552     return false;
553 
554   bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied);
555   return ok;
556 }
557 
558 /* Print FILE in reverse, copying it to a temporary
559    file first if it is not seekable.
560    Return true if successful.  */
561 
562 static bool
tac_file(const char * filename)563 tac_file (const char *filename)
564 {
565   bool ok;
566   off_t file_size;
567   int fd;
568   bool is_stdin = STREQ (filename, "-");
569 
570   if (is_stdin)
571     {
572       have_read_stdin = true;
573       fd = STDIN_FILENO;
574       filename = _("standard input");
575       xset_binary_mode (STDIN_FILENO, O_BINARY);
576     }
577   else
578     {
579       fd = open (filename, O_RDONLY | O_BINARY);
580       if (fd < 0)
581         {
582           error (0, errno, _("failed to open %s for reading"),
583                  quoteaf (filename));
584           return false;
585         }
586     }
587 
588   file_size = lseek (fd, 0, SEEK_END);
589 
590   ok = (file_size < 0 || isatty (fd)
591         ? tac_nonseekable (fd, filename)
592         : tac_seekable (fd, filename, file_size));
593 
594   if (!is_stdin && close (fd) != 0)
595     {
596       error (0, errno, _("%s: read error"), quotef (filename));
597       ok = false;
598     }
599   return ok;
600 }
601 
602 int
main(int argc,char ** argv)603 main (int argc, char **argv)
604 {
605   const char *error_message;	/* Return value from re_compile_pattern. */
606   int optc;
607   bool ok;
608   size_t half_buffer_size;
609 
610   /* Initializer for file_list if no file-arguments
611      were specified on the command line.  */
612   static char const *const default_file_list[] = {"-", NULL};
613   char const *const *file;
614 
615   initialize_main (&argc, &argv);
616   set_program_name (argv[0]);
617   setlocale (LC_ALL, "");
618   bindtextdomain (PACKAGE, LOCALEDIR);
619   textdomain (PACKAGE);
620 
621   atexit (close_stdout);
622 
623   separator = "\n";
624   sentinel_length = 1;
625   separator_ends_record = true;
626 
627   while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
628     {
629       switch (optc)
630         {
631         case 'b':
632           separator_ends_record = false;
633           break;
634         case 'r':
635           sentinel_length = 0;
636           break;
637         case 's':
638           separator = optarg;
639           break;
640         case_GETOPT_HELP_CHAR;
641         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
642         default:
643           usage (EXIT_FAILURE);
644         }
645     }
646 
647   if (sentinel_length == 0)
648     {
649       if (*separator == 0)
650         die (EXIT_FAILURE, 0, _("separator cannot be empty"));
651 
652       compiled_separator.buffer = NULL;
653       compiled_separator.allocated = 0;
654       compiled_separator.fastmap = compiled_separator_fastmap;
655       compiled_separator.translate = NULL;
656       error_message = re_compile_pattern (separator, strlen (separator),
657                                           &compiled_separator);
658       if (error_message)
659         die (EXIT_FAILURE, 0, "%s", (error_message));
660     }
661   else
662     match_length = sentinel_length = *separator ? strlen (separator) : 1;
663 
664   read_size = INITIAL_READSIZE;
665   while (sentinel_length >= read_size / 2)
666     {
667       if (SIZE_MAX / 2 < read_size)
668         xalloc_die ();
669       read_size *= 2;
670     }
671   half_buffer_size = read_size + sentinel_length + 1;
672   G_buffer_size = 2 * half_buffer_size;
673   if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
674     xalloc_die ();
675   G_buffer = xmalloc (G_buffer_size);
676   if (sentinel_length)
677     {
678       memcpy (G_buffer, separator, sentinel_length + 1);
679       G_buffer += sentinel_length;
680     }
681   else
682     {
683       ++G_buffer;
684     }
685 
686   file = (optind < argc
687           ? (char const *const *) &argv[optind]
688           : default_file_list);
689 
690   xset_binary_mode (STDOUT_FILENO, O_BINARY);
691 
692   {
693     ok = true;
694     for (size_t i = 0; file[i]; ++i)
695       ok &= tac_file (file[i]);
696   }
697 
698   /* Flush the output buffer. */
699   output ((char *) NULL, (char *) NULL);
700 
701   if (have_read_stdin && close (STDIN_FILENO) < 0)
702     {
703       error (0, errno, "-");
704       ok = false;
705     }
706 
707 #ifdef lint
708   size_t offset = sentinel_length ? sentinel_length : 1;
709   free (G_buffer - offset);
710 #endif
711 
712   return ok ? EXIT_SUCCESS : EXIT_FAILURE;
713 }
714