1 /* tac - concatenate and print files in reverse
2 Copyright (C) 1988-2018 Free Software Foundation, Inc.
3
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
16
17 /* Written by Jay Lepreau (lepreau@cs.utah.edu).
18 GNU enhancements by David MacKenzie (djm@gnu.ai.mit.edu). */
19
20 /* Copy each FILE, or the standard input if none are given or when a
21 FILE name of "-" is encountered, to the standard output with the
22 order of the records reversed. The records are separated by
23 instances of a string, or a newline if none is given. By default, the
24 separator string is attached to the end of the record that it
25 follows in the file.
26
27 Options:
28 -b, --before The separator is attached to the beginning
29 of the record that it precedes in the file.
30 -r, --regex The separator is a regular expression.
31 -s, --separator=separator Use SEPARATOR as the record separator.
32
33 To reverse a file byte by byte, use (in bash, ksh, or sh):
34 tac -r -s '.\|
35 ' file */
36
37 #include <config.h>
38
39 #include <stdio.h>
40 #include <getopt.h>
41 #include <sys/types.h>
42 #include "system.h"
43
44 #include <regex.h>
45
46 #include "die.h"
47 #include "error.h"
48 #include "filenamecat.h"
49 #include "safe-read.h"
50 #include "stdlib--.h"
51 #include "xbinary-io.h"
52
53 /* The official name of this program (e.g., no 'g' prefix). */
54 #define PROGRAM_NAME "tac"
55
56 #define AUTHORS \
57 proper_name ("Jay Lepreau"), \
58 proper_name ("David MacKenzie")
59
60 #if defined __MSDOS__ || defined _WIN32
61 /* Define this to non-zero on systems for which the regular mechanism
62 (of unlinking an open file and expecting to be able to write, seek
63 back to the beginning, then reread it) doesn't work. E.g., on Windows
64 and DOS systems. */
65 # define DONT_UNLINK_WHILE_OPEN 1
66 #endif
67
68
69 #ifndef DEFAULT_TMPDIR
70 # define DEFAULT_TMPDIR "/tmp"
71 #endif
72
73 /* The number of bytes per atomic read. */
74 #define INITIAL_READSIZE 8192
75
76 /* The number of bytes per atomic write. */
77 #define WRITESIZE 8192
78
79 /* The string that separates the records of the file. */
80 static char const *separator;
81
82 /* True if we have ever read standard input. */
83 static bool have_read_stdin = false;
84
85 /* If true, print 'separator' along with the record preceding it
86 in the file; otherwise with the record following it. */
87 static bool separator_ends_record;
88
89 /* 0 if 'separator' is to be matched as a regular expression;
90 otherwise, the length of 'separator', used as a sentinel to
91 stop the search. */
92 static size_t sentinel_length;
93
94 /* The length of a match with 'separator'. If 'sentinel_length' is 0,
95 'match_length' is computed every time a match succeeds;
96 otherwise, it is simply the length of 'separator'. */
97 static size_t match_length;
98
99 /* The input buffer. */
100 static char *G_buffer;
101
102 /* The number of bytes to read at once into 'buffer'. */
103 static size_t read_size;
104
105 /* The size of 'buffer'. This is read_size * 2 + sentinel_length + 2.
106 The extra 2 bytes allow 'past_end' to have a value beyond the
107 end of 'G_buffer' and 'match_start' to run off the front of 'G_buffer'. */
108 static size_t G_buffer_size;
109
110 /* The compiled regular expression representing 'separator'. */
111 static struct re_pattern_buffer compiled_separator;
112 static char compiled_separator_fastmap[UCHAR_MAX + 1];
113 static struct re_registers regs;
114
115 static struct option const longopts[] =
116 {
117 {"before", no_argument, NULL, 'b'},
118 {"regex", no_argument, NULL, 'r'},
119 {"separator", required_argument, NULL, 's'},
120 {GETOPT_HELP_OPTION_DECL},
121 {GETOPT_VERSION_OPTION_DECL},
122 {NULL, 0, NULL, 0}
123 };
124
125 void
usage(int status)126 usage (int status)
127 {
128 if (status != EXIT_SUCCESS)
129 emit_try_help ();
130 else
131 {
132 printf (_("\
133 Usage: %s [OPTION]... [FILE]...\n\
134 "),
135 program_name);
136 fputs (_("\
137 Write each FILE to standard output, last line first.\n\
138 "), stdout);
139
140 emit_stdin_note ();
141 emit_mandatory_arg_note ();
142
143 fputs (_("\
144 -b, --before attach the separator before instead of after\n\
145 -r, --regex interpret the separator as a regular expression\n\
146 -s, --separator=STRING use STRING as the separator instead of newline\n\
147 "), stdout);
148 fputs (HELP_OPTION_DESCRIPTION, stdout);
149 fputs (VERSION_OPTION_DESCRIPTION, stdout);
150 emit_ancillary_info (PROGRAM_NAME);
151 }
152 exit (status);
153 }
154
155 /* Print the characters from START to PAST_END - 1.
156 If START is NULL, just flush the buffer. */
157
158 static void
output(const char * start,const char * past_end)159 output (const char *start, const char *past_end)
160 {
161 static char buffer[WRITESIZE];
162 static size_t bytes_in_buffer = 0;
163 size_t bytes_to_add = past_end - start;
164 size_t bytes_available = WRITESIZE - bytes_in_buffer;
165
166 if (start == 0)
167 {
168 fwrite (buffer, 1, bytes_in_buffer, stdout);
169 bytes_in_buffer = 0;
170 return;
171 }
172
173 /* Write out as many full buffers as possible. */
174 while (bytes_to_add >= bytes_available)
175 {
176 memcpy (buffer + bytes_in_buffer, start, bytes_available);
177 bytes_to_add -= bytes_available;
178 start += bytes_available;
179 fwrite (buffer, 1, WRITESIZE, stdout);
180 bytes_in_buffer = 0;
181 bytes_available = WRITESIZE;
182 }
183
184 memcpy (buffer + bytes_in_buffer, start, bytes_to_add);
185 bytes_in_buffer += bytes_to_add;
186 }
187
188 /* Print in reverse the file open on descriptor FD for reading FILE.
189 The file is already positioned at FILE_POS, which should be near its end.
190 Return true if successful. */
191
192 static bool
tac_seekable(int input_fd,const char * file,off_t file_pos)193 tac_seekable (int input_fd, const char *file, off_t file_pos)
194 {
195 /* Pointer to the location in 'G_buffer' where the search for
196 the next separator will begin. */
197 char *match_start;
198
199 /* Pointer to one past the rightmost character in 'G_buffer' that
200 has not been printed yet. */
201 char *past_end;
202
203 /* Length of the record growing in 'G_buffer'. */
204 size_t saved_record_size;
205
206 /* True if 'output' has not been called yet for any file.
207 Only used when the separator is attached to the preceding record. */
208 bool first_time = true;
209 char first_char = *separator; /* Speed optimization, non-regexp. */
210 char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
211 size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
212
213 /* Arrange for the first read to lop off enough to leave the rest of the
214 file a multiple of 'read_size'. Since 'read_size' can change, this may
215 not always hold during the program run, but since it usually will, leave
216 it here for i/o efficiency (page/sector boundaries and all that).
217 Note: the efficiency gain has not been verified. */
218 size_t remainder = file_pos % read_size;
219 if (remainder != 0)
220 {
221 file_pos -= remainder;
222 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
223 error (0, errno, _("%s: seek failed"), quotef (file));
224 }
225
226 /* Scan backward, looking for end of file. This caters to proc-like
227 file systems where the file size is just an estimate. */
228 while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0
229 && file_pos != 0)
230 {
231 off_t rsize = read_size;
232 if (lseek (input_fd, -rsize, SEEK_CUR) < 0)
233 error (0, errno, _("%s: seek failed"), quotef (file));
234 file_pos -= read_size;
235 }
236
237 /* Now scan forward, looking for end of file. */
238 while (saved_record_size == read_size)
239 {
240 size_t nread = safe_read (input_fd, G_buffer, read_size);
241 if (nread == 0)
242 break;
243 saved_record_size = nread;
244 if (saved_record_size == SAFE_READ_ERROR)
245 break;
246 file_pos += nread;
247 }
248
249 if (saved_record_size == SAFE_READ_ERROR)
250 {
251 error (0, errno, _("%s: read error"), quotef (file));
252 return false;
253 }
254
255 match_start = past_end = G_buffer + saved_record_size;
256 /* For non-regexp search, move past impossible positions for a match. */
257 if (sentinel_length)
258 match_start -= match_length1;
259
260 while (true)
261 {
262 /* Search backward from 'match_start' - 1 to 'G_buffer' for a match
263 with 'separator'; for speed, use strncmp if 'separator' contains no
264 metacharacters.
265 If the match succeeds, set 'match_start' to point to the start of
266 the match and 'match_length' to the length of the match.
267 Otherwise, make 'match_start' < 'G_buffer'. */
268 if (sentinel_length == 0)
269 {
270 size_t i = match_start - G_buffer;
271 regoff_t ri = i;
272 regoff_t range = 1 - ri;
273 regoff_t ret;
274
275 if (1 < range)
276 die (EXIT_FAILURE, 0, _("record too large"));
277
278 if (range == 1
279 || ((ret = re_search (&compiled_separator, G_buffer,
280 i, i - 1, range, ®s))
281 == -1))
282 match_start = G_buffer - 1;
283 else if (ret == -2)
284 {
285 die (EXIT_FAILURE, 0,
286 _("error in regular expression search"));
287 }
288 else
289 {
290 match_start = G_buffer + regs.start[0];
291 match_length = regs.end[0] - regs.start[0];
292 }
293 }
294 else
295 {
296 /* 'match_length' is constant for non-regexp boundaries. */
297 while (*--match_start != first_char
298 || (match_length1 && !STREQ_LEN (match_start + 1, separator1,
299 match_length1)))
300 /* Do nothing. */ ;
301 }
302
303 /* Check whether we backed off the front of 'G_buffer' without finding
304 a match for 'separator'. */
305 if (match_start < G_buffer)
306 {
307 if (file_pos == 0)
308 {
309 /* Hit the beginning of the file; print the remaining record. */
310 output (G_buffer, past_end);
311 return true;
312 }
313
314 saved_record_size = past_end - G_buffer;
315 if (saved_record_size > read_size)
316 {
317 /* 'G_buffer_size' is about twice 'read_size', so since
318 we want to read in another 'read_size' bytes before
319 the data already in 'G_buffer', we need to increase
320 'G_buffer_size'. */
321 char *newbuffer;
322 size_t offset = sentinel_length ? sentinel_length : 1;
323 size_t old_G_buffer_size = G_buffer_size;
324
325 read_size *= 2;
326 G_buffer_size = read_size * 2 + sentinel_length + 2;
327 if (G_buffer_size < old_G_buffer_size)
328 xalloc_die ();
329 newbuffer = xrealloc (G_buffer - offset, G_buffer_size);
330 newbuffer += offset;
331 G_buffer = newbuffer;
332 }
333
334 /* Back up to the start of the next bufferfull of the file. */
335 if (file_pos >= read_size)
336 file_pos -= read_size;
337 else
338 {
339 read_size = file_pos;
340 file_pos = 0;
341 }
342 if (lseek (input_fd, file_pos, SEEK_SET) < 0)
343 error (0, errno, _("%s: seek failed"), quotef (file));
344
345 /* Shift the pending record data right to make room for the new.
346 The source and destination regions probably overlap. */
347 memmove (G_buffer + read_size, G_buffer, saved_record_size);
348 past_end = G_buffer + read_size + saved_record_size;
349 /* For non-regexp searches, avoid unnecessary scanning. */
350 if (sentinel_length)
351 match_start = G_buffer + read_size;
352 else
353 match_start = past_end;
354
355 if (safe_read (input_fd, G_buffer, read_size) != read_size)
356 {
357 error (0, errno, _("%s: read error"), quotef (file));
358 return false;
359 }
360 }
361 else
362 {
363 /* Found a match of 'separator'. */
364 if (separator_ends_record)
365 {
366 char *match_end = match_start + match_length;
367
368 /* If this match of 'separator' isn't at the end of the
369 file, print the record. */
370 if (!first_time || match_end != past_end)
371 output (match_end, past_end);
372 past_end = match_end;
373 first_time = false;
374 }
375 else
376 {
377 output (match_start, past_end);
378 past_end = match_start;
379 }
380
381 /* For non-regex matching, we can back up. */
382 if (sentinel_length > 0)
383 match_start -= match_length - 1;
384 }
385 }
386 }
387
388 #if DONT_UNLINK_WHILE_OPEN
389
390 /* FIXME-someday: remove all of this DONT_UNLINK_WHILE_OPEN junk.
391 Using atexit like this is wrong, since it can fail
392 when called e.g. 32 or more times.
393 But this isn't a big deal, since the code is used only on WOE/DOS
394 systems, and few people invoke tac on that many nonseekable files. */
395
396 static const char *file_to_remove;
397 static FILE *fp_to_close;
398
399 static void
unlink_tempfile(void)400 unlink_tempfile (void)
401 {
402 fclose (fp_to_close);
403 unlink (file_to_remove);
404 }
405
406 static void
record_or_unlink_tempfile(char const * fn,FILE * fp)407 record_or_unlink_tempfile (char const *fn, FILE *fp)
408 {
409 if (!file_to_remove)
410 {
411 file_to_remove = fn;
412 fp_to_close = fp;
413 atexit (unlink_tempfile);
414 }
415 }
416
417 #else
418
419 static void
record_or_unlink_tempfile(char const * fn,FILE * fp _GL_UNUSED)420 record_or_unlink_tempfile (char const *fn, FILE *fp _GL_UNUSED)
421 {
422 unlink (fn);
423 }
424
425 #endif
426
427 /* A wrapper around mkstemp that gives us both an open stream pointer,
428 FP, and the corresponding FILE_NAME. Always return the same FP/name
429 pair, rewinding/truncating it upon each reuse. */
430 static bool
temp_stream(FILE ** fp,char ** file_name)431 temp_stream (FILE **fp, char **file_name)
432 {
433 static char *tempfile = NULL;
434 static FILE *tmp_fp;
435 if (tempfile == NULL)
436 {
437 char const *t = getenv ("TMPDIR");
438 char const *tempdir = t ? t : DEFAULT_TMPDIR;
439 tempfile = mfile_name_concat (tempdir, "tacXXXXXX", NULL);
440 if (tempdir == NULL)
441 {
442 error (0, 0, _("memory exhausted"));
443 return false;
444 }
445
446 /* FIXME: there's a small window between a successful mkstemp call
447 and the unlink that's performed by record_or_unlink_tempfile.
448 If we're interrupted in that interval, this code fails to remove
449 the temporary file. On systems that define DONT_UNLINK_WHILE_OPEN,
450 the window is much larger -- it extends to the atexit-called
451 unlink_tempfile.
452 FIXME: clean up upon fatal signal. Don't block them, in case
453 $TMPFILE is a remote file system. */
454
455 int fd = mkstemp (tempfile);
456 if (fd < 0)
457 {
458 error (0, errno, _("failed to create temporary file in %s"),
459 quoteaf (tempdir));
460 goto Reset;
461 }
462
463 tmp_fp = fdopen (fd, (O_BINARY ? "w+b" : "w+"));
464 if (! tmp_fp)
465 {
466 error (0, errno, _("failed to open %s for writing"),
467 quoteaf (tempfile));
468 close (fd);
469 unlink (tempfile);
470 Reset:
471 free (tempfile);
472 tempfile = NULL;
473 return false;
474 }
475
476 record_or_unlink_tempfile (tempfile, tmp_fp);
477 }
478 else
479 {
480 clearerr (tmp_fp);
481 if (fseeko (tmp_fp, 0, SEEK_SET) < 0
482 || ftruncate (fileno (tmp_fp), 0) < 0)
483 {
484 error (0, errno, _("failed to rewind stream for %s"),
485 quoteaf (tempfile));
486 return false;
487 }
488 }
489
490 *fp = tmp_fp;
491 *file_name = tempfile;
492 return true;
493 }
494
495 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
496 a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
497 and file name. Return the number of bytes copied, or -1 on error. */
498
499 static off_t
copy_to_temp(FILE ** g_tmp,char ** g_tempfile,int input_fd,char const * file)500 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
501 {
502 FILE *fp;
503 char *file_name;
504 uintmax_t bytes_copied = 0;
505 if (!temp_stream (&fp, &file_name))
506 return -1;
507
508 while (1)
509 {
510 size_t bytes_read = safe_read (input_fd, G_buffer, read_size);
511 if (bytes_read == 0)
512 break;
513 if (bytes_read == SAFE_READ_ERROR)
514 {
515 error (0, errno, _("%s: read error"), quotef (file));
516 return -1;
517 }
518
519 if (fwrite (G_buffer, 1, bytes_read, fp) != bytes_read)
520 {
521 error (0, errno, _("%s: write error"), quotef (file_name));
522 return -1;
523 }
524
525 /* Implicitly <= OFF_T_MAX due to preceding fwrite(),
526 but unsigned type used to avoid compiler warnings
527 not aware of this fact. */
528 bytes_copied += bytes_read;
529 }
530
531 if (fflush (fp) != 0)
532 {
533 error (0, errno, _("%s: write error"), quotef (file_name));
534 return -1;
535 }
536
537 *g_tmp = fp;
538 *g_tempfile = file_name;
539 return bytes_copied;
540 }
541
542 /* Copy INPUT_FD to a temporary, then tac that file.
543 Return true if successful. */
544
545 static bool
tac_nonseekable(int input_fd,const char * file)546 tac_nonseekable (int input_fd, const char *file)
547 {
548 FILE *tmp_stream;
549 char *tmp_file;
550 off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file);
551 if (bytes_copied < 0)
552 return false;
553
554 bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied);
555 return ok;
556 }
557
558 /* Print FILE in reverse, copying it to a temporary
559 file first if it is not seekable.
560 Return true if successful. */
561
562 static bool
tac_file(const char * filename)563 tac_file (const char *filename)
564 {
565 bool ok;
566 off_t file_size;
567 int fd;
568 bool is_stdin = STREQ (filename, "-");
569
570 if (is_stdin)
571 {
572 have_read_stdin = true;
573 fd = STDIN_FILENO;
574 filename = _("standard input");
575 xset_binary_mode (STDIN_FILENO, O_BINARY);
576 }
577 else
578 {
579 fd = open (filename, O_RDONLY | O_BINARY);
580 if (fd < 0)
581 {
582 error (0, errno, _("failed to open %s for reading"),
583 quoteaf (filename));
584 return false;
585 }
586 }
587
588 file_size = lseek (fd, 0, SEEK_END);
589
590 ok = (file_size < 0 || isatty (fd)
591 ? tac_nonseekable (fd, filename)
592 : tac_seekable (fd, filename, file_size));
593
594 if (!is_stdin && close (fd) != 0)
595 {
596 error (0, errno, _("%s: read error"), quotef (filename));
597 ok = false;
598 }
599 return ok;
600 }
601
602 int
main(int argc,char ** argv)603 main (int argc, char **argv)
604 {
605 const char *error_message; /* Return value from re_compile_pattern. */
606 int optc;
607 bool ok;
608 size_t half_buffer_size;
609
610 /* Initializer for file_list if no file-arguments
611 were specified on the command line. */
612 static char const *const default_file_list[] = {"-", NULL};
613 char const *const *file;
614
615 initialize_main (&argc, &argv);
616 set_program_name (argv[0]);
617 setlocale (LC_ALL, "");
618 bindtextdomain (PACKAGE, LOCALEDIR);
619 textdomain (PACKAGE);
620
621 atexit (close_stdout);
622
623 separator = "\n";
624 sentinel_length = 1;
625 separator_ends_record = true;
626
627 while ((optc = getopt_long (argc, argv, "brs:", longopts, NULL)) != -1)
628 {
629 switch (optc)
630 {
631 case 'b':
632 separator_ends_record = false;
633 break;
634 case 'r':
635 sentinel_length = 0;
636 break;
637 case 's':
638 separator = optarg;
639 break;
640 case_GETOPT_HELP_CHAR;
641 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
642 default:
643 usage (EXIT_FAILURE);
644 }
645 }
646
647 if (sentinel_length == 0)
648 {
649 if (*separator == 0)
650 die (EXIT_FAILURE, 0, _("separator cannot be empty"));
651
652 compiled_separator.buffer = NULL;
653 compiled_separator.allocated = 0;
654 compiled_separator.fastmap = compiled_separator_fastmap;
655 compiled_separator.translate = NULL;
656 error_message = re_compile_pattern (separator, strlen (separator),
657 &compiled_separator);
658 if (error_message)
659 die (EXIT_FAILURE, 0, "%s", (error_message));
660 }
661 else
662 match_length = sentinel_length = *separator ? strlen (separator) : 1;
663
664 read_size = INITIAL_READSIZE;
665 while (sentinel_length >= read_size / 2)
666 {
667 if (SIZE_MAX / 2 < read_size)
668 xalloc_die ();
669 read_size *= 2;
670 }
671 half_buffer_size = read_size + sentinel_length + 1;
672 G_buffer_size = 2 * half_buffer_size;
673 if (! (read_size < half_buffer_size && half_buffer_size < G_buffer_size))
674 xalloc_die ();
675 G_buffer = xmalloc (G_buffer_size);
676 if (sentinel_length)
677 {
678 memcpy (G_buffer, separator, sentinel_length + 1);
679 G_buffer += sentinel_length;
680 }
681 else
682 {
683 ++G_buffer;
684 }
685
686 file = (optind < argc
687 ? (char const *const *) &argv[optind]
688 : default_file_list);
689
690 xset_binary_mode (STDOUT_FILENO, O_BINARY);
691
692 {
693 ok = true;
694 for (size_t i = 0; file[i]; ++i)
695 ok &= tac_file (file[i]);
696 }
697
698 /* Flush the output buffer. */
699 output ((char *) NULL, (char *) NULL);
700
701 if (have_read_stdin && close (STDIN_FILENO) < 0)
702 {
703 error (0, errno, "-");
704 ok = false;
705 }
706
707 #ifdef lint
708 size_t offset = sentinel_length ? sentinel_length : 1;
709 free (G_buffer - offset);
710 #endif
711
712 return ok ? EXIT_SUCCESS : EXIT_FAILURE;
713 }
714