xref: /dragonfly/contrib/grep/src/grep.c (revision 09d4459f)
1 /* grep.c - main driver file for grep.
2    Copyright (C) 1992, 1997-2002, 2004-2020 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17    02110-1301, USA.  */
18 
19 /* Written July 1992 by Mike Haertel.  */
20 
21 #include <config.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <wchar.h>
25 #include <inttypes.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include "system.h"
29 
30 #include "argmatch.h"
31 #include "c-ctype.h"
32 #include "c-stack.h"
33 #include "closeout.h"
34 #include "colorize.h"
35 #include "die.h"
36 #include "error.h"
37 #include "exclude.h"
38 #include "exitfail.h"
39 #include "fcntl-safer.h"
40 #include "fts_.h"
41 #include "getopt.h"
42 #include "getprogname.h"
43 #include "grep.h"
44 #include "intprops.h"
45 #include "propername.h"
46 #include "quote.h"
47 #include "safe-read.h"
48 #include "search.h"
49 #include "c-strcase.h"
50 #include "version-etc.h"
51 #include "xalloc.h"
52 #include "xbinary-io.h"
53 #include "xstrtol.h"
54 
/* NOTE(review): presumably the separator characters that follow the
   prefix fields of selected and rejected (context) lines; their use
   sites are not visible in this part of the file -- confirm there.
   SEP_STR_GROUP is the default value of group_separator.  */
55 enum { SEP_CHAR_SELECTED = ':' };
56 enum { SEP_CHAR_REJECTED = '-' };
57 static char const SEP_STR_GROUP[] = "--";
58 
59 /* When stdout is connected to a regular file, save its stat
60    information here, so that we can automatically skip it, thus
61    avoiding a potential (racy) infinite loop.  */
62 static struct stat out_stat;
63 
64 /* If nonzero, display usage information and exit.  This is an int
   rather than a bool because the "help" entry of long_options stores
   into it through its flag pointer.  */
65 static int show_help;
66 
67 /* Print the version on standard output and exit.  */
68 static bool show_version;
69 
70 /* Suppress diagnostics for nonexistent or unreadable files.  */
71 static bool suppress_errors;
72 
73 /* If nonzero, use color markers.  color_cap_rv_fct may set this to -1,
   which still counts as nonzero.  */
74 static int color_option;
75 
76 /* Show only the part of a line matching the expression.  */
77 static bool only_matching;
78 
79 /* If true, make sure first content char in a line is on a tab stop.  */
80 static bool align_tabs;
81 
82 /* Print width of line numbers and byte offsets.  Nonzero if ALIGN_TABS.  */
83 static int offset_width;
84 
85 /* One pattern source: the name it was read from and the line number,
   within the concatenation of all patterns, at which it starts.  */
86 struct FL_pair
87   {
88     char const *filename;
89     size_t lineno;
90   };
91 
92 /* A list of filename,lineno pairs corresponding to -f FILENAME
93    arguments.  It is needed because we store the concatenation of all
94    patterns in a single array, KEYS, be they from the command line via
95    "-e PAT" or read from one or more -f-specified FILENAMES.  Given this
96    invocation, grep -f <(seq 5) -f <(seq 2) -f <(seq 3) FILE, there
97    will be three entries in FL_PAIR: {1, x} {6, y} {8, z}, where
98    x, y and z are just place-holders for shell-generated names.  */
99 static struct FL_pair *fl_pair;
/* Number of entries allocated for FL_PAIR; fl_add grows it with
   x2nrealloc when all of them are in use.  */
100 static size_t n_fl_pair_slots;
101 /* Number of FL_PAIR entries in use.  Count not only -f-specified files,
102    but also individual -e operands and any command-line argument that
   serves as a regular expression.  */
103 static size_t n_pattern_files;
104 
105 /* The number of patterns seen so far.
106    fl_add advances it by the number of newline bytes in each pattern
107    buffer, after recording N_PATTERNS + 1 as the starting line of the new
   entry; pattern_file_name subtracts that starting line to derive a
   file-relative line number.  */
108 static size_t n_patterns;
109 
/* Count the '\n' bytes among the first SIZE bytes of BUF and return
   that count.  */
static size_t _GL_ATTRIBUTE_PURE
count_nl_bytes (char const *buf, size_t size)
{
  char const *limit = buf + size;
  size_t count = 0;

  /* Hop from one newline to the next; each search resumes just past
     the previous hit and is bounded by LIMIT.  */
  for (char const *nl = memchr (buf, '\n', size);
       nl;
       nl = memchr (nl + 1, '\n', limit - (nl + 1)))
    count++;

  return count;
}
121 
122 /* Append a FILENAME,line-number pair to FL_PAIR, and update
123    pattern-related counts from the contents of BUF with SIZE bytes.  */
124 static void
fl_add(char const * buf,size_t size,char const * filename)125 fl_add (char const *buf, size_t size, char const *filename)
126 {
127   if (n_fl_pair_slots <= n_pattern_files)
128     fl_pair = x2nrealloc (fl_pair, &n_fl_pair_slots, sizeof *fl_pair);
129 
130   fl_pair[n_pattern_files].lineno = n_patterns + 1;
131   fl_pair[n_pattern_files].filename = filename;
132   n_pattern_files++;
133   n_patterns += count_nl_bytes (buf, size);
134 }
135 
136 /* Map the line number, LINENO, of one of the input patterns to the
137    name of the file from which it came.  If it was read from stdin
138    or if it was specified on the command line, return "-".  */
139 char const * _GL_ATTRIBUTE_PURE
pattern_file_name(size_t lineno,size_t * new_lineno)140 pattern_file_name (size_t lineno, size_t *new_lineno)
141 {
142   size_t i;
143   for (i = 1; i < n_pattern_files; i++)
144     {
145       if (lineno < fl_pair[i].lineno)
146         break;
147     }
148 
149   *new_lineno = lineno - fl_pair[i - 1].lineno + 1;
150   return fl_pair[i - 1].filename;
151 }
152 
#if HAVE_ASAN
/* Start address and length of the one region that is currently
   poisoned, kept so that it can be unpoisoned again before each
   following read.  */
static void const *poison_buf;
static size_t poison_len;

/* Unpoison the recorded region, if one has been recorded.  */
static void
clear_asan_poison (void)
{
  if (!poison_buf)
    return;
  __asan_unpoison_memory_region (poison_buf, poison_len);
}

/* Remember the SIZE bytes starting at ADDR as the poisoned region,
   and poison them.  */
static void
asan_poison (void const *addr, size_t size)
{
  poison_len = size;
  poison_buf = addr;

  __asan_poison_memory_region (poison_buf, poison_len);
}
#else
/* Without ASAN both operations do nothing.  */
static void clear_asan_poison (void) { }
static void asan_poison (void const volatile *addr, size_t size) { }
#endif
178 
179 /* The group separator used when context is requested.  It defaults
   to SEP_STR_GROUP.  */
180 static const char *group_separator = SEP_STR_GROUP;
181 
182 /* The context and logic for choosing default --color screen attributes
183    (foreground and background colors, etc.) are the following.
184       -- There are eight basic colors available, each with its own
185          nominal luminosity to the human eye and foreground/background
186          codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
187          magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
188          yellow [89 %, 33/43], and white [100 %, 37/47]).
189       -- Sometimes, white as a background is actually implemented using
190          a shade of light gray, so that a foreground white can be visible
191          on top of it (but most often not).
192       -- Sometimes, black as a foreground is actually implemented using
193          a shade of dark gray, so that it can be visible on top of a
194          background black (but most often not).
195       -- Sometimes, more colors are available, as extensions.
196       -- Other attributes can be selected/deselected (bold [1/22],
197          underline [4/24], standout/inverse [7/27], blink [5/25], and
198          invisible/hidden [8/28]).  They are sometimes implemented by
199          using colors instead of what their names imply; e.g., bold is
200          often achieved by using brighter colors.  In practice, only bold
201          is really available to us, underline sometimes being mapped by
202          the terminal to some strange color choice, and standout best
203          being left for use by downstream programs such as less(1).
204       -- We cannot assume that any of the extensions or special features
205          are available for the purpose of choosing defaults for everyone.
206       -- The most prevalent default terminal backgrounds are pure black
207          and pure white, and are not necessarily the same shades of
208          those as if they were selected explicitly with SGR sequences.
209          Some terminals use dark or light pictures as default background,
210          but those are covered over by an explicit selection of background
211          color with an SGR sequence; their users will appreciate their
212          background pictures not being covered like this, if possible.
213       -- Some uses of color attributes are to make some output items
214          more understated (e.g., context lines); this cannot be achieved
215          by changing the background color.
216       -- For these reasons, the grep color defaults should strive not
217          to change the background color from its default, unless it's
218          for a short item that should be highlighted, not understated.
219       -- The grep foreground color defaults (without an explicitly set
220          background) should provide enough contrast to be readable on any
221          terminal with either a black (dark) or white (light) background.
222          This only leaves red, magenta, green, and cyan (and their bold
223          counterparts) and possibly bold blue.  */
224 /* The color strings used for matched text.
225    The user can override them using the deprecated
226    environment variable GREP_COLOR or the new GREP_COLORS.
   An empty string means no SGR sequence is printed at all:
   pr_sgr_start and pr_sgr_end do nothing when the string is empty.  */
227 static const char *selected_match_color = "01;31";	/* bold red */
228 static const char *context_match_color  = "01;31";	/* bold red */
229 
230 /* Other colors, each settable through the color_dict capability that
   points at it.  */
231 static const char *filename_color = "35";	/* magenta */
232 static const char *line_num_color = "32";	/* green */
233 static const char *byte_num_color = "32";	/* green */
234 static const char *sep_color      = "36";	/* cyan */
235 static const char *selected_line_color = "";	/* default color pair */
236 static const char *context_line_color  = "";	/* default color pair */
237 
238 /* Select Graphic Rendition (SGR, "\33[...m") strings.  */
239 /* Also Erase in Line (EL) to Right ("\33[K") by default.  */
240 /*    Why have EL to Right after SGR?
241          -- The behavior of line-wrapping when at the bottom of the
242             terminal screen and at the end of the current line is often
243             such that a new line is introduced, entirely cleared with
244             the current background color which may be different from the
245             default one (see the boolean back_color_erase terminfo(5)
246             capability), thus scrolling the display by one line.
247             The end of this new line will stay in this background color
248             even after reverting to the default background color with
249             "\33[m", unless it is explicitly cleared again with "\33[K"
250             (which is the behavior the user would instinctively expect
251             from the whole thing).  There may be some unavoidable
252             background-color flicker at the end of this new line because
253             of this (when timing with the monitor's redraw is just right).
254          -- The behavior of HT (tab, "\t") is usually the same as that of
255             Cursor Forward Tabulation (CHT) with a default parameter
256             of 1 ("\33[I"), i.e., it performs pure movement to the next
257             tab stop, without any clearing of either content or screen
258             attributes (including background color); try
259                printf 'asdfqwerzxcv\rASDF\tZXCV\n'
260             in a bash(1) shell to demonstrate this.  This is not what the
261             user would instinctively expect of HT (but is ok for CHT).
262             The instinctive behavior would include clearing the terminal
263             cells that are skipped over by HT with blank cells in the
264             current screen attributes, including background color;
265             the boolean dest_tabs_magic_smso terminfo(5) capability
266             indicates this saner behavior for HT, but only some rare
267             terminals have it (although it also indicates a special
268             glitch with standout mode in the Teleray terminal for which
269             it was initially introduced).  The remedy is to add "\33[K"
270             after each SGR sequence, be it START (to fix the behavior
271             of any HT after that before another SGR) or END (to fix the
272             behavior of an HT in default background color that would
273             follow a line-wrapping at the bottom of the screen in another
274             background color, and to complement doing it after START).
275             Piping grep's output through a pager such as less(1) avoids
276             any HT problems since the pager performs tab expansion.
277 
278       Generic disadvantages of this remedy are:
279          -- Some very rare terminals might support SGR but not EL (nobody
280             will use "grep --color" on a terminal that does not support
281             SGR in the first place).
282          -- Having these extra control sequences might somewhat complicate
283             the task of any program trying to parse "grep --color"
284             output in order to extract structuring information from it.
285       A specific disadvantage to doing it after SGR START is:
286          -- Even more possible background color flicker (when timing
287             with the monitor's redraw is just right), even when not at the
288             bottom of the screen.
289       There are no additional disadvantages specific to doing it after
290       SGR END.
291 
292       It would be impractical for GNU grep to become a full-fledged
293       terminal program linked against ncurses or the like, so it will
294       not detect terminfo(5) capabilities.  */
/* SGR_START is the format passed to print_start_colorize together
   with a color string, and SGR_END the string passed to
   print_end_colorize.  Both defaults end with EL to Right ("\33[K");
   color_cap_ne_fct replaces them with variants lacking it.  */
295 static const char *sgr_start = "\33[%sm\33[K";
296 static const char *sgr_end   = "\33[m\33[K";
297 
298 /* SGR utility functions.  */
299 static void
pr_sgr_start(char const * s)300 pr_sgr_start (char const *s)
301 {
302   if (*s)
303     print_start_colorize (sgr_start, s);
304 }
305 static void
pr_sgr_end(char const * s)306 pr_sgr_end (char const *s)
307 {
308   if (*s)
309     print_end_colorize (sgr_end);
310 }
311 static void
pr_sgr_start_if(char const * s)312 pr_sgr_start_if (char const *s)
313 {
314   if (color_option)
315     pr_sgr_start (s);
316 }
317 static void
pr_sgr_end_if(char const * s)318 pr_sgr_end_if (char const *s)
319 {
320   if (color_option)
321     pr_sgr_end (s);
322 }
323 
/* One entry of the color capability table color_dict.  */
324 struct color_cap
325   {
326     const char *name;	/* Capability name, e.g. "ms".  */
327     const char **var;	/* Color variable it refers to, or NULL.  */
328     void (*fct) (void);	/* Extra action for the capability, or NULL.  */
329   };
330 
331 static void
color_cap_mt_fct(void)332 color_cap_mt_fct (void)
333 {
334   /* Our caller just set selected_match_color.  */
335   context_match_color = selected_match_color;
336 }
337 
338 static void
color_cap_rv_fct(void)339 color_cap_rv_fct (void)
340 {
341   /* By this point, it was 1 (or already -1).  */
342   color_option = -1;  /* That's still != 0.  */
343 }
344 
345 static void
color_cap_ne_fct(void)346 color_cap_ne_fct (void)
347 {
348   sgr_start = "\33[%sm";
349   sgr_end   = "\33[m";
350 }
351 
352 /* The capabilities recognized in GREP_COLORS.  Each entry gives the
   capability name, the color variable it sets (if any), and an extra
   action (if any).  The table ends with an all-NULL entry.  */
353 static const struct color_cap color_dict[] =
354   {
355     { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
356     { "ms", &selected_match_color, NULL }, /* selected matched text */
357     { "mc", &context_match_color,  NULL }, /* context matched text */
358     { "fn", &filename_color,       NULL }, /* filename */
359     { "ln", &line_num_color,       NULL }, /* line number */
360     { "bn", &byte_num_color,       NULL }, /* byte (sic) offset */
361     { "se", &sep_color,            NULL }, /* separator */
362     { "sl", &selected_line_color,  NULL }, /* selected lines */
363     { "cx", &context_line_color,   NULL }, /* context lines */
364     { "rv", NULL,                  color_cap_rv_fct }, /* -v reverses sl/cx */
365     { "ne", NULL,                  color_cap_ne_fct }, /* no EL on SGR_* */
366     { NULL, NULL,                  NULL }
367   };
368 
/* The errno value saved when an output function on stdout fails;
   zero while no such failure has been recorded.  */
static int stdout_errno;

/* Write the character C to stdout, saving errno on failure.  */
static void
putchar_errno (int c)
{
  int rc = putchar (c);
  if (rc < 0)
    stdout_errno = errno;
}
378 
379 static void
fputs_errno(char const * s)380 fputs_errno (char const *s)
381 {
382   if (fputs (s, stdout) < 0)
383     stdout_errno = errno;
384 }
385 
386 static void _GL_ATTRIBUTE_FORMAT_PRINTF (1, 2)
printf_errno(char const * format,...)387 printf_errno (char const *format, ...)
388 {
389   va_list ap;
390   va_start (ap, format);
391   if (vfprintf (stdout, format, ap) < 0)
392     stdout_errno = errno;
393   va_end (ap);
394 }
395 
396 static void
fwrite_errno(void const * ptr,size_t size,size_t nmemb)397 fwrite_errno (void const *ptr, size_t size, size_t nmemb)
398 {
399   if (fwrite (ptr, size, nmemb, stdout) != nmemb)
400     stdout_errno = errno;
401 }
402 
403 static void
fflush_errno(void)404 fflush_errno (void)
405 {
406   if (fflush (stdout) != 0)
407     stdout_errno = errno;
408 }
409 
/* Exclusion patterns for non-directories and for directories.
   skipped_file indexes each array by its COMMAND_LINE flag.  */
410 static struct exclude *excluded_patterns[2];
411 static struct exclude *excluded_directory_patterns[2];
412 /* Short options.  A letter followed by ':' takes an argument.  */
413 static char const short_options[] =
414 "0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
415 
416 /* Non-boolean long options that have no corresponding short equivalents.
   The values start above CHAR_MAX so that they cannot collide with any
   short option character.  */
417 enum
418 {
419   BINARY_FILES_OPTION = CHAR_MAX + 1,
420   COLOR_OPTION,
421   EXCLUDE_DIRECTORY_OPTION,
422   EXCLUDE_OPTION,
423   EXCLUDE_FROM_OPTION,
424   GROUP_SEPARATOR_OPTION,
425   INCLUDE_OPTION,
426   LINE_BUFFERED_OPTION,
427   LABEL_OPTION,
428   NO_IGNORE_CASE_OPTION
429 };
430 
431 /* Long option equivalences.  Most entries map a long name to a short
   option letter or to one of the *_OPTION codes.  "help" instead stores
   1 into show_help.  "group-separator" and "no-group-separator" share
   GROUP_SEPARATOR_OPTION; only the former takes an argument.  The table
   ends with an all-zero entry.  */
432 static struct option const long_options[] =
433 {
434   {"basic-regexp",    no_argument, NULL, 'G'},
435   {"extended-regexp", no_argument, NULL, 'E'},
436   {"fixed-regexp",    no_argument, NULL, 'F'},
437   {"fixed-strings",   no_argument, NULL, 'F'},
438   {"perl-regexp",     no_argument, NULL, 'P'},
439   {"after-context", required_argument, NULL, 'A'},
440   {"before-context", required_argument, NULL, 'B'},
441   {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
442   {"byte-offset", no_argument, NULL, 'b'},
443   {"context", required_argument, NULL, 'C'},
444   {"color", optional_argument, NULL, COLOR_OPTION},
445   {"colour", optional_argument, NULL, COLOR_OPTION},
446   {"count", no_argument, NULL, 'c'},
447   {"devices", required_argument, NULL, 'D'},
448   {"directories", required_argument, NULL, 'd'},
449   {"exclude", required_argument, NULL, EXCLUDE_OPTION},
450   {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
451   {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
452   {"file", required_argument, NULL, 'f'},
453   {"files-with-matches", no_argument, NULL, 'l'},
454   {"files-without-match", no_argument, NULL, 'L'},
455   {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
456   {"help", no_argument, &show_help, 1},
457   {"include", required_argument, NULL, INCLUDE_OPTION},
458   {"ignore-case", no_argument, NULL, 'i'},
459   {"no-ignore-case", no_argument, NULL, NO_IGNORE_CASE_OPTION},
460   {"initial-tab", no_argument, NULL, 'T'},
461   {"label", required_argument, NULL, LABEL_OPTION},
462   {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
463   {"line-number", no_argument, NULL, 'n'},
464   {"line-regexp", no_argument, NULL, 'x'},
465   {"max-count", required_argument, NULL, 'm'},
466 
467   {"no-filename", no_argument, NULL, 'h'},
468   {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
469   {"no-messages", no_argument, NULL, 's'},
470   {"null", no_argument, NULL, 'Z'},
471   {"null-data", no_argument, NULL, 'z'},
472   {"only-matching", no_argument, NULL, 'o'},
473   {"quiet", no_argument, NULL, 'q'},
474   {"recursive", no_argument, NULL, 'r'},
475   {"dereference-recursive", no_argument, NULL, 'R'},
476   {"regexp", required_argument, NULL, 'e'},
477   {"invert-match", no_argument, NULL, 'v'},
478   {"silent", no_argument, NULL, 'q'},
479   {"text", no_argument, NULL, 'a'},
480   {"binary", no_argument, NULL, 'U'},
481   {"unix-byte-offsets", no_argument, NULL, 'u'},
482   {"version", no_argument, NULL, 'V'},
483   {"with-filename", no_argument, NULL, 'H'},
484   {"word-regexp", no_argument, NULL, 'w'},
485   {0, 0, 0, 0}
486 };
487 
488 /* Define flags declared in grep.h.  */
489 bool match_icase;
490 bool match_words;
491 bool match_lines;
/* reset stores this byte just before the start of the buffer.  */
492 char eolbyte;
493 
494 /* For error messages.  */
495 /* The input file name, or (if standard input) null or a --label argument.
   input_filename replaces a null value with a translated placeholder.  */
496 static char const *filename;
497 /* Omit leading "./" from file names in diagnostics.  */
498 static bool omit_dot_slash;
/* Set by suppressible_error on every input-file error, whether or not
   the diagnostic itself was suppressed.  */
499 static bool errseen;
500 
501 /* True if output from the current input file has been suppressed
502    because an output line had an encoding error.  */
503 static bool encoding_error_output;
504 
/* The possible treatments of directories.  The values are listed in
   the same order as the names in directories_args.  */
505 enum directories_type
506   {
507     READ_DIRECTORIES = 2,
508     RECURSE_DIRECTORIES,
509     SKIP_DIRECTORIES
510   };
511 
512 /* How to handle directories: the accepted argument names, and the
   value each name selects (same index in both arrays).  */
513 static char const *const directories_args[] =
514 {
515   "read", "recurse", "skip", NULL
516 };
517 static enum directories_type const directories_types[] =
518 {
519   READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
520 };
521 ARGMATCH_VERIFY (directories_args, directories_types);
522 
/* The directory treatment in effect; reading is the default.  */
523 static enum directories_type directories = READ_DIRECTORIES;
524 
/* FTS flags: the base set, and the initial value of the set in
   effect, which adds FTS_COMFOLLOW and FTS_PHYSICAL.  */
525 enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
526 static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
527 
528 /* How to handle devices.  With the default, READ_COMMAND_LINE_DEVICES,
   skip_devices skips only those not named on the command line.  */
529 static enum
530   {
531     READ_COMMAND_LINE_DEVICES,
532     READ_DEVICES,
533     SKIP_DEVICES
534   } devices = READ_COMMAND_LINE_DEVICES;
535 
/* Forward declarations; the definitions are not in this part of the
   file.  */
536 static bool grepfile (int, char const *, bool, bool);
537 static bool grepdesc (int, bool);
538 
/* Return true if the file mode M denotes a character device, block
   device, socket, or FIFO.  */
static bool
is_device_mode (mode_t m)
{
  if (S_ISCHR (m) || S_ISBLK (m))
    return true;
  return S_ISSOCK (m) || S_ISFIFO (m);
}
544 
545 static bool
skip_devices(bool command_line)546 skip_devices (bool command_line)
547 {
548   return (devices == SKIP_DEVICES
549           || ((devices == READ_COMMAND_LINE_DEVICES) & !command_line));
550 }
551 
/* Return true if ST->st_size is defined, that is, if ST describes a
   regular file, a shared memory object, or a typed memory object.
   Assume the file is not a symbolic link.  */
static bool
usable_st_size (struct stat const *st)
{
  if (S_ISREG (st->st_mode))
    return true;
  return S_TYPEISSHM (st) || S_TYPEISTMO (st);
}
559 
560 /* Lame substitutes for SEEK_DATA and SEEK_HOLE on platforms lacking them.
561    Do not rely on these finding data or holes if they equal SEEK_SET.
   file_must_have_nulls checks SEEK_HOLE != SEEK_SET for this reason.  */
562 #ifndef SEEK_DATA
563 enum { SEEK_DATA = SEEK_SET };
564 #endif
565 #ifndef SEEK_HOLE
566 enum { SEEK_HOLE = SEEK_SET };
567 #endif
568 
569 /* True if lseek with SEEK_CUR (seek_failed) or with SEEK_DATA
   (seek_data_failed) failed on the current input.  */
570 static bool seek_failed;
571 static bool seek_data_failed;
572 
573 /* Types of the functions we'll use to compile a pattern and to search,
   followed by the search function in use and the compiled pattern.  */
574 typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t);
575 typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *,
576                                 char const *);
577 static execute_fp_t execute;
578 static void *compiled_pattern;
579 
580 static char const *
input_filename(void)581 input_filename (void)
582 {
583   if (!filename)
584     filename = _("(standard input)");
585   return filename;
586 }
587 
588 /* Unless requested, diagnose an error about the input file.  */
589 static void
suppressible_error(int errnum)590 suppressible_error (int errnum)
591 {
592   if (! suppress_errors)
593     error (0, errnum, "%s", input_filename ());
594   errseen = true;
595 }
596 
597 /* If there has already been a write error, don't bother closing
598    standard output, as that might elicit a duplicate diagnostic.  */
599 static void
clean_up_stdout(void)600 clean_up_stdout (void)
601 {
602   if (! stdout_errno)
603     close_stdout ();
604 }
605 
606 /* A cast to TYPE of VAL.  Use this when TYPE is a pointer type, VAL
607    is properly aligned for TYPE, and 'gcc -Wcast-align' cannot infer
608    the alignment and would otherwise complain about the cast.
   The preprocessor test holds for GCC 4.6 and later.  That variant
   copies VAL into a temporary and casts it inside a statement
   expression, with -Wcast-align ignored between a diagnostic push and
   pop.  Other compilers get a plain cast.  */
609 #if 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
610 # define CAST_ALIGNED(type, val)                           \
611     ({ __typeof__ (val) val_ = val;                        \
612        _Pragma ("GCC diagnostic push")                     \
613        _Pragma ("GCC diagnostic ignored \"-Wcast-align\"") \
614        (type) val_;                                        \
615        _Pragma ("GCC diagnostic pop")                      \
616     })
617 #else
618 # define CAST_ALIGNED(type, val) ((type) (val))
619 #endif
620 
621 /* An unsigned type suitable for fast matching.  skip_easy_bytes scans
   buffers one uword at a time.  */
622 typedef uintmax_t uword;
623 
/* Locale information.  In this part of the file only its sbclen table
   is read, by initialize_unibyte_mask.  */
624 struct localeinfo localeinfo;
625 
626 /* A mask to test for unibyte characters, with the pattern repeated to
627    fill a uword.  For a character encoding where
628    all bytes are unibyte characters, this is 0.  For UTF-8, this is
629    0x808080....  For encodings where unibyte characters have no discerned
630    pattern, this is all 1s.  The unsigned char C is a unibyte
631    character if C & UNIBYTE_MASK is zero.  If the uword W is the
632    concatenation of bytes, the bytes are all unibyte characters
633    if W & UNIBYTE_MASK is zero.  Set by initialize_unibyte_mask.  */
634 static uword unibyte_mask;
635 
636 static void
initialize_unibyte_mask(void)637 initialize_unibyte_mask (void)
638 {
639   /* For each encoding error I that MASK does not already match,
640      accumulate I's most significant 1 bit by ORing it into MASK.
641      Although any 1 bit of I could be used, in practice high-order
642      bits work better.  */
643   unsigned char mask = 0;
644   int ms1b = 1;
645   for (int i = 1; i <= UCHAR_MAX; i++)
646     if ((localeinfo.sbclen[i] != 1) & ! (mask & i))
647       {
648         while (ms1b * 2 <= i)
649           ms1b *= 2;
650         mask |= ms1b;
651       }
652 
653   /* Now MASK will detect any encoding-error byte, although it may
654      cry wolf and it may not be optimal.  Build a uword-length mask by
655      repeating MASK.  */
656   uword uword_max = -1;
657   unibyte_mask = uword_max / UCHAR_MAX * mask;
658 }
659 
660 /* Skip the easy bytes in a buffer that is guaranteed to have a sentinel
661    that is not easy, and return a pointer to the first non-easy byte.
662    The easy bytes all have UNIBYTE_MASK off.  */
663 static char const * _GL_ATTRIBUTE_PURE
skip_easy_bytes(char const * buf)664 skip_easy_bytes (char const *buf)
665 {
666   /* Search a byte at a time until the pointer is aligned, then a
667      uword at a time until a match is found, then a byte at a time to
668      identify the exact byte.  The uword search may go slightly past
669      the buffer end, but that's benign.  */
670   char const *p;
671   uword const *s;
672   for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++)
673     if (to_uchar (*p) & unibyte_mask)
674       return p;
675   for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++)
676     continue;
677   for (p = (char const *) s; ! (to_uchar (*p) & unibyte_mask); p++)
678     continue;
679   return p;
680 }
681 
682 /* Return true if BUF, of size SIZE, has an encoding error.
683    BUF must be followed by at least sizeof (uword) bytes,
684    the first of which may be modified.  */
685 static bool
buf_has_encoding_errors(char * buf,size_t size)686 buf_has_encoding_errors (char *buf, size_t size)
687 {
688   if (! unibyte_mask)
689     return false;
690 
691   mbstate_t mbs = { 0 };
692   size_t clen;
693 
694   buf[size] = -1;
695   for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
696     {
697       clen = mbrlen (p, buf + size - p, &mbs);
698       if ((size_t) -2 <= clen)
699         return true;
700     }
701 
702   return false;
703 }
704 
705 
/* Return true if the SIZE bytes at BUF contain a null byte.
   BUF must be followed by at least one byte, which may be arbitrarily
   written to or read from.  */
static bool
buf_has_nulls (char *buf, size_t size)
{
  /* Terminate the buffer so that strlen cannot run past it.  The
     string is then shorter than SIZE exactly when a null byte occurs
     earlier.  */
  buf[size] = 0;
  size_t first_nul = strlen (buf);
  return first_nul < size;
}
715 
716 /* Return true if a file is known to contain null bytes.
717    SIZE bytes have already been read from the file
718    with descriptor FD and status ST.  */
719 static bool
file_must_have_nulls(size_t size,int fd,struct stat const * st)720 file_must_have_nulls (size_t size, int fd, struct stat const *st)
721 {
722   /* If the file has holes, it must contain a null byte somewhere.  */
723   if (SEEK_HOLE != SEEK_SET && !seek_failed
724       && usable_st_size (st) && size < st->st_size)
725     {
726       off_t cur = size;
727       if (O_BINARY || fd == STDIN_FILENO)
728         {
729           cur = lseek (fd, 0, SEEK_CUR);
730           if (cur < 0)
731             return false;
732         }
733 
734       /* Look for a hole after the current location.  */
735       off_t hole_start = lseek (fd, cur, SEEK_HOLE);
736       if (0 <= hole_start)
737         {
738           if (lseek (fd, cur, SEEK_SET) < 0)
739             suppressible_error (errno);
740           if (hole_start < st->st_size)
741             return true;
742         }
743     }
744 
745   return false;
746 }
747 
748 /* Convert STR to a nonnegative integer, storing the result in *OUT.
749    STR must be a valid context length argument; report an error if it
750    isn't.  Silently ceiling *OUT at the maximum value, as that is
751    practically equivalent to infinity for grep's purposes.  */
752 static void
context_length_arg(char const * str,intmax_t * out)753 context_length_arg (char const *str, intmax_t *out)
754 {
755   switch (xstrtoimax (str, 0, 10, out, ""))
756     {
757     case LONGINT_OK:
758     case LONGINT_OVERFLOW:
759       if (0 <= *out)
760         break;
761       FALLTHROUGH;
762     default:
763       die (EXIT_TROUBLE, 0, "%s: %s", str,
764            _("invalid context length argument"));
765     }
766 }
767 
768 /* Return the add_exclude options suitable for excluding a file name.
769    If COMMAND_LINE, it is a command-line file name.  */
770 static int
exclude_options(bool command_line)771 exclude_options (bool command_line)
772 {
773   return EXCLUDE_WILDCARDS | (command_line ? 0 : EXCLUDE_ANCHORED);
774 }
775 
776 /* Return true if the file with NAME should be skipped.
777    If COMMAND_LINE, it is a command-line argument.
778    If IS_DIR, it is a directory.  */
779 static bool
skipped_file(char const * name,bool command_line,bool is_dir)780 skipped_file (char const *name, bool command_line, bool is_dir)
781 {
782   struct exclude **pats;
783   if (! is_dir)
784     pats = excluded_patterns;
785   else if (directories == SKIP_DIRECTORIES)
786     return true;
787   else if (command_line && omit_dot_slash)
788     return false;
789   else
790     pats = excluded_directory_patterns;
791   return pats[command_line] && excluded_file_name (pats[command_line], name);
792 }
793 
/* Hairy buffering mechanism for grep.  The intent is to keep
   all reads aligned on a page boundary and multiples of the
   page size, unless a read yields a partial page.  */

static char *buffer;            /* Base of buffer. */
static size_t bufalloc;         /* Allocated buffer size, counting slop. */
static int bufdesc;             /* File descriptor. */
static char *bufbeg;            /* Beginning of user-visible stuff. */
static char *buflim;            /* Limit of user-visible stuff. */
static size_t pagesize;         /* alignment of memory pages */
static off_t bufoffset;         /* Read offset.  */
static off_t after_last_match;  /* Pointer after last matching line that
                                   would have been output if we were
                                   outputting characters. */
static bool skip_nuls;          /* Skip '\0' in data.  */
static bool skip_empty_lines;   /* Skip empty lines in data.  */
static uintmax_t totalnl;       /* Total newline count before lastnl. */

/* Initial buffer size, not counting slop. */
enum { INITIAL_BUFSIZE = 96 * 1024 };

/* Return VAL aligned to the next multiple of ALIGNMENT; VAL is
   returned unchanged if it is already a multiple.  VAL can be
   an integer or a pointer.  Both args must be free of side effects,
   because each is evaluated more than once.  */
#define ALIGN_TO(val, alignment) \
  ((size_t) (val) % (alignment) == 0 \
   ? (val) \
   : (val) + ((alignment) - (size_t) (val) % (alignment)))

822 /* Add two numbers that count input bytes or lines, and report an
823    error if the addition overflows.  */
824 static uintmax_t
add_count(uintmax_t a,uintmax_t b)825 add_count (uintmax_t a, uintmax_t b)
826 {
827   uintmax_t sum = a + b;
828   if (sum < a)
829     die (EXIT_TROUBLE, 0, _("input is too large to count"));
830   return sum;
831 }
832 
/* Return true if BUF (of size SIZE) is all zeros.  An empty buffer
   (SIZE == 0) counts as all zeros.  */
static bool
all_zeros (char const *buf, size_t size)
{
  for (char const *p = buf; p < buf + size; p++)
    if (*p)
      return false;
  return true;
}

843 /* Reset the buffer for a new file, returning false if we should skip it.
844    Initialize on the first time through. */
845 static bool
reset(int fd,struct stat const * st)846 reset (int fd, struct stat const *st)
847 {
848   bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
849   bufbeg[-1] = eolbyte;
850   bufdesc = fd;
851   bufoffset = fd == STDIN_FILENO ? lseek (fd, 0, SEEK_CUR) : 0;
852   seek_failed = bufoffset < 0;
853 
854   /* Assume SEEK_DATA fails if SEEK_CUR does.  */
855   seek_data_failed = seek_failed;
856 
857   if (seek_failed)
858     {
859       if (errno != ESPIPE)
860         {
861           suppressible_error (errno);
862           return false;
863         }
864       bufoffset = 0;
865     }
866   return true;
867 }
868 
869 /* Read new stuff into the buffer, saving the specified
870    amount of old stuff.  When we're done, 'bufbeg' points
871    to the beginning of the buffer contents, and 'buflim'
872    points just after the end.  Return false if there's an error.  */
873 static bool
fillbuf(size_t save,struct stat const * st)874 fillbuf (size_t save, struct stat const *st)
875 {
876   size_t fillsize;
877   bool cc = true;
878   char *readbuf;
879   size_t readsize;
880 
881   /* Offset from start of buffer to start of old stuff
882      that we want to save.  */
883   size_t saved_offset = buflim - save - buffer;
884 
885   if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
886     {
887       readbuf = buflim;
888       bufbeg = buflim - save;
889     }
890   else
891     {
892       size_t minsize = save + pagesize;
893       size_t newsize;
894       size_t newalloc;
895       char *newbuf;
896 
897       /* Grow newsize until it is at least as great as minsize.  */
898       for (newsize = bufalloc - pagesize - sizeof (uword);
899            newsize < minsize;
900            newsize *= 2)
901         if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
902           xalloc_die ();
903 
904       /* Try not to allocate more memory than the file size indicates,
905          as that might cause unnecessary memory exhaustion if the file
906          is large.  However, do not use the original file size as a
907          heuristic if we've already read past the file end, as most
908          likely the file is growing.  */
909       if (usable_st_size (st))
910         {
911           off_t to_be_read = st->st_size - bufoffset;
912           off_t maxsize_off = save + to_be_read;
913           if (0 <= to_be_read && to_be_read <= maxsize_off
914               && maxsize_off == (size_t) maxsize_off
915               && minsize <= (size_t) maxsize_off
916               && (size_t) maxsize_off < newsize)
917             newsize = maxsize_off;
918         }
919 
920       /* Add enough room so that the buffer is aligned and has room
921          for byte sentinels fore and aft, and so that a uword can
922          be read aft.  */
923       newalloc = newsize + pagesize + sizeof (uword);
924 
925       newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
926       readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
927       bufbeg = readbuf - save;
928       memmove (bufbeg, buffer + saved_offset, save);
929       bufbeg[-1] = eolbyte;
930       if (newbuf != buffer)
931         {
932           free (buffer);
933           buffer = newbuf;
934         }
935     }
936 
937   clear_asan_poison ();
938 
939   readsize = buffer + bufalloc - sizeof (uword) - readbuf;
940   readsize -= readsize % pagesize;
941 
942   while (true)
943     {
944       fillsize = safe_read (bufdesc, readbuf, readsize);
945       if (fillsize == SAFE_READ_ERROR)
946         {
947           fillsize = 0;
948           cc = false;
949         }
950       bufoffset += fillsize;
951 
952       if (((fillsize == 0) | !skip_nuls) || !all_zeros (readbuf, fillsize))
953         break;
954       totalnl = add_count (totalnl, fillsize);
955 
956       if (SEEK_DATA != SEEK_SET && !seek_data_failed)
957         {
958           /* Solaris SEEK_DATA fails with errno == ENXIO in a hole at EOF.  */
959           off_t data_start = lseek (bufdesc, bufoffset, SEEK_DATA);
960           if (data_start < 0 && errno == ENXIO
961               && usable_st_size (st) && bufoffset < st->st_size)
962             data_start = lseek (bufdesc, 0, SEEK_END);
963 
964           if (data_start < 0)
965             seek_data_failed = true;
966           else
967             {
968               totalnl = add_count (totalnl, data_start - bufoffset);
969               bufoffset = data_start;
970             }
971         }
972     }
973 
974   buflim = readbuf + fillsize;
975 
976   /* Initialize the following word, because skip_easy_bytes and some
977      matchers read (but do not use) those bytes.  This avoids false
978      positive reports of these bytes being used uninitialized.  */
979   memset (buflim, 0, sizeof (uword));
980 
981   /* Mark the part of the buffer not filled by the read or set by
982      the above memset call as ASAN-poisoned.  */
983   asan_poison (buflim + sizeof (uword),
984                bufalloc - (buflim - buffer) - sizeof (uword));
985 
986   return cc;
987 }
988 
/* Flags controlling the style of output. */
static enum
{
  BINARY_BINARY_FILES,
  TEXT_BINARY_FILES,
  WITHOUT_MATCH_BINARY_FILES
} binary_files;         /* How to handle binary files.  */

/* Options for output as a list of matching/non-matching files */
static enum
{
  LISTFILES_NONE,
  LISTFILES_MATCHING,
  LISTFILES_NONMATCHING,
} list_files;

/* Whether to output filenames.  1 means yes, 0 means no, and -1 means
   'grep -r PATTERN FILE' was used and it is not known yet whether
   FILE is a directory (which means yes) or not (which means no).  */
static int out_file;

static int filename_mask;       /* If zero, output nulls after filenames.  */
static bool out_quiet;          /* Suppress all normal output. */
static bool out_invert;         /* Print nonmatching stuff. */
static bool out_line;           /* Print line numbers. */
static bool out_byte;           /* Print byte offsets. */
static intmax_t out_before;     /* Lines of leading context. */
static intmax_t out_after;      /* Lines of trailing context. */
static bool count_matches;      /* Count matching lines.  */
static intmax_t max_count;      /* Max number of selected
                                   lines from an input file.  */
static bool line_buffered;      /* Use line buffering.  */
static char *label = NULL;      /* Fake filename for stdin */

/* Internal variables to keep track of byte count, context, etc. */
static uintmax_t totalcc;       /* Total character count before bufbeg. */
static char const *lastnl;      /* Pointer after last newline counted. */
static char *lastout;           /* Pointer after last character output;
                                   NULL if no character has been output
                                   or if it's conceptually before bufbeg. */
static intmax_t outleft;        /* Maximum number of selected lines.  */
static intmax_t pending;        /* Pending lines of output.
                                   Always kept 0 if out_quiet is true.  */
static bool done_on_match;      /* Stop scanning file on first match.  */
static bool exit_on_match;      /* Exit on first match.  */
static bool dev_null_output;    /* Stdout is known to be /dev/null.  */
static bool binary;             /* Use binary rather than text I/O.  */

1038 static void
nlscan(char const * lim)1039 nlscan (char const *lim)
1040 {
1041   size_t newlines = 0;
1042   char const *beg;
1043   for (beg = lastnl; beg < lim; beg++)
1044     {
1045       beg = memchr (beg, eolbyte, lim - beg);
1046       if (!beg)
1047         break;
1048       newlines++;
1049     }
1050   totalnl = add_count (totalnl, newlines);
1051   lastnl = lim;
1052 }
1053 
1054 /* Print the current filename.  */
1055 static void
print_filename(void)1056 print_filename (void)
1057 {
1058   pr_sgr_start_if (filename_color);
1059   fputs_errno (input_filename ());
1060   pr_sgr_end_if (filename_color);
1061 }
1062 
1063 /* Print a character separator.  */
1064 static void
print_sep(char sep)1065 print_sep (char sep)
1066 {
1067   pr_sgr_start_if (sep_color);
1068   putchar_errno (sep);
1069   pr_sgr_end_if (sep_color);
1070 }
1071 
1072 /* Print a line number or a byte offset.  */
1073 static void
print_offset(uintmax_t pos,const char * color)1074 print_offset (uintmax_t pos, const char *color)
1075 {
1076   pr_sgr_start_if (color);
1077   printf_errno ("%*"PRIuMAX, offset_width, pos);
1078   pr_sgr_end_if (color);
1079 }
1080 
1081 /* Print a whole line head (filename, line, byte).  The output data
1082    starts at BEG and contains LEN bytes; it is followed by at least
1083    sizeof (uword) bytes, the first of which may be temporarily modified.
1084    The output data comes from what is perhaps a larger input line that
1085    goes until LIM, where LIM[-1] is an end-of-line byte.  Use SEP as
1086    the separator on output.
1087 
1088    Return true unless the line was suppressed due to an encoding error.  */
1089 
1090 static bool
print_line_head(char * beg,size_t len,char const * lim,char sep)1091 print_line_head (char *beg, size_t len, char const *lim, char sep)
1092 {
1093   if (binary_files != TEXT_BINARY_FILES)
1094     {
1095       char ch = beg[len];
1096       bool encoding_errors = buf_has_encoding_errors (beg, len);
1097       beg[len] = ch;
1098       if (encoding_errors)
1099         {
1100           encoding_error_output = true;
1101           return false;
1102         }
1103     }
1104 
1105   if (out_file)
1106     {
1107       print_filename ();
1108       if (filename_mask)
1109         print_sep (sep);
1110       else
1111         putchar_errno (0);
1112     }
1113 
1114   if (out_line)
1115     {
1116       if (lastnl < lim)
1117         {
1118           nlscan (beg);
1119           totalnl = add_count (totalnl, 1);
1120           lastnl = lim;
1121         }
1122       print_offset (totalnl, line_num_color);
1123       print_sep (sep);
1124     }
1125 
1126   if (out_byte)
1127     {
1128       uintmax_t pos = add_count (totalcc, beg - bufbeg);
1129       print_offset (pos, byte_num_color);
1130       print_sep (sep);
1131     }
1132 
1133   if (align_tabs && (out_file | out_line | out_byte) && len != 0)
1134     putchar_errno ('\t');
1135 
1136   return true;
1137 }
1138 
1139 static char *
print_line_middle(char * beg,char * lim,const char * line_color,const char * match_color)1140 print_line_middle (char *beg, char *lim,
1141                    const char *line_color, const char *match_color)
1142 {
1143   size_t match_size;
1144   size_t match_offset;
1145   char *cur;
1146   char *mid = NULL;
1147   char *b;
1148 
1149   for (cur = beg;
1150        (cur < lim
1151         && ((match_offset = execute (compiled_pattern, beg, lim - beg,
1152                                      &match_size, cur)) != (size_t) -1));
1153        cur = b + match_size)
1154     {
1155       b = beg + match_offset;
1156 
1157       /* Avoid matching the empty line at the end of the buffer. */
1158       if (b == lim)
1159         break;
1160 
1161       /* Avoid hanging on grep --color "" foo */
1162       if (match_size == 0)
1163         {
1164           /* Make minimal progress; there may be further non-empty matches.  */
1165           /* XXX - Could really advance by one whole multi-octet character.  */
1166           match_size = 1;
1167           if (!mid)
1168             mid = cur;
1169         }
1170       else
1171         {
1172           /* This function is called on a matching line only,
1173              but is it selected or rejected/context?  */
1174           if (only_matching)
1175             {
1176               char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED;
1177               if (! print_line_head (b, match_size, lim, sep))
1178                 return NULL;
1179             }
1180           else
1181             {
1182               pr_sgr_start (line_color);
1183               if (mid)
1184                 {
1185                   cur = mid;
1186                   mid = NULL;
1187                 }
1188               fwrite_errno (cur, 1, b - cur);
1189             }
1190 
1191           pr_sgr_start_if (match_color);
1192           fwrite_errno (b, 1, match_size);
1193           pr_sgr_end_if (match_color);
1194           if (only_matching)
1195             putchar_errno (eolbyte);
1196         }
1197     }
1198 
1199   if (only_matching)
1200     cur = lim;
1201   else if (mid)
1202     cur = mid;
1203 
1204   return cur;
1205 }
1206 
1207 static char *
print_line_tail(char * beg,const char * lim,const char * line_color)1208 print_line_tail (char *beg, const char *lim, const char *line_color)
1209 {
1210   size_t eol_size;
1211   size_t tail_size;
1212 
1213   eol_size   = (lim > beg && lim[-1] == eolbyte);
1214   eol_size  += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
1215   tail_size  =  lim - eol_size - beg;
1216 
1217   if (tail_size > 0)
1218     {
1219       pr_sgr_start (line_color);
1220       fwrite_errno (beg, 1, tail_size);
1221       beg += tail_size;
1222       pr_sgr_end (line_color);
1223     }
1224 
1225   return beg;
1226 }
1227 
1228 static void
prline(char * beg,char * lim,char sep)1229 prline (char *beg, char *lim, char sep)
1230 {
1231   bool matching;
1232   const char *line_color;
1233   const char *match_color;
1234 
1235   if (!only_matching)
1236     if (! print_line_head (beg, lim - beg - 1, lim, sep))
1237       return;
1238 
1239   matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
1240 
1241   if (color_option)
1242     {
1243       line_color = (((sep == SEP_CHAR_SELECTED)
1244                      ^ (out_invert && (color_option < 0)))
1245                     ? selected_line_color  : context_line_color);
1246       match_color = (sep == SEP_CHAR_SELECTED
1247                      ? selected_match_color : context_match_color);
1248     }
1249   else
1250     line_color = match_color = NULL; /* Shouldn't be used.  */
1251 
1252   if ((only_matching && matching)
1253       || (color_option && (*line_color || *match_color)))
1254     {
1255       /* We already know that non-matching lines have no match (to colorize). */
1256       if (matching && (only_matching || *match_color))
1257         {
1258           beg = print_line_middle (beg, lim, line_color, match_color);
1259           if (! beg)
1260             return;
1261         }
1262 
1263       if (!only_matching && *line_color)
1264         {
1265           /* This code is exercised at least when grep is invoked like this:
1266              echo k| GREP_COLORS='sl=01;32' src/grep k --color=always  */
1267           beg = print_line_tail (beg, lim, line_color);
1268         }
1269     }
1270 
1271   if (!only_matching && lim > beg)
1272     fwrite_errno (beg, 1, lim - beg);
1273 
1274   if (line_buffered)
1275     fflush_errno ();
1276 
1277   if (stdout_errno)
1278     die (EXIT_TROUBLE, stdout_errno, _("write error"));
1279 
1280   lastout = lim;
1281 }
1282 
1283 /* Print pending lines of trailing context prior to LIM.  */
1284 static void
prpending(char const * lim)1285 prpending (char const *lim)
1286 {
1287   if (!lastout)
1288     lastout = bufbeg;
1289   for (; 0 < pending && lastout < lim; pending--)
1290     {
1291       char *nl = memchr (lastout, eolbyte, lim - lastout);
1292       prline (lastout, nl + 1, SEP_CHAR_REJECTED);
1293     }
1294 }
1295 
1296 /* Output the lines between BEG and LIM.  Deal with context.  */
1297 static void
prtext(char * beg,char * lim)1298 prtext (char *beg, char *lim)
1299 {
1300   static bool used;	/* Avoid printing SEP_STR_GROUP before any output.  */
1301   char eol = eolbyte;
1302 
1303   if (!out_quiet && pending > 0)
1304     prpending (beg);
1305 
1306   char *p = beg;
1307 
1308   if (!out_quiet)
1309     {
1310       /* Deal with leading context.  */
1311       char const *bp = lastout ? lastout : bufbeg;
1312       intmax_t i;
1313       for (i = 0; i < out_before; ++i)
1314         if (p > bp)
1315           do
1316             --p;
1317           while (p[-1] != eol);
1318 
1319       /* Print the group separator unless the output is adjacent to
1320          the previous output in the file.  */
1321       if ((0 <= out_before || 0 <= out_after) && used
1322           && p != lastout && group_separator)
1323         {
1324           pr_sgr_start_if (sep_color);
1325           fputs_errno (group_separator);
1326           pr_sgr_end_if (sep_color);
1327           putchar_errno ('\n');
1328         }
1329 
1330       while (p < beg)
1331         {
1332           char *nl = memchr (p, eol, beg - p);
1333           nl++;
1334           prline (p, nl, SEP_CHAR_REJECTED);
1335           p = nl;
1336         }
1337     }
1338 
1339   intmax_t n;
1340   if (out_invert)
1341     {
1342       /* One or more lines are output.  */
1343       for (n = 0; p < lim && n < outleft; n++)
1344         {
1345           char *nl = memchr (p, eol, lim - p);
1346           nl++;
1347           if (!out_quiet)
1348             prline (p, nl, SEP_CHAR_SELECTED);
1349           p = nl;
1350         }
1351     }
1352   else
1353     {
1354       /* Just one line is output.  */
1355       if (!out_quiet)
1356         prline (beg, lim, SEP_CHAR_SELECTED);
1357       n = 1;
1358       p = lim;
1359     }
1360 
1361   after_last_match = bufoffset - (buflim - p);
1362   pending = out_quiet ? 0 : MAX (0, out_after);
1363   used = true;
1364   outleft -= n;
1365 }
1366 
/* Replace all NUL bytes in buffer P (which ends at LIM) with EOL.
   This avoids running out of memory when binary input contains a long
   sequence of zeros, which would otherwise be considered to be part
   of a long line.  *LIM must be writable: it temporarily holds a NUL
   sentinel and is left equal to EOL.  Do nothing if EOL is NUL.  */
static void
zap_nuls (char *p, char *lim, char eol)
{
  if (eol)
    while (true)
      {
        /* Use *LIM as a sentinel so that strlen stops at LIM at
           the latest.  */
        *lim = '\0';
        p += strlen (p);
        *lim = eol;
        if (p == lim)
          break;
        /* P is at a NUL before LIM: overwrite the whole run.  */
        do
          *p++ = eol;
        while (!*p);
      }
}

1388 /* Scan the specified portion of the buffer, matching lines (or
1389    between matching lines if OUT_INVERT is true).  Return a count of
1390    lines printed.  Replace all NUL bytes with NUL_ZAPPER as we go.  */
1391 static intmax_t
grepbuf(char * beg,char const * lim)1392 grepbuf (char *beg, char const *lim)
1393 {
1394   intmax_t outleft0 = outleft;
1395   char *endp;
1396 
1397   for (char *p = beg; p < lim; p = endp)
1398     {
1399       size_t match_size;
1400       size_t match_offset = execute (compiled_pattern, p, lim - p,
1401                                      &match_size, NULL);
1402       if (match_offset == (size_t) -1)
1403         {
1404           if (!out_invert)
1405             break;
1406           match_offset = lim - p;
1407           match_size = 0;
1408         }
1409       char *b = p + match_offset;
1410       endp = b + match_size;
1411       /* Avoid matching the empty line at the end of the buffer. */
1412       if (!out_invert && b == lim)
1413         break;
1414       if (!out_invert || p < b)
1415         {
1416           char *prbeg = out_invert ? p : b;
1417           char *prend = out_invert ? b : endp;
1418           prtext (prbeg, prend);
1419           if (!outleft || done_on_match)
1420             {
1421               if (exit_on_match)
1422                 exit (errseen ? exit_failure : EXIT_SUCCESS);
1423               break;
1424             }
1425         }
1426     }
1427 
1428   return outleft0 - outleft;
1429 }
1430 
1431 /* Search a given (non-directory) file.  Return a count of lines printed.
1432    Set *INEOF to true if end-of-file reached.  */
1433 static intmax_t
grep(int fd,struct stat const * st,bool * ineof)1434 grep (int fd, struct stat const *st, bool *ineof)
1435 {
1436   intmax_t nlines, i;
1437   size_t residue, save;
1438   char oldc;
1439   char *beg;
1440   char *lim;
1441   char eol = eolbyte;
1442   char nul_zapper = '\0';
1443   bool done_on_match_0 = done_on_match;
1444   bool out_quiet_0 = out_quiet;
1445 
1446   /* The value of NLINES when nulls were first deduced in the input;
1447      this is not necessarily the same as the number of matching lines
1448      before the first null.  -1 if no input nulls have been deduced.  */
1449   intmax_t nlines_first_null = -1;
1450 
1451   if (! reset (fd, st))
1452     return 0;
1453 
1454   totalcc = 0;
1455   lastout = 0;
1456   totalnl = 0;
1457   outleft = max_count;
1458   after_last_match = 0;
1459   pending = 0;
1460   skip_nuls = skip_empty_lines && !eol;
1461   encoding_error_output = false;
1462 
1463   nlines = 0;
1464   residue = 0;
1465   save = 0;
1466 
1467   if (! fillbuf (save, st))
1468     {
1469       suppressible_error (errno);
1470       return 0;
1471     }
1472 
1473   offset_width = 0;
1474   if (align_tabs)
1475     {
1476       /* Width is log of maximum number.  Line numbers are origin-1.  */
1477       uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX;
1478       num += out_line && num < UINTMAX_MAX;
1479       do
1480         offset_width++;
1481       while ((num /= 10) != 0);
1482     }
1483 
1484   for (bool firsttime = true; ; firsttime = false)
1485     {
1486       if (nlines_first_null < 0 && eol && binary_files != TEXT_BINARY_FILES
1487           && (buf_has_nulls (bufbeg, buflim - bufbeg)
1488               || (firsttime && file_must_have_nulls (buflim - bufbeg, fd, st))))
1489         {
1490           if (binary_files == WITHOUT_MATCH_BINARY_FILES)
1491             return 0;
1492           if (!count_matches)
1493             done_on_match = out_quiet = true;
1494           nlines_first_null = nlines;
1495           nul_zapper = eol;
1496           skip_nuls = skip_empty_lines;
1497         }
1498 
1499       lastnl = bufbeg;
1500       if (lastout)
1501         lastout = bufbeg;
1502 
1503       beg = bufbeg + save;
1504 
1505       /* no more data to scan (eof) except for maybe a residue -> break */
1506       if (beg == buflim)
1507         {
1508           *ineof = true;
1509           break;
1510         }
1511 
1512       zap_nuls (beg, buflim, nul_zapper);
1513 
1514       /* Determine new residue (the length of an incomplete line at the end of
1515          the buffer, 0 means there is no incomplete last line).  */
1516       oldc = beg[-1];
1517       beg[-1] = eol;
1518       /* FIXME: use rawmemrchr if/when it exists, since we have ensured
1519          that this use of memrchr is guaranteed never to return NULL.  */
1520       lim = memrchr (beg - 1, eol, buflim - beg + 1);
1521       ++lim;
1522       beg[-1] = oldc;
1523       if (lim == beg)
1524         lim = beg - residue;
1525       beg -= residue;
1526       residue = buflim - lim;
1527 
1528       if (beg < lim)
1529         {
1530           if (outleft)
1531             nlines += grepbuf (beg, lim);
1532           if (pending)
1533             prpending (lim);
1534           if ((!outleft && !pending)
1535               || (done_on_match && MAX (0, nlines_first_null) < nlines))
1536             goto finish_grep;
1537         }
1538 
1539       /* The last OUT_BEFORE lines at the end of the buffer will be needed as
1540          leading context if there is a matching line at the begin of the
1541          next data. Make beg point to their begin.  */
1542       i = 0;
1543       beg = lim;
1544       while (i < out_before && beg > bufbeg && beg != lastout)
1545         {
1546           ++i;
1547           do
1548             --beg;
1549           while (beg[-1] != eol);
1550         }
1551 
1552       /* Detect whether leading context is adjacent to previous output.  */
1553       if (beg != lastout)
1554         lastout = 0;
1555 
1556       /* Handle some details and read more data to scan.  */
1557       save = residue + lim - beg;
1558       if (out_byte)
1559         totalcc = add_count (totalcc, buflim - bufbeg - save);
1560       if (out_line)
1561         nlscan (beg);
1562       if (! fillbuf (save, st))
1563         {
1564           suppressible_error (errno);
1565           goto finish_grep;
1566         }
1567     }
1568   if (residue)
1569     {
1570       *buflim++ = eol;
1571       if (outleft)
1572         nlines += grepbuf (bufbeg + save - residue, buflim);
1573       if (pending)
1574         prpending (buflim);
1575     }
1576 
1577  finish_grep:
1578   done_on_match = done_on_match_0;
1579   out_quiet = out_quiet_0;
1580   if (!out_quiet && (encoding_error_output
1581                      || (0 <= nlines_first_null && nlines_first_null < nlines)))
1582     {
1583       printf_errno (_("Binary file %s matches\n"), input_filename ());
1584       if (line_buffered)
1585         fflush_errno ();
1586     }
1587   return nlines;
1588 }
1589 
1590 static bool
grepdirent(FTS * fts,FTSENT * ent,bool command_line)1591 grepdirent (FTS *fts, FTSENT *ent, bool command_line)
1592 {
1593   bool follow;
1594   command_line &= ent->fts_level == FTS_ROOTLEVEL;
1595 
1596   if (ent->fts_info == FTS_DP)
1597     return true;
1598 
1599   if (!command_line
1600       && skipped_file (ent->fts_name, false,
1601                        (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
1602                         || ent->fts_info == FTS_DNR)))
1603     {
1604       fts_set (fts, ent, FTS_SKIP);
1605       return true;
1606     }
1607 
1608   filename = ent->fts_path;
1609   if (omit_dot_slash && filename[1])
1610     filename += 2;
1611   follow = (fts->fts_options & FTS_LOGICAL
1612             || (fts->fts_options & FTS_COMFOLLOW && command_line));
1613 
1614   switch (ent->fts_info)
1615     {
1616     case FTS_D:
1617       if (directories == RECURSE_DIRECTORIES)
1618         return true;
1619       fts_set (fts, ent, FTS_SKIP);
1620       break;
1621 
1622     case FTS_DC:
1623       if (!suppress_errors)
1624         error (0, 0, _("warning: %s: %s"), filename,
1625                _("recursive directory loop"));
1626       return true;
1627 
1628     case FTS_DNR:
1629     case FTS_ERR:
1630     case FTS_NS:
1631       suppressible_error (ent->fts_errno);
1632       return true;
1633 
1634     case FTS_DEFAULT:
1635     case FTS_NSOK:
1636       if (skip_devices (command_line))
1637         {
1638           struct stat *st = ent->fts_statp;
1639           struct stat st1;
1640           if (! st->st_mode)
1641             {
1642               /* The file type is not already known.  Get the file status
1643                  before opening, since opening might have side effects
1644                  on a device.  */
1645               int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
1646               if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
1647                 {
1648                   suppressible_error (errno);
1649                   return true;
1650                 }
1651               st = &st1;
1652             }
1653           if (is_device_mode (st->st_mode))
1654             return true;
1655         }
1656       break;
1657 
1658     case FTS_F:
1659     case FTS_SLNONE:
1660       break;
1661 
1662     case FTS_SL:
1663     case FTS_W:
1664       return true;
1665 
1666     default:
1667       abort ();
1668     }
1669 
1670   return grepfile (fts->fts_cwd_fd, ent->fts_accpath, follow, command_line);
1671 }
1672 
/* True if errno is ERR after 'open ("symlink", ... O_NOFOLLOW ...)'.
   POSIX specifies ELOOP, but it's EMLINK on FreeBSD and EFTYPE on NetBSD.
   EFTYPE is checked only where the platform defines it.  */
static bool
open_symlink_nofollow_error (int err)
{
  if (err == ELOOP || err == EMLINK)
    return true;
#ifdef EFTYPE
  if (err == EFTYPE)
    return true;
#endif
  return false;
}

1687 static bool
grepfile(int dirdesc,char const * name,bool follow,bool command_line)1688 grepfile (int dirdesc, char const *name, bool follow, bool command_line)
1689 {
1690   int oflag = (O_RDONLY | O_NOCTTY
1691                | (IGNORE_DUPLICATE_BRANCH_WARNING
1692                   (binary ? O_BINARY : 0))
1693                | (follow ? 0 : O_NOFOLLOW)
1694                | (skip_devices (command_line) ? O_NONBLOCK : 0));
1695   int desc = openat_safer (dirdesc, name, oflag);
1696   if (desc < 0)
1697     {
1698       if (follow || ! open_symlink_nofollow_error (errno))
1699         suppressible_error (errno);
1700       return true;
1701     }
1702   return grepdesc (desc, command_line);
1703 }
1704 
1705 /* Read all data from FD, with status ST.  Return true if successful,
1706    false (setting errno) otherwise.  */
1707 static bool
drain_input(int fd,struct stat const * st)1708 drain_input (int fd, struct stat const *st)
1709 {
1710   ssize_t nbytes;
1711   if (S_ISFIFO (st->st_mode) && dev_null_output)
1712     {
1713 #ifdef SPLICE_F_MOVE
1714       /* Should be faster, since it need not copy data to user space.  */
1715       nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1716                        INITIAL_BUFSIZE, SPLICE_F_MOVE);
1717       if (0 <= nbytes || errno != EINVAL)
1718         {
1719           while (0 < nbytes)
1720             nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1721                              INITIAL_BUFSIZE, SPLICE_F_MOVE);
1722           return nbytes == 0;
1723         }
1724 #endif
1725     }
1726   while ((nbytes = safe_read (fd, buffer, bufalloc)))
1727     if (nbytes == SAFE_READ_ERROR)
1728       return false;
1729   return true;
1730 }
1731 
1732 /* Finish reading from FD, with status ST and where end-of-file has
1733    been seen if INEOF.  Typically this is a no-op, but when reading
1734    from standard input this may adjust the file offset or drain a
1735    pipe.  */
1736 
1737 static void
finalize_input(int fd,struct stat const * st,bool ineof)1738 finalize_input (int fd, struct stat const *st, bool ineof)
1739 {
1740   if (fd == STDIN_FILENO
1741       && (outleft
1742           ? (!ineof
1743              && (seek_failed
1744                  || (lseek (fd, 0, SEEK_END) < 0
1745                      /* Linux proc file system has EINVAL (Bug#25180).  */
1746                      && errno != EINVAL))
1747              && ! drain_input (fd, st))
1748           : (bufoffset != after_last_match && !seek_failed
1749              && lseek (fd, after_last_match, SEEK_SET) < 0)))
1750     suppressible_error (errno);
1751 }
1752 
1753 static bool
grepdesc(int desc,bool command_line)1754 grepdesc (int desc, bool command_line)
1755 {
1756   intmax_t count;
1757   bool status = true;
1758   bool ineof = false;
1759   struct stat st;
1760 
1761   /* Get the file status, possibly for the second time.  This catches
1762      a race condition if the directory entry changes after the
1763      directory entry is read and before the file is opened.  For
1764      example, normally DESC is a directory only at the top level, but
1765      there is an exception if some other process substitutes a
1766      directory for a non-directory while 'grep' is running.  */
1767   if (fstat (desc, &st) != 0)
1768     {
1769       suppressible_error (errno);
1770       goto closeout;
1771     }
1772 
1773   if (desc != STDIN_FILENO && skip_devices (command_line)
1774       && is_device_mode (st.st_mode))
1775     goto closeout;
1776 
1777   if (desc != STDIN_FILENO && command_line
1778       && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0))
1779     goto closeout;
1780 
1781   /* Don't output file names if invoked as 'grep -r PATTERN NONDIRECTORY'.  */
1782   if (out_file < 0)
1783     out_file = !!S_ISDIR (st.st_mode);
1784 
1785   if (desc != STDIN_FILENO
1786       && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
1787     {
1788       /* Traverse the directory starting with its full name, because
1789          unfortunately fts provides no way to traverse the directory
1790          starting from its file descriptor.  */
1791 
1792       FTS *fts;
1793       FTSENT *ent;
1794       int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
1795       char *fts_arg[2];
1796 
1797       /* Close DESC now, to conserve file descriptors if the race
1798          condition occurs many times in a deep recursion.  */
1799       if (close (desc) != 0)
1800         suppressible_error (errno);
1801 
1802       fts_arg[0] = (char *) filename;
1803       fts_arg[1] = NULL;
1804       fts = fts_open (fts_arg, opts, NULL);
1805 
1806       if (!fts)
1807         xalloc_die ();
1808       while ((ent = fts_read (fts)))
1809         status &= grepdirent (fts, ent, command_line);
1810       if (errno)
1811         suppressible_error (errno);
1812       if (fts_close (fts) != 0)
1813         suppressible_error (errno);
1814       return status;
1815     }
1816   if (desc != STDIN_FILENO
1817       && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
1818           || ((devices == SKIP_DEVICES
1819                || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
1820               && is_device_mode (st.st_mode))))
1821     goto closeout;
1822 
1823   /* If there is a regular file on stdout and the current file refers
1824      to the same i-node, we have to report the problem and skip it.
1825      Otherwise when matching lines from some other input reach the
1826      disk before we open this file, we can end up reading and matching
1827      those lines and appending them to the file from which we're reading.
1828      Then we'd have what appears to be an infinite loop that'd terminate
1829      only upon filling the output file system or reaching a quota.
1830      However, there is no risk of an infinite loop if grep is generating
1831      no output, i.e., with --silent, --quiet, -q.
1832      Similarly, with any of these:
1833        --max-count=N (-m) (for N >= 2)
1834        --files-with-matches (-l)
1835        --files-without-match (-L)
1836      there is no risk of trouble.
1837      For --max-count=1, grep stops after printing the first match,
1838      so there is no risk of malfunction.  But even --max-count=2, with
1839      input==output, while there is no risk of infloop, there is a race
1840      condition that could result in "alternate" output.  */
1841   if (!out_quiet && list_files == LISTFILES_NONE && 1 < max_count
1842       && S_ISREG (st.st_mode) && SAME_INODE (st, out_stat))
1843     {
1844       if (! suppress_errors)
1845         error (0, 0, _("input file %s is also the output"),
1846                quote (input_filename ()));
1847       errseen = true;
1848       goto closeout;
1849     }
1850 
1851   count = grep (desc, &st, &ineof);
1852   if (count_matches)
1853     {
1854       if (out_file)
1855         {
1856           print_filename ();
1857           if (filename_mask)
1858             print_sep (SEP_CHAR_SELECTED);
1859           else
1860             putchar_errno (0);
1861         }
1862       printf_errno ("%" PRIdMAX "\n", count);
1863       if (line_buffered)
1864         fflush_errno ();
1865     }
1866 
1867   status = !count == !(list_files == LISTFILES_NONMATCHING);
1868 
1869   if (list_files == LISTFILES_NONE || dev_null_output)
1870     finalize_input (desc, &st, ineof);
1871   else if (status == 0)
1872     {
1873       print_filename ();
1874       putchar_errno ('\n' & filename_mask);
1875       if (line_buffered)
1876         fflush_errno ();
1877     }
1878 
1879  closeout:
1880   if (desc != STDIN_FILENO && close (desc) != 0)
1881     suppressible_error (errno);
1882   return status;
1883 }
1884 
1885 static bool
grep_command_line_arg(char const * arg)1886 grep_command_line_arg (char const *arg)
1887 {
1888   if (STREQ (arg, "-"))
1889     {
1890       filename = label;
1891       if (binary)
1892         xset_binary_mode (STDIN_FILENO, O_BINARY);
1893       return grepdesc (STDIN_FILENO, true);
1894     }
1895   else
1896     {
1897       filename = arg;
1898       return grepfile (AT_FDCWD, arg, true, true);
1899     }
1900 }
1901 
_Noreturn void usage (int);

/* Print usage information and exit with STATUS.  A nonzero STATUS
   yields a brief hint on stderr; a zero STATUS yields the full help
   text on stdout followed by the bug-reporting address.  */
void
usage (int status)
{
  if (status != 0)
    {
      fprintf (stderr, _("Usage: %s [OPTION]... PATTERNS [FILE]...\n"),
               getprogname ());
      fprintf (stderr, _("Try '%s --help' for more information.\n"),
               getprogname ());
      exit (status);
    }

  printf (_("Usage: %s [OPTION]... PATTERNS [FILE]...\n"), getprogname ());
  printf (_("Search for PATTERNS in each FILE.\n"));
  printf (_("Example: %s -i 'hello world' menu.h main.c\n"
            "PATTERNS can contain multiple patterns separated by newlines.\n"
            "\n"
            "Pattern selection and interpretation:\n"),
          getprogname ());
  printf (_("  -E, --extended-regexp     PATTERNS are extended regular expressions\n"
            "  -F, --fixed-strings       PATTERNS are strings\n"
            "  -G, --basic-regexp        PATTERNS are basic regular expressions\n"
            "  -P, --perl-regexp         PATTERNS are Perl regular expressions\n"));
  /* -X is deliberately undocumented.  */
  printf (_("  -e, --regexp=PATTERNS     use PATTERNS for matching\n"
            "  -f, --file=FILE           take PATTERNS from FILE\n"
            "  -i, --ignore-case         ignore case distinctions in patterns and data\n"
            "      --no-ignore-case      do not ignore case distinctions (default)\n"
            "  -w, --word-regexp         match only whole words\n"
            "  -x, --line-regexp         match only whole lines\n"
            "  -z, --null-data           a data line ends in 0 byte, not newline\n"));
  printf (_("\n"
            "Miscellaneous:\n"
            "  -s, --no-messages         suppress error messages\n"
            "  -v, --invert-match        select non-matching lines\n"
            "  -V, --version             display version information and exit\n"
            "      --help                display this help text and exit\n"));
  printf (_("\n"
            "Output control:\n"
            "  -m, --max-count=NUM       stop after NUM selected lines\n"
            "  -b, --byte-offset         print the byte offset with output lines\n"
            "  -n, --line-number         print line number with output lines\n"
            "      --line-buffered       flush output on every line\n"
            "  -H, --with-filename       print file name with output lines\n"
            "  -h, --no-filename         suppress the file name prefix on output\n"
            "      --label=LABEL         use LABEL as the standard input file name prefix\n"));
  printf (_("  -o, --only-matching       show only nonempty parts of lines that match\n"
            "  -q, --quiet, --silent     suppress all normal output\n"
            "      --binary-files=TYPE   assume that binary files are TYPE;\n"
            "                            TYPE is 'binary', 'text', or 'without-match'\n"
            "  -a, --text                equivalent to --binary-files=text\n"));
  printf (_("  -I                        equivalent to --binary-files=without-match\n"
            "  -d, --directories=ACTION  how to handle directories;\n"
            "                            ACTION is 'read', 'recurse', or 'skip'\n"
            "  -D, --devices=ACTION      how to handle devices, FIFOs and sockets;\n"
            "                            ACTION is 'read' or 'skip'\n"
            "  -r, --recursive           like --directories=recurse\n"
            "  -R, --dereference-recursive  likewise, but follow all symlinks\n"));
  printf (_("      --include=GLOB        search only files that match GLOB (a file pattern)\n"
            "      --exclude=GLOB        skip files that match GLOB\n"
            "      --exclude-from=FILE   skip files that match any file pattern from FILE\n"
            "      --exclude-dir=GLOB    skip directories that match GLOB\n"));
  printf (_("  -L, --files-without-match  print only names of FILEs with no selected lines\n"
            "  -l, --files-with-matches  print only names of FILEs with selected lines\n"
            "  -c, --count               print only a count of selected lines per FILE\n"
            "  -T, --initial-tab         make tabs line up (if needed)\n"
            "  -Z, --null                print 0 byte after FILE name\n"));
  printf (_("\n"
            "Context control:\n"
            "  -B, --before-context=NUM  print NUM lines of leading context\n"
            "  -A, --after-context=NUM   print NUM lines of trailing context\n"
            "  -C, --context=NUM         print NUM lines of output context\n"));
  printf (_("  -NUM                      same as --context=NUM\n"
            "      --color[=WHEN],\n"
            "      --colour[=WHEN]       use markers to highlight the matching strings;\n"
            "                            WHEN is 'always', 'never', or 'auto'\n"
            "  -U, --binary              do not strip CR characters at EOL (MSDOS/Windows)\n"
            "\n"));
  printf (_("When FILE is '-', read standard input.  With no FILE, read '.' if\n"
            "recursive, '-' otherwise.  With fewer than two FILEs, assume -h.\n"
            "Exit status is 0 if any line (or file if -L) is selected, 1 otherwise;\n"
            "if any error occurs and -q is not given, the exit status is 2.\n"));
  emit_bug_reporting_address ();
  exit (status);
}
2006 
2007 /* Pattern compilers and matchers.  */
2008 
2009 static struct
2010 {
2011   char name[12];
2012   int syntax; /* used if compile == GEAcompile */
2013   compile_fp_t compile;
2014   execute_fp_t execute;
2015 } const matchers[] = {
2016   { "grep", RE_SYNTAX_GREP, GEAcompile, EGexecute },
2017   { "egrep", RE_SYNTAX_EGREP, GEAcompile, EGexecute },
2018   { "fgrep", 0, Fcompile, Fexecute, },
2019   { "awk", RE_SYNTAX_AWK, GEAcompile, EGexecute },
2020   { "gawk", RE_SYNTAX_GNU_AWK, GEAcompile, EGexecute },
2021   { "posixawk", RE_SYNTAX_POSIX_AWK, GEAcompile, EGexecute },
2022 #if HAVE_LIBPCRE
2023   { "perl", 0, Pcompile, Pexecute, },
2024 #endif
2025 };
2026 /* Keep these in sync with the 'matchers' table.  */
2027 enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0 };
2028 
2029 /* Return the index of the matcher corresponding to M if available.
2030    MATCHER is the index of the previous matcher, or -1 if none.
2031    Exit in case of conflicts or if M is not available.  */
2032 static int
setmatcher(char const * m,int matcher)2033 setmatcher (char const *m, int matcher)
2034 {
2035   for (int i = 0; i < sizeof matchers / sizeof *matchers; i++)
2036     if (STREQ (m, matchers[i].name))
2037       {
2038         if (0 <= matcher && matcher != i)
2039           die (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
2040         return i;
2041       }
2042 
2043 #if !HAVE_LIBPCRE
2044   if (STREQ (m, "perl"))
2045     die (EXIT_TROUBLE, 0,
2046          _("Perl matching not supported in a --disable-perl-regexp build"));
2047 #endif
2048   die (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
2049 }
2050 
/* Split OPTIONS into its white-space-separated options, copying each
   one into BUF as a null-terminated string and, if ARGV is nonnull,
   storing pointers to the copies in ARGV[0], ARGV[1], and so on.
   Return the number N of options found; ARGV[N] is not set to NULL.
   A backslash escapes the following whitespace character (or
   backslash).  */
static size_t
prepend_args (char const *options, char *buf, char **argv)
{
  char const *src = options;
  char *dst = buf;
  size_t count = 0;

  while (true)
    {
      /* Skip the whitespace that precedes the next option.  */
      while (c_isspace (to_uchar (*src)))
        src++;
      if (*src == '\0')
        break;

      if (argv != NULL)
        argv[count] = dst;
      count++;

      /* Copy one option, replacing each backslash escape with the
         character it escapes.  */
      do
        {
          char c = *src++;
          if (c == '\\' && *src != '\0')
            c = *src++;
          *dst++ = c;
        }
      while (*src != '\0' && ! c_isspace (to_uchar (*src)));

      *dst++ = '\0';
    }

  return count;
}
2081 
2082 /* Prepend the whitespace-separated options in OPTIONS to the argument
2083    vector of a main program with argument count *PARGC and argument
2084    vector *PARGV.  Return the number of options prepended.  */
2085 static int
prepend_default_options(char const * options,int * pargc,char *** pargv)2086 prepend_default_options (char const *options, int *pargc, char ***pargv)
2087 {
2088   if (options && *options)
2089     {
2090       char *buf = xmalloc (strlen (options) + 1);
2091       size_t prepended = prepend_args (options, buf, NULL);
2092       int argc = *pargc;
2093       char *const *argv = *pargv;
2094       char **pp;
2095       enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) };
2096       if (MAX_ARGS - argc < prepended)
2097         xalloc_die ();
2098       pp = xmalloc ((prepended + argc + 1) * sizeof *pp);
2099       *pargc = prepended + argc;
2100       *pargv = pp;
2101       *pp++ = *argv++;
2102       pp += prepend_args (options, buf, pp);
2103       while ((*pp++ = *argv++))
2104         continue;
2105       return prepended;
2106     }
2107 
2108   return 0;
2109 }
2110 
2111 /* Get the next non-digit option from ARGC and ARGV.
2112    Return -1 if there are no more options.
2113    Process any digit options that were encountered on the way,
2114    and store the resulting integer into *DEFAULT_CONTEXT.  */
2115 static int
get_nondigit_option(int argc,char * const * argv,intmax_t * default_context)2116 get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
2117 {
2118   static int prev_digit_optind = -1;
2119   int this_digit_optind;
2120   bool was_digit;
2121   char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
2122   char *p = buf;
2123   int opt;
2124 
2125   was_digit = false;
2126   this_digit_optind = optind;
2127   while (true)
2128     {
2129       opt = getopt_long (argc, (char **) argv, short_options,
2130                          long_options, NULL);
2131       if (! c_isdigit (opt))
2132         break;
2133 
2134       if (prev_digit_optind != this_digit_optind || !was_digit)
2135         {
2136           /* Reset to start another context length argument.  */
2137           p = buf;
2138         }
2139       else
2140         {
2141           /* Suppress trivial leading zeros, to avoid incorrect
2142              diagnostic on strings like 00000000000.  */
2143           p -= buf[0] == '0';
2144         }
2145 
2146       if (p == buf + sizeof buf - 4)
2147         {
2148           /* Too many digits.  Append "..." to make context_length_arg
2149              complain about "X...", where X contains the digits seen
2150              so far.  */
2151           strcpy (p, "...");
2152           p += 3;
2153           break;
2154         }
2155       *p++ = opt;
2156 
2157       was_digit = true;
2158       prev_digit_optind = this_digit_optind;
2159       this_digit_optind = optind;
2160     }
2161   if (p != buf)
2162     {
2163       *p = '\0';
2164       context_length_arg (buf, default_context);
2165     }
2166 
2167   return opt;
2168 }
2169 
2170 /* Parse GREP_COLORS.  The default would look like:
2171      GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
2172    with boolean capabilities (ne and rv) unset (i.e., omitted).
2173    No character escaping is needed or supported.  */
2174 static void
parse_grep_colors(void)2175 parse_grep_colors (void)
2176 {
2177   const char *p;
2178   char *q;
2179   char *name;
2180   char *val;
2181 
2182   p = getenv ("GREP_COLORS"); /* Plural! */
2183   if (p == NULL || *p == '\0')
2184     return;
2185 
2186   /* Work off a writable copy.  */
2187   q = xstrdup (p);
2188 
2189   name = q;
2190   val = NULL;
2191   /* From now on, be well-formed or you're gone.  */
2192   for (;;)
2193     if (*q == ':' || *q == '\0')
2194       {
2195         char c = *q;
2196         struct color_cap const *cap;
2197 
2198         *q++ = '\0'; /* Terminate name or val.  */
2199         /* Empty name without val (empty cap)
2200          * won't match and will be ignored.  */
2201         for (cap = color_dict; cap->name; cap++)
2202           if (STREQ (cap->name, name))
2203             break;
2204         /* If name unknown, go on for forward compatibility.  */
2205         if (cap->var && val)
2206           *(cap->var) = val;
2207         if (cap->fct)
2208           cap->fct ();
2209         if (c == '\0')
2210           return;
2211         name = q;
2212         val = NULL;
2213       }
2214     else if (*q == '=')
2215       {
2216         if (q == name || val)
2217           return;
2218         *q++ = '\0'; /* Terminate name.  */
2219         val = q; /* Can be the empty string.  */
2220       }
2221     else if (val == NULL)
2222       q++; /* Accumulate name.  */
2223     else if (*q == ';' || c_isdigit (*q))
2224       q++; /* Accumulate val.  Protect the terminal from being sent crap.  */
2225     else
2226       return;
2227 }
2228 
/* Return true if the pattern PAT, which is PATLEN bytes long,
   contains an encoding error.  */
static bool
contains_encoding_error (char const *pat, size_t patlen)
{
  mbstate_t state = { 0 };
  size_t pos = 0;

  while (pos < patlen)
    {
      size_t clen = mb_clen (pat + pos, patlen - pos, &state);
      /* (size_t) -1 and (size_t) -2 denote invalid and incomplete
         multibyte sequences respectively.  */
      if (clen == (size_t) -1 || clen == (size_t) -2)
        return true;
      pos += clen;
    }

  return false;
}
2244 
2245 /* Return the number of bytes in the initial character of PAT, of size
2246    PATLEN, if Fcompile can handle that character.  Return -1 if
2247    Fcompile cannot handle it.  MBS is the multibyte conversion state.
2248 
2249    Fcompile can handle a character C if C is single-byte, or if C has no
2250    case folded counterparts and toupper translates none of its bytes.  */
2251 
2252 static int
fgrep_icase_charlen(char const * pat,size_t patlen,mbstate_t * mbs)2253 fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
2254 {
2255   int n = localeinfo.sbclen[to_uchar (*pat)];
2256   if (n < 0)
2257     {
2258       wchar_t wc;
2259       wchar_t folded[CASE_FOLDED_BUFSIZE];
2260       size_t wn = mbrtowc (&wc, pat, patlen, mbs);
2261       if (MB_LEN_MAX < wn || case_folded_counterparts (wc, folded))
2262         return -1;
2263       for (int i = wn; 0 < --i; )
2264         {
2265           unsigned char c = pat[i];
2266           if (toupper (c) != c)
2267             return -1;
2268         }
2269       n = wn;
2270     }
2271   return n;
2272 }
2273 
/* Return true if the -F patterns PAT, which are PATLEN bytes long,
   consist solely of single-byte characters or characters not subject
   to case folding, so that Fcompile can process them.  */

static bool
fgrep_icase_available (char const *pat, size_t patlen)
{
  mbstate_t state = {0,};
  size_t done = 0;

  while (done < patlen)
    {
      int clen = fgrep_icase_charlen (pat + done, patlen - done, &state);
      if (clen < 0)
        return false;
      done += clen;
    }

  return true;
}
2293 
/* Convert the pattern *KEYS_P, which is *LEN_P bytes long, from fgrep
   to grep style by putting a backslash before each single-byte
   character in the set $ * . [ \ ^.  The old pattern is freed, and
   *KEYS_P and *LEN_P are updated to describe the newly allocated
   replacement.  */

void
fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
{
  size_t len = *len_p;
  char *keys = *keys_p;
  mbstate_t mb_state = { 0 };
  /* At worst every byte needs an escape, doubling the size.  */
  char *new_keys = xnmalloc (len + 1, 2);
  char *out = new_keys;

  while (len != 0)
    {
      size_t n = mb_clen (keys, len, &mb_state);

      if (n == (size_t) -1)
        {
          /* Invalid sequence: reset the conversion state and treat
             the offending byte as a single-byte character.  */
          memset (&mb_state, 0, sizeof mb_state);
          n = 1;
        }

      if (n == 1)
        {
          char c = *keys;
          if (c == '$' || c == '*' || c == '.'
              || c == '[' || c == '\\' || c == '^')
            *out++ = '\\';
          *out++ = c;
        }
      else
        {
          /* An incomplete trailing sequence is copied in full.  */
          if (n == (size_t) -2)
            n = len;
          out = mempcpy (out, keys, n);
        }

      keys += n;
      len -= n;
    }

  free (*keys_p);
  *keys_p = new_keys;
  *len_p = out - new_keys;
}
2337 
2338 /* If it is easy, convert the MATCHER-style patterns KEYS (of size
2339    *LEN_P) to -F style, update *LEN_P to a possibly-smaller value, and
2340    return F_MATCHER_INDEX.  If not, leave KEYS and *LEN_P alone and
2341    return MATCHER.  This function is conservative and sometimes misses
2342    conversions, e.g., it does not convert the -E pattern "(a|a|[aa])"
2343    to the -F pattern "a".  */
2344 
2345 static int
try_fgrep_pattern(int matcher,char * keys,size_t * len_p)2346 try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
2347 {
2348   int result = matcher;
2349   size_t len = *len_p;
2350   char *new_keys = xmalloc (len + 1);
2351   char *p = new_keys;
2352   char const *q = keys;
2353   mbstate_t mb_state = { 0 };
2354 
2355   while (len != 0)
2356     {
2357       switch (*q)
2358         {
2359         case '$': case '*': case '.': case '[': case '^':
2360           goto fail;
2361 
2362         case '(': case '+': case '?': case '{': case '|':
2363           if (matcher != G_MATCHER_INDEX)
2364             goto fail;
2365           break;
2366 
2367         case '\\':
2368           if (1 < len)
2369             switch (q[1])
2370               {
2371               case '\n':
2372               case 'B': case 'S': case 'W': case'\'': case '<':
2373               case 'b': case 's': case 'w': case '`': case '>':
2374               case '1': case '2': case '3': case '4':
2375               case '5': case '6': case '7': case '8': case '9':
2376                 goto fail;
2377 
2378               case '(': case '+': case '?': case '{': case '|':
2379                 if (matcher == G_MATCHER_INDEX)
2380                   goto fail;
2381                 FALLTHROUGH;
2382               default:
2383                 q++, len--;
2384                 break;
2385               }
2386           break;
2387         }
2388 
2389       {
2390         size_t n;
2391         if (match_icase)
2392           {
2393             int ni = fgrep_icase_charlen (q, len, &mb_state);
2394             if (ni < 0)
2395               goto fail;
2396             n = ni;
2397           }
2398         else
2399           {
2400             n = mb_clen (q, len, &mb_state);
2401             if (MB_LEN_MAX < n)
2402               goto fail;
2403           }
2404 
2405         p = mempcpy (p, q, n);
2406         q += n;
2407         len -= n;
2408       }
2409     }
2410 
2411   if (*len_p != p - new_keys)
2412     {
2413       *len_p = p - new_keys;
2414       memcpy (keys, new_keys, p - new_keys);
2415     }
2416   result = F_MATCHER_INDEX;
2417 
2418  fail:
2419   free (new_keys);
2420   return result;
2421 }
2422 
2423 int
main(int argc,char ** argv)2424 main (int argc, char **argv)
2425 {
2426   char *keys = NULL;
2427   size_t keycc = 0, oldcc, keyalloc = 0;
2428   int matcher = -1;
2429   size_t cc;
2430   int opt, prepended;
2431   int prev_optind, last_recursive;
2432   int fread_errno;
2433   intmax_t default_context;
2434   FILE *fp;
2435   exit_failure = EXIT_TROUBLE;
2436   initialize_main (&argc, &argv);
2437 
2438   /* Which command-line options have been specified for filename output.
2439      -1 for -h, 1 for -H, 0 for neither.  */
2440   int filename_option = 0;
2441 
2442   eolbyte = '\n';
2443   filename_mask = ~0;
2444 
2445   max_count = INTMAX_MAX;
2446 
2447   /* The value -1 means to use DEFAULT_CONTEXT. */
2448   out_after = out_before = -1;
2449   /* Default before/after context: changed by -C/-NUM options */
2450   default_context = -1;
2451   /* Changed by -o option */
2452   only_matching = false;
2453 
2454   /* Internationalization. */
2455 #if defined HAVE_SETLOCALE
2456   setlocale (LC_ALL, "");
2457 #endif
2458 #if defined ENABLE_NLS
2459   bindtextdomain (PACKAGE, LOCALEDIR);
2460   textdomain (PACKAGE);
2461 #endif
2462 
2463   init_localeinfo (&localeinfo);
2464 
2465   atexit (clean_up_stdout);
2466   c_stack_action (NULL);
2467 
2468   last_recursive = 0;
2469 
2470   prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
2471   if (prepended)
2472     error (0, 0, _("warning: GREP_OPTIONS is deprecated;"
2473                    " please use an alias or script"));
2474 
2475   while (prev_optind = optind,
2476          (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
2477     switch (opt)
2478       {
2479       case 'A':
2480         context_length_arg (optarg, &out_after);
2481         break;
2482 
2483       case 'B':
2484         context_length_arg (optarg, &out_before);
2485         break;
2486 
2487       case 'C':
2488         /* Set output match context, but let any explicit leading or
2489            trailing amount specified with -A or -B stand. */
2490         context_length_arg (optarg, &default_context);
2491         break;
2492 
2493       case 'D':
2494         if (STREQ (optarg, "read"))
2495           devices = READ_DEVICES;
2496         else if (STREQ (optarg, "skip"))
2497           devices = SKIP_DEVICES;
2498         else
2499           die (EXIT_TROUBLE, 0, _("unknown devices method"));
2500         break;
2501 
2502       case 'E':
2503         matcher = setmatcher ("egrep", matcher);
2504         break;
2505 
2506       case 'F':
2507         matcher = setmatcher ("fgrep", matcher);
2508         break;
2509 
2510       case 'P':
2511         matcher = setmatcher ("perl", matcher);
2512         break;
2513 
2514       case 'G':
2515         matcher = setmatcher ("grep", matcher);
2516         break;
2517 
2518       case 'X': /* undocumented on purpose */
2519         matcher = setmatcher (optarg, matcher);
2520         break;
2521 
2522       case 'H':
2523         filename_option = 1;
2524         break;
2525 
2526       case 'I':
2527         binary_files = WITHOUT_MATCH_BINARY_FILES;
2528         break;
2529 
2530       case 'T':
2531         align_tabs = true;
2532         break;
2533 
2534       case 'U':
2535         if (O_BINARY)
2536           binary = true;
2537         break;
2538 
2539       case 'u':
2540         /* Obsolete option; it has no effect.  FIXME: Diagnose use of
2541            this option starting in (say) the year 2020.  */
2542         break;
2543 
2544       case 'V':
2545         show_version = true;
2546         break;
2547 
2548       case 'a':
2549         binary_files = TEXT_BINARY_FILES;
2550         break;
2551 
2552       case 'b':
2553         out_byte = true;
2554         break;
2555 
2556       case 'c':
2557         count_matches = true;
2558         break;
2559 
2560       case 'd':
2561         directories = XARGMATCH ("--directories", optarg,
2562                                  directories_args, directories_types);
2563         if (directories == RECURSE_DIRECTORIES)
2564           last_recursive = prev_optind;
2565         break;
2566 
2567       case 'e':
2568         cc = strlen (optarg);
2569         if (keyalloc < keycc + cc + 1)
2570           {
2571             keyalloc = keycc + cc + 1;
2572             keys = x2realloc (keys, &keyalloc);
2573           }
2574         oldcc = keycc;
2575         memcpy (keys + oldcc, optarg, cc);
2576         keycc += cc;
2577         keys[keycc++] = '\n';
2578         fl_add (keys + oldcc, cc + 1, "");
2579         break;
2580 
2581       case 'f':
2582         if (STREQ (optarg, "-"))
2583           {
2584             if (binary)
2585               xset_binary_mode (STDIN_FILENO, O_BINARY);
2586             fp = stdin;
2587           }
2588         else
2589           {
2590             fp = fopen (optarg, binary ? "rb" : "r");
2591             if (!fp)
2592               die (EXIT_TROUBLE, errno, "%s", optarg);
2593           }
2594         oldcc = keycc;
2595         for (;; keycc += cc)
2596           {
2597             if (keyalloc <= keycc + 1)
2598               keys = x2realloc (keys, &keyalloc);
2599             cc = fread (keys + keycc, 1, keyalloc - (keycc + 1), fp);
2600             if (cc == 0)
2601               break;
2602           }
2603         fread_errno = errno;
2604         if (ferror (fp))
2605           die (EXIT_TROUBLE, fread_errno, "%s", optarg);
2606         if (fp != stdin)
2607           fclose (fp);
2608         /* Append final newline if file ended in non-newline. */
2609         if (oldcc != keycc && keys[keycc - 1] != '\n')
2610           keys[keycc++] = '\n';
2611         fl_add (keys + oldcc, keycc - oldcc, optarg);
2612         break;
2613 
2614       case 'h':
2615         filename_option = -1;
2616         break;
2617 
2618       case 'i':
2619       case 'y':			/* For old-timers . . . */
2620         match_icase = true;
2621         break;
2622 
2623       case NO_IGNORE_CASE_OPTION:
2624         match_icase = false;
2625         break;
2626 
2627       case 'L':
2628         /* Like -l, except list files that don't contain matches.
2629            Inspired by the same option in Hume's gre. */
2630         list_files = LISTFILES_NONMATCHING;
2631         break;
2632 
2633       case 'l':
2634         list_files = LISTFILES_MATCHING;
2635         break;
2636 
2637       case 'm':
2638         switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
2639           {
2640           case LONGINT_OK:
2641           case LONGINT_OVERFLOW:
2642             break;
2643 
2644           default:
2645             die (EXIT_TROUBLE, 0, _("invalid max count"));
2646           }
2647         break;
2648 
2649       case 'n':
2650         out_line = true;
2651         break;
2652 
2653       case 'o':
2654         only_matching = true;
2655         break;
2656 
2657       case 'q':
2658         exit_on_match = true;
2659         exit_failure = 0;
2660         break;
2661 
2662       case 'R':
2663         fts_options = basic_fts_options | FTS_LOGICAL;
2664         FALLTHROUGH;
2665       case 'r':
2666         directories = RECURSE_DIRECTORIES;
2667         last_recursive = prev_optind;
2668         break;
2669 
2670       case 's':
2671         suppress_errors = true;
2672         break;
2673 
2674       case 'v':
2675         out_invert = true;
2676         break;
2677 
2678       case 'w':
2679         wordinit ();
2680         match_words = true;
2681         break;
2682 
2683       case 'x':
2684         match_lines = true;
2685         break;
2686 
2687       case 'Z':
2688         filename_mask = 0;
2689         break;
2690 
2691       case 'z':
2692         eolbyte = '\0';
2693         break;
2694 
2695       case BINARY_FILES_OPTION:
2696         if (STREQ (optarg, "binary"))
2697           binary_files = BINARY_BINARY_FILES;
2698         else if (STREQ (optarg, "text"))
2699           binary_files = TEXT_BINARY_FILES;
2700         else if (STREQ (optarg, "without-match"))
2701           binary_files = WITHOUT_MATCH_BINARY_FILES;
2702         else
2703           die (EXIT_TROUBLE, 0, _("unknown binary-files type"));
2704         break;
2705 
2706       case COLOR_OPTION:
2707         if (optarg)
2708           {
2709             if (!c_strcasecmp (optarg, "always")
2710                 || !c_strcasecmp (optarg, "yes")
2711                 || !c_strcasecmp (optarg, "force"))
2712               color_option = 1;
2713             else if (!c_strcasecmp (optarg, "never")
2714                      || !c_strcasecmp (optarg, "no")
2715                      || !c_strcasecmp (optarg, "none"))
2716               color_option = 0;
2717             else if (!c_strcasecmp (optarg, "auto")
2718                      || !c_strcasecmp (optarg, "tty")
2719                      || !c_strcasecmp (optarg, "if-tty"))
2720               color_option = 2;
2721             else
2722               show_help = 1;
2723           }
2724         else
2725           color_option = 2;
2726         break;
2727 
2728       case EXCLUDE_OPTION:
2729       case INCLUDE_OPTION:
2730         for (int cmd = 0; cmd < 2; cmd++)
2731           {
2732             if (!excluded_patterns[cmd])
2733               excluded_patterns[cmd] = new_exclude ();
2734             add_exclude (excluded_patterns[cmd], optarg,
2735                          ((opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)
2736                           | exclude_options (cmd)));
2737           }
2738         break;
2739       case EXCLUDE_FROM_OPTION:
2740         for (int cmd = 0; cmd < 2; cmd++)
2741           {
2742             if (!excluded_patterns[cmd])
2743               excluded_patterns[cmd] = new_exclude ();
2744             if (add_exclude_file (add_exclude, excluded_patterns[cmd],
2745                                   optarg, exclude_options (cmd), '\n')
2746                 != 0)
2747               die (EXIT_TROUBLE, errno, "%s", optarg);
2748           }
2749         break;
2750 
2751       case EXCLUDE_DIRECTORY_OPTION:
2752         strip_trailing_slashes (optarg);
2753         for (int cmd = 0; cmd < 2; cmd++)
2754           {
2755             if (!excluded_directory_patterns[cmd])
2756               excluded_directory_patterns[cmd] = new_exclude ();
2757             add_exclude (excluded_directory_patterns[cmd], optarg,
2758                          exclude_options (cmd));
2759           }
2760         break;
2761 
2762       case GROUP_SEPARATOR_OPTION:
2763         group_separator = optarg;
2764         break;
2765 
2766       case LINE_BUFFERED_OPTION:
2767         line_buffered = true;
2768         break;
2769 
2770       case LABEL_OPTION:
2771         label = optarg;
2772         break;
2773 
2774       case 0:
2775         /* long options */
2776         break;
2777 
2778       default:
2779         usage (EXIT_TROUBLE);
2780         break;
2781 
2782       }
2783 
2784   if (show_version)
2785     {
2786       version_etc (stdout, getprogname (), PACKAGE_NAME, VERSION,
2787                    (char *) NULL);
2788       puts (_("Written by Mike Haertel and others; see\n"
2789               "<https://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>."));
2790       return EXIT_SUCCESS;
2791     }
2792 
2793   if (show_help)
2794     usage (EXIT_SUCCESS);
2795 
2796   if (keys)
2797     {
2798       if (keycc == 0)
2799         {
2800           /* No keys were specified (e.g. -f /dev/null).  Match nothing.  */
2801           out_invert ^= true;
2802           match_lines = match_words = false;
2803         }
2804       else
2805         /* Strip trailing newline. */
2806         --keycc;
2807     }
2808   else if (optind < argc)
2809     {
2810       /* Make a copy so that it can be reallocated or freed later.  */
2811       keycc = strlen (argv[optind]);
2812       keys = xmemdup (argv[optind++], keycc + 1);
2813       fl_add (keys, keycc, "");
2814       n_patterns++;
2815     }
2816   else
2817     usage (EXIT_TROUBLE);
2818 
2819   bool possibly_tty = false;
2820   struct stat tmp_stat;
2821   if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
2822     {
2823       if (S_ISREG (tmp_stat.st_mode))
2824         out_stat = tmp_stat;
2825       else if (S_ISCHR (tmp_stat.st_mode))
2826         {
2827           struct stat null_stat;
2828           if (stat ("/dev/null", &null_stat) == 0
2829               && SAME_INODE (tmp_stat, null_stat))
2830             dev_null_output = true;
2831           else
2832             possibly_tty = true;
2833         }
2834     }
2835 
2836   /* POSIX says -c, -l and -q are mutually exclusive.  In this
2837      implementation, -q overrides -l and -L, which in turn override -c.  */
2838   if (exit_on_match)
2839     list_files = LISTFILES_NONE;
2840   if ((exit_on_match | dev_null_output) || list_files != LISTFILES_NONE)
2841     {
2842       count_matches = false;
2843       done_on_match = true;
2844     }
2845   out_quiet = count_matches | done_on_match;
2846 
2847   if (out_after < 0)
2848     out_after = default_context;
2849   if (out_before < 0)
2850     out_before = default_context;
2851 
2852   /* If it is easy to see that matching cannot succeed (e.g., 'grep -f
2853      /dev/null'), fail without reading the input.  */
2854   if ((max_count == 0
2855        || (keycc == 0 && out_invert && !match_lines && !match_words))
2856       && list_files != LISTFILES_NONMATCHING)
2857     return EXIT_FAILURE;
2858 
2859   if (color_option == 2)
2860     color_option = possibly_tty && should_colorize () && isatty (STDOUT_FILENO);
2861   init_colorize ();
2862 
2863   if (color_option)
2864     {
2865       /* Legacy.  */
2866       char *userval = getenv ("GREP_COLOR");
2867       if (userval != NULL && *userval != '\0')
2868         selected_match_color = context_match_color = userval;
2869 
2870       /* New GREP_COLORS has priority.  */
2871       parse_grep_colors ();
2872     }
2873 
2874   initialize_unibyte_mask ();
2875 
2876   if (matcher < 0)
2877     matcher = G_MATCHER_INDEX;
2878 
2879   /* In a single-byte locale, switch from -F to -G if it is a single
2880      pattern that matches words, where -G is typically faster.  In a
2881      multi-byte locale, switch if the patterns have an encoding error
2882      (where -F does not work) or if -i and the patterns will not work
2883      for -iF.  */
2884   if (matcher == F_MATCHER_INDEX
2885       && (! localeinfo.multibyte
2886           ? n_patterns == 1 && match_words
2887           : (contains_encoding_error (keys, keycc)
2888              || (match_icase && !fgrep_icase_available (keys, keycc)))))
2889     {
2890       fgrep_to_grep_pattern (&keys, &keycc);
2891       matcher = G_MATCHER_INDEX;
2892     }
2893   /* With two or more patterns, if -F works then switch from either -E
2894      or -G, as -F is probably faster then.  */
2895   else if ((matcher == G_MATCHER_INDEX || matcher == E_MATCHER_INDEX)
2896            && 1 < n_patterns)
2897     matcher = try_fgrep_pattern (matcher, keys, &keycc);
2898 
2899   execute = matchers[matcher].execute;
2900   compiled_pattern = matchers[matcher].compile (keys, keycc,
2901                                                 matchers[matcher].syntax);
2902   /* We need one byte prior and one after.  */
2903   char eolbytes[3] = { 0, eolbyte, 0 };
2904   size_t match_size;
2905   skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1,
2906                                 &match_size, NULL) == 0)
2907                       == out_invert);
2908 
2909   int num_operands = argc - optind;
2910   out_file = (filename_option == 0 && num_operands <= 1
2911               ? - (directories == RECURSE_DIRECTORIES)
2912               : 0 <= filename_option);
2913 
2914   if (binary)
2915     xset_binary_mode (STDOUT_FILENO, O_BINARY);
2916 
2917   /* Prefer sysconf for page size, as getpagesize typically returns int.  */
2918 #ifdef _SC_PAGESIZE
2919   long psize = sysconf (_SC_PAGESIZE);
2920 #else
2921   long psize = getpagesize ();
2922 #endif
2923   if (! (0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2))
2924     abort ();
2925   pagesize = psize;
2926   bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword);
2927   buffer = xmalloc (bufalloc);
2928 
2929   if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
2930     devices = READ_DEVICES;
2931 
2932   char *const *files;
2933   if (0 < num_operands)
2934     {
2935       files = argv + optind;
2936     }
2937   else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive)
2938     {
2939       static char *const cwd_only[] = { (char *) ".", NULL };
2940       files = cwd_only;
2941       omit_dot_slash = true;
2942     }
2943   else
2944     {
2945       static char *const stdin_only[] = { (char *) "-", NULL };
2946       files = stdin_only;
2947     }
2948 
2949   bool status = true;
2950   do
2951     status &= grep_command_line_arg (*files++);
2952   while (*files != NULL);
2953 
2954   /* We register via atexit to test stdout.  */
2955   return errseen ? EXIT_TROUBLE : status;
2956 }
2957