1 /* grep.c - main driver file for grep.
2    Copyright (C) 1992, 1997-2002, 2004-2021 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17    02110-1301, USA.  */
18 
19 /* Written July 1992 by Mike Haertel.  */
20 
21 #include <config.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <wchar.h>
25 #include <inttypes.h>
26 #include <stdarg.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include "system.h"
30 
31 #include "argmatch.h"
32 #include "c-ctype.h"
33 #include "c-stack.h"
34 #include "closeout.h"
35 #include "colorize.h"
36 #include "die.h"
37 #include "error.h"
38 #include "exclude.h"
39 #include "exitfail.h"
40 #include "fcntl-safer.h"
41 #include "fts_.h"
42 #include "getopt.h"
43 #include "getprogname.h"
44 #include "grep.h"
45 #include "hash.h"
46 #include "intprops.h"
47 #include "propername.h"
48 #include "safe-read.h"
49 #include "search.h"
50 #include "c-strcase.h"
51 #include "version-etc.h"
52 #include "xalloc.h"
53 #include "xbinary-io.h"
54 #include "xstrtol.h"
55 
/* Separator characters for selected and rejected lines, and the
   default string used as the group separator.  */
enum
{
  SEP_CHAR_SELECTED = ':',
  SEP_CHAR_REJECTED = '-'
};
static char const SEP_STR_GROUP[] = "--";
59 
/* Status of standard output, saved when stdout is connected to a
   regular file so that the file can be skipped automatically, thus
   avoiding a potential (racy) infinite loop.  */
static struct stat out_stat;

/* Nonzero means display usage information and exit.  */
static int show_help;

/* True means print the version on standard output and exit.  */
static bool show_version;

/* True means suppress diagnostics for nonexistent or unreadable files.  */
static bool suppress_errors;

/* Nonzero means use color markers.  */
static int color_option;

/* True means show only the part of a line matching the expression.  */
static bool only_matching;

/* True means make sure the first content char in a line is on a tab
   stop.  */
static bool align_tabs;

/* Print width of line numbers and byte offsets.  Nonzero if ALIGN_TABS.  */
static int offset_width;
85 
/* One element of the PATLOC array, recording where a pattern
   originally came from.  */
struct patloc
  {
    /* Origin-0 line number of the pattern within PATTERN_ARRAY, in
       which every pattern is terminated by '\n'.  */
    ptrdiff_t lineno;

    /* Where the pattern was read from.  A FILENAME of "-" means
       standard input and "" means the command line.  FILELINE is
       the origin-1 line within that file; it is irrelevant for
       command-line patterns.  */
    char const *filename;
    ptrdiff_t fileline;
  };
99 
100 /* The array of pattern locations.  The concatenation of all patterns
101    is stored in a single array, KEYS.  Given the invocation
102    'grep -f <(seq 5) -f <(seq 6) -f <(seq 3)', there will initially be
103    28 bytes in KEYS.  After duplicate patterns are removed, KEYS
104    will have 12 bytes and PATLOC will be {0,x,1}, {10,y,1}
105    where x, y and z are just place-holders for shell-generated names
106    since and z is omitted as it contains only duplicates.  Sometimes
107    removing duplicates will grow PATLOC, since each run of
108    removed patterns not at a file start or end requires another
109    PATLOC entry for the first non-removed pattern.  */
110 static struct patloc *patloc;
111 static size_t patlocs_allocated, patlocs_used;
112 
113 /* Pointer to the array of patterns, each terminated by newline.  */
114 static char *pattern_array;
115 
116 /* The number of unique patterns seen so far.  */
117 static size_t n_patterns;
118 
119 /* Hash table of patterns seen so far.  */
120 static Hash_table *pattern_table;
121 
122 /* Hash and compare newline-terminated patterns for textual equality.
123    Patterns are represented by origin-1 offsets into PATTERN_ARRAY,
124    cast to void *.  The origin-1 is so that the first pattern offset
125    does not appear to be a null pointer when cast to void *.  */
126 static size_t _GL_ATTRIBUTE_PURE
hash_pattern(void const * pat,size_t n_buckets)127 hash_pattern (void const *pat, size_t n_buckets)
128 {
129   size_t h = 0;
130   intptr_t pat_offset = (intptr_t) pat - 1;
131   unsigned char const *s = (unsigned char const *) pattern_array + pat_offset;
132   for ( ; *s != '\n'; s++)
133     h = h * 33 ^ *s;
134   return h % n_buckets;
135 }
136 static bool _GL_ATTRIBUTE_PURE
compare_patterns(void const * a,void const * b)137 compare_patterns (void const *a, void const *b)
138 {
139   intptr_t a_offset = (intptr_t) a - 1;
140   intptr_t b_offset = (intptr_t) b - 1;
141   char const *p = pattern_array + a_offset;
142   char const *q = pattern_array + b_offset;
143   for (; *p == *q; p++, q++)
144     if (*p == '\n')
145       return true;
146   return false;
147 }
148 
149 /* Update KEYS to remove duplicate patterns, and return the number of
150    bytes in the resulting KEYS.  KEYS contains a sequence of patterns
151    each terminated by '\n'.  The first DUPFREE_SIZE bytes are a
152    sequence of patterns with no duplicates; SIZE is the total number
153    of bytes in KEYS.  If some patterns past the first DUPFREE_SIZE
154    bytes are not duplicates, update PATLOCS accordingly.  */
155 static ptrdiff_t
update_patterns(char * keys,ptrdiff_t dupfree_size,ptrdiff_t size,char const * filename)156 update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
157                  char const *filename)
158 {
159   char *dst = keys + dupfree_size;
160   ptrdiff_t fileline = 1;
161   int prev_inserted = 0;
162 
163   char const *srclim = keys + size;
164   ptrdiff_t patsize;
165   for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
166     {
167       char const *patend = rawmemchr (src, '\n');
168       patsize = patend + 1 - src;
169       memmove (dst, src, patsize);
170 
171       intptr_t dst_offset_1 = dst - keys + 1;
172       int inserted = hash_insert_if_absent (pattern_table,
173                                             (void *) dst_offset_1, NULL);
174       if (inserted)
175         {
176           if (inserted < 0)
177             xalloc_die ();
178           dst += patsize;
179 
180           /* Add a PATLOCS entry unless this input line is simply the
181              next one in the same file.  */
182           if (!prev_inserted)
183             {
184               if (patlocs_used == patlocs_allocated)
185                 patloc = x2nrealloc (patloc, &patlocs_allocated,
186                                      sizeof *patloc);
187               patloc[patlocs_used++]
188                 = (struct patloc) { .lineno = n_patterns,
189                                     .filename = filename,
190                                     .fileline = fileline };
191             }
192           n_patterns++;
193         }
194 
195       prev_inserted = inserted;
196       fileline++;
197     }
198 
199   return dst - keys;
200 }
201 
202 /* Map LINENO, the origin-0 line number of one of the input patterns,
203    to the name of the file from which it came.  Return "-" if it was
204    read from stdin, "" if it was specified on the command line.
205    Set *NEW_LINENO to the origin-1 line number of PATTERN in the file,
206    or to an unspecified value if PATTERN came from the command line.  */
207 char const * _GL_ATTRIBUTE_PURE
pattern_file_name(size_t lineno,size_t * new_lineno)208 pattern_file_name (size_t lineno, size_t *new_lineno)
209 {
210   ptrdiff_t i;
211   for (i = 1; i < patlocs_used; i++)
212     if (lineno < patloc[i].lineno)
213       break;
214   *new_lineno = lineno - patloc[i - 1].lineno + patloc[i - 1].fileline;
215   return patloc[i - 1].filename;
216 }
217 
#if HAVE_ASAN
/* Start address and length of the sole poisoned region, remembered
   so that it can be unpoisoned later, just before each following
   read.  */
static void const *poison_buf;
static size_t poison_len;

/* Unpoison the remembered region, if one has been recorded.  */
static void
clear_asan_poison (void)
{
  if (! poison_buf)
    return;
  __asan_unpoison_memory_region (poison_buf, poison_len);
}

/* Poison the SIZE bytes starting at ADDR and remember the region.  */
static void
asan_poison (void const *addr, size_t size)
{
  __asan_poison_memory_region (addr, size);
  poison_buf = addr;
  poison_len = size;
}
#else
/* Without ASAN, both operations do nothing.  */
static void
clear_asan_poison (void)
{
}

static void
asan_poison (void const volatile *addr, size_t size)
{
}
#endif
243 
244 /* The group separator used when context is requested. */
245 static const char *group_separator = SEP_STR_GROUP;
246 
247 /* The context and logic for choosing default --color screen attributes
248    (foreground and background colors, etc.) are the following.
249       -- There are eight basic colors available, each with its own
250          nominal luminosity to the human eye and foreground/background
251          codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
252          magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
253          yellow [89 %, 33/43], and white [100 %, 37/47]).
254       -- Sometimes, white as a background is actually implemented using
255          a shade of light gray, so that a foreground white can be visible
256          on top of it (but most often not).
257       -- Sometimes, black as a foreground is actually implemented using
258          a shade of dark gray, so that it can be visible on top of a
259          background black (but most often not).
260       -- Sometimes, more colors are available, as extensions.
261       -- Other attributes can be selected/deselected (bold [1/22],
262          underline [4/24], standout/inverse [7/27], blink [5/25], and
263          invisible/hidden [8/28]).  They are sometimes implemented by
264          using colors instead of what their names imply; e.g., bold is
265          often achieved by using brighter colors.  In practice, only bold
266          is really available to us, underline sometimes being mapped by
267          the terminal to some strange color choice, and standout best
268          being left for use by downstream programs such as less(1).
269       -- We cannot assume that any of the extensions or special features
270          are available for the purpose of choosing defaults for everyone.
271       -- The most prevalent default terminal backgrounds are pure black
272          and pure white, and are not necessarily the same shades of
273          those as if they were selected explicitly with SGR sequences.
274          Some terminals use dark or light pictures as default background,
275          but those are covered over by an explicit selection of background
276          color with an SGR sequence; their users will appreciate their
277          background pictures not be covered like this, if possible.
      -- Some uses of color attributes are to make some output items
279          more understated (e.g., context lines); this cannot be achieved
280          by changing the background color.
281       -- For these reasons, the grep color defaults should strive not
282          to change the background color from its default, unless it's
283          for a short item that should be highlighted, not understated.
284       -- The grep foreground color defaults (without an explicitly set
285          background) should provide enough contrast to be readable on any
286          terminal with either a black (dark) or white (light) background.
287          This only leaves red, magenta, green, and cyan (and their bold
288          counterparts) and possibly bold blue.  */
/* SGR parameter strings used for matched text.  The user can
   override them with the deprecated environment variable GREP_COLOR
   or with the newer GREP_COLORS.  */
static char const *selected_match_color = "01;31";  /* bold red */
static char const *context_match_color = "01;31";   /* bold red */

/* SGR parameter strings for the other output items.  An empty string
   selects the default color pair.  */
static char const *filename_color = "35";       /* magenta */
static char const *line_num_color = "32";       /* green */
static char const *byte_num_color = "32";       /* green */
static char const *sep_color = "36";            /* cyan */
static char const *selected_line_color = "";    /* default color pair */
static char const *context_line_color = "";     /* default color pair */
302 
303 /* Select Graphic Rendition (SGR, "\33[...m") strings.  */
304 /* Also Erase in Line (EL) to Right ("\33[K") by default.  */
305 /*    Why have EL to Right after SGR?
306          -- The behavior of line-wrapping when at the bottom of the
307             terminal screen and at the end of the current line is often
308             such that a new line is introduced, entirely cleared with
309             the current background color which may be different from the
310             default one (see the boolean back_color_erase terminfo(5)
311             capability), thus scrolling the display by one line.
312             The end of this new line will stay in this background color
313             even after reverting to the default background color with
            "\33[m", unless it is explicitly cleared again with "\33[K"
315             (which is the behavior the user would instinctively expect
316             from the whole thing).  There may be some unavoidable
317             background-color flicker at the end of this new line because
318             of this (when timing with the monitor's redraw is just right).
319          -- The behavior of HT (tab, "\t") is usually the same as that of
320             Cursor Forward Tabulation (CHT) with a default parameter
321             of 1 ("\33[I"), i.e., it performs pure movement to the next
322             tab stop, without any clearing of either content or screen
323             attributes (including background color); try
324                printf 'asdfqwerzxcv\rASDF\tZXCV\n'
325             in a bash(1) shell to demonstrate this.  This is not what the
326             user would instinctively expect of HT (but is ok for CHT).
327             The instinctive behavior would include clearing the terminal
328             cells that are skipped over by HT with blank cells in the
329             current screen attributes, including background color;
330             the boolean dest_tabs_magic_smso terminfo(5) capability
331             indicates this saner behavior for HT, but only some rare
332             terminals have it (although it also indicates a special
333             glitch with standout mode in the Teleray terminal for which
            it was initially introduced).  The remedy is to add "\33[K"
335             after each SGR sequence, be it START (to fix the behavior
336             of any HT after that before another SGR) or END (to fix the
337             behavior of an HT in default background color that would
338             follow a line-wrapping at the bottom of the screen in another
339             background color, and to complement doing it after START).
340             Piping grep's output through a pager such as less(1) avoids
341             any HT problems since the pager performs tab expansion.
342 
343       Generic disadvantages of this remedy are:
344          -- Some very rare terminals might support SGR but not EL (nobody
345             will use "grep --color" on a terminal that does not support
346             SGR in the first place).
347          -- Having these extra control sequences might somewhat complicate
348             the task of any program trying to parse "grep --color"
349             output in order to extract structuring information from it.
350       A specific disadvantage to doing it after SGR START is:
351          -- Even more possible background color flicker (when timing
352             with the monitor's redraw is just right), even when not at the
353             bottom of the screen.
354       There are no additional disadvantages specific to doing it after
355       SGR END.
356 
357       It would be impractical for GNU grep to become a full-fledged
358       terminal program linked against ncurses or the like, so it will
359       not detect terminfo(5) capabilities.  */
/* Format used to start an SGR sequence (its %s receives the color
   parameters) and string used to end one; both are followed by EL to
   Right by default.  */
static char const *sgr_start = "\33[%sm\33[K";
static char const *sgr_end = "\33[m\33[K";
362 
363 /* SGR utility functions.  */
364 static void
pr_sgr_start(char const * s)365 pr_sgr_start (char const *s)
366 {
367   if (*s)
368     print_start_colorize (sgr_start, s);
369 }
370 static void
pr_sgr_end(char const * s)371 pr_sgr_end (char const *s)
372 {
373   if (*s)
374     print_end_colorize (sgr_end);
375 }
376 static void
pr_sgr_start_if(char const * s)377 pr_sgr_start_if (char const *s)
378 {
379   if (color_option)
380     pr_sgr_start (s);
381 }
382 static void
pr_sgr_end_if(char const * s)383 pr_sgr_end_if (char const *s)
384 {
385   if (color_option)
386     pr_sgr_end (s);
387 }
388 
/* One GREP_COLORS capability.  */
struct color_cap
  {
    /* Name of the capability.  */
    char const *name;

    /* Address of the color string associated with the capability,
       or NULL if it has none.  */
    char const **var;

    /* Function associated with the capability, or NULL if it has
       none.  */
    void (*fct) (void);
  };
395 
396 static void
color_cap_mt_fct(void)397 color_cap_mt_fct (void)
398 {
399   /* Our caller just set selected_match_color.  */
400   context_match_color = selected_match_color;
401 }
402 
403 static void
color_cap_rv_fct(void)404 color_cap_rv_fct (void)
405 {
406   /* By this point, it was 1 (or already -1).  */
407   color_option = -1;  /* That's still != 0.  */
408 }
409 
410 static void
color_cap_ne_fct(void)411 color_cap_ne_fct (void)
412 {
413   sgr_start = "\33[%sm";
414   sgr_end   = "\33[m";
415 }
416 
417 /* For GREP_COLORS.  */
418 static const struct color_cap color_dict[] =
419   {
420     { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
421     { "ms", &selected_match_color, NULL }, /* selected matched text */
422     { "mc", &context_match_color,  NULL }, /* context matched text */
423     { "fn", &filename_color,       NULL }, /* filename */
424     { "ln", &line_num_color,       NULL }, /* line number */
425     { "bn", &byte_num_color,       NULL }, /* byte (sic) offset */
426     { "se", &sep_color,            NULL }, /* separator */
427     { "sl", &selected_line_color,  NULL }, /* selected lines */
428     { "cx", &context_line_color,   NULL }, /* context lines */
429     { "rv", NULL,                  color_cap_rv_fct }, /* -v reverses sl/cx */
430     { "ne", NULL,                  color_cap_ne_fct }, /* no EL on SGR_* */
431     { NULL, NULL,                  NULL }
432   };
433 
/* The errno value saved from an output function that failed on
   standard output; zero if none has failed.  */
static int stdout_errno;
436 
437 static void
putchar_errno(int c)438 putchar_errno (int c)
439 {
440   if (putchar (c) < 0)
441     stdout_errno = errno;
442 }
443 
444 static void
fputs_errno(char const * s)445 fputs_errno (char const *s)
446 {
447   if (fputs (s, stdout) < 0)
448     stdout_errno = errno;
449 }
450 
451 static void _GL_ATTRIBUTE_FORMAT_PRINTF_STANDARD (1, 2)
printf_errno(char const * format,...)452 printf_errno (char const *format, ...)
453 {
454   va_list ap;
455   va_start (ap, format);
456   if (vfprintf (stdout, format, ap) < 0)
457     stdout_errno = errno;
458   va_end (ap);
459 }
460 
461 static void
fwrite_errno(void const * ptr,size_t size,size_t nmemb)462 fwrite_errno (void const *ptr, size_t size, size_t nmemb)
463 {
464   if (fwrite (ptr, size, nmemb, stdout) != nmemb)
465     stdout_errno = errno;
466 }
467 
468 static void
fflush_errno(void)469 fflush_errno (void)
470 {
471   if (fflush (stdout) != 0)
472     stdout_errno = errno;
473 }
474 
/* Exclusion pattern sets for files and for directories, two of
   each.  */
static struct exclude *excluded_patterns[2];
static struct exclude *excluded_directory_patterns[2];

/* The short options, in getopt notation.  */
static char const short_options[] =
  "0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
480 
/* Codes for non-boolean long options that have no short equivalent.
   They start just past CHAR_MAX so that they cannot collide with a
   short option character.  */
enum
{
  BINARY_FILES_OPTION = CHAR_MAX + 1,
  COLOR_OPTION,
  EXCLUDE_DIRECTORY_OPTION,
  EXCLUDE_OPTION,
  EXCLUDE_FROM_OPTION,
  GROUP_SEPARATOR_OPTION,
  INCLUDE_OPTION,
  LINE_BUFFERED_OPTION,
  LABEL_OPTION,
  NO_IGNORE_CASE_OPTION
};
495 
496 /* Long options equivalences. */
497 static struct option const long_options[] =
498 {
499   {"basic-regexp",    no_argument, NULL, 'G'},
500   {"extended-regexp", no_argument, NULL, 'E'},
501   {"fixed-regexp",    no_argument, NULL, 'F'},
502   {"fixed-strings",   no_argument, NULL, 'F'},
503   {"perl-regexp",     no_argument, NULL, 'P'},
504   {"after-context", required_argument, NULL, 'A'},
505   {"before-context", required_argument, NULL, 'B'},
506   {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
507   {"byte-offset", no_argument, NULL, 'b'},
508   {"context", required_argument, NULL, 'C'},
509   {"color", optional_argument, NULL, COLOR_OPTION},
510   {"colour", optional_argument, NULL, COLOR_OPTION},
511   {"count", no_argument, NULL, 'c'},
512   {"devices", required_argument, NULL, 'D'},
513   {"directories", required_argument, NULL, 'd'},
514   {"exclude", required_argument, NULL, EXCLUDE_OPTION},
515   {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
516   {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
517   {"file", required_argument, NULL, 'f'},
518   {"files-with-matches", no_argument, NULL, 'l'},
519   {"files-without-match", no_argument, NULL, 'L'},
520   {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
521   {"help", no_argument, &show_help, 1},
522   {"include", required_argument, NULL, INCLUDE_OPTION},
523   {"ignore-case", no_argument, NULL, 'i'},
524   {"no-ignore-case", no_argument, NULL, NO_IGNORE_CASE_OPTION},
525   {"initial-tab", no_argument, NULL, 'T'},
526   {"label", required_argument, NULL, LABEL_OPTION},
527   {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
528   {"line-number", no_argument, NULL, 'n'},
529   {"line-regexp", no_argument, NULL, 'x'},
530   {"max-count", required_argument, NULL, 'm'},
531 
532   {"no-filename", no_argument, NULL, 'h'},
533   {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
534   {"no-messages", no_argument, NULL, 's'},
535   {"null", no_argument, NULL, 'Z'},
536   {"null-data", no_argument, NULL, 'z'},
537   {"only-matching", no_argument, NULL, 'o'},
538   {"quiet", no_argument, NULL, 'q'},
539   {"recursive", no_argument, NULL, 'r'},
540   {"dereference-recursive", no_argument, NULL, 'R'},
541   {"regexp", required_argument, NULL, 'e'},
542   {"invert-match", no_argument, NULL, 'v'},
543   {"silent", no_argument, NULL, 'q'},
544   {"text", no_argument, NULL, 'a'},
545   {"binary", no_argument, NULL, 'U'},
546   {"unix-byte-offsets", no_argument, NULL, 'u'},
547   {"version", no_argument, NULL, 'V'},
548   {"with-filename", no_argument, NULL, 'H'},
549   {"word-regexp", no_argument, NULL, 'w'},
550   {0, 0, 0, 0}
551 };
552 
/* Definitions of the flags that grep.h declares.  */
bool match_icase;
bool match_words;
bool match_lines;
char eolbyte;

/* State used for error messages.  */

/* Name of the input file; for standard input, either null or the
   argument of --label.  */
static char const *filename;

/* True means omit a leading "./" from file names in diagnostics.  */
static bool omit_dot_slash;

/* Set to true by suppressible_error.  */
static bool errseen;

/* True if output from the current input file has been suppressed
   because an output line had an encoding error.  */
static bool encoding_error_output;
569 
/* The ways in which a directory can be handled.  */
enum directories_type
  {
    READ_DIRECTORIES = 2,
    RECURSE_DIRECTORIES,
    SKIP_DIRECTORIES
  };

/* Names of the directory-handling methods, and the corresponding
   values in the same order; ARGMATCH_VERIFY checks the pairing.  */
static char const *const directories_args[] =
  {
    "read",
    "recurse",
    "skip",
    NULL
  };
static enum directories_type const directories_types[] =
  {
    READ_DIRECTORIES,
    RECURSE_DIRECTORIES,
    SKIP_DIRECTORIES
  };
ARGMATCH_VERIFY (directories_args, directories_types);

/* How to handle directories; the default is to read them.  */
static enum directories_type directories = READ_DIRECTORIES;
589 
590 enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
591 static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
592 
/* How to handle devices; the default is READ_COMMAND_LINE_DEVICES.  */
static enum
  {
    READ_COMMAND_LINE_DEVICES,
    READ_DEVICES,
    SKIP_DEVICES
  } devices = READ_COMMAND_LINE_DEVICES;

/* Forward declarations of functions used before they are defined.  */
static bool grepfile (int, char const *, bool, bool);
static bool grepdesc (int, bool);
603 
/* Return true if M is the mode of a character device, block device,
   socket, or FIFO.  */
static bool
is_device_mode (mode_t m)
{
  if (S_ISCHR (m) || S_ISBLK (m))
    return true;
  return S_ISSOCK (m) || S_ISFIFO (m);
}
609 
610 static bool
skip_devices(bool command_line)611 skip_devices (bool command_line)
612 {
613   return (devices == SKIP_DEVICES
614           || ((devices == READ_COMMAND_LINE_DEVICES) & !command_line));
615 }
616 
/* Return true if ST->st_size is defined.  Assume the file is not a
   symbolic link.  */
static bool
usable_st_size (struct stat const *st)
{
  if (S_ISREG (st->st_mode))
    return true;
  return S_TYPEISSHM (st) || S_TYPEISTMO (st);
}
624 
/* Stand-ins for SEEK_DATA and SEEK_HOLE on platforms that lack them.
   When either constant equals SEEK_SET it is such a stand-in, so do
   not rely on it to find data or holes.  */
#ifndef SEEK_DATA
enum { SEEK_DATA = SEEK_SET };
#endif

#ifndef SEEK_HOLE
enum { SEEK_HOLE = SEEK_SET };
#endif
633 
/* True if lseek with SEEK_CUR (SEEK_FAILED) or with SEEK_DATA
   (SEEK_DATA_FAILED) failed on the current input.  */
static bool seek_failed;
static bool seek_data_failed;
637 
638 /* Functions we'll use to search. */
639 typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t, bool);
640 typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *,
641                                 char const *);
642 static execute_fp_t execute;
643 static void *compiled_pattern;
644 
645 char const *
input_filename(void)646 input_filename (void)
647 {
648   if (!filename)
649     filename = _("(standard input)");
650   return filename;
651 }
652 
653 /* Unless requested, diagnose an error about the input file.  */
654 static void
suppressible_error(int errnum)655 suppressible_error (int errnum)
656 {
657   if (! suppress_errors)
658     error (0, errnum, "%s", input_filename ());
659   errseen = true;
660 }
661 
662 /* If there has already been a write error, don't bother closing
663    standard output, as that might elicit a duplicate diagnostic.  */
664 static void
clean_up_stdout(void)665 clean_up_stdout (void)
666 {
667   if (! stdout_errno)
668     close_stdout ();
669 }
670 
/* Cast VAL to TYPE.  Use this when TYPE is a pointer type and VAL is
   known to be properly aligned for TYPE, but 'gcc -Wcast-align'
   cannot infer the alignment and would otherwise complain about the
   cast.  */
#if 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
# define CAST_ALIGNED(type, val)                           \
    ({ __typeof__ (val) val_ = (val);                      \
       _Pragma ("GCC diagnostic push")                     \
       _Pragma ("GCC diagnostic ignored \"-Wcast-align\"") \
       (type) val_;                                        \
       _Pragma ("GCC diagnostic pop")                      \
    })
#else
# define CAST_ALIGNED(type, val) ((type) (val))
#endif
685 
/* An unsigned type suitable for fast matching, and its maximum
   value.  */
typedef uintmax_t uword;
static uword const uword_max = UINTMAX_MAX;
689 
690 struct localeinfo localeinfo;
691 
692 /* A mask to test for unibyte characters, with the pattern repeated to
693    fill a uword.  For a multibyte character encoding where
694    all bytes are unibyte characters, this is 0.  For UTF-8, this is
695    0x808080....  For encodings where unibyte characters have no discerned
696    pattern, this is all 1s.  The unsigned char C is a unibyte
697    character if C & UNIBYTE_MASK is zero.  If the uword W is the
698    concatenation of bytes, the bytes are all unibyte characters
699    if W & UNIBYTE_MASK is zero.  */
700 static uword unibyte_mask;
701 
702 static void
initialize_unibyte_mask(void)703 initialize_unibyte_mask (void)
704 {
705   /* For each encoding error I that MASK does not already match,
706      accumulate I's most significant 1 bit by ORing it into MASK.
707      Although any 1 bit of I could be used, in practice high-order
708      bits work better.  */
709   unsigned char mask = 0;
710   int ms1b = 1;
711   for (int i = 1; i <= UCHAR_MAX; i++)
712     if ((localeinfo.sbclen[i] != 1) & ! (mask & i))
713       {
714         while (ms1b * 2 <= i)
715           ms1b *= 2;
716         mask |= ms1b;
717       }
718 
719   /* Now MASK will detect any encoding-error byte, although it may
720      cry wolf and it may not be optimal.  Build a uword-length mask by
721      repeating MASK.  */
722   unibyte_mask = uword_max / UCHAR_MAX * mask;
723 }
724 
725 /* Skip the easy bytes in a buffer that is guaranteed to have a sentinel
726    that is not easy, and return a pointer to the first non-easy byte.
727    The easy bytes all have UNIBYTE_MASK off.  */
728 static char const * _GL_ATTRIBUTE_PURE
skip_easy_bytes(char const * buf)729 skip_easy_bytes (char const *buf)
730 {
731   /* Search a byte at a time until the pointer is aligned, then a
732      uword at a time until a match is found, then a byte at a time to
733      identify the exact byte.  The uword search may go slightly past
734      the buffer end, but that's benign.  */
735   char const *p;
736   uword const *s;
737   for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++)
738     if (to_uchar (*p) & unibyte_mask)
739       return p;
740   for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++)
741     continue;
742   for (p = (char const *) s; ! (to_uchar (*p) & unibyte_mask); p++)
743     continue;
744   return p;
745 }
746 
747 /* Return true if BUF, of size SIZE, has an encoding error.
748    BUF must be followed by at least sizeof (uword) bytes,
749    the first of which may be modified.  */
750 static bool
buf_has_encoding_errors(char * buf,size_t size)751 buf_has_encoding_errors (char *buf, size_t size)
752 {
753   if (! unibyte_mask)
754     return false;
755 
756   mbstate_t mbs = { 0 };
757   size_t clen;
758 
759   buf[size] = -1;
760   for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
761     {
762       clen = mbrlen (p, buf + size - p, &mbs);
763       if ((size_t) -2 <= clen)
764         return true;
765     }
766 
767   return false;
768 }
769 
770 
/* Return true if the SIZE bytes at BUF contain a null byte.
   At least one byte must follow BUF's contents; it may be
   arbitrarily written to or read from.  */
static bool
buf_has_nulls (char *buf, size_t size)
{
  /* With a sentinel in place, strlen stops no later than SIZE; it
     stops earlier only if the contents contain a null byte.  */
  buf[size] = '\0';
  size_t first_null = strlen (buf);
  return first_null < size;
}
780 
781 /* Return true if a file is known to contain null bytes.
782    SIZE bytes have already been read from the file
783    with descriptor FD and status ST.  */
784 static bool
file_must_have_nulls(size_t size,int fd,struct stat const * st)785 file_must_have_nulls (size_t size, int fd, struct stat const *st)
786 {
787   /* If the file has holes, it must contain a null byte somewhere.  */
788   if (SEEK_HOLE != SEEK_SET && !seek_failed
789       && usable_st_size (st) && size < st->st_size)
790     {
791       off_t cur = size;
792       if (O_BINARY || fd == STDIN_FILENO)
793         {
794           cur = lseek (fd, 0, SEEK_CUR);
795           if (cur < 0)
796             return false;
797         }
798 
799       /* Look for a hole after the current location.  */
800       off_t hole_start = lseek (fd, cur, SEEK_HOLE);
801       if (0 <= hole_start)
802         {
803           if (lseek (fd, cur, SEEK_SET) < 0)
804             suppressible_error (errno);
805           if (hole_start < st->st_size)
806             return true;
807         }
808     }
809 
810   return false;
811 }
812 
813 /* Convert STR to a nonnegative integer, storing the result in *OUT.
814    STR must be a valid context length argument; report an error if it
815    isn't.  Silently ceiling *OUT at the maximum value, as that is
816    practically equivalent to infinity for grep's purposes.  */
817 static void
context_length_arg(char const * str,intmax_t * out)818 context_length_arg (char const *str, intmax_t *out)
819 {
820   switch (xstrtoimax (str, 0, 10, out, ""))
821     {
822     case LONGINT_OK:
823     case LONGINT_OVERFLOW:
824       if (0 <= *out)
825         break;
826       FALLTHROUGH;
827     default:
828       die (EXIT_TROUBLE, 0, "%s: %s", str,
829            _("invalid context length argument"));
830     }
831 }
832 
833 /* Return the add_exclude options suitable for excluding a file name.
834    If COMMAND_LINE, it is a command-line file name.  */
835 static int
exclude_options(bool command_line)836 exclude_options (bool command_line)
837 {
838   return EXCLUDE_WILDCARDS | (command_line ? 0 : EXCLUDE_ANCHORED);
839 }
840 
841 /* Return true if the file with NAME should be skipped.
842    If COMMAND_LINE, it is a command-line argument.
843    If IS_DIR, it is a directory.  */
844 static bool
skipped_file(char const * name,bool command_line,bool is_dir)845 skipped_file (char const *name, bool command_line, bool is_dir)
846 {
847   struct exclude **pats;
848   if (! is_dir)
849     pats = excluded_patterns;
850   else if (directories == SKIP_DIRECTORIES)
851     return true;
852   else if (command_line && omit_dot_slash)
853     return false;
854   else
855     pats = excluded_directory_patterns;
856   return pats[command_line] && excluded_file_name (pats[command_line], name);
857 }
858 
/* Hairy buffering mechanism for grep.  The intent is to keep
   all reads aligned on a page boundary and multiples of the
   page size, unless a read yields a partial page.

   The variables below are shared by reset, fillbuf and the scanning
   code; reset and fillbuf are the functions that set the buffer
   pointers.  */

static char *buffer;		/* Base of buffer. */
static size_t bufalloc;		/* Allocated buffer size, counting slop. */
static int bufdesc;		/* File descriptor being read; set by reset. */
static char *bufbeg;		/* Beginning of user-visible stuff. */
static char *buflim;		/* Limit of user-visible stuff. */
static size_t pagesize;		/* alignment of memory pages */
static off_t bufoffset;		/* Read offset: file offset just past the
                                   data read so far.  */
static off_t after_last_match;	/* File offset after last matching line that
                                   would have been output if we were
                                   outputting characters. */
static bool skip_nuls;		/* Skip '\0' in data: fillbuf discards
                                   reads that consist only of zeros.  */
static bool skip_empty_lines;	/* Skip empty lines in data.  */
static uintmax_t totalnl;	/* Total newline count before lastnl. */

/* Initial buffer size, not counting slop. */
enum { INITIAL_BUFSIZE = 96 * 1024 };
879 
/* Round VAL up to the nearest multiple of ALIGNMENT; a VAL that is
   already a multiple is returned unchanged.  VAL may be an integer or
   a pointer.  Each argument is expanded more than once, so neither
   may have side effects.  */
#define ALIGN_TO(val, alignment) \
  ((uintptr_t) (val) % (alignment) != 0 \
   ? (val) + ((alignment) - (uintptr_t) (val) % (alignment)) \
   : (val))
886 
887 /* Add two numbers that count input bytes or lines, and report an
888    error if the addition overflows.  */
889 static uintmax_t
add_count(uintmax_t a,uintmax_t b)890 add_count (uintmax_t a, uintmax_t b)
891 {
892   uintmax_t sum = a + b;
893   if (sum < a)
894     die (EXIT_TROUBLE, 0, _("input is too large to count"));
895   return sum;
896 }
897 
/* Report whether every one of the SIZE bytes starting at BUF is zero.
   An empty range counts as all zeros.  */
static bool
all_zeros (char const *buf, size_t size)
{
  char const *end = buf + size;

  while (buf < end)
    if (*buf++)
      return false;

  return true;
}
907 
908 /* Reset the buffer for a new file, returning false if we should skip it.
909    Initialize on the first time through. */
910 static bool
reset(int fd,struct stat const * st)911 reset (int fd, struct stat const *st)
912 {
913   bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
914   bufbeg[-1] = eolbyte;
915   bufdesc = fd;
916   bufoffset = fd == STDIN_FILENO ? lseek (fd, 0, SEEK_CUR) : 0;
917   seek_failed = bufoffset < 0;
918 
919   /* Assume SEEK_DATA fails if SEEK_CUR does.  */
920   seek_data_failed = seek_failed;
921 
922   if (seek_failed)
923     {
924       if (errno != ESPIPE)
925         {
926           suppressible_error (errno);
927           return false;
928         }
929       bufoffset = 0;
930     }
931   return true;
932 }
933 
/* Read new stuff into the buffer, saving the specified
   amount of old stuff.  When we're done, 'bufbeg' points
   to the beginning of the buffer contents, and 'buflim'
   points just after the end.  Return false if there's an error.

   SAVE is the number of bytes just before 'buflim' that must be
   kept; they end up immediately before the newly read data.  ST is
   the status of the file being read; its size, when usable, limits
   how far the buffer is grown.  A failed read leaves the buffer as if
   zero bytes had been read.  */
static bool
fillbuf (size_t save, struct stat const *st)
{
  size_t fillsize;
  bool cc = true;	/* Becomes false if the read fails.  */
  char *readbuf;	/* Where the next read stores its data.  */
  size_t readsize;

  /* If at least one page, not counting the trailing uword of slop, is
     still free after the current data, just append to it.  */
  if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
    readbuf = buflim;
  else
    {
      size_t minsize = save + pagesize;
      size_t newsize;
      size_t newalloc;
      char *newbuf;

      /* Grow newsize until it is at least as great as minsize.
         Give up via xalloc_die rather than let the later size
         computations overflow.  */
      for (newsize = bufalloc - pagesize - sizeof (uword);
           newsize < minsize;
           newsize *= 2)
        if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
          xalloc_die ();

      /* Try not to allocate more memory than the file size indicates,
         as that might cause unnecessary memory exhaustion if the file
         is large.  However, do not use the original file size as a
         heuristic if we've already read past the file end, as most
         likely the file is growing.  */
      if (usable_st_size (st))
        {
          off_t to_be_read = st->st_size - bufoffset;
          off_t maxsize_off = save + to_be_read;
          /* Use the estimate only if it did not overflow, fits in
             size_t, and lies between minsize and newsize.  */
          if (0 <= to_be_read && to_be_read <= maxsize_off
              && maxsize_off == (size_t) maxsize_off
              && minsize <= (size_t) maxsize_off
              && (size_t) maxsize_off < newsize)
            newsize = maxsize_off;
        }

      /* Add enough room so that the buffer is aligned and has room
         for byte sentinels fore and aft, and so that a uword can
         be read aft.  */
      newalloc = newsize + pagesize + sizeof (uword);

      /* Allocate a new buffer only if the current one is too small;
         otherwise the saved data is just moved within it.  */
      newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
      readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
      size_t moved = save + 1;  /* Move the preceding byte sentinel too.  */
      memmove (readbuf - moved, buflim - moved, moved);
      if (newbuf != buffer)
        {
          free (buffer);
          buffer = newbuf;
        }
    }

  bufbeg = readbuf - save;

  clear_asan_poison ();

  /* Read as many whole pages as fit before the trailing uword.  */
  readsize = buffer + bufalloc - sizeof (uword) - readbuf;
  readsize -= readsize % pagesize;

  while (true)
    {
      fillsize = safe_read (bufdesc, readbuf, readsize);
      if (fillsize == SAFE_READ_ERROR)
        {
          /* Treat the failure as an empty read, but remember it.  */
          fillsize = 0;
          cc = false;
        }
      bufoffset += fillsize;

      /* Keep the data unless nulls are being skipped and this
         nonempty read consists entirely of zero bytes.  */
      if (((fillsize == 0) | !skip_nuls) || !all_zeros (readbuf, fillsize))
        break;
      /* The discarded zero bytes are still added to totalnl.  */
      totalnl = add_count (totalnl, fillsize);

      if (SEEK_DATA != SEEK_SET && !seek_data_failed)
        {
          /* Solaris SEEK_DATA fails with errno == ENXIO in a hole at EOF.  */
          off_t data_start = lseek (bufdesc, bufoffset, SEEK_DATA);
          if (data_start < 0 && errno == ENXIO
              && usable_st_size (st) && bufoffset < st->st_size)
            data_start = lseek (bufdesc, 0, SEEK_END);

          if (data_start < 0)
            seek_data_failed = true;
          else
            {
              /* Account for the bytes jumped over without reading.  */
              totalnl = add_count (totalnl, data_start - bufoffset);
              bufoffset = data_start;
            }
        }
    }

  buflim = readbuf + fillsize;

  /* Initialize the following word, because skip_easy_bytes and some
     matchers read (but do not use) those bytes.  This avoids false
     positive reports of these bytes being used uninitialized.  */
  memset (buflim, 0, sizeof (uword));

  /* Mark the part of the buffer not filled by the read or set by
     the above memset call as ASAN-poisoned.  */
  asan_poison (buflim + sizeof (uword),
               bufalloc - (buflim - buffer) - sizeof (uword));

  return cc;
}
1047 
/* Flags controlling the style of output. */
static enum
{
  BINARY_BINARY_FILES,		/* Binary input is reported with a
                                   "binary file matches" message.  */
  TEXT_BINARY_FILES,		/* Input is never checked for nulls or
                                   encoding errors.  */
  WITHOUT_MATCH_BINARY_FILES	/* Input found to contain nulls is
                                   treated as having no matches.  */
} binary_files;		/* How to handle binary files.  */
1055 
/* Options for output as a list of matching/non-matching files.
   NOTE(review): the code that consumes this setting is outside this
   part of the file; the per-value notes follow the enumerator names.  */
static enum
{
  LISTFILES_NONE,		/* Presumably: no file-list output.  */
  LISTFILES_MATCHING,		/* Presumably: list files that match.  */
  LISTFILES_NONMATCHING,	/* Presumably: list files that do not.  */
} list_files;
1063 
/* Whether to output filenames.  1 means yes, 0 means no, and -1 means
   'grep -r PATTERN FILE' was used and it is not known yet whether
   FILE is a directory (which means yes) or not (which means no).  */
static int out_file;

static int filename_mask;	/* If zero, output nulls after filenames;
                                   otherwise output the separator.  */
static bool out_quiet;		/* Suppress all normal output. */
static bool out_invert;		/* Print nonmatching stuff. */
static bool out_line;		/* Print line numbers. */
static bool out_byte;		/* Print byte offsets. */
static intmax_t out_before;	/* Lines of leading context. */
static intmax_t out_after;	/* Lines of trailing context. */
static bool count_matches;	/* Count matching lines.  */
static intmax_t max_count;	/* Max number of selected
                                   lines from an input file.  */
static bool line_buffered;	/* Use line buffering.  */
static char *label = NULL;      /* Fake filename for stdin */


/* Internal variables to keep track of byte count, context, etc.
   grep reinitializes the per-file ones before scanning each file.  */
static uintmax_t totalcc;	/* Total character count before bufbeg. */
static char const *lastnl;	/* Pointer after last newline counted. */
static char *lastout;		/* Pointer after last character output;
                                   NULL if no character has been output
                                   or if it's conceptually before bufbeg. */
static intmax_t outleft;	/* Maximum number of selected lines.  */
static intmax_t pending;	/* Pending lines of output.
                                   Always kept 0 if out_quiet is true.  */
static bool done_on_match;	/* Stop scanning file on first match.  */
static bool exit_on_match;	/* Exit on first match.  */
static bool dev_null_output;	/* Stdout is known to be /dev/null.  */
static bool binary;		/* Use binary rather than text I/O.  */
1096 
1097 static void
nlscan(char const * lim)1098 nlscan (char const *lim)
1099 {
1100   size_t newlines = 0;
1101   for (char const *beg = lastnl; beg < lim; beg++)
1102     {
1103       beg = memchr (beg, eolbyte, lim - beg);
1104       if (!beg)
1105         break;
1106       newlines++;
1107     }
1108   totalnl = add_count (totalnl, newlines);
1109   lastnl = lim;
1110 }
1111 
/* Print the current filename, as returned by input_filename, using
   fputs_errno.  The name is bracketed by pr_sgr_start_if and
   pr_sgr_end_if calls for filename_color (presumably the color
   escape sequences; those helpers are defined elsewhere).  */
static void
print_filename (void)
{
  pr_sgr_start_if (filename_color);
  fputs_errno (input_filename ());
  pr_sgr_end_if (filename_color);
}
1120 
/* Print a character separator.  SEP is the single byte to emit via
   putchar_errno; it is bracketed by pr_sgr_start_if and pr_sgr_end_if
   calls for sep_color.  */
static void
print_sep (char sep)
{
  pr_sgr_start_if (sep_color);
  putchar_errno (sep);
  pr_sgr_end_if (sep_color);
}
1129 
/* Print a line number or a byte offset.  POS is the number to print,
   in decimal, padded to a minimum field width of offset_width.  COLOR
   is passed to pr_sgr_start_if and pr_sgr_end_if around the number.  */
static void
print_offset (uintmax_t pos, const char *color)
{
  pr_sgr_start_if (color);
  printf_errno ("%*"PRIuMAX, offset_width, pos);
  pr_sgr_end_if (color);
}
1138 
1139 /* Print a whole line head (filename, line, byte).  The output data
1140    starts at BEG and contains LEN bytes; it is followed by at least
1141    sizeof (uword) bytes, the first of which may be temporarily modified.
1142    The output data comes from what is perhaps a larger input line that
1143    goes until LIM, where LIM[-1] is an end-of-line byte.  Use SEP as
1144    the separator on output.
1145 
1146    Return true unless the line was suppressed due to an encoding error.  */
1147 
1148 static bool
print_line_head(char * beg,size_t len,char const * lim,char sep)1149 print_line_head (char *beg, size_t len, char const *lim, char sep)
1150 {
1151   if (binary_files != TEXT_BINARY_FILES)
1152     {
1153       char ch = beg[len];
1154       bool encoding_errors = buf_has_encoding_errors (beg, len);
1155       beg[len] = ch;
1156       if (encoding_errors)
1157         {
1158           encoding_error_output = true;
1159           return false;
1160         }
1161     }
1162 
1163   if (out_file)
1164     {
1165       print_filename ();
1166       if (filename_mask)
1167         print_sep (sep);
1168       else
1169         putchar_errno (0);
1170     }
1171 
1172   if (out_line)
1173     {
1174       if (lastnl < lim)
1175         {
1176           nlscan (beg);
1177           totalnl = add_count (totalnl, 1);
1178           lastnl = lim;
1179         }
1180       print_offset (totalnl, line_num_color);
1181       print_sep (sep);
1182     }
1183 
1184   if (out_byte)
1185     {
1186       uintmax_t pos = add_count (totalcc, beg - bufbeg);
1187       print_offset (pos, byte_num_color);
1188       print_sep (sep);
1189     }
1190 
1191   if (align_tabs && (out_file | out_line | out_byte) && len != 0)
1192     putchar_errno ('\t');
1193 
1194   return true;
1195 }
1196 
1197 static char *
print_line_middle(char * beg,char * lim,const char * line_color,const char * match_color)1198 print_line_middle (char *beg, char *lim,
1199                    const char *line_color, const char *match_color)
1200 {
1201   size_t match_size;
1202   size_t match_offset;
1203   char *cur;
1204   char *mid = NULL;
1205   char *b;
1206 
1207   for (cur = beg;
1208        (cur < lim
1209         && ((match_offset = execute (compiled_pattern, beg, lim - beg,
1210                                      &match_size, cur)) != (size_t) -1));
1211        cur = b + match_size)
1212     {
1213       b = beg + match_offset;
1214 
1215       /* Avoid matching the empty line at the end of the buffer. */
1216       if (b == lim)
1217         break;
1218 
1219       /* Avoid hanging on grep --color "" foo */
1220       if (match_size == 0)
1221         {
1222           /* Make minimal progress; there may be further non-empty matches.  */
1223           /* XXX - Could really advance by one whole multi-octet character.  */
1224           match_size = 1;
1225           if (!mid)
1226             mid = cur;
1227         }
1228       else
1229         {
1230           /* This function is called on a matching line only,
1231              but is it selected or rejected/context?  */
1232           if (only_matching)
1233             {
1234               char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED;
1235               if (! print_line_head (b, match_size, lim, sep))
1236                 return NULL;
1237             }
1238           else
1239             {
1240               pr_sgr_start (line_color);
1241               if (mid)
1242                 {
1243                   cur = mid;
1244                   mid = NULL;
1245                 }
1246               fwrite_errno (cur, 1, b - cur);
1247             }
1248 
1249           pr_sgr_start_if (match_color);
1250           fwrite_errno (b, 1, match_size);
1251           pr_sgr_end_if (match_color);
1252           if (only_matching)
1253             putchar_errno (eolbyte);
1254         }
1255     }
1256 
1257   if (only_matching)
1258     cur = lim;
1259   else if (mid)
1260     cur = mid;
1261 
1262   return cur;
1263 }
1264 
1265 static char *
print_line_tail(char * beg,const char * lim,const char * line_color)1266 print_line_tail (char *beg, const char *lim, const char *line_color)
1267 {
1268   size_t eol_size;
1269   size_t tail_size;
1270 
1271   eol_size   = (lim > beg && lim[-1] == eolbyte);
1272   eol_size  += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
1273   tail_size  =  lim - eol_size - beg;
1274 
1275   if (tail_size > 0)
1276     {
1277       pr_sgr_start (line_color);
1278       fwrite_errno (beg, 1, tail_size);
1279       beg += tail_size;
1280       pr_sgr_end (line_color);
1281     }
1282 
1283   return beg;
1284 }
1285 
1286 static void
prline(char * beg,char * lim,char sep)1287 prline (char *beg, char *lim, char sep)
1288 {
1289   bool matching;
1290   const char *line_color;
1291   const char *match_color;
1292 
1293   if (!only_matching)
1294     if (! print_line_head (beg, lim - beg - 1, lim, sep))
1295       return;
1296 
1297   matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
1298 
1299   if (color_option)
1300     {
1301       line_color = (((sep == SEP_CHAR_SELECTED)
1302                      ^ (out_invert && (color_option < 0)))
1303                     ? selected_line_color  : context_line_color);
1304       match_color = (sep == SEP_CHAR_SELECTED
1305                      ? selected_match_color : context_match_color);
1306     }
1307   else
1308     line_color = match_color = NULL; /* Shouldn't be used.  */
1309 
1310   if ((only_matching && matching)
1311       || (color_option && (*line_color || *match_color)))
1312     {
1313       /* We already know that non-matching lines have no match (to colorize). */
1314       if (matching && (only_matching || *match_color))
1315         {
1316           beg = print_line_middle (beg, lim, line_color, match_color);
1317           if (! beg)
1318             return;
1319         }
1320 
1321       if (!only_matching && *line_color)
1322         {
1323           /* This code is exercised at least when grep is invoked like this:
1324              echo k| GREP_COLORS='sl=01;32' src/grep k --color=always  */
1325           beg = print_line_tail (beg, lim, line_color);
1326         }
1327     }
1328 
1329   if (!only_matching && lim > beg)
1330     fwrite_errno (beg, 1, lim - beg);
1331 
1332   if (line_buffered)
1333     fflush_errno ();
1334 
1335   if (stdout_errno)
1336     die (EXIT_TROUBLE, stdout_errno, _("write error"));
1337 
1338   lastout = lim;
1339 }
1340 
1341 /* Print pending lines of trailing context prior to LIM.  */
1342 static void
prpending(char const * lim)1343 prpending (char const *lim)
1344 {
1345   if (!lastout)
1346     lastout = bufbeg;
1347   for (; 0 < pending && lastout < lim; pending--)
1348     {
1349       char *nl = rawmemchr (lastout, eolbyte);
1350       prline (lastout, nl + 1, SEP_CHAR_REJECTED);
1351     }
1352 }
1353 
1354 /* Output the lines between BEG and LIM.  Deal with context.  */
1355 static void
prtext(char * beg,char * lim)1356 prtext (char *beg, char *lim)
1357 {
1358   static bool used;	/* Avoid printing SEP_STR_GROUP before any output.  */
1359   char eol = eolbyte;
1360 
1361   if (!out_quiet && pending > 0)
1362     prpending (beg);
1363 
1364   char *p = beg;
1365 
1366   if (!out_quiet)
1367     {
1368       /* Deal with leading context.  */
1369       char const *bp = lastout ? lastout : bufbeg;
1370       intmax_t i;
1371       for (i = 0; i < out_before; ++i)
1372         if (p > bp)
1373           do
1374             --p;
1375           while (p[-1] != eol);
1376 
1377       /* Print the group separator unless the output is adjacent to
1378          the previous output in the file.  */
1379       if ((0 <= out_before || 0 <= out_after) && used
1380           && p != lastout && group_separator)
1381         {
1382           pr_sgr_start_if (sep_color);
1383           fputs_errno (group_separator);
1384           pr_sgr_end_if (sep_color);
1385           putchar_errno ('\n');
1386         }
1387 
1388       while (p < beg)
1389         {
1390           char *nl = rawmemchr (p, eol);
1391           nl++;
1392           prline (p, nl, SEP_CHAR_REJECTED);
1393           p = nl;
1394         }
1395     }
1396 
1397   intmax_t n;
1398   if (out_invert)
1399     {
1400       /* One or more lines are output.  */
1401       for (n = 0; p < lim && n < outleft; n++)
1402         {
1403           char *nl = rawmemchr (p, eol);
1404           nl++;
1405           if (!out_quiet)
1406             prline (p, nl, SEP_CHAR_SELECTED);
1407           p = nl;
1408         }
1409     }
1410   else
1411     {
1412       /* Just one line is output.  */
1413       if (!out_quiet)
1414         prline (beg, lim, SEP_CHAR_SELECTED);
1415       n = 1;
1416       p = lim;
1417     }
1418 
1419   after_last_match = bufoffset - (buflim - p);
1420   pending = out_quiet ? 0 : MAX (0, out_after);
1421   used = true;
1422   outleft -= n;
1423 }
1424 
/* Overwrite every NUL byte in the range P..LIM with EOL, unless EOL
   is itself NUL, in which case nothing is done.  This avoids running
   out of memory when binary input contains a long sequence of zeros,
   which would otherwise be considered to be part of a long line.
   *LIM must be writable; it is used as a temporary sentinel and is
   left holding EOL.  */
static void
zap_nuls (char *p, char *lim, char eol)
{
  if (eol == '\0')
    return;

  for (;;)
    {
      /* Find the next NUL, with a NUL planted at LIM to stop the
         scan, then put EOL back there.  */
      *lim = '\0';
      size_t run = strlen (p);
      *lim = eol;
      p += run;
      if (p == lim)
        return;

      /* Replace this NUL and any that directly follow it.  The EOL
         now at LIM ends the run at the latest.  */
      *p++ = eol;
      while (*p == '\0')
        *p++ = eol;
    }
}
1445 
1446 /* Scan the specified portion of the buffer, matching lines (or
1447    between matching lines if OUT_INVERT is true).  Return a count of
1448    lines printed.  Replace all NUL bytes with NUL_ZAPPER as we go.  */
1449 static intmax_t
grepbuf(char * beg,char const * lim)1450 grepbuf (char *beg, char const *lim)
1451 {
1452   intmax_t outleft0 = outleft;
1453   char *endp;
1454 
1455   for (char *p = beg; p < lim; p = endp)
1456     {
1457       size_t match_size;
1458       size_t match_offset = execute (compiled_pattern, p, lim - p,
1459                                      &match_size, NULL);
1460       if (match_offset == (size_t) -1)
1461         {
1462           if (!out_invert)
1463             break;
1464           match_offset = lim - p;
1465           match_size = 0;
1466         }
1467       char *b = p + match_offset;
1468       endp = b + match_size;
1469       /* Avoid matching the empty line at the end of the buffer. */
1470       if (!out_invert && b == lim)
1471         break;
1472       if (!out_invert || p < b)
1473         {
1474           char *prbeg = out_invert ? p : b;
1475           char *prend = out_invert ? b : endp;
1476           prtext (prbeg, prend);
1477           if (!outleft || done_on_match)
1478             {
1479               if (exit_on_match)
1480                 exit (errseen ? exit_failure : EXIT_SUCCESS);
1481               break;
1482             }
1483         }
1484     }
1485 
1486   return outleft0 - outleft;
1487 }
1488 
/* Search a given (non-directory) file.  Return a count of lines printed.
   Set *INEOF to true if end-of-file reached.

   FD is the open descriptor to read and ST is its file status; both
   are handed to reset, fillbuf and file_must_have_nulls.  The
   per-file counters and output state (totalcc, totalnl, lastout,
   outleft, pending, ...) are reinitialized here before scanning.
   Return 0 without scanning if reset or the first fillbuf fails, and
   also when nulls are found while binary_files is
   WITHOUT_MATCH_BINARY_FILES.  */
static intmax_t
grep (int fd, struct stat const *st, bool *ineof)
{
  intmax_t nlines, i;
  size_t residue, save;
  char oldc;
  char *beg;
  char *lim;
  char eol = eolbyte;
  char nul_zapper = '\0';
  /* Remember these settings: they may be overridden below once nulls
     are found in the input, and are restored at finish_grep.  */
  bool done_on_match_0 = done_on_match;
  bool out_quiet_0 = out_quiet;

  /* The value of NLINES when nulls were first deduced in the input;
     this is not necessarily the same as the number of matching lines
     before the first null.  -1 if no input nulls have been deduced.  */
  intmax_t nlines_first_null = -1;

  if (! reset (fd, st))
    return 0;

  totalcc = 0;
  lastout = 0;
  totalnl = 0;
  outleft = max_count;
  after_last_match = 0;
  pending = 0;
  skip_nuls = skip_empty_lines && !eol;
  encoding_error_output = false;

  nlines = 0;
  residue = 0;
  save = 0;

  if (! fillbuf (save, st))
    {
      suppressible_error (errno);
      return 0;
    }

  offset_width = 0;
  if (align_tabs)
    {
      /* Width is log of maximum number.  Line numbers are origin-1.  */
      uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX;
      num += out_line && num < UINTMAX_MAX;
      do
        offset_width++;
      while ((num /= 10) != 0);
    }

  /* Each iteration scans the complete lines of one buffer load and
     then reads the next load, carrying over SAVE bytes.  */
  for (bool firsttime = true; ; firsttime = false)
    {
      /* Until nulls have been found, check each buffer load for them;
         on the first load also ask whether the file has holes.  */
      if (nlines_first_null < 0 && eol && binary_files != TEXT_BINARY_FILES
          && (buf_has_nulls (bufbeg, buflim - bufbeg)
              || (firsttime && file_must_have_nulls (buflim - bufbeg, fd, st))))
        {
          if (binary_files == WITHOUT_MATCH_BINARY_FILES)
            return 0;
          /* Unless counting, stop at the first match and print no
             lines from here on.  */
          if (!count_matches)
            done_on_match = out_quiet = true;
          nlines_first_null = nlines;
          /* From now on nulls are turned into end-of-line bytes.  */
          nul_zapper = eol;
          skip_nuls = skip_empty_lines;
        }

      lastnl = bufbeg;
      if (lastout)
        lastout = bufbeg;

      beg = bufbeg + save;

      /* no more data to scan (eof) except for maybe a residue -> break */
      if (beg == buflim)
        {
          *ineof = true;
          break;
        }

      zap_nuls (beg, buflim, nul_zapper);

      /* Determine new residue (the length of an incomplete line at the end of
         the buffer, 0 means there is no incomplete last line).  */
      oldc = beg[-1];
      beg[-1] = eol;
      /* FIXME: use rawmemrchr if/when it exists, since we have ensured
         that this use of memrchr is guaranteed never to return NULL.  */
      lim = memrchr (beg - 1, eol, buflim - beg + 1);
      ++lim;
      beg[-1] = oldc;
      /* No end-of-line byte in the new data: the old residue only grows.  */
      if (lim == beg)
        lim = beg - residue;
      beg -= residue;
      residue = buflim - lim;

      if (beg < lim)
        {
          if (outleft)
            nlines += grepbuf (beg, lim);
          if (pending)
            prpending (lim);
          /* Stop when no more output is wanted, or when done_on_match
             is set and a line has been counted since nulls were found
             (or at all, if none were).  */
          if ((!outleft && !pending)
              || (done_on_match && MAX (0, nlines_first_null) < nlines))
            goto finish_grep;
        }

      /* The last OUT_BEFORE lines at the end of the buffer will be needed as
         leading context if there is a matching line at the begin of the
         next data. Make beg point to their begin.  */
      i = 0;
      beg = lim;
      while (i < out_before && beg > bufbeg && beg != lastout)
        {
          ++i;
          do
            --beg;
          while (beg[-1] != eol);
        }

      /* Detect whether leading context is adjacent to previous output.  */
      if (beg != lastout)
        lastout = 0;

      /* Handle some details and read more data to scan.  */
      save = residue + lim - beg;
      if (out_byte)
        totalcc = add_count (totalcc, buflim - bufbeg - save);
      if (out_line)
        nlscan (beg);
      if (! fillbuf (save, st))
        {
          suppressible_error (errno);
          goto finish_grep;
        }
    }
  /* At end of file, an unterminated last line is scanned after an
     end-of-line byte has been appended to it.  */
  if (residue)
    {
      *buflim++ = eol;
      if (outleft)
        nlines += grepbuf (bufbeg + save - residue, buflim);
      if (pending)
        prpending (buflim);
    }

 finish_grep:
  done_on_match = done_on_match_0;
  out_quiet = out_quiet_0;
  /* Report a binary match if an output line was suppressed for an
     encoding error, or if lines were counted after nulls were found.  */
  if (binary_files == BINARY_BINARY_FILES && ! (out_quiet | suppress_errors)
      && (encoding_error_output
          || (0 <= nlines_first_null && nlines_first_null < nlines)))
    error (0, 0, _("%s: binary file matches"), input_filename ());
  return nlines;
}
1644 
1645 static bool
grepdirent(FTS * fts,FTSENT * ent,bool command_line)1646 grepdirent (FTS *fts, FTSENT *ent, bool command_line)
1647 {
1648   bool follow;
1649   command_line &= ent->fts_level == FTS_ROOTLEVEL;
1650 
1651   if (ent->fts_info == FTS_DP)
1652     return true;
1653 
1654   if (!command_line
1655       && skipped_file (ent->fts_name, false,
1656                        (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
1657                         || ent->fts_info == FTS_DNR)))
1658     {
1659       fts_set (fts, ent, FTS_SKIP);
1660       return true;
1661     }
1662 
1663   filename = ent->fts_path;
1664   if (omit_dot_slash && filename[1])
1665     filename += 2;
1666   follow = (fts->fts_options & FTS_LOGICAL
1667             || (fts->fts_options & FTS_COMFOLLOW && command_line));
1668 
1669   switch (ent->fts_info)
1670     {
1671     case FTS_D:
1672       if (directories == RECURSE_DIRECTORIES)
1673         return true;
1674       fts_set (fts, ent, FTS_SKIP);
1675       break;
1676 
1677     case FTS_DC:
1678       if (!suppress_errors)
1679         error (0, 0, _("%s: warning: recursive directory loop"), filename);
1680       return true;
1681 
1682     case FTS_DNR:
1683     case FTS_ERR:
1684     case FTS_NS:
1685       suppressible_error (ent->fts_errno);
1686       return true;
1687 
1688     case FTS_DEFAULT:
1689     case FTS_NSOK:
1690       if (skip_devices (command_line))
1691         {
1692           struct stat *st = ent->fts_statp;
1693           struct stat st1;
1694           if (! st->st_mode)
1695             {
1696               /* The file type is not already known.  Get the file status
1697                  before opening, since opening might have side effects
1698                  on a device.  */
1699               int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
1700               if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
1701                 {
1702                   suppressible_error (errno);
1703                   return true;
1704                 }
1705               st = &st1;
1706             }
1707           if (is_device_mode (st->st_mode))
1708             return true;
1709         }
1710       break;
1711 
1712     case FTS_F:
1713     case FTS_SLNONE:
1714       break;
1715 
1716     case FTS_SL:
1717     case FTS_W:
1718       return true;
1719 
1720     default:
1721       abort ();
1722     }
1723 
1724   return grepfile (fts->fts_cwd_fd, ent->fts_accpath, follow, command_line);
1725 }
1726 
/* Report whether ERR is an errno value that
   'open ("symlink", ... O_NOFOLLOW ...)' can fail with.
   POSIX specifies ELOOP, but it's EMLINK on FreeBSD and EFTYPE on
   NetBSD; EFTYPE is checked only where it is defined.  */
static bool
open_symlink_nofollow_error (int err)
{
  bool is_symlink_error = err == ELOOP || err == EMLINK;
#ifdef EFTYPE
  is_symlink_error = is_symlink_error || err == EFTYPE;
#endif
  return is_symlink_error;
}
1740 
1741 static bool
grepfile(int dirdesc,char const * name,bool follow,bool command_line)1742 grepfile (int dirdesc, char const *name, bool follow, bool command_line)
1743 {
1744   int oflag = (O_RDONLY | O_NOCTTY
1745                | (IGNORE_DUPLICATE_BRANCH_WARNING
1746                   (binary ? O_BINARY : 0))
1747                | (follow ? 0 : O_NOFOLLOW)
1748                | (skip_devices (command_line) ? O_NONBLOCK : 0));
1749   int desc = openat_safer (dirdesc, name, oflag);
1750   if (desc < 0)
1751     {
1752       if (follow || ! open_symlink_nofollow_error (errno))
1753         suppressible_error (errno);
1754       return true;
1755     }
1756   return grepdesc (desc, command_line);
1757 }
1758 
1759 /* Read all data from FD, with status ST.  Return true if successful,
1760    false (setting errno) otherwise.  */
1761 static bool
drain_input(int fd,struct stat const * st)1762 drain_input (int fd, struct stat const *st)
1763 {
1764   ssize_t nbytes;
1765   if (S_ISFIFO (st->st_mode) && dev_null_output)
1766     {
1767 #ifdef SPLICE_F_MOVE
1768       /* Should be faster, since it need not copy data to user space.  */
1769       nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1770                        INITIAL_BUFSIZE, SPLICE_F_MOVE);
1771       if (0 <= nbytes || errno != EINVAL)
1772         {
1773           while (0 < nbytes)
1774             nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1775                              INITIAL_BUFSIZE, SPLICE_F_MOVE);
1776           return nbytes == 0;
1777         }
1778 #endif
1779     }
1780   while ((nbytes = safe_read (fd, buffer, bufalloc)))
1781     if (nbytes == SAFE_READ_ERROR)
1782       return false;
1783   return true;
1784 }
1785 
1786 /* Finish reading from FD, with status ST and where end-of-file has
1787    been seen if INEOF.  Typically this is a no-op, but when reading
1788    from standard input this may adjust the file offset or drain a
1789    pipe.  */
1790 
1791 static void
finalize_input(int fd,struct stat const * st,bool ineof)1792 finalize_input (int fd, struct stat const *st, bool ineof)
1793 {
1794   if (fd == STDIN_FILENO
1795       && (outleft
1796           ? (!ineof
1797              && (seek_failed
1798                  || (lseek (fd, 0, SEEK_END) < 0
1799                      /* Linux proc file system has EINVAL (Bug#25180).  */
1800                      && errno != EINVAL))
1801              && ! drain_input (fd, st))
1802           : (bufoffset != after_last_match && !seek_failed
1803              && lseek (fd, after_last_match, SEEK_SET) < 0)))
1804     suppressible_error (errno);
1805 }
1806 
1807 static bool
grepdesc(int desc,bool command_line)1808 grepdesc (int desc, bool command_line)
1809 {
1810   intmax_t count;
1811   bool status = true;
1812   bool ineof = false;
1813   struct stat st;
1814 
1815   /* Get the file status, possibly for the second time.  This catches
1816      a race condition if the directory entry changes after the
1817      directory entry is read and before the file is opened.  For
1818      example, normally DESC is a directory only at the top level, but
1819      there is an exception if some other process substitutes a
1820      directory for a non-directory while 'grep' is running.  */
1821   if (fstat (desc, &st) != 0)
1822     {
1823       suppressible_error (errno);
1824       goto closeout;
1825     }
1826 
1827   if (desc != STDIN_FILENO && skip_devices (command_line)
1828       && is_device_mode (st.st_mode))
1829     goto closeout;
1830 
1831   if (desc != STDIN_FILENO && command_line
1832       && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0))
1833     goto closeout;
1834 
1835   /* Don't output file names if invoked as 'grep -r PATTERN NONDIRECTORY'.  */
1836   if (out_file < 0)
1837     out_file = !!S_ISDIR (st.st_mode);
1838 
1839   if (desc != STDIN_FILENO
1840       && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
1841     {
1842       /* Traverse the directory starting with its full name, because
1843          unfortunately fts provides no way to traverse the directory
1844          starting from its file descriptor.  */
1845 
1846       FTS *fts;
1847       FTSENT *ent;
1848       int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
1849       char *fts_arg[2];
1850 
1851       /* Close DESC now, to conserve file descriptors if the race
1852          condition occurs many times in a deep recursion.  */
1853       if (close (desc) != 0)
1854         suppressible_error (errno);
1855 
1856       fts_arg[0] = (char *) filename;
1857       fts_arg[1] = NULL;
1858       fts = fts_open (fts_arg, opts, NULL);
1859 
1860       if (!fts)
1861         xalloc_die ();
1862       while ((ent = fts_read (fts)))
1863         status &= grepdirent (fts, ent, command_line);
1864       if (errno)
1865         suppressible_error (errno);
1866       if (fts_close (fts) != 0)
1867         suppressible_error (errno);
1868       return status;
1869     }
1870   if (desc != STDIN_FILENO
1871       && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
1872           || ((devices == SKIP_DEVICES
1873                || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
1874               && is_device_mode (st.st_mode))))
1875     goto closeout;
1876 
1877   /* If there is a regular file on stdout and the current file refers
1878      to the same i-node, we have to report the problem and skip it.
1879      Otherwise when matching lines from some other input reach the
1880      disk before we open this file, we can end up reading and matching
1881      those lines and appending them to the file from which we're reading.
1882      Then we'd have what appears to be an infinite loop that'd terminate
1883      only upon filling the output file system or reaching a quota.
1884      However, there is no risk of an infinite loop if grep is generating
1885      no output, i.e., with --silent, --quiet, -q.
1886      Similarly, with any of these:
1887        --max-count=N (-m) (for N >= 2)
1888        --files-with-matches (-l)
1889        --files-without-match (-L)
1890      there is no risk of trouble.
1891      For --max-count=1, grep stops after printing the first match,
1892      so there is no risk of malfunction.  But even --max-count=2, with
1893      input==output, while there is no risk of infloop, there is a race
1894      condition that could result in "alternate" output.  */
1895   if (!out_quiet && list_files == LISTFILES_NONE && 1 < max_count
1896       && S_ISREG (st.st_mode) && SAME_INODE (st, out_stat))
1897     {
1898       if (! suppress_errors)
1899         error (0, 0, _("%s: input file is also the output"), input_filename ());
1900       errseen = true;
1901       goto closeout;
1902     }
1903 
1904   count = grep (desc, &st, &ineof);
1905   if (count_matches)
1906     {
1907       if (out_file)
1908         {
1909           print_filename ();
1910           if (filename_mask)
1911             print_sep (SEP_CHAR_SELECTED);
1912           else
1913             putchar_errno (0);
1914         }
1915       printf_errno ("%" PRIdMAX "\n", count);
1916       if (line_buffered)
1917         fflush_errno ();
1918     }
1919 
1920   status = !count;
1921 
1922   if (list_files == LISTFILES_NONE)
1923     finalize_input (desc, &st, ineof);
1924   else if (list_files == (status ? LISTFILES_NONMATCHING : LISTFILES_MATCHING))
1925     {
1926       print_filename ();
1927       putchar_errno ('\n' & filename_mask);
1928       if (line_buffered)
1929         fflush_errno ();
1930     }
1931 
1932  closeout:
1933   if (desc != STDIN_FILENO && close (desc) != 0)
1934     suppressible_error (errno);
1935   return status;
1936 }
1937 
1938 static bool
grep_command_line_arg(char const * arg)1939 grep_command_line_arg (char const *arg)
1940 {
1941   if (STREQ (arg, "-"))
1942     {
1943       filename = label;
1944       if (binary)
1945         xset_binary_mode (STDIN_FILENO, O_BINARY);
1946       return grepdesc (STDIN_FILENO, true);
1947     }
1948   else
1949     {
1950       filename = arg;
1951       return grepfile (AT_FDCWD, arg, true, true);
1952     }
1953 }
1954 
/* Print usage information and exit with status STATUS.  If STATUS is
   nonzero, print a brief synopsis and a pointer to --help on standard
   error; otherwise print the full help text on standard output.
   Each translatable message is kept as its own string so that the
   message catalogs continue to match.  */
_Noreturn void usage (int);
void
usage (int status)
{
  if (status != 0)
    {
      /* Failure: a short reminder is enough.  */
      fprintf (stderr, _("Usage: %s [OPTION]... PATTERNS [FILE]...\n"),
               getprogname ());
      fprintf (stderr, _("Try '%s --help' for more information.\n"),
               getprogname ());
      exit (status);
    }

  printf (_("Usage: %s [OPTION]... PATTERNS [FILE]...\n"), getprogname ());
  printf (_("Search for PATTERNS in each FILE.\n"));
  printf (_("\
Example: %s -i 'hello world' menu.h main.c\n\
PATTERNS can contain multiple patterns separated by newlines.\n\
\n\
Pattern selection and interpretation:\n"), getprogname ());
  printf (_("\
  -E, --extended-regexp     PATTERNS are extended regular expressions\n\
  -F, --fixed-strings       PATTERNS are strings\n\
  -G, --basic-regexp        PATTERNS are basic regular expressions\n\
  -P, --perl-regexp         PATTERNS are Perl regular expressions\n"));
  /* -X is deliberately undocumented.  */
  printf (_("\
  -e, --regexp=PATTERNS     use PATTERNS for matching\n\
  -f, --file=FILE           take PATTERNS from FILE\n\
  -i, --ignore-case         ignore case distinctions in patterns and data\n\
      --no-ignore-case      do not ignore case distinctions (default)\n\
  -w, --word-regexp         match only whole words\n\
  -x, --line-regexp         match only whole lines\n\
  -z, --null-data           a data line ends in 0 byte, not newline\n"));
  printf (_("\
\n\
Miscellaneous:\n\
  -s, --no-messages         suppress error messages\n\
  -v, --invert-match        select non-matching lines\n\
  -V, --version             display version information and exit\n\
      --help                display this help text and exit\n"));
  printf (_("\
\n\
Output control:\n\
  -m, --max-count=NUM       stop after NUM selected lines\n\
  -b, --byte-offset         print the byte offset with output lines\n\
  -n, --line-number         print line number with output lines\n\
      --line-buffered       flush output on every line\n\
  -H, --with-filename       print file name with output lines\n\
  -h, --no-filename         suppress the file name prefix on output\n\
      --label=LABEL         use LABEL as the standard input file name prefix\n\
"));
  printf (_("\
  -o, --only-matching       show only nonempty parts of lines that match\n\
  -q, --quiet, --silent     suppress all normal output\n\
      --binary-files=TYPE   assume that binary files are TYPE;\n\
                            TYPE is 'binary', 'text', or 'without-match'\n\
  -a, --text                equivalent to --binary-files=text\n\
"));
  printf (_("\
  -I                        equivalent to --binary-files=without-match\n\
  -d, --directories=ACTION  how to handle directories;\n\
                            ACTION is 'read', 'recurse', or 'skip'\n\
  -D, --devices=ACTION      how to handle devices, FIFOs and sockets;\n\
                            ACTION is 'read' or 'skip'\n\
  -r, --recursive           like --directories=recurse\n\
  -R, --dereference-recursive  likewise, but follow all symlinks\n\
"));
  printf (_("\
      --include=GLOB        search only files that match GLOB (a file pattern)"
            "\n\
      --exclude=GLOB        skip files that match GLOB\n\
      --exclude-from=FILE   skip files that match any file pattern from FILE\n\
      --exclude-dir=GLOB    skip directories that match GLOB\n\
"));
  printf (_("\
  -L, --files-without-match  print only names of FILEs with no selected lines\n\
  -l, --files-with-matches  print only names of FILEs with selected lines\n\
  -c, --count               print only a count of selected lines per FILE\n\
  -T, --initial-tab         make tabs line up (if needed)\n\
  -Z, --null                print 0 byte after FILE name\n"));
  printf (_("\
\n\
Context control:\n\
  -B, --before-context=NUM  print NUM lines of leading context\n\
  -A, --after-context=NUM   print NUM lines of trailing context\n\
  -C, --context=NUM         print NUM lines of output context\n\
"));
  printf (_("\
  -NUM                      same as --context=NUM\n\
      --group-separator=SEP  print SEP on line between matches with context\n\
      --no-group-separator  do not print separator for matches with context\n\
      --color[=WHEN],\n\
      --colour[=WHEN]       use markers to highlight the matching strings;\n\
                            WHEN is 'always', 'never', or 'auto'\n\
  -U, --binary              do not strip CR characters at EOL (MSDOS/Windows)\n\
\n"));
  printf (_("\
When FILE is '-', read standard input.  With no FILE, read '.' if\n\
recursive, '-' otherwise.  With fewer than two FILEs, assume -h.\n\
Exit status is 0 if any line is selected, 1 otherwise;\n\
if any error occurs and -q is not given, the exit status is 2.\n"));
  emit_bug_reporting_address ();
  exit (status);
}
2061 
2062 /* Pattern compilers and matchers.  */
2063 
2064 static struct
2065 {
2066   char name[12];
2067   int syntax; /* used if compile == GEAcompile */
2068   compile_fp_t compile;
2069   execute_fp_t execute;
2070 } const matchers[] = {
2071   { "grep", RE_SYNTAX_GREP, GEAcompile, EGexecute },
2072   { "egrep", RE_SYNTAX_EGREP, GEAcompile, EGexecute },
2073   { "fgrep", 0, Fcompile, Fexecute, },
2074   { "awk", RE_SYNTAX_AWK, GEAcompile, EGexecute },
2075   { "gawk", RE_SYNTAX_GNU_AWK, GEAcompile, EGexecute },
2076   { "posixawk", RE_SYNTAX_POSIX_AWK, GEAcompile, EGexecute },
2077 #if HAVE_LIBPCRE
2078   { "perl", 0, Pcompile, Pexecute, },
2079 #endif
2080 };
2081 /* Keep these in sync with the 'matchers' table.  */
2082 enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0 };
2083 
2084 /* Return the index of the matcher corresponding to M if available.
2085    MATCHER is the index of the previous matcher, or -1 if none.
2086    Exit in case of conflicts or if M is not available.  */
2087 static int
setmatcher(char const * m,int matcher)2088 setmatcher (char const *m, int matcher)
2089 {
2090   for (int i = 0; i < sizeof matchers / sizeof *matchers; i++)
2091     if (STREQ (m, matchers[i].name))
2092       {
2093         if (0 <= matcher && matcher != i)
2094           die (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
2095         return i;
2096       }
2097 
2098 #if !HAVE_LIBPCRE
2099   if (STREQ (m, "perl"))
2100     die (EXIT_TROUBLE, 0,
2101          _("Perl matching not supported in a --disable-perl-regexp build"));
2102 #endif
2103   die (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
2104 }
2105 
2106 /* Get the next non-digit option from ARGC and ARGV.
2107    Return -1 if there are no more options.
2108    Process any digit options that were encountered on the way,
2109    and store the resulting integer into *DEFAULT_CONTEXT.  */
2110 static int
get_nondigit_option(int argc,char * const * argv,intmax_t * default_context)2111 get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
2112 {
2113   static int prev_digit_optind = -1;
2114   int this_digit_optind;
2115   bool was_digit;
2116   char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
2117   char *p = buf;
2118   int opt;
2119 
2120   was_digit = false;
2121   this_digit_optind = optind;
2122   while (true)
2123     {
2124       opt = getopt_long (argc, (char **) argv, short_options,
2125                          long_options, NULL);
2126       if (! c_isdigit (opt))
2127         break;
2128 
2129       if (prev_digit_optind != this_digit_optind || !was_digit)
2130         {
2131           /* Reset to start another context length argument.  */
2132           p = buf;
2133         }
2134       else
2135         {
2136           /* Suppress trivial leading zeros, to avoid incorrect
2137              diagnostic on strings like 00000000000.  */
2138           p -= buf[0] == '0';
2139         }
2140 
2141       if (p == buf + sizeof buf - 4)
2142         {
2143           /* Too many digits.  Append "..." to make context_length_arg
2144              complain about "X...", where X contains the digits seen
2145              so far.  */
2146           strcpy (p, "...");
2147           p += 3;
2148           break;
2149         }
2150       *p++ = opt;
2151 
2152       was_digit = true;
2153       prev_digit_optind = this_digit_optind;
2154       this_digit_optind = optind;
2155     }
2156   if (p != buf)
2157     {
2158       *p = '\0';
2159       context_length_arg (buf, default_context);
2160     }
2161 
2162   return opt;
2163 }
2164 
2165 /* Parse GREP_COLORS.  The default would look like:
2166      GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
2167    with boolean capabilities (ne and rv) unset (i.e., omitted).
2168    No character escaping is needed or supported.  */
2169 static void
parse_grep_colors(void)2170 parse_grep_colors (void)
2171 {
2172   const char *p;
2173   char *q;
2174   char *name;
2175   char *val;
2176 
2177   p = getenv ("GREP_COLORS"); /* Plural! */
2178   if (p == NULL || *p == '\0')
2179     return;
2180 
2181   /* Work off a writable copy.  */
2182   q = xstrdup (p);
2183 
2184   name = q;
2185   val = NULL;
2186   /* From now on, be well-formed or you're gone.  */
2187   for (;;)
2188     if (*q == ':' || *q == '\0')
2189       {
2190         char c = *q;
2191         struct color_cap const *cap;
2192 
2193         *q++ = '\0'; /* Terminate name or val.  */
2194         /* Empty name without val (empty cap)
2195          * won't match and will be ignored.  */
2196         for (cap = color_dict; cap->name; cap++)
2197           if (STREQ (cap->name, name))
2198             break;
2199         /* If name unknown, go on for forward compatibility.  */
2200         if (cap->var && val)
2201           *(cap->var) = val;
2202         if (cap->fct)
2203           cap->fct ();
2204         if (c == '\0')
2205           return;
2206         name = q;
2207         val = NULL;
2208       }
2209     else if (*q == '=')
2210       {
2211         if (q == name || val)
2212           return;
2213         *q++ = '\0'; /* Terminate name.  */
2214         val = q; /* Can be the empty string.  */
2215       }
2216     else if (val == NULL)
2217       q++; /* Accumulate name.  */
2218     else if (*q == ';' || c_isdigit (*q))
2219       q++; /* Accumulate val.  Protect the terminal from being sent crap.  */
2220     else
2221       return;
2222 }
2223 
/* Return true if PAT, of length PATLEN, contains a character that
   mb_clen reports as an encoding error or as incomplete.  */
static bool
contains_encoding_error (char const *pat, size_t patlen)
{
  mbstate_t state = { 0 };
  size_t pos = 0;

  while (pos < patlen)
    {
      size_t clen = mb_clen (pat + pos, patlen - pos, &state);
      /* (size_t) -1 and (size_t) -2 are the failure values.  */
      if (clen == (size_t) -1 || clen == (size_t) -2)
        return true;
      pos += clen;
    }

  return false;
}
2239 
2240 /* When ignoring case and (-E or -F or -G), then for each single-byte
2241    character I, ok_fold[I] is 1 if every case folded counterpart of I
2242    is also single-byte, and is -1 otherwise.  */
2243 static signed char ok_fold[NCHAR];
2244 static void
setup_ok_fold(void)2245 setup_ok_fold (void)
2246 {
2247   for (int i = 0; i < NCHAR; i++)
2248     {
2249       wint_t wi = localeinfo.sbctowc[i];
2250       if (wi == WEOF)
2251         continue;
2252 
2253       int ok = 1;
2254       wchar_t folded[CASE_FOLDED_BUFSIZE];
2255       for (int n = case_folded_counterparts (wi, folded); 0 <= --n; )
2256         {
2257           char buf[MB_LEN_MAX];
2258           mbstate_t s = { 0 };
2259           if (wcrtomb (buf, folded[n], &s) != 1)
2260             {
2261               ok = -1;
2262               break;
2263             }
2264         }
2265       ok_fold[i] = ok;
2266     }
2267 }
2268 
2269 /* Return the number of bytes in the initial character of PAT, of size
2270    PATLEN, if Fcompile can handle that character.  Return -1 if
2271    Fcompile cannot handle it.  MBS is the multibyte conversion state.
2272    PATLEN must be nonzero.  */
2273 
2274 static int
fgrep_icase_charlen(char const * pat,size_t patlen,mbstate_t * mbs)2275 fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
2276 {
2277   unsigned char pat0 = pat[0];
2278 
2279   /* If PAT starts with a single-byte character, Fcompile works if
2280      every case folded counterpart is also single-byte.  */
2281   if (localeinfo.sbctowc[pat0] != WEOF)
2282     return ok_fold[pat0];
2283 
2284   wchar_t wc;
2285   size_t wn = mbrtowc (&wc, pat, patlen, mbs);
2286 
2287   /* If PAT starts with an encoding error, Fcompile does not work.  */
2288   if (MB_LEN_MAX < wn)
2289     return -1;
2290 
2291   /* PAT starts with a multibyte character.  Fcompile works if the
2292      character has no case folded counterparts and toupper translates
2293      none of its encoding's bytes.  */
2294   wchar_t folded[CASE_FOLDED_BUFSIZE];
2295   if (case_folded_counterparts (wc, folded))
2296     return -1;
2297   for (int i = wn; 0 < --i; )
2298     {
2299       unsigned char c = pat[i];
2300       if (toupper (c) != c)
2301         return -1;
2302     }
2303   return wn;
2304 }
2305 
/* Return true if the -F patterns PAT, of size PATLEN, can be
   processed by Fcompile when ignoring case, i.e., if they contain
   only single-byte characters that case-fold only to single-byte
   characters, or multibyte characters not subject to case folding.  */

static bool
fgrep_icase_available (char const *pat, size_t patlen)
{
  mbstate_t state = { 0 };
  size_t pos = 0;

  while (pos < patlen)
    {
      int clen = fgrep_icase_charlen (pat + pos, patlen - pos, &state);
      if (clen < 0)
        return false;
      pos += clen;
    }

  return true;
}
2326 
/* Change the pattern *KEYS_P, of size *LEN_P, from fgrep to grep
   style.  The old pattern is freed and *KEYS_P is set to a newly
   allocated pattern in which each single-byte '$', '*', '.', '[',
   '\\' or '^' is preceded by a backslash.  *LEN_P is set to the new
   size, and a newline is stored just past the new pattern's end.  */

void
fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
{
  char *src = *keys_p;
  size_t remaining = *len_p;
  mbstate_t state = { 0 };
  /* Even if every byte needs escaping, twice the size suffices.  */
  char *converted = xnmalloc (remaining + 1, 2);
  char *dst = converted;

  while (remaining != 0)
    {
      size_t clen = mb_clen (src, remaining, &state);

      /* Only a true single-byte character, or a byte that is an
         encoding error, is subject to escaping.  */
      bool single_byte = clen == 1 || clen == (size_t) -1;

      if (clen == (size_t) -1)
        {
          /* Encoding error: restart the conversion state and treat
             the offending byte as a character of its own.  */
          memset (&state, 0, sizeof state);
          clen = 1;
        }
      else if (clen == (size_t) -2)
        {
          /* Incomplete character: copy the whole remainder as is.  */
          clen = remaining;
        }

      if (single_byte)
        {
          char c = *src;
          if (c == '$' || c == '*' || c == '.' || c == '['
              || c == '\\' || c == '^')
            *dst++ = '\\';
          *dst++ = c;
        }
      else
        dst = mempcpy (dst, src, clen);

      src += clen;
      remaining -= clen;
    }

  *dst = '\n';
  free (*keys_p);
  *keys_p = converted;
  *len_p = dst - converted;
}
2371 
2372 /* If it is easy, convert the MATCHER-style patterns KEYS (of size
2373    *LEN_P) to -F style, update *LEN_P to a possibly-smaller value, and
2374    return F_MATCHER_INDEX.  If not, leave KEYS and *LEN_P alone and
2375    return MATCHER.  This function is conservative and sometimes misses
2376    conversions, e.g., it does not convert the -E pattern "(a|a|[aa])"
2377    to the -F pattern "a".  */
2378 
2379 static int
try_fgrep_pattern(int matcher,char * keys,size_t * len_p)2380 try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
2381 {
2382   int result = matcher;
2383   size_t len = *len_p;
2384   char *new_keys = xmalloc (len + 1);
2385   char *p = new_keys;
2386   char const *q = keys;
2387   mbstate_t mb_state = { 0 };
2388 
2389   while (len != 0)
2390     {
2391       switch (*q)
2392         {
2393         case '$': case '*': case '.': case '[': case '^':
2394           goto fail;
2395 
2396         case '(': case '+': case '?': case '{': case '|':
2397           /* There is no "case ')'" here, as "grep -E ')'" acts like
2398              "grep -E '\)'".  */
2399           if (matcher != G_MATCHER_INDEX)
2400             goto fail;
2401           break;
2402 
2403         case '\\':
2404           if (1 < len)
2405             switch (q[1])
2406               {
2407               case '\n':
2408               case 'B': case 'S': case 'W': case'\'': case '<':
2409               case 'b': case 's': case 'w': case '`': case '>':
2410               case '1': case '2': case '3': case '4':
2411               case '5': case '6': case '7': case '8': case '9':
2412                 goto fail;
2413 
2414               case '(': case '+': case '?': case '{': case '|':
2415                 /* Pass '\)' to GEAcompile so it can complain.  Otherwise,
2416                    "grep '\)'" would act like "grep ')'" while "grep '.*\)'
2417                    would be an error.  */
2418               case ')':
2419                 if (matcher == G_MATCHER_INDEX)
2420                   goto fail;
2421                 FALLTHROUGH;
2422               default:
2423                 q++, len--;
2424                 break;
2425               }
2426           break;
2427         }
2428 
2429       {
2430         size_t n;
2431         if (match_icase)
2432           {
2433             int ni = fgrep_icase_charlen (q, len, &mb_state);
2434             if (ni < 0)
2435               goto fail;
2436             n = ni;
2437           }
2438         else
2439           {
2440             n = mb_clen (q, len, &mb_state);
2441             if (MB_LEN_MAX < n)
2442               goto fail;
2443           }
2444 
2445         p = mempcpy (p, q, n);
2446         q += n;
2447         len -= n;
2448       }
2449     }
2450 
2451   if (*len_p != p - new_keys)
2452     {
2453       *len_p = p - new_keys;
2454       char *keys_end = mempcpy (keys, new_keys, p - new_keys);
2455       *keys_end = '\n';
2456     }
2457   result = F_MATCHER_INDEX;
2458 
2459  fail:
2460   free (new_keys);
2461   return result;
2462 }
2463 
2464 int
main(int argc,char ** argv)2465 main (int argc, char **argv)
2466 {
2467   char *keys = NULL;
2468   size_t keycc = 0, keyalloc = 0;
2469   int matcher = -1;
2470   int opt;
2471   int prev_optind, last_recursive;
2472   int fread_errno;
2473   intmax_t default_context;
2474   FILE *fp;
2475   exit_failure = EXIT_TROUBLE;
2476   initialize_main (&argc, &argv);
2477 
2478   /* Which command-line options have been specified for filename output.
2479      -1 for -h, 1 for -H, 0 for neither.  */
2480   int filename_option = 0;
2481 
2482   eolbyte = '\n';
2483   filename_mask = ~0;
2484 
2485   max_count = INTMAX_MAX;
2486 
2487   /* The value -1 means to use DEFAULT_CONTEXT. */
2488   out_after = out_before = -1;
2489   /* Default before/after context: changed by -C/-NUM options */
2490   default_context = -1;
2491   /* Changed by -o option */
2492   only_matching = false;
2493 
2494   /* Internationalization. */
2495 #if defined HAVE_SETLOCALE
2496   setlocale (LC_ALL, "");
2497 #endif
2498 #if defined ENABLE_NLS
2499   bindtextdomain (PACKAGE, LOCALEDIR);
2500   textdomain (PACKAGE);
2501 #endif
2502 
2503   init_localeinfo (&localeinfo);
2504 
2505   atexit (clean_up_stdout);
2506   c_stack_action (NULL);
2507 
2508   last_recursive = 0;
2509 
2510   pattern_table = hash_initialize (0, 0, hash_pattern, compare_patterns, 0);
2511   if (!pattern_table)
2512     xalloc_die ();
2513 
2514   while (prev_optind = optind,
2515          (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
2516     switch (opt)
2517       {
2518       case 'A':
2519         context_length_arg (optarg, &out_after);
2520         break;
2521 
2522       case 'B':
2523         context_length_arg (optarg, &out_before);
2524         break;
2525 
2526       case 'C':
2527         /* Set output match context, but let any explicit leading or
2528            trailing amount specified with -A or -B stand. */
2529         context_length_arg (optarg, &default_context);
2530         break;
2531 
2532       case 'D':
2533         if (STREQ (optarg, "read"))
2534           devices = READ_DEVICES;
2535         else if (STREQ (optarg, "skip"))
2536           devices = SKIP_DEVICES;
2537         else
2538           die (EXIT_TROUBLE, 0, _("unknown devices method"));
2539         break;
2540 
2541       case 'E':
2542         matcher = setmatcher ("egrep", matcher);
2543         break;
2544 
2545       case 'F':
2546         matcher = setmatcher ("fgrep", matcher);
2547         break;
2548 
2549       case 'P':
2550         matcher = setmatcher ("perl", matcher);
2551         break;
2552 
2553       case 'G':
2554         matcher = setmatcher ("grep", matcher);
2555         break;
2556 
2557       case 'X': /* undocumented on purpose */
2558         matcher = setmatcher (optarg, matcher);
2559         break;
2560 
2561       case 'H':
2562         filename_option = 1;
2563         break;
2564 
2565       case 'I':
2566         binary_files = WITHOUT_MATCH_BINARY_FILES;
2567         break;
2568 
2569       case 'T':
2570         align_tabs = true;
2571         break;
2572 
2573       case 'U':
2574         if (O_BINARY)
2575           binary = true;
2576         break;
2577 
2578       case 'u':
2579         /* Obsolete option; it had no effect; FIXME: remove in 2023  */
2580         error (0, 0, _("warning: --unix-byte-offsets (-u) is obsolete"));
2581         break;
2582 
2583       case 'V':
2584         show_version = true;
2585         break;
2586 
2587       case 'a':
2588         binary_files = TEXT_BINARY_FILES;
2589         break;
2590 
2591       case 'b':
2592         out_byte = true;
2593         break;
2594 
2595       case 'c':
2596         count_matches = true;
2597         break;
2598 
2599       case 'd':
2600         directories = XARGMATCH ("--directories", optarg,
2601                                  directories_args, directories_types);
2602         if (directories == RECURSE_DIRECTORIES)
2603           last_recursive = prev_optind;
2604         break;
2605 
2606       case 'e':
2607         {
2608           ptrdiff_t cc = strlen (optarg);
2609           if (keyalloc < keycc + cc + 1)
2610             {
2611               keyalloc = keycc + cc + 1;
2612               pattern_array = keys = x2realloc (keys, &keyalloc);
2613             }
2614           char *keyend = mempcpy (keys + keycc, optarg, cc);
2615           *keyend = '\n';
2616           keycc = update_patterns (keys, keycc, keycc + cc + 1, "");
2617         }
2618         break;
2619 
2620       case 'f':
2621         {
2622           if (STREQ (optarg, "-"))
2623             {
2624               if (binary)
2625                 xset_binary_mode (STDIN_FILENO, O_BINARY);
2626               fp = stdin;
2627             }
2628           else
2629             {
2630               fp = fopen (optarg, binary ? "rb" : "r");
2631               if (!fp)
2632                 die (EXIT_TROUBLE, errno, "%s", optarg);
2633             }
2634           ptrdiff_t newkeycc = keycc, cc;
2635           for (;; newkeycc += cc)
2636             {
2637               if (keyalloc <= newkeycc + 1)
2638                 pattern_array = keys = x2realloc (keys, &keyalloc);
2639               cc = fread (keys + newkeycc, 1, keyalloc - (newkeycc + 1), fp);
2640               if (cc == 0)
2641                 break;
2642             }
2643           fread_errno = errno;
2644           if (ferror (fp))
2645             die (EXIT_TROUBLE, fread_errno, "%s", optarg);
2646           if (fp != stdin)
2647             fclose (fp);
2648           /* Append final newline if file ended in non-newline. */
2649           if (newkeycc != keycc && keys[newkeycc - 1] != '\n')
2650             keys[newkeycc++] = '\n';
2651           keycc = update_patterns (keys, keycc, newkeycc, optarg);
2652         }
2653         break;
2654 
2655       case 'h':
2656         filename_option = -1;
2657         break;
2658 
2659       case 'i':
2660       case 'y':			/* For old-timers . . . */
2661         match_icase = true;
2662         break;
2663 
2664       case NO_IGNORE_CASE_OPTION:
2665         match_icase = false;
2666         break;
2667 
2668       case 'L':
2669         /* Like -l, except list files that don't contain matches.
2670            Inspired by the same option in Hume's gre. */
2671         list_files = LISTFILES_NONMATCHING;
2672         break;
2673 
2674       case 'l':
2675         list_files = LISTFILES_MATCHING;
2676         break;
2677 
2678       case 'm':
2679         switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
2680           {
2681           case LONGINT_OK:
2682           case LONGINT_OVERFLOW:
2683             break;
2684 
2685           default:
2686             die (EXIT_TROUBLE, 0, _("invalid max count"));
2687           }
2688         break;
2689 
2690       case 'n':
2691         out_line = true;
2692         break;
2693 
2694       case 'o':
2695         only_matching = true;
2696         break;
2697 
2698       case 'q':
2699         exit_on_match = true;
2700         exit_failure = 0;
2701         break;
2702 
2703       case 'R':
2704         fts_options = basic_fts_options | FTS_LOGICAL;
2705         FALLTHROUGH;
2706       case 'r':
2707         directories = RECURSE_DIRECTORIES;
2708         last_recursive = prev_optind;
2709         break;
2710 
2711       case 's':
2712         suppress_errors = true;
2713         break;
2714 
2715       case 'v':
2716         out_invert = true;
2717         break;
2718 
2719       case 'w':
2720         wordinit ();
2721         match_words = true;
2722         break;
2723 
2724       case 'x':
2725         match_lines = true;
2726         break;
2727 
2728       case 'Z':
2729         filename_mask = 0;
2730         break;
2731 
2732       case 'z':
2733         eolbyte = '\0';
2734         break;
2735 
2736       case BINARY_FILES_OPTION:
2737         if (STREQ (optarg, "binary"))
2738           binary_files = BINARY_BINARY_FILES;
2739         else if (STREQ (optarg, "text"))
2740           binary_files = TEXT_BINARY_FILES;
2741         else if (STREQ (optarg, "without-match"))
2742           binary_files = WITHOUT_MATCH_BINARY_FILES;
2743         else
2744           die (EXIT_TROUBLE, 0, _("unknown binary-files type"));
2745         break;
2746 
2747       case COLOR_OPTION:
2748         if (optarg)
2749           {
2750             if (!c_strcasecmp (optarg, "always")
2751                 || !c_strcasecmp (optarg, "yes")
2752                 || !c_strcasecmp (optarg, "force"))
2753               color_option = 1;
2754             else if (!c_strcasecmp (optarg, "never")
2755                      || !c_strcasecmp (optarg, "no")
2756                      || !c_strcasecmp (optarg, "none"))
2757               color_option = 0;
2758             else if (!c_strcasecmp (optarg, "auto")
2759                      || !c_strcasecmp (optarg, "tty")
2760                      || !c_strcasecmp (optarg, "if-tty"))
2761               color_option = 2;
2762             else
2763               show_help = 1;
2764           }
2765         else
2766           color_option = 2;
2767         break;
2768 
2769       case EXCLUDE_OPTION:
2770       case INCLUDE_OPTION:
2771         for (int cmd = 0; cmd < 2; cmd++)
2772           {
2773             if (!excluded_patterns[cmd])
2774               excluded_patterns[cmd] = new_exclude ();
2775             add_exclude (excluded_patterns[cmd], optarg,
2776                          ((opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)
2777                           | exclude_options (cmd)));
2778           }
2779         break;
2780       case EXCLUDE_FROM_OPTION:
2781         for (int cmd = 0; cmd < 2; cmd++)
2782           {
2783             if (!excluded_patterns[cmd])
2784               excluded_patterns[cmd] = new_exclude ();
2785             if (add_exclude_file (add_exclude, excluded_patterns[cmd],
2786                                   optarg, exclude_options (cmd), '\n')
2787                 != 0)
2788               die (EXIT_TROUBLE, errno, "%s", optarg);
2789           }
2790         break;
2791 
2792       case EXCLUDE_DIRECTORY_OPTION:
2793         strip_trailing_slashes (optarg);
2794         for (int cmd = 0; cmd < 2; cmd++)
2795           {
2796             if (!excluded_directory_patterns[cmd])
2797               excluded_directory_patterns[cmd] = new_exclude ();
2798             add_exclude (excluded_directory_patterns[cmd], optarg,
2799                          exclude_options (cmd));
2800           }
2801         break;
2802 
2803       case GROUP_SEPARATOR_OPTION:
2804         group_separator = optarg;
2805         break;
2806 
2807       case LINE_BUFFERED_OPTION:
2808         line_buffered = true;
2809         break;
2810 
2811       case LABEL_OPTION:
2812         label = optarg;
2813         break;
2814 
2815       case 0:
2816         /* long options */
2817         break;
2818 
2819       default:
2820         usage (EXIT_TROUBLE);
2821         break;
2822 
2823       }
2824 
2825   if (show_version)
2826     {
2827       version_etc (stdout, getprogname (), PACKAGE_NAME, VERSION,
2828                    (char *) NULL);
2829       puts (_("Written by Mike Haertel and others; see\n"
2830               "<https://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>."));
2831       return EXIT_SUCCESS;
2832     }
2833 
2834   if (show_help)
2835     usage (EXIT_SUCCESS);
2836 
2837   if (keys)
2838     {
2839       if (keycc == 0)
2840         {
2841           /* No keys were specified (e.g. -f /dev/null).  Match nothing.  */
2842           out_invert ^= true;
2843           match_lines = match_words = false;
2844           keys[keycc++] = '\n';
2845         }
2846     }
2847   else if (optind < argc)
2848     {
2849       /* Make a copy so that it can be reallocated or freed later.  */
2850       pattern_array = keys = xstrdup (argv[optind++]);
2851       ptrdiff_t patlen = strlen (keys);
2852       keys[patlen] = '\n';
2853       keycc = update_patterns (keys, 0, patlen + 1, "");
2854     }
2855   else
2856     usage (EXIT_TROUBLE);
2857 
2858   /* Strip trailing newline from keys.  */
2859   keycc--;
2860 
2861   hash_free (pattern_table);
2862 
2863   bool possibly_tty = false;
2864   struct stat tmp_stat;
2865   if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
2866     {
2867       if (S_ISREG (tmp_stat.st_mode))
2868         out_stat = tmp_stat;
2869       else if (S_ISCHR (tmp_stat.st_mode))
2870         {
2871           struct stat null_stat;
2872           if (stat ("/dev/null", &null_stat) == 0
2873               && SAME_INODE (tmp_stat, null_stat))
2874             dev_null_output = true;
2875           else
2876             possibly_tty = true;
2877         }
2878     }
2879 
2880   /* POSIX says -c, -l and -q are mutually exclusive.  In this
2881      implementation, -q overrides -l and -L, which in turn override -c.  */
2882   if (exit_on_match | dev_null_output)
2883     list_files = LISTFILES_NONE;
2884   if ((exit_on_match | dev_null_output) || list_files != LISTFILES_NONE)
2885     {
2886       count_matches = false;
2887       done_on_match = true;
2888     }
2889   out_quiet = count_matches | done_on_match;
2890 
2891   if (out_after < 0)
2892     out_after = default_context;
2893   if (out_before < 0)
2894     out_before = default_context;
2895 
2896   /* If it is easy to see that matching cannot succeed (e.g., 'grep -f
2897      /dev/null'), fail without reading the input.  */
2898   if ((max_count == 0
2899        || (keycc == 0 && out_invert && !match_lines && !match_words))
2900       && list_files != LISTFILES_NONMATCHING)
2901     return EXIT_FAILURE;
2902 
2903   if (color_option == 2)
2904     color_option = possibly_tty && should_colorize () && isatty (STDOUT_FILENO);
2905   init_colorize ();
2906 
2907   if (color_option)
2908     {
2909       /* Legacy.  */
2910       char *userval = getenv ("GREP_COLOR");
2911       if (userval != NULL && *userval != '\0')
2912         selected_match_color = context_match_color = userval;
2913 
2914       /* New GREP_COLORS has priority.  */
2915       parse_grep_colors ();
2916     }
2917 
2918   initialize_unibyte_mask ();
2919 
2920   if (matcher < 0)
2921     matcher = G_MATCHER_INDEX;
2922 
2923   if (matcher == F_MATCHER_INDEX
2924       || matcher == E_MATCHER_INDEX || matcher == G_MATCHER_INDEX)
2925     {
2926       if (match_icase)
2927         setup_ok_fold ();
2928 
2929       /* In a single-byte locale, switch from -F to -G if it is a single
2930          pattern that matches words, where -G is typically faster.  In a
2931          multibyte locale, switch if the patterns have an encoding error
2932          (where -F does not work) or if -i and the patterns will not work
2933          for -iF.  */
2934       if (matcher == F_MATCHER_INDEX)
2935         {
2936           if (! localeinfo.multibyte
2937               ? n_patterns == 1 && match_words
2938               : (contains_encoding_error (keys, keycc)
2939                  || (match_icase && !fgrep_icase_available (keys, keycc))))
2940             {
2941               fgrep_to_grep_pattern (&pattern_array, &keycc);
2942               keys = pattern_array;
2943               matcher = G_MATCHER_INDEX;
2944             }
2945         }
2946       /* With two or more patterns, if -F works then switch from either -E
2947          or -G, as -F is probably faster then.  */
2948       else if (1 < n_patterns)
2949         matcher = try_fgrep_pattern (matcher, keys, &keycc);
2950     }
2951 
2952   execute = matchers[matcher].execute;
2953   compiled_pattern =
2954     matchers[matcher].compile (keys, keycc, matchers[matcher].syntax,
2955                                only_matching | color_option);
2956   /* We need one byte prior and one after.  */
2957   char eolbytes[3] = { 0, eolbyte, 0 };
2958   size_t match_size;
2959   skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1,
2960                                 &match_size, NULL) == 0)
2961                       == out_invert);
2962 
2963   int num_operands = argc - optind;
2964   out_file = (filename_option == 0 && num_operands <= 1
2965               ? - (directories == RECURSE_DIRECTORIES)
2966               : 0 <= filename_option);
2967 
2968   if (binary)
2969     xset_binary_mode (STDOUT_FILENO, O_BINARY);
2970 
2971   /* Prefer sysconf for page size, as getpagesize typically returns int.  */
2972 #ifdef _SC_PAGESIZE
2973   long psize = sysconf (_SC_PAGESIZE);
2974 #else
2975   long psize = getpagesize ();
2976 #endif
2977   if (! (0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2))
2978     abort ();
2979   pagesize = psize;
2980   bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword);
2981   buffer = xmalloc (bufalloc);
2982 
2983   if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
2984     devices = READ_DEVICES;
2985 
2986   char *const *files;
2987   if (0 < num_operands)
2988     {
2989       files = argv + optind;
2990     }
2991   else if (directories == RECURSE_DIRECTORIES && 0 < last_recursive)
2992     {
2993       static char *const cwd_only[] = { (char *) ".", NULL };
2994       files = cwd_only;
2995       omit_dot_slash = true;
2996     }
2997   else
2998     {
2999       static char *const stdin_only[] = { (char *) "-", NULL };
3000       files = stdin_only;
3001     }
3002 
3003   bool status = true;
3004   do
3005     status &= grep_command_line_arg (*files++);
3006   while (*files != NULL);
3007 
3008   /* We register via atexit to test stdout.  */
3009   return errseen ? EXIT_TROUBLE : status;
3010 }
3011