1 /* grep.c - main driver file for grep.
2 Copyright (C) 1992, 1997-2002, 2004-2021 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
18
19 /* Written July 1992 by Mike Haertel. */
20
21 #include <config.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <wchar.h>
25 #include <inttypes.h>
26 #include <stdarg.h>
27 #include <stdint.h>
28 #include <stdio.h>
29 #include "system.h"
30
31 #include "argmatch.h"
32 #include "c-ctype.h"
33 #include "c-stack.h"
34 #include "closeout.h"
35 #include "colorize.h"
36 #include "die.h"
37 #include "error.h"
38 #include "exclude.h"
39 #include "exitfail.h"
40 #include "fcntl-safer.h"
41 #include "fts_.h"
42 #include "getopt.h"
43 #include "getprogname.h"
44 #include "grep.h"
45 #include "hash.h"
46 #include "intprops.h"
47 #include "propername.h"
48 #include "safe-read.h"
49 #include "search.h"
50 #include "c-strcase.h"
51 #include "version-etc.h"
52 #include "xalloc.h"
53 #include "xbinary-io.h"
54 #include "xstrtol.h"
55
/* Separator characters, named for selected and rejected lines
   respectively.  NOTE(review): their uses are outside this view;
   presumably they follow the file name, line number and byte offset
   prefixes of output lines -- confirm.  */
enum { SEP_CHAR_SELECTED = ':' };
enum { SEP_CHAR_REJECTED = '-' };
/* Initial value of GROUP_SEPARATOR.  */
static char const SEP_STR_GROUP[] = "--";
59
/* When stdout is connected to a regular file, save its stat
   information here, so that we can automatically skip it, thus
   avoiding a potential (racy) infinite loop.  */
static struct stat out_stat;

/* If nonzero, display usage information and exit.  This is an int
   rather than a bool because the "help" entry of LONG_OPTIONS passes
   its address as the getopt_long flag pointer.  */
static int show_help;

/* Print the version on standard output and exit.  */
static bool show_version;

/* Suppress diagnostics for nonexistent or unreadable files.  */
static bool suppress_errors;

/* If nonzero, use color markers.  color_cap_rv_fct sets this to -1,
   which still counts as nonzero.  */
static int color_option;

/* Show only the part of a line matching the expression.  */
static bool only_matching;

/* If true, make sure first content char in a line is on a tab stop.  */
static bool align_tabs;

/* Print width of line numbers and byte offsets.  Nonzero if ALIGN_TABS.  */
static int offset_width;
85
/* An entry in the PATLOC array saying where patterns came from.
   update_patterns adds an entry only for the first pattern of each
   run of consecutive retained patterns, so one entry can stand for
   several patterns.  */
struct patloc
{
  /* Line number of the pattern in PATTERN_ARRAY.  Line numbers
     start at 0, and each pattern is terminated by '\n'.  */
  ptrdiff_t lineno;

  /* Input location of the pattern.  The FILENAME "-" represents
     standard input, and "" represents the command line.  FILELINE is
     origin-1 for files and is irrelevant for the command line.  */
  char const *filename;
  ptrdiff_t fileline;
};
99
/* The array of pattern locations.  The concatenation of all patterns
   is stored in a single array, KEYS.  Given the invocation
   'grep -f <(seq 5) -f <(seq 6) -f <(seq 3)', there will initially be
   28 bytes in KEYS.  After duplicate patterns are removed, KEYS
   will have 12 bytes and PATLOC will be {0,x,1}, {10,y,1},
   where x, y and z are just place-holders for shell-generated names;
   z is omitted as it contains only duplicates.  Sometimes
   removing duplicates will grow PATLOC, since each run of
   removed patterns not at a file start or end requires another
   PATLOC entry for the first non-removed pattern.
   NOTE(review): the {10,y,1} example looks stale.  update_patterns
   stores the count of unique patterns in LINENO and the origin-1
   file line in FILELINE, which would give {5,y,6} here -- confirm.  */
static struct patloc *patloc;
/* Number of entries allocated for PATLOC, and number in use.  */
static size_t patlocs_allocated, patlocs_used;

/* Pointer to the array of patterns, each terminated by newline.  */
static char *pattern_array;

/* The number of unique patterns seen so far.  */
static size_t n_patterns;

/* Hash table of patterns seen so far.  */
static Hash_table *pattern_table;
121
122 /* Hash and compare newline-terminated patterns for textual equality.
123 Patterns are represented by origin-1 offsets into PATTERN_ARRAY,
124 cast to void *. The origin-1 is so that the first pattern offset
125 does not appear to be a null pointer when cast to void *. */
126 static size_t _GL_ATTRIBUTE_PURE
hash_pattern(void const * pat,size_t n_buckets)127 hash_pattern (void const *pat, size_t n_buckets)
128 {
129 size_t h = 0;
130 intptr_t pat_offset = (intptr_t) pat - 1;
131 unsigned char const *s = (unsigned char const *) pattern_array + pat_offset;
132 for ( ; *s != '\n'; s++)
133 h = h * 33 ^ *s;
134 return h % n_buckets;
135 }
136 static bool _GL_ATTRIBUTE_PURE
compare_patterns(void const * a,void const * b)137 compare_patterns (void const *a, void const *b)
138 {
139 intptr_t a_offset = (intptr_t) a - 1;
140 intptr_t b_offset = (intptr_t) b - 1;
141 char const *p = pattern_array + a_offset;
142 char const *q = pattern_array + b_offset;
143 for (; *p == *q; p++, q++)
144 if (*p == '\n')
145 return true;
146 return false;
147 }
148
149 /* Update KEYS to remove duplicate patterns, and return the number of
150 bytes in the resulting KEYS. KEYS contains a sequence of patterns
151 each terminated by '\n'. The first DUPFREE_SIZE bytes are a
152 sequence of patterns with no duplicates; SIZE is the total number
153 of bytes in KEYS. If some patterns past the first DUPFREE_SIZE
154 bytes are not duplicates, update PATLOCS accordingly. */
155 static ptrdiff_t
update_patterns(char * keys,ptrdiff_t dupfree_size,ptrdiff_t size,char const * filename)156 update_patterns (char *keys, ptrdiff_t dupfree_size, ptrdiff_t size,
157 char const *filename)
158 {
159 char *dst = keys + dupfree_size;
160 ptrdiff_t fileline = 1;
161 int prev_inserted = 0;
162
163 char const *srclim = keys + size;
164 ptrdiff_t patsize;
165 for (char const *src = keys + dupfree_size; src < srclim; src += patsize)
166 {
167 char const *patend = rawmemchr (src, '\n');
168 patsize = patend + 1 - src;
169 memmove (dst, src, patsize);
170
171 intptr_t dst_offset_1 = dst - keys + 1;
172 int inserted = hash_insert_if_absent (pattern_table,
173 (void *) dst_offset_1, NULL);
174 if (inserted)
175 {
176 if (inserted < 0)
177 xalloc_die ();
178 dst += patsize;
179
180 /* Add a PATLOCS entry unless this input line is simply the
181 next one in the same file. */
182 if (!prev_inserted)
183 {
184 if (patlocs_used == patlocs_allocated)
185 patloc = x2nrealloc (patloc, &patlocs_allocated,
186 sizeof *patloc);
187 patloc[patlocs_used++]
188 = (struct patloc) { .lineno = n_patterns,
189 .filename = filename,
190 .fileline = fileline };
191 }
192 n_patterns++;
193 }
194
195 prev_inserted = inserted;
196 fileline++;
197 }
198
199 return dst - keys;
200 }
201
202 /* Map LINENO, the origin-0 line number of one of the input patterns,
203 to the name of the file from which it came. Return "-" if it was
204 read from stdin, "" if it was specified on the command line.
205 Set *NEW_LINENO to the origin-1 line number of PATTERN in the file,
206 or to an unspecified value if PATTERN came from the command line. */
207 char const * _GL_ATTRIBUTE_PURE
pattern_file_name(size_t lineno,size_t * new_lineno)208 pattern_file_name (size_t lineno, size_t *new_lineno)
209 {
210 ptrdiff_t i;
211 for (i = 1; i < patlocs_used; i++)
212 if (lineno < patloc[i].lineno)
213 break;
214 *new_lineno = lineno - patloc[i - 1].lineno + patloc[i - 1].fileline;
215 return patloc[i - 1].filename;
216 }
217
#if HAVE_ASAN
/* Start address and length of the sole poisoned region, recorded so
   that it can be unpoisoned later, just before each following read.  */
static void const *poison_buf;
static size_t poison_len;

/* Unpoison the region recorded by asan_poison, if there is one.  */
static void
clear_asan_poison (void)
{
  if (! poison_buf)
    return;
  __asan_unpoison_memory_region (poison_buf, poison_len);
}

/* Poison the SIZE bytes starting at ADDR, and remember the region so
   that clear_asan_poison can undo it.  */
static void
asan_poison (void const *addr, size_t size)
{
  __asan_poison_memory_region (addr, size);
  poison_buf = addr;
  poison_len = size;
}
#else
/* Without HAVE_ASAN both functions are no-ops.  */
static void clear_asan_poison (void) { }
static void asan_poison (void const volatile *addr, size_t size) { }
#endif
243
/* The group separator used when context is requested.  It starts out
   as SEP_STR_GROUP.  */
static const char *group_separator = SEP_STR_GROUP;
246
247 /* The context and logic for choosing default --color screen attributes
248 (foreground and background colors, etc.) are the following.
249 -- There are eight basic colors available, each with its own
250 nominal luminosity to the human eye and foreground/background
251 codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
252 magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
253 yellow [89 %, 33/43], and white [100 %, 37/47]).
254 -- Sometimes, white as a background is actually implemented using
255 a shade of light gray, so that a foreground white can be visible
256 on top of it (but most often not).
257 -- Sometimes, black as a foreground is actually implemented using
258 a shade of dark gray, so that it can be visible on top of a
259 background black (but most often not).
260 -- Sometimes, more colors are available, as extensions.
261 -- Other attributes can be selected/deselected (bold [1/22],
262 underline [4/24], standout/inverse [7/27], blink [5/25], and
263 invisible/hidden [8/28]). They are sometimes implemented by
264 using colors instead of what their names imply; e.g., bold is
265 often achieved by using brighter colors. In practice, only bold
266 is really available to us, underline sometimes being mapped by
267 the terminal to some strange color choice, and standout best
268 being left for use by downstream programs such as less(1).
269 -- We cannot assume that any of the extensions or special features
270 are available for the purpose of choosing defaults for everyone.
271 -- The most prevalent default terminal backgrounds are pure black
272 and pure white, and are not necessarily the same shades of
273 those as if they were selected explicitly with SGR sequences.
274 Some terminals use dark or light pictures as default background,
275 but those are covered over by an explicit selection of background
276 color with an SGR sequence; their users will appreciate their
background pictures not being covered like this, if possible.
-- Some uses of color attributes are to make some output items
279 more understated (e.g., context lines); this cannot be achieved
280 by changing the background color.
281 -- For these reasons, the grep color defaults should strive not
282 to change the background color from its default, unless it's
283 for a short item that should be highlighted, not understated.
284 -- The grep foreground color defaults (without an explicitly set
285 background) should provide enough contrast to be readable on any
286 terminal with either a black (dark) or white (light) background.
287 This only leaves red, magenta, green, and cyan (and their bold
288 counterparts) and possibly bold blue. */
/* The color strings used for matched text.
   The user can overwrite them using the deprecated
   environment variable GREP_COLOR or the new GREP_COLORS.
   pr_sgr_start and pr_sgr_end do nothing for an empty string, so ""
   leaves the corresponding item uncolored.  */
static const char *selected_match_color = "01;31"; /* bold red */
static const char *context_match_color = "01;31"; /* bold red */

/* Other colors.  Defaults look damn good.  */
static const char *filename_color = "35"; /* magenta */
static const char *line_num_color = "32"; /* green */
static const char *byte_num_color = "32"; /* green */
static const char *sep_color = "36"; /* cyan */
static const char *selected_line_color = ""; /* default color pair */
static const char *context_line_color = ""; /* default color pair */
302
303 /* Select Graphic Rendition (SGR, "\33[...m") strings. */
304 /* Also Erase in Line (EL) to Right ("\33[K") by default. */
305 /* Why have EL to Right after SGR?
306 -- The behavior of line-wrapping when at the bottom of the
307 terminal screen and at the end of the current line is often
308 such that a new line is introduced, entirely cleared with
309 the current background color which may be different from the
310 default one (see the boolean back_color_erase terminfo(5)
311 capability), thus scrolling the display by one line.
312 The end of this new line will stay in this background color
even after reverting to the default background color with
"\33[m", unless it is explicitly cleared again with "\33[K"
315 (which is the behavior the user would instinctively expect
316 from the whole thing). There may be some unavoidable
317 background-color flicker at the end of this new line because
318 of this (when timing with the monitor's redraw is just right).
319 -- The behavior of HT (tab, "\t") is usually the same as that of
320 Cursor Forward Tabulation (CHT) with a default parameter
321 of 1 ("\33[I"), i.e., it performs pure movement to the next
322 tab stop, without any clearing of either content or screen
323 attributes (including background color); try
324 printf 'asdfqwerzxcv\rASDF\tZXCV\n'
325 in a bash(1) shell to demonstrate this. This is not what the
326 user would instinctively expect of HT (but is ok for CHT).
327 The instinctive behavior would include clearing the terminal
328 cells that are skipped over by HT with blank cells in the
329 current screen attributes, including background color;
330 the boolean dest_tabs_magic_smso terminfo(5) capability
331 indicates this saner behavior for HT, but only some rare
332 terminals have it (although it also indicates a special
333 glitch with standout mode in the Teleray terminal for which
it was initially introduced). The remedy is to add "\33[K"
335 after each SGR sequence, be it START (to fix the behavior
336 of any HT after that before another SGR) or END (to fix the
337 behavior of an HT in default background color that would
338 follow a line-wrapping at the bottom of the screen in another
339 background color, and to complement doing it after START).
340 Piping grep's output through a pager such as less(1) avoids
341 any HT problems since the pager performs tab expansion.
342
343 Generic disadvantages of this remedy are:
344 -- Some very rare terminals might support SGR but not EL (nobody
345 will use "grep --color" on a terminal that does not support
346 SGR in the first place).
347 -- Having these extra control sequences might somewhat complicate
348 the task of any program trying to parse "grep --color"
349 output in order to extract structuring information from it.
350 A specific disadvantage to doing it after SGR START is:
351 -- Even more possible background color flicker (when timing
352 with the monitor's redraw is just right), even when not at the
353 bottom of the screen.
354 There are no additional disadvantages specific to doing it after
355 SGR END.
356
357 It would be impractical for GNU grep to become a full-fledged
358 terminal program linked against ncurses or the like, so it will
359 not detect terminfo(5) capabilities. */
/* Strings handed to print_start_colorize and print_end_colorize by
   pr_sgr_start and pr_sgr_end.  Each is an SGR sequence followed by
   EL to Right ("\33[K"); color_cap_ne_fct replaces them with variants
   lacking the EL.  NOTE(review): presumably the %s in SGR_START
   receives the color string -- confirm in print_start_colorize.  */
static const char *sgr_start = "\33[%sm\33[K";
static const char *sgr_end = "\33[m\33[K";
362
363 /* SGR utility functions. */
364 static void
pr_sgr_start(char const * s)365 pr_sgr_start (char const *s)
366 {
367 if (*s)
368 print_start_colorize (sgr_start, s);
369 }
370 static void
pr_sgr_end(char const * s)371 pr_sgr_end (char const *s)
372 {
373 if (*s)
374 print_end_colorize (sgr_end);
375 }
376 static void
pr_sgr_start_if(char const * s)377 pr_sgr_start_if (char const *s)
378 {
379 if (color_option)
380 pr_sgr_start (s);
381 }
382 static void
pr_sgr_end_if(char const * s)383 pr_sgr_end_if (char const *s)
384 {
385 if (color_option)
386 pr_sgr_end (s);
387 }
388
/* One capability that can appear in GREP_COLORS; see COLOR_DICT.  */
struct color_cap
{
  /* Capability name, e.g. "mt".  Null in the entry that ends
     COLOR_DICT.  */
  const char *name;
  /* Address of the color string variable that the capability
     refers to, or null if there is none.  */
  const char **var;
  /* Extra action associated with the capability, or null.  */
  void (*fct) (void);
};
395
/* Action for the "mt" capability: make the color of matched text in
   context lines the same as in selected lines.  */
static void
color_cap_mt_fct (void)
{
  /* Our caller just set selected_match_color.  */
  context_match_color = selected_match_color;
}
402
/* Action for the "rv" capability: record it by making COLOR_OPTION
   negative.  */
static void
color_cap_rv_fct (void)
{
  /* By this point, it was 1 (or already -1).  */
  color_option = -1; /* That's still != 0.  */
}
409
/* Action for the "ne" capability: switch SGR_START and SGR_END to
   variants without the trailing Erase in Line ("\33[K").  */
static void
color_cap_ne_fct (void)
{
  sgr_start = "\33[%sm";
  sgr_end = "\33[m";
}
416
417 /* For GREP_COLORS. */
418 static const struct color_cap color_dict[] =
419 {
420 { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
421 { "ms", &selected_match_color, NULL }, /* selected matched text */
422 { "mc", &context_match_color, NULL }, /* context matched text */
423 { "fn", &filename_color, NULL }, /* filename */
424 { "ln", &line_num_color, NULL }, /* line number */
425 { "bn", &byte_num_color, NULL }, /* byte (sic) offset */
426 { "se", &sep_color, NULL }, /* separator */
427 { "sl", &selected_line_color, NULL }, /* selected lines */
428 { "cx", &context_line_color, NULL }, /* context lines */
429 { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */
430 { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */
431 { NULL, NULL, NULL }
432 };
433
/* Saved errno value from failed output functions on stdout.
   clean_up_stdout does not close stdout when this is nonzero.  */
static int stdout_errno;
436
437 static void
putchar_errno(int c)438 putchar_errno (int c)
439 {
440 if (putchar (c) < 0)
441 stdout_errno = errno;
442 }
443
444 static void
fputs_errno(char const * s)445 fputs_errno (char const *s)
446 {
447 if (fputs (s, stdout) < 0)
448 stdout_errno = errno;
449 }
450
451 static void _GL_ATTRIBUTE_FORMAT_PRINTF_STANDARD (1, 2)
printf_errno(char const * format,...)452 printf_errno (char const *format, ...)
453 {
454 va_list ap;
455 va_start (ap, format);
456 if (vfprintf (stdout, format, ap) < 0)
457 stdout_errno = errno;
458 va_end (ap);
459 }
460
461 static void
fwrite_errno(void const * ptr,size_t size,size_t nmemb)462 fwrite_errno (void const *ptr, size_t size, size_t nmemb)
463 {
464 if (fwrite (ptr, size, nmemb, stdout) != nmemb)
465 stdout_errno = errno;
466 }
467
468 static void
fflush_errno(void)469 fflush_errno (void)
470 {
471 if (fflush (stdout) != 0)
472 stdout_errno = errno;
473 }
474
/* Exclusion patterns for non-directories and for directories.
   skipped_file indexes each array by whether the name is a
   command-line argument, and treats a null element as matching
   nothing.  */
static struct exclude *excluded_patterns[2];
static struct exclude *excluded_directory_patterns[2];
/* Short options.  */
static char const short_options[] =
"0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
480
/* Non-boolean long options that have no corresponding short equivalents.
   The values start at CHAR_MAX + 1, so none of them can equal a
   short option character.  */
enum
{
  BINARY_FILES_OPTION = CHAR_MAX + 1,
  COLOR_OPTION,
  EXCLUDE_DIRECTORY_OPTION,
  EXCLUDE_OPTION,
  EXCLUDE_FROM_OPTION,
  GROUP_SEPARATOR_OPTION,
  INCLUDE_OPTION,
  LINE_BUFFERED_OPTION,
  LABEL_OPTION,
  NO_IGNORE_CASE_OPTION
};
495
/* Long options equivalences.  Each entry is a getopt_long
   'struct option': { name, has_arg, flag, val }.  Where FLAG is null,
   VAL is either the equivalent short option character or one of the
   *_OPTION enumerators.  "help" instead stores 1 in SHOW_HELP.
   "group-separator" and "no-group-separator" share
   GROUP_SEPARATOR_OPTION and differ only in whether they take an
   argument.  The all-zero entry ends the table.  */
static struct option const long_options[] =
{
  {"basic-regexp", no_argument, NULL, 'G'},
  {"extended-regexp", no_argument, NULL, 'E'},
  {"fixed-regexp", no_argument, NULL, 'F'},
  {"fixed-strings", no_argument, NULL, 'F'},
  {"perl-regexp", no_argument, NULL, 'P'},
  {"after-context", required_argument, NULL, 'A'},
  {"before-context", required_argument, NULL, 'B'},
  {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
  {"byte-offset", no_argument, NULL, 'b'},
  {"context", required_argument, NULL, 'C'},
  {"color", optional_argument, NULL, COLOR_OPTION},
  {"colour", optional_argument, NULL, COLOR_OPTION},
  {"count", no_argument, NULL, 'c'},
  {"devices", required_argument, NULL, 'D'},
  {"directories", required_argument, NULL, 'd'},
  {"exclude", required_argument, NULL, EXCLUDE_OPTION},
  {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
  {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
  {"file", required_argument, NULL, 'f'},
  {"files-with-matches", no_argument, NULL, 'l'},
  {"files-without-match", no_argument, NULL, 'L'},
  {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
  {"help", no_argument, &show_help, 1},
  {"include", required_argument, NULL, INCLUDE_OPTION},
  {"ignore-case", no_argument, NULL, 'i'},
  {"no-ignore-case", no_argument, NULL, NO_IGNORE_CASE_OPTION},
  {"initial-tab", no_argument, NULL, 'T'},
  {"label", required_argument, NULL, LABEL_OPTION},
  {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
  {"line-number", no_argument, NULL, 'n'},
  {"line-regexp", no_argument, NULL, 'x'},
  {"max-count", required_argument, NULL, 'm'},

  {"no-filename", no_argument, NULL, 'h'},
  {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
  {"no-messages", no_argument, NULL, 's'},
  {"null", no_argument, NULL, 'Z'},
  {"null-data", no_argument, NULL, 'z'},
  {"only-matching", no_argument, NULL, 'o'},
  {"quiet", no_argument, NULL, 'q'},
  {"recursive", no_argument, NULL, 'r'},
  {"dereference-recursive", no_argument, NULL, 'R'},
  {"regexp", required_argument, NULL, 'e'},
  {"invert-match", no_argument, NULL, 'v'},
  {"silent", no_argument, NULL, 'q'},
  {"text", no_argument, NULL, 'a'},
  {"binary", no_argument, NULL, 'U'},
  {"unix-byte-offsets", no_argument, NULL, 'u'},
  {"version", no_argument, NULL, 'V'},
  {"with-filename", no_argument, NULL, 'H'},
  {"word-regexp", no_argument, NULL, 'w'},
  {0, 0, 0, 0}
};
552
/* Define flags declared in grep.h.  */
bool match_icase;
bool match_words;
bool match_lines;
char eolbyte;

/* For error messages.  */
/* The input file name, or (if standard input) null or a --label argument.
   input_filename replaces a null value with a placeholder string.  */
static char const *filename;
/* Omit leading "./" from file names in diagnostics.  */
static bool omit_dot_slash;
/* Set by suppressible_error, whether or not the diagnostic itself
   was suppressed.  */
static bool errseen;

/* True if output from the current input file has been suppressed
   because an output line had an encoding error.  */
static bool encoding_error_output;
569
/* The ways of handling a directory.  */
enum directories_type
{
  READ_DIRECTORIES = 2,
  RECURSE_DIRECTORIES,
  SKIP_DIRECTORIES
};

/* How to handle directories.  DIRECTORIES_ARGS lists the accepted
   argument strings and DIRECTORIES_TYPES the corresponding values, in
   the same order.  NOTE(review): ARGMATCH_VERIFY presumably checks
   that the two arrays correspond in length -- confirm in argmatch.h.  */
static char const *const directories_args[] =
{
  "read", "recurse", "skip", NULL
};
static enum directories_type const directories_types[] =
{
  READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES
};
ARGMATCH_VERIFY (directories_args, directories_types);

/* The handling currently in effect; reading is the initial value.  */
static enum directories_type directories = READ_DIRECTORIES;
589
/* FTS flags common to every traversal, and the full initial set,
   which adds FTS_COMFOLLOW and FTS_PHYSICAL to them.  */
enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
592
/* How to handle devices.  Under the initial setting,
   READ_COMMAND_LINE_DEVICES, skip_devices reports that a device is to
   be skipped unless it is a command-line argument.  */
static enum
{
  READ_COMMAND_LINE_DEVICES,
  READ_DEVICES,
  SKIP_DEVICES
} devices = READ_COMMAND_LINE_DEVICES;
600
/* Forward declarations of static functions defined elsewhere in
   this file.  */
static bool grepfile (int, char const *, bool, bool);
static bool grepdesc (int, bool);
603
/* Return true if M is the mode of a character special file, a block
   special file, a socket, or a FIFO.  */
static bool
is_device_mode (mode_t m)
{
  if (S_ISCHR (m) || S_ISBLK (m))
    return true;
  return S_ISSOCK (m) || S_ISFIFO (m);
}
609
610 static bool
skip_devices(bool command_line)611 skip_devices (bool command_line)
612 {
613 return (devices == SKIP_DEVICES
614 || ((devices == READ_COMMAND_LINE_DEVICES) & !command_line));
615 }
616
/* Return true if ST->st_size is defined, i.e., if ST describes a
   regular file, a shared memory object, or a typed memory object.
   Assume the file is not a symbolic link.  */
static bool
usable_st_size (struct stat const *st)
{
  if (S_ISREG (st->st_mode))
    return true;
  return S_TYPEISSHM (st) || S_TYPEISTMO (st);
}
624
/* Lame substitutes for SEEK_DATA and SEEK_HOLE on platforms lacking them.
   Do not rely on these finding data or holes if they equal SEEK_SET.
   Code can test, e.g., SEEK_HOLE != SEEK_SET to see whether the real
   thing is available, as file_must_have_nulls does.  */
#ifndef SEEK_DATA
enum { SEEK_DATA = SEEK_SET };
#endif
#ifndef SEEK_HOLE
enum { SEEK_HOLE = SEEK_SET };
#endif

/* True if lseek with SEEK_CUR or SEEK_DATA failed on the current input.
   SEEK_FAILED is for SEEK_CUR and SEEK_DATA_FAILED is for SEEK_DATA.  */
static bool seek_failed;
static bool seek_data_failed;
637
/* Functions we'll use to search.  NOTE(review): presumably
   COMPILED_PATTERN holds what a compile_fp_t function returned, and
   is what EXECUTE receives as its first argument -- confirm against
   the code that sets them.  */
typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t, bool);
typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *,
                                char const *);
static execute_fp_t execute;
static void *compiled_pattern;
644
645 char const *
input_filename(void)646 input_filename (void)
647 {
648 if (!filename)
649 filename = _("(standard input)");
650 return filename;
651 }
652
653 /* Unless requested, diagnose an error about the input file. */
654 static void
suppressible_error(int errnum)655 suppressible_error (int errnum)
656 {
657 if (! suppress_errors)
658 error (0, errnum, "%s", input_filename ());
659 errseen = true;
660 }
661
662 /* If there has already been a write error, don't bother closing
663 standard output, as that might elicit a duplicate diagnostic. */
664 static void
clean_up_stdout(void)665 clean_up_stdout (void)
666 {
667 if (! stdout_errno)
668 close_stdout ();
669 }
670
/* A cast to TYPE of VAL.  Use this when TYPE is a pointer type, VAL
   is properly aligned for TYPE, and 'gcc -Wcast-align' cannot infer
   the alignment and would otherwise complain about the cast.
   The first variant is selected when __GNUC__ is at least 5, or is 4
   with __GNUC_MINOR__ at least 6.  It is a statement expression that
   evaluates VAL once and performs the cast with -Wcast-align
   diagnostics ignored.  The fallback is a plain cast.  */
#if 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
# define CAST_ALIGNED(type, val)                           \
    ({ __typeof__ (val) val_ = val;                        \
       _Pragma ("GCC diagnostic push")                     \
       _Pragma ("GCC diagnostic ignored \"-Wcast-align\"") \
       (type) val_;                                        \
       _Pragma ("GCC diagnostic pop")                      \
    })
#else
# define CAST_ALIGNED(type, val) ((type) (val))
#endif
685
/* An unsigned type suitable for fast matching.  */
typedef uintmax_t uword;
static uword const uword_max = UINTMAX_MAX;

/* Locale information.  initialize_unibyte_mask consults its sbclen
   array, indexed by byte value.  */
struct localeinfo localeinfo;

/* A mask to test for unibyte characters, with the pattern repeated to
   fill a uword.  For a multibyte character encoding where
   all bytes are unibyte characters, this is 0.  For UTF-8, this is
   0x808080....  For encodings where unibyte characters have no discerned
   pattern, this is all 1s.  The unsigned char C is a unibyte
   character if C & UNIBYTE_MASK is zero.  If the uword W is the
   concatenation of bytes, the bytes are all unibyte characters
   if W & UNIBYTE_MASK is zero.  */
static uword unibyte_mask;
701
702 static void
initialize_unibyte_mask(void)703 initialize_unibyte_mask (void)
704 {
705 /* For each encoding error I that MASK does not already match,
706 accumulate I's most significant 1 bit by ORing it into MASK.
707 Although any 1 bit of I could be used, in practice high-order
708 bits work better. */
709 unsigned char mask = 0;
710 int ms1b = 1;
711 for (int i = 1; i <= UCHAR_MAX; i++)
712 if ((localeinfo.sbclen[i] != 1) & ! (mask & i))
713 {
714 while (ms1b * 2 <= i)
715 ms1b *= 2;
716 mask |= ms1b;
717 }
718
719 /* Now MASK will detect any encoding-error byte, although it may
720 cry wolf and it may not be optimal. Build a uword-length mask by
721 repeating MASK. */
722 unibyte_mask = uword_max / UCHAR_MAX * mask;
723 }
724
725 /* Skip the easy bytes in a buffer that is guaranteed to have a sentinel
726 that is not easy, and return a pointer to the first non-easy byte.
727 The easy bytes all have UNIBYTE_MASK off. */
728 static char const * _GL_ATTRIBUTE_PURE
skip_easy_bytes(char const * buf)729 skip_easy_bytes (char const *buf)
730 {
731 /* Search a byte at a time until the pointer is aligned, then a
732 uword at a time until a match is found, then a byte at a time to
733 identify the exact byte. The uword search may go slightly past
734 the buffer end, but that's benign. */
735 char const *p;
736 uword const *s;
737 for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++)
738 if (to_uchar (*p) & unibyte_mask)
739 return p;
740 for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++)
741 continue;
742 for (p = (char const *) s; ! (to_uchar (*p) & unibyte_mask); p++)
743 continue;
744 return p;
745 }
746
747 /* Return true if BUF, of size SIZE, has an encoding error.
748 BUF must be followed by at least sizeof (uword) bytes,
749 the first of which may be modified. */
750 static bool
buf_has_encoding_errors(char * buf,size_t size)751 buf_has_encoding_errors (char *buf, size_t size)
752 {
753 if (! unibyte_mask)
754 return false;
755
756 mbstate_t mbs = { 0 };
757 size_t clen;
758
759 buf[size] = -1;
760 for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
761 {
762 clen = mbrlen (p, buf + size - p, &mbs);
763 if ((size_t) -2 <= clen)
764 return true;
765 }
766
767 return false;
768 }
769
770
/* Return true if the SIZE bytes at BUF contain a null byte.
   BUF must be followed by at least one byte,
   which may be arbitrarily written to or read from.  */
static bool
buf_has_nulls (char *buf, size_t size)
{
  /* Plant a terminator just past the data, so that strlen stops
     there if it finds no earlier null byte.  */
  buf[size] = '\0';
  size_t first_null = strlen (buf);
  return first_null != size;
}
780
781 /* Return true if a file is known to contain null bytes.
782 SIZE bytes have already been read from the file
783 with descriptor FD and status ST. */
784 static bool
file_must_have_nulls(size_t size,int fd,struct stat const * st)785 file_must_have_nulls (size_t size, int fd, struct stat const *st)
786 {
787 /* If the file has holes, it must contain a null byte somewhere. */
788 if (SEEK_HOLE != SEEK_SET && !seek_failed
789 && usable_st_size (st) && size < st->st_size)
790 {
791 off_t cur = size;
792 if (O_BINARY || fd == STDIN_FILENO)
793 {
794 cur = lseek (fd, 0, SEEK_CUR);
795 if (cur < 0)
796 return false;
797 }
798
799 /* Look for a hole after the current location. */
800 off_t hole_start = lseek (fd, cur, SEEK_HOLE);
801 if (0 <= hole_start)
802 {
803 if (lseek (fd, cur, SEEK_SET) < 0)
804 suppressible_error (errno);
805 if (hole_start < st->st_size)
806 return true;
807 }
808 }
809
810 return false;
811 }
812
813 /* Convert STR to a nonnegative integer, storing the result in *OUT.
814 STR must be a valid context length argument; report an error if it
815 isn't. Silently ceiling *OUT at the maximum value, as that is
816 practically equivalent to infinity for grep's purposes. */
817 static void
context_length_arg(char const * str,intmax_t * out)818 context_length_arg (char const *str, intmax_t *out)
819 {
820 switch (xstrtoimax (str, 0, 10, out, ""))
821 {
822 case LONGINT_OK:
823 case LONGINT_OVERFLOW:
824 if (0 <= *out)
825 break;
826 FALLTHROUGH;
827 default:
828 die (EXIT_TROUBLE, 0, "%s: %s", str,
829 _("invalid context length argument"));
830 }
831 }
832
833 /* Return the add_exclude options suitable for excluding a file name.
834 If COMMAND_LINE, it is a command-line file name. */
835 static int
exclude_options(bool command_line)836 exclude_options (bool command_line)
837 {
838 return EXCLUDE_WILDCARDS | (command_line ? 0 : EXCLUDE_ANCHORED);
839 }
840
841 /* Return true if the file with NAME should be skipped.
842 If COMMAND_LINE, it is a command-line argument.
843 If IS_DIR, it is a directory. */
844 static bool
skipped_file(char const * name,bool command_line,bool is_dir)845 skipped_file (char const *name, bool command_line, bool is_dir)
846 {
847 struct exclude **pats;
848 if (! is_dir)
849 pats = excluded_patterns;
850 else if (directories == SKIP_DIRECTORIES)
851 return true;
852 else if (command_line && omit_dot_slash)
853 return false;
854 else
855 pats = excluded_directory_patterns;
856 return pats[command_line] && excluded_file_name (pats[command_line], name);
857 }
858
/* Hairy buffering mechanism for grep.  The intent is to keep
   all reads aligned on a page boundary and multiples of the
   page size, unless a read yields a partial page.  */

static char *buffer;            /* Base of buffer. */
static size_t bufalloc;         /* Allocated buffer size, counting slop. */
static int bufdesc;             /* File descriptor. */
static char *bufbeg;            /* Beginning of user-visible stuff. */
static char *buflim;            /* Limit of user-visible stuff. */
static size_t pagesize;         /* alignment of memory pages */
static off_t bufoffset;         /* Read offset. */
static off_t after_last_match;  /* Pointer after last matching line that
                                   would have been output if we were
                                   outputting characters.  Despite
                                   "pointer", this is an off_t. */
static bool skip_nuls;          /* Skip '\0' in data. */
static bool skip_empty_lines;   /* Skip empty lines in data. */
static uintmax_t totalnl;       /* Total newline count before lastnl. */

/* Initial buffer size, not counting slop. */
enum { INITIAL_BUFSIZE = 96 * 1024 };
879
/* Return VAL aligned to the next multiple of ALIGNMENT.  VAL can be
   an integer or a pointer.  Both args must be free of side effects,
   because each is evaluated more than once.  A VAL that is already a
   multiple of ALIGNMENT is returned unchanged.  */
#define ALIGN_TO(val, alignment) \
  ((uintptr_t) (val) % (alignment) == 0 \
   ? (val) \
   : (val) + ((alignment) - (uintptr_t) (val) % (alignment)))
886
887 /* Add two numbers that count input bytes or lines, and report an
888 error if the addition overflows. */
889 static uintmax_t
add_count(uintmax_t a,uintmax_t b)890 add_count (uintmax_t a, uintmax_t b)
891 {
892 uintmax_t sum = a + b;
893 if (sum < a)
894 die (EXIT_TROUBLE, 0, _("input is too large to count"));
895 return sum;
896 }
897
/* Return true if BUF (of size SIZE) is all zeros.
   An empty buffer (SIZE == 0) counts as all zeros.  */
static bool
all_zeros (char const *buf, size_t size)
{
  for (char const *p = buf; p < buf + size; p++)
    if (*p)
      return false;
  return true;
}
907
908 /* Reset the buffer for a new file, returning false if we should skip it.
909 Initialize on the first time through. */
910 static bool
reset(int fd,struct stat const * st)911 reset (int fd, struct stat const *st)
912 {
913 bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
914 bufbeg[-1] = eolbyte;
915 bufdesc = fd;
916 bufoffset = fd == STDIN_FILENO ? lseek (fd, 0, SEEK_CUR) : 0;
917 seek_failed = bufoffset < 0;
918
919 /* Assume SEEK_DATA fails if SEEK_CUR does. */
920 seek_data_failed = seek_failed;
921
922 if (seek_failed)
923 {
924 if (errno != ESPIPE)
925 {
926 suppressible_error (errno);
927 return false;
928 }
929 bufoffset = 0;
930 }
931 return true;
932 }
933
934 /* Read new stuff into the buffer, saving the specified
935 amount of old stuff. When we're done, 'bufbeg' points
936 to the beginning of the buffer contents, and 'buflim'
937 points just after the end. Return false if there's an error. */
938 static bool
fillbuf(size_t save,struct stat const * st)939 fillbuf (size_t save, struct stat const *st)
940 {
941 size_t fillsize;
942 bool cc = true;
943 char *readbuf;
944 size_t readsize;
945
946 if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
947 readbuf = buflim;
948 else
949 {
950 size_t minsize = save + pagesize;
951 size_t newsize;
952 size_t newalloc;
953 char *newbuf;
954
955 /* Grow newsize until it is at least as great as minsize. */
956 for (newsize = bufalloc - pagesize - sizeof (uword);
957 newsize < minsize;
958 newsize *= 2)
959 if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
960 xalloc_die ();
961
962 /* Try not to allocate more memory than the file size indicates,
963 as that might cause unnecessary memory exhaustion if the file
964 is large. However, do not use the original file size as a
965 heuristic if we've already read past the file end, as most
966 likely the file is growing. */
967 if (usable_st_size (st))
968 {
969 off_t to_be_read = st->st_size - bufoffset;
970 off_t maxsize_off = save + to_be_read;
971 if (0 <= to_be_read && to_be_read <= maxsize_off
972 && maxsize_off == (size_t) maxsize_off
973 && minsize <= (size_t) maxsize_off
974 && (size_t) maxsize_off < newsize)
975 newsize = maxsize_off;
976 }
977
978 /* Add enough room so that the buffer is aligned and has room
979 for byte sentinels fore and aft, and so that a uword can
980 be read aft. */
981 newalloc = newsize + pagesize + sizeof (uword);
982
983 newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
984 readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
985 size_t moved = save + 1; /* Move the preceding byte sentinel too. */
986 memmove (readbuf - moved, buflim - moved, moved);
987 if (newbuf != buffer)
988 {
989 free (buffer);
990 buffer = newbuf;
991 }
992 }
993
994 bufbeg = readbuf - save;
995
996 clear_asan_poison ();
997
998 readsize = buffer + bufalloc - sizeof (uword) - readbuf;
999 readsize -= readsize % pagesize;
1000
1001 while (true)
1002 {
1003 fillsize = safe_read (bufdesc, readbuf, readsize);
1004 if (fillsize == SAFE_READ_ERROR)
1005 {
1006 fillsize = 0;
1007 cc = false;
1008 }
1009 bufoffset += fillsize;
1010
1011 if (((fillsize == 0) | !skip_nuls) || !all_zeros (readbuf, fillsize))
1012 break;
1013 totalnl = add_count (totalnl, fillsize);
1014
1015 if (SEEK_DATA != SEEK_SET && !seek_data_failed)
1016 {
1017 /* Solaris SEEK_DATA fails with errno == ENXIO in a hole at EOF. */
1018 off_t data_start = lseek (bufdesc, bufoffset, SEEK_DATA);
1019 if (data_start < 0 && errno == ENXIO
1020 && usable_st_size (st) && bufoffset < st->st_size)
1021 data_start = lseek (bufdesc, 0, SEEK_END);
1022
1023 if (data_start < 0)
1024 seek_data_failed = true;
1025 else
1026 {
1027 totalnl = add_count (totalnl, data_start - bufoffset);
1028 bufoffset = data_start;
1029 }
1030 }
1031 }
1032
1033 buflim = readbuf + fillsize;
1034
1035 /* Initialize the following word, because skip_easy_bytes and some
1036 matchers read (but do not use) those bytes. This avoids false
1037 positive reports of these bytes being used uninitialized. */
1038 memset (buflim, 0, sizeof (uword));
1039
1040 /* Mark the part of the buffer not filled by the read or set by
1041 the above memset call as ASAN-poisoned. */
1042 asan_poison (buflim + sizeof (uword),
1043 bufalloc - (buflim - buffer) - sizeof (uword));
1044
1045 return cc;
1046 }
1047
/* Flags controlling the style of output.  */
static enum
{
  BINARY_BINARY_FILES,
  TEXT_BINARY_FILES,
  WITHOUT_MATCH_BINARY_FILES
} binary_files;         /* How to handle binary files.  */

/* Options for output as a list of matching/non-matching files.  */
static enum
{
  LISTFILES_NONE,
  LISTFILES_MATCHING,
  LISTFILES_NONMATCHING,
} list_files;

/* Whether to output filenames.  1 means yes, 0 means no, and -1 means
   'grep -r PATTERN FILE' was used and it is not known yet whether
   FILE is a directory (which means yes) or not (which means no).  */
static int out_file;

static int filename_mask;       /* If zero, output nulls after filenames.  */
static bool out_quiet;          /* Suppress all normal output.  */
static bool out_invert;         /* Print nonmatching stuff.  */
static bool out_line;           /* Print line numbers.  */
static bool out_byte;           /* Print byte offsets.  */
static intmax_t out_before;     /* Lines of leading context.  */
static intmax_t out_after;      /* Lines of trailing context.  */
static bool count_matches;      /* Count matching lines.  */
static intmax_t max_count;      /* Max number of selected
                                   lines from an input file.  */
static bool line_buffered;      /* Use line buffering.  */
static char *label = NULL;      /* Fake filename for stdin.  */
1081
1082
/* Internal variables to keep track of byte count, context, etc.  */
static uintmax_t totalcc;       /* Total character count before bufbeg.  */
static char const *lastnl;      /* Pointer after last newline counted.  */
static char *lastout;           /* Pointer after last character output;
                                   NULL if no character has been output
                                   or if it's conceptually before bufbeg.  */
static intmax_t outleft;        /* Maximum number of selected lines.  */
static intmax_t pending;        /* Pending lines of output.
                                   Always kept 0 if out_quiet is true.  */
static bool done_on_match;      /* Stop scanning file on first match.  */
static bool exit_on_match;      /* Exit on first match.  */
static bool dev_null_output;    /* Stdout is known to be /dev/null.  */
static bool binary;             /* Use binary rather than text I/O.  */
1096
1097 static void
nlscan(char const * lim)1098 nlscan (char const *lim)
1099 {
1100 size_t newlines = 0;
1101 for (char const *beg = lastnl; beg < lim; beg++)
1102 {
1103 beg = memchr (beg, eolbyte, lim - beg);
1104 if (!beg)
1105 break;
1106 newlines++;
1107 }
1108 totalnl = add_count (totalnl, newlines);
1109 lastnl = lim;
1110 }
1111
1112 /* Print the current filename. */
1113 static void
print_filename(void)1114 print_filename (void)
1115 {
1116 pr_sgr_start_if (filename_color);
1117 fputs_errno (input_filename ());
1118 pr_sgr_end_if (filename_color);
1119 }
1120
1121 /* Print a character separator. */
1122 static void
print_sep(char sep)1123 print_sep (char sep)
1124 {
1125 pr_sgr_start_if (sep_color);
1126 putchar_errno (sep);
1127 pr_sgr_end_if (sep_color);
1128 }
1129
1130 /* Print a line number or a byte offset. */
1131 static void
print_offset(uintmax_t pos,const char * color)1132 print_offset (uintmax_t pos, const char *color)
1133 {
1134 pr_sgr_start_if (color);
1135 printf_errno ("%*"PRIuMAX, offset_width, pos);
1136 pr_sgr_end_if (color);
1137 }
1138
1139 /* Print a whole line head (filename, line, byte). The output data
1140 starts at BEG and contains LEN bytes; it is followed by at least
1141 sizeof (uword) bytes, the first of which may be temporarily modified.
1142 The output data comes from what is perhaps a larger input line that
1143 goes until LIM, where LIM[-1] is an end-of-line byte. Use SEP as
1144 the separator on output.
1145
1146 Return true unless the line was suppressed due to an encoding error. */
1147
1148 static bool
print_line_head(char * beg,size_t len,char const * lim,char sep)1149 print_line_head (char *beg, size_t len, char const *lim, char sep)
1150 {
1151 if (binary_files != TEXT_BINARY_FILES)
1152 {
1153 char ch = beg[len];
1154 bool encoding_errors = buf_has_encoding_errors (beg, len);
1155 beg[len] = ch;
1156 if (encoding_errors)
1157 {
1158 encoding_error_output = true;
1159 return false;
1160 }
1161 }
1162
1163 if (out_file)
1164 {
1165 print_filename ();
1166 if (filename_mask)
1167 print_sep (sep);
1168 else
1169 putchar_errno (0);
1170 }
1171
1172 if (out_line)
1173 {
1174 if (lastnl < lim)
1175 {
1176 nlscan (beg);
1177 totalnl = add_count (totalnl, 1);
1178 lastnl = lim;
1179 }
1180 print_offset (totalnl, line_num_color);
1181 print_sep (sep);
1182 }
1183
1184 if (out_byte)
1185 {
1186 uintmax_t pos = add_count (totalcc, beg - bufbeg);
1187 print_offset (pos, byte_num_color);
1188 print_sep (sep);
1189 }
1190
1191 if (align_tabs && (out_file | out_line | out_byte) && len != 0)
1192 putchar_errno ('\t');
1193
1194 return true;
1195 }
1196
1197 static char *
print_line_middle(char * beg,char * lim,const char * line_color,const char * match_color)1198 print_line_middle (char *beg, char *lim,
1199 const char *line_color, const char *match_color)
1200 {
1201 size_t match_size;
1202 size_t match_offset;
1203 char *cur;
1204 char *mid = NULL;
1205 char *b;
1206
1207 for (cur = beg;
1208 (cur < lim
1209 && ((match_offset = execute (compiled_pattern, beg, lim - beg,
1210 &match_size, cur)) != (size_t) -1));
1211 cur = b + match_size)
1212 {
1213 b = beg + match_offset;
1214
1215 /* Avoid matching the empty line at the end of the buffer. */
1216 if (b == lim)
1217 break;
1218
1219 /* Avoid hanging on grep --color "" foo */
1220 if (match_size == 0)
1221 {
1222 /* Make minimal progress; there may be further non-empty matches. */
1223 /* XXX - Could really advance by one whole multi-octet character. */
1224 match_size = 1;
1225 if (!mid)
1226 mid = cur;
1227 }
1228 else
1229 {
1230 /* This function is called on a matching line only,
1231 but is it selected or rejected/context? */
1232 if (only_matching)
1233 {
1234 char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED;
1235 if (! print_line_head (b, match_size, lim, sep))
1236 return NULL;
1237 }
1238 else
1239 {
1240 pr_sgr_start (line_color);
1241 if (mid)
1242 {
1243 cur = mid;
1244 mid = NULL;
1245 }
1246 fwrite_errno (cur, 1, b - cur);
1247 }
1248
1249 pr_sgr_start_if (match_color);
1250 fwrite_errno (b, 1, match_size);
1251 pr_sgr_end_if (match_color);
1252 if (only_matching)
1253 putchar_errno (eolbyte);
1254 }
1255 }
1256
1257 if (only_matching)
1258 cur = lim;
1259 else if (mid)
1260 cur = mid;
1261
1262 return cur;
1263 }
1264
1265 static char *
print_line_tail(char * beg,const char * lim,const char * line_color)1266 print_line_tail (char *beg, const char *lim, const char *line_color)
1267 {
1268 size_t eol_size;
1269 size_t tail_size;
1270
1271 eol_size = (lim > beg && lim[-1] == eolbyte);
1272 eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
1273 tail_size = lim - eol_size - beg;
1274
1275 if (tail_size > 0)
1276 {
1277 pr_sgr_start (line_color);
1278 fwrite_errno (beg, 1, tail_size);
1279 beg += tail_size;
1280 pr_sgr_end (line_color);
1281 }
1282
1283 return beg;
1284 }
1285
1286 static void
prline(char * beg,char * lim,char sep)1287 prline (char *beg, char *lim, char sep)
1288 {
1289 bool matching;
1290 const char *line_color;
1291 const char *match_color;
1292
1293 if (!only_matching)
1294 if (! print_line_head (beg, lim - beg - 1, lim, sep))
1295 return;
1296
1297 matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
1298
1299 if (color_option)
1300 {
1301 line_color = (((sep == SEP_CHAR_SELECTED)
1302 ^ (out_invert && (color_option < 0)))
1303 ? selected_line_color : context_line_color);
1304 match_color = (sep == SEP_CHAR_SELECTED
1305 ? selected_match_color : context_match_color);
1306 }
1307 else
1308 line_color = match_color = NULL; /* Shouldn't be used. */
1309
1310 if ((only_matching && matching)
1311 || (color_option && (*line_color || *match_color)))
1312 {
1313 /* We already know that non-matching lines have no match (to colorize). */
1314 if (matching && (only_matching || *match_color))
1315 {
1316 beg = print_line_middle (beg, lim, line_color, match_color);
1317 if (! beg)
1318 return;
1319 }
1320
1321 if (!only_matching && *line_color)
1322 {
1323 /* This code is exercised at least when grep is invoked like this:
1324 echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */
1325 beg = print_line_tail (beg, lim, line_color);
1326 }
1327 }
1328
1329 if (!only_matching && lim > beg)
1330 fwrite_errno (beg, 1, lim - beg);
1331
1332 if (line_buffered)
1333 fflush_errno ();
1334
1335 if (stdout_errno)
1336 die (EXIT_TROUBLE, stdout_errno, _("write error"));
1337
1338 lastout = lim;
1339 }
1340
1341 /* Print pending lines of trailing context prior to LIM. */
1342 static void
prpending(char const * lim)1343 prpending (char const *lim)
1344 {
1345 if (!lastout)
1346 lastout = bufbeg;
1347 for (; 0 < pending && lastout < lim; pending--)
1348 {
1349 char *nl = rawmemchr (lastout, eolbyte);
1350 prline (lastout, nl + 1, SEP_CHAR_REJECTED);
1351 }
1352 }
1353
1354 /* Output the lines between BEG and LIM. Deal with context. */
1355 static void
prtext(char * beg,char * lim)1356 prtext (char *beg, char *lim)
1357 {
1358 static bool used; /* Avoid printing SEP_STR_GROUP before any output. */
1359 char eol = eolbyte;
1360
1361 if (!out_quiet && pending > 0)
1362 prpending (beg);
1363
1364 char *p = beg;
1365
1366 if (!out_quiet)
1367 {
1368 /* Deal with leading context. */
1369 char const *bp = lastout ? lastout : bufbeg;
1370 intmax_t i;
1371 for (i = 0; i < out_before; ++i)
1372 if (p > bp)
1373 do
1374 --p;
1375 while (p[-1] != eol);
1376
1377 /* Print the group separator unless the output is adjacent to
1378 the previous output in the file. */
1379 if ((0 <= out_before || 0 <= out_after) && used
1380 && p != lastout && group_separator)
1381 {
1382 pr_sgr_start_if (sep_color);
1383 fputs_errno (group_separator);
1384 pr_sgr_end_if (sep_color);
1385 putchar_errno ('\n');
1386 }
1387
1388 while (p < beg)
1389 {
1390 char *nl = rawmemchr (p, eol);
1391 nl++;
1392 prline (p, nl, SEP_CHAR_REJECTED);
1393 p = nl;
1394 }
1395 }
1396
1397 intmax_t n;
1398 if (out_invert)
1399 {
1400 /* One or more lines are output. */
1401 for (n = 0; p < lim && n < outleft; n++)
1402 {
1403 char *nl = rawmemchr (p, eol);
1404 nl++;
1405 if (!out_quiet)
1406 prline (p, nl, SEP_CHAR_SELECTED);
1407 p = nl;
1408 }
1409 }
1410 else
1411 {
1412 /* Just one line is output. */
1413 if (!out_quiet)
1414 prline (beg, lim, SEP_CHAR_SELECTED);
1415 n = 1;
1416 p = lim;
1417 }
1418
1419 after_last_match = bufoffset - (buflim - p);
1420 pending = out_quiet ? 0 : MAX (0, out_after);
1421 used = true;
1422 outleft -= n;
1423 }
1424
/* Replace all NUL bytes in buffer P (which ends at LIM) with EOL.
   This avoids running out of memory when binary input contains a long
   sequence of zeros, which would otherwise be considered to be part
   of a long line.  *LIM should be EOL.

   If EOL is itself NUL, do nothing.  *LIM is temporarily set to NUL
   so that strlen stops there, and is restored to EOL afterwards.  */
static void
zap_nuls (char *p, char *lim, char eol)
{
  if (eol)
    while (true)
      {
        /* Skip to the next NUL byte, or to LIM if there is none.  */
        *lim = '\0';
        p += strlen (p);
        *lim = eol;
        if (p == lim)
          break;
        /* Overwrite the run of NULs; the nonzero byte at LIM stops
           the run at the end of the buffer.  */
        do
          *p++ = eol;
        while (!*p);
      }
}
1445
1446 /* Scan the specified portion of the buffer, matching lines (or
1447 between matching lines if OUT_INVERT is true). Return a count of
1448 lines printed. Replace all NUL bytes with NUL_ZAPPER as we go. */
1449 static intmax_t
grepbuf(char * beg,char const * lim)1450 grepbuf (char *beg, char const *lim)
1451 {
1452 intmax_t outleft0 = outleft;
1453 char *endp;
1454
1455 for (char *p = beg; p < lim; p = endp)
1456 {
1457 size_t match_size;
1458 size_t match_offset = execute (compiled_pattern, p, lim - p,
1459 &match_size, NULL);
1460 if (match_offset == (size_t) -1)
1461 {
1462 if (!out_invert)
1463 break;
1464 match_offset = lim - p;
1465 match_size = 0;
1466 }
1467 char *b = p + match_offset;
1468 endp = b + match_size;
1469 /* Avoid matching the empty line at the end of the buffer. */
1470 if (!out_invert && b == lim)
1471 break;
1472 if (!out_invert || p < b)
1473 {
1474 char *prbeg = out_invert ? p : b;
1475 char *prend = out_invert ? b : endp;
1476 prtext (prbeg, prend);
1477 if (!outleft || done_on_match)
1478 {
1479 if (exit_on_match)
1480 exit (errseen ? exit_failure : EXIT_SUCCESS);
1481 break;
1482 }
1483 }
1484 }
1485
1486 return outleft0 - outleft;
1487 }
1488
1489 /* Search a given (non-directory) file. Return a count of lines printed.
1490 Set *INEOF to true if end-of-file reached. */
1491 static intmax_t
grep(int fd,struct stat const * st,bool * ineof)1492 grep (int fd, struct stat const *st, bool *ineof)
1493 {
1494 intmax_t nlines, i;
1495 size_t residue, save;
1496 char oldc;
1497 char *beg;
1498 char *lim;
1499 char eol = eolbyte;
1500 char nul_zapper = '\0';
1501 bool done_on_match_0 = done_on_match;
1502 bool out_quiet_0 = out_quiet;
1503
1504 /* The value of NLINES when nulls were first deduced in the input;
1505 this is not necessarily the same as the number of matching lines
1506 before the first null. -1 if no input nulls have been deduced. */
1507 intmax_t nlines_first_null = -1;
1508
1509 if (! reset (fd, st))
1510 return 0;
1511
1512 totalcc = 0;
1513 lastout = 0;
1514 totalnl = 0;
1515 outleft = max_count;
1516 after_last_match = 0;
1517 pending = 0;
1518 skip_nuls = skip_empty_lines && !eol;
1519 encoding_error_output = false;
1520
1521 nlines = 0;
1522 residue = 0;
1523 save = 0;
1524
1525 if (! fillbuf (save, st))
1526 {
1527 suppressible_error (errno);
1528 return 0;
1529 }
1530
1531 offset_width = 0;
1532 if (align_tabs)
1533 {
1534 /* Width is log of maximum number. Line numbers are origin-1. */
1535 uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX;
1536 num += out_line && num < UINTMAX_MAX;
1537 do
1538 offset_width++;
1539 while ((num /= 10) != 0);
1540 }
1541
1542 for (bool firsttime = true; ; firsttime = false)
1543 {
1544 if (nlines_first_null < 0 && eol && binary_files != TEXT_BINARY_FILES
1545 && (buf_has_nulls (bufbeg, buflim - bufbeg)
1546 || (firsttime && file_must_have_nulls (buflim - bufbeg, fd, st))))
1547 {
1548 if (binary_files == WITHOUT_MATCH_BINARY_FILES)
1549 return 0;
1550 if (!count_matches)
1551 done_on_match = out_quiet = true;
1552 nlines_first_null = nlines;
1553 nul_zapper = eol;
1554 skip_nuls = skip_empty_lines;
1555 }
1556
1557 lastnl = bufbeg;
1558 if (lastout)
1559 lastout = bufbeg;
1560
1561 beg = bufbeg + save;
1562
1563 /* no more data to scan (eof) except for maybe a residue -> break */
1564 if (beg == buflim)
1565 {
1566 *ineof = true;
1567 break;
1568 }
1569
1570 zap_nuls (beg, buflim, nul_zapper);
1571
1572 /* Determine new residue (the length of an incomplete line at the end of
1573 the buffer, 0 means there is no incomplete last line). */
1574 oldc = beg[-1];
1575 beg[-1] = eol;
1576 /* FIXME: use rawmemrchr if/when it exists, since we have ensured
1577 that this use of memrchr is guaranteed never to return NULL. */
1578 lim = memrchr (beg - 1, eol, buflim - beg + 1);
1579 ++lim;
1580 beg[-1] = oldc;
1581 if (lim == beg)
1582 lim = beg - residue;
1583 beg -= residue;
1584 residue = buflim - lim;
1585
1586 if (beg < lim)
1587 {
1588 if (outleft)
1589 nlines += grepbuf (beg, lim);
1590 if (pending)
1591 prpending (lim);
1592 if ((!outleft && !pending)
1593 || (done_on_match && MAX (0, nlines_first_null) < nlines))
1594 goto finish_grep;
1595 }
1596
1597 /* The last OUT_BEFORE lines at the end of the buffer will be needed as
1598 leading context if there is a matching line at the begin of the
1599 next data. Make beg point to their begin. */
1600 i = 0;
1601 beg = lim;
1602 while (i < out_before && beg > bufbeg && beg != lastout)
1603 {
1604 ++i;
1605 do
1606 --beg;
1607 while (beg[-1] != eol);
1608 }
1609
1610 /* Detect whether leading context is adjacent to previous output. */
1611 if (beg != lastout)
1612 lastout = 0;
1613
1614 /* Handle some details and read more data to scan. */
1615 save = residue + lim - beg;
1616 if (out_byte)
1617 totalcc = add_count (totalcc, buflim - bufbeg - save);
1618 if (out_line)
1619 nlscan (beg);
1620 if (! fillbuf (save, st))
1621 {
1622 suppressible_error (errno);
1623 goto finish_grep;
1624 }
1625 }
1626 if (residue)
1627 {
1628 *buflim++ = eol;
1629 if (outleft)
1630 nlines += grepbuf (bufbeg + save - residue, buflim);
1631 if (pending)
1632 prpending (buflim);
1633 }
1634
1635 finish_grep:
1636 done_on_match = done_on_match_0;
1637 out_quiet = out_quiet_0;
1638 if (binary_files == BINARY_BINARY_FILES && ! (out_quiet | suppress_errors)
1639 && (encoding_error_output
1640 || (0 <= nlines_first_null && nlines_first_null < nlines)))
1641 error (0, 0, _("%s: binary file matches"), input_filename ());
1642 return nlines;
1643 }
1644
1645 static bool
grepdirent(FTS * fts,FTSENT * ent,bool command_line)1646 grepdirent (FTS *fts, FTSENT *ent, bool command_line)
1647 {
1648 bool follow;
1649 command_line &= ent->fts_level == FTS_ROOTLEVEL;
1650
1651 if (ent->fts_info == FTS_DP)
1652 return true;
1653
1654 if (!command_line
1655 && skipped_file (ent->fts_name, false,
1656 (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
1657 || ent->fts_info == FTS_DNR)))
1658 {
1659 fts_set (fts, ent, FTS_SKIP);
1660 return true;
1661 }
1662
1663 filename = ent->fts_path;
1664 if (omit_dot_slash && filename[1])
1665 filename += 2;
1666 follow = (fts->fts_options & FTS_LOGICAL
1667 || (fts->fts_options & FTS_COMFOLLOW && command_line));
1668
1669 switch (ent->fts_info)
1670 {
1671 case FTS_D:
1672 if (directories == RECURSE_DIRECTORIES)
1673 return true;
1674 fts_set (fts, ent, FTS_SKIP);
1675 break;
1676
1677 case FTS_DC:
1678 if (!suppress_errors)
1679 error (0, 0, _("%s: warning: recursive directory loop"), filename);
1680 return true;
1681
1682 case FTS_DNR:
1683 case FTS_ERR:
1684 case FTS_NS:
1685 suppressible_error (ent->fts_errno);
1686 return true;
1687
1688 case FTS_DEFAULT:
1689 case FTS_NSOK:
1690 if (skip_devices (command_line))
1691 {
1692 struct stat *st = ent->fts_statp;
1693 struct stat st1;
1694 if (! st->st_mode)
1695 {
1696 /* The file type is not already known. Get the file status
1697 before opening, since opening might have side effects
1698 on a device. */
1699 int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
1700 if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
1701 {
1702 suppressible_error (errno);
1703 return true;
1704 }
1705 st = &st1;
1706 }
1707 if (is_device_mode (st->st_mode))
1708 return true;
1709 }
1710 break;
1711
1712 case FTS_F:
1713 case FTS_SLNONE:
1714 break;
1715
1716 case FTS_SL:
1717 case FTS_W:
1718 return true;
1719
1720 default:
1721 abort ();
1722 }
1723
1724 return grepfile (fts->fts_cwd_fd, ent->fts_accpath, follow, command_line);
1725 }
1726
/* True if errno is ERR after 'open ("symlink", ... O_NOFOLLOW ...)'.
   POSIX specifies ELOOP, but it's EMLINK on FreeBSD and EFTYPE on NetBSD.
   EFTYPE is checked only on platforms that define it.  */
static bool
open_symlink_nofollow_error (int err)
{
  if (err == ELOOP || err == EMLINK)
    return true;
#ifdef EFTYPE
  if (err == EFTYPE)
    return true;
#endif
  return false;
}
1740
1741 static bool
grepfile(int dirdesc,char const * name,bool follow,bool command_line)1742 grepfile (int dirdesc, char const *name, bool follow, bool command_line)
1743 {
1744 int oflag = (O_RDONLY | O_NOCTTY
1745 | (IGNORE_DUPLICATE_BRANCH_WARNING
1746 (binary ? O_BINARY : 0))
1747 | (follow ? 0 : O_NOFOLLOW)
1748 | (skip_devices (command_line) ? O_NONBLOCK : 0));
1749 int desc = openat_safer (dirdesc, name, oflag);
1750 if (desc < 0)
1751 {
1752 if (follow || ! open_symlink_nofollow_error (errno))
1753 suppressible_error (errno);
1754 return true;
1755 }
1756 return grepdesc (desc, command_line);
1757 }
1758
1759 /* Read all data from FD, with status ST. Return true if successful,
1760 false (setting errno) otherwise. */
1761 static bool
drain_input(int fd,struct stat const * st)1762 drain_input (int fd, struct stat const *st)
1763 {
1764 ssize_t nbytes;
1765 if (S_ISFIFO (st->st_mode) && dev_null_output)
1766 {
1767 #ifdef SPLICE_F_MOVE
1768 /* Should be faster, since it need not copy data to user space. */
1769 nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1770 INITIAL_BUFSIZE, SPLICE_F_MOVE);
1771 if (0 <= nbytes || errno != EINVAL)
1772 {
1773 while (0 < nbytes)
1774 nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1775 INITIAL_BUFSIZE, SPLICE_F_MOVE);
1776 return nbytes == 0;
1777 }
1778 #endif
1779 }
1780 while ((nbytes = safe_read (fd, buffer, bufalloc)))
1781 if (nbytes == SAFE_READ_ERROR)
1782 return false;
1783 return true;
1784 }
1785
1786 /* Finish reading from FD, with status ST and where end-of-file has
1787 been seen if INEOF. Typically this is a no-op, but when reading
1788 from standard input this may adjust the file offset or drain a
1789 pipe. */
1790
1791 static void
finalize_input(int fd,struct stat const * st,bool ineof)1792 finalize_input (int fd, struct stat const *st, bool ineof)
1793 {
1794 if (fd == STDIN_FILENO
1795 && (outleft
1796 ? (!ineof
1797 && (seek_failed
1798 || (lseek (fd, 0, SEEK_END) < 0
1799 /* Linux proc file system has EINVAL (Bug#25180). */
1800 && errno != EINVAL))
1801 && ! drain_input (fd, st))
1802 : (bufoffset != after_last_match && !seek_failed
1803 && lseek (fd, after_last_match, SEEK_SET) < 0)))
1804 suppressible_error (errno);
1805 }
1806
1807 static bool
grepdesc(int desc,bool command_line)1808 grepdesc (int desc, bool command_line)
1809 {
1810 intmax_t count;
1811 bool status = true;
1812 bool ineof = false;
1813 struct stat st;
1814
1815 /* Get the file status, possibly for the second time. This catches
1816 a race condition if the directory entry changes after the
1817 directory entry is read and before the file is opened. For
1818 example, normally DESC is a directory only at the top level, but
1819 there is an exception if some other process substitutes a
1820 directory for a non-directory while 'grep' is running. */
1821 if (fstat (desc, &st) != 0)
1822 {
1823 suppressible_error (errno);
1824 goto closeout;
1825 }
1826
1827 if (desc != STDIN_FILENO && skip_devices (command_line)
1828 && is_device_mode (st.st_mode))
1829 goto closeout;
1830
1831 if (desc != STDIN_FILENO && command_line
1832 && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0))
1833 goto closeout;
1834
1835 /* Don't output file names if invoked as 'grep -r PATTERN NONDIRECTORY'. */
1836 if (out_file < 0)
1837 out_file = !!S_ISDIR (st.st_mode);
1838
1839 if (desc != STDIN_FILENO
1840 && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
1841 {
1842 /* Traverse the directory starting with its full name, because
1843 unfortunately fts provides no way to traverse the directory
1844 starting from its file descriptor. */
1845
1846 FTS *fts;
1847 FTSENT *ent;
1848 int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
1849 char *fts_arg[2];
1850
1851 /* Close DESC now, to conserve file descriptors if the race
1852 condition occurs many times in a deep recursion. */
1853 if (close (desc) != 0)
1854 suppressible_error (errno);
1855
1856 fts_arg[0] = (char *) filename;
1857 fts_arg[1] = NULL;
1858 fts = fts_open (fts_arg, opts, NULL);
1859
1860 if (!fts)
1861 xalloc_die ();
1862 while ((ent = fts_read (fts)))
1863 status &= grepdirent (fts, ent, command_line);
1864 if (errno)
1865 suppressible_error (errno);
1866 if (fts_close (fts) != 0)
1867 suppressible_error (errno);
1868 return status;
1869 }
1870 if (desc != STDIN_FILENO
1871 && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
1872 || ((devices == SKIP_DEVICES
1873 || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
1874 && is_device_mode (st.st_mode))))
1875 goto closeout;
1876
1877 /* If there is a regular file on stdout and the current file refers
1878 to the same i-node, we have to report the problem and skip it.
1879 Otherwise when matching lines from some other input reach the
1880 disk before we open this file, we can end up reading and matching
1881 those lines and appending them to the file from which we're reading.
1882 Then we'd have what appears to be an infinite loop that'd terminate
1883 only upon filling the output file system or reaching a quota.
1884 However, there is no risk of an infinite loop if grep is generating
1885 no output, i.e., with --silent, --quiet, -q.
1886 Similarly, with any of these:
1887 --max-count=N (-m) (for N >= 2)
1888 --files-with-matches (-l)
1889 --files-without-match (-L)
1890 there is no risk of trouble.
1891 For --max-count=1, grep stops after printing the first match,
1892 so there is no risk of malfunction. But even --max-count=2, with
1893 input==output, while there is no risk of infloop, there is a race
1894 condition that could result in "alternate" output. */
1895 if (!out_quiet && list_files == LISTFILES_NONE && 1 < max_count
1896 && S_ISREG (st.st_mode) && SAME_INODE (st, out_stat))
1897 {
1898 if (! suppress_errors)
1899 error (0, 0, _("%s: input file is also the output"), input_filename ());
1900 errseen = true;
1901 goto closeout;
1902 }
1903
1904 count = grep (desc, &st, &ineof);
1905 if (count_matches)
1906 {
1907 if (out_file)
1908 {
1909 print_filename ();
1910 if (filename_mask)
1911 print_sep (SEP_CHAR_SELECTED);
1912 else
1913 putchar_errno (0);
1914 }
1915 printf_errno ("%" PRIdMAX "\n", count);
1916 if (line_buffered)
1917 fflush_errno ();
1918 }
1919
1920 status = !count;
1921
1922 if (list_files == LISTFILES_NONE)
1923 finalize_input (desc, &st, ineof);
1924 else if (list_files == (status ? LISTFILES_NONMATCHING : LISTFILES_MATCHING))
1925 {
1926 print_filename ();
1927 putchar_errno ('\n' & filename_mask);
1928 if (line_buffered)
1929 fflush_errno ();
1930 }
1931
1932 closeout:
1933 if (desc != STDIN_FILENO && close (desc) != 0)
1934 suppressible_error (errno);
1935 return status;
1936 }
1937
1938 static bool
grep_command_line_arg(char const * arg)1939 grep_command_line_arg (char const *arg)
1940 {
1941 if (STREQ (arg, "-"))
1942 {
1943 filename = label;
1944 if (binary)
1945 xset_binary_mode (STDIN_FILENO, O_BINARY);
1946 return grepdesc (STDIN_FILENO, true);
1947 }
1948 else
1949 {
1950 filename = arg;
1951 return grepfile (AT_FDCWD, arg, true, true);
1952 }
1953 }
1954
/* Print usage information and exit with STATUS.  A nonzero STATUS
   writes a short synopsis and a pointer to --help on stderr; a zero
   STATUS writes the full help text on stdout.  This function never
   returns.  */
_Noreturn void usage (int);
void
usage (int status)
{
  if (status != 0)
    {
      fprintf (stderr, _("Usage: %s [OPTION]... PATTERNS [FILE]...\n"),
               getprogname ());
      fprintf (stderr, _("Try '%s --help' for more information.\n"),
               getprogname ());
      exit (status);
    }

  /* Each printf below carries one translatable message; keep the
     message boundaries as they are so that the message catalog
     entries stay the same.  */
  printf (_("Usage: %s [OPTION]... PATTERNS [FILE]...\n"), getprogname ());
  printf (_("Search for PATTERNS in each FILE.\n"));
  printf (_("Example: %s -i 'hello world' menu.h main.c\n"
            "PATTERNS can contain multiple patterns separated by newlines.\n"
            "\n"
            "Pattern selection and interpretation:\n"),
          getprogname ());
  printf (_("-E, --extended-regexp PATTERNS are extended regular expressions\n"
            "-F, --fixed-strings PATTERNS are strings\n"
            "-G, --basic-regexp PATTERNS are basic regular expressions\n"
            "-P, --perl-regexp PATTERNS are Perl regular expressions\n"));
  /* -X is deliberately undocumented.  */
  printf (_("-e, --regexp=PATTERNS use PATTERNS for matching\n"
            "-f, --file=FILE take PATTERNS from FILE\n"
            "-i, --ignore-case ignore case distinctions in patterns and data\n"
            "--no-ignore-case do not ignore case distinctions (default)\n"
            "-w, --word-regexp match only whole words\n"
            "-x, --line-regexp match only whole lines\n"
            "-z, --null-data a data line ends in 0 byte, not newline\n"));
  printf (_("\n"
            "Miscellaneous:\n"
            "-s, --no-messages suppress error messages\n"
            "-v, --invert-match select non-matching lines\n"
            "-V, --version display version information and exit\n"
            "--help display this help text and exit\n"));
  printf (_("\n"
            "Output control:\n"
            "-m, --max-count=NUM stop after NUM selected lines\n"
            "-b, --byte-offset print the byte offset with output lines\n"
            "-n, --line-number print line number with output lines\n"
            "--line-buffered flush output on every line\n"
            "-H, --with-filename print file name with output lines\n"
            "-h, --no-filename suppress the file name prefix on output\n"
            "--label=LABEL use LABEL as the standard input file name prefix\n"));
  printf (_("-o, --only-matching show only nonempty parts of lines that match\n"
            "-q, --quiet, --silent suppress all normal output\n"
            "--binary-files=TYPE assume that binary files are TYPE;\n"
            "TYPE is 'binary', 'text', or 'without-match'\n"
            "-a, --text equivalent to --binary-files=text\n"));
  printf (_("-I equivalent to --binary-files=without-match\n"
            "-d, --directories=ACTION how to handle directories;\n"
            "ACTION is 'read', 'recurse', or 'skip'\n"
            "-D, --devices=ACTION how to handle devices, FIFOs and sockets;\n"
            "ACTION is 'read' or 'skip'\n"
            "-r, --recursive like --directories=recurse\n"
            "-R, --dereference-recursive likewise, but follow all symlinks\n"));
  printf (_("--include=GLOB search only files that match GLOB (a file pattern)\n"
            "--exclude=GLOB skip files that match GLOB\n"
            "--exclude-from=FILE skip files that match any file pattern from FILE\n"
            "--exclude-dir=GLOB skip directories that match GLOB\n"));
  printf (_("-L, --files-without-match print only names of FILEs with no selected lines\n"
            "-l, --files-with-matches print only names of FILEs with selected lines\n"
            "-c, --count print only a count of selected lines per FILE\n"
            "-T, --initial-tab make tabs line up (if needed)\n"
            "-Z, --null print 0 byte after FILE name\n"));
  printf (_("\n"
            "Context control:\n"
            "-B, --before-context=NUM print NUM lines of leading context\n"
            "-A, --after-context=NUM print NUM lines of trailing context\n"
            "-C, --context=NUM print NUM lines of output context\n"));
  printf (_("-NUM same as --context=NUM\n"
            "--group-separator=SEP print SEP on line between matches with context\n"
            "--no-group-separator do not print separator for matches with context\n"
            "--color[=WHEN],\n"
            "--colour[=WHEN] use markers to highlight the matching strings;\n"
            "WHEN is 'always', 'never', or 'auto'\n"
            "-U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n"
            "\n"));
  printf (_("When FILE is '-', read standard input. With no FILE, read '.' if\n"
            "recursive, '-' otherwise. With fewer than two FILEs, assume -h.\n"
            "Exit status is 0 if any line is selected, 1 otherwise;\n"
            "if any error occurs and -q is not given, the exit status is 2.\n"));
  emit_bug_reporting_address ();
  exit (status);
}
2061
2062 /* Pattern compilers and matchers. */
2063
2064 static struct
2065 {
2066 char name[12];
2067 int syntax; /* used if compile == GEAcompile */
2068 compile_fp_t compile;
2069 execute_fp_t execute;
2070 } const matchers[] = {
2071 { "grep", RE_SYNTAX_GREP, GEAcompile, EGexecute },
2072 { "egrep", RE_SYNTAX_EGREP, GEAcompile, EGexecute },
2073 { "fgrep", 0, Fcompile, Fexecute, },
2074 { "awk", RE_SYNTAX_AWK, GEAcompile, EGexecute },
2075 { "gawk", RE_SYNTAX_GNU_AWK, GEAcompile, EGexecute },
2076 { "posixawk", RE_SYNTAX_POSIX_AWK, GEAcompile, EGexecute },
2077 #if HAVE_LIBPCRE
2078 { "perl", 0, Pcompile, Pexecute, },
2079 #endif
2080 };
2081 /* Keep these in sync with the 'matchers' table. */
2082 enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0 };
2083
2084 /* Return the index of the matcher corresponding to M if available.
2085 MATCHER is the index of the previous matcher, or -1 if none.
2086 Exit in case of conflicts or if M is not available. */
2087 static int
setmatcher(char const * m,int matcher)2088 setmatcher (char const *m, int matcher)
2089 {
2090 for (int i = 0; i < sizeof matchers / sizeof *matchers; i++)
2091 if (STREQ (m, matchers[i].name))
2092 {
2093 if (0 <= matcher && matcher != i)
2094 die (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
2095 return i;
2096 }
2097
2098 #if !HAVE_LIBPCRE
2099 if (STREQ (m, "perl"))
2100 die (EXIT_TROUBLE, 0,
2101 _("Perl matching not supported in a --disable-perl-regexp build"));
2102 #endif
2103 die (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
2104 }
2105
2106 /* Get the next non-digit option from ARGC and ARGV.
2107 Return -1 if there are no more options.
2108 Process any digit options that were encountered on the way,
2109 and store the resulting integer into *DEFAULT_CONTEXT. */
2110 static int
get_nondigit_option(int argc,char * const * argv,intmax_t * default_context)2111 get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
2112 {
2113 static int prev_digit_optind = -1;
2114 int this_digit_optind;
2115 bool was_digit;
2116 char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
2117 char *p = buf;
2118 int opt;
2119
2120 was_digit = false;
2121 this_digit_optind = optind;
2122 while (true)
2123 {
2124 opt = getopt_long (argc, (char **) argv, short_options,
2125 long_options, NULL);
2126 if (! c_isdigit (opt))
2127 break;
2128
2129 if (prev_digit_optind != this_digit_optind || !was_digit)
2130 {
2131 /* Reset to start another context length argument. */
2132 p = buf;
2133 }
2134 else
2135 {
2136 /* Suppress trivial leading zeros, to avoid incorrect
2137 diagnostic on strings like 00000000000. */
2138 p -= buf[0] == '0';
2139 }
2140
2141 if (p == buf + sizeof buf - 4)
2142 {
2143 /* Too many digits. Append "..." to make context_length_arg
2144 complain about "X...", where X contains the digits seen
2145 so far. */
2146 strcpy (p, "...");
2147 p += 3;
2148 break;
2149 }
2150 *p++ = opt;
2151
2152 was_digit = true;
2153 prev_digit_optind = this_digit_optind;
2154 this_digit_optind = optind;
2155 }
2156 if (p != buf)
2157 {
2158 *p = '\0';
2159 context_length_arg (buf, default_context);
2160 }
2161
2162 return opt;
2163 }
2164
2165 /* Parse GREP_COLORS. The default would look like:
2166 GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
2167 with boolean capabilities (ne and rv) unset (i.e., omitted).
2168 No character escaping is needed or supported. */
2169 static void
parse_grep_colors(void)2170 parse_grep_colors (void)
2171 {
2172 const char *p;
2173 char *q;
2174 char *name;
2175 char *val;
2176
2177 p = getenv ("GREP_COLORS"); /* Plural! */
2178 if (p == NULL || *p == '\0')
2179 return;
2180
2181 /* Work off a writable copy. */
2182 q = xstrdup (p);
2183
2184 name = q;
2185 val = NULL;
2186 /* From now on, be well-formed or you're gone. */
2187 for (;;)
2188 if (*q == ':' || *q == '\0')
2189 {
2190 char c = *q;
2191 struct color_cap const *cap;
2192
2193 *q++ = '\0'; /* Terminate name or val. */
2194 /* Empty name without val (empty cap)
2195 * won't match and will be ignored. */
2196 for (cap = color_dict; cap->name; cap++)
2197 if (STREQ (cap->name, name))
2198 break;
2199 /* If name unknown, go on for forward compatibility. */
2200 if (cap->var && val)
2201 *(cap->var) = val;
2202 if (cap->fct)
2203 cap->fct ();
2204 if (c == '\0')
2205 return;
2206 name = q;
2207 val = NULL;
2208 }
2209 else if (*q == '=')
2210 {
2211 if (q == name || val)
2212 return;
2213 *q++ = '\0'; /* Terminate name. */
2214 val = q; /* Can be the empty string. */
2215 }
2216 else if (val == NULL)
2217 q++; /* Accumulate name. */
2218 else if (*q == ';' || c_isdigit (*q))
2219 q++; /* Accumulate val. Protect the terminal from being sent crap. */
2220 else
2221 return;
2222 }
2223
/* Return true if PAT (of length PATLEN) contains an encoding error,
   that is, if mb_clen reports (size_t) -1 or (size_t) -2 for some
   character of PAT.  */
static bool
contains_encoding_error (char const *pat, size_t patlen)
{
  mbstate_t state = { 0 };
  size_t offset = 0;

  while (offset < patlen)
    {
      size_t clen = mb_clen (pat + offset, patlen - offset, &state);
      if (clen >= (size_t) -2)
        return true;
      offset += clen;
    }

  return false;
}
2239
2240 /* When ignoring case and (-E or -F or -G), then for each single-byte
2241 character I, ok_fold[I] is 1 if every case folded counterpart of I
2242 is also single-byte, and is -1 otherwise. */
2243 static signed char ok_fold[NCHAR];
2244 static void
setup_ok_fold(void)2245 setup_ok_fold (void)
2246 {
2247 for (int i = 0; i < NCHAR; i++)
2248 {
2249 wint_t wi = localeinfo.sbctowc[i];
2250 if (wi == WEOF)
2251 continue;
2252
2253 int ok = 1;
2254 wchar_t folded[CASE_FOLDED_BUFSIZE];
2255 for (int n = case_folded_counterparts (wi, folded); 0 <= --n; )
2256 {
2257 char buf[MB_LEN_MAX];
2258 mbstate_t s = { 0 };
2259 if (wcrtomb (buf, folded[n], &s) != 1)
2260 {
2261 ok = -1;
2262 break;
2263 }
2264 }
2265 ok_fold[i] = ok;
2266 }
2267 }
2268
2269 /* Return the number of bytes in the initial character of PAT, of size
2270 PATLEN, if Fcompile can handle that character. Return -1 if
2271 Fcompile cannot handle it. MBS is the multibyte conversion state.
2272 PATLEN must be nonzero. */
2273
2274 static int
fgrep_icase_charlen(char const * pat,size_t patlen,mbstate_t * mbs)2275 fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
2276 {
2277 unsigned char pat0 = pat[0];
2278
2279 /* If PAT starts with a single-byte character, Fcompile works if
2280 every case folded counterpart is also single-byte. */
2281 if (localeinfo.sbctowc[pat0] != WEOF)
2282 return ok_fold[pat0];
2283
2284 wchar_t wc;
2285 size_t wn = mbrtowc (&wc, pat, patlen, mbs);
2286
2287 /* If PAT starts with an encoding error, Fcompile does not work. */
2288 if (MB_LEN_MAX < wn)
2289 return -1;
2290
2291 /* PAT starts with a multibyte character. Fcompile works if the
2292 character has no case folded counterparts and toupper translates
2293 none of its encoding's bytes. */
2294 wchar_t folded[CASE_FOLDED_BUFSIZE];
2295 if (case_folded_counterparts (wc, folded))
2296 return -1;
2297 for (int i = wn; 0 < --i; )
2298 {
2299 unsigned char c = pat[i];
2300 if (toupper (c) != c)
2301 return -1;
2302 }
2303 return wn;
2304 }
2305
/* Return true if the -F patterns PAT, of size PATLEN, contain only
   single-byte characters that case-fold only to single-byte
   characters, or multibyte characters not subject to case folding,
   and so can be processed by Fcompile.  */

static bool
fgrep_icase_available (char const *pat, size_t patlen)
{
  mbstate_t state = {0,};
  size_t offset = 0;

  while (offset < patlen)
    {
      int clen = fgrep_icase_charlen (pat + offset, patlen - offset, &state);
      if (clen < 0)
        return false;
      offset += clen;
    }

  return true;
}
2326
/* Change the pattern *KEYS_P, of size *LEN_P, from fgrep to grep
   style, by putting a backslash before each of the characters
   $ * . [ \ ^ when it stands as a single byte.  The old buffer is
   freed; *KEYS_P and *LEN_P are updated to describe the new one,
   which has a newline stored just past its *LEN_P bytes.  */

void
fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
{
  char *src = *keys_p;
  size_t remaining = *len_p;
  mbstate_t state = { 0 };

  /* Each input byte yields at most two output bytes, and one more
     byte is needed for the trailing newline.  */
  char *escaped = xnmalloc (remaining + 1, 2);
  char *dst = escaped;

  while (remaining != 0)
    {
      size_t n = mb_clen (src, remaining, &state);
      bool one_byte;

      if (n == (size_t) -2)
        {
          /* Presumably an incomplete character at the end, following
             the mbrlen convention: copy all remaining bytes as is.  */
          n = remaining;
          one_byte = false;
        }
      else if (n == (size_t) -1)
        {
          /* Presumably an encoding error: restart the conversion
             state and treat the offending byte on its own.  */
          memset (&state, 0, sizeof state);
          n = 1;
          one_byte = true;
        }
      else
        one_byte = n == 1;

      if (one_byte)
        {
          char c = *src;
          if (c == '$' || c == '*' || c == '.'
              || c == '[' || c == '\\' || c == '^')
            *dst++ = '\\';
          *dst++ = c;
        }
      else
        dst = mempcpy (dst, src, n);

      src += n;
      remaining -= n;
    }

  *dst = '\n';
  free (*keys_p);
  *keys_p = escaped;
  *len_p = dst - escaped;
}
2371
2372 /* If it is easy, convert the MATCHER-style patterns KEYS (of size
2373 *LEN_P) to -F style, update *LEN_P to a possibly-smaller value, and
2374 return F_MATCHER_INDEX. If not, leave KEYS and *LEN_P alone and
2375 return MATCHER. This function is conservative and sometimes misses
2376 conversions, e.g., it does not convert the -E pattern "(a|a|[aa])"
2377 to the -F pattern "a". */
2378
2379 static int
try_fgrep_pattern(int matcher,char * keys,size_t * len_p)2380 try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
2381 {
2382 int result = matcher;
2383 size_t len = *len_p;
2384 char *new_keys = xmalloc (len + 1);
2385 char *p = new_keys;
2386 char const *q = keys;
2387 mbstate_t mb_state = { 0 };
2388
2389 while (len != 0)
2390 {
2391 switch (*q)
2392 {
2393 case '$': case '*': case '.': case '[': case '^':
2394 goto fail;
2395
2396 case '(': case '+': case '?': case '{': case '|':
2397 /* There is no "case ')'" here, as "grep -E ')'" acts like
2398 "grep -E '\)'". */
2399 if (matcher != G_MATCHER_INDEX)
2400 goto fail;
2401 break;
2402
2403 case '\\':
2404 if (1 < len)
2405 switch (q[1])
2406 {
2407 case '\n':
2408 case 'B': case 'S': case 'W': case'\'': case '<':
2409 case 'b': case 's': case 'w': case '`': case '>':
2410 case '1': case '2': case '3': case '4':
2411 case '5': case '6': case '7': case '8': case '9':
2412 goto fail;
2413
2414 case '(': case '+': case '?': case '{': case '|':
2415 /* Pass '\)' to GEAcompile so it can complain. Otherwise,
2416 "grep '\)'" would act like "grep ')'" while "grep '.*\)'
2417 would be an error. */
2418 case ')':
2419 if (matcher == G_MATCHER_INDEX)
2420 goto fail;
2421 FALLTHROUGH;
2422 default:
2423 q++, len--;
2424 break;
2425 }
2426 break;
2427 }
2428
2429 {
2430 size_t n;
2431 if (match_icase)
2432 {
2433 int ni = fgrep_icase_charlen (q, len, &mb_state);
2434 if (ni < 0)
2435 goto fail;
2436 n = ni;
2437 }
2438 else
2439 {
2440 n = mb_clen (q, len, &mb_state);
2441 if (MB_LEN_MAX < n)
2442 goto fail;
2443 }
2444
2445 p = mempcpy (p, q, n);
2446 q += n;
2447 len -= n;
2448 }
2449 }
2450
2451 if (*len_p != p - new_keys)
2452 {
2453 *len_p = p - new_keys;
2454 char *keys_end = mempcpy (keys, new_keys, p - new_keys);
2455 *keys_end = '\n';
2456 }
2457 result = F_MATCHER_INDEX;
2458
2459 fail:
2460 free (new_keys);
2461 return result;
2462 }
2463
2464 int
main(int argc,char ** argv)2465 main (int argc, char **argv)
2466 {
2467 char *keys = NULL;
2468 size_t keycc = 0, keyalloc = 0;
2469 int matcher = -1;
2470 int opt;
2471 int prev_optind, last_recursive;
2472 int fread_errno;
2473 intmax_t default_context;
2474 FILE *fp;
2475 exit_failure = EXIT_TROUBLE;
2476 initialize_main (&argc, &argv);
2477
2478 /* Which command-line options have been specified for filename output.
2479 -1 for -h, 1 for -H, 0 for neither. */
2480 int filename_option = 0;
2481
2482 eolbyte = '\n';
2483 filename_mask = ~0;
2484
2485 max_count = INTMAX_MAX;
2486
2487 /* The value -1 means to use DEFAULT_CONTEXT. */
2488 out_after = out_before = -1;
2489 /* Default before/after context: changed by -C/-NUM options */
2490 default_context = -1;
2491 /* Changed by -o option */
2492 only_matching = false;
2493
2494 /* Internationalization. */
2495 #if defined HAVE_SETLOCALE
2496 setlocale (LC_ALL, "");
2497 #endif
2498 #if defined ENABLE_NLS
2499 bindtextdomain (PACKAGE, LOCALEDIR);
2500 textdomain (PACKAGE);
2501 #endif
2502
2503 init_localeinfo (&localeinfo);
2504
2505 atexit (clean_up_stdout);
2506 c_stack_action (NULL);
2507
2508 last_recursive = 0;
2509
2510 pattern_table = hash_initialize (0, 0, hash_pattern, compare_patterns, 0);
2511 if (!pattern_table)
2512 xalloc_die ();
2513
2514 while (prev_optind = optind,
2515 (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
2516 switch (opt)
2517 {
2518 case 'A':
2519 context_length_arg (optarg, &out_after);
2520 break;
2521
2522 case 'B':
2523 context_length_arg (optarg, &out_before);
2524 break;
2525
2526 case 'C':
2527 /* Set output match context, but let any explicit leading or
2528 trailing amount specified with -A or -B stand. */
2529 context_length_arg (optarg, &default_context);
2530 break;
2531
2532 case 'D':
2533 if (STREQ (optarg, "read"))
2534 devices = READ_DEVICES;
2535 else if (STREQ (optarg, "skip"))
2536 devices = SKIP_DEVICES;
2537 else
2538 die (EXIT_TROUBLE, 0, _("unknown devices method"));
2539 break;
2540
2541 case 'E':
2542 matcher = setmatcher ("egrep", matcher);
2543 break;
2544
2545 case 'F':
2546 matcher = setmatcher ("fgrep", matcher);
2547 break;
2548
2549 case 'P':
2550 matcher = setmatcher ("perl", matcher);
2551 break;
2552
2553 case 'G':
2554 matcher = setmatcher ("grep", matcher);
2555 break;
2556
2557 case 'X': /* undocumented on purpose */
2558 matcher = setmatcher (optarg, matcher);
2559 break;
2560
2561 case 'H':
2562 filename_option = 1;
2563 break;
2564
2565 case 'I':
2566 binary_files = WITHOUT_MATCH_BINARY_FILES;
2567 break;
2568
2569 case 'T':
2570 align_tabs = true;
2571 break;
2572
2573 case 'U':
2574 if (O_BINARY)
2575 binary = true;
2576 break;
2577
2578 case 'u':
2579 /* Obsolete option; it had no effect; FIXME: remove in 2023 */
2580 error (0, 0, _("warning: --unix-byte-offsets (-u) is obsolete"));
2581 break;
2582
2583 case 'V':
2584 show_version = true;
2585 break;
2586
2587 case 'a':
2588 binary_files = TEXT_BINARY_FILES;
2589 break;
2590
2591 case 'b':
2592 out_byte = true;
2593 break;
2594
2595 case 'c':
2596 count_matches = true;
2597 break;
2598
2599 case 'd':
2600 directories = XARGMATCH ("--directories", optarg,
2601 directories_args, directories_types);
2602 if (directories == RECURSE_DIRECTORIES)
2603 last_recursive = prev_optind;
2604 break;
2605
2606 case 'e':
2607 {
2608 ptrdiff_t cc = strlen (optarg);
2609 if (keyalloc < keycc + cc + 1)
2610 {
2611 keyalloc = keycc + cc + 1;
2612 pattern_array = keys = x2realloc (keys, &keyalloc);
2613 }
2614 char *keyend = mempcpy (keys + keycc, optarg, cc);
2615 *keyend = '\n';
2616 keycc = update_patterns (keys, keycc, keycc + cc + 1, "");
2617 }
2618 break;
2619
2620 case 'f':
2621 {
2622 if (STREQ (optarg, "-"))
2623 {
2624 if (binary)
2625 xset_binary_mode (STDIN_FILENO, O_BINARY);
2626 fp = stdin;
2627 }
2628 else
2629 {
2630 fp = fopen (optarg, binary ? "rb" : "r");
2631 if (!fp)
2632 die (EXIT_TROUBLE, errno, "%s", optarg);
2633 }
2634 ptrdiff_t newkeycc = keycc, cc;
2635 for (;; newkeycc += cc)
2636 {
2637 if (keyalloc <= newkeycc + 1)
2638 pattern_array = keys = x2realloc (keys, &keyalloc);
2639 cc = fread (keys + newkeycc, 1, keyalloc - (newkeycc + 1), fp);
2640 if (cc == 0)
2641 break;
2642 }
2643 fread_errno = errno;
2644 if (ferror (fp))
2645 die (EXIT_TROUBLE, fread_errno, "%s", optarg);
2646 if (fp != stdin)
2647 fclose (fp);
2648 /* Append final newline if file ended in non-newline. */
2649 if (newkeycc != keycc && keys[newkeycc - 1] != '\n')
2650 keys[newkeycc++] = '\n';
2651 keycc = update_patterns (keys, keycc, newkeycc, optarg);
2652 }
2653 break;
2654
2655 case 'h':
2656 filename_option = -1;
2657 break;
2658
2659 case 'i':
2660 case 'y': /* For old-timers . . . */
2661 match_icase = true;
2662 break;
2663
2664 case NO_IGNORE_CASE_OPTION:
2665 match_icase = false;
2666 break;
2667
2668 case 'L':
2669 /* Like -l, except list files that don't contain matches.
2670 Inspired by the same option in Hume's gre. */
2671 list_files = LISTFILES_NONMATCHING;
2672 break;
2673
2674 case 'l':
2675 list_files = LISTFILES_MATCHING;
2676 break;
2677
2678 case 'm':
2679 switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
2680 {
2681 case LONGINT_OK:
2682 case LONGINT_OVERFLOW:
2683 break;
2684
2685 default:
2686 die (EXIT_TROUBLE, 0, _("invalid max count"));
2687 }
2688 break;
2689
2690 case 'n':
2691 out_line = true;
2692 break;
2693
2694 case 'o':
2695 only_matching = true;
2696 break;
2697
2698 case 'q':
2699 exit_on_match = true;
2700 exit_failure = 0;
2701 break;
2702
2703 case 'R':
2704 fts_options = basic_fts_options | FTS_LOGICAL;
2705 FALLTHROUGH;
2706 case 'r':
2707 directories = RECURSE_DIRECTORIES;
2708 last_recursive = prev_optind;
2709 break;
2710
2711 case 's':
2712 suppress_errors = true;
2713 break;
2714
2715 case 'v':
2716 out_invert = true;
2717 break;
2718
2719 case 'w':
2720 wordinit ();
2721 match_words = true;
2722 break;
2723
2724 case 'x':
2725 match_lines = true;
2726 break;
2727
2728 case 'Z':
2729 filename_mask = 0;
2730 break;
2731
2732 case 'z':
2733 eolbyte = '\0';
2734 break;
2735
2736 case BINARY_FILES_OPTION:
2737 if (STREQ (optarg, "binary"))
2738 binary_files = BINARY_BINARY_FILES;
2739 else if (STREQ (optarg, "text"))
2740 binary_files = TEXT_BINARY_FILES;
2741 else if (STREQ (optarg, "without-match"))
2742 binary_files = WITHOUT_MATCH_BINARY_FILES;
2743 else
2744 die (EXIT_TROUBLE, 0, _("unknown binary-files type"));
2745 break;
2746
2747 case COLOR_OPTION:
2748 if (optarg)
2749 {
2750 if (!c_strcasecmp (optarg, "always")
2751 || !c_strcasecmp (optarg, "yes")
2752 || !c_strcasecmp (optarg, "force"))
2753 color_option = 1;
2754 else if (!c_strcasecmp (optarg, "never")
2755 || !c_strcasecmp (optarg, "no")
2756 || !c_strcasecmp (optarg, "none"))
2757 color_option = 0;
2758 else if (!c_strcasecmp (optarg, "auto")
2759 || !c_strcasecmp (optarg, "tty")
2760 || !c_strcasecmp (optarg, "if-tty"))
2761 color_option = 2;
2762 else
2763 show_help = 1;
2764 }
2765 else
2766 color_option = 2;
2767 break;
2768
2769 case EXCLUDE_OPTION:
2770 case INCLUDE_OPTION:
2771 for (int cmd = 0; cmd < 2; cmd++)
2772 {
2773 if (!excluded_patterns[cmd])
2774 excluded_patterns[cmd] = new_exclude ();
2775 add_exclude (excluded_patterns[cmd], optarg,
2776 ((opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)
2777 | exclude_options (cmd)));
2778 }
2779 break;
2780 case EXCLUDE_FROM_OPTION:
2781 for (int cmd = 0; cmd < 2; cmd++)
2782 {
2783 if (!excluded_patterns[cmd])
2784 excluded_patterns[cmd] = new_exclude ();
2785 if (add_exclude_file (add_exclude, excluded_patterns[cmd],
2786 optarg, exclude_options (cmd), '\n')
2787 != 0)
2788 die (EXIT_TROUBLE, errno, "%s", optarg);
2789 }
2790 break;
2791
2792 case EXCLUDE_DIRECTORY_OPTION:
2793 strip_trailing_slashes (optarg);
2794 for (int cmd = 0; cmd < 2; cmd++)
2795 {
2796 if (!excluded_directory_patterns[cmd])
2797 excluded_directory_patterns[cmd] = new_exclude ();
2798 add_exclude (excluded_directory_patterns[cmd], optarg,
2799 exclude_options (cmd));
2800 }
2801 break;
2802
2803 case GROUP_SEPARATOR_OPTION:
2804 group_separator = optarg;
2805 break;
2806
2807 case LINE_BUFFERED_OPTION:
2808 line_buffered = true;
2809 break;
2810
2811 case LABEL_OPTION:
2812 label = optarg;
2813 break;
2814
2815 case 0:
2816 /* long options */
2817 break;
2818
2819 default:
2820 usage (EXIT_TROUBLE);
2821 break;
2822
2823 }
2824
2825 if (show_version)
2826 {
2827 version_etc (stdout, getprogname (), PACKAGE_NAME, VERSION,
2828 (char *) NULL);
2829 puts (_("Written by Mike Haertel and others; see\n"
2830 "<https://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>."));
2831 return EXIT_SUCCESS;
2832 }
2833
2834 if (show_help)
2835 usage (EXIT_SUCCESS);
2836
2837 if (keys)
2838 {
2839 if (keycc == 0)
2840 {
2841 /* No keys were specified (e.g. -f /dev/null). Match nothing. */
2842 out_invert ^= true;
2843 match_lines = match_words = false;
2844 keys[keycc++] = '\n';
2845 }
2846 }
2847 else if (optind < argc)
2848 {
2849 /* Make a copy so that it can be reallocated or freed later. */
2850 pattern_array = keys = xstrdup (argv[optind++]);
2851 ptrdiff_t patlen = strlen (keys);
2852 keys[patlen] = '\n';
2853 keycc = update_patterns (keys, 0, patlen + 1, "");
2854 }
2855 else
2856 usage (EXIT_TROUBLE);
2857
2858 /* Strip trailing newline from keys. */
2859 keycc--;
2860
2861 hash_free (pattern_table);
2862
2863 bool possibly_tty = false;
2864 struct stat tmp_stat;
2865 if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
2866 {
2867 if (S_ISREG (tmp_stat.st_mode))
2868 out_stat = tmp_stat;
2869 else if (S_ISCHR (tmp_stat.st_mode))
2870 {
2871 struct stat null_stat;
2872 if (stat ("/dev/null", &null_stat) == 0
2873 && SAME_INODE (tmp_stat, null_stat))
2874 dev_null_output = true;
2875 else
2876 possibly_tty = true;
2877 }
2878 }
2879
2880 /* POSIX says -c, -l and -q are mutually exclusive. In this
2881 implementation, -q overrides -l and -L, which in turn override -c. */
2882 if (exit_on_match | dev_null_output)
2883 list_files = LISTFILES_NONE;
2884 if ((exit_on_match | dev_null_output) || list_files != LISTFILES_NONE)
2885 {
2886 count_matches = false;
2887 done_on_match = true;
2888 }
2889 out_quiet = count_matches | done_on_match;
2890
2891 if (out_after < 0)
2892 out_after = default_context;
2893 if (out_before < 0)
2894 out_before = default_context;
2895
2896 /* If it is easy to see that matching cannot succeed (e.g., 'grep -f
2897 /dev/null'), fail without reading the input. */
2898 if ((max_count == 0
2899 || (keycc == 0 && out_invert && !match_lines && !match_words))
2900 && list_files != LISTFILES_NONMATCHING)
2901 return EXIT_FAILURE;
2902
2903 if (color_option == 2)
2904 color_option = possibly_tty && should_colorize () && isatty (STDOUT_FILENO);
2905 init_colorize ();
2906
2907 if (color_option)
2908 {
2909 /* Legacy. */
2910 char *userval = getenv ("GREP_COLOR");
2911 if (userval != NULL && *userval != '\0')
2912 selected_match_color = context_match_color = userval;
2913
2914 /* New GREP_COLORS has priority. */
2915 parse_grep_colors ();
2916 }
2917
2918 initialize_unibyte_mask ();
2919
2920 if (matcher < 0)
2921 matcher = G_MATCHER_INDEX;
2922
2923 if (matcher == F_MATCHER_INDEX
2924 || matcher == E_MATCHER_INDEX || matcher == G_MATCHER_INDEX)
2925 {
2926 if (match_icase)
2927 setup_ok_fold ();
2928
2929 /* In a single-byte locale, switch from -F to -G if it is a single
2930 pattern that matches words, where -G is typically faster. In a
2931 multibyte locale, switch if the patterns have an encoding error
2932 (where -F does not work) or if -i and the patterns will not work
2933 for -iF. */
2934 if (matcher == F_MATCHER_INDEX)
2935 {
2936 if (! localeinfo.multibyte
2937 ? n_patterns == 1 && match_words
2938 : (contains_encoding_error (keys, keycc)
2939 || (match_icase && !fgrep_icase_available (keys, keycc))))
2940 {
2941 fgrep_to_grep_pattern (&pattern_array, &keycc);
2942 keys = pattern_array;
2943 matcher = G_MATCHER_INDEX;
2944 }
2945 }
2946 /* With two or more patterns, if -F works then switch from either -E
2947 or -G, as -F is probably faster then. */
2948 else if (1 < n_patterns)
2949 matcher = try_fgrep_pattern (matcher, keys, &keycc);
2950 }
2951
2952 execute = matchers[matcher].execute;
2953 compiled_pattern =
2954 matchers[matcher].compile (keys, keycc, matchers[matcher].syntax,
2955 only_matching | color_option);
2956 /* We need one byte prior and one after. */
2957 char eolbytes[3] = { 0, eolbyte, 0 };
2958 size_t match_size;
2959 skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1,
2960 &match_size, NULL) == 0)
2961 == out_invert);
2962
2963 int num_operands = argc - optind;
2964 out_file = (filename_option == 0 && num_operands <= 1
2965 ? - (directories == RECURSE_DIRECTORIES)
2966 : 0 <= filename_option);
2967
2968 if (binary)
2969 xset_binary_mode (STDOUT_FILENO, O_BINARY);
2970
2971 /* Prefer sysconf for page size, as getpagesize typically returns int. */
2972 #ifdef _SC_PAGESIZE
2973 long psize = sysconf (_SC_PAGESIZE);
2974 #else
2975 long psize = getpagesize ();
2976 #endif
2977 if (! (0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2))
2978 abort ();
2979 pagesize = psize;
2980 bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword);
2981 buffer = xmalloc (bufalloc);
2982
2983 if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
2984 devices = READ_DEVICES;
2985
2986 char *const *files;
2987 if (0 < num_operands)
2988 {
2989 files = argv + optind;
2990 }
2991 else if (directories == RECURSE_DIRECTORIES && 0 < last_recursive)
2992 {
2993 static char *const cwd_only[] = { (char *) ".", NULL };
2994 files = cwd_only;
2995 omit_dot_slash = true;
2996 }
2997 else
2998 {
2999 static char *const stdin_only[] = { (char *) "-", NULL };
3000 files = stdin_only;
3001 }
3002
3003 bool status = true;
3004 do
3005 status &= grep_command_line_arg (*files++);
3006 while (*files != NULL);
3007
3008 /* We register via atexit to test stdout. */
3009 return errseen ? EXIT_TROUBLE : status;
3010 }
3011