1 /* grep.c - main driver file for grep.
2 Copyright (C) 1992, 1997-2002, 2004-2020 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
18
19 /* Written July 1992 by Mike Haertel. */
20
21 #include <config.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <wchar.h>
25 #include <inttypes.h>
26 #include <stdarg.h>
27 #include <stdio.h>
28 #include "system.h"
29
30 #include "argmatch.h"
31 #include "c-ctype.h"
32 #include "c-stack.h"
33 #include "closeout.h"
34 #include "colorize.h"
35 #include "die.h"
36 #include "error.h"
37 #include "exclude.h"
38 #include "exitfail.h"
39 #include "fcntl-safer.h"
40 #include "fts_.h"
41 #include "getopt.h"
42 #include "getprogname.h"
43 #include "grep.h"
44 #include "intprops.h"
45 #include "propername.h"
46 #include "quote.h"
47 #include "safe-read.h"
48 #include "search.h"
49 #include "c-strcase.h"
50 #include "version-etc.h"
51 #include "xalloc.h"
52 #include "xbinary-io.h"
53 #include "xstrtol.h"
54
/* Separator characters named for selected and for rejected lines, and
   the string that group_separator defaults to.  */
enum
  {
    SEP_CHAR_SELECTED = ':',
    SEP_CHAR_REJECTED = '-'
  };
static char const SEP_STR_GROUP[] = "--";
58
/* Stat information for standard output, saved when stdout is connected
   to a regular file so that the file can be skipped automatically,
   thus avoiding a potential (racy) infinite loop.  */
static struct stat out_stat;

/* Nonzero if usage information should be displayed before exiting.
   This is an int because the "help" entry of long_options names it
   as the flag to be set.  */
static int show_help;

/* True if the version should be printed on standard output before
   exiting.  */
static bool show_version;

/* True if diagnostics for nonexistent or unreadable files are
   suppressed.  */
static bool suppress_errors;

/* Nonzero if color markers are used.  */
static int color_option;

/* True if only the part of a line matching the expression is shown.  */
static bool only_matching;

/* True if the first content character of a line must be on a tab
   stop.  */
static bool align_tabs;

/* Print width of line numbers and byte offsets.  Nonzero if
   ALIGN_TABS.  */
static int offset_width;
84
/* One entry of the FL_PAIR table: the name of a pattern source and the
   line number recorded for it by fl_add.  */
struct FL_pair
{
  /* Name supplied by the caller of fl_add.  */
  char const *filename;

  /* Number of the source's first pattern line, counting lines across
     all sources added so far, starting from 1.  */
  size_t lineno;
};
91
/* A list of lineno,filename pairs, one per pattern source.  All
   patterns are stored concatenated in a single array, KEYS, whether
   they come from the command line via "-e PAT" or are read from one
   or more -f-specified FILENAMES, so this list records where each
   source's patterns begin.  Given this invocation,
   grep -f <(seq 5) -f <(seq 2) -f <(seq 3) FILE, there will be three
   entries in FL_PAIR: {1, x} {6, y} {8, z}, where x, y and z are just
   place-holders for shell-generated names.  */
static struct FL_pair *fl_pair;

/* Number of slots currently allocated for FL_PAIR.  */
static size_t n_fl_pair_slots;

/* Number of FL_PAIR entries in use.  This counts not only
   -f-specified files, but also individual -e operands and any
   command-line argument that serves as a regular expression.  */
static size_t n_pattern_files;

/* The number of patterns seen so far.  fl_add advances it and records
   it, plus one, as the starting line number of each new entry; those
   starting numbers let pattern_file_name derive a file-relative line
   number.  */
static size_t n_patterns;
109
/* Count the '\n' bytes among the first SIZE bytes of BUF and return
   that count.  */
static size_t _GL_ATTRIBUTE_PURE
count_nl_bytes (char const *buf, size_t size)
{
  char const *const limit = buf + size;
  size_t count = 0;

  /* Hop from one newline to the next, resuming just past each hit.  */
  for (char const *nl = memchr (buf, '\n', size);
       nl;
       nl = memchr (nl + 1, '\n', limit - (nl + 1)))
    count++;

  return count;
}
121
122 /* Append a FILENAME,line-number pair to FL_PAIR, and update
123 pattern-related counts from the contents of BUF with SIZE bytes. */
124 static void
fl_add(char const * buf,size_t size,char const * filename)125 fl_add (char const *buf, size_t size, char const *filename)
126 {
127 if (n_fl_pair_slots <= n_pattern_files)
128 fl_pair = x2nrealloc (fl_pair, &n_fl_pair_slots, sizeof *fl_pair);
129
130 fl_pair[n_pattern_files].lineno = n_patterns + 1;
131 fl_pair[n_pattern_files].filename = filename;
132 n_pattern_files++;
133 n_patterns += count_nl_bytes (buf, size);
134 }
135
136 /* Map the line number, LINENO, of one of the input patterns to the
137 name of the file from which it came. If it was read from stdin
138 or if it was specified on the command line, return "-". */
139 char const * _GL_ATTRIBUTE_PURE
pattern_file_name(size_t lineno,size_t * new_lineno)140 pattern_file_name (size_t lineno, size_t *new_lineno)
141 {
142 size_t i;
143 for (i = 1; i < n_pattern_files; i++)
144 {
145 if (lineno < fl_pair[i].lineno)
146 break;
147 }
148
149 *new_lineno = lineno - fl_pair[i - 1].lineno + 1;
150 return fl_pair[i - 1].filename;
151 }
152
#if HAVE_ASAN
/* Start address and length of the sole poisoned region, remembered so
   that it can be unpoisoned later, just before each following read.  */
static void const *poison_buf;
static size_t poison_len;

/* Unpoison the remembered region, if one has been recorded.  */
static void
clear_asan_poison (void)
{
  if (! poison_buf)
    return;
  __asan_unpoison_memory_region (poison_buf, poison_len);
}

/* Remember the SIZE bytes starting at ADDR as the poisoned region,
   and poison them.  */
static void
asan_poison (void const *addr, size_t size)
{
  poison_len = size;
  poison_buf = addr;

  __asan_poison_memory_region (addr, size);
}
#else
/* Without ASAN, both operations do nothing.  */
static void
clear_asan_poison (void)
{
}

static void
asan_poison (void const volatile *addr, size_t size)
{
}
#endif
178
179 /* The group separator used when context is requested. */
180 static const char *group_separator = SEP_STR_GROUP;
181
182 /* The context and logic for choosing default --color screen attributes
183 (foreground and background colors, etc.) are the following.
184 -- There are eight basic colors available, each with its own
185 nominal luminosity to the human eye and foreground/background
186 codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41],
187 magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46],
188 yellow [89 %, 33/43], and white [100 %, 37/47]).
189 -- Sometimes, white as a background is actually implemented using
190 a shade of light gray, so that a foreground white can be visible
191 on top of it (but most often not).
192 -- Sometimes, black as a foreground is actually implemented using
193 a shade of dark gray, so that it can be visible on top of a
194 background black (but most often not).
195 -- Sometimes, more colors are available, as extensions.
196 -- Other attributes can be selected/deselected (bold [1/22],
197 underline [4/24], standout/inverse [7/27], blink [5/25], and
198 invisible/hidden [8/28]). They are sometimes implemented by
199 using colors instead of what their names imply; e.g., bold is
200 often achieved by using brighter colors. In practice, only bold
201 is really available to us, underline sometimes being mapped by
202 the terminal to some strange color choice, and standout best
203 being left for use by downstream programs such as less(1).
204 -- We cannot assume that any of the extensions or special features
205 are available for the purpose of choosing defaults for everyone.
206 -- The most prevalent default terminal backgrounds are pure black
207 and pure white, and are not necessarily the same shades of
208 those as if they were selected explicitly with SGR sequences.
209 Some terminals use dark or light pictures as default background,
210 but those are covered over by an explicit selection of background
211 color with an SGR sequence; their users will appreciate their
212 background pictures not be covered like this, if possible.
   -- Some uses of color attributes are to make some output items
214 more understated (e.g., context lines); this cannot be achieved
215 by changing the background color.
216 -- For these reasons, the grep color defaults should strive not
217 to change the background color from its default, unless it's
218 for a short item that should be highlighted, not understated.
219 -- The grep foreground color defaults (without an explicitly set
220 background) should provide enough contrast to be readable on any
221 terminal with either a black (dark) or white (light) background.
222 This only leaves red, magenta, green, and cyan (and their bold
223 counterparts) and possibly bold blue. */
/* SGR parameter strings used for matched text, in selected lines and
   in context lines respectively.  The user can override them with the
   deprecated environment variable GREP_COLOR or the newer
   GREP_COLORS.  */
static char const *selected_match_color = "01;31";	/* bold red */
static char const *context_match_color = "01;31";	/* bold red */

/* SGR parameter strings for the other output items.  An empty string
   selects the default color pair.  */
static char const *filename_color = "35";	/* magenta */
static char const *line_num_color = "32";	/* green */
static char const *byte_num_color = "32";	/* green */
static char const *sep_color = "36";		/* cyan */
static char const *selected_line_color = "";	/* default color pair */
static char const *context_line_color = "";	/* default color pair */
237
238 /* Select Graphic Rendition (SGR, "\33[...m") strings. */
239 /* Also Erase in Line (EL) to Right ("\33[K") by default. */
240 /* Why have EL to Right after SGR?
241 -- The behavior of line-wrapping when at the bottom of the
242 terminal screen and at the end of the current line is often
243 such that a new line is introduced, entirely cleared with
244 the current background color which may be different from the
245 default one (see the boolean back_color_erase terminfo(5)
246 capability), thus scrolling the display by one line.
247 The end of this new line will stay in this background color
      even after reverting to the default background color with
      "\33[m", unless it is explicitly cleared again with "\33[K"
250 (which is the behavior the user would instinctively expect
251 from the whole thing). There may be some unavoidable
252 background-color flicker at the end of this new line because
253 of this (when timing with the monitor's redraw is just right).
254 -- The behavior of HT (tab, "\t") is usually the same as that of
255 Cursor Forward Tabulation (CHT) with a default parameter
256 of 1 ("\33[I"), i.e., it performs pure movement to the next
257 tab stop, without any clearing of either content or screen
258 attributes (including background color); try
259 printf 'asdfqwerzxcv\rASDF\tZXCV\n'
260 in a bash(1) shell to demonstrate this. This is not what the
261 user would instinctively expect of HT (but is ok for CHT).
262 The instinctive behavior would include clearing the terminal
263 cells that are skipped over by HT with blank cells in the
264 current screen attributes, including background color;
265 the boolean dest_tabs_magic_smso terminfo(5) capability
266 indicates this saner behavior for HT, but only some rare
267 terminals have it (although it also indicates a special
268 glitch with standout mode in the Teleray terminal for which
      it was initially introduced).  The remedy is to add "\33[K"
270 after each SGR sequence, be it START (to fix the behavior
271 of any HT after that before another SGR) or END (to fix the
272 behavior of an HT in default background color that would
273 follow a line-wrapping at the bottom of the screen in another
274 background color, and to complement doing it after START).
275 Piping grep's output through a pager such as less(1) avoids
276 any HT problems since the pager performs tab expansion.
277
278 Generic disadvantages of this remedy are:
279 -- Some very rare terminals might support SGR but not EL (nobody
280 will use "grep --color" on a terminal that does not support
281 SGR in the first place).
282 -- Having these extra control sequences might somewhat complicate
283 the task of any program trying to parse "grep --color"
284 output in order to extract structuring information from it.
285 A specific disadvantage to doing it after SGR START is:
286 -- Even more possible background color flicker (when timing
287 with the monitor's redraw is just right), even when not at the
288 bottom of the screen.
289 There are no additional disadvantages specific to doing it after
290 SGR END.
291
292 It would be impractical for GNU grep to become a full-fledged
293 terminal program linked against ncurses or the like, so it will
294 not detect terminfo(5) capabilities. */
/* Format that starts a colored span (its %s receives the SGR
   parameter string) and the string that ends one.  By default each is
   followed by Erase in Line to Right ("\33[K"); color_cap_ne_fct
   replaces both with variants lacking it.  */
static char const *sgr_start = "\33[%sm\33[K";
static char const *sgr_end = "\33[m\33[K";
297
298 /* SGR utility functions. */
299 static void
pr_sgr_start(char const * s)300 pr_sgr_start (char const *s)
301 {
302 if (*s)
303 print_start_colorize (sgr_start, s);
304 }
305 static void
pr_sgr_end(char const * s)306 pr_sgr_end (char const *s)
307 {
308 if (*s)
309 print_end_colorize (sgr_end);
310 }
311 static void
pr_sgr_start_if(char const * s)312 pr_sgr_start_if (char const *s)
313 {
314 if (color_option)
315 pr_sgr_start (s);
316 }
317 static void
pr_sgr_end_if(char const * s)318 pr_sgr_end_if (char const *s)
319 {
320 if (color_option)
321 pr_sgr_end (s);
322 }
323
/* One capability of the color dictionary used for GREP_COLORS.  */
struct color_cap
{
  /* Name of the capability.  */
  const char *name;

  /* Address of the color string the capability refers to, if any.  */
  const char **var;

  /* Function associated with the capability, if any.  */
  void (*fct) (void);
};
330
331 static void
color_cap_mt_fct(void)332 color_cap_mt_fct (void)
333 {
334 /* Our caller just set selected_match_color. */
335 context_match_color = selected_match_color;
336 }
337
338 static void
color_cap_rv_fct(void)339 color_cap_rv_fct (void)
340 {
341 /* By this point, it was 1 (or already -1). */
342 color_option = -1; /* That's still != 0. */
343 }
344
345 static void
color_cap_ne_fct(void)346 color_cap_ne_fct (void)
347 {
348 sgr_start = "\33[%sm";
349 sgr_end = "\33[m";
350 }
351
352 /* For GREP_COLORS. */
353 static const struct color_cap color_dict[] =
354 {
355 { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */
356 { "ms", &selected_match_color, NULL }, /* selected matched text */
357 { "mc", &context_match_color, NULL }, /* context matched text */
358 { "fn", &filename_color, NULL }, /* filename */
359 { "ln", &line_num_color, NULL }, /* line number */
360 { "bn", &byte_num_color, NULL }, /* byte (sic) offset */
361 { "se", &sep_color, NULL }, /* separator */
362 { "sl", &selected_line_color, NULL }, /* selected lines */
363 { "cx", &context_line_color, NULL }, /* context lines */
364 { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */
365 { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */
366 { NULL, NULL, NULL }
367 };
368
/* The errno value saved from the most recent failed output function
   on stdout; zero if none of the *_errno wrappers has failed.  */
static int stdout_errno;
371
372 static void
putchar_errno(int c)373 putchar_errno (int c)
374 {
375 if (putchar (c) < 0)
376 stdout_errno = errno;
377 }
378
379 static void
fputs_errno(char const * s)380 fputs_errno (char const *s)
381 {
382 if (fputs (s, stdout) < 0)
383 stdout_errno = errno;
384 }
385
386 static void _GL_ATTRIBUTE_FORMAT_PRINTF (1, 2)
printf_errno(char const * format,...)387 printf_errno (char const *format, ...)
388 {
389 va_list ap;
390 va_start (ap, format);
391 if (vfprintf (stdout, format, ap) < 0)
392 stdout_errno = errno;
393 va_end (ap);
394 }
395
396 static void
fwrite_errno(void const * ptr,size_t size,size_t nmemb)397 fwrite_errno (void const *ptr, size_t size, size_t nmemb)
398 {
399 if (fwrite (ptr, size, nmemb, stdout) != nmemb)
400 stdout_errno = errno;
401 }
402
403 static void
fflush_errno(void)404 fflush_errno (void)
405 {
406 if (fflush (stdout) != 0)
407 stdout_errno = errno;
408 }
409
/* Exclusion patterns for files and for directories.  skipped_file
   indexes each array by its COMMAND_LINE flag, so element 1 applies
   to command-line names and element 0 to all others.  */
static struct exclude *excluded_patterns[2];
static struct exclude *excluded_directory_patterns[2];

/* Short options.  */
static char const short_options[] =
  "0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz";
415
/* Codes for non-boolean long options that have no corresponding short
   equivalents.  They start just above CHAR_MAX so that they cannot
   collide with any short option character.  */
enum
  {
    BINARY_FILES_OPTION = CHAR_MAX + 1,
    COLOR_OPTION,
    EXCLUDE_DIRECTORY_OPTION,
    EXCLUDE_OPTION,
    EXCLUDE_FROM_OPTION,
    GROUP_SEPARATOR_OPTION,
    INCLUDE_OPTION,
    LINE_BUFFERED_OPTION,
    LABEL_OPTION,
    NO_IGNORE_CASE_OPTION
  };
430
431 /* Long options equivalences. */
432 static struct option const long_options[] =
433 {
434 {"basic-regexp", no_argument, NULL, 'G'},
435 {"extended-regexp", no_argument, NULL, 'E'},
436 {"fixed-regexp", no_argument, NULL, 'F'},
437 {"fixed-strings", no_argument, NULL, 'F'},
438 {"perl-regexp", no_argument, NULL, 'P'},
439 {"after-context", required_argument, NULL, 'A'},
440 {"before-context", required_argument, NULL, 'B'},
441 {"binary-files", required_argument, NULL, BINARY_FILES_OPTION},
442 {"byte-offset", no_argument, NULL, 'b'},
443 {"context", required_argument, NULL, 'C'},
444 {"color", optional_argument, NULL, COLOR_OPTION},
445 {"colour", optional_argument, NULL, COLOR_OPTION},
446 {"count", no_argument, NULL, 'c'},
447 {"devices", required_argument, NULL, 'D'},
448 {"directories", required_argument, NULL, 'd'},
449 {"exclude", required_argument, NULL, EXCLUDE_OPTION},
450 {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION},
451 {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION},
452 {"file", required_argument, NULL, 'f'},
453 {"files-with-matches", no_argument, NULL, 'l'},
454 {"files-without-match", no_argument, NULL, 'L'},
455 {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION},
456 {"help", no_argument, &show_help, 1},
457 {"include", required_argument, NULL, INCLUDE_OPTION},
458 {"ignore-case", no_argument, NULL, 'i'},
459 {"no-ignore-case", no_argument, NULL, NO_IGNORE_CASE_OPTION},
460 {"initial-tab", no_argument, NULL, 'T'},
461 {"label", required_argument, NULL, LABEL_OPTION},
462 {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION},
463 {"line-number", no_argument, NULL, 'n'},
464 {"line-regexp", no_argument, NULL, 'x'},
465 {"max-count", required_argument, NULL, 'm'},
466
467 {"no-filename", no_argument, NULL, 'h'},
468 {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION},
469 {"no-messages", no_argument, NULL, 's'},
470 {"null", no_argument, NULL, 'Z'},
471 {"null-data", no_argument, NULL, 'z'},
472 {"only-matching", no_argument, NULL, 'o'},
473 {"quiet", no_argument, NULL, 'q'},
474 {"recursive", no_argument, NULL, 'r'},
475 {"dereference-recursive", no_argument, NULL, 'R'},
476 {"regexp", required_argument, NULL, 'e'},
477 {"invert-match", no_argument, NULL, 'v'},
478 {"silent", no_argument, NULL, 'q'},
479 {"text", no_argument, NULL, 'a'},
480 {"binary", no_argument, NULL, 'U'},
481 {"unix-byte-offsets", no_argument, NULL, 'u'},
482 {"version", no_argument, NULL, 'V'},
483 {"with-filename", no_argument, NULL, 'H'},
484 {"word-regexp", no_argument, NULL, 'w'},
485 {0, 0, 0, 0}
486 };
487
/* Definitions of the flags that grep.h declares.  */
bool match_icase;
bool match_words;
bool match_lines;
char eolbyte;

/* State used for error messages.  */

/* The input file name, or (if standard input) null or a --label
   argument.  */
static char const *filename;

/* True if a leading "./" is omitted from file names in diagnostics.  */
static bool omit_dot_slash;

/* Set to true by suppressible_error each time it is called.  */
static bool errseen;

/* True if output from the current input file has been suppressed
   because an output line had an encoding error.  */
static bool encoding_error_output;
504
/* The ways in which directories can be handled.  The values start
   at 2.  */
enum directories_type
  {
    READ_DIRECTORIES = 2,
    RECURSE_DIRECTORIES,
    SKIP_DIRECTORIES
  };

/* Argument spellings for directory handling, and the corresponding
   directories_type values in the same order.  ARGMATCH_VERIFY checks
   the two tables against each other.  */
static char const *const directories_args[] =
  {
    "read",
    "recurse",
    "skip",
    NULL
  };
static enum directories_type const directories_types[] =
  {
    READ_DIRECTORIES,
    RECURSE_DIRECTORIES,
    SKIP_DIRECTORIES
  };
ARGMATCH_VERIFY (directories_args, directories_types);

/* How to handle directories; the default is to read them.  */
static enum directories_type directories = READ_DIRECTORIES;
524
525 enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK };
526 static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL;
527
528 /* How to handle devices. */
529 static enum
530 {
531 READ_COMMAND_LINE_DEVICES,
532 READ_DEVICES,
533 SKIP_DEVICES
534 } devices = READ_COMMAND_LINE_DEVICES;
535
536 static bool grepfile (int, char const *, bool, bool);
537 static bool grepdesc (int, bool);
538
/* Return true if the file mode M denotes a character device, a block
   device, a socket, or a FIFO.  */
static bool
is_device_mode (mode_t m)
{
  if (S_ISCHR (m) || S_ISBLK (m))
    return true;
  return S_ISSOCK (m) || S_ISFIFO (m);
}
544
545 static bool
skip_devices(bool command_line)546 skip_devices (bool command_line)
547 {
548 return (devices == SKIP_DEVICES
549 || ((devices == READ_COMMAND_LINE_DEVICES) & !command_line));
550 }
551
/* Return true if ST->st_size is defined, i.e., if ST describes a
   regular file, a shared memory object, or a typed memory object.
   Assume the file is not a symbolic link.  */
static bool
usable_st_size (struct stat const *st)
{
  if (S_ISREG (st->st_mode))
    return true;
  return S_TYPEISSHM (st) || S_TYPEISTMO (st);
}
559
/* Stand-ins for SEEK_DATA and SEEK_HOLE on platforms that lack them.
   A stand-in equals SEEK_SET, so code must not rely on it to find
   data or holes; compare against SEEK_SET to detect this case.  */
#ifndef SEEK_DATA
enum
  {
    SEEK_DATA = SEEK_SET
  };
#endif
#ifndef SEEK_HOLE
enum
  {
    SEEK_HOLE = SEEK_SET
  };
#endif
568
569 /* True if lseek with SEEK_CUR or SEEK_DATA failed on the current input. */
570 static bool seek_failed;
571 static bool seek_data_failed;
572
573 /* Functions we'll use to search. */
574 typedef void *(*compile_fp_t) (char *, size_t, reg_syntax_t);
575 typedef size_t (*execute_fp_t) (void *, char const *, size_t, size_t *,
576 char const *);
577 static execute_fp_t execute;
578 static void *compiled_pattern;
579
580 static char const *
input_filename(void)581 input_filename (void)
582 {
583 if (!filename)
584 filename = _("(standard input)");
585 return filename;
586 }
587
588 /* Unless requested, diagnose an error about the input file. */
589 static void
suppressible_error(int errnum)590 suppressible_error (int errnum)
591 {
592 if (! suppress_errors)
593 error (0, errnum, "%s", input_filename ());
594 errseen = true;
595 }
596
597 /* If there has already been a write error, don't bother closing
598 standard output, as that might elicit a duplicate diagnostic. */
599 static void
clean_up_stdout(void)600 clean_up_stdout (void)
601 {
602 if (! stdout_errno)
603 close_stdout ();
604 }
605
/* Yield VAL cast to TYPE.  Use this when TYPE is a pointer type, VAL
   is properly aligned for TYPE, and 'gcc -Wcast-align' cannot infer
   the alignment and would otherwise complain about the cast.  When
   the compiler version test below succeeds, the cast is performed
   with the -Wcast-align diagnostic temporarily ignored; otherwise a
   plain cast is used.  */
#if 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
# define CAST_ALIGNED(type, val)                           \
    ({ __typeof__ (val) cast_aligned_val_ = val;           \
       _Pragma ("GCC diagnostic push")                     \
       _Pragma ("GCC diagnostic ignored \"-Wcast-align\"") \
       (type) cast_aligned_val_;                           \
       _Pragma ("GCC diagnostic pop")                      \
    })
#else
# define CAST_ALIGNED(type, val) ((type) (val))
#endif
620
621 /* An unsigned type suitable for fast matching. */
622 typedef uintmax_t uword;
623
624 struct localeinfo localeinfo;
625
626 /* A mask to test for unibyte characters, with the pattern repeated to
627 fill a uword. For a multibyte character encoding where
628 all bytes are unibyte characters, this is 0. For UTF-8, this is
629 0x808080.... For encodings where unibyte characters have no discerned
630 pattern, this is all 1s. The unsigned char C is a unibyte
631 character if C & UNIBYTE_MASK is zero. If the uword W is the
632 concatenation of bytes, the bytes are all unibyte characters
633 if W & UNIBYTE_MASK is zero. */
634 static uword unibyte_mask;
635
636 static void
initialize_unibyte_mask(void)637 initialize_unibyte_mask (void)
638 {
639 /* For each encoding error I that MASK does not already match,
640 accumulate I's most significant 1 bit by ORing it into MASK.
641 Although any 1 bit of I could be used, in practice high-order
642 bits work better. */
643 unsigned char mask = 0;
644 int ms1b = 1;
645 for (int i = 1; i <= UCHAR_MAX; i++)
646 if ((localeinfo.sbclen[i] != 1) & ! (mask & i))
647 {
648 while (ms1b * 2 <= i)
649 ms1b *= 2;
650 mask |= ms1b;
651 }
652
653 /* Now MASK will detect any encoding-error byte, although it may
654 cry wolf and it may not be optimal. Build a uword-length mask by
655 repeating MASK. */
656 uword uword_max = -1;
657 unibyte_mask = uword_max / UCHAR_MAX * mask;
658 }
659
660 /* Skip the easy bytes in a buffer that is guaranteed to have a sentinel
661 that is not easy, and return a pointer to the first non-easy byte.
662 The easy bytes all have UNIBYTE_MASK off. */
663 static char const * _GL_ATTRIBUTE_PURE
skip_easy_bytes(char const * buf)664 skip_easy_bytes (char const *buf)
665 {
666 /* Search a byte at a time until the pointer is aligned, then a
667 uword at a time until a match is found, then a byte at a time to
668 identify the exact byte. The uword search may go slightly past
669 the buffer end, but that's benign. */
670 char const *p;
671 uword const *s;
672 for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++)
673 if (to_uchar (*p) & unibyte_mask)
674 return p;
675 for (s = CAST_ALIGNED (uword const *, p); ! (*s & unibyte_mask); s++)
676 continue;
677 for (p = (char const *) s; ! (to_uchar (*p) & unibyte_mask); p++)
678 continue;
679 return p;
680 }
681
682 /* Return true if BUF, of size SIZE, has an encoding error.
683 BUF must be followed by at least sizeof (uword) bytes,
684 the first of which may be modified. */
685 static bool
buf_has_encoding_errors(char * buf,size_t size)686 buf_has_encoding_errors (char *buf, size_t size)
687 {
688 if (! unibyte_mask)
689 return false;
690
691 mbstate_t mbs = { 0 };
692 size_t clen;
693
694 buf[size] = -1;
695 for (char const *p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen)
696 {
697 clen = mbrlen (p, buf + size - p, &mbs);
698 if ((size_t) -2 <= clen)
699 return true;
700 }
701
702 return false;
703 }
704
705
/* Return true if the first SIZE bytes of BUF contain a null byte.
   BUF must be followed by at least one byte, which this function
   sets to zero.  */
static bool
buf_has_nulls (char *buf, size_t size)
{
  buf[size] = 0;
  return memchr (buf, 0, size) != NULL;
}
715
716 /* Return true if a file is known to contain null bytes.
717 SIZE bytes have already been read from the file
718 with descriptor FD and status ST. */
719 static bool
file_must_have_nulls(size_t size,int fd,struct stat const * st)720 file_must_have_nulls (size_t size, int fd, struct stat const *st)
721 {
722 /* If the file has holes, it must contain a null byte somewhere. */
723 if (SEEK_HOLE != SEEK_SET && !seek_failed
724 && usable_st_size (st) && size < st->st_size)
725 {
726 off_t cur = size;
727 if (O_BINARY || fd == STDIN_FILENO)
728 {
729 cur = lseek (fd, 0, SEEK_CUR);
730 if (cur < 0)
731 return false;
732 }
733
734 /* Look for a hole after the current location. */
735 off_t hole_start = lseek (fd, cur, SEEK_HOLE);
736 if (0 <= hole_start)
737 {
738 if (lseek (fd, cur, SEEK_SET) < 0)
739 suppressible_error (errno);
740 if (hole_start < st->st_size)
741 return true;
742 }
743 }
744
745 return false;
746 }
747
748 /* Convert STR to a nonnegative integer, storing the result in *OUT.
749 STR must be a valid context length argument; report an error if it
750 isn't. Silently ceiling *OUT at the maximum value, as that is
751 practically equivalent to infinity for grep's purposes. */
752 static void
context_length_arg(char const * str,intmax_t * out)753 context_length_arg (char const *str, intmax_t *out)
754 {
755 switch (xstrtoimax (str, 0, 10, out, ""))
756 {
757 case LONGINT_OK:
758 case LONGINT_OVERFLOW:
759 if (0 <= *out)
760 break;
761 FALLTHROUGH;
762 default:
763 die (EXIT_TROUBLE, 0, "%s: %s", str,
764 _("invalid context length argument"));
765 }
766 }
767
768 /* Return the add_exclude options suitable for excluding a file name.
769 If COMMAND_LINE, it is a command-line file name. */
770 static int
exclude_options(bool command_line)771 exclude_options (bool command_line)
772 {
773 return EXCLUDE_WILDCARDS | (command_line ? 0 : EXCLUDE_ANCHORED);
774 }
775
776 /* Return true if the file with NAME should be skipped.
777 If COMMAND_LINE, it is a command-line argument.
778 If IS_DIR, it is a directory. */
779 static bool
skipped_file(char const * name,bool command_line,bool is_dir)780 skipped_file (char const *name, bool command_line, bool is_dir)
781 {
782 struct exclude **pats;
783 if (! is_dir)
784 pats = excluded_patterns;
785 else if (directories == SKIP_DIRECTORIES)
786 return true;
787 else if (command_line && omit_dot_slash)
788 return false;
789 else
790 pats = excluded_directory_patterns;
791 return pats[command_line] && excluded_file_name (pats[command_line], name);
792 }
793
/* Hairy buffering mechanism for grep.  The intent is to keep all
   reads aligned on a page boundary and multiples of the page size,
   unless a read yields a partial page.  */

/* Base of buffer.  */
static char *buffer;

/* Allocated buffer size, counting slop.  */
static size_t bufalloc;

/* File descriptor.  */
static int bufdesc;

/* Beginning and limit of the user-visible stuff.  */
static char *bufbeg;
static char *buflim;

/* Alignment of memory pages.  */
static size_t pagesize;

/* Read offset.  */
static off_t bufoffset;

/* Position after the last matching line that would have been output
   if we were outputting characters.  */
static off_t after_last_match;

/* True if '\0' should be skipped in data.  */
static bool skip_nuls;

/* True if empty lines should be skipped in data.  */
static bool skip_empty_lines;

/* Total newline count before lastnl.  */
static uintmax_t totalnl;

/* Initial buffer size, not counting slop.  */
enum
  {
    INITIAL_BUFSIZE = 96 * 1024
  };
814
/* Return VAL rounded up to the next multiple of ALIGNMENT; a VAL that
   is already a multiple is returned unchanged.  VAL can be an integer
   or a pointer.  Both arguments are evaluated more than once, so they
   must be free of side effects.  */
#define ALIGN_TO(val, alignment) \
  ((size_t) (val) % (alignment) != 0 \
   ? (val) + ((alignment) - (size_t) (val) % (alignment)) \
   : (val))
821
822 /* Add two numbers that count input bytes or lines, and report an
823 error if the addition overflows. */
824 static uintmax_t
add_count(uintmax_t a,uintmax_t b)825 add_count (uintmax_t a, uintmax_t b)
826 {
827 uintmax_t sum = a + b;
828 if (sum < a)
829 die (EXIT_TROUBLE, 0, _("input is too large to count"));
830 return sum;
831 }
832
/* Return true if each of the SIZE bytes of BUF is zero.  This is
   trivially true when SIZE is zero.  */
static bool
all_zeros (char const *buf, size_t size)
{
  size_t i = 0;
  while (i < size && buf[i] == 0)
    i++;
  return i == size;
}
842
843 /* Reset the buffer for a new file, returning false if we should skip it.
844 Initialize on the first time through. */
845 static bool
reset(int fd,struct stat const * st)846 reset (int fd, struct stat const *st)
847 {
848 bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize);
849 bufbeg[-1] = eolbyte;
850 bufdesc = fd;
851 bufoffset = fd == STDIN_FILENO ? lseek (fd, 0, SEEK_CUR) : 0;
852 seek_failed = bufoffset < 0;
853
854 /* Assume SEEK_DATA fails if SEEK_CUR does. */
855 seek_data_failed = seek_failed;
856
857 if (seek_failed)
858 {
859 if (errno != ESPIPE)
860 {
861 suppressible_error (errno);
862 return false;
863 }
864 bufoffset = 0;
865 }
866 return true;
867 }
868
/* Read new stuff into the buffer, saving the specified
   amount of old stuff.  When we're done, 'bufbeg' points
   to the beginning of the buffer contents, and 'buflim'
   points just after the end.  Return false if there's an error.

   SAVE is the number of bytes just before 'buflim' to keep; they end
   up immediately before the newly read data.  ST is the status of
   the file being read; when its size is usable it limits how far the
   buffer is grown.  A read error is treated as a read of zero bytes,
   so the buffer then holds only the saved bytes.  */
static bool
fillbuf (size_t save, struct stat const *st)
{
  size_t fillsize;
  bool cc = true;
  char *readbuf;
  size_t readsize;

  /* Offset from start of buffer to start of old stuff
     that we want to save.  */
  size_t saved_offset = buflim - save - buffer;

  /* If at least a page is free after 'buflim' (not counting the
     trailing uword), read in place right after the existing data.  */
  if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim)
    {
      readbuf = buflim;
      bufbeg = buflim - save;
    }
  else
    {
      /* Otherwise slide the saved bytes toward the front, allocating
         a bigger buffer first if the current one is too small.  */
      size_t minsize = save + pagesize;
      size_t newsize;
      size_t newalloc;
      char *newbuf;

      /* Grow newsize until it is at least as great as minsize.  */
      for (newsize = bufalloc - pagesize - sizeof (uword);
           newsize < minsize;
           newsize *= 2)
        if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize)
          xalloc_die ();

      /* Try not to allocate more memory than the file size indicates,
         as that might cause unnecessary memory exhaustion if the file
         is large.  However, do not use the original file size as a
         heuristic if we've already read past the file end, as most
         likely the file is growing.  */
      if (usable_st_size (st))
        {
          off_t to_be_read = st->st_size - bufoffset;
          off_t maxsize_off = save + to_be_read;
          if (0 <= to_be_read && to_be_read <= maxsize_off
              && maxsize_off == (size_t) maxsize_off
              && minsize <= (size_t) maxsize_off
              && (size_t) maxsize_off < newsize)
            newsize = maxsize_off;
        }

      /* Add enough room so that the buffer is aligned and has room
         for byte sentinels fore and aft, and so that a uword can
         be read aft.  */
      newalloc = newsize + pagesize + sizeof (uword);

      /* Reuse the existing allocation when it is already big enough.  */
      newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer;
      readbuf = ALIGN_TO (newbuf + 1 + save, pagesize);
      bufbeg = readbuf - save;
      /* Source and destination may overlap when the buffer is reused,
         hence memmove.  */
      memmove (bufbeg, buffer + saved_offset, save);
      bufbeg[-1] = eolbyte;
      if (newbuf != buffer)
        {
          free (buffer);
          buffer = newbuf;
        }
    }

  clear_asan_poison ();

  /* Read a whole number of pages, leaving room for the trailing uword.  */
  readsize = buffer + bufalloc - sizeof (uword) - readbuf;
  readsize -= readsize % pagesize;

  /* Read once, unless 'skip_nuls' is set and the data read is entirely
     zero bytes; in that case count the zeros into 'totalnl', try to
     seek past the hole, and read again.  */
  while (true)
    {
      fillsize = safe_read (bufdesc, readbuf, readsize);
      if (fillsize == SAFE_READ_ERROR)
        {
          fillsize = 0;
          cc = false;
        }
      bufoffset += fillsize;

      if (((fillsize == 0) | !skip_nuls) || !all_zeros (readbuf, fillsize))
        break;
      totalnl = add_count (totalnl, fillsize);

      if (SEEK_DATA != SEEK_SET && !seek_data_failed)
        {
          /* Solaris SEEK_DATA fails with errno == ENXIO in a hole at EOF.  */
          off_t data_start = lseek (bufdesc, bufoffset, SEEK_DATA);
          if (data_start < 0 && errno == ENXIO
              && usable_st_size (st) && bufoffset < st->st_size)
            data_start = lseek (bufdesc, 0, SEEK_END);

          if (data_start < 0)
            seek_data_failed = true;
          else
            {
              /* The bytes skipped over are counted like the zeros
                 that were read.  */
              totalnl = add_count (totalnl, data_start - bufoffset);
              bufoffset = data_start;
            }
        }
    }

  buflim = readbuf + fillsize;

  /* Initialize the following word, because skip_easy_bytes and some
     matchers read (but do not use) those bytes.  This avoids false
     positive reports of these bytes being used uninitialized.  */
  memset (buflim, 0, sizeof (uword));

  /* Mark the part of the buffer not filled by the read or set by
     the above memset call as ASAN-poisoned.  */
  asan_poison (buflim + sizeof (uword),
               bufalloc - (buflim - buffer) - sizeof (uword));

  return cc;
}
988
/* Flags controlling the style of output.  */

/* How to treat input in which grep () deduces null bytes, and whether
   print_line_head checks output for encoding errors.  */
static enum
{
  /* Suppress the matching lines themselves; grep () prints
     "Binary file ... matches" instead.  */
  BINARY_BINARY_FILES,
  /* No null-byte or encoding-error checks are made.  */
  TEXT_BINARY_FILES,
  /* grep () returns 0 as soon as null bytes are deduced.  */
  WITHOUT_MATCH_BINARY_FILES
} binary_files;		/* How to handle binary files.  */
996
/* Options for output as a list of matching/non-matching files.
   grepdesc consults this when deciding whether to print the file
   name alone and when computing its return status.  */
static enum
{
  LISTFILES_NONE,
  LISTFILES_MATCHING,
  LISTFILES_NONMATCHING,
} list_files;
1004
/* Whether to output filenames.  1 means yes, 0 means no, and -1 means
   'grep -r PATTERN FILE' was used and it is not known yet whether
   FILE is a directory (which means yes) or not (which means no).
   grepdesc resolves the -1 case once it has the file's status.  */
static int out_file;

static int filename_mask;	/* If zero, output nulls after filenames.
				   grepdesc ANDs it with '\n' to pick the
				   byte that follows a listed file name.  */
static bool out_quiet;		/* Suppress all normal output.  */
static bool out_invert;		/* Print nonmatching stuff.  */
static bool out_line;		/* Print line numbers.  */
static bool out_byte;		/* Print byte offsets.  */
static intmax_t out_before;	/* Lines of leading context.  */
static intmax_t out_after;	/* Lines of trailing context.  prtext
				   treats a negative value as zero.  */
static bool count_matches;	/* Count matching lines.  */
static intmax_t max_count;	/* Max number of selected
				   lines from an input file.  grep ()
				   copies it into 'outleft' per file.  */
static bool line_buffered;	/* Use line buffering.  */
static char *label = NULL;	/* Fake filename for stdin.  */
1022
1023
/* Internal variables to keep track of byte count, context, etc.
   grep () resets the per-file ones (totalcc, lastout, outleft,
   pending) at the start of each file.  */
static uintmax_t totalcc;	/* Total character count before bufbeg.  */
static char const *lastnl;	/* Pointer after last newline counted.  */
static char *lastout;		/* Pointer after last character output;
				   NULL if no character has been output
				   or if it's conceptually before bufbeg.  */
static intmax_t outleft;	/* Maximum number of selected lines.
				   prtext decrements it as lines are
				   selected.  */
static intmax_t pending;	/* Pending lines of output.
				   Always kept 0 if out_quiet is true.  */
static bool done_on_match;	/* Stop scanning file on first match.  */
static bool exit_on_match;	/* Exit on first match.  */
static bool dev_null_output;	/* Stdout is known to be /dev/null.  */
static bool binary;		/* Use binary rather than text I/O.  */
1037
1038 static void
nlscan(char const * lim)1039 nlscan (char const *lim)
1040 {
1041 size_t newlines = 0;
1042 char const *beg;
1043 for (beg = lastnl; beg < lim; beg++)
1044 {
1045 beg = memchr (beg, eolbyte, lim - beg);
1046 if (!beg)
1047 break;
1048 newlines++;
1049 }
1050 totalnl = add_count (totalnl, newlines);
1051 lastnl = lim;
1052 }
1053
1054 /* Print the current filename. */
1055 static void
print_filename(void)1056 print_filename (void)
1057 {
1058 pr_sgr_start_if (filename_color);
1059 fputs_errno (input_filename ());
1060 pr_sgr_end_if (filename_color);
1061 }
1062
1063 /* Print a character separator. */
1064 static void
print_sep(char sep)1065 print_sep (char sep)
1066 {
1067 pr_sgr_start_if (sep_color);
1068 putchar_errno (sep);
1069 pr_sgr_end_if (sep_color);
1070 }
1071
1072 /* Print a line number or a byte offset. */
1073 static void
print_offset(uintmax_t pos,const char * color)1074 print_offset (uintmax_t pos, const char *color)
1075 {
1076 pr_sgr_start_if (color);
1077 printf_errno ("%*"PRIuMAX, offset_width, pos);
1078 pr_sgr_end_if (color);
1079 }
1080
1081 /* Print a whole line head (filename, line, byte). The output data
1082 starts at BEG and contains LEN bytes; it is followed by at least
1083 sizeof (uword) bytes, the first of which may be temporarily modified.
1084 The output data comes from what is perhaps a larger input line that
1085 goes until LIM, where LIM[-1] is an end-of-line byte. Use SEP as
1086 the separator on output.
1087
1088 Return true unless the line was suppressed due to an encoding error. */
1089
1090 static bool
print_line_head(char * beg,size_t len,char const * lim,char sep)1091 print_line_head (char *beg, size_t len, char const *lim, char sep)
1092 {
1093 if (binary_files != TEXT_BINARY_FILES)
1094 {
1095 char ch = beg[len];
1096 bool encoding_errors = buf_has_encoding_errors (beg, len);
1097 beg[len] = ch;
1098 if (encoding_errors)
1099 {
1100 encoding_error_output = true;
1101 return false;
1102 }
1103 }
1104
1105 if (out_file)
1106 {
1107 print_filename ();
1108 if (filename_mask)
1109 print_sep (sep);
1110 else
1111 putchar_errno (0);
1112 }
1113
1114 if (out_line)
1115 {
1116 if (lastnl < lim)
1117 {
1118 nlscan (beg);
1119 totalnl = add_count (totalnl, 1);
1120 lastnl = lim;
1121 }
1122 print_offset (totalnl, line_num_color);
1123 print_sep (sep);
1124 }
1125
1126 if (out_byte)
1127 {
1128 uintmax_t pos = add_count (totalcc, beg - bufbeg);
1129 print_offset (pos, byte_num_color);
1130 print_sep (sep);
1131 }
1132
1133 if (align_tabs && (out_file | out_line | out_byte) && len != 0)
1134 putchar_errno ('\t');
1135
1136 return true;
1137 }
1138
/* Print the part of the line from BEG up to LIM that involves
   matches, re-running the matcher over the line to locate each one.

   With 'only_matching', print each nonempty match on an output line
   of its own, preceded by a line head.  Otherwise print the text
   before each nonempty match using LINE_COLOR and the match itself
   using MATCH_COLOR.

   Return a pointer to the first byte of the line not yet printed
   (LIM when 'only_matching'), or NULL if print_line_head suppressed
   the output.  */
static char *
print_line_middle (char *beg, char *lim,
                   const char *line_color, const char *match_color)
{
  size_t match_size;
  size_t match_offset;
  char *cur;		/* Where the search resumes.  */
  char *mid = NULL;	/* Start of text skipped over because of empty
                           matches and therefore not yet printed.  */
  char *b;		/* Start of the current match.  */

  for (cur = beg;
       (cur < lim
        && ((match_offset = execute (compiled_pattern, beg, lim - beg,
                                     &match_size, cur)) != (size_t) -1));
       cur = b + match_size)
    {
      b = beg + match_offset;

      /* Avoid matching the empty line at the end of the buffer.  */
      if (b == lim)
        break;

      /* Avoid hanging on grep --color "" foo */
      if (match_size == 0)
        {
          /* Make minimal progress; there may be further non-empty matches.  */
          /* XXX - Could really advance by one whole multi-octet character.  */
          match_size = 1;
          if (!mid)
            mid = cur;
        }
      else
        {
          /* This function is called on a matching line only,
             but is it selected or rejected/context?  */
          if (only_matching)
            {
              char sep = out_invert ? SEP_CHAR_REJECTED : SEP_CHAR_SELECTED;
              if (! print_line_head (b, match_size, lim, sep))
                return NULL;
            }
          else
            {
              pr_sgr_start (line_color);
              /* Include any text left unprinted by earlier empty
                 matches.  */
              if (mid)
                {
                  cur = mid;
                  mid = NULL;
                }
              fwrite_errno (cur, 1, b - cur);
            }

          pr_sgr_start_if (match_color);
          fwrite_errno (b, 1, match_size);
          pr_sgr_end_if (match_color);
          if (only_matching)
            putchar_errno (eolbyte);
        }
    }

  /* With only_matching nothing else on the line is to be printed;
     otherwise text skipped because of empty matches still is.  */
  if (only_matching)
    cur = lim;
  else if (mid)
    cur = mid;

  return cur;
}
1206
1207 static char *
print_line_tail(char * beg,const char * lim,const char * line_color)1208 print_line_tail (char *beg, const char *lim, const char *line_color)
1209 {
1210 size_t eol_size;
1211 size_t tail_size;
1212
1213 eol_size = (lim > beg && lim[-1] == eolbyte);
1214 eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r');
1215 tail_size = lim - eol_size - beg;
1216
1217 if (tail_size > 0)
1218 {
1219 pr_sgr_start (line_color);
1220 fwrite_errno (beg, 1, tail_size);
1221 beg += tail_size;
1222 pr_sgr_end (line_color);
1223 }
1224
1225 return beg;
1226 }
1227
1228 static void
prline(char * beg,char * lim,char sep)1229 prline (char *beg, char *lim, char sep)
1230 {
1231 bool matching;
1232 const char *line_color;
1233 const char *match_color;
1234
1235 if (!only_matching)
1236 if (! print_line_head (beg, lim - beg - 1, lim, sep))
1237 return;
1238
1239 matching = (sep == SEP_CHAR_SELECTED) ^ out_invert;
1240
1241 if (color_option)
1242 {
1243 line_color = (((sep == SEP_CHAR_SELECTED)
1244 ^ (out_invert && (color_option < 0)))
1245 ? selected_line_color : context_line_color);
1246 match_color = (sep == SEP_CHAR_SELECTED
1247 ? selected_match_color : context_match_color);
1248 }
1249 else
1250 line_color = match_color = NULL; /* Shouldn't be used. */
1251
1252 if ((only_matching && matching)
1253 || (color_option && (*line_color || *match_color)))
1254 {
1255 /* We already know that non-matching lines have no match (to colorize). */
1256 if (matching && (only_matching || *match_color))
1257 {
1258 beg = print_line_middle (beg, lim, line_color, match_color);
1259 if (! beg)
1260 return;
1261 }
1262
1263 if (!only_matching && *line_color)
1264 {
1265 /* This code is exercised at least when grep is invoked like this:
1266 echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */
1267 beg = print_line_tail (beg, lim, line_color);
1268 }
1269 }
1270
1271 if (!only_matching && lim > beg)
1272 fwrite_errno (beg, 1, lim - beg);
1273
1274 if (line_buffered)
1275 fflush_errno ();
1276
1277 if (stdout_errno)
1278 die (EXIT_TROUBLE, stdout_errno, _("write error"));
1279
1280 lastout = lim;
1281 }
1282
1283 /* Print pending lines of trailing context prior to LIM. */
1284 static void
prpending(char const * lim)1285 prpending (char const *lim)
1286 {
1287 if (!lastout)
1288 lastout = bufbeg;
1289 for (; 0 < pending && lastout < lim; pending--)
1290 {
1291 char *nl = memchr (lastout, eolbyte, lim - lastout);
1292 prline (lastout, nl + 1, SEP_CHAR_REJECTED);
1293 }
1294 }
1295
1296 /* Output the lines between BEG and LIM. Deal with context. */
1297 static void
prtext(char * beg,char * lim)1298 prtext (char *beg, char *lim)
1299 {
1300 static bool used; /* Avoid printing SEP_STR_GROUP before any output. */
1301 char eol = eolbyte;
1302
1303 if (!out_quiet && pending > 0)
1304 prpending (beg);
1305
1306 char *p = beg;
1307
1308 if (!out_quiet)
1309 {
1310 /* Deal with leading context. */
1311 char const *bp = lastout ? lastout : bufbeg;
1312 intmax_t i;
1313 for (i = 0; i < out_before; ++i)
1314 if (p > bp)
1315 do
1316 --p;
1317 while (p[-1] != eol);
1318
1319 /* Print the group separator unless the output is adjacent to
1320 the previous output in the file. */
1321 if ((0 <= out_before || 0 <= out_after) && used
1322 && p != lastout && group_separator)
1323 {
1324 pr_sgr_start_if (sep_color);
1325 fputs_errno (group_separator);
1326 pr_sgr_end_if (sep_color);
1327 putchar_errno ('\n');
1328 }
1329
1330 while (p < beg)
1331 {
1332 char *nl = memchr (p, eol, beg - p);
1333 nl++;
1334 prline (p, nl, SEP_CHAR_REJECTED);
1335 p = nl;
1336 }
1337 }
1338
1339 intmax_t n;
1340 if (out_invert)
1341 {
1342 /* One or more lines are output. */
1343 for (n = 0; p < lim && n < outleft; n++)
1344 {
1345 char *nl = memchr (p, eol, lim - p);
1346 nl++;
1347 if (!out_quiet)
1348 prline (p, nl, SEP_CHAR_SELECTED);
1349 p = nl;
1350 }
1351 }
1352 else
1353 {
1354 /* Just one line is output. */
1355 if (!out_quiet)
1356 prline (beg, lim, SEP_CHAR_SELECTED);
1357 n = 1;
1358 p = lim;
1359 }
1360
1361 after_last_match = bufoffset - (buflim - p);
1362 pending = out_quiet ? 0 : MAX (0, out_after);
1363 used = true;
1364 outleft -= n;
1365 }
1366
/* Overwrite with EOL every NUL byte in the region that starts at P
   and ends just before LIM.  This avoids running out of memory when
   binary input contains a long run of zeros, which would otherwise
   be treated as part of one long line.  *LIM should be EOL; it is
   set to EOL in any case.  Nothing at all is done when EOL is itself
   NUL.  */
static void
zap_nuls (char *p, char *lim, char eol)
{
  if (!eol)
    return;

  char *nul;
  while ((nul = memchr (p, '\0', lim - p)) != NULL)
    {
      *nul = eol;
      p = nul + 1;
    }

  *lim = eol;
}
1387
1388 /* Scan the specified portion of the buffer, matching lines (or
1389 between matching lines if OUT_INVERT is true). Return a count of
1390 lines printed. Replace all NUL bytes with NUL_ZAPPER as we go. */
1391 static intmax_t
grepbuf(char * beg,char const * lim)1392 grepbuf (char *beg, char const *lim)
1393 {
1394 intmax_t outleft0 = outleft;
1395 char *endp;
1396
1397 for (char *p = beg; p < lim; p = endp)
1398 {
1399 size_t match_size;
1400 size_t match_offset = execute (compiled_pattern, p, lim - p,
1401 &match_size, NULL);
1402 if (match_offset == (size_t) -1)
1403 {
1404 if (!out_invert)
1405 break;
1406 match_offset = lim - p;
1407 match_size = 0;
1408 }
1409 char *b = p + match_offset;
1410 endp = b + match_size;
1411 /* Avoid matching the empty line at the end of the buffer. */
1412 if (!out_invert && b == lim)
1413 break;
1414 if (!out_invert || p < b)
1415 {
1416 char *prbeg = out_invert ? p : b;
1417 char *prend = out_invert ? b : endp;
1418 prtext (prbeg, prend);
1419 if (!outleft || done_on_match)
1420 {
1421 if (exit_on_match)
1422 exit (errseen ? exit_failure : EXIT_SUCCESS);
1423 break;
1424 }
1425 }
1426 }
1427
1428 return outleft0 - outleft;
1429 }
1430
/* Search a given (non-directory) file.  Return a count of lines printed.
   Set *INEOF to true if end-of-file reached.

   FD is the open descriptor to read and ST its file status.  Return
   0 without searching if the buffer cannot be reset for FD, if the
   first read fails, or if the input is deduced to contain nulls while
   binary_files is WITHOUT_MATCH_BINARY_FILES.  'done_on_match' and
   'out_quiet' may be overridden while scanning; they are restored
   before returning from the main scan.  */
static intmax_t
grep (int fd, struct stat const *st, bool *ineof)
{
  intmax_t nlines, i;
  size_t residue, save;
  char oldc;
  char *beg;
  char *lim;
  char eol = eolbyte;
  char nul_zapper = '\0';
  bool done_on_match_0 = done_on_match;
  bool out_quiet_0 = out_quiet;

  /* The value of NLINES when nulls were first deduced in the input;
     this is not necessarily the same as the number of matching lines
     before the first null.  -1 if no input nulls have been deduced.  */
  intmax_t nlines_first_null = -1;

  if (! reset (fd, st))
    return 0;

  /* Start the per-file bookkeeping afresh.  */
  totalcc = 0;
  lastout = 0;
  totalnl = 0;
  outleft = max_count;
  after_last_match = 0;
  pending = 0;
  skip_nuls = skip_empty_lines && !eol;
  encoding_error_output = false;

  nlines = 0;
  residue = 0;
  save = 0;

  if (! fillbuf (save, st))
    {
      suppressible_error (errno);
      return 0;
    }

  offset_width = 0;
  if (align_tabs)
    {
      /* Width is log of maximum number.  Line numbers are origin-1.  */
      uintmax_t num = usable_st_size (st) ? st->st_size : UINTMAX_MAX;
      num += out_line && num < UINTMAX_MAX;
      do
        offset_width++;
      while ((num /= 10) != 0);
    }

  /* Each iteration handles one buffer-load of input.  */
  for (bool firsttime = true; ; firsttime = false)
    {
      /* On first deducing nulls in the input, switch to binary-file
         handling: from then on NULs are replaced by EOL, and unless
         counting matches, output is silenced and scanning stops at
         the next match.  */
      if (nlines_first_null < 0 && eol && binary_files != TEXT_BINARY_FILES
          && (buf_has_nulls (bufbeg, buflim - bufbeg)
              || (firsttime && file_must_have_nulls (buflim - bufbeg, fd, st))))
        {
          if (binary_files == WITHOUT_MATCH_BINARY_FILES)
            return 0;
          if (!count_matches)
            done_on_match = out_quiet = true;
          nlines_first_null = nlines;
          nul_zapper = eol;
          skip_nuls = skip_empty_lines;
        }

      lastnl = bufbeg;
      if (lastout)
        lastout = bufbeg;

      /* Skip over the bytes saved from the previous buffer-load.  */
      beg = bufbeg + save;

      /* no more data to scan (eof) except for maybe a residue -> break */
      if (beg == buflim)
        {
          *ineof = true;
          break;
        }

      zap_nuls (beg, buflim, nul_zapper);

      /* Determine new residue (the length of an incomplete line at the end of
         the buffer, 0 means there is no incomplete last line).  */
      oldc = beg[-1];
      beg[-1] = eol;
      /* FIXME: use rawmemrchr if/when it exists, since we have ensured
         that this use of memrchr is guaranteed never to return NULL.  */
      lim = memrchr (beg - 1, eol, buflim - beg + 1);
      ++lim;
      beg[-1] = oldc;
      /* If the new data contains no end-of-line byte, the old residue
         together with the new data is still one incomplete line.  */
      if (lim == beg)
        lim = beg - residue;
      beg -= residue;
      residue = buflim - lim;

      if (beg < lim)
        {
          if (outleft)
            nlines += grepbuf (beg, lim);
          if (pending)
            prpending (lim);
          if ((!outleft && !pending)
              || (done_on_match && MAX (0, nlines_first_null) < nlines))
            goto finish_grep;
        }

      /* The last OUT_BEFORE lines at the end of the buffer will be needed as
         leading context if there is a matching line at the begin of the
         next data.  Make beg point to their begin.  */
      i = 0;
      beg = lim;
      while (i < out_before && beg > bufbeg && beg != lastout)
        {
          ++i;
          do
            --beg;
          while (beg[-1] != eol);
        }

      /* Detect whether leading context is adjacent to previous output.  */
      if (beg != lastout)
        lastout = 0;

      /* Handle some details and read more data to scan.  */
      save = residue + lim - beg;
      if (out_byte)
        totalcc = add_count (totalcc, buflim - bufbeg - save);
      if (out_line)
        nlscan (beg);
      if (! fillbuf (save, st))
        {
          suppressible_error (errno);
          goto finish_grep;
        }
    }
  /* At end of file an incomplete last line may remain; terminate it
     with an end-of-line byte and search it too.  */
  if (residue)
    {
      *buflim++ = eol;
      if (outleft)
        nlines += grepbuf (bufbeg + save - residue, buflim);
      if (pending)
        prpending (buflim);
    }

 finish_grep:
  done_on_match = done_on_match_0;
  out_quiet = out_quiet_0;
  /* Report a binary match if output hit an encoding error, or if
     lines were selected after nulls were first deduced.  */
  if (!out_quiet && (encoding_error_output
                     || (0 <= nlines_first_null && nlines_first_null < nlines)))
    {
      printf_errno (_("Binary file %s matches\n"), input_filename ());
      if (line_buffered)
        fflush_errno ();
    }
  return nlines;
}
1589
/* Handle ENT, an entry returned by fts_read during the traversal FTS.
   COMMAND_LINE says whether the traversal started from a command-line
   operand; it is narrowed here to whether ENT itself is at the root
   level of the traversal.  Set 'filename' to ENT's path and search
   the entry with grepfile, unless its fts_info value or the skip
   rules exclude it.  Return the result of grepfile, or true if ENT
   was not searched.  */
static bool
grepdirent (FTS *fts, FTSENT *ent, bool command_line)
{
  bool follow;
  command_line &= ent->fts_level == FTS_ROOTLEVEL;

  /* Nothing to do for FTS_DP entries.  */
  if (ent->fts_info == FTS_DP)
    return true;

  /* Entries below the root level are subject to the skip rules; the
     last argument tells skipped_file whether ENT is a directory.  */
  if (!command_line
      && skipped_file (ent->fts_name, false,
                       (ent->fts_info == FTS_D || ent->fts_info == FTS_DC
                        || ent->fts_info == FTS_DNR)))
    {
      fts_set (fts, ent, FTS_SKIP);
      return true;
    }

  filename = ent->fts_path;
  /* Drop the two leading bytes of the path when 'omit_dot_slash' is
     set and the path is longer than one byte.  */
  if (omit_dot_slash && filename[1])
    filename += 2;
  follow = (fts->fts_options & FTS_LOGICAL
            || (fts->fts_options & FTS_COMFOLLOW && command_line));

  switch (ent->fts_info)
    {
    case FTS_D:
      /* When recursing, let fts descend; the directory's contents
         arrive as later entries.  Otherwise do not descend, and hand
         the directory itself to grepfile below.  */
      if (directories == RECURSE_DIRECTORIES)
        return true;
      fts_set (fts, ent, FTS_SKIP);
      break;

    case FTS_DC:
      if (!suppress_errors)
        error (0, 0, _("warning: %s: %s"), filename,
               _("recursive directory loop"));
      return true;

    case FTS_DNR:
    case FTS_ERR:
    case FTS_NS:
      /* fts reported an error for this entry.  */
      suppressible_error (ent->fts_errno);
      return true;

    case FTS_DEFAULT:
    case FTS_NSOK:
      if (skip_devices (command_line))
        {
          struct stat *st = ent->fts_statp;
          struct stat st1;
          if (! st->st_mode)
            {
              /* The file type is not already known.  Get the file status
                 before opening, since opening might have side effects
                 on a device.  */
              int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW;
              if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0)
                {
                  suppressible_error (errno);
                  return true;
                }
              st = &st1;
            }
          if (is_device_mode (st->st_mode))
            return true;
        }
      break;

    case FTS_F:
    case FTS_SLNONE:
      break;

    case FTS_SL:
    case FTS_W:
      /* These entries are not searched.  */
      return true;

    default:
      abort ();
    }

  return grepfile (fts->fts_cwd_fd, ent->fts_accpath, follow, command_line);
}
1672
/* Return true if ERR is an errno value that
   'open ("symlink", ... O_NOFOLLOW ...)' can fail with.  POSIX
   specifies ELOOP, but FreeBSD uses EMLINK and NetBSD uses EFTYPE;
   EFTYPE is tested only on platforms that define it.  */
static bool
open_symlink_nofollow_error (int err)
{
  bool symlink_error = err == ELOOP || err == EMLINK;
#ifdef EFTYPE
  symlink_error = symlink_error || err == EFTYPE;
#endif
  return symlink_error;
}
1686
1687 static bool
grepfile(int dirdesc,char const * name,bool follow,bool command_line)1688 grepfile (int dirdesc, char const *name, bool follow, bool command_line)
1689 {
1690 int oflag = (O_RDONLY | O_NOCTTY
1691 | (IGNORE_DUPLICATE_BRANCH_WARNING
1692 (binary ? O_BINARY : 0))
1693 | (follow ? 0 : O_NOFOLLOW)
1694 | (skip_devices (command_line) ? O_NONBLOCK : 0));
1695 int desc = openat_safer (dirdesc, name, oflag);
1696 if (desc < 0)
1697 {
1698 if (follow || ! open_symlink_nofollow_error (errno))
1699 suppressible_error (errno);
1700 return true;
1701 }
1702 return grepdesc (desc, command_line);
1703 }
1704
1705 /* Read all data from FD, with status ST. Return true if successful,
1706 false (setting errno) otherwise. */
1707 static bool
drain_input(int fd,struct stat const * st)1708 drain_input (int fd, struct stat const *st)
1709 {
1710 ssize_t nbytes;
1711 if (S_ISFIFO (st->st_mode) && dev_null_output)
1712 {
1713 #ifdef SPLICE_F_MOVE
1714 /* Should be faster, since it need not copy data to user space. */
1715 nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1716 INITIAL_BUFSIZE, SPLICE_F_MOVE);
1717 if (0 <= nbytes || errno != EINVAL)
1718 {
1719 while (0 < nbytes)
1720 nbytes = splice (fd, NULL, STDOUT_FILENO, NULL,
1721 INITIAL_BUFSIZE, SPLICE_F_MOVE);
1722 return nbytes == 0;
1723 }
1724 #endif
1725 }
1726 while ((nbytes = safe_read (fd, buffer, bufalloc)))
1727 if (nbytes == SAFE_READ_ERROR)
1728 return false;
1729 return true;
1730 }
1731
1732 /* Finish reading from FD, with status ST and where end-of-file has
1733 been seen if INEOF. Typically this is a no-op, but when reading
1734 from standard input this may adjust the file offset or drain a
1735 pipe. */
1736
1737 static void
finalize_input(int fd,struct stat const * st,bool ineof)1738 finalize_input (int fd, struct stat const *st, bool ineof)
1739 {
1740 if (fd == STDIN_FILENO
1741 && (outleft
1742 ? (!ineof
1743 && (seek_failed
1744 || (lseek (fd, 0, SEEK_END) < 0
1745 /* Linux proc file system has EINVAL (Bug#25180). */
1746 && errno != EINVAL))
1747 && ! drain_input (fd, st))
1748 : (bufoffset != after_last_match && !seek_failed
1749 && lseek (fd, after_last_match, SEEK_SET) < 0)))
1750 suppressible_error (errno);
1751 }
1752
/* Search the file open on descriptor DESC, whose name is in
   'filename'.  COMMAND_LINE is true if the file was named on the
   command line.  A directory is traversed with fts when directories
   are being recursed into; devices, directories and excluded files
   are skipped according to the current options.  DESC is closed
   before returning unless it is standard input.

   Return true if no lines were selected, or with
   LISTFILES_NONMATCHING if some were; also return true when the file
   is skipped or its status cannot be obtained.  For a traversed
   directory, return the conjunction of the grepdirent results.  */
static bool
grepdesc (int desc, bool command_line)
{
  intmax_t count;
  bool status = true;
  bool ineof = false;
  struct stat st;

  /* Get the file status, possibly for the second time.  This catches
     a race condition if the directory entry changes after the
     directory entry is read and before the file is opened.  For
     example, normally DESC is a directory only at the top level, but
     there is an exception if some other process substitutes a
     directory for a non-directory while 'grep' is running.  */
  if (fstat (desc, &st) != 0)
    {
      suppressible_error (errno);
      goto closeout;
    }

  if (desc != STDIN_FILENO && skip_devices (command_line)
      && is_device_mode (st.st_mode))
    goto closeout;

  if (desc != STDIN_FILENO && command_line
      && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0))
    goto closeout;

  /* Don't output file names if invoked as 'grep -r PATTERN NONDIRECTORY'.  */
  if (out_file < 0)
    out_file = !!S_ISDIR (st.st_mode);

  if (desc != STDIN_FILENO
      && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode))
    {
      /* Traverse the directory starting with its full name, because
         unfortunately fts provides no way to traverse the directory
         starting from its file descriptor.  */

      FTS *fts;
      FTSENT *ent;
      int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW);
      char *fts_arg[2];

      /* Close DESC now, to conserve file descriptors if the race
         condition occurs many times in a deep recursion.  */
      if (close (desc) != 0)
        suppressible_error (errno);

      fts_arg[0] = (char *) filename;
      fts_arg[1] = NULL;
      fts = fts_open (fts_arg, opts, NULL);

      if (!fts)
        xalloc_die ();
      while ((ent = fts_read (fts)))
        status &= grepdirent (fts, ent, command_line);
      /* A nonzero errno once fts_read has returned NULL is reported
         as an error.  */
      if (errno)
        suppressible_error (errno);
      if (fts_close (fts) != 0)
        suppressible_error (errno);
      /* DESC is already closed, so return without going through
         'closeout'.  */
      return status;
    }
  if (desc != STDIN_FILENO
      && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode))
          || ((devices == SKIP_DEVICES
               || (devices == READ_COMMAND_LINE_DEVICES && !command_line))
              && is_device_mode (st.st_mode))))
    goto closeout;

  /* If there is a regular file on stdout and the current file refers
     to the same i-node, we have to report the problem and skip it.
     Otherwise when matching lines from some other input reach the
     disk before we open this file, we can end up reading and matching
     those lines and appending them to the file from which we're reading.
     Then we'd have what appears to be an infinite loop that'd terminate
     only upon filling the output file system or reaching a quota.
     However, there is no risk of an infinite loop if grep is generating
     no output, i.e., with --silent, --quiet, -q.
     Similarly, with any of these:
       --max-count=N (-m) (for N >= 2)
       --files-with-matches (-l)
       --files-without-match (-L)
     there is no risk of trouble.
     For --max-count=1, grep stops after printing the first match,
     so there is no risk of malfunction.  But even --max-count=2, with
     input==output, while there is no risk of infloop, there is a race
     condition that could result in "alternate" output.  */
  if (!out_quiet && list_files == LISTFILES_NONE && 1 < max_count
      && S_ISREG (st.st_mode) && SAME_INODE (st, out_stat))
    {
      if (! suppress_errors)
        error (0, 0, _("input file %s is also the output"),
               quote (input_filename ()));
      errseen = true;
      goto closeout;
    }

  count = grep (desc, &st, &ineof);
  if (count_matches)
    {
      if (out_file)
        {
          print_filename ();
          if (filename_mask)
            print_sep (SEP_CHAR_SELECTED);
          else
            putchar_errno (0);
        }
      printf_errno ("%" PRIdMAX "\n", count);
      if (line_buffered)
        fflush_errno ();
    }

  /* STATUS is true when nothing was selected, or, with
     LISTFILES_NONMATCHING, when something was.  */
  status = !count == !(list_files == LISTFILES_NONMATCHING);

  if (list_files == LISTFILES_NONE || dev_null_output)
    finalize_input (desc, &st, ineof);
  else if (status == 0)
    {
      /* The file belongs in the list: print its name, followed by a
         newline, or by a zero byte when 'filename_mask' is zero.  */
      print_filename ();
      putchar_errno ('\n' & filename_mask);
      if (line_buffered)
        fflush_errno ();
    }

 closeout:
  if (desc != STDIN_FILENO && close (desc) != 0)
    suppressible_error (errno);
  return status;
}
1884
1885 static bool
grep_command_line_arg(char const * arg)1886 grep_command_line_arg (char const *arg)
1887 {
1888 if (STREQ (arg, "-"))
1889 {
1890 filename = label;
1891 if (binary)
1892 xset_binary_mode (STDIN_FILENO, O_BINARY);
1893 return grepdesc (STDIN_FILENO, true);
1894 }
1895 else
1896 {
1897 filename = arg;
1898 return grepfile (AT_FDCWD, arg, true, true);
1899 }
1900 }
1901
1902 _Noreturn void usage (int);
1903 void
usage(int status)1904 usage (int status)
1905 {
1906 if (status != 0)
1907 {
1908 fprintf (stderr, _("Usage: %s [OPTION]... PATTERNS [FILE]...\n"),
1909 getprogname ());
1910 fprintf (stderr, _("Try '%s --help' for more information.\n"),
1911 getprogname ());
1912 }
1913 else
1914 {
1915 printf (_("Usage: %s [OPTION]... PATTERNS [FILE]...\n"), getprogname ());
1916 printf (_("Search for PATTERNS in each FILE.\n"));
1917 printf (_("\
1918 Example: %s -i 'hello world' menu.h main.c\n\
1919 PATTERNS can contain multiple patterns separated by newlines.\n\
1920 \n\
1921 Pattern selection and interpretation:\n"), getprogname ());
1922 printf (_("\
1923 -E, --extended-regexp PATTERNS are extended regular expressions\n\
1924 -F, --fixed-strings PATTERNS are strings\n\
1925 -G, --basic-regexp PATTERNS are basic regular expressions\n\
1926 -P, --perl-regexp PATTERNS are Perl regular expressions\n"));
1927 /* -X is deliberately undocumented. */
1928 printf (_("\
1929 -e, --regexp=PATTERNS use PATTERNS for matching\n\
1930 -f, --file=FILE take PATTERNS from FILE\n\
1931 -i, --ignore-case ignore case distinctions in patterns and data\n\
1932 --no-ignore-case do not ignore case distinctions (default)\n\
1933 -w, --word-regexp match only whole words\n\
1934 -x, --line-regexp match only whole lines\n\
1935 -z, --null-data a data line ends in 0 byte, not newline\n"));
1936 printf (_("\
1937 \n\
1938 Miscellaneous:\n\
1939 -s, --no-messages suppress error messages\n\
1940 -v, --invert-match select non-matching lines\n\
1941 -V, --version display version information and exit\n\
1942 --help display this help text and exit\n"));
1943 printf (_("\
1944 \n\
1945 Output control:\n\
1946 -m, --max-count=NUM stop after NUM selected lines\n\
1947 -b, --byte-offset print the byte offset with output lines\n\
1948 -n, --line-number print line number with output lines\n\
1949 --line-buffered flush output on every line\n\
1950 -H, --with-filename print file name with output lines\n\
1951 -h, --no-filename suppress the file name prefix on output\n\
1952 --label=LABEL use LABEL as the standard input file name prefix\n\
1953 "));
1954 printf (_("\
1955 -o, --only-matching show only nonempty parts of lines that match\n\
1956 -q, --quiet, --silent suppress all normal output\n\
1957 --binary-files=TYPE assume that binary files are TYPE;\n\
1958 TYPE is 'binary', 'text', or 'without-match'\n\
1959 -a, --text equivalent to --binary-files=text\n\
1960 "));
1961 printf (_("\
1962 -I equivalent to --binary-files=without-match\n\
1963 -d, --directories=ACTION how to handle directories;\n\
1964 ACTION is 'read', 'recurse', or 'skip'\n\
1965 -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\
1966 ACTION is 'read' or 'skip'\n\
1967 -r, --recursive like --directories=recurse\n\
1968 -R, --dereference-recursive likewise, but follow all symlinks\n\
1969 "));
1970 printf (_("\
1971 --include=GLOB search only files that match GLOB (a file pattern)"
1972 "\n\
1973 --exclude=GLOB skip files that match GLOB\n\
1974 --exclude-from=FILE skip files that match any file pattern from FILE\n\
1975 --exclude-dir=GLOB skip directories that match GLOB\n\
1976 "));
1977 printf (_("\
1978 -L, --files-without-match print only names of FILEs with no selected lines\n\
1979 -l, --files-with-matches print only names of FILEs with selected lines\n\
1980 -c, --count print only a count of selected lines per FILE\n\
1981 -T, --initial-tab make tabs line up (if needed)\n\
1982 -Z, --null print 0 byte after FILE name\n"));
1983 printf (_("\
1984 \n\
1985 Context control:\n\
1986 -B, --before-context=NUM print NUM lines of leading context\n\
1987 -A, --after-context=NUM print NUM lines of trailing context\n\
1988 -C, --context=NUM print NUM lines of output context\n\
1989 "));
1990 printf (_("\
1991 -NUM same as --context=NUM\n\
1992 --color[=WHEN],\n\
1993 --colour[=WHEN] use markers to highlight the matching strings;\n\
1994 WHEN is 'always', 'never', or 'auto'\n\
1995 -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\
1996 \n"));
1997 printf (_("\
1998 When FILE is '-', read standard input. With no FILE, read '.' if\n\
1999 recursive, '-' otherwise. With fewer than two FILEs, assume -h.\n\
2000 Exit status is 0 if any line (or file if -L) is selected, 1 otherwise;\n\
2001 if any error occurs and -q is not given, the exit status is 2.\n"));
2002 emit_bug_reporting_address ();
2003 }
2004 exit (status);
2005 }
2006
2007 /* Pattern compilers and matchers. */
2008
2009 static struct
2010 {
2011 char name[12];
2012 int syntax; /* used if compile == GEAcompile */
2013 compile_fp_t compile;
2014 execute_fp_t execute;
2015 } const matchers[] = {
2016 { "grep", RE_SYNTAX_GREP, GEAcompile, EGexecute },
2017 { "egrep", RE_SYNTAX_EGREP, GEAcompile, EGexecute },
2018 { "fgrep", 0, Fcompile, Fexecute, },
2019 { "awk", RE_SYNTAX_AWK, GEAcompile, EGexecute },
2020 { "gawk", RE_SYNTAX_GNU_AWK, GEAcompile, EGexecute },
2021 { "posixawk", RE_SYNTAX_POSIX_AWK, GEAcompile, EGexecute },
2022 #if HAVE_LIBPCRE
2023 { "perl", 0, Pcompile, Pexecute, },
2024 #endif
2025 };
2026 /* Keep these in sync with the 'matchers' table. */
2027 enum { E_MATCHER_INDEX = 1, F_MATCHER_INDEX = 2, G_MATCHER_INDEX = 0 };
2028
2029 /* Return the index of the matcher corresponding to M if available.
2030 MATCHER is the index of the previous matcher, or -1 if none.
2031 Exit in case of conflicts or if M is not available. */
2032 static int
setmatcher(char const * m,int matcher)2033 setmatcher (char const *m, int matcher)
2034 {
2035 for (int i = 0; i < sizeof matchers / sizeof *matchers; i++)
2036 if (STREQ (m, matchers[i].name))
2037 {
2038 if (0 <= matcher && matcher != i)
2039 die (EXIT_TROUBLE, 0, _("conflicting matchers specified"));
2040 return i;
2041 }
2042
2043 #if !HAVE_LIBPCRE
2044 if (STREQ (m, "perl"))
2045 die (EXIT_TROUBLE, 0,
2046 _("Perl matching not supported in a --disable-perl-regexp build"));
2047 #endif
2048 die (EXIT_TROUBLE, 0, _("invalid matcher %s"), m);
2049 }
2050
/* Split OPTIONS into its white-space-separated words.  Each word is
   copied, NUL-terminated, into consecutive bytes of BUF; if ARGV is
   not NULL, ARGV[0], ARGV[1], ... are set to point at those copies.
   Return the number N of words found; ARGV[N] is left untouched.
   BUF is written even when ARGV is NULL.  A backslash followed by
   another byte stands for that byte, so white space and backslashes
   can be escaped.  */
static size_t
prepend_args (char const *options, char *buf, char **argv)
{
  char const *src = options;
  char *dst = buf;
  size_t count = 0;

  while (true)
    {
      /* Skip any white space in front of the next word.  */
      for (; c_isspace (to_uchar (*src)); src++)
        continue;
      if (*src == '\0')
        break;

      if (argv != NULL)
        argv[count] = dst;
      count++;

      /* Copy the word.  At least one byte is always consumed, so an
         escaped white space byte at the start of a word is kept.  */
      do
        {
          char c = *src++;
          if (c == '\\' && *src != '\0')
            c = *src++;
          *dst++ = c;
        }
      while (*src != '\0' && ! c_isspace (to_uchar (*src)));

      *dst++ = '\0';
    }

  return count;
}
2081
2082 /* Prepend the whitespace-separated options in OPTIONS to the argument
2083 vector of a main program with argument count *PARGC and argument
2084 vector *PARGV. Return the number of options prepended. */
2085 static int
prepend_default_options(char const * options,int * pargc,char *** pargv)2086 prepend_default_options (char const *options, int *pargc, char ***pargv)
2087 {
2088 if (options && *options)
2089 {
2090 char *buf = xmalloc (strlen (options) + 1);
2091 size_t prepended = prepend_args (options, buf, NULL);
2092 int argc = *pargc;
2093 char *const *argv = *pargv;
2094 char **pp;
2095 enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) };
2096 if (MAX_ARGS - argc < prepended)
2097 xalloc_die ();
2098 pp = xmalloc ((prepended + argc + 1) * sizeof *pp);
2099 *pargc = prepended + argc;
2100 *pargv = pp;
2101 *pp++ = *argv++;
2102 pp += prepend_args (options, buf, pp);
2103 while ((*pp++ = *argv++))
2104 continue;
2105 return prepended;
2106 }
2107
2108 return 0;
2109 }
2110
2111 /* Get the next non-digit option from ARGC and ARGV.
2112 Return -1 if there are no more options.
2113 Process any digit options that were encountered on the way,
2114 and store the resulting integer into *DEFAULT_CONTEXT. */
2115 static int
get_nondigit_option(int argc,char * const * argv,intmax_t * default_context)2116 get_nondigit_option (int argc, char *const *argv, intmax_t *default_context)
2117 {
2118 static int prev_digit_optind = -1;
2119 int this_digit_optind;
2120 bool was_digit;
2121 char buf[INT_BUFSIZE_BOUND (intmax_t) + 4];
2122 char *p = buf;
2123 int opt;
2124
2125 was_digit = false;
2126 this_digit_optind = optind;
2127 while (true)
2128 {
2129 opt = getopt_long (argc, (char **) argv, short_options,
2130 long_options, NULL);
2131 if (! c_isdigit (opt))
2132 break;
2133
2134 if (prev_digit_optind != this_digit_optind || !was_digit)
2135 {
2136 /* Reset to start another context length argument. */
2137 p = buf;
2138 }
2139 else
2140 {
2141 /* Suppress trivial leading zeros, to avoid incorrect
2142 diagnostic on strings like 00000000000. */
2143 p -= buf[0] == '0';
2144 }
2145
2146 if (p == buf + sizeof buf - 4)
2147 {
2148 /* Too many digits. Append "..." to make context_length_arg
2149 complain about "X...", where X contains the digits seen
2150 so far. */
2151 strcpy (p, "...");
2152 p += 3;
2153 break;
2154 }
2155 *p++ = opt;
2156
2157 was_digit = true;
2158 prev_digit_optind = this_digit_optind;
2159 this_digit_optind = optind;
2160 }
2161 if (p != buf)
2162 {
2163 *p = '\0';
2164 context_length_arg (buf, default_context);
2165 }
2166
2167 return opt;
2168 }
2169
2170 /* Parse GREP_COLORS. The default would look like:
2171 GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36'
2172 with boolean capabilities (ne and rv) unset (i.e., omitted).
2173 No character escaping is needed or supported. */
2174 static void
parse_grep_colors(void)2175 parse_grep_colors (void)
2176 {
2177 const char *p;
2178 char *q;
2179 char *name;
2180 char *val;
2181
2182 p = getenv ("GREP_COLORS"); /* Plural! */
2183 if (p == NULL || *p == '\0')
2184 return;
2185
2186 /* Work off a writable copy. */
2187 q = xstrdup (p);
2188
2189 name = q;
2190 val = NULL;
2191 /* From now on, be well-formed or you're gone. */
2192 for (;;)
2193 if (*q == ':' || *q == '\0')
2194 {
2195 char c = *q;
2196 struct color_cap const *cap;
2197
2198 *q++ = '\0'; /* Terminate name or val. */
2199 /* Empty name without val (empty cap)
2200 * won't match and will be ignored. */
2201 for (cap = color_dict; cap->name; cap++)
2202 if (STREQ (cap->name, name))
2203 break;
2204 /* If name unknown, go on for forward compatibility. */
2205 if (cap->var && val)
2206 *(cap->var) = val;
2207 if (cap->fct)
2208 cap->fct ();
2209 if (c == '\0')
2210 return;
2211 name = q;
2212 val = NULL;
2213 }
2214 else if (*q == '=')
2215 {
2216 if (q == name || val)
2217 return;
2218 *q++ = '\0'; /* Terminate name. */
2219 val = q; /* Can be the empty string. */
2220 }
2221 else if (val == NULL)
2222 q++; /* Accumulate name. */
2223 else if (*q == ';' || c_isdigit (*q))
2224 q++; /* Accumulate val. Protect the terminal from being sent crap. */
2225 else
2226 return;
2227 }
2228
/* Return true if mb_clen reports (size_t) -1 or (size_t) -2 for any
   character while stepping through the PATLEN bytes at PAT, i.e., if
   PAT contains an encoding error.  */
static bool
contains_encoding_error (char const *pat, size_t patlen)
{
  mbstate_t mbs = { 0 };
  size_t pos = 0;

  while (pos < patlen)
    {
      size_t charlen = mb_clen (pat + pos, patlen - pos, &mbs);
      if (charlen == (size_t) -1 || charlen == (size_t) -2)
        return true;
      pos += charlen;
    }

  return false;
}
2244
2245 /* Return the number of bytes in the initial character of PAT, of size
2246 PATLEN, if Fcompile can handle that character. Return -1 if
2247 Fcompile cannot handle it. MBS is the multibyte conversion state.
2248
2249 Fcompile can handle a character C if C is single-byte, or if C has no
2250 case folded counterparts and toupper translates none of its bytes. */
2251
2252 static int
fgrep_icase_charlen(char const * pat,size_t patlen,mbstate_t * mbs)2253 fgrep_icase_charlen (char const *pat, size_t patlen, mbstate_t *mbs)
2254 {
2255 int n = localeinfo.sbclen[to_uchar (*pat)];
2256 if (n < 0)
2257 {
2258 wchar_t wc;
2259 wchar_t folded[CASE_FOLDED_BUFSIZE];
2260 size_t wn = mbrtowc (&wc, pat, patlen, mbs);
2261 if (MB_LEN_MAX < wn || case_folded_counterparts (wc, folded))
2262 return -1;
2263 for (int i = wn; 0 < --i; )
2264 {
2265 unsigned char c = pat[i];
2266 if (toupper (c) != c)
2267 return -1;
2268 }
2269 n = wn;
2270 }
2271 return n;
2272 }
2273
/* Return true if the -F patterns PAT, of size PATLEN, contain only
   characters that fgrep_icase_charlen accepts (single-byte characters
   or characters not subject to case folding), and so can be processed
   by Fcompile.  */

static bool
fgrep_icase_available (char const *pat, size_t patlen)
{
  mbstate_t mbs = {0,};
  size_t pos = 0;

  while (pos < patlen)
    {
      int charlen = fgrep_icase_charlen (pat + pos, patlen - pos, &mbs);
      if (charlen < 0)
        return false;
      pos += charlen;
    }

  return true;
}
2293
/* Change the pattern *KEYS_P, of size *LEN_P, from fgrep to grep style:
   put a backslash in front of each single-byte '$', '*', '.', '[',
   '\\' or '^', and copy everything else unchanged.  The old buffer is
   freed; *KEYS_P and *LEN_P are updated to describe a newly allocated
   buffer holding the result.  */

void
fgrep_to_grep_pattern (char **keys_p, size_t *len_p)
{
  char const *src = *keys_p;
  size_t remaining = *len_p;
  mbstate_t mb_state = { 0 };

  /* Each input byte yields at most two output bytes.  */
  char *new_keys = xnmalloc (remaining + 1, 2);
  char *dst = new_keys;

  while (remaining != 0)
    {
      size_t n = mb_clen (src, remaining, &mb_state);
      bool escape_candidate;

      if (n == (size_t) -1)
        {
          /* Encoding error: reset the conversion state and treat the
             offending byte as a single-byte character.  */
          memset (&mb_state, 0, sizeof mb_state);
          n = 1;
          escape_candidate = true;
        }
      else if (n == (size_t) -2)
        {
          /* Incomplete character at the end: copy the rest as is,
             even if only one byte remains.  */
          n = remaining;
          escape_candidate = false;
        }
      else
        escape_candidate = n == 1;

      if (escape_candidate)
        {
          char c = *src;
          if (c == '$' || c == '*' || c == '.'
              || c == '[' || c == '\\' || c == '^')
            *dst++ = '\\';
          *dst++ = c;
        }
      else
        {
          memcpy (dst, src, n);
          dst += n;
        }

      src += n;
      remaining -= n;
    }

  free (*keys_p);
  *keys_p = new_keys;
  *len_p = dst - new_keys;
}
2337
2338 /* If it is easy, convert the MATCHER-style patterns KEYS (of size
2339 *LEN_P) to -F style, update *LEN_P to a possibly-smaller value, and
2340 return F_MATCHER_INDEX. If not, leave KEYS and *LEN_P alone and
2341 return MATCHER. This function is conservative and sometimes misses
2342 conversions, e.g., it does not convert the -E pattern "(a|a|[aa])"
2343 to the -F pattern "a". */
2344
2345 static int
try_fgrep_pattern(int matcher,char * keys,size_t * len_p)2346 try_fgrep_pattern (int matcher, char *keys, size_t *len_p)
2347 {
2348 int result = matcher;
2349 size_t len = *len_p;
2350 char *new_keys = xmalloc (len + 1);
2351 char *p = new_keys;
2352 char const *q = keys;
2353 mbstate_t mb_state = { 0 };
2354
2355 while (len != 0)
2356 {
2357 switch (*q)
2358 {
2359 case '$': case '*': case '.': case '[': case '^':
2360 goto fail;
2361
2362 case '(': case '+': case '?': case '{': case '|':
2363 if (matcher != G_MATCHER_INDEX)
2364 goto fail;
2365 break;
2366
2367 case '\\':
2368 if (1 < len)
2369 switch (q[1])
2370 {
2371 case '\n':
2372 case 'B': case 'S': case 'W': case'\'': case '<':
2373 case 'b': case 's': case 'w': case '`': case '>':
2374 case '1': case '2': case '3': case '4':
2375 case '5': case '6': case '7': case '8': case '9':
2376 goto fail;
2377
2378 case '(': case '+': case '?': case '{': case '|':
2379 if (matcher == G_MATCHER_INDEX)
2380 goto fail;
2381 FALLTHROUGH;
2382 default:
2383 q++, len--;
2384 break;
2385 }
2386 break;
2387 }
2388
2389 {
2390 size_t n;
2391 if (match_icase)
2392 {
2393 int ni = fgrep_icase_charlen (q, len, &mb_state);
2394 if (ni < 0)
2395 goto fail;
2396 n = ni;
2397 }
2398 else
2399 {
2400 n = mb_clen (q, len, &mb_state);
2401 if (MB_LEN_MAX < n)
2402 goto fail;
2403 }
2404
2405 p = mempcpy (p, q, n);
2406 q += n;
2407 len -= n;
2408 }
2409 }
2410
2411 if (*len_p != p - new_keys)
2412 {
2413 *len_p = p - new_keys;
2414 memcpy (keys, new_keys, p - new_keys);
2415 }
2416 result = F_MATCHER_INDEX;
2417
2418 fail:
2419 free (new_keys);
2420 return result;
2421 }
2422
2423 int
main(int argc,char ** argv)2424 main (int argc, char **argv)
2425 {
2426 char *keys = NULL;
2427 size_t keycc = 0, oldcc, keyalloc = 0;
2428 int matcher = -1;
2429 size_t cc;
2430 int opt, prepended;
2431 int prev_optind, last_recursive;
2432 int fread_errno;
2433 intmax_t default_context;
2434 FILE *fp;
2435 exit_failure = EXIT_TROUBLE;
2436 initialize_main (&argc, &argv);
2437
2438 /* Which command-line options have been specified for filename output.
2439 -1 for -h, 1 for -H, 0 for neither. */
2440 int filename_option = 0;
2441
2442 eolbyte = '\n';
2443 filename_mask = ~0;
2444
2445 max_count = INTMAX_MAX;
2446
2447 /* The value -1 means to use DEFAULT_CONTEXT. */
2448 out_after = out_before = -1;
2449 /* Default before/after context: changed by -C/-NUM options */
2450 default_context = -1;
2451 /* Changed by -o option */
2452 only_matching = false;
2453
2454 /* Internationalization. */
2455 #if defined HAVE_SETLOCALE
2456 setlocale (LC_ALL, "");
2457 #endif
2458 #if defined ENABLE_NLS
2459 bindtextdomain (PACKAGE, LOCALEDIR);
2460 textdomain (PACKAGE);
2461 #endif
2462
2463 init_localeinfo (&localeinfo);
2464
2465 atexit (clean_up_stdout);
2466 c_stack_action (NULL);
2467
2468 last_recursive = 0;
2469
2470 prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv);
2471 if (prepended)
2472 error (0, 0, _("warning: GREP_OPTIONS is deprecated;"
2473 " please use an alias or script"));
2474
2475 while (prev_optind = optind,
2476 (opt = get_nondigit_option (argc, argv, &default_context)) != -1)
2477 switch (opt)
2478 {
2479 case 'A':
2480 context_length_arg (optarg, &out_after);
2481 break;
2482
2483 case 'B':
2484 context_length_arg (optarg, &out_before);
2485 break;
2486
2487 case 'C':
2488 /* Set output match context, but let any explicit leading or
2489 trailing amount specified with -A or -B stand. */
2490 context_length_arg (optarg, &default_context);
2491 break;
2492
2493 case 'D':
2494 if (STREQ (optarg, "read"))
2495 devices = READ_DEVICES;
2496 else if (STREQ (optarg, "skip"))
2497 devices = SKIP_DEVICES;
2498 else
2499 die (EXIT_TROUBLE, 0, _("unknown devices method"));
2500 break;
2501
2502 case 'E':
2503 matcher = setmatcher ("egrep", matcher);
2504 break;
2505
2506 case 'F':
2507 matcher = setmatcher ("fgrep", matcher);
2508 break;
2509
2510 case 'P':
2511 matcher = setmatcher ("perl", matcher);
2512 break;
2513
2514 case 'G':
2515 matcher = setmatcher ("grep", matcher);
2516 break;
2517
2518 case 'X': /* undocumented on purpose */
2519 matcher = setmatcher (optarg, matcher);
2520 break;
2521
2522 case 'H':
2523 filename_option = 1;
2524 break;
2525
2526 case 'I':
2527 binary_files = WITHOUT_MATCH_BINARY_FILES;
2528 break;
2529
2530 case 'T':
2531 align_tabs = true;
2532 break;
2533
2534 case 'U':
2535 if (O_BINARY)
2536 binary = true;
2537 break;
2538
2539 case 'u':
2540 /* Obsolete option; it has no effect. FIXME: Diagnose use of
2541 this option starting in (say) the year 2020. */
2542 break;
2543
2544 case 'V':
2545 show_version = true;
2546 break;
2547
2548 case 'a':
2549 binary_files = TEXT_BINARY_FILES;
2550 break;
2551
2552 case 'b':
2553 out_byte = true;
2554 break;
2555
2556 case 'c':
2557 count_matches = true;
2558 break;
2559
2560 case 'd':
2561 directories = XARGMATCH ("--directories", optarg,
2562 directories_args, directories_types);
2563 if (directories == RECURSE_DIRECTORIES)
2564 last_recursive = prev_optind;
2565 break;
2566
2567 case 'e':
2568 cc = strlen (optarg);
2569 if (keyalloc < keycc + cc + 1)
2570 {
2571 keyalloc = keycc + cc + 1;
2572 keys = x2realloc (keys, &keyalloc);
2573 }
2574 oldcc = keycc;
2575 memcpy (keys + oldcc, optarg, cc);
2576 keycc += cc;
2577 keys[keycc++] = '\n';
2578 fl_add (keys + oldcc, cc + 1, "");
2579 break;
2580
2581 case 'f':
2582 if (STREQ (optarg, "-"))
2583 {
2584 if (binary)
2585 xset_binary_mode (STDIN_FILENO, O_BINARY);
2586 fp = stdin;
2587 }
2588 else
2589 {
2590 fp = fopen (optarg, binary ? "rb" : "r");
2591 if (!fp)
2592 die (EXIT_TROUBLE, errno, "%s", optarg);
2593 }
2594 oldcc = keycc;
2595 for (;; keycc += cc)
2596 {
2597 if (keyalloc <= keycc + 1)
2598 keys = x2realloc (keys, &keyalloc);
2599 cc = fread (keys + keycc, 1, keyalloc - (keycc + 1), fp);
2600 if (cc == 0)
2601 break;
2602 }
2603 fread_errno = errno;
2604 if (ferror (fp))
2605 die (EXIT_TROUBLE, fread_errno, "%s", optarg);
2606 if (fp != stdin)
2607 fclose (fp);
2608 /* Append final newline if file ended in non-newline. */
2609 if (oldcc != keycc && keys[keycc - 1] != '\n')
2610 keys[keycc++] = '\n';
2611 fl_add (keys + oldcc, keycc - oldcc, optarg);
2612 break;
2613
2614 case 'h':
2615 filename_option = -1;
2616 break;
2617
2618 case 'i':
2619 case 'y': /* For old-timers . . . */
2620 match_icase = true;
2621 break;
2622
2623 case NO_IGNORE_CASE_OPTION:
2624 match_icase = false;
2625 break;
2626
2627 case 'L':
2628 /* Like -l, except list files that don't contain matches.
2629 Inspired by the same option in Hume's gre. */
2630 list_files = LISTFILES_NONMATCHING;
2631 break;
2632
2633 case 'l':
2634 list_files = LISTFILES_MATCHING;
2635 break;
2636
2637 case 'm':
2638 switch (xstrtoimax (optarg, 0, 10, &max_count, ""))
2639 {
2640 case LONGINT_OK:
2641 case LONGINT_OVERFLOW:
2642 break;
2643
2644 default:
2645 die (EXIT_TROUBLE, 0, _("invalid max count"));
2646 }
2647 break;
2648
2649 case 'n':
2650 out_line = true;
2651 break;
2652
2653 case 'o':
2654 only_matching = true;
2655 break;
2656
2657 case 'q':
2658 exit_on_match = true;
2659 exit_failure = 0;
2660 break;
2661
2662 case 'R':
2663 fts_options = basic_fts_options | FTS_LOGICAL;
2664 FALLTHROUGH;
2665 case 'r':
2666 directories = RECURSE_DIRECTORIES;
2667 last_recursive = prev_optind;
2668 break;
2669
2670 case 's':
2671 suppress_errors = true;
2672 break;
2673
2674 case 'v':
2675 out_invert = true;
2676 break;
2677
2678 case 'w':
2679 wordinit ();
2680 match_words = true;
2681 break;
2682
2683 case 'x':
2684 match_lines = true;
2685 break;
2686
2687 case 'Z':
2688 filename_mask = 0;
2689 break;
2690
2691 case 'z':
2692 eolbyte = '\0';
2693 break;
2694
2695 case BINARY_FILES_OPTION:
2696 if (STREQ (optarg, "binary"))
2697 binary_files = BINARY_BINARY_FILES;
2698 else if (STREQ (optarg, "text"))
2699 binary_files = TEXT_BINARY_FILES;
2700 else if (STREQ (optarg, "without-match"))
2701 binary_files = WITHOUT_MATCH_BINARY_FILES;
2702 else
2703 die (EXIT_TROUBLE, 0, _("unknown binary-files type"));
2704 break;
2705
2706 case COLOR_OPTION:
2707 if (optarg)
2708 {
2709 if (!c_strcasecmp (optarg, "always")
2710 || !c_strcasecmp (optarg, "yes")
2711 || !c_strcasecmp (optarg, "force"))
2712 color_option = 1;
2713 else if (!c_strcasecmp (optarg, "never")
2714 || !c_strcasecmp (optarg, "no")
2715 || !c_strcasecmp (optarg, "none"))
2716 color_option = 0;
2717 else if (!c_strcasecmp (optarg, "auto")
2718 || !c_strcasecmp (optarg, "tty")
2719 || !c_strcasecmp (optarg, "if-tty"))
2720 color_option = 2;
2721 else
2722 show_help = 1;
2723 }
2724 else
2725 color_option = 2;
2726 break;
2727
2728 case EXCLUDE_OPTION:
2729 case INCLUDE_OPTION:
2730 for (int cmd = 0; cmd < 2; cmd++)
2731 {
2732 if (!excluded_patterns[cmd])
2733 excluded_patterns[cmd] = new_exclude ();
2734 add_exclude (excluded_patterns[cmd], optarg,
2735 ((opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0)
2736 | exclude_options (cmd)));
2737 }
2738 break;
2739 case EXCLUDE_FROM_OPTION:
2740 for (int cmd = 0; cmd < 2; cmd++)
2741 {
2742 if (!excluded_patterns[cmd])
2743 excluded_patterns[cmd] = new_exclude ();
2744 if (add_exclude_file (add_exclude, excluded_patterns[cmd],
2745 optarg, exclude_options (cmd), '\n')
2746 != 0)
2747 die (EXIT_TROUBLE, errno, "%s", optarg);
2748 }
2749 break;
2750
2751 case EXCLUDE_DIRECTORY_OPTION:
2752 strip_trailing_slashes (optarg);
2753 for (int cmd = 0; cmd < 2; cmd++)
2754 {
2755 if (!excluded_directory_patterns[cmd])
2756 excluded_directory_patterns[cmd] = new_exclude ();
2757 add_exclude (excluded_directory_patterns[cmd], optarg,
2758 exclude_options (cmd));
2759 }
2760 break;
2761
2762 case GROUP_SEPARATOR_OPTION:
2763 group_separator = optarg;
2764 break;
2765
2766 case LINE_BUFFERED_OPTION:
2767 line_buffered = true;
2768 break;
2769
2770 case LABEL_OPTION:
2771 label = optarg;
2772 break;
2773
2774 case 0:
2775 /* long options */
2776 break;
2777
2778 default:
2779 usage (EXIT_TROUBLE);
2780 break;
2781
2782 }
2783
2784 if (show_version)
2785 {
2786 version_etc (stdout, getprogname (), PACKAGE_NAME, VERSION,
2787 (char *) NULL);
2788 puts (_("Written by Mike Haertel and others; see\n"
2789 "<https://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>."));
2790 return EXIT_SUCCESS;
2791 }
2792
2793 if (show_help)
2794 usage (EXIT_SUCCESS);
2795
2796 if (keys)
2797 {
2798 if (keycc == 0)
2799 {
2800 /* No keys were specified (e.g. -f /dev/null). Match nothing. */
2801 out_invert ^= true;
2802 match_lines = match_words = false;
2803 }
2804 else
2805 /* Strip trailing newline. */
2806 --keycc;
2807 }
2808 else if (optind < argc)
2809 {
2810 /* Make a copy so that it can be reallocated or freed later. */
2811 keycc = strlen (argv[optind]);
2812 keys = xmemdup (argv[optind++], keycc + 1);
2813 fl_add (keys, keycc, "");
2814 n_patterns++;
2815 }
2816 else
2817 usage (EXIT_TROUBLE);
2818
2819 bool possibly_tty = false;
2820 struct stat tmp_stat;
2821 if (! exit_on_match && fstat (STDOUT_FILENO, &tmp_stat) == 0)
2822 {
2823 if (S_ISREG (tmp_stat.st_mode))
2824 out_stat = tmp_stat;
2825 else if (S_ISCHR (tmp_stat.st_mode))
2826 {
2827 struct stat null_stat;
2828 if (stat ("/dev/null", &null_stat) == 0
2829 && SAME_INODE (tmp_stat, null_stat))
2830 dev_null_output = true;
2831 else
2832 possibly_tty = true;
2833 }
2834 }
2835
2836 /* POSIX says -c, -l and -q are mutually exclusive. In this
2837 implementation, -q overrides -l and -L, which in turn override -c. */
2838 if (exit_on_match)
2839 list_files = LISTFILES_NONE;
2840 if ((exit_on_match | dev_null_output) || list_files != LISTFILES_NONE)
2841 {
2842 count_matches = false;
2843 done_on_match = true;
2844 }
2845 out_quiet = count_matches | done_on_match;
2846
2847 if (out_after < 0)
2848 out_after = default_context;
2849 if (out_before < 0)
2850 out_before = default_context;
2851
2852 /* If it is easy to see that matching cannot succeed (e.g., 'grep -f
2853 /dev/null'), fail without reading the input. */
2854 if ((max_count == 0
2855 || (keycc == 0 && out_invert && !match_lines && !match_words))
2856 && list_files != LISTFILES_NONMATCHING)
2857 return EXIT_FAILURE;
2858
2859 if (color_option == 2)
2860 color_option = possibly_tty && should_colorize () && isatty (STDOUT_FILENO);
2861 init_colorize ();
2862
2863 if (color_option)
2864 {
2865 /* Legacy. */
2866 char *userval = getenv ("GREP_COLOR");
2867 if (userval != NULL && *userval != '\0')
2868 selected_match_color = context_match_color = userval;
2869
2870 /* New GREP_COLORS has priority. */
2871 parse_grep_colors ();
2872 }
2873
2874 initialize_unibyte_mask ();
2875
2876 if (matcher < 0)
2877 matcher = G_MATCHER_INDEX;
2878
2879 /* In a single-byte locale, switch from -F to -G if it is a single
2880 pattern that matches words, where -G is typically faster. In a
2881 multi-byte locale, switch if the patterns have an encoding error
2882 (where -F does not work) or if -i and the patterns will not work
2883 for -iF. */
2884 if (matcher == F_MATCHER_INDEX
2885 && (! localeinfo.multibyte
2886 ? n_patterns == 1 && match_words
2887 : (contains_encoding_error (keys, keycc)
2888 || (match_icase && !fgrep_icase_available (keys, keycc)))))
2889 {
2890 fgrep_to_grep_pattern (&keys, &keycc);
2891 matcher = G_MATCHER_INDEX;
2892 }
2893 /* With two or more patterns, if -F works then switch from either -E
2894 or -G, as -F is probably faster then. */
2895 else if ((matcher == G_MATCHER_INDEX || matcher == E_MATCHER_INDEX)
2896 && 1 < n_patterns)
2897 matcher = try_fgrep_pattern (matcher, keys, &keycc);
2898
2899 execute = matchers[matcher].execute;
2900 compiled_pattern = matchers[matcher].compile (keys, keycc,
2901 matchers[matcher].syntax);
2902 /* We need one byte prior and one after. */
2903 char eolbytes[3] = { 0, eolbyte, 0 };
2904 size_t match_size;
2905 skip_empty_lines = ((execute (compiled_pattern, eolbytes + 1, 1,
2906 &match_size, NULL) == 0)
2907 == out_invert);
2908
2909 int num_operands = argc - optind;
2910 out_file = (filename_option == 0 && num_operands <= 1
2911 ? - (directories == RECURSE_DIRECTORIES)
2912 : 0 <= filename_option);
2913
2914 if (binary)
2915 xset_binary_mode (STDOUT_FILENO, O_BINARY);
2916
2917 /* Prefer sysconf for page size, as getpagesize typically returns int. */
2918 #ifdef _SC_PAGESIZE
2919 long psize = sysconf (_SC_PAGESIZE);
2920 #else
2921 long psize = getpagesize ();
2922 #endif
2923 if (! (0 < psize && psize <= (SIZE_MAX - sizeof (uword)) / 2))
2924 abort ();
2925 pagesize = psize;
2926 bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + sizeof (uword);
2927 buffer = xmalloc (bufalloc);
2928
2929 if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES)
2930 devices = READ_DEVICES;
2931
2932 char *const *files;
2933 if (0 < num_operands)
2934 {
2935 files = argv + optind;
2936 }
2937 else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive)
2938 {
2939 static char *const cwd_only[] = { (char *) ".", NULL };
2940 files = cwd_only;
2941 omit_dot_slash = true;
2942 }
2943 else
2944 {
2945 static char *const stdin_only[] = { (char *) "-", NULL };
2946 files = stdin_only;
2947 }
2948
2949 bool status = true;
2950 do
2951 status &= grep_command_line_arg (*files++);
2952 while (*files != NULL);
2953
2954 /* We register via atexit to test stdout. */
2955 return errseen ? EXIT_TROUBLE : status;
2956 }
2957