1 /* grep.c - main driver file for grep. 2 Copyright (C) 1992, 1997-2002, 2004-2015 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 3, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software 16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 17 02110-1301, USA. */ 18 19 /* Written July 1992 by Mike Haertel. */ 20 21 #include <config.h> 22 #include <sys/types.h> 23 #include <sys/stat.h> 24 #include <wchar.h> 25 #include <wctype.h> 26 #include <fcntl.h> 27 #include <inttypes.h> 28 #include <stdio.h> 29 #include "system.h" 30 31 #include "argmatch.h" 32 #include "c-ctype.h" 33 #include "closeout.h" 34 #include "colorize.h" 35 #include "error.h" 36 #include "exclude.h" 37 #include "exitfail.h" 38 #include "fcntl-safer.h" 39 #include "fts_.h" 40 #include "getopt.h" 41 #include "grep.h" 42 #include "intprops.h" 43 #include "progname.h" 44 #include "propername.h" 45 #include "quote.h" 46 #include "safe-read.h" 47 #include "search.h" 48 #include "version-etc.h" 49 #include "xalloc.h" 50 #include "xstrtol.h" 51 52 #define SEP_CHAR_SELECTED ':' 53 #define SEP_CHAR_REJECTED '-' 54 #define SEP_STR_GROUP "--" 55 56 #define AUTHORS \ 57 proper_name ("Mike Haertel"), \ 58 _("others, see\n<http://git.sv.gnu.org/cgit/grep.git/tree/AUTHORS>") 59 60 /* When stdout is connected to a regular file, save its stat 61 information here, so that we can automatically skip it, thus 62 avoiding a potential (racy) infinite loop. */ 63 static struct stat out_stat; 64 65 /* if non-zero, display usage information and exit */ 66 static int show_help; 67 68 /* Print the version on standard output and exit. */ 69 static bool show_version; 70 71 /* Suppress diagnostics for nonexistent or unreadable files. */ 72 static bool suppress_errors; 73 74 /* If nonzero, use color markers. */ 75 static int color_option; 76 77 /* Show only the part of a line matching the expression. */ 78 static bool only_matching; 79 80 /* If nonzero, make sure first content char in a line is on a tab stop. */ 81 static bool align_tabs; 82 83 #if HAVE_ASAN 84 /* Record the starting address and length of the sole poisoned region, 85 so that we can unpoison it later, just before each following read. */ 86 static void const *poison_buf; 87 static size_t poison_len; 88 89 static void 90 clear_asan_poison (void) 91 { 92 if (poison_buf) 93 __asan_unpoison_memory_region (poison_buf, poison_len); 94 } 95 96 static void 97 asan_poison (void const *addr, size_t size) 98 { 99 poison_buf = addr; 100 poison_len = size; 101 102 __asan_poison_memory_region (poison_buf, poison_len); 103 } 104 #else 105 static void clear_asan_poison (void) { } 106 static void asan_poison (void const volatile *addr, size_t size) { } 107 #endif 108 109 /* The group separator used when context is requested. */ 110 static const char *group_separator = SEP_STR_GROUP; 111 112 /* The context and logic for choosing default --color screen attributes 113 (foreground and background colors, etc.) are the following. 114 -- There are eight basic colors available, each with its own 115 nominal luminosity to the human eye and foreground/background 116 codes (black [0 %, 30/40], blue [11 %, 34/44], red [30 %, 31/41], 117 magenta [41 %, 35/45], green [59 %, 32/42], cyan [70 %, 36/46], 118 yellow [89 %, 33/43], and white [100 %, 37/47]). 119 -- Sometimes, white as a background is actually implemented using 120 a shade of light gray, so that a foreground white can be visible 121 on top of it (but most often not). 122 -- Sometimes, black as a foreground is actually implemented using 123 a shade of dark gray, so that it can be visible on top of a 124 background black (but most often not). 125 -- Sometimes, more colors are available, as extensions. 126 -- Other attributes can be selected/deselected (bold [1/22], 127 underline [4/24], standout/inverse [7/27], blink [5/25], and 128 invisible/hidden [8/28]). They are sometimes implemented by 129 using colors instead of what their names imply; e.g., bold is 130 often achieved by using brighter colors. In practice, only bold 131 is really available to us, underline sometimes being mapped by 132 the terminal to some strange color choice, and standout best 133 being left for use by downstream programs such as less(1). 134 -- We cannot assume that any of the extensions or special features 135 are available for the purpose of choosing defaults for everyone. 136 -- The most prevalent default terminal backgrounds are pure black 137 and pure white, and are not necessarily the same shades of 138 those as if they were selected explicitly with SGR sequences. 139 Some terminals use dark or light pictures as default background, 140 but those are covered over by an explicit selection of background 141 color with an SGR sequence; their users will appreciate their 142 background pictures not be covered like this, if possible. 143 -- Some uses of colors attributes is to make some output items 144 more understated (e.g., context lines); this cannot be achieved 145 by changing the background color. 146 -- For these reasons, the grep color defaults should strive not 147 to change the background color from its default, unless it's 148 for a short item that should be highlighted, not understated. 149 -- The grep foreground color defaults (without an explicitly set 150 background) should provide enough contrast to be readable on any 151 terminal with either a black (dark) or white (light) background. 152 This only leaves red, magenta, green, and cyan (and their bold 153 counterparts) and possibly bold blue. */ 154 /* The color strings used for matched text. 155 The user can overwrite them using the deprecated 156 environment variable GREP_COLOR or the new GREP_COLORS. */ 157 static const char *selected_match_color = "01;31"; /* bold red */ 158 static const char *context_match_color = "01;31"; /* bold red */ 159 160 /* Other colors. Defaults look damn good. */ 161 static const char *filename_color = "35"; /* magenta */ 162 static const char *line_num_color = "32"; /* green */ 163 static const char *byte_num_color = "32"; /* green */ 164 static const char *sep_color = "36"; /* cyan */ 165 static const char *selected_line_color = ""; /* default color pair */ 166 static const char *context_line_color = ""; /* default color pair */ 167 168 /* Select Graphic Rendition (SGR, "\33[...m") strings. */ 169 /* Also Erase in Line (EL) to Right ("\33[K") by default. */ 170 /* Why have EL to Right after SGR? 171 -- The behavior of line-wrapping when at the bottom of the 172 terminal screen and at the end of the current line is often 173 such that a new line is introduced, entirely cleared with 174 the current background color which may be different from the 175 default one (see the boolean back_color_erase terminfo(5) 176 capability), thus scrolling the display by one line. 177 The end of this new line will stay in this background color 178 even after reverting to the default background color with 179 "\33[m', unless it is explicitly cleared again with "\33[K" 180 (which is the behavior the user would instinctively expect 181 from the whole thing). There may be some unavoidable 182 background-color flicker at the end of this new line because 183 of this (when timing with the monitor's redraw is just right). 184 -- The behavior of HT (tab, "\t") is usually the same as that of 185 Cursor Forward Tabulation (CHT) with a default parameter 186 of 1 ("\33[I"), i.e., it performs pure movement to the next 187 tab stop, without any clearing of either content or screen 188 attributes (including background color); try 189 printf 'asdfqwerzxcv\rASDF\tZXCV\n' 190 in a bash(1) shell to demonstrate this. This is not what the 191 user would instinctively expect of HT (but is ok for CHT). 192 The instinctive behavior would include clearing the terminal 193 cells that are skipped over by HT with blank cells in the 194 current screen attributes, including background color; 195 the boolean dest_tabs_magic_smso terminfo(5) capability 196 indicates this saner behavior for HT, but only some rare 197 terminals have it (although it also indicates a special 198 glitch with standout mode in the Teleray terminal for which 199 it was initially introduced). The remedy is to add "\33K" 200 after each SGR sequence, be it START (to fix the behavior 201 of any HT after that before another SGR) or END (to fix the 202 behavior of an HT in default background color that would 203 follow a line-wrapping at the bottom of the screen in another 204 background color, and to complement doing it after START). 205 Piping grep's output through a pager such as less(1) avoids 206 any HT problems since the pager performs tab expansion. 207 208 Generic disadvantages of this remedy are: 209 -- Some very rare terminals might support SGR but not EL (nobody 210 will use "grep --color" on a terminal that does not support 211 SGR in the first place). 212 -- Having these extra control sequences might somewhat complicate 213 the task of any program trying to parse "grep --color" 214 output in order to extract structuring information from it. 215 A specific disadvantage to doing it after SGR START is: 216 -- Even more possible background color flicker (when timing 217 with the monitor's redraw is just right), even when not at the 218 bottom of the screen. 219 There are no additional disadvantages specific to doing it after 220 SGR END. 221 222 It would be impractical for GNU grep to become a full-fledged 223 terminal program linked against ncurses or the like, so it will 224 not detect terminfo(5) capabilities. */ 225 static const char *sgr_start = "\33[%sm\33[K"; 226 static const char *sgr_end = "\33[m\33[K"; 227 228 /* SGR utility functions. */ 229 static void 230 pr_sgr_start (char const *s) 231 { 232 if (*s) 233 print_start_colorize (sgr_start, s); 234 } 235 static void 236 pr_sgr_end (char const *s) 237 { 238 if (*s) 239 print_end_colorize (sgr_end); 240 } 241 static void 242 pr_sgr_start_if (char const *s) 243 { 244 if (color_option) 245 pr_sgr_start (s); 246 } 247 static void 248 pr_sgr_end_if (char const *s) 249 { 250 if (color_option) 251 pr_sgr_end (s); 252 } 253 254 struct color_cap 255 { 256 const char *name; 257 const char **var; 258 void (*fct) (void); 259 }; 260 261 static void 262 color_cap_mt_fct (void) 263 { 264 /* Our caller just set selected_match_color. */ 265 context_match_color = selected_match_color; 266 } 267 268 static void 269 color_cap_rv_fct (void) 270 { 271 /* By this point, it was 1 (or already -1). */ 272 color_option = -1; /* That's still != 0. */ 273 } 274 275 static void 276 color_cap_ne_fct (void) 277 { 278 sgr_start = "\33[%sm"; 279 sgr_end = "\33[m"; 280 } 281 282 /* For GREP_COLORS. */ 283 static const struct color_cap color_dict[] = 284 { 285 { "mt", &selected_match_color, color_cap_mt_fct }, /* both ms/mc */ 286 { "ms", &selected_match_color, NULL }, /* selected matched text */ 287 { "mc", &context_match_color, NULL }, /* context matched text */ 288 { "fn", &filename_color, NULL }, /* filename */ 289 { "ln", &line_num_color, NULL }, /* line number */ 290 { "bn", &byte_num_color, NULL }, /* byte (sic) offset */ 291 { "se", &sep_color, NULL }, /* separator */ 292 { "sl", &selected_line_color, NULL }, /* selected lines */ 293 { "cx", &context_line_color, NULL }, /* context lines */ 294 { "rv", NULL, color_cap_rv_fct }, /* -v reverses sl/cx */ 295 { "ne", NULL, color_cap_ne_fct }, /* no EL on SGR_* */ 296 { NULL, NULL, NULL } 297 }; 298 299 static struct exclude *excluded_patterns; 300 static struct exclude *excluded_directory_patterns; 301 /* Short options. */ 302 static char const short_options[] = 303 "0123456789A:B:C:D:EFGHIPTUVX:abcd:e:f:hiLlm:noqRrsuvwxyZz"; 304 305 /* Non-boolean long options that have no corresponding short equivalents. */ 306 enum 307 { 308 BINARY_FILES_OPTION = CHAR_MAX + 1, 309 COLOR_OPTION, 310 EXCLUDE_DIRECTORY_OPTION, 311 EXCLUDE_OPTION, 312 EXCLUDE_FROM_OPTION, 313 GROUP_SEPARATOR_OPTION, 314 INCLUDE_OPTION, 315 LINE_BUFFERED_OPTION, 316 LABEL_OPTION 317 }; 318 319 /* Long options equivalences. */ 320 static struct option const long_options[] = 321 { 322 {"basic-regexp", no_argument, NULL, 'G'}, 323 {"extended-regexp", no_argument, NULL, 'E'}, 324 {"fixed-regexp", no_argument, NULL, 'F'}, 325 {"fixed-strings", no_argument, NULL, 'F'}, 326 {"perl-regexp", no_argument, NULL, 'P'}, 327 {"after-context", required_argument, NULL, 'A'}, 328 {"before-context", required_argument, NULL, 'B'}, 329 {"binary-files", required_argument, NULL, BINARY_FILES_OPTION}, 330 {"byte-offset", no_argument, NULL, 'b'}, 331 {"context", required_argument, NULL, 'C'}, 332 {"color", optional_argument, NULL, COLOR_OPTION}, 333 {"colour", optional_argument, NULL, COLOR_OPTION}, 334 {"count", no_argument, NULL, 'c'}, 335 {"devices", required_argument, NULL, 'D'}, 336 {"directories", required_argument, NULL, 'd'}, 337 {"exclude", required_argument, NULL, EXCLUDE_OPTION}, 338 {"exclude-from", required_argument, NULL, EXCLUDE_FROM_OPTION}, 339 {"exclude-dir", required_argument, NULL, EXCLUDE_DIRECTORY_OPTION}, 340 {"file", required_argument, NULL, 'f'}, 341 {"files-with-matches", no_argument, NULL, 'l'}, 342 {"files-without-match", no_argument, NULL, 'L'}, 343 {"group-separator", required_argument, NULL, GROUP_SEPARATOR_OPTION}, 344 {"help", no_argument, &show_help, 1}, 345 {"include", required_argument, NULL, INCLUDE_OPTION}, 346 {"ignore-case", no_argument, NULL, 'i'}, 347 {"initial-tab", no_argument, NULL, 'T'}, 348 {"label", required_argument, NULL, LABEL_OPTION}, 349 {"line-buffered", no_argument, NULL, LINE_BUFFERED_OPTION}, 350 {"line-number", no_argument, NULL, 'n'}, 351 {"line-regexp", no_argument, NULL, 'x'}, 352 {"max-count", required_argument, NULL, 'm'}, 353 354 {"no-filename", no_argument, NULL, 'h'}, 355 {"no-group-separator", no_argument, NULL, GROUP_SEPARATOR_OPTION}, 356 {"no-messages", no_argument, NULL, 's'}, 357 {"null", no_argument, NULL, 'Z'}, 358 {"null-data", no_argument, NULL, 'z'}, 359 {"only-matching", no_argument, NULL, 'o'}, 360 {"quiet", no_argument, NULL, 'q'}, 361 {"recursive", no_argument, NULL, 'r'}, 362 {"dereference-recursive", no_argument, NULL, 'R'}, 363 {"regexp", required_argument, NULL, 'e'}, 364 {"invert-match", no_argument, NULL, 'v'}, 365 {"silent", no_argument, NULL, 'q'}, 366 {"text", no_argument, NULL, 'a'}, 367 {"binary", no_argument, NULL, 'U'}, 368 {"unix-byte-offsets", no_argument, NULL, 'u'}, 369 {"version", no_argument, NULL, 'V'}, 370 {"with-filename", no_argument, NULL, 'H'}, 371 {"word-regexp", no_argument, NULL, 'w'}, 372 {0, 0, 0, 0} 373 }; 374 375 /* Define flags declared in grep.h. */ 376 bool match_icase; 377 bool match_words; 378 bool match_lines; 379 char eolbyte; 380 enum textbin input_textbin; 381 382 static char const *matcher; 383 384 /* For error messages. */ 385 /* The input file name, or (if standard input) "-" or a --label argument. */ 386 static char const *filename; 387 /* Omit leading "./" from file names in diagnostics. */ 388 static bool omit_dot_slash; 389 static bool errseen; 390 static bool write_error_seen; 391 392 enum directories_type 393 { 394 READ_DIRECTORIES = 2, 395 RECURSE_DIRECTORIES, 396 SKIP_DIRECTORIES 397 }; 398 399 /* How to handle directories. */ 400 static char const *const directories_args[] = 401 { 402 "read", "recurse", "skip", NULL 403 }; 404 static enum directories_type const directories_types[] = 405 { 406 READ_DIRECTORIES, RECURSE_DIRECTORIES, SKIP_DIRECTORIES 407 }; 408 ARGMATCH_VERIFY (directories_args, directories_types); 409 410 static enum directories_type directories = READ_DIRECTORIES; 411 412 enum { basic_fts_options = FTS_CWDFD | FTS_NOSTAT | FTS_TIGHT_CYCLE_CHECK }; 413 static int fts_options = basic_fts_options | FTS_COMFOLLOW | FTS_PHYSICAL; 414 415 /* How to handle devices. */ 416 static enum 417 { 418 READ_COMMAND_LINE_DEVICES, 419 READ_DEVICES, 420 SKIP_DEVICES 421 } devices = READ_COMMAND_LINE_DEVICES; 422 423 static bool grepfile (int, char const *, bool, bool); 424 static bool grepdesc (int, bool); 425 426 static void dos_binary (void); 427 static void dos_unix_byte_offsets (void); 428 static size_t undossify_input (char *, size_t); 429 430 static bool 431 is_device_mode (mode_t m) 432 { 433 return S_ISCHR (m) || S_ISBLK (m) || S_ISSOCK (m) || S_ISFIFO (m); 434 } 435 436 static bool 437 skip_devices (bool command_line) 438 { 439 return (devices == SKIP_DEVICES 440 || (devices == READ_COMMAND_LINE_DEVICES && !command_line)); 441 } 442 443 /* Return if ST->st_size is defined. Assume the file is not a 444 symbolic link. */ 445 static bool 446 usable_st_size (struct stat const *st) 447 { 448 return S_ISREG (st->st_mode) || S_TYPEISSHM (st) || S_TYPEISTMO (st); 449 } 450 451 /* Lame substitutes for SEEK_DATA and SEEK_HOLE on platforms lacking them. 452 Do not rely on these finding data or holes if they equal SEEK_SET. */ 453 #ifndef SEEK_DATA 454 enum { SEEK_DATA = SEEK_SET }; 455 #endif 456 #ifndef SEEK_HOLE 457 enum { SEEK_HOLE = SEEK_SET }; 458 #endif 459 460 /* Functions we'll use to search. */ 461 typedef void (*compile_fp_t) (char const *, size_t); 462 typedef size_t (*execute_fp_t) (char const *, size_t, size_t *, char const *); 463 static compile_fp_t compile; 464 static execute_fp_t execute; 465 466 /* Like error, but suppress the diagnostic if requested. */ 467 static void 468 suppressible_error (char const *mesg, int errnum) 469 { 470 if (! suppress_errors) 471 error (0, errnum, "%s", mesg); 472 errseen = true; 473 } 474 475 /* If there has already been a write error, don't bother closing 476 standard output, as that might elicit a duplicate diagnostic. */ 477 static void 478 clean_up_stdout (void) 479 { 480 if (! write_error_seen) 481 close_stdout (); 482 } 483 484 static bool 485 textbin_is_binary (enum textbin textbin) 486 { 487 return textbin < TEXTBIN_UNKNOWN; 488 } 489 490 /* The high-order bit of a byte. */ 491 enum { HIBYTE = 0x80 }; 492 493 /* True if every byte with HIBYTE off is a single-byte character. 494 UTF-8 has this property. */ 495 static bool easy_encoding; 496 497 static void 498 init_easy_encoding (void) 499 { 500 easy_encoding = true; 501 for (int i = 0; i < HIBYTE; i++) 502 easy_encoding &= mbclen_cache[i] == 1; 503 } 504 505 /* A cast to TYPE of VAL. Use this when TYPE is a pointer type, VAL 506 is properly aligned for TYPE, and 'gcc -Wcast-align' cannot infer 507 the alignment and would otherwise complain about the cast. */ 508 #if 4 < __GNUC__ + (6 <= __GNUC_MINOR__) 509 # define CAST_ALIGNED(type, val) \ 510 ({ __typeof__ (val) val_ = val; \ 511 _Pragma ("GCC diagnostic push") \ 512 _Pragma ("GCC diagnostic ignored \"-Wcast-align\"") \ 513 (type) val_; \ 514 _Pragma ("GCC diagnostic pop") \ 515 }) 516 #else 517 # define CAST_ALIGNED(type, val) ((type) (val)) 518 #endif 519 520 /* An unsigned type suitable for fast matching. */ 521 typedef uintmax_t uword; 522 523 /* Skip the easy bytes in a buffer that is guaranteed to have a sentinel 524 that is not easy, and return a pointer to the first non-easy byte. 525 In easy encodings, the easy bytes all have HIBYTE off. 526 In other encodings, no byte is easy. */ 527 static char const * _GL_ATTRIBUTE_PURE 528 skip_easy_bytes (char const *buf) 529 { 530 if (!easy_encoding) 531 return buf; 532 533 uword uword_max = -1; 534 535 /* 0x8080..., extended to be wide enough for uword. */ 536 uword hibyte_mask = uword_max / UCHAR_MAX * HIBYTE; 537 538 /* Search a byte at a time until the pointer is aligned, then a 539 uword at a time until a match is found, then a byte at a time to 540 identify the exact byte. The uword search may go slightly past 541 the buffer end, but that's benign. */ 542 char const *p; 543 uword const *s; 544 for (p = buf; (uintptr_t) p % sizeof (uword) != 0; p++) 545 if (*p & HIBYTE) 546 return p; 547 for (s = CAST_ALIGNED (uword const *, p); ! (*s & hibyte_mask); s++) 548 continue; 549 for (p = (char const *) s; ! (*p & HIBYTE); p++) 550 continue; 551 return p; 552 } 553 554 /* Return the text type of data in BUF, of size SIZE. 555 BUF must be followed by at least sizeof (uword) bytes, 556 which may be arbitrarily written to or read from. */ 557 static enum textbin 558 buffer_textbin (char *buf, size_t size) 559 { 560 if (eolbyte && memchr (buf, '\0', size)) 561 return TEXTBIN_BINARY; 562 563 if (1 < MB_CUR_MAX) 564 { 565 mbstate_t mbs = { 0 }; 566 size_t clen; 567 char const *p; 568 569 buf[size] = -1; 570 for (p = buf; (p = skip_easy_bytes (p)) < buf + size; p += clen) 571 { 572 clen = mbrlen (p, buf + size - p, &mbs); 573 if ((size_t) -2 <= clen) 574 return clen == (size_t) -2 ? TEXTBIN_UNKNOWN : TEXTBIN_BINARY; 575 } 576 } 577 578 return TEXTBIN_TEXT; 579 } 580 581 /* Return the text type of a file. BUF, of size SIZE, is the initial 582 buffer read from the file with descriptor FD and status ST. 583 BUF must be followed by at least sizeof (uword) bytes, 584 which may be arbitrarily written to or read from. */ 585 static enum textbin 586 file_textbin (char *buf, size_t size, int fd, struct stat const *st) 587 { 588 enum textbin textbin = buffer_textbin (buf, size); 589 if (textbin_is_binary (textbin)) 590 return textbin; 591 592 if (usable_st_size (st)) 593 { 594 if (st->st_size <= size) 595 return textbin == TEXTBIN_UNKNOWN ? TEXTBIN_BINARY : textbin; 596 597 /* If the file has holes, it must contain a null byte somewhere. */ 598 if (SEEK_HOLE != SEEK_SET && eolbyte) 599 { 600 off_t cur = size; 601 if (O_BINARY || fd == STDIN_FILENO) 602 { 603 cur = lseek (fd, 0, SEEK_CUR); 604 if (cur < 0) 605 return TEXTBIN_UNKNOWN; 606 } 607 608 /* Look for a hole after the current location. */ 609 off_t hole_start = lseek (fd, cur, SEEK_HOLE); 610 if (0 <= hole_start) 611 { 612 if (lseek (fd, cur, SEEK_SET) < 0) 613 suppressible_error (filename, errno); 614 if (hole_start < st->st_size) 615 return TEXTBIN_BINARY; 616 } 617 } 618 } 619 620 return TEXTBIN_UNKNOWN; 621 } 622 623 /* Convert STR to a nonnegative integer, storing the result in *OUT. 624 STR must be a valid context length argument; report an error if it 625 isn't. Silently ceiling *OUT at the maximum value, as that is 626 practically equivalent to infinity for grep's purposes. */ 627 static void 628 context_length_arg (char const *str, intmax_t *out) 629 { 630 switch (xstrtoimax (str, 0, 10, out, "")) 631 { 632 case LONGINT_OK: 633 case LONGINT_OVERFLOW: 634 if (0 <= *out) 635 break; 636 /* Fall through. */ 637 default: 638 error (EXIT_TROUBLE, 0, "%s: %s", str, 639 _("invalid context length argument")); 640 } 641 } 642 643 /* Return true if the file with NAME should be skipped. 644 If COMMAND_LINE, it is a command-line argument. 645 If IS_DIR, it is a directory. */ 646 static bool 647 skipped_file (char const *name, bool command_line, bool is_dir) 648 { 649 return (is_dir 650 ? (directories == SKIP_DIRECTORIES 651 || (! (command_line && omit_dot_slash) 652 && excluded_directory_patterns 653 && excluded_file_name (excluded_directory_patterns, name))) 654 : (excluded_patterns 655 && excluded_file_name (excluded_patterns, name))); 656 } 657 658 /* Hairy buffering mechanism for grep. The intent is to keep 659 all reads aligned on a page boundary and multiples of the 660 page size, unless a read yields a partial page. */ 661 662 static char *buffer; /* Base of buffer. */ 663 static size_t bufalloc; /* Allocated buffer size, counting slop. */ 664 #define INITIAL_BUFSIZE 32768 /* Initial buffer size, not counting slop. */ 665 static int bufdesc; /* File descriptor. */ 666 static char *bufbeg; /* Beginning of user-visible stuff. */ 667 static char *buflim; /* Limit of user-visible stuff. */ 668 static size_t pagesize; /* alignment of memory pages */ 669 static off_t bufoffset; /* Read offset; defined on regular files. */ 670 static off_t after_last_match; /* Pointer after last matching line that 671 would have been output if we were 672 outputting characters. */ 673 static bool skip_nuls; /* Skip '\0' in data. */ 674 static bool skip_empty_lines; /* Skip empty lines in data. */ 675 static bool seek_data_failed; /* lseek with SEEK_DATA failed. */ 676 static uintmax_t totalnl; /* Total newline count before lastnl. */ 677 678 /* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be 679 an integer or a pointer. Both args must be free of side effects. */ 680 #define ALIGN_TO(val, alignment) \ 681 ((size_t) (val) % (alignment) == 0 \ 682 ? (val) \ 683 : (val) + ((alignment) - (size_t) (val) % (alignment))) 684 685 /* Add two numbers that count input bytes or lines, and report an 686 error if the addition overflows. */ 687 static uintmax_t 688 add_count (uintmax_t a, uintmax_t b) 689 { 690 uintmax_t sum = a + b; 691 if (sum < a) 692 error (EXIT_TROUBLE, 0, _("input is too large to count")); 693 return sum; 694 } 695 696 /* Return true if BUF (of size SIZE) is all zeros. */ 697 static bool 698 all_zeros (char const *buf, size_t size) 699 { 700 for (char const *p = buf; p < buf + size; p++) 701 if (*p) 702 return false; 703 return true; 704 } 705 706 /* Reset the buffer for a new file, returning false if we should skip it. 707 Initialize on the first time through. */ 708 static bool 709 reset (int fd, struct stat const *st) 710 { 711 if (! pagesize) 712 { 713 pagesize = getpagesize (); 714 if (pagesize == 0 || 2 * pagesize + 1 <= pagesize) 715 abort (); 716 bufalloc = (ALIGN_TO (INITIAL_BUFSIZE, pagesize) 717 + pagesize + sizeof (uword)); 718 buffer = xmalloc (bufalloc); 719 } 720 721 bufbeg = buflim = ALIGN_TO (buffer + 1, pagesize); 722 bufbeg[-1] = eolbyte; 723 bufdesc = fd; 724 725 if (S_ISREG (st->st_mode)) 726 { 727 if (fd != STDIN_FILENO) 728 bufoffset = 0; 729 else 730 { 731 bufoffset = lseek (fd, 0, SEEK_CUR); 732 if (bufoffset < 0) 733 { 734 suppressible_error (_("lseek failed"), errno); 735 return false; 736 } 737 } 738 } 739 return true; 740 } 741 742 /* Read new stuff into the buffer, saving the specified 743 amount of old stuff. When we're done, 'bufbeg' points 744 to the beginning of the buffer contents, and 'buflim' 745 points just after the end. Return false if there's an error. */ 746 static bool 747 fillbuf (size_t save, struct stat const *st) 748 { 749 size_t fillsize; 750 bool cc = true; 751 char *readbuf; 752 size_t readsize; 753 754 /* Offset from start of buffer to start of old stuff 755 that we want to save. */ 756 size_t saved_offset = buflim - save - buffer; 757 758 if (pagesize <= buffer + bufalloc - sizeof (uword) - buflim) 759 { 760 readbuf = buflim; 761 bufbeg = buflim - save; 762 } 763 else 764 { 765 size_t minsize = save + pagesize; 766 size_t newsize; 767 size_t newalloc; 768 char *newbuf; 769 770 /* Grow newsize until it is at least as great as minsize. */ 771 for (newsize = bufalloc - pagesize - sizeof (uword); 772 newsize < minsize; 773 newsize *= 2) 774 if ((SIZE_MAX - pagesize - sizeof (uword)) / 2 < newsize) 775 xalloc_die (); 776 777 /* Try not to allocate more memory than the file size indicates, 778 as that might cause unnecessary memory exhaustion if the file 779 is large. However, do not use the original file size as a 780 heuristic if we've already read past the file end, as most 781 likely the file is growing. */ 782 if (usable_st_size (st)) 783 { 784 off_t to_be_read = st->st_size - bufoffset; 785 off_t maxsize_off = save + to_be_read; 786 if (0 <= to_be_read && to_be_read <= maxsize_off 787 && maxsize_off == (size_t) maxsize_off 788 && minsize <= (size_t) maxsize_off 789 && (size_t) maxsize_off < newsize) 790 newsize = maxsize_off; 791 } 792 793 /* Add enough room so that the buffer is aligned and has room 794 for byte sentinels fore and aft, and so that a uword can 795 be read aft. */ 796 newalloc = newsize + pagesize + sizeof (uword); 797 798 newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer; 799 readbuf = ALIGN_TO (newbuf + 1 + save, pagesize); 800 bufbeg = readbuf - save; 801 memmove (bufbeg, buffer + saved_offset, save); 802 bufbeg[-1] = eolbyte; 803 if (newbuf != buffer) 804 { 805 free (buffer); 806 buffer = newbuf; 807 } 808 } 809 810 clear_asan_poison (); 811 812 readsize = buffer + bufalloc - sizeof (uword) - readbuf; 813 readsize -= readsize % pagesize; 814 815 while (true) 816 { 817 fillsize = safe_read (bufdesc, readbuf, readsize); 818 if (fillsize == SAFE_READ_ERROR) 819 { 820 fillsize = 0; 821 cc = false; 822 } 823 bufoffset += fillsize; 824 825 if (fillsize == 0 || !skip_nuls || !all_zeros (readbuf, fillsize)) 826 break; 827 totalnl = add_count (totalnl, fillsize); 828 829 if (SEEK_DATA != SEEK_SET && !seek_data_failed) 830 { 831 /* Solaris SEEK_DATA fails with errno == ENXIO in a hole at EOF. */ 832 off_t data_start = lseek (bufdesc, bufoffset, SEEK_DATA); 833 if (data_start < 0 && errno == ENXIO 834 && usable_st_size (st) && bufoffset < st->st_size) 835 data_start = lseek (bufdesc, 0, SEEK_END); 836 837 if (data_start < 0) 838 seek_data_failed = true; 839 else 840 { 841 totalnl = add_count (totalnl, data_start - bufoffset); 842 bufoffset = data_start; 843 } 844 } 845 } 846 847 fillsize = undossify_input (readbuf, fillsize); 848 buflim = readbuf + fillsize; 849 850 /* Initialize the following word, because skip_easy_bytes and some 851 matchers read (but do not use) those bytes. This avoids false 852 positive reports of these bytes being used uninitialized. */ 853 memset (buflim, 0, sizeof (uword)); 854 855 /* Mark the part of the buffer not filled by the read or set by 856 the above memset call as ASAN-poisoned. */ 857 asan_poison (buflim + sizeof (uword), 858 bufalloc - (buflim - buffer) - sizeof (uword)); 859 860 return cc; 861 } 862 863 /* Flags controlling the style of output. */ 864 static enum 865 { 866 BINARY_BINARY_FILES, 867 TEXT_BINARY_FILES, 868 WITHOUT_MATCH_BINARY_FILES 869 } binary_files; /* How to handle binary files. */ 870 871 static int filename_mask; /* If zero, output nulls after filenames. */ 872 static bool out_quiet; /* Suppress all normal output. */ 873 static bool out_invert; /* Print nonmatching stuff. */ 874 static int out_file; /* Print filenames. */ 875 static bool out_line; /* Print line numbers. */ 876 static bool out_byte; /* Print byte offsets. */ 877 static intmax_t out_before; /* Lines of leading context. */ 878 static intmax_t out_after; /* Lines of trailing context. */ 879 static bool count_matches; /* Count matching lines. */ 880 static int list_files; /* List matching files. */ 881 static bool no_filenames; /* Suppress file names. */ 882 static intmax_t max_count; /* Stop after outputting this many 883 lines from an input file. */ 884 static bool line_buffered; /* Use line buffering. */ 885 static char *label = NULL; /* Fake filename for stdin */ 886 887 888 /* Internal variables to keep track of byte count, context, etc. */ 889 static uintmax_t totalcc; /* Total character count before bufbeg. */ 890 static char const *lastnl; /* Pointer after last newline counted. */ 891 static char const *lastout; /* Pointer after last character output; 892 NULL if no character has been output 893 or if it's conceptually before bufbeg. */ 894 static intmax_t outleft; /* Maximum number of lines to be output. */ 895 static intmax_t pending; /* Pending lines of output. 896 Always kept 0 if out_quiet is true. */ 897 static bool done_on_match; /* Stop scanning file on first match. */ 898 static bool exit_on_match; /* Exit on first match. */ 899 900 #include "dosbuf.c" 901 902 static void 903 nlscan (char const *lim) 904 { 905 size_t newlines = 0; 906 char const *beg; 907 for (beg = lastnl; beg < lim; beg++) 908 { 909 beg = memchr (beg, eolbyte, lim - beg); 910 if (!beg) 911 break; 912 newlines++; 913 } 914 totalnl = add_count (totalnl, newlines); 915 lastnl = lim; 916 } 917 918 /* Print the current filename. */ 919 static void 920 print_filename (void) 921 { 922 pr_sgr_start_if (filename_color); 923 fputs (filename, stdout); 924 pr_sgr_end_if (filename_color); 925 } 926 927 /* Print a character separator. */ 928 static void 929 print_sep (char sep) 930 { 931 pr_sgr_start_if (sep_color); 932 fputc (sep, stdout); 933 pr_sgr_end_if (sep_color); 934 } 935 936 /* Print a line number or a byte offset. */ 937 static void 938 print_offset (uintmax_t pos, int min_width, const char *color) 939 { 940 /* Do not rely on printf to print pos, since uintmax_t may be longer 941 than long, and long long is not portable. */ 942 943 char buf[sizeof pos * CHAR_BIT]; 944 char *p = buf + sizeof buf; 945 946 do 947 { 948 *--p = '0' + pos % 10; 949 --min_width; 950 } 951 while ((pos /= 10) != 0); 952 953 /* Do this to maximize the probability of alignment across lines. */ 954 if (align_tabs) 955 while (--min_width >= 0) 956 *--p = ' '; 957 958 pr_sgr_start_if (color); 959 fwrite (p, 1, buf + sizeof buf - p, stdout); 960 pr_sgr_end_if (color); 961 } 962 963 /* Print a whole line head (filename, line, byte). */ 964 static void 965 print_line_head (char const *beg, char const *lim, char sep) 966 { 967 bool pending_sep = false; 968 969 if (out_file) 970 { 971 print_filename (); 972 if (filename_mask) 973 pending_sep = true; 974 else 975 fputc (0, stdout); 976 } 977 978 if (out_line) 979 { 980 if (lastnl < lim) 981 { 982 nlscan (beg); 983 totalnl = add_count (totalnl, 1); 984 lastnl = lim; 985 } 986 if (pending_sep) 987 print_sep (sep); 988 print_offset (totalnl, 4, line_num_color); 989 pending_sep = true; 990 } 991 992 if (out_byte) 993 { 994 uintmax_t pos = add_count (totalcc, beg - bufbeg); 995 pos = dossified_pos (pos); 996 if (pending_sep) 997 print_sep (sep); 998 print_offset (pos, 6, byte_num_color); 999 pending_sep = true; 1000 } 1001 1002 if (pending_sep) 1003 { 1004 /* This assumes sep is one column wide. 1005 Try doing this any other way with Unicode 1006 (and its combining and wide characters) 1007 filenames and you're wasting your efforts. */ 1008 if (align_tabs) 1009 fputs ("\t\b", stdout); 1010 1011 print_sep (sep); 1012 } 1013 } 1014 1015 static const char * 1016 print_line_middle (const char *beg, const char *lim, 1017 const char *line_color, const char *match_color) 1018 { 1019 size_t match_size; 1020 size_t match_offset; 1021 const char *cur = beg; 1022 const char *mid = NULL; 1023 1024 while (cur < lim 1025 && ((match_offset = execute (beg, lim - beg, &match_size, cur)) 1026 != (size_t) -1)) 1027 { 1028 char const *b = beg + match_offset; 1029 1030 /* Avoid matching the empty line at the end of the buffer. */ 1031 if (b == lim) 1032 break; 1033 1034 /* Avoid hanging on grep --color "" foo */ 1035 if (match_size == 0) 1036 { 1037 /* Make minimal progress; there may be further non-empty matches. */ 1038 /* XXX - Could really advance by one whole multi-octet character. */ 1039 match_size = 1; 1040 if (!mid) 1041 mid = cur; 1042 } 1043 else 1044 { 1045 /* This function is called on a matching line only, 1046 but is it selected or rejected/context? */ 1047 if (only_matching) 1048 print_line_head (b, lim, (out_invert ? SEP_CHAR_REJECTED 1049 : SEP_CHAR_SELECTED)); 1050 else 1051 { 1052 pr_sgr_start (line_color); 1053 if (mid) 1054 { 1055 cur = mid; 1056 mid = NULL; 1057 } 1058 fwrite (cur, sizeof (char), b - cur, stdout); 1059 } 1060 1061 pr_sgr_start_if (match_color); 1062 fwrite (b, sizeof (char), match_size, stdout); 1063 pr_sgr_end_if (match_color); 1064 if (only_matching) 1065 fputs ("\n", stdout); 1066 } 1067 cur = b + match_size; 1068 } 1069 1070 if (only_matching) 1071 cur = lim; 1072 else if (mid) 1073 cur = mid; 1074 1075 return cur; 1076 } 1077 1078 static const char * 1079 print_line_tail (const char *beg, const char *lim, const char *line_color) 1080 { 1081 size_t eol_size; 1082 size_t tail_size; 1083 1084 eol_size = (lim > beg && lim[-1] == eolbyte); 1085 eol_size += (lim - eol_size > beg && lim[-(1 + eol_size)] == '\r'); 1086 tail_size = lim - eol_size - beg; 1087 1088 if (tail_size > 0) 1089 { 1090 pr_sgr_start (line_color); 1091 fwrite (beg, 1, tail_size, stdout); 1092 beg += tail_size; 1093 pr_sgr_end (line_color); 1094 } 1095 1096 return beg; 1097 } 1098 1099 static void 1100 prline (char const *beg, char const *lim, char sep) 1101 { 1102 bool matching; 1103 const char *line_color; 1104 const char *match_color; 1105 1106 if (!only_matching) 1107 print_line_head (beg, lim, sep); 1108 1109 matching = (sep == SEP_CHAR_SELECTED) ^ out_invert; 1110 1111 if (color_option) 1112 { 1113 line_color = (((sep == SEP_CHAR_SELECTED) 1114 ^ (out_invert && (color_option < 0))) 1115 ? selected_line_color : context_line_color); 1116 match_color = (sep == SEP_CHAR_SELECTED 1117 ? selected_match_color : context_match_color); 1118 } 1119 else 1120 line_color = match_color = NULL; /* Shouldn't be used. */ 1121 1122 if ((only_matching && matching) 1123 || (color_option && (*line_color || *match_color))) 1124 { 1125 /* We already know that non-matching lines have no match (to colorize). */ 1126 if (matching && (only_matching || *match_color)) 1127 beg = print_line_middle (beg, lim, line_color, match_color); 1128 1129 if (!only_matching && *line_color) 1130 { 1131 /* This code is exercised at least when grep is invoked like this: 1132 echo k| GREP_COLORS='sl=01;32' src/grep k --color=always */ 1133 beg = print_line_tail (beg, lim, line_color); 1134 } 1135 } 1136 1137 if (!only_matching && lim > beg) 1138 fwrite (beg, 1, lim - beg, stdout); 1139 1140 if (ferror (stdout)) 1141 { 1142 write_error_seen = true; 1143 error (EXIT_TROUBLE, 0, _("write error")); 1144 } 1145 1146 lastout = lim; 1147 1148 if (line_buffered) 1149 fflush (stdout); 1150 } 1151 1152 /* Print pending lines of trailing context prior to LIM. Trailing context ends 1153 at the next matching line when OUTLEFT is 0. */ 1154 static void 1155 prpending (char const *lim) 1156 { 1157 if (!lastout) 1158 lastout = bufbeg; 1159 while (pending > 0 && lastout < lim) 1160 { 1161 char const *nl = memchr (lastout, eolbyte, lim - lastout); 1162 size_t match_size; 1163 --pending; 1164 if (outleft 1165 || ((execute (lastout, nl + 1 - lastout, 1166 &match_size, NULL) == (size_t) -1) 1167 == !out_invert)) 1168 prline (lastout, nl + 1, SEP_CHAR_REJECTED); 1169 else 1170 pending = 0; 1171 } 1172 } 1173 1174 /* Output the lines between BEG and LIM. Deal with context. */ 1175 static void 1176 prtext (char const *beg, char const *lim) 1177 { 1178 static bool used; /* Avoid printing SEP_STR_GROUP before any output. */ 1179 char eol = eolbyte; 1180 1181 if (!out_quiet && pending > 0) 1182 prpending (beg); 1183 1184 char const *p = beg; 1185 1186 if (!out_quiet) 1187 { 1188 /* Deal with leading context. */ 1189 char const *bp = lastout ? lastout : bufbeg; 1190 intmax_t i; 1191 for (i = 0; i < out_before; ++i) 1192 if (p > bp) 1193 do 1194 --p; 1195 while (p[-1] != eol); 1196 1197 /* Print the group separator unless the output is adjacent to 1198 the previous output in the file. */ 1199 if ((0 <= out_before || 0 <= out_after) && used 1200 && p != lastout && group_separator) 1201 { 1202 pr_sgr_start_if (sep_color); 1203 fputs (group_separator, stdout); 1204 pr_sgr_end_if (sep_color); 1205 fputc ('\n', stdout); 1206 } 1207 1208 while (p < beg) 1209 { 1210 char const *nl = memchr (p, eol, beg - p); 1211 nl++; 1212 prline (p, nl, SEP_CHAR_REJECTED); 1213 p = nl; 1214 } 1215 } 1216 1217 intmax_t n; 1218 if (out_invert) 1219 { 1220 /* One or more lines are output. */ 1221 for (n = 0; p < lim && n < outleft; n++) 1222 { 1223 char const *nl = memchr (p, eol, lim - p); 1224 nl++; 1225 if (!out_quiet) 1226 prline (p, nl, SEP_CHAR_SELECTED); 1227 p = nl; 1228 } 1229 } 1230 else 1231 { 1232 /* Just one line is output. */ 1233 if (!out_quiet) 1234 prline (beg, lim, SEP_CHAR_SELECTED); 1235 n = 1; 1236 p = lim; 1237 } 1238 1239 after_last_match = bufoffset - (buflim - p); 1240 pending = out_quiet ? 0 : MAX (0, out_after); 1241 used = true; 1242 outleft -= n; 1243 } 1244 1245 /* Replace all NUL bytes in buffer P (which ends at LIM) with EOL. 1246 This avoids running out of memory when binary input contains a long 1247 sequence of zeros, which would otherwise be considered to be part 1248 of a long line. P[LIM] should be EOL. */ 1249 static void 1250 zap_nuls (char *p, char *lim, char eol) 1251 { 1252 if (eol) 1253 while (true) 1254 { 1255 *lim = '\0'; 1256 p += strlen (p); 1257 *lim = eol; 1258 if (p == lim) 1259 break; 1260 do 1261 *p++ = eol; 1262 while (!*p); 1263 } 1264 } 1265 1266 /* Scan the specified portion of the buffer, matching lines (or 1267 between matching lines if OUT_INVERT is true). Return a count of 1268 lines printed. Replace all NUL bytes with NUL_ZAPPER as we go. */ 1269 static intmax_t 1270 grepbuf (char const *beg, char const *lim) 1271 { 1272 intmax_t outleft0 = outleft; 1273 char const *p; 1274 char const *endp; 1275 1276 for (p = beg; p < lim; p = endp) 1277 { 1278 size_t match_size; 1279 size_t match_offset = execute (p, lim - p, &match_size, NULL); 1280 if (match_offset == (size_t) -1) 1281 { 1282 if (!out_invert) 1283 break; 1284 match_offset = lim - p; 1285 match_size = 0; 1286 } 1287 char const *b = p + match_offset; 1288 endp = b + match_size; 1289 /* Avoid matching the empty line at the end of the buffer. */ 1290 if (!out_invert && b == lim) 1291 break; 1292 if (!out_invert || p < b) 1293 { 1294 char const *prbeg = out_invert ? p : b; 1295 char const *prend = out_invert ? b : endp; 1296 prtext (prbeg, prend); 1297 if (!outleft || done_on_match) 1298 { 1299 if (exit_on_match) 1300 exit (EXIT_SUCCESS); 1301 break; 1302 } 1303 } 1304 } 1305 1306 return outleft0 - outleft; 1307 } 1308 1309 /* Search a given file. Normally, return a count of lines printed; 1310 but if the file is a directory and we search it recursively, then 1311 return -2 if there was a match, and -1 otherwise. */ 1312 static intmax_t 1313 grep (int fd, struct stat const *st) 1314 { 1315 intmax_t nlines, i; 1316 enum textbin textbin; 1317 size_t residue, save; 1318 char oldc; 1319 char *beg; 1320 char *lim; 1321 char eol = eolbyte; 1322 char nul_zapper = '\0'; 1323 bool done_on_match_0 = done_on_match; 1324 bool out_quiet_0 = out_quiet; 1325 1326 if (! reset (fd, st)) 1327 return 0; 1328 1329 totalcc = 0; 1330 lastout = 0; 1331 totalnl = 0; 1332 outleft = max_count; 1333 after_last_match = 0; 1334 pending = 0; 1335 skip_nuls = skip_empty_lines && !eol; 1336 seek_data_failed = false; 1337 1338 nlines = 0; 1339 residue = 0; 1340 save = 0; 1341 1342 if (! fillbuf (save, st)) 1343 { 1344 if (errno != EINVAL) 1345 suppressible_error (filename, errno); 1346 return 0; 1347 } 1348 1349 if (binary_files == TEXT_BINARY_FILES) 1350 textbin = TEXTBIN_TEXT; 1351 else 1352 { 1353 textbin = file_textbin (bufbeg, buflim - bufbeg, fd, st); 1354 if (textbin_is_binary (textbin)) 1355 { 1356 if (binary_files == WITHOUT_MATCH_BINARY_FILES) 1357 return 0; 1358 done_on_match = out_quiet = true; 1359 nul_zapper = eol; 1360 skip_nuls = skip_empty_lines; 1361 } 1362 else if (execute != Pexecute) 1363 textbin = TEXTBIN_TEXT; 1364 } 1365 1366 for (;;) 1367 { 1368 input_textbin = textbin; 1369 lastnl = bufbeg; 1370 if (lastout) 1371 lastout = bufbeg; 1372 1373 beg = bufbeg + save; 1374 1375 /* no more data to scan (eof) except for maybe a residue -> break */ 1376 if (beg == buflim) 1377 break; 1378 1379 zap_nuls (beg, buflim, nul_zapper); 1380 1381 /* Determine new residue (the length of an incomplete line at the end of 1382 the buffer, 0 means there is no incomplete last line). */ 1383 oldc = beg[-1]; 1384 beg[-1] = eol; 1385 /* FIXME: use rawmemrchr if/when it exists, since we have ensured 1386 that this use of memrchr is guaranteed never to return NULL. */ 1387 lim = memrchr (beg - 1, eol, buflim - beg + 1); 1388 ++lim; 1389 beg[-1] = oldc; 1390 if (lim == beg) 1391 lim = beg - residue; 1392 beg -= residue; 1393 residue = buflim - lim; 1394 1395 if (beg < lim) 1396 { 1397 if (outleft) 1398 nlines += grepbuf (beg, lim); 1399 if (pending) 1400 prpending (lim); 1401 if ((!outleft && !pending) || (nlines && done_on_match)) 1402 goto finish_grep; 1403 } 1404 1405 /* The last OUT_BEFORE lines at the end of the buffer will be needed as 1406 leading context if there is a matching line at the begin of the 1407 next data. Make beg point to their begin. */ 1408 i = 0; 1409 beg = lim; 1410 while (i < out_before && beg > bufbeg && beg != lastout) 1411 { 1412 ++i; 1413 do 1414 --beg; 1415 while (beg[-1] != eol); 1416 } 1417 1418 /* Detect whether leading context is adjacent to previous output. */ 1419 if (lastout) 1420 { 1421 if (textbin == TEXTBIN_UNKNOWN) 1422 textbin = TEXTBIN_TEXT; 1423 if (beg != lastout) 1424 lastout = 0; 1425 } 1426 1427 /* Handle some details and read more data to scan. */ 1428 save = residue + lim - beg; 1429 if (out_byte) 1430 totalcc = add_count (totalcc, buflim - bufbeg - save); 1431 if (out_line) 1432 nlscan (beg); 1433 if (! fillbuf (save, st)) 1434 { 1435 suppressible_error (filename, errno); 1436 goto finish_grep; 1437 } 1438 1439 /* If the file's textbin has not been determined yet, assume 1440 it's binary if the next input buffer suggests so. */ 1441 if (textbin == TEXTBIN_UNKNOWN) 1442 { 1443 enum textbin tb = buffer_textbin (bufbeg, buflim - bufbeg); 1444 if (textbin_is_binary (tb)) 1445 { 1446 if (binary_files == WITHOUT_MATCH_BINARY_FILES) 1447 return 0; 1448 textbin = tb; 1449 done_on_match = out_quiet = true; 1450 nul_zapper = eol; 1451 skip_nuls = skip_empty_lines; 1452 } 1453 } 1454 } 1455 if (residue) 1456 { 1457 *buflim++ = eol; 1458 if (outleft) 1459 nlines += grepbuf (bufbeg + save - residue, buflim); 1460 if (pending) 1461 prpending (buflim); 1462 } 1463 1464 finish_grep: 1465 done_on_match = done_on_match_0; 1466 out_quiet = out_quiet_0; 1467 if (textbin_is_binary (textbin) && !out_quiet && nlines != 0) 1468 printf (_("Binary file %s matches\n"), filename); 1469 return nlines; 1470 } 1471 1472 static bool 1473 grepdirent (FTS *fts, FTSENT *ent, bool command_line) 1474 { 1475 bool follow; 1476 int dirdesc; 1477 command_line &= ent->fts_level == FTS_ROOTLEVEL; 1478 1479 if (ent->fts_info == FTS_DP) 1480 { 1481 if (directories == RECURSE_DIRECTORIES && command_line) 1482 out_file &= ~ (2 * !no_filenames); 1483 return true; 1484 } 1485 1486 if (!command_line 1487 && skipped_file (ent->fts_name, false, 1488 (ent->fts_info == FTS_D || ent->fts_info == FTS_DC 1489 || ent->fts_info == FTS_DNR))) 1490 { 1491 fts_set (fts, ent, FTS_SKIP); 1492 return true; 1493 } 1494 1495 filename = ent->fts_path; 1496 if (omit_dot_slash && filename[1]) 1497 filename += 2; 1498 follow = (fts->fts_options & FTS_LOGICAL 1499 || (fts->fts_options & FTS_COMFOLLOW && command_line)); 1500 1501 switch (ent->fts_info) 1502 { 1503 case FTS_D: 1504 if (directories == RECURSE_DIRECTORIES) 1505 { 1506 out_file |= 2 * !no_filenames; 1507 return true; 1508 } 1509 fts_set (fts, ent, FTS_SKIP); 1510 break; 1511 1512 case FTS_DC: 1513 if (!suppress_errors) 1514 error (0, 0, _("warning: %s: %s"), filename, 1515 _("recursive directory loop")); 1516 return true; 1517 1518 case FTS_DNR: 1519 case FTS_ERR: 1520 case FTS_NS: 1521 suppressible_error (filename, ent->fts_errno); 1522 return true; 1523 1524 case FTS_DEFAULT: 1525 case FTS_NSOK: 1526 if (skip_devices (command_line)) 1527 { 1528 struct stat *st = ent->fts_statp; 1529 struct stat st1; 1530 if (! st->st_mode) 1531 { 1532 /* The file type is not already known. Get the file status 1533 before opening, since opening might have side effects 1534 on a device. */ 1535 int flag = follow ? 0 : AT_SYMLINK_NOFOLLOW; 1536 if (fstatat (fts->fts_cwd_fd, ent->fts_accpath, &st1, flag) != 0) 1537 { 1538 suppressible_error (filename, errno); 1539 return true; 1540 } 1541 st = &st1; 1542 } 1543 if (is_device_mode (st->st_mode)) 1544 return true; 1545 } 1546 break; 1547 1548 case FTS_F: 1549 case FTS_SLNONE: 1550 break; 1551 1552 case FTS_SL: 1553 case FTS_W: 1554 return true; 1555 1556 default: 1557 abort (); 1558 } 1559 1560 dirdesc = ((fts->fts_options & (FTS_NOCHDIR | FTS_CWDFD)) == FTS_CWDFD 1561 ? fts->fts_cwd_fd 1562 : AT_FDCWD); 1563 return grepfile (dirdesc, ent->fts_accpath, follow, command_line); 1564 } 1565 1566 /* True if errno is ERR after 'open ("symlink", ... O_NOFOLLOW ...)'. 1567 POSIX specifies ELOOP, but it's EMLINK on FreeBSD and EFTYPE on NetBSD. */ 1568 static bool 1569 open_symlink_nofollow_error (int err) 1570 { 1571 if (err == ELOOP || err == EMLINK) 1572 return true; 1573 #ifdef EFTYPE 1574 if (err == EFTYPE) 1575 return true; 1576 #endif 1577 return false; 1578 } 1579 1580 static bool 1581 grepfile (int dirdesc, char const *name, bool follow, bool command_line) 1582 { 1583 int oflag = (O_RDONLY | O_NOCTTY 1584 | (follow ? 0 : O_NOFOLLOW) 1585 | (skip_devices (command_line) ? O_NONBLOCK : 0)); 1586 int desc = openat_safer (dirdesc, name, oflag); 1587 if (desc < 0) 1588 { 1589 if (follow || ! open_symlink_nofollow_error (errno)) 1590 suppressible_error (filename, errno); 1591 return true; 1592 } 1593 return grepdesc (desc, command_line); 1594 } 1595 1596 static bool 1597 grepdesc (int desc, bool command_line) 1598 { 1599 intmax_t count; 1600 bool status = true; 1601 struct stat st; 1602 1603 /* Get the file status, possibly for the second time. This catches 1604 a race condition if the directory entry changes after the 1605 directory entry is read and before the file is opened. For 1606 example, normally DESC is a directory only at the top level, but 1607 there is an exception if some other process substitutes a 1608 directory for a non-directory while 'grep' is running. */ 1609 if (fstat (desc, &st) != 0) 1610 { 1611 suppressible_error (filename, errno); 1612 goto closeout; 1613 } 1614 1615 if (desc != STDIN_FILENO && skip_devices (command_line) 1616 && is_device_mode (st.st_mode)) 1617 goto closeout; 1618 1619 if (desc != STDIN_FILENO && command_line 1620 && skipped_file (filename, true, S_ISDIR (st.st_mode) != 0)) 1621 goto closeout; 1622 1623 if (desc != STDIN_FILENO 1624 && directories == RECURSE_DIRECTORIES && S_ISDIR (st.st_mode)) 1625 { 1626 /* Traverse the directory starting with its full name, because 1627 unfortunately fts provides no way to traverse the directory 1628 starting from its file descriptor. */ 1629 1630 FTS *fts; 1631 FTSENT *ent; 1632 int opts = fts_options & ~(command_line ? 0 : FTS_COMFOLLOW); 1633 char *fts_arg[2]; 1634 1635 /* Close DESC now, to conserve file descriptors if the race 1636 condition occurs many times in a deep recursion. */ 1637 if (close (desc) != 0) 1638 suppressible_error (filename, errno); 1639 1640 fts_arg[0] = (char *) filename; 1641 fts_arg[1] = NULL; 1642 fts = fts_open (fts_arg, opts, NULL); 1643 1644 if (!fts) 1645 xalloc_die (); 1646 while ((ent = fts_read (fts))) 1647 status &= grepdirent (fts, ent, command_line); 1648 if (errno) 1649 suppressible_error (filename, errno); 1650 if (fts_close (fts) != 0) 1651 suppressible_error (filename, errno); 1652 return status; 1653 } 1654 if (desc != STDIN_FILENO 1655 && ((directories == SKIP_DIRECTORIES && S_ISDIR (st.st_mode)) 1656 || ((devices == SKIP_DEVICES 1657 || (devices == READ_COMMAND_LINE_DEVICES && !command_line)) 1658 && is_device_mode (st.st_mode)))) 1659 goto closeout; 1660 1661 /* If there is a regular file on stdout and the current file refers 1662 to the same i-node, we have to report the problem and skip it. 1663 Otherwise when matching lines from some other input reach the 1664 disk before we open this file, we can end up reading and matching 1665 those lines and appending them to the file from which we're reading. 1666 Then we'd have what appears to be an infinite loop that'd terminate 1667 only upon filling the output file system or reaching a quota. 1668 However, there is no risk of an infinite loop if grep is generating 1669 no output, i.e., with --silent, --quiet, -q. 1670 Similarly, with any of these: 1671 --max-count=N (-m) (for N >= 2) 1672 --files-with-matches (-l) 1673 --files-without-match (-L) 1674 there is no risk of trouble. 1675 For --max-count=1, grep stops after printing the first match, 1676 so there is no risk of malfunction. But even --max-count=2, with 1677 input==output, while there is no risk of infloop, there is a race 1678 condition that could result in "alternate" output. */ 1679 if (!out_quiet && list_files == 0 && 1 < max_count 1680 && S_ISREG (out_stat.st_mode) && out_stat.st_ino 1681 && SAME_INODE (st, out_stat)) 1682 { 1683 if (! suppress_errors) 1684 error (0, 0, _("input file %s is also the output"), quote (filename)); 1685 errseen = true; 1686 goto closeout; 1687 } 1688 1689 #if defined SET_BINARY 1690 /* Set input to binary mode. Pipes are simulated with files 1691 on DOS, so this includes the case of "foo | grep bar". */ 1692 if (!isatty (desc)) 1693 SET_BINARY (desc); 1694 #endif 1695 1696 count = grep (desc, &st); 1697 if (count < 0) 1698 status = count + 2; 1699 else 1700 { 1701 if (count_matches) 1702 { 1703 if (out_file) 1704 { 1705 print_filename (); 1706 if (filename_mask) 1707 print_sep (SEP_CHAR_SELECTED); 1708 else 1709 fputc (0, stdout); 1710 } 1711 printf ("%" PRIdMAX "\n", count); 1712 } 1713 1714 status = !count; 1715 if (list_files == 1 - 2 * status) 1716 { 1717 print_filename (); 1718 fputc ('\n' & filename_mask, stdout); 1719 } 1720 1721 if (desc == STDIN_FILENO) 1722 { 1723 off_t required_offset = outleft ? bufoffset : after_last_match; 1724 if (required_offset != bufoffset 1725 && lseek (desc, required_offset, SEEK_SET) < 0 1726 && S_ISREG (st.st_mode)) 1727 suppressible_error (filename, errno); 1728 } 1729 } 1730 1731 closeout: 1732 if (desc != STDIN_FILENO && close (desc) != 0) 1733 suppressible_error (filename, errno); 1734 return status; 1735 } 1736 1737 static bool 1738 grep_command_line_arg (char const *arg) 1739 { 1740 if (STREQ (arg, "-")) 1741 { 1742 filename = label ? label : _("(standard input)"); 1743 return grepdesc (STDIN_FILENO, true); 1744 } 1745 else 1746 { 1747 filename = arg; 1748 return grepfile (AT_FDCWD, arg, true, true); 1749 } 1750 } 1751 1752 _Noreturn void usage (int); 1753 void 1754 usage (int status) 1755 { 1756 if (status != 0) 1757 { 1758 fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), 1759 program_name); 1760 fprintf (stderr, _("Try '%s --help' for more information.\n"), 1761 program_name); 1762 } 1763 else 1764 { 1765 printf (_("Usage: %s [OPTION]... PATTERN [FILE]...\n"), program_name); 1766 printf (_("Search for PATTERN in each FILE or standard input.\n")); 1767 printf (_("PATTERN is, by default, a basic regular expression (BRE).\n")); 1768 printf (_("\ 1769 Example: %s -i 'hello world' menu.h main.c\n\ 1770 \n\ 1771 Regexp selection and interpretation:\n"), program_name); 1772 printf (_("\ 1773 -E, --extended-regexp PATTERN is an extended regular expression (ERE)\n\ 1774 -F, --fixed-strings PATTERN is a set of newline-separated strings\n\ 1775 -G, --basic-regexp PATTERN is a basic regular expression (BRE)\n\ 1776 -P, --perl-regexp PATTERN is a Perl regular expression\n")); 1777 /* -X is deliberately undocumented. */ 1778 printf (_("\ 1779 -e, --regexp=PATTERN use PATTERN for matching\n\ 1780 -f, --file=FILE obtain PATTERN from FILE\n\ 1781 -i, --ignore-case ignore case distinctions\n\ 1782 -w, --word-regexp force PATTERN to match only whole words\n\ 1783 -x, --line-regexp force PATTERN to match only whole lines\n\ 1784 -z, --null-data a data line ends in 0 byte, not newline\n")); 1785 printf (_("\ 1786 \n\ 1787 Miscellaneous:\n\ 1788 -s, --no-messages suppress error messages\n\ 1789 -v, --invert-match select non-matching lines\n\ 1790 -V, --version display version information and exit\n\ 1791 --help display this help text and exit\n")); 1792 printf (_("\ 1793 \n\ 1794 Output control:\n\ 1795 -m, --max-count=NUM stop after NUM matches\n\ 1796 -b, --byte-offset print the byte offset with output lines\n\ 1797 -n, --line-number print line number with output lines\n\ 1798 --line-buffered flush output on every line\n\ 1799 -H, --with-filename print the file name for each match\n\ 1800 -h, --no-filename suppress the file name prefix on output\n\ 1801 --label=LABEL use LABEL as the standard input file name prefix\n\ 1802 ")); 1803 printf (_("\ 1804 -o, --only-matching show only the part of a line matching PATTERN\n\ 1805 -q, --quiet, --silent suppress all normal output\n\ 1806 --binary-files=TYPE assume that binary files are TYPE;\n\ 1807 TYPE is 'binary', 'text', or 'without-match'\n\ 1808 -a, --text equivalent to --binary-files=text\n\ 1809 ")); 1810 printf (_("\ 1811 -I equivalent to --binary-files=without-match\n\ 1812 -d, --directories=ACTION how to handle directories;\n\ 1813 ACTION is 'read', 'recurse', or 'skip'\n\ 1814 -D, --devices=ACTION how to handle devices, FIFOs and sockets;\n\ 1815 ACTION is 'read' or 'skip'\n\ 1816 -r, --recursive like --directories=recurse\n\ 1817 -R, --dereference-recursive likewise, but follow all symlinks\n\ 1818 ")); 1819 printf (_("\ 1820 --include=FILE_PATTERN search only files that match FILE_PATTERN\n\ 1821 --exclude=FILE_PATTERN skip files and directories matching\ 1822 FILE_PATTERN\n\ 1823 --exclude-from=FILE skip files matching any file pattern from FILE\n\ 1824 --exclude-dir=PATTERN directories that match PATTERN will be skipped.\n\ 1825 ")); 1826 printf (_("\ 1827 -L, --files-without-match print only names of FILEs containing no match\n\ 1828 -l, --files-with-matches print only names of FILEs containing matches\n\ 1829 -c, --count print only a count of matching lines per FILE\n\ 1830 -T, --initial-tab make tabs line up (if needed)\n\ 1831 -Z, --null print 0 byte after FILE name\n")); 1832 printf (_("\ 1833 \n\ 1834 Context control:\n\ 1835 -B, --before-context=NUM print NUM lines of leading context\n\ 1836 -A, --after-context=NUM print NUM lines of trailing context\n\ 1837 -C, --context=NUM print NUM lines of output context\n\ 1838 ")); 1839 printf (_("\ 1840 -NUM same as --context=NUM\n\ 1841 --color[=WHEN],\n\ 1842 --colour[=WHEN] use markers to highlight the matching strings;\n\ 1843 WHEN is 'always', 'never', or 'auto'\n\ 1844 -U, --binary do not strip CR characters at EOL (MSDOS/Windows)\n\ 1845 -u, --unix-byte-offsets report offsets as if CRs were not there\n\ 1846 (MSDOS/Windows)\n\ 1847 \n")); 1848 printf (_("\ 1849 'egrep' means 'grep -E'. 'fgrep' means 'grep -F'.\n\ 1850 Direct invocation as either 'egrep' or 'fgrep' is deprecated.\n")); 1851 printf (_("\ 1852 When FILE is -, read standard input. With no FILE, read . if a command-line\n\ 1853 -r is given, - otherwise. If fewer than two FILEs are given, assume -h.\n\ 1854 Exit status is 0 if any line is selected, 1 otherwise;\n\ 1855 if any error occurs and -q is not given, the exit status is 2.\n")); 1856 emit_bug_reporting_address (); 1857 } 1858 exit (status); 1859 } 1860 1861 /* Pattern compilers and matchers. */ 1862 1863 static void 1864 Gcompile (char const *pattern, size_t size) 1865 { 1866 GEAcompile (pattern, size, RE_SYNTAX_GREP); 1867 } 1868 1869 static void 1870 Ecompile (char const *pattern, size_t size) 1871 { 1872 GEAcompile (pattern, size, RE_SYNTAX_EGREP); 1873 } 1874 1875 static void 1876 Acompile (char const *pattern, size_t size) 1877 { 1878 GEAcompile (pattern, size, RE_SYNTAX_AWK); 1879 } 1880 1881 static void 1882 GAcompile (char const *pattern, size_t size) 1883 { 1884 GEAcompile (pattern, size, RE_SYNTAX_GNU_AWK); 1885 } 1886 1887 static void 1888 PAcompile (char const *pattern, size_t size) 1889 { 1890 GEAcompile (pattern, size, RE_SYNTAX_POSIX_AWK); 1891 } 1892 1893 struct matcher 1894 { 1895 char const name[16]; 1896 compile_fp_t compile; 1897 execute_fp_t execute; 1898 }; 1899 static struct matcher const matchers[] = { 1900 { "grep", Gcompile, EGexecute }, 1901 { "egrep", Ecompile, EGexecute }, 1902 { "fgrep", Fcompile, Fexecute }, 1903 { "awk", Acompile, EGexecute }, 1904 { "gawk", GAcompile, EGexecute }, 1905 { "posixawk", PAcompile, EGexecute }, 1906 { "perl", Pcompile, Pexecute }, 1907 { "", NULL, NULL }, 1908 }; 1909 1910 /* Set the matcher to M if available. Exit in case of conflicts or if 1911 M is not available. */ 1912 static void 1913 setmatcher (char const *m) 1914 { 1915 struct matcher const *p; 1916 1917 if (matcher && !STREQ (matcher, m)) 1918 error (EXIT_TROUBLE, 0, _("conflicting matchers specified")); 1919 1920 for (p = matchers; p->compile; p++) 1921 if (STREQ (m, p->name)) 1922 { 1923 matcher = p->name; 1924 compile = p->compile; 1925 execute = p->execute; 1926 return; 1927 } 1928 1929 error (EXIT_TROUBLE, 0, _("invalid matcher %s"), m); 1930 } 1931 1932 /* Find the white-space-separated options specified by OPTIONS, and 1933 using BUF to store copies of these options, set ARGV[0], ARGV[1], 1934 etc. to the option copies. Return the number N of options found. 1935 Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0] 1936 etc. Backslash can be used to escape whitespace (and backslashes). */ 1937 static size_t 1938 prepend_args (char const *options, char *buf, char **argv) 1939 { 1940 char const *o = options; 1941 char *b = buf; 1942 size_t n = 0; 1943 1944 for (;;) 1945 { 1946 while (c_isspace (to_uchar (*o))) 1947 o++; 1948 if (!*o) 1949 return n; 1950 if (argv) 1951 argv[n] = b; 1952 n++; 1953 1954 do 1955 if ((*b++ = *o++) == '\\' && *o) 1956 b[-1] = *o++; 1957 while (*o && ! c_isspace (to_uchar (*o))); 1958 1959 *b++ = '\0'; 1960 } 1961 } 1962 1963 /* Prepend the whitespace-separated options in OPTIONS to the argument 1964 vector of a main program with argument count *PARGC and argument 1965 vector *PARGV. Return the number of options prepended. */ 1966 static int 1967 prepend_default_options (char const *options, int *pargc, char ***pargv) 1968 { 1969 if (options && *options) 1970 { 1971 char *buf = xmalloc (strlen (options) + 1); 1972 size_t prepended = prepend_args (options, buf, NULL); 1973 int argc = *pargc; 1974 char *const *argv = *pargv; 1975 char **pp; 1976 enum { MAX_ARGS = MIN (INT_MAX, SIZE_MAX / sizeof *pp - 1) }; 1977 if (MAX_ARGS - argc < prepended) 1978 xalloc_die (); 1979 pp = xmalloc ((prepended + argc + 1) * sizeof *pp); 1980 *pargc = prepended + argc; 1981 *pargv = pp; 1982 *pp++ = *argv++; 1983 pp += prepend_args (options, buf, pp); 1984 while ((*pp++ = *argv++)) 1985 continue; 1986 return prepended; 1987 } 1988 1989 return 0; 1990 } 1991 1992 /* Get the next non-digit option from ARGC and ARGV. 1993 Return -1 if there are no more options. 1994 Process any digit options that were encountered on the way, 1995 and store the resulting integer into *DEFAULT_CONTEXT. */ 1996 static int 1997 get_nondigit_option (int argc, char *const *argv, intmax_t *default_context) 1998 { 1999 static int prev_digit_optind = -1; 2000 int this_digit_optind; 2001 bool was_digit; 2002 char buf[INT_BUFSIZE_BOUND (intmax_t) + 4]; 2003 char *p = buf; 2004 int opt; 2005 2006 was_digit = false; 2007 this_digit_optind = optind; 2008 while (true) 2009 { 2010 opt = getopt_long (argc, (char **) argv, short_options, 2011 long_options, NULL); 2012 if ( ! ('0' <= opt && opt <= '9')) 2013 break; 2014 2015 if (prev_digit_optind != this_digit_optind || !was_digit) 2016 { 2017 /* Reset to start another context length argument. */ 2018 p = buf; 2019 } 2020 else 2021 { 2022 /* Suppress trivial leading zeros, to avoid incorrect 2023 diagnostic on strings like 00000000000. */ 2024 p -= buf[0] == '0'; 2025 } 2026 2027 if (p == buf + sizeof buf - 4) 2028 { 2029 /* Too many digits. Append "..." to make context_length_arg 2030 complain about "X...", where X contains the digits seen 2031 so far. */ 2032 strcpy (p, "..."); 2033 p += 3; 2034 break; 2035 } 2036 *p++ = opt; 2037 2038 was_digit = true; 2039 prev_digit_optind = this_digit_optind; 2040 this_digit_optind = optind; 2041 } 2042 if (p != buf) 2043 { 2044 *p = '\0'; 2045 context_length_arg (buf, default_context); 2046 } 2047 2048 return opt; 2049 } 2050 2051 /* Parse GREP_COLORS. The default would look like: 2052 GREP_COLORS='ms=01;31:mc=01;31:sl=:cx=:fn=35:ln=32:bn=32:se=36' 2053 with boolean capabilities (ne and rv) unset (i.e., omitted). 2054 No character escaping is needed or supported. */ 2055 static void 2056 parse_grep_colors (void) 2057 { 2058 const char *p; 2059 char *q; 2060 char *name; 2061 char *val; 2062 2063 p = getenv ("GREP_COLORS"); /* Plural! */ 2064 if (p == NULL || *p == '\0') 2065 return; 2066 2067 /* Work off a writable copy. */ 2068 q = xstrdup (p); 2069 2070 name = q; 2071 val = NULL; 2072 /* From now on, be well-formed or you're gone. */ 2073 for (;;) 2074 if (*q == ':' || *q == '\0') 2075 { 2076 char c = *q; 2077 struct color_cap const *cap; 2078 2079 *q++ = '\0'; /* Terminate name or val. */ 2080 /* Empty name without val (empty cap) 2081 * won't match and will be ignored. */ 2082 for (cap = color_dict; cap->name; cap++) 2083 if (STREQ (cap->name, name)) 2084 break; 2085 /* If name unknown, go on for forward compatibility. */ 2086 if (cap->var && val) 2087 *(cap->var) = val; 2088 if (cap->fct) 2089 cap->fct (); 2090 if (c == '\0') 2091 return; 2092 name = q; 2093 val = NULL; 2094 } 2095 else if (*q == '=') 2096 { 2097 if (q == name || val) 2098 return; 2099 *q++ = '\0'; /* Terminate name. */ 2100 val = q; /* Can be the empty string. */ 2101 } 2102 else if (val == NULL) 2103 q++; /* Accumulate name. */ 2104 else if (*q == ';' || (*q >= '0' && *q <= '9')) 2105 q++; /* Accumulate val. Protect the terminal from being sent crap. */ 2106 else 2107 return; 2108 } 2109 2110 /* Return true if PAT (of length PATLEN) contains an encoding error. */ 2111 static bool 2112 contains_encoding_error (char const *pat, size_t patlen) 2113 { 2114 mbstate_t mbs = { 0 }; 2115 size_t i, charlen; 2116 2117 for (i = 0; i < patlen; i += charlen) 2118 { 2119 charlen = mb_clen (pat + i, patlen - i, &mbs); 2120 if ((size_t) -2 <= charlen) 2121 return true; 2122 } 2123 return false; 2124 } 2125 2126 /* Change a pattern for fgrep into grep. */ 2127 static void 2128 fgrep_to_grep_pattern (size_t len, char const *keys, 2129 size_t *new_len, char **new_keys) 2130 { 2131 char *p = *new_keys = xnmalloc (len + 1, 2); 2132 mbstate_t mb_state = { 0 }; 2133 size_t n; 2134 2135 for (; len; keys += n, len -= n) 2136 { 2137 n = mb_clen (keys, len, &mb_state); 2138 switch (n) 2139 { 2140 case (size_t) -2: 2141 n = len; 2142 /* Fall through. */ 2143 default: 2144 p = mempcpy (p, keys, n); 2145 break; 2146 2147 case (size_t) -1: 2148 memset (&mb_state, 0, sizeof mb_state); 2149 /* Fall through. */ 2150 case 1: 2151 *p = '\\'; 2152 p += strchr ("$*.[\\^", *keys) != NULL; 2153 /* Fall through. */ 2154 case 0: 2155 *p++ = *keys; 2156 n = 1; 2157 break; 2158 } 2159 } 2160 2161 *new_len = p - *new_keys; 2162 } 2163 2164 int 2165 main (int argc, char **argv) 2166 { 2167 char *keys; 2168 size_t keycc, oldcc, keyalloc; 2169 bool with_filenames; 2170 size_t cc; 2171 int opt, prepended; 2172 int prev_optind, last_recursive; 2173 int fread_errno; 2174 intmax_t default_context; 2175 FILE *fp; 2176 exit_failure = EXIT_TROUBLE; 2177 initialize_main (&argc, &argv); 2178 set_program_name (argv[0]); 2179 program_name = argv[0]; 2180 2181 keys = NULL; 2182 keycc = 0; 2183 with_filenames = false; 2184 eolbyte = '\n'; 2185 filename_mask = ~0; 2186 2187 max_count = INTMAX_MAX; 2188 2189 /* The value -1 means to use DEFAULT_CONTEXT. */ 2190 out_after = out_before = -1; 2191 /* Default before/after context: changed by -C/-NUM options */ 2192 default_context = -1; 2193 /* Changed by -o option */ 2194 only_matching = false; 2195 2196 /* Internationalization. */ 2197 #if defined HAVE_SETLOCALE 2198 setlocale (LC_ALL, ""); 2199 #endif 2200 #if defined ENABLE_NLS 2201 bindtextdomain (PACKAGE, LOCALEDIR); 2202 textdomain (PACKAGE); 2203 #endif 2204 2205 exit_failure = EXIT_TROUBLE; 2206 atexit (clean_up_stdout); 2207 2208 last_recursive = 0; 2209 2210 prepended = prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv); 2211 if (prepended) 2212 error (0, 0, _("warning: GREP_OPTIONS is deprecated;" 2213 " please use an alias or script")); 2214 2215 compile = matchers[0].compile; 2216 execute = matchers[0].execute; 2217 2218 while (prev_optind = optind, 2219 (opt = get_nondigit_option (argc, argv, &default_context)) != -1) 2220 switch (opt) 2221 { 2222 case 'A': 2223 context_length_arg (optarg, &out_after); 2224 break; 2225 2226 case 'B': 2227 context_length_arg (optarg, &out_before); 2228 break; 2229 2230 case 'C': 2231 /* Set output match context, but let any explicit leading or 2232 trailing amount specified with -A or -B stand. */ 2233 context_length_arg (optarg, &default_context); 2234 break; 2235 2236 case 'D': 2237 if (STREQ (optarg, "read")) 2238 devices = READ_DEVICES; 2239 else if (STREQ (optarg, "skip")) 2240 devices = SKIP_DEVICES; 2241 else 2242 error (EXIT_TROUBLE, 0, _("unknown devices method")); 2243 break; 2244 2245 case 'E': 2246 setmatcher ("egrep"); 2247 break; 2248 2249 case 'F': 2250 setmatcher ("fgrep"); 2251 break; 2252 2253 case 'P': 2254 setmatcher ("perl"); 2255 break; 2256 2257 case 'G': 2258 setmatcher ("grep"); 2259 break; 2260 2261 case 'X': /* undocumented on purpose */ 2262 setmatcher (optarg); 2263 break; 2264 2265 case 'H': 2266 with_filenames = true; 2267 no_filenames = false; 2268 break; 2269 2270 case 'I': 2271 binary_files = WITHOUT_MATCH_BINARY_FILES; 2272 break; 2273 2274 case 'T': 2275 align_tabs = true; 2276 break; 2277 2278 case 'U': 2279 dos_binary (); 2280 break; 2281 2282 case 'u': 2283 dos_unix_byte_offsets (); 2284 break; 2285 2286 case 'V': 2287 show_version = true; 2288 break; 2289 2290 case 'a': 2291 binary_files = TEXT_BINARY_FILES; 2292 break; 2293 2294 case 'b': 2295 out_byte = true; 2296 break; 2297 2298 case 'c': 2299 count_matches = true; 2300 break; 2301 2302 case 'd': 2303 directories = XARGMATCH ("--directories", optarg, 2304 directories_args, directories_types); 2305 if (directories == RECURSE_DIRECTORIES) 2306 last_recursive = prev_optind; 2307 break; 2308 2309 case 'e': 2310 cc = strlen (optarg); 2311 keys = xrealloc (keys, keycc + cc + 1); 2312 strcpy (&keys[keycc], optarg); 2313 keycc += cc; 2314 keys[keycc++] = '\n'; 2315 break; 2316 2317 case 'f': 2318 fp = STREQ (optarg, "-") ? stdin : fopen (optarg, O_TEXT ? "rt" : "r"); 2319 if (!fp) 2320 error (EXIT_TROUBLE, errno, "%s", optarg); 2321 for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2) 2322 ; 2323 keys = xrealloc (keys, keyalloc); 2324 oldcc = keycc; 2325 while ((cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) != 0) 2326 { 2327 keycc += cc; 2328 if (keycc == keyalloc - 1) 2329 keys = x2nrealloc (keys, &keyalloc, sizeof *keys); 2330 } 2331 fread_errno = errno; 2332 if (ferror (fp)) 2333 error (EXIT_TROUBLE, fread_errno, "%s", optarg); 2334 if (fp != stdin) 2335 fclose (fp); 2336 /* Append final newline if file ended in non-newline. */ 2337 if (oldcc != keycc && keys[keycc - 1] != '\n') 2338 keys[keycc++] = '\n'; 2339 break; 2340 2341 case 'h': 2342 with_filenames = false; 2343 no_filenames = true; 2344 break; 2345 2346 case 'i': 2347 case 'y': /* For old-timers . . . */ 2348 match_icase = true; 2349 break; 2350 2351 case 'L': 2352 /* Like -l, except list files that don't contain matches. 2353 Inspired by the same option in Hume's gre. */ 2354 list_files = -1; 2355 break; 2356 2357 case 'l': 2358 list_files = 1; 2359 break; 2360 2361 case 'm': 2362 switch (xstrtoimax (optarg, 0, 10, &max_count, "")) 2363 { 2364 case LONGINT_OK: 2365 case LONGINT_OVERFLOW: 2366 break; 2367 2368 default: 2369 error (EXIT_TROUBLE, 0, _("invalid max count")); 2370 } 2371 break; 2372 2373 case 'n': 2374 out_line = true; 2375 break; 2376 2377 case 'o': 2378 only_matching = true; 2379 break; 2380 2381 case 'q': 2382 exit_on_match = true; 2383 exit_failure = 0; 2384 break; 2385 2386 case 'R': 2387 fts_options = basic_fts_options | FTS_LOGICAL; 2388 /* Fall through. */ 2389 case 'r': 2390 directories = RECURSE_DIRECTORIES; 2391 last_recursive = prev_optind; 2392 break; 2393 2394 case 's': 2395 suppress_errors = true; 2396 break; 2397 2398 case 'v': 2399 out_invert = true; 2400 break; 2401 2402 case 'w': 2403 match_words = true; 2404 break; 2405 2406 case 'x': 2407 match_lines = true; 2408 break; 2409 2410 case 'Z': 2411 filename_mask = 0; 2412 break; 2413 2414 case 'z': 2415 eolbyte = '\0'; 2416 break; 2417 2418 case BINARY_FILES_OPTION: 2419 if (STREQ (optarg, "binary")) 2420 binary_files = BINARY_BINARY_FILES; 2421 else if (STREQ (optarg, "text")) 2422 binary_files = TEXT_BINARY_FILES; 2423 else if (STREQ (optarg, "without-match")) 2424 binary_files = WITHOUT_MATCH_BINARY_FILES; 2425 else 2426 error (EXIT_TROUBLE, 0, _("unknown binary-files type")); 2427 break; 2428 2429 case COLOR_OPTION: 2430 if (optarg) 2431 { 2432 if (!strcasecmp (optarg, "always") || !strcasecmp (optarg, "yes") 2433 || !strcasecmp (optarg, "force")) 2434 color_option = 1; 2435 else if (!strcasecmp (optarg, "never") || !strcasecmp (optarg, "no") 2436 || !strcasecmp (optarg, "none")) 2437 color_option = 0; 2438 else if (!strcasecmp (optarg, "auto") || !strcasecmp (optarg, "tty") 2439 || !strcasecmp (optarg, "if-tty")) 2440 color_option = 2; 2441 else 2442 show_help = 1; 2443 } 2444 else 2445 color_option = 2; 2446 break; 2447 2448 case EXCLUDE_OPTION: 2449 case INCLUDE_OPTION: 2450 if (!excluded_patterns) 2451 excluded_patterns = new_exclude (); 2452 add_exclude (excluded_patterns, optarg, 2453 (EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS 2454 | (opt == INCLUDE_OPTION ? EXCLUDE_INCLUDE : 0))); 2455 break; 2456 case EXCLUDE_FROM_OPTION: 2457 if (!excluded_patterns) 2458 excluded_patterns = new_exclude (); 2459 if (add_exclude_file (add_exclude, excluded_patterns, optarg, 2460 EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS, '\n') != 0) 2461 { 2462 error (EXIT_TROUBLE, errno, "%s", optarg); 2463 } 2464 break; 2465 2466 case EXCLUDE_DIRECTORY_OPTION: 2467 if (!excluded_directory_patterns) 2468 excluded_directory_patterns = new_exclude (); 2469 strip_trailing_slashes (optarg); 2470 add_exclude (excluded_directory_patterns, optarg, 2471 EXCLUDE_ANCHORED | EXCLUDE_WILDCARDS); 2472 break; 2473 2474 case GROUP_SEPARATOR_OPTION: 2475 group_separator = optarg; 2476 break; 2477 2478 case LINE_BUFFERED_OPTION: 2479 line_buffered = true; 2480 break; 2481 2482 case LABEL_OPTION: 2483 label = optarg; 2484 break; 2485 2486 case 0: 2487 /* long options */ 2488 break; 2489 2490 default: 2491 usage (EXIT_TROUBLE); 2492 break; 2493 2494 } 2495 2496 if (color_option == 2) 2497 color_option = isatty (STDOUT_FILENO) && should_colorize (); 2498 init_colorize (); 2499 2500 /* POSIX says that -q overrides -l, which in turn overrides the 2501 other output options. */ 2502 if (exit_on_match) 2503 list_files = 0; 2504 if (exit_on_match | list_files) 2505 { 2506 count_matches = false; 2507 done_on_match = true; 2508 } 2509 out_quiet = count_matches | done_on_match; 2510 2511 if (out_after < 0) 2512 out_after = default_context; 2513 if (out_before < 0) 2514 out_before = default_context; 2515 2516 if (color_option) 2517 { 2518 /* Legacy. */ 2519 char *userval = getenv ("GREP_COLOR"); 2520 if (userval != NULL && *userval != '\0') 2521 selected_match_color = context_match_color = userval; 2522 2523 /* New GREP_COLORS has priority. */ 2524 parse_grep_colors (); 2525 } 2526 2527 if (show_version) 2528 { 2529 version_etc (stdout, program_name, PACKAGE_NAME, VERSION, AUTHORS, 2530 (char *) NULL); 2531 return EXIT_SUCCESS; 2532 } 2533 2534 if (show_help) 2535 usage (EXIT_SUCCESS); 2536 2537 struct stat tmp_stat; 2538 if (fstat (STDOUT_FILENO, &tmp_stat) == 0 && S_ISREG (tmp_stat.st_mode)) 2539 out_stat = tmp_stat; 2540 2541 if (keys) 2542 { 2543 if (keycc == 0) 2544 { 2545 /* No keys were specified (e.g. -f /dev/null). Match nothing. */ 2546 out_invert ^= true; 2547 match_lines = match_words = false; 2548 } 2549 else 2550 /* Strip trailing newline. */ 2551 --keycc; 2552 } 2553 else if (optind < argc) 2554 { 2555 /* A copy must be made in case of an xrealloc() or free() later. */ 2556 keycc = strlen (argv[optind]); 2557 keys = xmemdup (argv[optind++], keycc + 1); 2558 } 2559 else 2560 usage (EXIT_TROUBLE); 2561 2562 build_mbclen_cache (); 2563 init_easy_encoding (); 2564 2565 /* In a unibyte locale, switch from fgrep to grep if 2566 the pattern matches words (where grep is typically faster). 2567 In a multibyte locale, switch from fgrep to grep if either 2568 (1) case is ignored (where grep is typically faster), or 2569 (2) the pattern has an encoding error (where fgrep might not work). */ 2570 if (compile == Fcompile 2571 && (MB_CUR_MAX <= 1 2572 ? match_words 2573 : match_icase || contains_encoding_error (keys, keycc))) 2574 { 2575 size_t new_keycc; 2576 char *new_keys; 2577 fgrep_to_grep_pattern (keycc, keys, &new_keycc, &new_keys); 2578 free (keys); 2579 keys = new_keys; 2580 keycc = new_keycc; 2581 matcher = "grep"; 2582 compile = Gcompile; 2583 execute = EGexecute; 2584 } 2585 2586 compile (keys, keycc); 2587 free (keys); 2588 /* We need one byte prior and one after. */ 2589 char eolbytes[3] = { 0, eolbyte, 0 }; 2590 size_t match_size; 2591 skip_empty_lines = ((execute (eolbytes + 1, 1, &match_size, NULL) == 0) 2592 == out_invert); 2593 2594 if ((argc - optind > 1 && !no_filenames) || with_filenames) 2595 out_file = 1; 2596 2597 #ifdef SET_BINARY 2598 /* Output is set to binary mode because we shouldn't convert 2599 NL to CR-LF pairs, especially when grepping binary files. */ 2600 if (!isatty (STDOUT_FILENO)) 2601 SET_BINARY (STDOUT_FILENO); 2602 #endif 2603 2604 if (max_count == 0) 2605 return EXIT_FAILURE; 2606 2607 if (fts_options & FTS_LOGICAL && devices == READ_COMMAND_LINE_DEVICES) 2608 devices = READ_DEVICES; 2609 2610 char *const *files; 2611 if (optind < argc) 2612 { 2613 files = argv + optind; 2614 } 2615 else if (directories == RECURSE_DIRECTORIES && prepended < last_recursive) 2616 { 2617 static char *const cwd_only[] = { (char *) ".", NULL }; 2618 files = cwd_only; 2619 omit_dot_slash = true; 2620 } 2621 else 2622 { 2623 static char *const stdin_only[] = { (char *) "-", NULL }; 2624 files = stdin_only; 2625 } 2626 2627 bool status = true; 2628 do 2629 status &= grep_command_line_arg (*files++); 2630 while (*files != NULL); 2631 2632 /* We register via atexit() to test stdout. */ 2633 return errseen ? EXIT_TROUBLE : status; 2634 } 2635