1 /* vi: set sw=4 ts=4: */
2 /*
3  * Mini grep implementation for busybox using libc regex.
4  *
5  * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
6  * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
7  *
8  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
9  */
/* BB_AUDIT SUSv3 defects - none listed here; -x "match whole line only" is implemented (see OPT_x). */
11 /* BB_AUDIT GNU defects - always acts as -a.  */
12 /* http://www.opengroup.org/onlinepubs/007904975/utilities/grep.html */
13 /*
14  * 2004,2006 (C) Vladimir Oleynik <dzo@simtreas.ru> -
15  * correction "-e pattern1 -e pattern2" logic and more optimizations.
16  * precompiled regex
17  *
18  * (C) 2006 Jac Goudsmit added -o option
19  */
20 
21 //config:config GREP
22 //config:	bool "grep"
23 //config:	default y
24 //config:	help
25 //config:	  grep is used to search files for a specified pattern.
26 //config:
27 //config:config EGREP
28 //config:	bool "egrep"
29 //config:	default y
30 //config:	help
31 //config:	  Alias to "grep -E"
32 //config:
33 //config:config FGREP
34 //config:	bool "fgrep"
35 //config:	default y
36 //config:	help
37 //config:	  Alias to "grep -F"
38 //config:
39 //config:config FEATURE_GREP_CONTEXT
40 //config:	bool "Enable before and after context flags (-A, -B and -C)"
41 //config:	default y
42 //config:	depends on GREP || EGREP
43 //config:	help
44 //config:	  Print the specified number of leading (-B) and/or trailing (-A)
45 //config:	  context surrounding our matching lines.
46 //config:	  Print the specified number of context lines (-C).
47 
48 //applet:IF_GREP(APPLET(grep, BB_DIR_BIN, BB_SUID_DROP))
49 //applet:IF_EGREP(APPLET_ODDNAME(egrep, grep, BB_DIR_BIN, BB_SUID_DROP, egrep))
50 //applet:IF_FGREP(APPLET_ODDNAME(fgrep, grep, BB_DIR_BIN, BB_SUID_DROP, fgrep))
51 
52 //kbuild:lib-$(CONFIG_GREP) += grep.o
53 //kbuild:lib-$(CONFIG_EGREP) += grep.o
54 //kbuild:lib-$(CONFIG_FGREP) += grep.o
55 
56 #include "libbb.h"
57 #include "common_bufsiz.h"
58 #include "xregex.h"
59 
60 
61 /* options */
62 //usage:#define grep_trivial_usage
63 //usage:       "[-HhnlLoqvsriwFE"
64 //usage:	IF_EXTRA_COMPAT("z")
65 //usage:       "] [-m N] "
66 //usage:	IF_FEATURE_GREP_CONTEXT("[-A/B/C N] ")
67 //usage:       "PATTERN/-e PATTERN.../-f FILE [FILE]..."
68 //usage:#define grep_full_usage "\n\n"
69 //usage:       "Search for PATTERN in FILEs (or stdin)\n"
70 //usage:     "\n	-H	Add 'filename:' prefix"
71 //usage:     "\n	-h	Do not add 'filename:' prefix"
72 //usage:     "\n	-n	Add 'line_no:' prefix"
73 //usage:     "\n	-l	Show only names of files that match"
74 //usage:     "\n	-L	Show only names of files that don't match"
75 //usage:     "\n	-c	Show only count of matching lines"
76 //usage:     "\n	-o	Show only the matching part of line"
77 //usage:     "\n	-q	Quiet. Return 0 if PATTERN is found, 1 otherwise"
78 //usage:     "\n	-v	Select non-matching lines"
79 //usage:     "\n	-s	Suppress open and read errors"
80 //usage:     "\n	-r	Recurse"
81 //usage:     "\n	-i	Ignore case"
82 //usage:     "\n	-w	Match whole words only"
83 //usage:     "\n	-x	Match whole lines only"
84 //usage:     "\n	-F	PATTERN is a literal (not regexp)"
85 //usage:     "\n	-E	PATTERN is an extended regexp"
86 //usage:	IF_EXTRA_COMPAT(
87 //usage:     "\n	-z	Input is NUL terminated"
88 //usage:	)
89 //usage:     "\n	-m N	Match up to N times per file"
90 //usage:	IF_FEATURE_GREP_CONTEXT(
91 //usage:     "\n	-A N	Print N lines of trailing context"
92 //usage:     "\n	-B N	Print N lines of leading context"
93 //usage:     "\n	-C N	Same as '-A N -B N'"
94 //usage:	)
95 //usage:     "\n	-e PTRN	Pattern to match"
96 //usage:     "\n	-f FILE	Read pattern from file"
97 //usage:
98 //usage:#define grep_example_usage
99 //usage:       "$ grep root /etc/passwd\n"
100 //usage:       "root:x:0:0:root:/root:/bin/bash\n"
101 //usage:       "$ grep ^[rR]oo. /etc/passwd\n"
102 //usage:       "root:x:0:0:root:/root:/bin/bash\n"
103 //usage:
104 //usage:#define egrep_trivial_usage NOUSAGE_STR
105 //usage:#define egrep_full_usage ""
106 //usage:#define fgrep_trivial_usage NOUSAGE_STR
107 //usage:#define fgrep_full_usage ""
108 
/*
 * getopt32() option string. The order of the option letters here must
 * match the order of the OPTBIT_xxx enumerators below: each letter's
 * position determines which bit is set in option_mask32.
 * Suffixes: ":" takes an argument, ":*" collects arguments into a list,
 * ":+" takes a numeric argument.
 */
#define OPTSTR_GREP \
	"lnqvscFiHhe:*f:*Lorm:+wx" \
	IF_FEATURE_GREP_CONTEXT("A:+B:+C:+") \
	"E" \
	IF_EXTRA_COMPAT("z") \
	"aI"
/* ignored: -a "assume all files to be text" */
/* ignored: -I "assume binary files have no matches" */
/*
 * Option bits. OPTBIT_xxx is the bit index (same order as the letters in
 * OPTSTR_GREP), OPT_xxx is the corresponding mask tested against
 * option_mask32. Masks of options that are compiled out evaluate to 0
 * (the "+ 0" keeps the expression valid when the IF_xxx() macro expands
 * to nothing).
 */
enum {
	OPTBIT_l, /* list matched file names only */
	OPTBIT_n, /* print line# */
	OPTBIT_q, /* quiet - exit(EXIT_SUCCESS) of first match */
	OPTBIT_v, /* invert the match, to select non-matching lines */
	OPTBIT_s, /* suppress errors about file open errors */
	OPTBIT_c, /* count matches per file (suppresses normal output) */
	OPTBIT_F, /* literal match */
	OPTBIT_i, /* case-insensitive */
	OPTBIT_H, /* force filename display */
	OPTBIT_h, /* inhibit filename display */
	OPTBIT_e, /* -e PATTERN */
	OPTBIT_f, /* -f FILE_WITH_PATTERNS */
	OPTBIT_L, /* list unmatched file names only */
	OPTBIT_o, /* show only matching parts of lines */
	OPTBIT_r, /* recurse dirs */
	OPTBIT_m, /* -m MAX_MATCHES */
	OPTBIT_w, /* -w whole word match */
	OPTBIT_x, /* -x whole line match */
	IF_FEATURE_GREP_CONTEXT(    OPTBIT_A ,) /* -A NUM: after-match context */
	IF_FEATURE_GREP_CONTEXT(    OPTBIT_B ,) /* -B NUM: before-match context */
	IF_FEATURE_GREP_CONTEXT(    OPTBIT_C ,) /* -C NUM: -A and -B combined */
	OPTBIT_E, /* extended regexp */
	IF_EXTRA_COMPAT(            OPTBIT_z ,) /* input is NUL terminated */
	OPT_l = 1 << OPTBIT_l,
	OPT_n = 1 << OPTBIT_n,
	OPT_q = 1 << OPTBIT_q,
	OPT_v = 1 << OPTBIT_v,
	OPT_s = 1 << OPTBIT_s,
	OPT_c = 1 << OPTBIT_c,
	OPT_F = 1 << OPTBIT_F,
	OPT_i = 1 << OPTBIT_i,
	OPT_H = 1 << OPTBIT_H,
	OPT_h = 1 << OPTBIT_h,
	OPT_e = 1 << OPTBIT_e,
	OPT_f = 1 << OPTBIT_f,
	OPT_L = 1 << OPTBIT_L,
	OPT_o = 1 << OPTBIT_o,
	OPT_r = 1 << OPTBIT_r,
	OPT_m = 1 << OPTBIT_m,
	OPT_w = 1 << OPTBIT_w,
	OPT_x = 1 << OPTBIT_x,
	OPT_A = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_A)) + 0,
	OPT_B = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_B)) + 0,
	OPT_C = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_C)) + 0,
	OPT_E = 1 << OPTBIT_E,
	OPT_z = IF_EXTRA_COMPAT(            (1 << OPTBIT_z)) + 0,
};
165 
/* Readable shorthands for testing individual option bits in option_mask32 */
#define PRINT_FILES_WITH_MATCHES    (option_mask32 & OPT_l)
#define PRINT_LINE_NUM              (option_mask32 & OPT_n)
#define BE_QUIET                    (option_mask32 & OPT_q)
#define SUPPRESS_ERR_MSGS           (option_mask32 & OPT_s)
#define PRINT_MATCH_COUNTS          (option_mask32 & OPT_c)
#define FGREP_FLAG                  (option_mask32 & OPT_F)
#define PRINT_FILES_WITHOUT_MATCHES (option_mask32 & OPT_L)
/* Always 0 when EXTRA_COMPAT is disabled (OPT_z is 0 then) */
#define NUL_DELIMITED               (option_mask32 & OPT_z)
174 
/*
 * Applet-wide state. It is overlaid on bb_common_bufsiz1 (see the G macro)
 * and accessed through the same-named accessor macros defined below.
 */
struct globals {
	int max_matches;         /* argument of -m */
#if !ENABLE_EXTRA_COMPAT
	int reflags;             /* flags passed to xregcomp() */
#else
	RE_TRANSLATE_TYPE case_fold; /* RE_TRANSLATE_TYPE is [[un]signed] char* */
#endif
	smalluint invert_search; /* 0 or 1: select non-matching lines (-v) */
	smalluint print_filename; /* prefix output lines with the file name */
	smalluint open_errors;   /* set when some file could not be opened */
#if ENABLE_FEATURE_GREP_CONTEXT
	smalluint did_print_line; /* at least one line was printed already */
	int lines_before;        /* -B N (or -C N) */
	int lines_after;         /* -A N (or -C N) */
	char **before_buf;       /* circular buffer of lines_before saved lines */
	IF_EXTRA_COMPAT(size_t *before_buf_size;) /* lengths of the saved lines */
	int last_line_printed;   /* line number of the most recently printed line */
#endif
	/* globals used internally */
	llist_t *pattern_head;   /* growable list of patterns to match */
	const char *cur_file;    /* the current file we are reading */
} FIX_ALIASING;
/* The globals live in the shared common buffer */
#define G (*(struct globals*)bb_common_bufsiz1)
/* Must run before any global is touched; also checks at build time
 * that struct globals fits in the common buffer */
#define INIT_G() do { \
	setup_common_bufsiz(); \
	BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
} while (0)
/* Accessor macros: let the code use plain names for members of G */
#define max_matches       (G.max_matches         )
#if !ENABLE_EXTRA_COMPAT
# define reflags          (G.reflags             )
#else
# define case_fold        (G.case_fold           )
/* http://www.delorie.com/gnu/docs/regex/regex_46.html */
/* With GNU regex, "reflags" is the library's global syntax selector,
 * and the POSIX REG_xxx names are redefined so that shared code which
 * does "reflags |= REG_EXTENDED" selects the matching GNU syntax.
 * REG_NOSUB and REG_ICASE are poisoned: using them is a compile error. */
# define reflags           re_syntax_options
# undef REG_NOSUB
# undef REG_EXTENDED
# undef REG_ICASE
# define REG_NOSUB    bug:is:here /* should not be used */
/* Just RE_SYNTAX_EGREP is not enough, need to enable {n[,[m]]} too */
# define REG_EXTENDED (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
# define REG_ICASE    bug:is:here /* should not be used */
#endif
#define invert_search     (G.invert_search       )
#define print_filename    (G.print_filename      )
#define open_errors       (G.open_errors         )
#define did_print_line    (G.did_print_line      )
#define lines_before      (G.lines_before        )
#define lines_after       (G.lines_after         )
#define before_buf        (G.before_buf          )
#define before_buf_size   (G.before_buf_size     )
#define last_line_printed (G.last_line_printed   )
#define pattern_head      (G.pattern_head        )
#define cur_file          (G.cur_file            )
228 
229 
/*
 * One entry of the pattern list (stored in llist_t.data of pattern_head).
 * The regex is compiled lazily, on first use in grep_file().
 */
typedef struct grep_list_data_t {
	char *pattern; /* pattern text as given by the user */
/* for GNU regex, matched_range must be persistent across grep_file() calls */
#if !ENABLE_EXTRA_COMPAT
	regex_t compiled_regex;
	regmatch_t matched_range; /* location of the most recent match */
#else
	struct re_pattern_buffer compiled_regex;
	struct re_registers matched_range; /* location of the most recent match */
#endif
/* Bits of flg_mem_allocated_compiled: */
#define ALLOCATED 1 /* pattern is malloced and must be freed on cleanup */
#define COMPILED 2  /* compiled_regex is valid (and must be regfree'd) */
	int flg_mem_allocated_compiled;
} grep_list_data_t;
244 
245 #if !ENABLE_EXTRA_COMPAT
246 #define print_line(line, line_len, linenum, decoration) \
247 	print_line(line, linenum, decoration)
248 #endif
print_line(const char * line,size_t line_len,int linenum,char decoration)249 static void print_line(const char *line, size_t line_len, int linenum, char decoration)
250 {
251 #if ENABLE_FEATURE_GREP_CONTEXT
252 	/* Happens when we go to next file, immediately hit match
253 	 * and try to print prev context... from prev file! Don't do it */
254 	if (linenum < 1)
255 		return;
256 	/* possibly print the little '--' separator */
257 	if ((lines_before || lines_after) && did_print_line
258 	 && last_line_printed != linenum - 1
259 	) {
260 		puts("--");
261 	}
262 	/* guard against printing "--" before first line of first file */
263 	did_print_line = 1;
264 	last_line_printed = linenum;
265 #endif
266 	if (print_filename)
267 		printf("%s%c", cur_file, decoration);
268 	if (PRINT_LINE_NUM)
269 		printf("%i%c", linenum, decoration);
270 	/* Emulate weird GNU grep behavior with -ov */
271 	if ((option_mask32 & (OPT_v|OPT_o)) != (OPT_v|OPT_o)) {
272 #if !ENABLE_EXTRA_COMPAT
273 		puts(line);
274 #else
275 		fwrite(line, 1, line_len, stdout);
276 		putchar(NUL_DELIMITED ? '\0' : '\n');
277 #endif
278 	}
279 }
280 
#if ENABLE_EXTRA_COMPAT
/*
 * Read one record from file using getdelim(); the delimiter is NUL with -z,
 * newline otherwise. Unlike getline, this one removes the trailing delimiter.
 *
 * line_ptr/line_alloc_len: buffer and capacity as for getdelim(); the buffer
 *   is reused (and possibly reallocated) across calls.
 * Returns the record length without the delimiter, or getdelim()'s
 * non-positive result on EOF/error. In the latter case the buffer is freed
 * and *line_ptr is reset to NULL, so the caller is never left holding a
 * dangling pointer (a later free() or getdelim() on it stays safe).
 */
static ssize_t FAST_FUNC bb_getline(char **line_ptr, size_t *line_alloc_len, FILE *file)
{
	ssize_t res_sz;
	char *line;
	int delim = (NUL_DELIMITED ? '\0' : '\n');

	res_sz = getdelim(line_ptr, line_alloc_len, delim, file);
	line = *line_ptr;

	if (res_sz > 0) {
		/* the last record of a file may lack the delimiter */
		if (line[res_sz - 1] == delim)
			line[--res_sz] = '\0';
	} else {
		free(line); /* uclibc allocates a buffer even on EOF. WTF? */
		*line_ptr = NULL;
	}
	return res_sz;
}
#endif
301 
grep_file(FILE * file)302 static int grep_file(FILE *file)
303 {
304 	smalluint found;
305 	int linenum = 0;
306 	int nmatches = 0;
307 #if !ENABLE_EXTRA_COMPAT
308 	char *line;
309 #else
310 	char *line = NULL;
311 	ssize_t line_len;
312 	size_t line_alloc_len;
313 # define rm_so start[0]
314 # define rm_eo end[0]
315 #endif
316 #if ENABLE_FEATURE_GREP_CONTEXT
317 	int print_n_lines_after = 0;
318 	int curpos = 0; /* track where we are in the circular 'before' buffer */
319 	int idx = 0; /* used for iteration through the circular buffer */
320 #else
321 	enum { print_n_lines_after = 0 };
322 #endif
323 
324 	while (
325 #if !ENABLE_EXTRA_COMPAT
326 		(line = xmalloc_fgetline(file)) != NULL
327 #else
328 		(line_len = bb_getline(&line, &line_alloc_len, file)) >= 0
329 #endif
330 	) {
331 		llist_t *pattern_ptr = pattern_head;
332 		grep_list_data_t *gl = gl; /* for gcc */
333 
334 		linenum++;
335 		found = 0;
336 		while (pattern_ptr) {
337 			gl = (grep_list_data_t *)pattern_ptr->data;
338 			if (FGREP_FLAG) {
339 				char *match;
340 				char *str = line;
341  opt_f_again:
342 				match = ((option_mask32 & OPT_i)
343 					? strcasestr(str, gl->pattern)
344 					: strstr(str, gl->pattern)
345 					);
346 				if (match) {
347 					if (option_mask32 & OPT_x) {
348 						if (match != str)
349 							goto opt_f_not_found;
350 						if (str[strlen(gl->pattern)] != '\0')
351 							goto opt_f_not_found;
352 					} else
353 					if (option_mask32 & OPT_w) {
354 						char c = (match != str) ? match[-1] : ' ';
355 						if (!isalnum(c) && c != '_') {
356 							c = match[strlen(gl->pattern)];
357 							if (!c || (!isalnum(c) && c != '_'))
358 								goto opt_f_found;
359 						}
360 						str = match + 1;
361 						goto opt_f_again;
362 					}
363  opt_f_found:
364 					found = 1;
365  opt_f_not_found: ;
366 				}
367 			} else {
368 #if ENABLE_EXTRA_COMPAT
369 				unsigned start_pos;
370 #else
371 				int match_flg;
372 #endif
373 				char *match_at;
374 
375 				if (!(gl->flg_mem_allocated_compiled & COMPILED)) {
376 					gl->flg_mem_allocated_compiled |= COMPILED;
377 #if !ENABLE_EXTRA_COMPAT
378 					xregcomp(&gl->compiled_regex, gl->pattern, reflags);
379 #else
380 					memset(&gl->compiled_regex, 0, sizeof(gl->compiled_regex));
381 					gl->compiled_regex.translate = case_fold; /* for -i */
382 					if (re_compile_pattern(gl->pattern, strlen(gl->pattern), &gl->compiled_regex))
383 						bb_error_msg_and_die("bad regex '%s'", gl->pattern);
384 #endif
385 				}
386 #if !ENABLE_EXTRA_COMPAT
387 				gl->matched_range.rm_so = 0;
388 				gl->matched_range.rm_eo = 0;
389 				match_flg = 0;
390 #else
391 				start_pos = 0;
392 #endif
393 				match_at = line;
394  opt_w_again:
395 //bb_error_msg("'%s' start_pos:%d line_len:%d", match_at, start_pos, line_len);
396 				if (
397 #if !ENABLE_EXTRA_COMPAT
398 					regexec(&gl->compiled_regex, match_at, 1, &gl->matched_range, match_flg) == 0
399 #else
400 					re_search(&gl->compiled_regex, match_at, line_len,
401 							start_pos, /*range:*/ line_len,
402 							&gl->matched_range) >= 0
403 #endif
404 				) {
405 					if (option_mask32 & OPT_x) {
406 						found = (gl->matched_range.rm_so == 0
407 						         && match_at[gl->matched_range.rm_eo] == '\0');
408 					} else
409 					if (!(option_mask32 & OPT_w)) {
410 						found = 1;
411 					} else {
412 						char c = ' ';
413 						if (match_at > line || gl->matched_range.rm_so != 0) {
414 							c = match_at[gl->matched_range.rm_so - 1];
415 						}
416 						if (!isalnum(c) && c != '_') {
417 							c = match_at[gl->matched_range.rm_eo];
418 						}
419 						if (!isalnum(c) && c != '_') {
420 							found = 1;
421 						} else {
422 			/*
423 			 * Why check gl->matched_range.rm_eo?
424 			 * Zero-length match makes -w skip the line:
425 			 * "echo foo | grep ^" prints "foo",
426 			 * "echo foo | grep -w ^" prints nothing.
427 			 * Without such check, we can loop forever.
428 			 */
429 #if !ENABLE_EXTRA_COMPAT
430 							if (gl->matched_range.rm_eo != 0) {
431 								match_at += gl->matched_range.rm_eo;
432 								match_flg |= REG_NOTBOL;
433 								goto opt_w_again;
434 							}
435 #else
436 							if (gl->matched_range.rm_eo > start_pos) {
437 								start_pos = gl->matched_range.rm_eo;
438 								goto opt_w_again;
439 							}
440 #endif
441 						}
442 					}
443 				}
444 			}
445 			/* If it's non-inverted search, we can stop
446 			 * at first match */
447 			if (found && !invert_search)
448 				goto do_found;
449 			pattern_ptr = pattern_ptr->link;
450 		} /* while (pattern_ptr) */
451 
452 		if (found ^ invert_search) {
453  do_found:
454 			/* keep track of matches */
455 			nmatches++;
456 
457 			/* quiet/print (non)matching file names only? */
458 			if (option_mask32 & (OPT_q|OPT_l|OPT_L)) {
459 				free(line); /* we don't need line anymore */
460 				if (BE_QUIET) {
461 					/* manpage says about -q:
462 					 * "exit immediately with zero status
463 					 * if any match is found,
464 					 * even if errors were detected" */
465 					exit(EXIT_SUCCESS);
466 				}
467 				/* if we're just printing filenames, we stop after the first match */
468 				if (PRINT_FILES_WITH_MATCHES) {
469 					puts(cur_file);
470 					/* fall through to "return 1" */
471 				}
472 				/* OPT_L aka PRINT_FILES_WITHOUT_MATCHES: return early */
473 				return 1; /* one match */
474 			}
475 
476 #if ENABLE_FEATURE_GREP_CONTEXT
477 			/* Were we printing context and saw next (unwanted) match? */
478 			if ((option_mask32 & OPT_m) && nmatches > max_matches)
479 				break;
480 #endif
481 
482 			/* print the matched line */
483 			if (PRINT_MATCH_COUNTS == 0) {
484 #if ENABLE_FEATURE_GREP_CONTEXT
485 				int prevpos = (curpos == 0) ? lines_before - 1 : curpos - 1;
486 
487 				/* if we were told to print 'before' lines and there is at least
488 				 * one line in the circular buffer, print them */
489 				if (lines_before && before_buf[prevpos] != NULL) {
490 					int first_buf_entry_line_num = linenum - lines_before;
491 
492 					/* advance to the first entry in the circular buffer, and
493 					 * figure out the line number is of the first line in the
494 					 * buffer */
495 					idx = curpos;
496 					while (before_buf[idx] == NULL) {
497 						idx = (idx + 1) % lines_before;
498 						first_buf_entry_line_num++;
499 					}
500 
501 					/* now print each line in the buffer, clearing them as we go */
502 					while (before_buf[idx] != NULL) {
503 						print_line(before_buf[idx], before_buf_size[idx], first_buf_entry_line_num, '-');
504 						free(before_buf[idx]);
505 						before_buf[idx] = NULL;
506 						idx = (idx + 1) % lines_before;
507 						first_buf_entry_line_num++;
508 					}
509 				}
510 
511 				/* make a note that we need to print 'after' lines */
512 				print_n_lines_after = lines_after;
513 #endif
514 				if (option_mask32 & OPT_o) {
515 					if (FGREP_FLAG) {
516 						/* -Fo just prints the pattern
517 						 * (unless -v: -Fov doesnt print anything at all) */
518 						if (found)
519 							print_line(gl->pattern, strlen(gl->pattern), linenum, ':');
520 					} else while (1) {
521 						unsigned start = gl->matched_range.rm_so;
522 						unsigned end = gl->matched_range.rm_eo;
523 						unsigned len = end - start;
524 						char old = line[end];
525 						line[end] = '\0';
526 						/* Empty match is not printed: try "echo test | grep -o ''" */
527 						if (len != 0)
528 							print_line(line + start, len, linenum, ':');
529 						if (old == '\0')
530 							break;
531 						line[end] = old;
532 						if (len == 0)
533 							end++;
534 #if !ENABLE_EXTRA_COMPAT
535 						if (regexec(&gl->compiled_regex, line + end,
536 								1, &gl->matched_range, REG_NOTBOL) != 0)
537 							break;
538 						gl->matched_range.rm_so += end;
539 						gl->matched_range.rm_eo += end;
540 #else
541 						if (re_search(&gl->compiled_regex, line, line_len,
542 								end, line_len - end,
543 								&gl->matched_range) < 0)
544 							break;
545 #endif
546 					}
547 				} else {
548 					print_line(line, line_len, linenum, ':');
549 				}
550 			}
551 		}
552 #if ENABLE_FEATURE_GREP_CONTEXT
553 		else { /* no match */
554 			/* if we need to print some context lines after the last match, do so */
555 			if (print_n_lines_after) {
556 				print_line(line, strlen(line), linenum, '-');
557 				print_n_lines_after--;
558 			} else if (lines_before) {
559 				/* Add the line to the circular 'before' buffer */
560 				free(before_buf[curpos]);
561 				before_buf[curpos] = line;
562 				IF_EXTRA_COMPAT(before_buf_size[curpos] = line_len;)
563 				curpos = (curpos + 1) % lines_before;
564 				/* avoid free(line) - we took the line */
565 				line = NULL;
566 			}
567 		}
568 
569 #endif /* ENABLE_FEATURE_GREP_CONTEXT */
570 #if !ENABLE_EXTRA_COMPAT
571 		free(line);
572 #endif
573 		/* Did we print all context after last requested match? */
574 		if ((option_mask32 & OPT_m)
575 		 && !print_n_lines_after
576 		 && nmatches == max_matches
577 		) {
578 			break;
579 		}
580 	} /* while (read line) */
581 
582 	/* special-case file post-processing for options where we don't print line
583 	 * matches, just filenames and possibly match counts */
584 
585 	/* grep -c: print [filename:]count, even if count is zero */
586 	if (PRINT_MATCH_COUNTS) {
587 		if (print_filename)
588 			printf("%s:", cur_file);
589 		printf("%d\n", nmatches);
590 	}
591 
592 	/* grep -L: print just the filename */
593 	if (PRINT_FILES_WITHOUT_MATCHES) {
594 		/* nmatches is zero, no need to check it:
595 		 * we return 1 early if we detected a match
596 		 * and PRINT_FILES_WITHOUT_MATCHES is set */
597 		puts(cur_file);
598 	}
599 
600 	return nmatches;
601 }
602 
603 #if ENABLE_FEATURE_CLEAN_UP
604 #define new_grep_list_data(p, m) add_grep_list_data(p, m)
add_grep_list_data(char * pattern,int flg_used_mem)605 static char *add_grep_list_data(char *pattern, int flg_used_mem)
606 #else
607 #define new_grep_list_data(p, m) add_grep_list_data(p)
608 static char *add_grep_list_data(char *pattern)
609 #endif
610 {
611 	grep_list_data_t *gl = xzalloc(sizeof(*gl));
612 	gl->pattern = pattern;
613 #if ENABLE_FEATURE_CLEAN_UP
614 	gl->flg_mem_allocated_compiled = flg_used_mem;
615 #else
616 	/*gl->flg_mem_allocated_compiled = 0;*/
617 #endif
618 	return (char *)gl;
619 }
620 
/*
 * For every file name in the -f list: read it line by line and add each
 * line as a pattern to pattern_head. The list nodes of fopt are freed
 * as they are consumed.
 */
static void load_regexes_from_file(llist_t *fopt)
{
	llist_t *next;

	for (; fopt != NULL; fopt = next) {
		char *pattern_line;
		FILE *pattern_file;
		char *path = fopt->data;

		/* grab the successor before the node goes away */
		next = fopt->link;
		free(fopt);

		pattern_file = xfopen_stdin(path);
		for (;;) {
			pattern_line = xmalloc_fgetline(pattern_file);
			if (pattern_line == NULL)
				break;
			/* the line is malloced: mark it so cleanup frees it */
			llist_add_to(&pattern_head,
				new_grep_list_data(pattern_line, ALLOCATED));
		}
		fclose_if_not_stdin(pattern_file);
	}
}
639 
file_action_grep(const char * filename,struct stat * statbuf UNUSED_PARAM,void * matched,int depth UNUSED_PARAM)640 static int FAST_FUNC file_action_grep(const char *filename,
641 			struct stat *statbuf UNUSED_PARAM,
642 			void* matched,
643 			int depth UNUSED_PARAM)
644 {
645 	FILE *file = fopen_for_read(filename);
646 	if (file == NULL) {
647 		if (!SUPPRESS_ERR_MSGS)
648 			bb_simple_perror_msg(filename);
649 		open_errors = 1;
650 		return 0;
651 	}
652 	cur_file = filename;
653 	*(int*)matched += grep_file(file);
654 	fclose(file);
655 	return 1;
656 }
657 
grep_dir(const char * dir)658 static int grep_dir(const char *dir)
659 {
660 	int matched = 0;
661 	recursive_action(dir,
662 		/* recurse=yes */ ACTION_RECURSE |
663 		/* followLinks=command line only */ ACTION_FOLLOWLINKS_L0 |
664 		/* depthFirst=yes */ ACTION_DEPTHFIRST,
665 		/* fileAction= */ file_action_grep,
666 		/* dirAction= */ NULL,
667 		/* userData= */ &matched,
668 		/* depth= */ 0);
669 	return matched;
670 }
671 
/*
 * Applet entry point for grep/egrep/fgrep: parse options, build the
 * pattern list, then search stdin or each named file (directories are
 * walked when -r is given).
 * Exit code: 0 if some line was selected, 1 if none, 2 if a file could
 * not be opened (fatal errors also exit with 2 via xfunc_error_retval).
 */
int grep_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int grep_main(int argc UNUSED_PARAM, char **argv)
{
	FILE *file;
	int matched;
	llist_t *fopt = NULL;
#if ENABLE_FEATURE_GREP_CONTEXT
	int Copt, opts;
#endif
	INIT_G();

	/* For grep, exitcode of 1 is "not found". Other errors are 2: */
	xfunc_error_retval = 2;

	/* do normal option parsing */
#if ENABLE_FEATURE_GREP_CONTEXT
	/* -H unsets -h; -C unsets -A,-B; -e,-f are lists;
	 * -m,-A,-B,-C have numeric param */
	opt_complementary = "H-h:C-AB";
	opts = getopt32(argv,
		OPTSTR_GREP,
		&pattern_head, &fopt, &max_matches,
		&lines_after, &lines_before, &Copt);

	if (opts & OPT_C) {
		/* -C unsets prev -A and -B, but following -A or -B
		 * may override it */
		if (!(opts & OPT_A)) /* not overridden */
			lines_after = Copt;
		if (!(opts & OPT_B)) /* not overridden */
			lines_before = Copt;
	}
	/* sanity checks */
	if (opts & (OPT_c|OPT_q|OPT_l|OPT_L)) {
		/* these modes print no lines: line numbers and context are moot */
		option_mask32 &= ~OPT_n;
		lines_before = 0;
		lines_after = 0;
	} else if (lines_before > 0) {
		if (lines_before > INT_MAX / sizeof(long long))
			lines_before = INT_MAX / sizeof(long long);
		/* overflow in (lines_before * sizeof(x)) is prevented (above) */
		before_buf = xzalloc(lines_before * sizeof(before_buf[0]));
		IF_EXTRA_COMPAT(before_buf_size = xzalloc(lines_before * sizeof(before_buf_size[0]));)
	}
#else
	/* with auto sanity checks */
	/* -H unsets -h; -c,-q or -l unset -n; -e,-f are lists; -m N */
	opt_complementary = "H-h:c-n:q-n:l-n:";
	getopt32(argv, OPTSTR_GREP,
		&pattern_head, &fopt, &max_matches);
#endif
	invert_search = ((option_mask32 & OPT_v) != 0); /* 0 | 1 */

	{	/* convert char **argv to grep_list_data_t */
		/* (-e patterns point into argv: not malloced, hence flag 0) */
		llist_t *cur;
		for (cur = pattern_head; cur; cur = cur->link)
			cur->data = new_grep_list_data(cur->data, 0);
	}
	if (option_mask32 & OPT_f) {
		load_regexes_from_file(fopt);
		if (!pattern_head) { /* -f EMPTY_FILE? */
			/* GNU grep treats it as "nothing matches" */
			/* (done as: empty pattern, which matches every line, inverted) */
			llist_add_to(&pattern_head, new_grep_list_data((char*) "", 0));
			invert_search ^= 1;
		}
	}

	/* invoked as "fgrep": same as -F */
	if (ENABLE_FGREP && applet_name[0] == 'f')
		option_mask32 |= OPT_F;

#if !ENABLE_EXTRA_COMPAT
	/* match positions are needed only for -o, -w and -x */
	if (!(option_mask32 & (OPT_o | OPT_w | OPT_x)))
		reflags = REG_NOSUB;
#endif

	/* invoked as "egrep", or -E given: extended regex syntax */
	if ((ENABLE_EGREP && applet_name[0] == 'e')
	 || (option_mask32 & OPT_E)
	) {
		reflags |= REG_EXTENDED;
	}
#if ENABLE_EXTRA_COMPAT
	else {
		reflags = RE_SYNTAX_GREP;
	}
#endif

	if (option_mask32 & OPT_i) {
#if !ENABLE_EXTRA_COMPAT
		reflags |= REG_ICASE;
#else
		/* GNU regex: build a translate table mapping a-z to A-Z
		 * and every other byte to itself */
		int i;
		case_fold = xmalloc(256);
		for (i = 0; i < 256; i++)
			case_fold[i] = (unsigned char)i;
		for (i = 'a'; i <= 'z'; i++)
			case_fold[i] = (unsigned char)(i - ('a' - 'A'));
#endif
	}

	argv += optind;

	/* if we didn't get a pattern from -e and no pattern file was specified,
	 * the first parameter must be the pattern; without one we cannot run */
	if (pattern_head == NULL) {
		char *pattern;
		if (*argv == NULL)
			bb_show_usage();
		pattern = new_grep_list_data(*argv++, 0);
		llist_add_to(&pattern_head, pattern);
	}

	/* The remaining argv[] entries should be names of files to grep through.
	 * If there is more than one file to grep, we will print the filenames. */
	if (argv[0] && argv[1])
		print_filename = 1;
	/* -H / -h of course override */
	if (option_mask32 & OPT_H)
		print_filename = 1;
	if (option_mask32 & OPT_h)
		print_filename = 0;

	/* If no files were specified, or '-' was specified, take input from
	 * stdin. Otherwise, we grep through all the files specified. */
	matched = 0;
	do {
		cur_file = *argv;
		file = stdin;
		if (!cur_file || LONE_DASH(cur_file)) {
			cur_file = "(standard input)";
		} else {
			if (option_mask32 & OPT_r) {
				struct stat st;
				if (stat(cur_file, &st) == 0 && S_ISDIR(st.st_mode)) {
					/* a directory walk prints file names unless -h */
					if (!(option_mask32 & OPT_h))
						print_filename = 1;
					matched += grep_dir(cur_file);
					goto grep_done;
				}
			}
			/* else: fopen(dir) will succeed, but reading won't */
			file = fopen_for_read(cur_file);
			if (file == NULL) {
				if (!SUPPRESS_ERR_MSGS)
					bb_simple_perror_msg(cur_file);
				open_errors = 1;
				/* "continue" in do-while jumps to the loop condition,
				 * i.e. proceeds with the next file name */
				continue;
			}
		}
		matched += grep_file(file);
		fclose_if_not_stdin(file);
 grep_done: ;
	} while (*argv && *++argv);

	/* destroy all the elements in the pattern list */
	if (ENABLE_FEATURE_CLEAN_UP) {
		while (pattern_head) {
			llist_t *pattern_head_ptr = pattern_head;
			grep_list_data_t *gl = (grep_list_data_t *)pattern_head_ptr->data;

			pattern_head = pattern_head->link;
			/* free only what the flags say we own */
			if (gl->flg_mem_allocated_compiled & ALLOCATED)
				free(gl->pattern);
			if (gl->flg_mem_allocated_compiled & COMPILED)
				regfree(&gl->compiled_regex);
			free(gl);
			free(pattern_head_ptr);
		}
	}
	/* 0 = success, 1 = failed, 2 = error */
	if (open_errors)
		return 2;
	return !matched; /* invert return value: 0 = success, 1 = failed */
}
845