1 /* $OpenBSD: grep.c,v 1.1 2001/09/21 23:12:00 deraadt Exp $ */ 2 3 /*- 4 * Copyright (c) 2000 Carson Harding. All rights reserved. 5 * This code was written and contributed to OpenBSD by Carson Harding. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the author, or the names of contributors may be 16 * used to endorse or promote products derived from this software without 17 * specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 #ifndef lint 33 static char rcsid[] = "$OpenBSD: grep.c,v 1.1 2001/09/21 23:12:00 deraadt Exp $"; 34 #endif /* not lint */ 35 36 #include <sys/types.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <unistd.h> 40 #include <regex.h> 41 #include <string.h> 42 #include <ctype.h> 43 #include <sys/param.h> 44 #include <fts.h> 45 #include <err.h> 46 47 extern char *__progname; 48 49 50 void usage(void); 51 void err_regerror(int r, regex_t *rexp); 52 int grep_files(int regexc, regex_t *regexv, char **files); 53 int grep_tree(int regexc, regex_t *regexv, char **paths); 54 int grep_file(int regexc, regex_t *rexp, char *fname); 55 void arg_patt(char *s); 56 char *chop_patt(char *s, size_t *len); 57 void add_patt(char *s, size_t len); 58 void load_patt(char *fname); 59 regex_t *regcomp_patt(int pattc, char *pattvp[], int cflags); 60 61 62 int f_bytecount; /* -b prepend byte count */ 63 int f_countonly; /* -c return only count */ 64 int f_nofname; /* -h do not prepend filenames on multiple */ 65 int f_fnameonly; /* -l only print file name with match */ 66 int f_suppress; /* -s suppress error messages; 1/2 -q */ 67 int f_lineno; /* -n prepend with line numbers */ 68 int f_quiet; /* -q no output, only status */ 69 int f_wmatch; /* -w match words */ 70 int f_xmatch; /* -x match line */ 71 int f_zerobyte; /* -z NUL character after filename with -l */ 72 int f_match; /* = REG_MATCH; else = REG_NOMATCH for -v */ 73 int f_multifile; /* multiple files: prepend file names */ 74 int f_matchall; /* empty pattern, matches all input */ 75 int f_error; /* saw error; set exit status */ 76 77 /* default traversal flags */ 78 int f_ftsflags = FTS_LOGICAL|FTS_NOCHDIR|FTS_NOSTAT; 79 80 int f_debug; /* temporary debugging flag */ 81 82 #define START_PATT_SZ 8 /* start with room for 8 patterns */ 83 char **pattv; /* array of patterns from -e and -f */ 84 int pattc; /* patterns in pattern array */ 85 int pattn; /* patterns we have seen, including nulls */ 86 87 int 88 main(int argc, char **argv) 89 { 90 int c; 91 int ch; 92 int cflags; /* flags to regcomp() */ 93 int sawfile; /* did we see a pattern file? */ 94 regex_t *regexv; /* start of array of compiled patterns */ 95 96 int (*grepf)(int regexc, regex_t *regexv, char **argv); 97 98 sawfile = 0; 99 cflags = REG_BASIC|REG_NEWLINE; 100 grepf = grep_files; 101 102 if (*__progname == 'e') 103 cflags |= REG_EXTENDED; 104 else if (*__progname == 'f') 105 cflags |= REG_NOSPEC; 106 107 while ((ch = getopt(argc, argv, "DEFRHLPXabce:f:hilnqsvwxz")) != -1) { 108 switch(ch) { 109 case 'D': 110 f_debug = 1; 111 break; 112 case 'E': 113 cflags |= REG_EXTENDED; 114 break; 115 case 'F': 116 cflags |= REG_NOSPEC; 117 break; 118 case 'H': 119 f_ftsflags |= FTS_COMFOLLOW; 120 break; 121 case 'L': 122 f_ftsflags |= FTS_LOGICAL; 123 break; 124 case 'P': 125 f_ftsflags |= FTS_PHYSICAL; 126 break; 127 case 'R': 128 grepf = grep_tree; 129 /* 130 * If walking the tree we don't know how many files 131 * we'll actually find. So assume multiple, if 132 * you don't want names, there's always -h .... 133 */ 134 f_multifile = 1; 135 break; 136 case 'X': 137 f_ftsflags |= FTS_XDEV; 138 break; 139 case 'a': 140 /* 141 * Silently eat -a; we don't use the default 142 * behaviour it toggles off in gnugrep. 143 */ 144 break; 145 case 'b': 146 f_bytecount = 1; 147 break; 148 case 'c': 149 f_countonly = 1; 150 break; 151 case 'e': 152 arg_patt(optarg); 153 break; 154 case 'f': 155 load_patt(optarg); 156 sawfile = 1; 157 break; 158 case 'h': 159 f_nofname = 1; 160 break; 161 case 'i': 162 cflags |= REG_ICASE; 163 break; 164 case 'l': 165 f_fnameonly = 1; 166 break; 167 case 'n': 168 f_lineno = 1; 169 break; 170 case 'q': 171 f_quiet = 1; 172 break; 173 case 's': 174 f_suppress = 1; 175 break; 176 case 'v': 177 f_match = REG_NOMATCH; 178 break; 179 case 'w': 180 f_wmatch = 1; 181 break; 182 case 'x': 183 f_xmatch = 1; 184 break; 185 case 'z': 186 f_zerobyte = 1; 187 break; 188 default: 189 usage(); 190 break; 191 } 192 } 193 194 if ((cflags & REG_EXTENDED) && (cflags & REG_NOSPEC)) 195 usage(); 196 197 /* 198 * If we read one or more pattern files, and still 199 * didn't end up with any pattern, any pattern file 200 * we read was empty. This is different than failing 201 * to provide a pattern as an argument, and we fail 202 * on this case as if we had searched and found 203 * no matches. (At least this is what GNU grep and 204 * Solaris's grep do.) 205 */ 206 if (!pattn && !argv[optind]) { 207 if (sawfile) 208 exit(1); 209 else usage(); 210 } 211 212 if (!pattn) { 213 arg_patt(argv[optind]); 214 optind++; 215 } 216 217 /* why bother ... just do nothing sooner */ 218 if (f_matchall && f_match == REG_NOMATCH) 219 exit(1); 220 221 regexv = regcomp_patt(pattc, pattv, cflags); 222 223 if (optind == argc) { 224 c = grep_file(pattc, regexv, NULL); 225 } else { 226 if (argc - optind > 1 && !f_nofname) 227 f_multifile = 1; 228 c = (*grepf)(pattc, regexv, &argv[optind]); 229 } 230 231 /* XX ugh */ 232 if (f_error) { 233 if (c && f_quiet) 234 exit(0); 235 else 236 exit(2); 237 } else if (c) 238 exit(0); 239 else 240 exit(1); 241 } 242 243 void 244 usage(void) 245 { 246 fprintf(stderr, "usage: %s [-E|-F] [-abchilnqsvwx] [-RXH[-L|-P]]" 247 " {patt | -e patt | -f patt_file} [files]\n", 248 __progname); 249 exit(2); 250 } 251 252 /* 253 * Patterns as arguments may have embedded newlines. 254 * When read from file, these are detected by fgetln(); 255 * in arguments we have to find and cut out the segments. 256 */ 257 void 258 arg_patt(char *s) 259 { 260 size_t len; 261 char *sp; 262 263 if (f_debug) 264 fprintf(stderr, "arg_patt(\"%s\")\n", s); 265 266 len = strlen(s); 267 if (!len) { /* got "" on the command-line */ 268 add_patt(s, len); 269 return; 270 } 271 for (sp = chop_patt(s, &len); sp; sp = chop_patt(NULL, &len)) { 272 if (f_debug) { 273 fprintf(stderr, "adding pattern \""); 274 fwrite(sp, len, 1, stderr); 275 fprintf(stderr, "\", length %lu\n",(unsigned long)len); 276 if (pattc > 20) { 277 fprintf(stderr, "too many, exiting ...\n"); 278 exit(2); 279 } 280 } 281 add_patt(sp, len); 282 } 283 } 284 285 /* 286 * Kind of like strtok; pass char *, then NULL for rest. 287 * Call it memtok()... New size gets written into len. 288 */ 289 char * 290 chop_patt(char *s, size_t *len) 291 { 292 char *cp; 293 static char *save_s; 294 static int save_n; 295 296 if (s) 297 save_n = *len; 298 else 299 s = save_s; 300 301 if (save_n <= 0) { 302 s = save_s = NULL; 303 } else if (s) { 304 if ((cp = memchr(s, '\n', save_n)) != NULL) { 305 *len = cp - s; /* returned segment */ 306 save_n -= *len; 307 save_s = ++cp; /* adjust past newline */ 308 save_n--; 309 } else { 310 *len = save_n; /* else return the whole string */ 311 save_n = 0; 312 } 313 } 314 315 return s; 316 } 317 318 /* 319 * Start with an array for 8 patterns, and double it 320 * each time we outgrow it. If pattern is empty (0 length), 321 * or if f_matchall is already set, set f_matchall and return. 322 * No use adding a pattern if all input is going to match 323 * anyhow. 324 */ 325 void 326 add_patt(char *s, size_t len) 327 { 328 char *p; 329 static size_t pattmax = START_PATT_SZ; 330 static size_t sumlen; 331 332 pattn++; 333 sumlen += len; 334 335 if (!len || f_matchall) { 336 f_matchall = 1; 337 return; 338 } 339 340 if (!pattv) { 341 pattv = malloc(START_PATT_SZ * sizeof(char *)); 342 if (!pattv) 343 err(2, "malloc"); 344 pattc = 0; 345 } else if (pattc >= pattmax) { 346 pattmax *= 2; 347 pattv = realloc(pattv, pattmax * sizeof(char *)); 348 if (!pattv) 349 err(2, "realloc"); 350 } 351 p = malloc(len+1); 352 if (!p) err(2, "malloc"); 353 memmove(p, s, len); 354 p[len] = '\0'; 355 pattv[pattc++] = p; 356 } 357 358 /* 359 * Load patterns from file. 360 */ 361 void 362 load_patt(char *fname) 363 { 364 char *buf; 365 size_t len; 366 FILE *fr; 367 368 fr = fopen(fname, "r"); 369 if (!fr) 370 err(2, fname); 371 while ((buf = fgetln(fr, &len)) != NULL) { 372 if (buf[len-1] == '\n') 373 buf[--len] = '\0'; 374 add_patt(buf, len); 375 } 376 fclose(fr); 377 } 378 379 /* 380 * Compile the collected pattern strings into an array 381 * of regex_t. 382 */ 383 regex_t * 384 regcomp_patt(int lpattc, char *lpattv[], int cflags) 385 { 386 int i; 387 int r; 388 regex_t *rxv; 389 390 if (f_matchall) 391 return NULL; 392 393 rxv = malloc(sizeof(regex_t) * lpattc); 394 if (!rxv) 395 err(2, "malloc"); 396 for (i = 0; i < lpattc; i++) { 397 if ((r = regcomp(&rxv[i], lpattv[i], cflags)) != 0) 398 err_regerror(r, &rxv[i]); 399 } 400 return rxv; 401 } 402 403 /* 404 * Print out regcomp error, and exit. 405 */ 406 void 407 err_regerror(int r, regex_t *rexp) 408 { 409 size_t n; 410 char *buf; 411 412 n = regerror(r, rexp, NULL, 0); 413 buf = malloc(n); 414 if (!buf) 415 err(2, "malloc"); 416 (void)regerror(r, rexp, buf, n); 417 errx(2, "%s", buf); 418 } 419 420 /* 421 * Little wrapper so we can use function pointer above. 422 */ 423 int 424 grep_files(int regexc, regex_t *regexv, char **files) 425 { 426 int c; 427 char **fname; 428 429 c = 0; 430 for (fname = files; *fname; fname++) 431 c += grep_file(regexc, regexv, *fname); 432 433 return c; 434 } 435 436 /* 437 * Modified from James Howard and Dag-Erling Co?dan Sm?rgrav's grep: 438 * add FTS_D to FTS_DP (especially since D was the one being used) 439 * pass in regex_t array, and set fts flags above in main(). 440 */ 441 int 442 grep_tree(int regexc, regex_t *regexv, char **paths) 443 { 444 int c; 445 FTS *fts; 446 FTSENT *p; 447 448 c = 0; 449 450 if (!(fts = fts_open(paths, f_ftsflags, (int (*) ()) NULL))) 451 err(2, "fts_open"); 452 while ((p = fts_read(fts)) != NULL) { 453 switch (p->fts_info) { 454 case FTS_D: 455 case FTS_DP: 456 case FTS_DNR: 457 break; 458 case FTS_ERR: 459 errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno)); 460 break; 461 default: 462 if (f_debug) 463 printf("%s\n", p->fts_path); 464 c += grep_file(regexc, regexv, p->fts_path); 465 break; 466 } 467 } 468 469 return c; 470 } 471 472 /* 473 * Open and grep the named file. If fname is NULL, read 474 * from stdin. 475 */ 476 477 #define isword(x) (isalnum(x) || (x) == '_') 478 479 int 480 grep_file(int regexc, regex_t *regexv, char *fname) 481 { 482 int i; 483 int c; 484 int n; 485 int r; 486 int match; 487 char *buf; 488 size_t b; 489 size_t len; 490 FILE *fr; 491 regmatch_t pmatch[1]; 492 regoff_t so, eo; 493 494 b = 0; /* byte count */ 495 c = 0; /* match count */ 496 n = 0; /* line count */ 497 498 if (!fname) { 499 fr = stdin; 500 fname = "(standard input)"; 501 } else { 502 fr = fopen(fname, "r"); 503 if (!fr) { 504 if (!f_suppress) 505 warn("%s", fname); 506 f_error = 1; 507 return 0; 508 } 509 } 510 511 while ((buf = fgetln(fr, &len)) != NULL) { 512 n++; 513 if (f_matchall) 514 goto printmatch; 515 match = 0; 516 for (i = 0; i < regexc; i++) { 517 pmatch[0].rm_so = 0; 518 pmatch[0].rm_eo = len-1; 519 r = regexec(®exv[i], buf, 1, pmatch, REG_STARTEND); 520 if (r == f_match) { 521 /* 522 * XX gnu grep allows both -w and -x; 523 * XX but seems bizarre. sometimes -w seems 524 * XX to override, at other times, not. 525 * XX Need to figure that out. 526 * XX It seems logical to go with the most 527 * XX restrictive argument: -x, as -x is 528 * XX a boundary case of -w anyhow. 529 */ 530 if (f_xmatch) { 531 if (pmatch[0].rm_so != 0 || 532 pmatch[0].rm_eo != len-1) 533 continue; 534 } else if (f_wmatch) { 535 so = pmatch[0].rm_so; 536 eo = pmatch[0].rm_eo; 537 if (!((so == 0 || !isword(buf[so-1])) && 538 (eo == len || !isword(buf[eo])))) 539 continue; 540 } 541 match = 1; 542 break; 543 } 544 /* XX test for regexec() errors ?? */ 545 } 546 if (match) { 547 printmatch: 548 c++; 549 if (f_fnameonly || f_quiet) 550 break; 551 if (f_countonly) 552 continue; 553 if (f_multifile && !f_nofname) 554 printf("%s:", fname); 555 if (f_lineno) 556 printf("%d:", n); 557 if (f_bytecount) 558 printf("%lu:", (unsigned long)b); 559 fwrite(buf, len, 1, stdout); 560 } 561 /* save position in stream before next line */ 562 b += len; 563 } 564 565 if (!buf && ferror(fr)) { 566 warn("%s", fname); 567 f_error = 1; 568 /* 569 * XX or do we spit out what result we did have? 570 */ 571 } else if (!f_quiet) { 572 /* 573 * XX test -c and -l together: gnu grep 574 * XX allows (although ugly), do others? 575 */ 576 if (f_countonly) { 577 if (f_multifile) 578 printf("%s:", fname); 579 printf("%d\n", c); 580 } 581 if (c && f_fnameonly) { 582 fputs(fname, stdout); 583 if (f_zerobyte) 584 fputc('\0', stdout); 585 else 586 fputc('\n', stdout); 587 } 588 } 589 590 if (fr != stdin) 591 fclose(fr); 592 593 return c; 594 } 595 596