1 /* exclude.c -- exclude file names 2 3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2020 Free Software 4 Foundation, Inc. 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <https://www.gnu.org/licenses/>. */ 18 19 /* Written by Paul Eggert <eggert@twinsun.com> 20 and Sergey Poznyakoff <gray@gnu.org>. 21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk> 22 for improvement suggestions. */ 23 24 #include <config.h> 25 26 #include <stdbool.h> 27 28 #include <ctype.h> 29 #include <errno.h> 30 #include <stddef.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <wctype.h> 35 #include <regex.h> 36 37 #include "exclude.h" 38 #include "hash.h" 39 #include "mbuiter.h" 40 #include "fnmatch.h" 41 #include "xalloc.h" 42 #include "verify.h" 43 #include "filename.h" 44 45 #if USE_UNLOCKED_IO 46 # include "unlocked-io.h" 47 #endif 48 49 /* Non-GNU systems lack these options, so we don't need to check them. */ 50 #ifndef FNM_CASEFOLD 51 # define FNM_CASEFOLD 0 52 #endif 53 #ifndef FNM_EXTMATCH 54 # define FNM_EXTMATCH 0 55 #endif 56 #ifndef FNM_LEADING_DIR 57 # define FNM_LEADING_DIR 0 58 #endif 59 60 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS) 61 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR 62 | FNM_CASEFOLD | FNM_EXTMATCH)) 63 == 0); 64 65 66 /* Exclusion patterns are grouped into a singly-linked list of 67 "exclusion segments". Each segment represents a set of patterns 68 that can be matches using the same algorithm. Non-wildcard 69 patterns are kept in hash tables, to speed up searches. Wildcard 70 patterns are stored as arrays of patterns. */ 71 72 73 /* An exclude pattern-options pair. The options are fnmatch options 74 ORed with EXCLUDE_* options. */ 75 76 struct patopts 77 { 78 int options; 79 union 80 { 81 char const *pattern; 82 regex_t re; 83 } v; 84 }; 85 86 /* An array of pattern-options pairs. */ 87 88 struct exclude_pattern 89 { 90 struct patopts *exclude; 91 size_t exclude_alloc; 92 size_t exclude_count; 93 }; 94 95 enum exclude_type 96 { 97 exclude_hash, /* a hash table of excluded names */ 98 exclude_pattern /* an array of exclude patterns */ 99 }; 100 101 struct exclude_segment 102 { 103 struct exclude_segment *next; /* next segment in list */ 104 enum exclude_type type; /* type of this segment */ 105 int options; /* common options for this segment */ 106 union 107 { 108 Hash_table *table; /* for type == exclude_hash */ 109 struct exclude_pattern pat; /* for type == exclude_pattern */ 110 } v; 111 }; 112 113 struct pattern_buffer 114 { 115 struct pattern_buffer *next; 116 char *base; 117 }; 118 119 /* The exclude structure keeps a singly-linked list of exclude segments, 120 maintained in reverse order. */ 121 struct exclude 122 { 123 struct exclude_segment *head; 124 struct pattern_buffer *patbuf; 125 }; 126 127 /* Register BUF in the pattern buffer list of EX. ADD_FUNC (see 128 add_exclude_file and add_exclude_fp below) can use this function 129 if it modifies the pattern, to ensure the allocated memory will be 130 properly reclaimed upon calling free_exclude. */ 131 void 132 exclude_add_pattern_buffer (struct exclude *ex, char *buf) 133 { 134 struct pattern_buffer *pbuf = xmalloc (sizeof *pbuf); 135 pbuf->base = buf; 136 pbuf->next = ex->patbuf; 137 ex->patbuf = pbuf; 138 } 139 140 /* Return true if STR has or may have wildcards, when matched with OPTIONS. 141 Return false if STR definitely does not have wildcards. */ 142 bool 143 fnmatch_pattern_has_wildcards (const char *str, int options) 144 { 145 while (1) 146 { 147 switch (*str++) 148 { 149 case '.': 150 case '{': 151 case '}': 152 case '(': 153 case ')': 154 if (options & EXCLUDE_REGEX) 155 return true; 156 break; 157 158 case '\\': 159 if (options & EXCLUDE_REGEX) 160 continue; 161 else 162 str += ! (options & FNM_NOESCAPE) && *str; 163 break; 164 165 case '+': case '@': case '!': 166 if (options & FNM_EXTMATCH && *str == '(') 167 return true; 168 break; 169 170 case '?': case '*': case '[': 171 return true; 172 173 case '\0': 174 return false; 175 } 176 } 177 } 178 179 static void 180 unescape_pattern (char *str) 181 { 182 char const *q = str; 183 do 184 q += *q == '\\' && q[1]; 185 while ((*str++ = *q++)); 186 } 187 188 /* Return a newly allocated and empty exclude list. */ 189 190 struct exclude * 191 new_exclude (void) 192 { 193 return xzalloc (sizeof *new_exclude ()); 194 } 195 196 /* Calculate the hash of string. */ 197 static size_t 198 string_hasher (void const *data, size_t n_buckets) 199 { 200 char const *p = data; 201 return hash_string (p, n_buckets); 202 } 203 204 /* Ditto, for case-insensitive hashes */ 205 static size_t 206 string_hasher_ci (void const *data, size_t n_buckets) 207 { 208 char const *p = data; 209 mbui_iterator_t iter; 210 size_t value = 0; 211 212 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter)) 213 { 214 mbchar_t m = mbui_cur (iter); 215 wchar_t wc; 216 217 if (m.wc_valid) 218 wc = towlower (m.wc); 219 else 220 wc = *m.ptr; 221 222 value = (value * 31 + wc) % n_buckets; 223 } 224 225 return value; 226 } 227 228 /* compare two strings for equality */ 229 static bool 230 string_compare (void const *data1, void const *data2) 231 { 232 char const *p1 = data1; 233 char const *p2 = data2; 234 return strcmp (p1, p2) == 0; 235 } 236 237 /* compare two strings for equality, case-insensitive */ 238 static bool 239 string_compare_ci (void const *data1, void const *data2) 240 { 241 char const *p1 = data1; 242 char const *p2 = data2; 243 return mbscasecmp (p1, p2) == 0; 244 } 245 246 static void 247 string_free (void *data) 248 { 249 free (data); 250 } 251 252 /* Create new exclude segment of given TYPE and OPTIONS, and attach it 253 to the head of EX. */ 254 static void 255 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options) 256 { 257 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment)); 258 sp->type = type; 259 sp->options = options; 260 switch (type) 261 { 262 case exclude_pattern: 263 break; 264 265 case exclude_hash: 266 sp->v.table = hash_initialize (0, NULL, 267 (options & FNM_CASEFOLD) ? 268 string_hasher_ci 269 : string_hasher, 270 (options & FNM_CASEFOLD) ? 271 string_compare_ci 272 : string_compare, 273 string_free); 274 break; 275 } 276 sp->next = ex->head; 277 ex->head = sp; 278 } 279 280 /* Free a single exclude segment */ 281 static void 282 free_exclude_segment (struct exclude_segment *seg) 283 { 284 size_t i; 285 286 switch (seg->type) 287 { 288 case exclude_pattern: 289 for (i = 0; i < seg->v.pat.exclude_count; i++) 290 { 291 if (seg->v.pat.exclude[i].options & EXCLUDE_REGEX) 292 regfree (&seg->v.pat.exclude[i].v.re); 293 } 294 free (seg->v.pat.exclude); 295 break; 296 297 case exclude_hash: 298 hash_free (seg->v.table); 299 break; 300 } 301 free (seg); 302 } 303 304 /* Free the storage associated with an exclude list. */ 305 void 306 free_exclude (struct exclude *ex) 307 { 308 struct exclude_segment *seg; 309 struct pattern_buffer *pbuf; 310 311 for (seg = ex->head; seg; ) 312 { 313 struct exclude_segment *next = seg->next; 314 free_exclude_segment (seg); 315 seg = next; 316 } 317 318 for (pbuf = ex->patbuf; pbuf; ) 319 { 320 struct pattern_buffer *next = pbuf->next; 321 free (pbuf->base); 322 free (pbuf); 323 pbuf = next; 324 } 325 326 free (ex); 327 } 328 329 /* Return zero if PATTERN matches F, obeying OPTIONS, except that 330 (unlike fnmatch) wildcards are disabled in PATTERN. */ 331 332 static int 333 fnmatch_no_wildcards (char const *pattern, char const *f, int options) 334 { 335 if (! (options & FNM_LEADING_DIR)) 336 return ((options & FNM_CASEFOLD) 337 ? mbscasecmp (pattern, f) 338 : strcmp (pattern, f)); 339 else if (! (options & FNM_CASEFOLD)) 340 { 341 size_t patlen = strlen (pattern); 342 int r = strncmp (pattern, f, patlen); 343 if (! r) 344 { 345 r = f[patlen]; 346 if (r == '/') 347 r = 0; 348 } 349 return r; 350 } 351 else 352 { 353 /* Walk through a copy of F, seeing whether P matches any prefix 354 of F. 355 356 FIXME: This is an O(N**2) algorithm; it should be O(N). 357 Also, the copy should not be necessary. However, fixing this 358 will probably involve a change to the mbs* API. */ 359 360 char *fcopy = xstrdup (f); 361 char *p; 362 int r; 363 for (p = fcopy; ; *p++ = '/') 364 { 365 p = strchr (p, '/'); 366 if (p) 367 *p = '\0'; 368 r = mbscasecmp (pattern, fcopy); 369 if (!p || r <= 0) 370 break; 371 } 372 free (fcopy); 373 return r; 374 } 375 } 376 377 bool 378 exclude_fnmatch (char const *pattern, char const *f, int options) 379 { 380 int (*matcher) (char const *, char const *, int) = 381 (options & EXCLUDE_WILDCARDS 382 ? fnmatch 383 : fnmatch_no_wildcards); 384 bool matched = ((*matcher) (pattern, f, options) == 0); 385 char const *p; 386 387 if (! (options & EXCLUDE_ANCHORED)) 388 for (p = f; *p && ! matched; p++) 389 if (*p == '/' && p[1] != '/') 390 matched = ((*matcher) (pattern, p + 1, options) == 0); 391 392 return matched; 393 } 394 395 static bool 396 exclude_patopts (struct patopts const *opts, char const *f) 397 { 398 int options = opts->options; 399 400 return (options & EXCLUDE_REGEX) 401 ? regexec (&opts->v.re, f, 0, NULL, 0) == 0 402 : exclude_fnmatch (opts->v.pattern, f, options); 403 } 404 405 /* Return true if the exclude_pattern segment SEG matches F. */ 406 407 static bool 408 file_pattern_matches (struct exclude_segment const *seg, char const *f) 409 { 410 size_t exclude_count = seg->v.pat.exclude_count; 411 struct patopts const *exclude = seg->v.pat.exclude; 412 size_t i; 413 414 for (i = 0; i < exclude_count; i++) 415 { 416 if (exclude_patopts (exclude + i, f)) 417 return true; 418 } 419 return false; 420 } 421 422 /* Return true if the exclude_hash segment SEG matches F. 423 BUFFER is an auxiliary storage of the same length as F (with nul 424 terminator included) */ 425 static bool 426 file_name_matches (struct exclude_segment const *seg, char const *f, 427 char *buffer) 428 { 429 int options = seg->options; 430 Hash_table *table = seg->v.table; 431 432 do 433 { 434 /* initialize the pattern */ 435 strcpy (buffer, f); 436 437 while (1) 438 { 439 if (hash_lookup (table, buffer)) 440 return true; 441 if (options & FNM_LEADING_DIR) 442 { 443 char *p = strrchr (buffer, '/'); 444 if (p) 445 { 446 *p = 0; 447 continue; 448 } 449 } 450 break; 451 } 452 453 if (!(options & EXCLUDE_ANCHORED)) 454 { 455 f = strchr (f, '/'); 456 if (f) 457 f++; 458 } 459 else 460 break; 461 } 462 while (f); 463 464 return false; 465 } 466 467 /* Return true if EX excludes F. */ 468 469 bool 470 excluded_file_name (struct exclude const *ex, char const *f) 471 { 472 struct exclude_segment *seg; 473 bool invert = false; 474 char *filename = NULL; 475 476 /* If no patterns are given, the default is to include. */ 477 if (!ex->head) 478 return false; 479 480 /* Scan through the segments, reporting the status of the first match. 481 The segments are in reverse order, so this reports the status of 482 the last match in the original option list. */ 483 for (seg = ex->head; ; seg = seg->next) 484 { 485 if (seg->type == exclude_hash) 486 { 487 if (!filename) 488 filename = xmalloc (strlen (f) + 1); 489 if (file_name_matches (seg, f, filename)) 490 break; 491 } 492 else 493 { 494 if (file_pattern_matches (seg, f)) 495 break; 496 } 497 498 if (! seg->next) 499 { 500 /* If patterns are given but none match, the default is the 501 opposite of the last segment (i.e., the first in the 502 original option list). For example, in the command 503 'grep -r --exclude="a*" --include="*b" pat dir', the 504 first option is --exclude so any file name matching 505 neither a* nor *b is included. */ 506 invert = true; 507 break; 508 } 509 } 510 511 free (filename); 512 return invert ^ ! (seg->options & EXCLUDE_INCLUDE); 513 } 514 515 /* Append to EX the exclusion PATTERN with OPTIONS. */ 516 517 void 518 add_exclude (struct exclude *ex, char const *pattern, int options) 519 { 520 struct exclude_segment *seg; 521 struct exclude_pattern *pat; 522 struct patopts *patopts; 523 524 if ((options & (EXCLUDE_REGEX|EXCLUDE_WILDCARDS)) 525 && fnmatch_pattern_has_wildcards (pattern, options)) 526 { 527 if (! (ex->head && ex->head->type == exclude_pattern 528 && ((ex->head->options & EXCLUDE_INCLUDE) 529 == (options & EXCLUDE_INCLUDE)))) 530 new_exclude_segment (ex, exclude_pattern, options); 531 532 seg = ex->head; 533 534 pat = &seg->v.pat; 535 if (pat->exclude_count == pat->exclude_alloc) 536 pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc, 537 sizeof *pat->exclude); 538 patopts = &pat->exclude[pat->exclude_count++]; 539 540 patopts->options = options; 541 if (options & EXCLUDE_REGEX) 542 { 543 int rc; 544 int cflags = REG_NOSUB|REG_EXTENDED| 545 ((options & FNM_CASEFOLD) ? REG_ICASE : 0); 546 547 if (options & FNM_LEADING_DIR) 548 { 549 char *tmp; 550 size_t len = strlen (pattern); 551 552 while (len > 0 && ISSLASH (pattern[len-1])) 553 --len; 554 555 if (len == 0) 556 rc = 1; 557 else 558 { 559 tmp = xmalloc (len + 7); 560 memcpy (tmp, pattern, len); 561 strcpy (tmp + len, "(/.*)?"); 562 rc = regcomp (&patopts->v.re, tmp, cflags); 563 free (tmp); 564 } 565 } 566 else 567 rc = regcomp (&patopts->v.re, pattern, cflags); 568 569 if (rc) 570 { 571 pat->exclude_count--; 572 return; 573 } 574 } 575 else 576 { 577 if (options & EXCLUDE_ALLOC) 578 { 579 pattern = xstrdup (pattern); 580 exclude_add_pattern_buffer (ex, (char*) pattern); 581 } 582 patopts->v.pattern = pattern; 583 } 584 } 585 else 586 { 587 char *str, *p; 588 int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED 589 | FNM_LEADING_DIR | FNM_CASEFOLD); 590 if (! (ex->head && ex->head->type == exclude_hash 591 && ((ex->head->options & exclude_hash_flags) 592 == (options & exclude_hash_flags)))) 593 new_exclude_segment (ex, exclude_hash, options); 594 seg = ex->head; 595 596 str = xstrdup (pattern); 597 if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS) 598 unescape_pattern (str); 599 p = hash_insert (seg->v.table, str); 600 if (p != str) 601 free (str); 602 } 603 } 604 605 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with 606 OPTIONS. LINE_END terminates each pattern in the file. If 607 LINE_END is a space character, ignore trailing spaces and empty 608 lines in FP. Return -1 on failure, 0 on success. */ 609 610 int 611 add_exclude_fp (void (*add_func) (struct exclude *, char const *, int, void *), 612 struct exclude *ex, FILE *fp, int options, 613 char line_end, 614 void *data) 615 { 616 char *buf = NULL; 617 char *p; 618 char *pattern; 619 char const *lim; 620 size_t buf_alloc = 0; 621 size_t buf_count = 0; 622 int c; 623 int e = 0; 624 625 while ((c = getc (fp)) != EOF) 626 { 627 if (buf_count == buf_alloc) 628 buf = x2realloc (buf, &buf_alloc); 629 buf[buf_count++] = c; 630 } 631 632 if (ferror (fp)) 633 e = errno; 634 635 buf = xrealloc (buf, buf_count + 1); 636 buf[buf_count] = line_end; 637 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end); 638 639 exclude_add_pattern_buffer (ex, buf); 640 641 pattern = buf; 642 643 for (p = buf; p < lim; p++) 644 if (*p == line_end) 645 { 646 char *pattern_end = p; 647 648 if (isspace ((unsigned char) line_end)) 649 { 650 for (; ; pattern_end--) 651 if (pattern_end == pattern) 652 goto next_pattern; 653 else if (! isspace ((unsigned char) pattern_end[-1])) 654 break; 655 } 656 657 *pattern_end = '\0'; 658 (*add_func) (ex, pattern, options, data); 659 660 next_pattern: 661 pattern = p + 1; 662 } 663 664 errno = e; 665 return e ? -1 : 0; 666 } 667 668 static void 669 call_addfn (struct exclude *ex, char const *pattern, int options, void *data) 670 { 671 void (**addfnptr) (struct exclude *, char const *, int) = data; 672 (*addfnptr) (ex, pattern, options); 673 } 674 675 int 676 add_exclude_file (void (*add_func) (struct exclude *, char const *, int), 677 struct exclude *ex, char const *file_name, int options, 678 char line_end) 679 { 680 bool use_stdin = file_name[0] == '-' && !file_name[1]; 681 FILE *in; 682 int rc = 0; 683 684 if (use_stdin) 685 in = stdin; 686 else if (! (in = fopen (file_name, "r"))) 687 return -1; 688 689 rc = add_exclude_fp (call_addfn, ex, in, options, line_end, &add_func); 690 691 if (!use_stdin && fclose (in) != 0) 692 rc = -1; 693 694 return rc; 695 } 696