1 /* exclude.c -- exclude file names 2 3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2013 Free Software 4 Foundation, Inc. 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19 /* Written by Paul Eggert <eggert@twinsun.com> 20 and Sergey Poznyakoff <gray@gnu.org>. 21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk> 22 for improvement suggestions. */ 23 24 #include <config.h> 25 26 #include <stdbool.h> 27 28 #include <ctype.h> 29 #include <errno.h> 30 #include <stddef.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <wctype.h> 35 36 #include "exclude.h" 37 #include "hash.h" 38 #include "mbuiter.h" 39 #include "fnmatch.h" 40 #include "xalloc.h" 41 #include "verify.h" 42 43 #if USE_UNLOCKED_IO 44 # include "unlocked-io.h" 45 #endif 46 47 /* Non-GNU systems lack these options, so we don't need to check them. */ 48 #ifndef FNM_CASEFOLD 49 # define FNM_CASEFOLD 0 50 #endif 51 #ifndef FNM_EXTMATCH 52 # define FNM_EXTMATCH 0 53 #endif 54 #ifndef FNM_LEADING_DIR 55 # define FNM_LEADING_DIR 0 56 #endif 57 58 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS) 59 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR 60 | FNM_CASEFOLD | FNM_EXTMATCH)) 61 == 0); 62 63 64 /* Exclusion patterns are grouped into a singly-linked list of 65 "exclusion segments". Each segment represents a set of patterns 66 that can be matches using the same algorithm. Non-wildcard 67 patterns are kept in hash tables, to speed up searches. Wildcard 68 patterns are stored as arrays of patterns. */ 69 70 71 /* An exclude pattern-options pair. The options are fnmatch options 72 ORed with EXCLUDE_* options. */ 73 74 struct patopts 75 { 76 char const *pattern; 77 int options; 78 }; 79 80 /* An array of pattern-options pairs. */ 81 82 struct exclude_pattern 83 { 84 struct patopts *exclude; 85 size_t exclude_alloc; 86 size_t exclude_count; 87 }; 88 89 enum exclude_type 90 { 91 exclude_hash, /* a hash table of excluded names */ 92 exclude_pattern /* an array of exclude patterns */ 93 }; 94 95 struct exclude_segment 96 { 97 struct exclude_segment *next; /* next segment in list */ 98 enum exclude_type type; /* type of this segment */ 99 int options; /* common options for this segment */ 100 union 101 { 102 Hash_table *table; /* for type == exclude_hash */ 103 struct exclude_pattern pat; /* for type == exclude_pattern */ 104 } v; 105 }; 106 107 /* The exclude structure keeps a singly-linked list of exclude segments, 108 maintained in reverse order. */ 109 struct exclude 110 { 111 struct exclude_segment *head; 112 }; 113 114 /* Return true if STR has or may have wildcards, when matched with OPTIONS. 115 Return false if STR definitely does not have wildcards. */ 116 bool 117 fnmatch_pattern_has_wildcards (const char *str, int options) 118 { 119 while (1) 120 { 121 switch (*str++) 122 { 123 case '\\': 124 str += ! (options & FNM_NOESCAPE) && *str; 125 break; 126 127 case '+': case '@': case '!': 128 if (options & FNM_EXTMATCH && *str == '(') 129 return true; 130 break; 131 132 case '?': case '*': case '[': 133 return true; 134 135 case '\0': 136 return false; 137 } 138 } 139 } 140 141 static void 142 unescape_pattern (char *str) 143 { 144 char const *q = str; 145 do 146 q += *q == '\\' && q[1]; 147 while ((*str++ = *q++)); 148 } 149 150 /* Return a newly allocated and empty exclude list. */ 151 152 struct exclude * 153 new_exclude (void) 154 { 155 return xzalloc (sizeof *new_exclude ()); 156 } 157 158 /* Calculate the hash of string. */ 159 static size_t 160 string_hasher (void const *data, size_t n_buckets) 161 { 162 char const *p = data; 163 return hash_string (p, n_buckets); 164 } 165 166 /* Ditto, for case-insensitive hashes */ 167 static size_t 168 string_hasher_ci (void const *data, size_t n_buckets) 169 { 170 char const *p = data; 171 mbui_iterator_t iter; 172 size_t value = 0; 173 174 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter)) 175 { 176 mbchar_t m = mbui_cur (iter); 177 wchar_t wc; 178 179 if (m.wc_valid) 180 wc = towlower (m.wc); 181 else 182 wc = *m.ptr; 183 184 value = (value * 31 + wc) % n_buckets; 185 } 186 187 return value; 188 } 189 190 /* compare two strings for equality */ 191 static bool 192 string_compare (void const *data1, void const *data2) 193 { 194 char const *p1 = data1; 195 char const *p2 = data2; 196 return strcmp (p1, p2) == 0; 197 } 198 199 /* compare two strings for equality, case-insensitive */ 200 static bool 201 string_compare_ci (void const *data1, void const *data2) 202 { 203 char const *p1 = data1; 204 char const *p2 = data2; 205 return mbscasecmp (p1, p2) == 0; 206 } 207 208 static void 209 string_free (void *data) 210 { 211 free (data); 212 } 213 214 /* Create new exclude segment of given TYPE and OPTIONS, and attach it 215 to the head of EX. */ 216 static void 217 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options) 218 { 219 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment)); 220 sp->type = type; 221 sp->options = options; 222 switch (type) 223 { 224 case exclude_pattern: 225 break; 226 227 case exclude_hash: 228 sp->v.table = hash_initialize (0, NULL, 229 (options & FNM_CASEFOLD) ? 230 string_hasher_ci 231 : string_hasher, 232 (options & FNM_CASEFOLD) ? 233 string_compare_ci 234 : string_compare, 235 string_free); 236 break; 237 } 238 sp->next = ex->head; 239 ex->head = sp; 240 } 241 242 /* Free a single exclude segment */ 243 static void 244 free_exclude_segment (struct exclude_segment *seg) 245 { 246 switch (seg->type) 247 { 248 case exclude_pattern: 249 free (seg->v.pat.exclude); 250 break; 251 252 case exclude_hash: 253 hash_free (seg->v.table); 254 break; 255 } 256 free (seg); 257 } 258 259 /* Free the storage associated with an exclude list. */ 260 void 261 free_exclude (struct exclude *ex) 262 { 263 struct exclude_segment *seg; 264 for (seg = ex->head; seg; ) 265 { 266 struct exclude_segment *next = seg->next; 267 free_exclude_segment (seg); 268 seg = next; 269 } 270 free (ex); 271 } 272 273 /* Return zero if PATTERN matches F, obeying OPTIONS, except that 274 (unlike fnmatch) wildcards are disabled in PATTERN. */ 275 276 static int 277 fnmatch_no_wildcards (char const *pattern, char const *f, int options) 278 { 279 if (! (options & FNM_LEADING_DIR)) 280 return ((options & FNM_CASEFOLD) 281 ? mbscasecmp (pattern, f) 282 : strcmp (pattern, f)); 283 else if (! (options & FNM_CASEFOLD)) 284 { 285 size_t patlen = strlen (pattern); 286 int r = strncmp (pattern, f, patlen); 287 if (! r) 288 { 289 r = f[patlen]; 290 if (r == '/') 291 r = 0; 292 } 293 return r; 294 } 295 else 296 { 297 /* Walk through a copy of F, seeing whether P matches any prefix 298 of F. 299 300 FIXME: This is an O(N**2) algorithm; it should be O(N). 301 Also, the copy should not be necessary. However, fixing this 302 will probably involve a change to the mbs* API. */ 303 304 char *fcopy = xstrdup (f); 305 char *p; 306 int r; 307 for (p = fcopy; ; *p++ = '/') 308 { 309 p = strchr (p, '/'); 310 if (p) 311 *p = '\0'; 312 r = mbscasecmp (pattern, fcopy); 313 if (!p || r <= 0) 314 break; 315 } 316 free (fcopy); 317 return r; 318 } 319 } 320 321 bool 322 exclude_fnmatch (char const *pattern, char const *f, int options) 323 { 324 int (*matcher) (char const *, char const *, int) = 325 (options & EXCLUDE_WILDCARDS 326 ? fnmatch 327 : fnmatch_no_wildcards); 328 bool matched = ((*matcher) (pattern, f, options) == 0); 329 char const *p; 330 331 if (! (options & EXCLUDE_ANCHORED)) 332 for (p = f; *p && ! matched; p++) 333 if (*p == '/' && p[1] != '/') 334 matched = ((*matcher) (pattern, p + 1, options) == 0); 335 336 return matched; 337 } 338 339 /* Return true if the exclude_pattern segment SEG matches F. */ 340 341 static bool 342 file_pattern_matches (struct exclude_segment const *seg, char const *f) 343 { 344 size_t exclude_count = seg->v.pat.exclude_count; 345 struct patopts const *exclude = seg->v.pat.exclude; 346 size_t i; 347 348 for (i = 0; i < exclude_count; i++) 349 { 350 char const *pattern = exclude[i].pattern; 351 int options = exclude[i].options; 352 if (exclude_fnmatch (pattern, f, options)) 353 return true; 354 } 355 return false; 356 } 357 358 /* Return true if the exclude_hash segment SEG matches F. 359 BUFFER is an auxiliary storage of the same length as F (with nul 360 terminator included) */ 361 static bool 362 file_name_matches (struct exclude_segment const *seg, char const *f, 363 char *buffer) 364 { 365 int options = seg->options; 366 Hash_table *table = seg->v.table; 367 368 do 369 { 370 /* initialize the pattern */ 371 strcpy (buffer, f); 372 373 while (1) 374 { 375 if (hash_lookup (table, buffer)) 376 return true; 377 if (options & FNM_LEADING_DIR) 378 { 379 char *p = strrchr (buffer, '/'); 380 if (p) 381 { 382 *p = 0; 383 continue; 384 } 385 } 386 break; 387 } 388 389 if (!(options & EXCLUDE_ANCHORED)) 390 { 391 f = strchr (f, '/'); 392 if (f) 393 f++; 394 } 395 else 396 break; 397 } 398 while (f); 399 400 return false; 401 } 402 403 /* Return true if EX excludes F. */ 404 405 bool 406 excluded_file_name (struct exclude const *ex, char const *f) 407 { 408 struct exclude_segment *seg; 409 bool invert = false; 410 char *filename = NULL; 411 412 /* If no patterns are given, the default is to include. */ 413 if (!ex->head) 414 return false; 415 416 /* Scan through the segments, reporting the status of the first match. 417 The segments are in reverse order, so this reports the status of 418 the last match in the original option list. */ 419 for (seg = ex->head; ; seg = seg->next) 420 { 421 if (seg->type == exclude_hash) 422 { 423 if (!filename) 424 filename = xmalloc (strlen (f) + 1); 425 if (file_name_matches (seg, f, filename)) 426 break; 427 } 428 else 429 { 430 if (file_pattern_matches (seg, f)) 431 break; 432 } 433 434 if (! seg->next) 435 { 436 /* If patterns are given but none match, the default is the 437 opposite of the last segment (i.e., the first in the 438 original option list). For example, in the command 439 'grep -r --exclude="a*" --include="*b" pat dir', the 440 first option is --exclude so any file name matching 441 neither a* nor *b is included. */ 442 invert = true; 443 break; 444 } 445 } 446 447 free (filename); 448 return invert ^ ! (seg->options & EXCLUDE_INCLUDE); 449 } 450 451 /* Append to EX the exclusion PATTERN with OPTIONS. */ 452 453 void 454 add_exclude (struct exclude *ex, char const *pattern, int options) 455 { 456 struct exclude_segment *seg; 457 458 if ((options & EXCLUDE_WILDCARDS) 459 && fnmatch_pattern_has_wildcards (pattern, options)) 460 { 461 struct exclude_pattern *pat; 462 struct patopts *patopts; 463 464 if (! (ex->head && ex->head->type == exclude_pattern 465 && ((ex->head->options & EXCLUDE_INCLUDE) 466 == (options & EXCLUDE_INCLUDE)))) 467 new_exclude_segment (ex, exclude_pattern, options); 468 seg = ex->head; 469 470 pat = &seg->v.pat; 471 if (pat->exclude_count == pat->exclude_alloc) 472 pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc, 473 sizeof *pat->exclude); 474 patopts = &pat->exclude[pat->exclude_count++]; 475 patopts->pattern = pattern; 476 patopts->options = options; 477 } 478 else 479 { 480 char *str, *p; 481 int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED 482 | FNM_LEADING_DIR | FNM_CASEFOLD); 483 if (! (ex->head && ex->head->type == exclude_hash 484 && ((ex->head->options & exclude_hash_flags) 485 == (options & exclude_hash_flags)))) 486 new_exclude_segment (ex, exclude_hash, options); 487 seg = ex->head; 488 489 str = xstrdup (pattern); 490 if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS) 491 unescape_pattern (str); 492 p = hash_insert (seg->v.table, str); 493 if (p != str) 494 free (str); 495 } 496 } 497 498 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with 499 OPTIONS. LINE_END terminates each pattern in the file. If 500 LINE_END is a space character, ignore trailing spaces and empty 501 lines in FILE. Return -1 on failure, 0 on success. */ 502 503 int 504 add_exclude_file (void (*add_func) (struct exclude *, char const *, int), 505 struct exclude *ex, char const *file_name, int options, 506 char line_end) 507 { 508 bool use_stdin = file_name[0] == '-' && !file_name[1]; 509 FILE *in; 510 char *buf = NULL; 511 char *p; 512 char const *pattern; 513 char const *lim; 514 size_t buf_alloc = 0; 515 size_t buf_count = 0; 516 int c; 517 int e = 0; 518 519 if (use_stdin) 520 in = stdin; 521 else if (! (in = fopen (file_name, "r"))) 522 return -1; 523 524 while ((c = getc (in)) != EOF) 525 { 526 if (buf_count == buf_alloc) 527 buf = x2realloc (buf, &buf_alloc); 528 buf[buf_count++] = c; 529 } 530 531 if (ferror (in)) 532 e = errno; 533 534 if (!use_stdin && fclose (in) != 0) 535 e = errno; 536 537 buf = xrealloc (buf, buf_count + 1); 538 buf[buf_count] = line_end; 539 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end); 540 pattern = buf; 541 542 for (p = buf; p < lim; p++) 543 if (*p == line_end) 544 { 545 char *pattern_end = p; 546 547 if (isspace ((unsigned char) line_end)) 548 { 549 for (; ; pattern_end--) 550 if (pattern_end == pattern) 551 goto next_pattern; 552 else if (! isspace ((unsigned char) pattern_end[-1])) 553 break; 554 } 555 556 *pattern_end = '\0'; 557 (*add_func) (ex, pattern, options); 558 559 next_pattern: 560 pattern = p + 1; 561 } 562 563 errno = e; 564 return e ? -1 : 0; 565 } 566