1 /* exclude.c -- exclude file names 2 3 Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2011 Free Software 4 Foundation, Inc. 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19 /* Written by Paul Eggert <eggert@twinsun.com> 20 and Sergey Poznyakoff <gray@gnu.org>. 21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk> 22 for improvement suggestions. */ 23 24 #include <config.h> 25 26 #include <stdbool.h> 27 28 #include <ctype.h> 29 #include <errno.h> 30 #include <stddef.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <wctype.h> 35 36 #include "exclude.h" 37 #include "hash.h" 38 #include "mbuiter.h" 39 #include "fnmatch.h" 40 #include "xalloc.h" 41 #include "verify.h" 42 43 #if USE_UNLOCKED_IO 44 # include "unlocked-io.h" 45 #endif 46 47 /* Non-GNU systems lack these options, so we don't need to check them. */ 48 #ifndef FNM_CASEFOLD 49 # define FNM_CASEFOLD 0 50 #endif 51 #ifndef FNM_EXTMATCH 52 # define FNM_EXTMATCH 0 53 #endif 54 #ifndef FNM_LEADING_DIR 55 # define FNM_LEADING_DIR 0 56 #endif 57 58 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS) 59 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR 60 | FNM_CASEFOLD | FNM_EXTMATCH)) 61 == 0); 62 63 64 /* Exclusion patterns are grouped into a singly-linked list of 65 "exclusion segments". Each segment represents a set of patterns 66 that can be matches using the same algorithm. Non-wildcard 67 patterns are kept in hash tables, to speed up searches. Wildcard 68 patterns are stored as arrays of patterns. */ 69 70 71 /* An exclude pattern-options pair. The options are fnmatch options 72 ORed with EXCLUDE_* options. */ 73 74 struct patopts 75 { 76 char const *pattern; 77 int options; 78 }; 79 80 /* An array of pattern-options pairs. */ 81 82 struct exclude_pattern 83 { 84 struct patopts *exclude; 85 size_t exclude_alloc; 86 size_t exclude_count; 87 }; 88 89 enum exclude_type 90 { 91 exclude_hash, /* a hash table of excluded names */ 92 exclude_pattern /* an array of exclude patterns */ 93 }; 94 95 struct exclude_segment 96 { 97 struct exclude_segment *next; /* next segment in list */ 98 enum exclude_type type; /* type of this segment */ 99 int options; /* common options for this segment */ 100 union 101 { 102 Hash_table *table; /* for type == exclude_hash */ 103 struct exclude_pattern pat; /* for type == exclude_pattern */ 104 } v; 105 }; 106 107 /* The exclude structure keeps a singly-linked list of exclude segments */ 108 struct exclude 109 { 110 struct exclude_segment *head, *tail; 111 }; 112 113 /* Return true if str has wildcard characters */ 114 bool 115 fnmatch_pattern_has_wildcards (const char *str, int options) 116 { 117 const char *cset = "\\?*[]"; 118 if (options & FNM_NOESCAPE) 119 cset++; 120 while (*str) 121 { 122 size_t n = strcspn (str, cset); 123 if (str[n] == 0) 124 break; 125 else if (str[n] == '\\') 126 { 127 str += n + 1; 128 if (*str) 129 str++; 130 } 131 else 132 return true; 133 } 134 return false; 135 } 136 137 static void 138 unescape_pattern (char *str) 139 { 140 int inset = 0; 141 char *q = str; 142 do 143 { 144 if (inset) 145 { 146 if (*q == ']') 147 inset = 0; 148 } 149 else if (*q == '[') 150 inset = 1; 151 else if (*q == '\\') 152 q++; 153 } 154 while ((*str++ = *q++)); 155 } 156 157 /* Return a newly allocated and empty exclude list. */ 158 159 struct exclude * 160 new_exclude (void) 161 { 162 return xzalloc (sizeof *new_exclude ()); 163 } 164 165 /* Calculate the hash of string. */ 166 static size_t 167 string_hasher (void const *data, size_t n_buckets) 168 { 169 char const *p = data; 170 return hash_string (p, n_buckets); 171 } 172 173 /* Ditto, for case-insensitive hashes */ 174 static size_t 175 string_hasher_ci (void const *data, size_t n_buckets) 176 { 177 char const *p = data; 178 mbui_iterator_t iter; 179 size_t value = 0; 180 181 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter)) 182 { 183 mbchar_t m = mbui_cur (iter); 184 wchar_t wc; 185 186 if (m.wc_valid) 187 wc = towlower (m.wc); 188 else 189 wc = *m.ptr; 190 191 value = (value * 31 + wc) % n_buckets; 192 } 193 194 return value; 195 } 196 197 /* compare two strings for equality */ 198 static bool 199 string_compare (void const *data1, void const *data2) 200 { 201 char const *p1 = data1; 202 char const *p2 = data2; 203 return strcmp (p1, p2) == 0; 204 } 205 206 /* compare two strings for equality, case-insensitive */ 207 static bool 208 string_compare_ci (void const *data1, void const *data2) 209 { 210 char const *p1 = data1; 211 char const *p2 = data2; 212 return mbscasecmp (p1, p2) == 0; 213 } 214 215 static void 216 string_free (void *data) 217 { 218 free (data); 219 } 220 221 /* Create new exclude segment of given TYPE and OPTIONS, and attach it 222 to the tail of list in EX */ 223 static struct exclude_segment * 224 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options) 225 { 226 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment)); 227 sp->type = type; 228 sp->options = options; 229 switch (type) 230 { 231 case exclude_pattern: 232 break; 233 234 case exclude_hash: 235 sp->v.table = hash_initialize (0, NULL, 236 (options & FNM_CASEFOLD) ? 237 string_hasher_ci 238 : string_hasher, 239 (options & FNM_CASEFOLD) ? 240 string_compare_ci 241 : string_compare, 242 string_free); 243 break; 244 } 245 if (ex->tail) 246 ex->tail->next = sp; 247 else 248 ex->head = sp; 249 ex->tail = sp; 250 return sp; 251 } 252 253 /* Free a single exclude segment */ 254 static void 255 free_exclude_segment (struct exclude_segment *seg) 256 { 257 switch (seg->type) 258 { 259 case exclude_pattern: 260 free (seg->v.pat.exclude); 261 break; 262 263 case exclude_hash: 264 hash_free (seg->v.table); 265 break; 266 } 267 free (seg); 268 } 269 270 /* Free the storage associated with an exclude list. */ 271 void 272 free_exclude (struct exclude *ex) 273 { 274 struct exclude_segment *seg; 275 for (seg = ex->head; seg; ) 276 { 277 struct exclude_segment *next = seg->next; 278 free_exclude_segment (seg); 279 seg = next; 280 } 281 free (ex); 282 } 283 284 /* Return zero if PATTERN matches F, obeying OPTIONS, except that 285 (unlike fnmatch) wildcards are disabled in PATTERN. */ 286 287 static int 288 fnmatch_no_wildcards (char const *pattern, char const *f, int options) 289 { 290 if (! (options & FNM_LEADING_DIR)) 291 return ((options & FNM_CASEFOLD) 292 ? mbscasecmp (pattern, f) 293 : strcmp (pattern, f)); 294 else if (! (options & FNM_CASEFOLD)) 295 { 296 size_t patlen = strlen (pattern); 297 int r = strncmp (pattern, f, patlen); 298 if (! r) 299 { 300 r = f[patlen]; 301 if (r == '/') 302 r = 0; 303 } 304 return r; 305 } 306 else 307 { 308 /* Walk through a copy of F, seeing whether P matches any prefix 309 of F. 310 311 FIXME: This is an O(N**2) algorithm; it should be O(N). 312 Also, the copy should not be necessary. However, fixing this 313 will probably involve a change to the mbs* API. */ 314 315 char *fcopy = xstrdup (f); 316 char *p; 317 int r; 318 for (p = fcopy; ; *p++ = '/') 319 { 320 p = strchr (p, '/'); 321 if (p) 322 *p = '\0'; 323 r = mbscasecmp (pattern, fcopy); 324 if (!p || r <= 0) 325 break; 326 } 327 free (fcopy); 328 return r; 329 } 330 } 331 332 bool 333 exclude_fnmatch (char const *pattern, char const *f, int options) 334 { 335 int (*matcher) (char const *, char const *, int) = 336 (options & EXCLUDE_WILDCARDS 337 ? fnmatch 338 : fnmatch_no_wildcards); 339 bool matched = ((*matcher) (pattern, f, options) == 0); 340 char const *p; 341 342 if (! (options & EXCLUDE_ANCHORED)) 343 for (p = f; *p && ! matched; p++) 344 if (*p == '/' && p[1] != '/') 345 matched = ((*matcher) (pattern, p + 1, options) == 0); 346 347 return matched; 348 } 349 350 /* Return true if the exclude_pattern segment SEG excludes F. */ 351 352 static bool 353 excluded_file_pattern_p (struct exclude_segment const *seg, char const *f) 354 { 355 size_t exclude_count = seg->v.pat.exclude_count; 356 struct patopts const *exclude = seg->v.pat.exclude; 357 size_t i; 358 bool excluded = !! (exclude[0].options & EXCLUDE_INCLUDE); 359 360 /* Scan through the options, until they change excluded */ 361 for (i = 0; i < exclude_count; i++) 362 { 363 char const *pattern = exclude[i].pattern; 364 int options = exclude[i].options; 365 if (exclude_fnmatch (pattern, f, options)) 366 return !excluded; 367 } 368 return excluded; 369 } 370 371 /* Return true if the exclude_hash segment SEG excludes F. 372 BUFFER is an auxiliary storage of the same length as F (with nul 373 terminator included) */ 374 static bool 375 excluded_file_name_p (struct exclude_segment const *seg, char const *f, 376 char *buffer) 377 { 378 int options = seg->options; 379 bool excluded = !! (options & EXCLUDE_INCLUDE); 380 Hash_table *table = seg->v.table; 381 382 do 383 { 384 /* initialize the pattern */ 385 strcpy (buffer, f); 386 387 while (1) 388 { 389 if (hash_lookup (table, buffer)) 390 return !excluded; 391 if (options & FNM_LEADING_DIR) 392 { 393 char *p = strrchr (buffer, '/'); 394 if (p) 395 { 396 *p = 0; 397 continue; 398 } 399 } 400 break; 401 } 402 403 if (!(options & EXCLUDE_ANCHORED)) 404 { 405 f = strchr (f, '/'); 406 if (f) 407 f++; 408 } 409 else 410 break; 411 } 412 while (f); 413 return excluded; 414 } 415 416 /* Return true if EX excludes F. */ 417 418 bool 419 excluded_file_name (struct exclude const *ex, char const *f) 420 { 421 struct exclude_segment *seg; 422 bool excluded; 423 char *filename = NULL; 424 425 /* If no patterns are given, the default is to include. */ 426 if (!ex->head) 427 return false; 428 429 /* Otherwise, the default is the opposite of the first option. */ 430 excluded = !! (ex->head->options & EXCLUDE_INCLUDE); 431 /* Scan through the segments, seeing whether they change status from 432 excluded to included or vice versa. */ 433 for (seg = ex->head; seg; seg = seg->next) 434 { 435 bool rc; 436 437 switch (seg->type) 438 { 439 case exclude_pattern: 440 rc = excluded_file_pattern_p (seg, f); 441 break; 442 443 case exclude_hash: 444 if (!filename) 445 filename = xmalloc (strlen (f) + 1); 446 rc = excluded_file_name_p (seg, f, filename); 447 break; 448 449 default: 450 abort (); 451 } 452 if (rc != excluded) 453 { 454 excluded = rc; 455 break; 456 } 457 } 458 free (filename); 459 return excluded; 460 } 461 462 /* Append to EX the exclusion PATTERN with OPTIONS. */ 463 464 void 465 add_exclude (struct exclude *ex, char const *pattern, int options) 466 { 467 struct exclude_segment *seg; 468 469 if ((options & EXCLUDE_WILDCARDS) 470 && fnmatch_pattern_has_wildcards (pattern, options)) 471 { 472 struct exclude_pattern *pat; 473 struct patopts *patopts; 474 475 if (ex->tail && ex->tail->type == exclude_pattern 476 && ((ex->tail->options & EXCLUDE_INCLUDE) == 477 (options & EXCLUDE_INCLUDE))) 478 seg = ex->tail; 479 else 480 seg = new_exclude_segment (ex, exclude_pattern, options); 481 482 pat = &seg->v.pat; 483 if (pat->exclude_count == pat->exclude_alloc) 484 pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc, 485 sizeof *pat->exclude); 486 patopts = &pat->exclude[pat->exclude_count++]; 487 patopts->pattern = pattern; 488 patopts->options = options; 489 } 490 else 491 { 492 char *str, *p; 493 #define EXCLUDE_HASH_FLAGS (EXCLUDE_INCLUDE|EXCLUDE_ANCHORED|\ 494 FNM_LEADING_DIR|FNM_CASEFOLD) 495 if (ex->tail && ex->tail->type == exclude_hash 496 && ((ex->tail->options & EXCLUDE_HASH_FLAGS) == 497 (options & EXCLUDE_HASH_FLAGS))) 498 seg = ex->tail; 499 else 500 seg = new_exclude_segment (ex, exclude_hash, options); 501 502 str = xstrdup (pattern); 503 if (options & EXCLUDE_WILDCARDS) 504 unescape_pattern (str); 505 p = hash_insert (seg->v.table, str); 506 if (p != str) 507 free (str); 508 } 509 } 510 511 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with 512 OPTIONS. LINE_END terminates each pattern in the file. If 513 LINE_END is a space character, ignore trailing spaces and empty 514 lines in FILE. Return -1 on failure, 0 on success. */ 515 516 int 517 add_exclude_file (void (*add_func) (struct exclude *, char const *, int), 518 struct exclude *ex, char const *file_name, int options, 519 char line_end) 520 { 521 bool use_stdin = file_name[0] == '-' && !file_name[1]; 522 FILE *in; 523 char *buf = NULL; 524 char *p; 525 char const *pattern; 526 char const *lim; 527 size_t buf_alloc = 0; 528 size_t buf_count = 0; 529 int c; 530 int e = 0; 531 532 if (use_stdin) 533 in = stdin; 534 else if (! (in = fopen (file_name, "r"))) 535 return -1; 536 537 while ((c = getc (in)) != EOF) 538 { 539 if (buf_count == buf_alloc) 540 buf = x2realloc (buf, &buf_alloc); 541 buf[buf_count++] = c; 542 } 543 544 if (ferror (in)) 545 e = errno; 546 547 if (!use_stdin && fclose (in) != 0) 548 e = errno; 549 550 buf = xrealloc (buf, buf_count + 1); 551 buf[buf_count] = line_end; 552 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end); 553 pattern = buf; 554 555 for (p = buf; p < lim; p++) 556 if (*p == line_end) 557 { 558 char *pattern_end = p; 559 560 if (isspace ((unsigned char) line_end)) 561 { 562 for (; ; pattern_end--) 563 if (pattern_end == pattern) 564 goto next_pattern; 565 else if (! isspace ((unsigned char) pattern_end[-1])) 566 break; 567 } 568 569 *pattern_end = '\0'; 570 (*add_func) (ex, pattern, options); 571 572 next_pattern: 573 pattern = p + 1; 574 } 575 576 errno = e; 577 return e ? -1 : 0; 578 } 579