1 /* exclude.c -- exclude file names 2 3 Copyright (C) 1992, 1993, 1994, 1997, 1999, 2000, 2001, 2002, 2003, 2004, 4 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc. 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19 /* Written by Paul Eggert <eggert@twinsun.com> 20 and Sergey Poznyakoff <gray@gnu.org>. 21 Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk> 22 for improvement suggestions. */ 23 24 #include <config.h> 25 26 #include <stdbool.h> 27 28 #include <ctype.h> 29 #include <errno.h> 30 #include <stddef.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <wctype.h> 35 36 #include "exclude.h" 37 #include "hash.h" 38 #include "mbuiter.h" 39 #include "fnmatch.h" 40 #include "xalloc.h" 41 #include "verify.h" 42 43 #if USE_UNLOCKED_IO 44 # include "unlocked-io.h" 45 #endif 46 47 /* Non-GNU systems lack these options, so we don't need to check them. */ 48 #ifndef FNM_CASEFOLD 49 # define FNM_CASEFOLD 0 50 #endif 51 #ifndef FNM_EXTMATCH 52 # define FNM_EXTMATCH 0 53 #endif 54 #ifndef FNM_LEADING_DIR 55 # define FNM_LEADING_DIR 0 56 #endif 57 58 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS) 59 & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR 60 | FNM_CASEFOLD | FNM_EXTMATCH)) 61 == 0); 62 63 64 /* Exclusion patterns are grouped into a singly-linked list of 65 "exclusion segments". Each segment represents a set of patterns 66 that can be matches using the same algorithm. Non-wildcard 67 patterns are kept in hash tables, to speed up searches. Wildcard 68 patterns are stored as arrays of patterns. */ 69 70 71 /* An exclude pattern-options pair. The options are fnmatch options 72 ORed with EXCLUDE_* options. */ 73 74 struct patopts 75 { 76 char const *pattern; 77 int options; 78 }; 79 80 /* An array of pattern-options pairs. */ 81 82 struct exclude_pattern 83 { 84 struct patopts *exclude; 85 size_t exclude_alloc; 86 size_t exclude_count; 87 }; 88 89 enum exclude_type 90 { 91 exclude_hash, /* a hash table of excluded names */ 92 exclude_pattern /* an array of exclude patterns */ 93 }; 94 95 struct exclude_segment 96 { 97 struct exclude_segment *next; /* next segment in list */ 98 enum exclude_type type; /* type of this segment */ 99 int options; /* common options for this segment */ 100 union 101 { 102 Hash_table *table; /* for type == exclude_hash */ 103 struct exclude_pattern pat; /* for type == exclude_pattern */ 104 } v; 105 }; 106 107 /* The exclude structure keeps a singly-linked list of exclude segments */ 108 struct exclude 109 { 110 struct exclude_segment *head, *tail; 111 }; 112 113 /* Return true if str has wildcard characters */ 114 bool 115 fnmatch_pattern_has_wildcards (const char *str, int options) 116 { 117 const char *cset = "\\?*[]"; 118 if (options & FNM_NOESCAPE) 119 cset++; 120 while (*str) 121 { 122 size_t n = strcspn (str, cset); 123 if (str[n] == 0) 124 break; 125 else if (str[n] == '\\') 126 { 127 str += n + 1; 128 if (*str) 129 str++; 130 } 131 else 132 return true; 133 } 134 return false; 135 } 136 137 /* Return a newly allocated and empty exclude list. */ 138 139 struct exclude * 140 new_exclude (void) 141 { 142 return xzalloc (sizeof *new_exclude ()); 143 } 144 145 /* Calculate the hash of string. */ 146 static size_t 147 string_hasher (void const *data, size_t n_buckets) 148 { 149 char const *p = data; 150 return hash_string (p, n_buckets); 151 } 152 153 /* Ditto, for case-insensitive hashes */ 154 static size_t 155 string_hasher_ci (void const *data, size_t n_buckets) 156 { 157 char const *p = data; 158 mbui_iterator_t iter; 159 size_t value = 0; 160 161 for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter)) 162 { 163 mbchar_t m = mbui_cur (iter); 164 wchar_t wc; 165 166 if (m.wc_valid) 167 wc = towlower (m.wc); 168 else 169 wc = *m.ptr; 170 171 value = (value * 31 + wc) % n_buckets; 172 } 173 174 return value; 175 } 176 177 /* compare two strings for equality */ 178 static bool 179 string_compare (void const *data1, void const *data2) 180 { 181 char const *p1 = data1; 182 char const *p2 = data2; 183 return strcmp (p1, p2) == 0; 184 } 185 186 /* compare two strings for equality, case-insensitive */ 187 static bool 188 string_compare_ci (void const *data1, void const *data2) 189 { 190 char const *p1 = data1; 191 char const *p2 = data2; 192 return mbscasecmp (p1, p2) == 0; 193 } 194 195 static void 196 string_free (void *data) 197 { 198 free (data); 199 } 200 201 /* Create new exclude segment of given TYPE and OPTIONS, and attach it 202 to the tail of list in EX */ 203 static struct exclude_segment * 204 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options) 205 { 206 struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment)); 207 sp->type = type; 208 sp->options = options; 209 switch (type) 210 { 211 case exclude_pattern: 212 break; 213 214 case exclude_hash: 215 sp->v.table = hash_initialize (0, NULL, 216 (options & FNM_CASEFOLD) ? 217 string_hasher_ci 218 : string_hasher, 219 (options & FNM_CASEFOLD) ? 220 string_compare_ci 221 : string_compare, 222 string_free); 223 break; 224 } 225 if (ex->tail) 226 ex->tail->next = sp; 227 else 228 ex->head = sp; 229 ex->tail = sp; 230 return sp; 231 } 232 233 /* Free a single exclude segment */ 234 static void 235 free_exclude_segment (struct exclude_segment *seg) 236 { 237 switch (seg->type) 238 { 239 case exclude_pattern: 240 free (seg->v.pat.exclude); 241 break; 242 243 case exclude_hash: 244 hash_free (seg->v.table); 245 break; 246 } 247 free (seg); 248 } 249 250 /* Free the storage associated with an exclude list. */ 251 void 252 free_exclude (struct exclude *ex) 253 { 254 struct exclude_segment *seg; 255 for (seg = ex->head; seg; ) 256 { 257 struct exclude_segment *next = seg->next; 258 free_exclude_segment (seg); 259 seg = next; 260 } 261 free (ex); 262 } 263 264 /* Return zero if PATTERN matches F, obeying OPTIONS, except that 265 (unlike fnmatch) wildcards are disabled in PATTERN. */ 266 267 static int 268 fnmatch_no_wildcards (char const *pattern, char const *f, int options) 269 { 270 if (! (options & FNM_LEADING_DIR)) 271 return ((options & FNM_CASEFOLD) 272 ? mbscasecmp (pattern, f) 273 : strcmp (pattern, f)); 274 else if (! (options & FNM_CASEFOLD)) 275 { 276 size_t patlen = strlen (pattern); 277 int r = strncmp (pattern, f, patlen); 278 if (! r) 279 { 280 r = f[patlen]; 281 if (r == '/') 282 r = 0; 283 } 284 return r; 285 } 286 else 287 { 288 /* Walk through a copy of F, seeing whether P matches any prefix 289 of F. 290 291 FIXME: This is an O(N**2) algorithm; it should be O(N). 292 Also, the copy should not be necessary. However, fixing this 293 will probably involve a change to the mbs* API. */ 294 295 char *fcopy = xstrdup (f); 296 char *p; 297 int r; 298 for (p = fcopy; ; *p++ = '/') 299 { 300 p = strchr (p, '/'); 301 if (p) 302 *p = '\0'; 303 r = mbscasecmp (pattern, fcopy); 304 if (!p || r <= 0) 305 break; 306 } 307 free (fcopy); 308 return r; 309 } 310 } 311 312 bool 313 exclude_fnmatch (char const *pattern, char const *f, int options) 314 { 315 int (*matcher) (char const *, char const *, int) = 316 (options & EXCLUDE_WILDCARDS 317 ? fnmatch 318 : fnmatch_no_wildcards); 319 bool matched = ((*matcher) (pattern, f, options) == 0); 320 char const *p; 321 322 if (! (options & EXCLUDE_ANCHORED)) 323 for (p = f; *p && ! matched; p++) 324 if (*p == '/' && p[1] != '/') 325 matched = ((*matcher) (pattern, p + 1, options) == 0); 326 327 return matched; 328 } 329 330 /* Return true if the exclude_pattern segment SEG excludes F. */ 331 332 static bool 333 excluded_file_pattern_p (struct exclude_segment const *seg, char const *f) 334 { 335 size_t exclude_count = seg->v.pat.exclude_count; 336 struct patopts const *exclude = seg->v.pat.exclude; 337 size_t i; 338 bool excluded = !! (exclude[0].options & EXCLUDE_INCLUDE); 339 340 /* Scan through the options, until they change excluded */ 341 for (i = 0; i < exclude_count; i++) 342 { 343 char const *pattern = exclude[i].pattern; 344 int options = exclude[i].options; 345 if (exclude_fnmatch (pattern, f, options)) 346 return !excluded; 347 } 348 return excluded; 349 } 350 351 /* Return true if the exclude_hash segment SEG excludes F. 352 BUFFER is an auxiliary storage of the same length as F (with nul 353 terminator included) */ 354 static bool 355 excluded_file_name_p (struct exclude_segment const *seg, char const *f, 356 char *buffer) 357 { 358 int options = seg->options; 359 bool excluded = !! (options & EXCLUDE_INCLUDE); 360 Hash_table *table = seg->v.table; 361 362 do 363 { 364 /* initialize the pattern */ 365 strcpy (buffer, f); 366 367 while (1) 368 { 369 if (hash_lookup (table, buffer)) 370 return !excluded; 371 if (options & FNM_LEADING_DIR) 372 { 373 char *p = strrchr (buffer, '/'); 374 if (p) 375 { 376 *p = 0; 377 continue; 378 } 379 } 380 break; 381 } 382 383 if (!(options & EXCLUDE_ANCHORED)) 384 { 385 f = strchr (f, '/'); 386 if (f) 387 f++; 388 } 389 else 390 break; 391 } 392 while (f); 393 return excluded; 394 } 395 396 /* Return true if EX excludes F. */ 397 398 bool 399 excluded_file_name (struct exclude const *ex, char const *f) 400 { 401 struct exclude_segment *seg; 402 bool excluded; 403 char *filename = NULL; 404 405 /* If no patterns are given, the default is to include. */ 406 if (!ex->head) 407 return false; 408 409 /* Otherwise, the default is the opposite of the first option. */ 410 excluded = !! (ex->head->options & EXCLUDE_INCLUDE); 411 /* Scan through the segments, seeing whether they change status from 412 excluded to included or vice versa. */ 413 for (seg = ex->head; seg; seg = seg->next) 414 { 415 bool rc; 416 417 switch (seg->type) 418 { 419 case exclude_pattern: 420 rc = excluded_file_pattern_p (seg, f); 421 break; 422 423 case exclude_hash: 424 if (!filename) 425 filename = xmalloc (strlen (f) + 1); 426 rc = excluded_file_name_p (seg, f, filename); 427 break; 428 429 default: 430 abort (); 431 } 432 if (rc != excluded) 433 { 434 excluded = rc; 435 break; 436 } 437 } 438 free (filename); 439 return excluded; 440 } 441 442 /* Append to EX the exclusion PATTERN with OPTIONS. */ 443 444 void 445 add_exclude (struct exclude *ex, char const *pattern, int options) 446 { 447 struct exclude_segment *seg; 448 449 if ((options & EXCLUDE_WILDCARDS) 450 && fnmatch_pattern_has_wildcards (pattern, options)) 451 { 452 struct exclude_pattern *pat; 453 struct patopts *patopts; 454 455 if (ex->tail && ex->tail->type == exclude_pattern 456 && ((ex->tail->options & EXCLUDE_INCLUDE) == 457 (options & EXCLUDE_INCLUDE))) 458 seg = ex->tail; 459 else 460 seg = new_exclude_segment (ex, exclude_pattern, options); 461 462 pat = &seg->v.pat; 463 if (pat->exclude_count == pat->exclude_alloc) 464 pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc, 465 sizeof *pat->exclude); 466 patopts = &pat->exclude[pat->exclude_count++]; 467 patopts->pattern = pattern; 468 patopts->options = options; 469 } 470 else 471 { 472 char *str, *p; 473 #define EXCLUDE_HASH_FLAGS (EXCLUDE_INCLUDE|EXCLUDE_ANCHORED|\ 474 FNM_LEADING_DIR|FNM_CASEFOLD) 475 if (ex->tail && ex->tail->type == exclude_hash 476 && ((ex->tail->options & EXCLUDE_HASH_FLAGS) == 477 (options & EXCLUDE_HASH_FLAGS))) 478 seg = ex->tail; 479 else 480 seg = new_exclude_segment (ex, exclude_hash, options); 481 482 str = xstrdup (pattern); 483 p = hash_insert (seg->v.table, str); 484 if (p != str) 485 free (str); 486 } 487 } 488 489 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with 490 OPTIONS. LINE_END terminates each pattern in the file. If 491 LINE_END is a space character, ignore trailing spaces and empty 492 lines in FILE. Return -1 on failure, 0 on success. */ 493 494 int 495 add_exclude_file (void (*add_func) (struct exclude *, char const *, int), 496 struct exclude *ex, char const *file_name, int options, 497 char line_end) 498 { 499 bool use_stdin = file_name[0] == '-' && !file_name[1]; 500 FILE *in; 501 char *buf = NULL; 502 char *p; 503 char const *pattern; 504 char const *lim; 505 size_t buf_alloc = 0; 506 size_t buf_count = 0; 507 int c; 508 int e = 0; 509 510 if (use_stdin) 511 in = stdin; 512 else if (! (in = fopen (file_name, "r"))) 513 return -1; 514 515 while ((c = getc (in)) != EOF) 516 { 517 if (buf_count == buf_alloc) 518 buf = x2realloc (buf, &buf_alloc); 519 buf[buf_count++] = c; 520 } 521 522 if (ferror (in)) 523 e = errno; 524 525 if (!use_stdin && fclose (in) != 0) 526 e = errno; 527 528 buf = xrealloc (buf, buf_count + 1); 529 buf[buf_count] = line_end; 530 lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end); 531 pattern = buf; 532 533 for (p = buf; p < lim; p++) 534 if (*p == line_end) 535 { 536 char *pattern_end = p; 537 538 if (isspace ((unsigned char) line_end)) 539 { 540 for (; ; pattern_end--) 541 if (pattern_end == pattern) 542 goto next_pattern; 543 else if (! isspace ((unsigned char) pattern_end[-1])) 544 break; 545 } 546 547 *pattern_end = '\0'; 548 (*add_func) (ex, pattern, options); 549 550 next_pattern: 551 pattern = p + 1; 552 } 553 554 errno = e; 555 return e ? -1 : 0; 556 } 557