xref: /dragonfly/contrib/diffutils/lib/exclude.c (revision 9348a738)
1 /* exclude.c -- exclude file names
2 
3    Copyright (C) 1992-1994, 1997, 1999-2007, 2009-2013 Free Software
4    Foundation, Inc.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18 
19 /* Written by Paul Eggert <eggert@twinsun.com>
20    and Sergey Poznyakoff <gray@gnu.org>.
21    Thanks to Phil Proudman <phil@proudman51.freeserve.co.uk>
22    for improvement suggestions. */
23 
24 #include <config.h>
25 
26 #include <stdbool.h>
27 
28 #include <ctype.h>
29 #include <errno.h>
30 #include <stddef.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <wctype.h>
35 
36 #include "exclude.h"
37 #include "hash.h"
38 #include "mbuiter.h"
39 #include "fnmatch.h"
40 #include "xalloc.h"
41 #include "verify.h"
42 
43 #if USE_UNLOCKED_IO
44 # include "unlocked-io.h"
45 #endif
46 
47 /* Non-GNU systems lack these options, so we don't need to check them.  */
48 #ifndef FNM_CASEFOLD
49 # define FNM_CASEFOLD 0
50 #endif
51 #ifndef FNM_EXTMATCH
52 # define FNM_EXTMATCH 0
53 #endif
54 #ifndef FNM_LEADING_DIR
55 # define FNM_LEADING_DIR 0
56 #endif
57 
58 verify (((EXCLUDE_ANCHORED | EXCLUDE_INCLUDE | EXCLUDE_WILDCARDS)
59          & (FNM_PATHNAME | FNM_NOESCAPE | FNM_PERIOD | FNM_LEADING_DIR
60             | FNM_CASEFOLD | FNM_EXTMATCH))
61         == 0);
62 
63 
64 /* Exclusion patterns are grouped into a singly-linked list of
65    "exclusion segments".  Each segment represents a set of patterns
66    that can be matches using the same algorithm.  Non-wildcard
67    patterns are kept in hash tables, to speed up searches.  Wildcard
68    patterns are stored as arrays of patterns. */
69 
70 
71 /* An exclude pattern-options pair.  The options are fnmatch options
72    ORed with EXCLUDE_* options.  */
73 
74 struct patopts
75   {
76     char const *pattern;
77     int options;
78   };
79 
80 /* An array of pattern-options pairs.  */
81 
82 struct exclude_pattern
83   {
84     struct patopts *exclude;
85     size_t exclude_alloc;
86     size_t exclude_count;
87   };
88 
89 enum exclude_type
90   {
91     exclude_hash,                    /* a hash table of excluded names */
92     exclude_pattern                  /* an array of exclude patterns */
93   };
94 
95 struct exclude_segment
96   {
97     struct exclude_segment *next;    /* next segment in list */
98     enum exclude_type type;          /* type of this segment */
99     int options;                     /* common options for this segment */
100     union
101     {
102       Hash_table *table;             /* for type == exclude_hash */
103       struct exclude_pattern pat;    /* for type == exclude_pattern */
104     } v;
105   };
106 
107 /* The exclude structure keeps a singly-linked list of exclude segments,
108    maintained in reverse order.  */
109 struct exclude
110   {
111     struct exclude_segment *head;
112   };
113 
114 /* Return true if STR has or may have wildcards, when matched with OPTIONS.
115    Return false if STR definitely does not have wildcards.  */
116 bool
117 fnmatch_pattern_has_wildcards (const char *str, int options)
118 {
119   while (1)
120     {
121       switch (*str++)
122         {
123         case '\\':
124           str += ! (options & FNM_NOESCAPE) && *str;
125           break;
126 
127         case '+': case '@': case '!':
128           if (options & FNM_EXTMATCH && *str == '(')
129             return true;
130           break;
131 
132         case '?': case '*': case '[':
133           return true;
134 
135         case '\0':
136           return false;
137         }
138     }
139 }
140 
141 static void
142 unescape_pattern (char *str)
143 {
144   char const *q = str;
145   do
146     q += *q == '\\' && q[1];
147   while ((*str++ = *q++));
148 }
149 
150 /* Return a newly allocated and empty exclude list.  */
151 
152 struct exclude *
153 new_exclude (void)
154 {
155   return xzalloc (sizeof *new_exclude ());
156 }
157 
158 /* Calculate the hash of string.  */
159 static size_t
160 string_hasher (void const *data, size_t n_buckets)
161 {
162   char const *p = data;
163   return hash_string (p, n_buckets);
164 }
165 
166 /* Ditto, for case-insensitive hashes */
167 static size_t
168 string_hasher_ci (void const *data, size_t n_buckets)
169 {
170   char const *p = data;
171   mbui_iterator_t iter;
172   size_t value = 0;
173 
174   for (mbui_init (iter, p); mbui_avail (iter); mbui_advance (iter))
175     {
176       mbchar_t m = mbui_cur (iter);
177       wchar_t wc;
178 
179       if (m.wc_valid)
180         wc = towlower (m.wc);
181       else
182         wc = *m.ptr;
183 
184       value = (value * 31 + wc) % n_buckets;
185     }
186 
187   return value;
188 }
189 
190 /* compare two strings for equality */
191 static bool
192 string_compare (void const *data1, void const *data2)
193 {
194   char const *p1 = data1;
195   char const *p2 = data2;
196   return strcmp (p1, p2) == 0;
197 }
198 
199 /* compare two strings for equality, case-insensitive */
200 static bool
201 string_compare_ci (void const *data1, void const *data2)
202 {
203   char const *p1 = data1;
204   char const *p2 = data2;
205   return mbscasecmp (p1, p2) == 0;
206 }
207 
208 static void
209 string_free (void *data)
210 {
211   free (data);
212 }
213 
214 /* Create new exclude segment of given TYPE and OPTIONS, and attach it
215    to the head of EX.  */
216 static void
217 new_exclude_segment (struct exclude *ex, enum exclude_type type, int options)
218 {
219   struct exclude_segment *sp = xzalloc (sizeof (struct exclude_segment));
220   sp->type = type;
221   sp->options = options;
222   switch (type)
223     {
224     case exclude_pattern:
225       break;
226 
227     case exclude_hash:
228       sp->v.table = hash_initialize (0, NULL,
229                                      (options & FNM_CASEFOLD) ?
230                                        string_hasher_ci
231                                        : string_hasher,
232                                      (options & FNM_CASEFOLD) ?
233                                        string_compare_ci
234                                        : string_compare,
235                                      string_free);
236       break;
237     }
238   sp->next = ex->head;
239   ex->head = sp;
240 }
241 
242 /* Free a single exclude segment */
243 static void
244 free_exclude_segment (struct exclude_segment *seg)
245 {
246   switch (seg->type)
247     {
248     case exclude_pattern:
249       free (seg->v.pat.exclude);
250       break;
251 
252     case exclude_hash:
253       hash_free (seg->v.table);
254       break;
255     }
256   free (seg);
257 }
258 
259 /* Free the storage associated with an exclude list.  */
260 void
261 free_exclude (struct exclude *ex)
262 {
263   struct exclude_segment *seg;
264   for (seg = ex->head; seg; )
265     {
266       struct exclude_segment *next = seg->next;
267       free_exclude_segment (seg);
268       seg = next;
269     }
270   free (ex);
271 }
272 
273 /* Return zero if PATTERN matches F, obeying OPTIONS, except that
274    (unlike fnmatch) wildcards are disabled in PATTERN.  */
275 
276 static int
277 fnmatch_no_wildcards (char const *pattern, char const *f, int options)
278 {
279   if (! (options & FNM_LEADING_DIR))
280     return ((options & FNM_CASEFOLD)
281             ? mbscasecmp (pattern, f)
282             : strcmp (pattern, f));
283   else if (! (options & FNM_CASEFOLD))
284     {
285       size_t patlen = strlen (pattern);
286       int r = strncmp (pattern, f, patlen);
287       if (! r)
288         {
289           r = f[patlen];
290           if (r == '/')
291             r = 0;
292         }
293       return r;
294     }
295   else
296     {
297       /* Walk through a copy of F, seeing whether P matches any prefix
298          of F.
299 
300          FIXME: This is an O(N**2) algorithm; it should be O(N).
301          Also, the copy should not be necessary.  However, fixing this
302          will probably involve a change to the mbs* API.  */
303 
304       char *fcopy = xstrdup (f);
305       char *p;
306       int r;
307       for (p = fcopy; ; *p++ = '/')
308         {
309           p = strchr (p, '/');
310           if (p)
311             *p = '\0';
312           r = mbscasecmp (pattern, fcopy);
313           if (!p || r <= 0)
314             break;
315         }
316       free (fcopy);
317       return r;
318     }
319 }
320 
321 bool
322 exclude_fnmatch (char const *pattern, char const *f, int options)
323 {
324   int (*matcher) (char const *, char const *, int) =
325     (options & EXCLUDE_WILDCARDS
326      ? fnmatch
327      : fnmatch_no_wildcards);
328   bool matched = ((*matcher) (pattern, f, options) == 0);
329   char const *p;
330 
331   if (! (options & EXCLUDE_ANCHORED))
332     for (p = f; *p && ! matched; p++)
333       if (*p == '/' && p[1] != '/')
334         matched = ((*matcher) (pattern, p + 1, options) == 0);
335 
336   return matched;
337 }
338 
339 /* Return true if the exclude_pattern segment SEG matches F.  */
340 
341 static bool
342 file_pattern_matches (struct exclude_segment const *seg, char const *f)
343 {
344   size_t exclude_count = seg->v.pat.exclude_count;
345   struct patopts const *exclude = seg->v.pat.exclude;
346   size_t i;
347 
348   for (i = 0; i < exclude_count; i++)
349     {
350       char const *pattern = exclude[i].pattern;
351       int options = exclude[i].options;
352       if (exclude_fnmatch (pattern, f, options))
353         return true;
354     }
355   return false;
356 }
357 
358 /* Return true if the exclude_hash segment SEG matches F.
359    BUFFER is an auxiliary storage of the same length as F (with nul
360    terminator included) */
361 static bool
362 file_name_matches (struct exclude_segment const *seg, char const *f,
363                    char *buffer)
364 {
365   int options = seg->options;
366   Hash_table *table = seg->v.table;
367 
368   do
369     {
370       /* initialize the pattern */
371       strcpy (buffer, f);
372 
373       while (1)
374         {
375           if (hash_lookup (table, buffer))
376             return true;
377           if (options & FNM_LEADING_DIR)
378             {
379               char *p = strrchr (buffer, '/');
380               if (p)
381                 {
382                   *p = 0;
383                   continue;
384                 }
385             }
386           break;
387         }
388 
389       if (!(options & EXCLUDE_ANCHORED))
390         {
391           f = strchr (f, '/');
392           if (f)
393             f++;
394         }
395       else
396         break;
397     }
398   while (f);
399 
400   return false;
401 }
402 
403 /* Return true if EX excludes F.  */
404 
405 bool
406 excluded_file_name (struct exclude const *ex, char const *f)
407 {
408   struct exclude_segment *seg;
409   bool invert = false;
410   char *filename = NULL;
411 
412   /* If no patterns are given, the default is to include.  */
413   if (!ex->head)
414     return false;
415 
416   /* Scan through the segments, reporting the status of the first match.
417      The segments are in reverse order, so this reports the status of
418      the last match in the original option list.  */
419   for (seg = ex->head; ; seg = seg->next)
420     {
421       if (seg->type == exclude_hash)
422         {
423           if (!filename)
424             filename = xmalloc (strlen (f) + 1);
425           if (file_name_matches (seg, f, filename))
426             break;
427         }
428       else
429         {
430           if (file_pattern_matches (seg, f))
431             break;
432         }
433 
434       if (! seg->next)
435         {
436           /* If patterns are given but none match, the default is the
437              opposite of the last segment (i.e., the first in the
438              original option list).  For example, in the command
439              'grep -r --exclude="a*" --include="*b" pat dir', the
440              first option is --exclude so any file name matching
441              neither a* nor *b is included.  */
442           invert = true;
443           break;
444         }
445     }
446 
447   free (filename);
448   return invert ^ ! (seg->options & EXCLUDE_INCLUDE);
449 }
450 
451 /* Append to EX the exclusion PATTERN with OPTIONS.  */
452 
453 void
454 add_exclude (struct exclude *ex, char const *pattern, int options)
455 {
456   struct exclude_segment *seg;
457 
458   if ((options & EXCLUDE_WILDCARDS)
459       && fnmatch_pattern_has_wildcards (pattern, options))
460     {
461       struct exclude_pattern *pat;
462       struct patopts *patopts;
463 
464       if (! (ex->head && ex->head->type == exclude_pattern
465              && ((ex->head->options & EXCLUDE_INCLUDE)
466                  == (options & EXCLUDE_INCLUDE))))
467         new_exclude_segment (ex, exclude_pattern, options);
468       seg = ex->head;
469 
470       pat = &seg->v.pat;
471       if (pat->exclude_count == pat->exclude_alloc)
472         pat->exclude = x2nrealloc (pat->exclude, &pat->exclude_alloc,
473                                    sizeof *pat->exclude);
474       patopts = &pat->exclude[pat->exclude_count++];
475       patopts->pattern = pattern;
476       patopts->options = options;
477     }
478   else
479     {
480       char *str, *p;
481       int exclude_hash_flags = (EXCLUDE_INCLUDE | EXCLUDE_ANCHORED
482                                 | FNM_LEADING_DIR | FNM_CASEFOLD);
483       if (! (ex->head && ex->head->type == exclude_hash
484              && ((ex->head->options & exclude_hash_flags)
485                  == (options & exclude_hash_flags))))
486         new_exclude_segment (ex, exclude_hash, options);
487       seg = ex->head;
488 
489       str = xstrdup (pattern);
490       if ((options & (EXCLUDE_WILDCARDS | FNM_NOESCAPE)) == EXCLUDE_WILDCARDS)
491         unescape_pattern (str);
492       p = hash_insert (seg->v.table, str);
493       if (p != str)
494         free (str);
495     }
496 }
497 
498 /* Use ADD_FUNC to append to EX the patterns in FILE_NAME, each with
499    OPTIONS.  LINE_END terminates each pattern in the file.  If
500    LINE_END is a space character, ignore trailing spaces and empty
501    lines in FILE.  Return -1 on failure, 0 on success.  */
502 
503 int
504 add_exclude_file (void (*add_func) (struct exclude *, char const *, int),
505                   struct exclude *ex, char const *file_name, int options,
506                   char line_end)
507 {
508   bool use_stdin = file_name[0] == '-' && !file_name[1];
509   FILE *in;
510   char *buf = NULL;
511   char *p;
512   char const *pattern;
513   char const *lim;
514   size_t buf_alloc = 0;
515   size_t buf_count = 0;
516   int c;
517   int e = 0;
518 
519   if (use_stdin)
520     in = stdin;
521   else if (! (in = fopen (file_name, "r")))
522     return -1;
523 
524   while ((c = getc (in)) != EOF)
525     {
526       if (buf_count == buf_alloc)
527         buf = x2realloc (buf, &buf_alloc);
528       buf[buf_count++] = c;
529     }
530 
531   if (ferror (in))
532     e = errno;
533 
534   if (!use_stdin && fclose (in) != 0)
535     e = errno;
536 
537   buf = xrealloc (buf, buf_count + 1);
538   buf[buf_count] = line_end;
539   lim = buf + buf_count + ! (buf_count == 0 || buf[buf_count - 1] == line_end);
540   pattern = buf;
541 
542   for (p = buf; p < lim; p++)
543     if (*p == line_end)
544       {
545         char *pattern_end = p;
546 
547         if (isspace ((unsigned char) line_end))
548           {
549             for (; ; pattern_end--)
550               if (pattern_end == pattern)
551                 goto next_pattern;
552               else if (! isspace ((unsigned char) pattern_end[-1]))
553                 break;
554           }
555 
556         *pattern_end = '\0';
557         (*add_func) (ex, pattern, options);
558 
559       next_pattern:
560         pattern = p + 1;
561       }
562 
563   errno = e;
564   return e ? -1 : 0;
565 }
566