1 /*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "archive_platform.h"
28 __FBSDID("$FreeBSD$");
29
30 #ifdef HAVE_STRING_H
31 #include <string.h>
32 #endif
33 #ifdef HAVE_WCHAR_H
34 #include <wchar.h>
35 #endif
36
37 #include "archive_pathmatch.h"
38
39 /*
40 * Check whether a character 'c' is matched by a list specification [...]:
41 * * Leading '!' or '^' negates the class.
42 * * <char>-<char> is a range of characters
43 * * \<char> removes any special meaning for <char>
44 *
45 * Some interesting boundary cases:
46 * a-d-e is one range (a-d) followed by two single characters - and e.
47 * \a-\d is same as a-d
48 * a\-d is three single characters: a, d, -
49 * Trailing - is not special (so [a-] is two characters a and -).
50 * Initial - is not special ([a-] is same as [-a] is same as [\\-a])
51 * This function never sees a trailing \.
52 * [] always fails
53 * [!] always succeeds
54 */
55 static int
pm_list(const char * start,const char * end,const char c,int flags)56 pm_list(const char *start, const char *end, const char c, int flags)
57 {
58 const char *p = start;
59 char rangeStart = '\0', nextRangeStart;
60 int match = 1, nomatch = 0;
61
62 /* This will be used soon... */
63 (void)flags; /* UNUSED */
64
65 /* If this is a negated class, return success for nomatch. */
66 if ((*p == '!' || *p == '^') && p < end) {
67 match = 0;
68 nomatch = 1;
69 ++p;
70 }
71
72 while (p < end) {
73 nextRangeStart = '\0';
74 switch (*p) {
75 case '-':
76 /* Trailing or initial '-' is not special. */
77 if ((rangeStart == '\0') || (p == end - 1)) {
78 if (*p == c)
79 return (match);
80 } else {
81 char rangeEnd = *++p;
82 if (rangeEnd == '\\')
83 rangeEnd = *++p;
84 if ((rangeStart <= c) && (c <= rangeEnd))
85 return (match);
86 }
87 break;
88 case '\\':
89 ++p;
90 /* Fall through */
91 default:
92 if (*p == c)
93 return (match);
94 nextRangeStart = *p; /* Possible start of range. */
95 }
96 rangeStart = nextRangeStart;
97 ++p;
98 }
99 return (nomatch);
100 }
101
102 static int
pm_list_w(const wchar_t * start,const wchar_t * end,const wchar_t c,int flags)103 pm_list_w(const wchar_t *start, const wchar_t *end, const wchar_t c, int flags)
104 {
105 const wchar_t *p = start;
106 wchar_t rangeStart = L'\0', nextRangeStart;
107 int match = 1, nomatch = 0;
108
109 /* This will be used soon... */
110 (void)flags; /* UNUSED */
111
112 /* If this is a negated class, return success for nomatch. */
113 if ((*p == L'!' || *p == L'^') && p < end) {
114 match = 0;
115 nomatch = 1;
116 ++p;
117 }
118
119 while (p < end) {
120 nextRangeStart = L'\0';
121 switch (*p) {
122 case L'-':
123 /* Trailing or initial '-' is not special. */
124 if ((rangeStart == L'\0') || (p == end - 1)) {
125 if (*p == c)
126 return (match);
127 } else {
128 wchar_t rangeEnd = *++p;
129 if (rangeEnd == L'\\')
130 rangeEnd = *++p;
131 if ((rangeStart <= c) && (c <= rangeEnd))
132 return (match);
133 }
134 break;
135 case L'\\':
136 ++p;
137 /* Fall through */
138 default:
139 if (*p == c)
140 return (match);
141 nextRangeStart = *p; /* Possible start of range. */
142 }
143 rangeStart = nextRangeStart;
144 ++p;
145 }
146 return (nomatch);
147 }
148
149 /*
150 * If s is pointing to "./", ".//", "./././" or the like, skip it.
151 */
152 static const char *
pm_slashskip(const char * s)153 pm_slashskip(const char *s) {
154 while ((*s == '/')
155 || (s[0] == '.' && s[1] == '/')
156 || (s[0] == '.' && s[1] == '\0'))
157 ++s;
158 return (s);
159 }
160
161 static const wchar_t *
pm_slashskip_w(const wchar_t * s)162 pm_slashskip_w(const wchar_t *s) {
163 while ((*s == L'/')
164 || (s[0] == L'.' && s[1] == L'/')
165 || (s[0] == L'.' && s[1] == L'\0'))
166 ++s;
167 return (s);
168 }
169
170 static int
pm(const char * p,const char * s,int flags)171 pm(const char *p, const char *s, int flags)
172 {
173 const char *end;
174
175 /*
176 * Ignore leading './', './/', '././', etc.
177 */
178 if (s[0] == '.' && s[1] == '/')
179 s = pm_slashskip(s + 1);
180 if (p[0] == '.' && p[1] == '/')
181 p = pm_slashskip(p + 1);
182
183 for (;;) {
184 switch (*p) {
185 case '\0':
186 if (s[0] == '/') {
187 if (flags & PATHMATCH_NO_ANCHOR_END)
188 return (1);
189 /* "dir" == "dir/" == "dir/." */
190 s = pm_slashskip(s);
191 }
192 return (*s == '\0');
193 case '?':
194 /* ? always succeeds, unless we hit end of 's' */
195 if (*s == '\0')
196 return (0);
197 break;
198 case '*':
199 /* "*" == "**" == "***" ... */
200 while (*p == '*')
201 ++p;
202 /* Trailing '*' always succeeds. */
203 if (*p == '\0')
204 return (1);
205 while (*s) {
206 if (archive_pathmatch(p, s, flags))
207 return (1);
208 ++s;
209 }
210 return (0);
211 case '[':
212 /*
213 * Find the end of the [...] character class,
214 * ignoring \] that might occur within the class.
215 */
216 end = p + 1;
217 while (*end != '\0' && *end != ']') {
218 if (*end == '\\' && end[1] != '\0')
219 ++end;
220 ++end;
221 }
222 if (*end == ']') {
223 /* We found [...], try to match it. */
224 if (!pm_list(p + 1, end, *s, flags))
225 return (0);
226 p = end; /* Jump to trailing ']' char. */
227 break;
228 } else
229 /* No final ']', so just match '['. */
230 if (*p != *s)
231 return (0);
232 break;
233 case '\\':
234 /* Trailing '\\' matches itself. */
235 if (p[1] == '\0') {
236 if (*s != '\\')
237 return (0);
238 } else {
239 ++p;
240 if (*p != *s)
241 return (0);
242 }
243 break;
244 case '/':
245 if (*s != '/' && *s != '\0')
246 return (0);
247 /* Note: pattern "/\./" won't match "/";
248 * pm_slashskip() correctly stops at backslash. */
249 p = pm_slashskip(p);
250 s = pm_slashskip(s);
251 if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END))
252 return (1);
253 --p; /* Counteract the increment below. */
254 --s;
255 break;
256 case '$':
257 /* '$' is special only at end of pattern and only
258 * if PATHMATCH_NO_ANCHOR_END is specified. */
259 if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
260 /* "dir" == "dir/" == "dir/." */
261 return (*pm_slashskip(s) == '\0');
262 }
263 /* Otherwise, '$' is not special. */
264 /* FALL THROUGH */
265 default:
266 if (*p != *s)
267 return (0);
268 break;
269 }
270 ++p;
271 ++s;
272 }
273 }
274
275 static int
pm_w(const wchar_t * p,const wchar_t * s,int flags)276 pm_w(const wchar_t *p, const wchar_t *s, int flags)
277 {
278 const wchar_t *end;
279
280 /*
281 * Ignore leading './', './/', '././', etc.
282 */
283 if (s[0] == L'.' && s[1] == L'/')
284 s = pm_slashskip_w(s + 1);
285 if (p[0] == L'.' && p[1] == L'/')
286 p = pm_slashskip_w(p + 1);
287
288 for (;;) {
289 switch (*p) {
290 case L'\0':
291 if (s[0] == L'/') {
292 if (flags & PATHMATCH_NO_ANCHOR_END)
293 return (1);
294 /* "dir" == "dir/" == "dir/." */
295 s = pm_slashskip_w(s);
296 }
297 return (*s == L'\0');
298 case L'?':
299 /* ? always succeeds, unless we hit end of 's' */
300 if (*s == L'\0')
301 return (0);
302 break;
303 case L'*':
304 /* "*" == "**" == "***" ... */
305 while (*p == L'*')
306 ++p;
307 /* Trailing '*' always succeeds. */
308 if (*p == L'\0')
309 return (1);
310 while (*s) {
311 if (archive_pathmatch_w(p, s, flags))
312 return (1);
313 ++s;
314 }
315 return (0);
316 case L'[':
317 /*
318 * Find the end of the [...] character class,
319 * ignoring \] that might occur within the class.
320 */
321 end = p + 1;
322 while (*end != L'\0' && *end != L']') {
323 if (*end == L'\\' && end[1] != L'\0')
324 ++end;
325 ++end;
326 }
327 if (*end == L']') {
328 /* We found [...], try to match it. */
329 if (!pm_list_w(p + 1, end, *s, flags))
330 return (0);
331 p = end; /* Jump to trailing ']' char. */
332 break;
333 } else
334 /* No final ']', so just match '['. */
335 if (*p != *s)
336 return (0);
337 break;
338 case L'\\':
339 /* Trailing '\\' matches itself. */
340 if (p[1] == L'\0') {
341 if (*s != L'\\')
342 return (0);
343 } else {
344 ++p;
345 if (*p != *s)
346 return (0);
347 }
348 break;
349 case L'/':
350 if (*s != L'/' && *s != L'\0')
351 return (0);
352 /* Note: pattern "/\./" won't match "/";
353 * pm_slashskip() correctly stops at backslash. */
354 p = pm_slashskip_w(p);
355 s = pm_slashskip_w(s);
356 if (*p == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END))
357 return (1);
358 --p; /* Counteract the increment below. */
359 --s;
360 break;
361 case L'$':
362 /* '$' is special only at end of pattern and only
363 * if PATHMATCH_NO_ANCHOR_END is specified. */
364 if (p[1] == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
365 /* "dir" == "dir/" == "dir/." */
366 return (*pm_slashskip_w(s) == L'\0');
367 }
368 /* Otherwise, '$' is not special. */
369 /* FALL THROUGH */
370 default:
371 if (*p != *s)
372 return (0);
373 break;
374 }
375 ++p;
376 ++s;
377 }
378 }
379
380 /* Main entry point. */
381 int
__archive_pathmatch(const char * p,const char * s,int flags)382 __archive_pathmatch(const char *p, const char *s, int flags)
383 {
384 /* Empty pattern only matches the empty string. */
385 if (p == NULL || *p == '\0')
386 return (s == NULL || *s == '\0');
387 else if (s == NULL)
388 return (0);
389
390 /* Leading '^' anchors the start of the pattern. */
391 if (*p == '^') {
392 ++p;
393 flags &= ~PATHMATCH_NO_ANCHOR_START;
394 }
395
396 if (*p == '/' && *s != '/')
397 return (0);
398
399 /* Certain patterns anchor implicitly. */
400 if (*p == '*' || *p == '/') {
401 while (*p == '/')
402 ++p;
403 while (*s == '/')
404 ++s;
405 return (pm(p, s, flags));
406 }
407
408 /* If start is unanchored, try to match start of each path element. */
409 if (flags & PATHMATCH_NO_ANCHOR_START) {
410 for ( ; s != NULL; s = strchr(s, '/')) {
411 if (*s == '/')
412 s++;
413 if (pm(p, s, flags))
414 return (1);
415 }
416 return (0);
417 }
418
419 /* Default: Match from beginning. */
420 return (pm(p, s, flags));
421 }
422
423 int
__archive_pathmatch_w(const wchar_t * p,const wchar_t * s,int flags)424 __archive_pathmatch_w(const wchar_t *p, const wchar_t *s, int flags)
425 {
426 /* Empty pattern only matches the empty string. */
427 if (p == NULL || *p == L'\0')
428 return (s == NULL || *s == L'\0');
429 else if (s == NULL)
430 return (0);
431
432 /* Leading '^' anchors the start of the pattern. */
433 if (*p == L'^') {
434 ++p;
435 flags &= ~PATHMATCH_NO_ANCHOR_START;
436 }
437
438 if (*p == L'/' && *s != L'/')
439 return (0);
440
441 /* Certain patterns anchor implicitly. */
442 if (*p == L'*' || *p == L'/') {
443 while (*p == L'/')
444 ++p;
445 while (*s == L'/')
446 ++s;
447 return (pm_w(p, s, flags));
448 }
449
450 /* If start is unanchored, try to match start of each path element. */
451 if (flags & PATHMATCH_NO_ANCHOR_START) {
452 for ( ; s != NULL; s = wcschr(s, L'/')) {
453 if (*s == L'/')
454 s++;
455 if (pm_w(p, s, flags))
456 return (1);
457 }
458 return (0);
459 }
460
461 /* Default: Match from beginning. */
462 return (pm_w(p, s, flags));
463 }
464