1 /*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "archive_platform.h"
28
29 #ifdef HAVE_STRING_H
30 #include <string.h>
31 #endif
32 #ifdef HAVE_WCHAR_H
33 #include <wchar.h>
34 #endif
35
36 #include "archive_pathmatch.h"
37
38 /*
39 * Check whether a character 'c' is matched by a list specification [...]:
40 * * Leading '!' or '^' negates the class.
41 * * <char>-<char> is a range of characters
42 * * \<char> removes any special meaning for <char>
43 *
44 * Some interesting boundary cases:
45 * a-d-e is one range (a-d) followed by two single characters - and e.
46 * \a-\d is same as a-d
47 * a\-d is three single characters: a, d, -
48 * Trailing - is not special (so [a-] is two characters a and -).
49 * Initial - is not special ([a-] is same as [-a] is same as [\\-a])
50 * This function never sees a trailing \.
51 * [] always fails
52 * [!] always succeeds
53 */
54 static int
pm_list(const char * start,const char * end,const char c,int flags)55 pm_list(const char *start, const char *end, const char c, int flags)
56 {
57 const char *p = start;
58 char rangeStart = '\0', nextRangeStart;
59 int match = 1, nomatch = 0;
60
61 /* This will be used soon... */
62 (void)flags; /* UNUSED */
63
64 /* If this is a negated class, return success for nomatch. */
65 if ((*p == '!' || *p == '^') && p < end) {
66 match = 0;
67 nomatch = 1;
68 ++p;
69 }
70
71 while (p < end) {
72 nextRangeStart = '\0';
73 switch (*p) {
74 case '-':
75 /* Trailing or initial '-' is not special. */
76 if ((rangeStart == '\0') || (p == end - 1)) {
77 if (*p == c)
78 return (match);
79 } else {
80 char rangeEnd = *++p;
81 if (rangeEnd == '\\')
82 rangeEnd = *++p;
83 if ((rangeStart <= c) && (c <= rangeEnd))
84 return (match);
85 }
86 break;
87 case '\\':
88 ++p;
89 /* Fall through */
90 default:
91 if (*p == c)
92 return (match);
93 nextRangeStart = *p; /* Possible start of range. */
94 }
95 rangeStart = nextRangeStart;
96 ++p;
97 }
98 return (nomatch);
99 }
100
101 static int
pm_list_w(const wchar_t * start,const wchar_t * end,const wchar_t c,int flags)102 pm_list_w(const wchar_t *start, const wchar_t *end, const wchar_t c, int flags)
103 {
104 const wchar_t *p = start;
105 wchar_t rangeStart = L'\0', nextRangeStart;
106 int match = 1, nomatch = 0;
107
108 /* This will be used soon... */
109 (void)flags; /* UNUSED */
110
111 /* If this is a negated class, return success for nomatch. */
112 if ((*p == L'!' || *p == L'^') && p < end) {
113 match = 0;
114 nomatch = 1;
115 ++p;
116 }
117
118 while (p < end) {
119 nextRangeStart = L'\0';
120 switch (*p) {
121 case L'-':
122 /* Trailing or initial '-' is not special. */
123 if ((rangeStart == L'\0') || (p == end - 1)) {
124 if (*p == c)
125 return (match);
126 } else {
127 wchar_t rangeEnd = *++p;
128 if (rangeEnd == L'\\')
129 rangeEnd = *++p;
130 if ((rangeStart <= c) && (c <= rangeEnd))
131 return (match);
132 }
133 break;
134 case L'\\':
135 ++p;
136 /* Fall through */
137 default:
138 if (*p == c)
139 return (match);
140 nextRangeStart = *p; /* Possible start of range. */
141 }
142 rangeStart = nextRangeStart;
143 ++p;
144 }
145 return (nomatch);
146 }
147
148 /*
149 * If s is pointing to "./", ".//", "./././" or the like, skip it.
150 */
151 static const char *
pm_slashskip(const char * s)152 pm_slashskip(const char *s) {
153 while ((*s == '/')
154 || (s[0] == '.' && s[1] == '/')
155 || (s[0] == '.' && s[1] == '\0'))
156 ++s;
157 return (s);
158 }
159
160 static const wchar_t *
pm_slashskip_w(const wchar_t * s)161 pm_slashskip_w(const wchar_t *s) {
162 while ((*s == L'/')
163 || (s[0] == L'.' && s[1] == L'/')
164 || (s[0] == L'.' && s[1] == L'\0'))
165 ++s;
166 return (s);
167 }
168
169 static int
pm(const char * p,const char * s,int flags)170 pm(const char *p, const char *s, int flags)
171 {
172 const char *end;
173
174 /*
175 * Ignore leading './', './/', '././', etc.
176 */
177 if (s[0] == '.' && s[1] == '/')
178 s = pm_slashskip(s + 1);
179 if (p[0] == '.' && p[1] == '/')
180 p = pm_slashskip(p + 1);
181
182 for (;;) {
183 switch (*p) {
184 case '\0':
185 if (s[0] == '/') {
186 if (flags & PATHMATCH_NO_ANCHOR_END)
187 return (1);
188 /* "dir" == "dir/" == "dir/." */
189 s = pm_slashskip(s);
190 }
191 return (*s == '\0');
192 case '?':
193 /* ? always succeeds, unless we hit end of 's' */
194 if (*s == '\0')
195 return (0);
196 break;
197 case '*':
198 /* "*" == "**" == "***" ... */
199 while (*p == '*')
200 ++p;
201 /* Trailing '*' always succeeds. */
202 if (*p == '\0')
203 return (1);
204 while (*s) {
205 if (archive_pathmatch(p, s, flags))
206 return (1);
207 ++s;
208 }
209 return (0);
210 case '[':
211 /*
212 * Find the end of the [...] character class,
213 * ignoring \] that might occur within the class.
214 */
215 end = p + 1;
216 while (*end != '\0' && *end != ']') {
217 if (*end == '\\' && end[1] != '\0')
218 ++end;
219 ++end;
220 }
221 if (*end == ']') {
222 /* We found [...], try to match it. */
223 if (!pm_list(p + 1, end, *s, flags))
224 return (0);
225 p = end; /* Jump to trailing ']' char. */
226 break;
227 } else
228 /* No final ']', so just match '['. */
229 if (*p != *s)
230 return (0);
231 break;
232 case '\\':
233 /* Trailing '\\' matches itself. */
234 if (p[1] == '\0') {
235 if (*s != '\\')
236 return (0);
237 } else {
238 ++p;
239 if (*p != *s)
240 return (0);
241 }
242 break;
243 case '/':
244 if (*s != '/' && *s != '\0')
245 return (0);
246 /* Note: pattern "/\./" won't match "/";
247 * pm_slashskip() correctly stops at backslash. */
248 p = pm_slashskip(p);
249 s = pm_slashskip(s);
250 if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END))
251 return (1);
252 --p; /* Counteract the increment below. */
253 --s;
254 break;
255 case '$':
256 /* '$' is special only at end of pattern and only
257 * if PATHMATCH_NO_ANCHOR_END is specified. */
258 if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
259 /* "dir" == "dir/" == "dir/." */
260 return (*pm_slashskip(s) == '\0');
261 }
262 /* Otherwise, '$' is not special. */
263 /* FALL THROUGH */
264 default:
265 if (*p != *s)
266 return (0);
267 break;
268 }
269 ++p;
270 ++s;
271 }
272 }
273
274 static int
pm_w(const wchar_t * p,const wchar_t * s,int flags)275 pm_w(const wchar_t *p, const wchar_t *s, int flags)
276 {
277 const wchar_t *end;
278
279 /*
280 * Ignore leading './', './/', '././', etc.
281 */
282 if (s[0] == L'.' && s[1] == L'/')
283 s = pm_slashskip_w(s + 1);
284 if (p[0] == L'.' && p[1] == L'/')
285 p = pm_slashskip_w(p + 1);
286
287 for (;;) {
288 switch (*p) {
289 case L'\0':
290 if (s[0] == L'/') {
291 if (flags & PATHMATCH_NO_ANCHOR_END)
292 return (1);
293 /* "dir" == "dir/" == "dir/." */
294 s = pm_slashskip_w(s);
295 }
296 return (*s == L'\0');
297 case L'?':
298 /* ? always succeeds, unless we hit end of 's' */
299 if (*s == L'\0')
300 return (0);
301 break;
302 case L'*':
303 /* "*" == "**" == "***" ... */
304 while (*p == L'*')
305 ++p;
306 /* Trailing '*' always succeeds. */
307 if (*p == L'\0')
308 return (1);
309 while (*s) {
310 if (archive_pathmatch_w(p, s, flags))
311 return (1);
312 ++s;
313 }
314 return (0);
315 case L'[':
316 /*
317 * Find the end of the [...] character class,
318 * ignoring \] that might occur within the class.
319 */
320 end = p + 1;
321 while (*end != L'\0' && *end != L']') {
322 if (*end == L'\\' && end[1] != L'\0')
323 ++end;
324 ++end;
325 }
326 if (*end == L']') {
327 /* We found [...], try to match it. */
328 if (!pm_list_w(p + 1, end, *s, flags))
329 return (0);
330 p = end; /* Jump to trailing ']' char. */
331 break;
332 } else
333 /* No final ']', so just match '['. */
334 if (*p != *s)
335 return (0);
336 break;
337 case L'\\':
338 /* Trailing '\\' matches itself. */
339 if (p[1] == L'\0') {
340 if (*s != L'\\')
341 return (0);
342 } else {
343 ++p;
344 if (*p != *s)
345 return (0);
346 }
347 break;
348 case L'/':
349 if (*s != L'/' && *s != L'\0')
350 return (0);
351 /* Note: pattern "/\./" won't match "/";
352 * pm_slashskip() correctly stops at backslash. */
353 p = pm_slashskip_w(p);
354 s = pm_slashskip_w(s);
355 if (*p == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END))
356 return (1);
357 --p; /* Counteract the increment below. */
358 --s;
359 break;
360 case L'$':
361 /* '$' is special only at end of pattern and only
362 * if PATHMATCH_NO_ANCHOR_END is specified. */
363 if (p[1] == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
364 /* "dir" == "dir/" == "dir/." */
365 return (*pm_slashskip_w(s) == L'\0');
366 }
367 /* Otherwise, '$' is not special. */
368 /* FALL THROUGH */
369 default:
370 if (*p != *s)
371 return (0);
372 break;
373 }
374 ++p;
375 ++s;
376 }
377 }
378
379 /* Main entry point. */
380 int
__archive_pathmatch(const char * p,const char * s,int flags)381 __archive_pathmatch(const char *p, const char *s, int flags)
382 {
383 /* Empty pattern only matches the empty string. */
384 if (p == NULL || *p == '\0')
385 return (s == NULL || *s == '\0');
386 else if (s == NULL)
387 return (0);
388
389 /* Leading '^' anchors the start of the pattern. */
390 if (*p == '^') {
391 ++p;
392 flags &= ~PATHMATCH_NO_ANCHOR_START;
393 }
394
395 if (*p == '/' && *s != '/')
396 return (0);
397
398 /* Certain patterns anchor implicitly. */
399 if (*p == '*' || *p == '/') {
400 while (*p == '/')
401 ++p;
402 while (*s == '/')
403 ++s;
404 return (pm(p, s, flags));
405 }
406
407 /* If start is unanchored, try to match start of each path element. */
408 if (flags & PATHMATCH_NO_ANCHOR_START) {
409 for ( ; s != NULL; s = strchr(s, '/')) {
410 if (*s == '/')
411 s++;
412 if (pm(p, s, flags))
413 return (1);
414 }
415 return (0);
416 }
417
418 /* Default: Match from beginning. */
419 return (pm(p, s, flags));
420 }
421
422 int
__archive_pathmatch_w(const wchar_t * p,const wchar_t * s,int flags)423 __archive_pathmatch_w(const wchar_t *p, const wchar_t *s, int flags)
424 {
425 /* Empty pattern only matches the empty string. */
426 if (p == NULL || *p == L'\0')
427 return (s == NULL || *s == L'\0');
428 else if (s == NULL)
429 return (0);
430
431 /* Leading '^' anchors the start of the pattern. */
432 if (*p == L'^') {
433 ++p;
434 flags &= ~PATHMATCH_NO_ANCHOR_START;
435 }
436
437 if (*p == L'/' && *s != L'/')
438 return (0);
439
440 /* Certain patterns anchor implicitly. */
441 if (*p == L'*' || *p == L'/') {
442 while (*p == L'/')
443 ++p;
444 while (*s == L'/')
445 ++s;
446 return (pm_w(p, s, flags));
447 }
448
449 /* If start is unanchored, try to match start of each path element. */
450 if (flags & PATHMATCH_NO_ANCHOR_START) {
451 for ( ; s != NULL; s = wcschr(s, L'/')) {
452 if (*s == L'/')
453 s++;
454 if (pm_w(p, s, flags))
455 return (1);
456 }
457 return (0);
458 }
459
460 /* Default: Match from beginning. */
461 return (pm_w(p, s, flags));
462 }
463