1 /*- 2 * Copyright (c) 2003-2007 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "archive_platform.h" 28 __FBSDID("$FreeBSD$"); 29 30 #ifdef HAVE_STRING_H 31 #include <string.h> 32 #endif 33 #ifdef HAVE_WCHAR_H 34 #include <wchar.h> 35 #endif 36 37 #include "archive_pathmatch.h" 38 39 /* 40 * Check whether a character 'c' is matched by a list specification [...]: 41 * * Leading '!' or '^' negates the class. 42 * * <char>-<char> is a range of characters 43 * * \<char> removes any special meaning for <char> 44 * 45 * Some interesting boundary cases: 46 * a-d-e is one range (a-d) followed by two single characters - and e. 47 * \a-\d is same as a-d 48 * a\-d is three single characters: a, d, - 49 * Trailing - is not special (so [a-] is two characters a and -). 50 * Initial - is not special ([a-] is same as [-a] is same as [\\-a]) 51 * This function never sees a trailing \. 52 * [] always fails 53 * [!] always succeeds 54 */ 55 static int 56 pm_list(const char *start, const char *end, const char c, int flags) 57 { 58 const char *p = start; 59 char rangeStart = '\0', nextRangeStart; 60 int match = 1, nomatch = 0; 61 62 /* This will be used soon... */ 63 (void)flags; /* UNUSED */ 64 65 /* If this is a negated class, return success for nomatch. */ 66 if ((*p == '!' || *p == '^') && p < end) { 67 match = 0; 68 nomatch = 1; 69 ++p; 70 } 71 72 while (p < end) { 73 nextRangeStart = '\0'; 74 switch (*p) { 75 case '-': 76 /* Trailing or initial '-' is not special. */ 77 if ((rangeStart == '\0') || (p == end - 1)) { 78 if (*p == c) 79 return (match); 80 } else { 81 char rangeEnd = *++p; 82 if (rangeEnd == '\\') 83 rangeEnd = *++p; 84 if ((rangeStart <= c) && (c <= rangeEnd)) 85 return (match); 86 } 87 break; 88 case '\\': 89 ++p; 90 /* Fall through */ 91 default: 92 if (*p == c) 93 return (match); 94 nextRangeStart = *p; /* Possible start of range. */ 95 } 96 rangeStart = nextRangeStart; 97 ++p; 98 } 99 return (nomatch); 100 } 101 102 static int 103 pm_list_w(const wchar_t *start, const wchar_t *end, const wchar_t c, int flags) 104 { 105 const wchar_t *p = start; 106 wchar_t rangeStart = L'\0', nextRangeStart; 107 int match = 1, nomatch = 0; 108 109 /* This will be used soon... */ 110 (void)flags; /* UNUSED */ 111 112 /* If this is a negated class, return success for nomatch. */ 113 if ((*p == L'!' || *p == L'^') && p < end) { 114 match = 0; 115 nomatch = 1; 116 ++p; 117 } 118 119 while (p < end) { 120 nextRangeStart = L'\0'; 121 switch (*p) { 122 case L'-': 123 /* Trailing or initial '-' is not special. */ 124 if ((rangeStart == L'\0') || (p == end - 1)) { 125 if (*p == c) 126 return (match); 127 } else { 128 wchar_t rangeEnd = *++p; 129 if (rangeEnd == L'\\') 130 rangeEnd = *++p; 131 if ((rangeStart <= c) && (c <= rangeEnd)) 132 return (match); 133 } 134 break; 135 case L'\\': 136 ++p; 137 /* Fall through */ 138 default: 139 if (*p == c) 140 return (match); 141 nextRangeStart = *p; /* Possible start of range. */ 142 } 143 rangeStart = nextRangeStart; 144 ++p; 145 } 146 return (nomatch); 147 } 148 149 /* 150 * If s is pointing to "./", ".//", "./././" or the like, skip it. 151 */ 152 static const char * 153 pm_slashskip(const char *s) { 154 while ((*s == '/') 155 || (s[0] == '.' && s[1] == '/') 156 || (s[0] == '.' && s[1] == '\0')) 157 ++s; 158 return (s); 159 } 160 161 static const wchar_t * 162 pm_slashskip_w(const wchar_t *s) { 163 while ((*s == L'/') 164 || (s[0] == L'.' && s[1] == L'/') 165 || (s[0] == L'.' && s[1] == L'\0')) 166 ++s; 167 return (s); 168 } 169 170 static int 171 pm(const char *p, const char *s, int flags) 172 { 173 const char *end; 174 175 /* 176 * Ignore leading './', './/', '././', etc. 177 */ 178 if (s[0] == '.' && s[1] == '/') 179 s = pm_slashskip(s + 1); 180 if (p[0] == '.' && p[1] == '/') 181 p = pm_slashskip(p + 1); 182 183 for (;;) { 184 switch (*p) { 185 case '\0': 186 if (s[0] == '/') { 187 if (flags & PATHMATCH_NO_ANCHOR_END) 188 return (1); 189 /* "dir" == "dir/" == "dir/." */ 190 s = pm_slashskip(s); 191 } 192 return (*s == '\0'); 193 case '?': 194 /* ? always succeeds, unless we hit end of 's' */ 195 if (*s == '\0') 196 return (0); 197 break; 198 case '*': 199 /* "*" == "**" == "***" ... */ 200 while (*p == '*') 201 ++p; 202 /* Trailing '*' always succeeds. */ 203 if (*p == '\0') 204 return (1); 205 while (*s) { 206 if (archive_pathmatch(p, s, flags)) 207 return (1); 208 ++s; 209 } 210 return (0); 211 case '[': 212 /* 213 * Find the end of the [...] character class, 214 * ignoring \] that might occur within the class. 215 */ 216 end = p + 1; 217 while (*end != '\0' && *end != ']') { 218 if (*end == '\\' && end[1] != '\0') 219 ++end; 220 ++end; 221 } 222 if (*end == ']') { 223 /* We found [...], try to match it. */ 224 if (!pm_list(p + 1, end, *s, flags)) 225 return (0); 226 p = end; /* Jump to trailing ']' char. */ 227 break; 228 } else 229 /* No final ']', so just match '['. */ 230 if (*p != *s) 231 return (0); 232 break; 233 case '\\': 234 /* Trailing '\\' matches itself. */ 235 if (p[1] == '\0') { 236 if (*s != '\\') 237 return (0); 238 } else { 239 ++p; 240 if (*p != *s) 241 return (0); 242 } 243 break; 244 case '/': 245 if (*s != '/' && *s != '\0') 246 return (0); 247 /* Note: pattern "/\./" won't match "/"; 248 * pm_slashskip() correctly stops at backslash. */ 249 p = pm_slashskip(p); 250 s = pm_slashskip(s); 251 if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)) 252 return (1); 253 --p; /* Counteract the increment below. */ 254 --s; 255 break; 256 case '$': 257 /* '$' is special only at end of pattern and only 258 * if PATHMATCH_NO_ANCHOR_END is specified. */ 259 if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){ 260 /* "dir" == "dir/" == "dir/." */ 261 return (*pm_slashskip(s) == '\0'); 262 } 263 /* Otherwise, '$' is not special. */ 264 /* FALL THROUGH */ 265 default: 266 if (*p != *s) 267 return (0); 268 break; 269 } 270 ++p; 271 ++s; 272 } 273 } 274 275 static int 276 pm_w(const wchar_t *p, const wchar_t *s, int flags) 277 { 278 const wchar_t *end; 279 280 /* 281 * Ignore leading './', './/', '././', etc. 282 */ 283 if (s[0] == L'.' && s[1] == L'/') 284 s = pm_slashskip_w(s + 1); 285 if (p[0] == L'.' && p[1] == L'/') 286 p = pm_slashskip_w(p + 1); 287 288 for (;;) { 289 switch (*p) { 290 case L'\0': 291 if (s[0] == L'/') { 292 if (flags & PATHMATCH_NO_ANCHOR_END) 293 return (1); 294 /* "dir" == "dir/" == "dir/." */ 295 s = pm_slashskip_w(s); 296 } 297 return (*s == L'\0'); 298 case L'?': 299 /* ? always succeeds, unless we hit end of 's' */ 300 if (*s == L'\0') 301 return (0); 302 break; 303 case L'*': 304 /* "*" == "**" == "***" ... */ 305 while (*p == L'*') 306 ++p; 307 /* Trailing '*' always succeeds. */ 308 if (*p == L'\0') 309 return (1); 310 while (*s) { 311 if (archive_pathmatch_w(p, s, flags)) 312 return (1); 313 ++s; 314 } 315 return (0); 316 case L'[': 317 /* 318 * Find the end of the [...] character class, 319 * ignoring \] that might occur within the class. 320 */ 321 end = p + 1; 322 while (*end != L'\0' && *end != L']') { 323 if (*end == L'\\' && end[1] != L'\0') 324 ++end; 325 ++end; 326 } 327 if (*end == L']') { 328 /* We found [...], try to match it. */ 329 if (!pm_list_w(p + 1, end, *s, flags)) 330 return (0); 331 p = end; /* Jump to trailing ']' char. */ 332 break; 333 } else 334 /* No final ']', so just match '['. */ 335 if (*p != *s) 336 return (0); 337 break; 338 case L'\\': 339 /* Trailing '\\' matches itself. */ 340 if (p[1] == L'\0') { 341 if (*s != L'\\') 342 return (0); 343 } else { 344 ++p; 345 if (*p != *s) 346 return (0); 347 } 348 break; 349 case L'/': 350 if (*s != L'/' && *s != L'\0') 351 return (0); 352 /* Note: pattern "/\./" won't match "/"; 353 * pm_slashskip() correctly stops at backslash. */ 354 p = pm_slashskip_w(p); 355 s = pm_slashskip_w(s); 356 if (*p == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)) 357 return (1); 358 --p; /* Counteract the increment below. */ 359 --s; 360 break; 361 case L'$': 362 /* '$' is special only at end of pattern and only 363 * if PATHMATCH_NO_ANCHOR_END is specified. */ 364 if (p[1] == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)){ 365 /* "dir" == "dir/" == "dir/." */ 366 return (*pm_slashskip_w(s) == L'\0'); 367 } 368 /* Otherwise, '$' is not special. */ 369 /* FALL THROUGH */ 370 default: 371 if (*p != *s) 372 return (0); 373 break; 374 } 375 ++p; 376 ++s; 377 } 378 } 379 380 /* Main entry point. */ 381 int 382 __archive_pathmatch(const char *p, const char *s, int flags) 383 { 384 /* Empty pattern only matches the empty string. */ 385 if (p == NULL || *p == '\0') 386 return (s == NULL || *s == '\0'); 387 388 /* Leading '^' anchors the start of the pattern. */ 389 if (*p == '^') { 390 ++p; 391 flags &= ~PATHMATCH_NO_ANCHOR_START; 392 } 393 394 if (*p == '/' && *s != '/') 395 return (0); 396 397 /* Certain patterns anchor implicitly. */ 398 if (*p == '*' || *p == '/') { 399 while (*p == '/') 400 ++p; 401 while (*s == '/') 402 ++s; 403 return (pm(p, s, flags)); 404 } 405 406 /* If start is unanchored, try to match start of each path element. */ 407 if (flags & PATHMATCH_NO_ANCHOR_START) { 408 for ( ; s != NULL; s = strchr(s, '/')) { 409 if (*s == '/') 410 s++; 411 if (pm(p, s, flags)) 412 return (1); 413 } 414 return (0); 415 } 416 417 /* Default: Match from beginning. */ 418 return (pm(p, s, flags)); 419 } 420 421 int 422 __archive_pathmatch_w(const wchar_t *p, const wchar_t *s, int flags) 423 { 424 /* Empty pattern only matches the empty string. */ 425 if (p == NULL || *p == L'\0') 426 return (s == NULL || *s == L'\0'); 427 428 /* Leading '^' anchors the start of the pattern. */ 429 if (*p == L'^') { 430 ++p; 431 flags &= ~PATHMATCH_NO_ANCHOR_START; 432 } 433 434 if (*p == L'/' && *s != L'/') 435 return (0); 436 437 /* Certain patterns anchor implicitly. */ 438 if (*p == L'*' || *p == L'/') { 439 while (*p == L'/') 440 ++p; 441 while (*s == L'/') 442 ++s; 443 return (pm_w(p, s, flags)); 444 } 445 446 /* If start is unanchored, try to match start of each path element. */ 447 if (flags & PATHMATCH_NO_ANCHOR_START) { 448 for ( ; s != NULL; s = wcschr(s, L'/')) { 449 if (*s == L'/') 450 s++; 451 if (pm_w(p, s, flags)) 452 return (1); 453 } 454 return (0); 455 } 456 457 /* Default: Match from beginning. */ 458 return (pm_w(p, s, flags)); 459 } 460