1 /* @(#)fnmatch.c 8.24 17/08/30 2005-2017 J. Schilling from 8.2 (Berkeley) */ 2 #include <schily/mconfig.h> 3 #ifndef lint 4 static UConst char sccsid[] = 5 "@(#)fnmatch.c 8.24 17/08/30 2005-2017 J. Schilling from 8.2 (Berkeley)"; 6 #endif 7 /* 8 * Copyright (c) 1989, 1993, 1994 9 * The Regents of the University of California. All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * Guido van Rossum. 13 * 14 * Copyright (c) 2005-2017 J. Schilling 15 * Copyright (c) 2011 The FreeBSD Foundation 16 * All rights reserved. 17 * Portions of this software were developed by David Chisnall 18 * under sponsorship from the FreeBSD Foundation. 19 * 20 * Redistribution and use in source and binary forms, with or without 21 * modification, are permitted provided that the following conditions 22 * are met: 23 * 1. Redistributions of source code must retain the above copyright 24 * notice, this list of conditions and the following disclaimer. 25 * 2. Redistributions in binary form must reproduce the above copyright 26 * notice, this list of conditions and the following disclaimer in the 27 * documentation and/or other materials provided with the distribution. 28 * 3. Neither the name of the University nor the names of its contributors 29 * may be used to endorse or promote products derived from this software 30 * without specific prior written permission. 31 * 32 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 33 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 35 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 36 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 37 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 38 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 39 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 40 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 41 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 42 * SUCH DAMAGE. 43 */ 44 45 #if defined(LIBC_SCCS) && !defined(lint) 46 static UConst char sccsid[] = "@(#)fnmatch.c 8.24 (Berkeley) 08/30/17"; 47 #endif /* LIBC_SCCS and not lint */ 48 /* "FBSD src/lib/libc/gen/fnmatch.c,v 1.19 2010/04/16 22:29:24 jilles Exp $" */ 49 50 /* 51 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. 52 * Compares a filename or pathname to a pattern. 53 */ 54 55 /* 56 * Some notes on multibyte character support: 57 * 1. Patterns with illegal byte sequences match nothing. 58 * 2. Illegal byte sequences in the "string" argument are handled by treating 59 * them as single-byte characters with a value of the first byte of the 60 * sequence cast to wchar_t. 61 * 3. Multibyte conversion state objects (mbstate_t) are passed around and 62 * used for most, but not all, conversions. Further work will be required 63 * to support state-dependent encodings. 64 */ 65 66 #include <schily/mconfig.h> 67 #include <schily/fnmatch.h> 68 #include <schily/limits.h> 69 #include <schily/string.h> 70 #include <schily/wchar.h> 71 #include <schily/wctype.h> 72 #include <schily/libport.h> /* Define missing prototypes */ 73 74 #define EOS '\0' 75 76 #define RANGE_MATCH 1 77 #define RANGE_NOMATCH 0 78 #define RANGE_ERROR (-1) 79 80 #define CL_SIZE 32 /* Max size for '[: :]' */ 81 82 static int rangematch __PR((const char *, wchar_t, int, char **, mbstate_t *)); 83 static int fnmatch1 __PR((const char *, const char *, const char *, int, 84 mbstate_t, mbstate_t)); 85 86 #ifndef HAVE_FNMATCH 87 #undef fnmatch 88 89 /* 90 * The Cygwin compile environment incorrectly implements #pragma weak. 91 * The weak symbols are only defined as local symbols making it impossible 92 * to use them from outside the scope of this source file. 93 * A platform that allows linking with global symbols has HAVE_LINK_WEAK 94 * defined. 95 */ 96 #if defined(HAVE_PRAGMA_WEAK) && defined(HAVE_LINK_WEAK) 97 #pragma weak fnmatch = js_fnmatch 98 #else 99 int 100 fnmatch(pattern, string, flags) 101 const char *pattern; 102 const char *string; 103 int flags; 104 { 105 return (js_fnmatch(pattern, string, flags)); 106 } 107 #endif 108 #endif 109 110 int 111 js_fnmatch(pattern, string, flags) 112 const char *pattern; 113 const char *string; 114 int flags; 115 { 116 /* 117 * SunPro C gives a warning if we do not initialize an object: 118 * static const mbstate_t initial; 119 * GCC gives a warning if we try to initialize it. 120 * As the POSIX standard forbids mbstate_t from being an array, 121 * we do not need "const", the var is always copied when used as 122 * a parapemeter for fnmatch1(); 123 */ 124 static mbstate_t initial; 125 126 return (fnmatch1(pattern, string, string, flags, initial, initial)); 127 } 128 129 static int 130 fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs) 131 const char *pattern; 132 const char *string; 133 const char *stringstart; 134 int flags; 135 mbstate_t patmbs; 136 mbstate_t strmbs; 137 { 138 const char *bt_pattern, *bt_string; 139 mbstate_t bt_patmbs, bt_strmbs; 140 char *newp; 141 char c; 142 wchar_t pc, sc; 143 size_t pclen, sclen; 144 145 bt_pattern = bt_string = NULL; 146 for (;;) { 147 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); 148 if (pclen == (size_t)-1 || pclen == (size_t)-2) 149 return (FNM_NOMATCH); 150 pattern += pclen; 151 sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); 152 if (sclen == (size_t)-1 || sclen == (size_t)-2) { 153 sc = (unsigned char)*string; 154 sclen = 1; 155 memset(&strmbs, 0, sizeof (strmbs)); 156 } 157 switch (pc) { 158 case EOS: 159 if ((flags & FNM_LEADING_DIR) && sc == '/') 160 return (0); 161 if (sc == EOS) 162 return (0); 163 goto backtrack; 164 case '?': 165 if (sc == EOS) 166 return (FNM_NOMATCH); 167 if (sc == '/' && (flags & FNM_PATHNAME)) 168 goto backtrack; 169 if (sc == '.' && (flags & FNM_PERIOD) && 170 (string == stringstart || 171 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 172 goto backtrack; 173 string += sclen; 174 break; 175 case '*': 176 c = *pattern; 177 /* Collapse multiple stars. */ 178 while (c == '*') 179 c = *++pattern; 180 181 if (sc == '.' && (flags & FNM_PERIOD) && 182 (string == stringstart || 183 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 184 goto backtrack; 185 186 /* Optimize for pattern with * at end or before /. */ 187 if (c == EOS) { 188 if (flags & FNM_PATHNAME) 189 return ((flags & FNM_LEADING_DIR) || 190 strchr(string, '/') == NULL ? 191 0 : FNM_NOMATCH); 192 else 193 return (0); 194 } else if (c == '/' && flags & FNM_PATHNAME) { 195 if ((string = strchr(string, '/')) == NULL) 196 return (FNM_NOMATCH); 197 break; 198 } 199 200 /* 201 * First try the shortest match for the '*' that 202 * could work. We can forget any earlier '*' since 203 * there is no way having it match more characters 204 * can help us, given that we are already here. 205 */ 206 bt_pattern = pattern, bt_patmbs = patmbs; 207 bt_string = string, bt_strmbs = strmbs; 208 break; 209 case '[': 210 if (sc == EOS) 211 return (FNM_NOMATCH); 212 if (sc == '/' && (flags & FNM_PATHNAME)) 213 goto backtrack; 214 if (sc == '.' && (flags & FNM_PERIOD) && 215 (string == stringstart || 216 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 217 goto backtrack; 218 219 switch (rangematch(pattern, sc, flags, &newp, 220 &patmbs)) { 221 case RANGE_ERROR: 222 goto norm; 223 case RANGE_MATCH: 224 pattern = newp; 225 break; 226 case RANGE_NOMATCH: 227 goto backtrack; 228 } 229 string += sclen; 230 break; 231 case '\\': 232 if (!(flags & FNM_NOESCAPE)) { 233 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, 234 &patmbs); 235 if (pclen == (size_t)-1 || pclen == (size_t)-2) 236 return (FNM_NOMATCH); 237 if (pclen == 0) 238 return (FNM_NOMATCH); 239 pattern += pclen; 240 } 241 /* FALLTHROUGH */ 242 default: 243 norm: 244 string += sclen; 245 if (pc == sc) { 246 ; 247 } else if ((flags & FNM_CASEFOLD) && 248 (towlower(pc) == towlower(sc))) { 249 ; 250 } else { 251 backtrack: 252 /* 253 * If we have a mismatch (other than hitting 254 * the end of the string), go back to the last 255 * '*' seen and have it match one additional 256 * character. 257 */ 258 if (bt_pattern == NULL) 259 return (FNM_NOMATCH); 260 sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX, 261 &bt_strmbs); 262 if (sclen == (size_t)-1 || 263 sclen == (size_t)-2) { 264 sc = (unsigned char)*bt_string; 265 sclen = 1; 266 memset(&bt_strmbs, 0, 267 sizeof (bt_strmbs)); 268 } 269 if (sc == EOS) 270 return (FNM_NOMATCH); 271 if (sc == '/' && flags & FNM_PATHNAME) 272 return (FNM_NOMATCH); 273 bt_string += sclen; 274 pattern = bt_pattern, patmbs = bt_patmbs; 275 string = bt_string, strmbs = bt_strmbs; 276 } 277 break; 278 } 279 } 280 /* NOTREACHED */ 281 } 282 283 #ifdef PROTOTYPES 284 static int 285 rangematch(const char *pattern, wchar_t test, int flags, char **newp, 286 mbstate_t *patmbs) 287 #else 288 static int 289 rangematch(pattern, test, flags, newp, patmbs) 290 const char *pattern; 291 wchar_t test; 292 int flags; 293 char **newp; 294 mbstate_t *patmbs; 295 #endif 296 { 297 int negate, ok; 298 wchar_t c, c2; 299 wchar_t otest = test; 300 size_t pclen; 301 const char *origpat; 302 #ifdef XXX_COLLATE 303 struct xlocale_collate *table = (struct xlocale_collate *) 304 __get_locale()->components[XLC_COLLATE]; 305 #endif 306 307 /* 308 * A bracket expression starting with an unquoted circumflex 309 * character produces unspecified results (IEEE 1003.2-1992, 310 * 3.13.2). This implementation treats it like '!', for 311 * consistency with the regular expression syntax. 312 * J.T. Conklin (conklin@ngai.kaleida.com) 313 */ 314 if ((negate = (*pattern == '!' || *pattern == '^'))) 315 ++pattern; 316 317 if (flags & FNM_CASEFOLD) 318 test = towlower(test); 319 320 /* 321 * A right bracket shall lose its special meaning and represent 322 * itself in a bracket expression if it occurs first in the list. 323 * -- POSIX.2 2.8.3.2 324 */ 325 ok = 0; 326 origpat = pattern; 327 for (;;) { 328 int quoted = 0; 329 330 if (*pattern == ']' && pattern > origpat) { 331 pattern++; 332 break; 333 } else if (*pattern == '\0') { 334 return (RANGE_ERROR); 335 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { 336 return (RANGE_NOMATCH); 337 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) { 338 pattern++; 339 quoted++; 340 } 341 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); 342 if (pclen == (size_t)-1 || pclen == (size_t)-2) 343 return (RANGE_NOMATCH); 344 pattern += pclen; 345 346 if (!quoted && c == '[') { 347 if (pattern[0] == ':') { 348 char class[CL_SIZE+1]; 349 char *pc = class; 350 const char *p; 351 352 p = pattern + 1; /* Eat ':' */ 353 for (;;) { 354 if (*p == '\0') 355 return (RANGE_ERROR); 356 if (*p == ':' && p[1] == ']') 357 break; 358 if (pc >= &class[CL_SIZE]) 359 return (RANGE_ERROR); 360 *pc++ = *p++; 361 } 362 if (pc == class) 363 return (RANGE_ERROR); 364 *pc = '\0'; 365 pattern = p + 2; /* Skip ":]" */ 366 if (iswctype(otest, wctype(class))) { 367 ok = 1; 368 } else if (flags & FNM_CASEFOLD) { 369 /* 370 * Convert to the other case 371 */ 372 if (strcmp(class, "upper") == 0) 373 if (iswctype(otest, 374 wctype("lower"))) 375 ok = 1; 376 else if (strcmp(class, "lower") == 0) 377 if (iswctype(otest, 378 wctype("upper"))) 379 ok = 1; 380 } 381 continue; 382 } 383 } 384 385 if (flags & FNM_CASEFOLD) 386 c = towlower(c); 387 388 if (*pattern == '-' && *(pattern + 1) != EOS && 389 *(pattern + 1) != ']') { 390 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) 391 if (*pattern != EOS) 392 pattern++; 393 pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs); 394 if (pclen == (size_t)-1 || pclen == (size_t)-2) 395 return (RANGE_NOMATCH); 396 pattern += pclen; 397 if (c2 == EOS) 398 return (RANGE_ERROR); 399 400 if (flags & FNM_CASEFOLD) 401 c2 = towlower(c2); 402 403 #ifdef XXX_COLLATE 404 if (table->__collate_load_error ? 405 c <= test && test <= c2 : 406 __wcollate_range_cmp(c, test) <= 0 && 407 __wcollate_range_cmp(test, c2) <= 0) 408 ok = 1; 409 #else 410 if (c <= test && test <= c2) 411 ok = 1; 412 #endif 413 } else if (c == test) 414 ok = 1; 415 } 416 417 *newp = (char *)pattern; 418 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 419 } 420