1 /* 2 * Copyright (c) 1989, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Copyright (c) 2011 The FreeBSD Foundation 9 * All rights reserved. 10 * Portions of this software were developed by David Chisnall 11 * under sponsorship from the FreeBSD Foundation. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)fnmatch.c 8.2 (Berkeley) 4/16/94 38 * $FreeBSD: head/lib/libc/gen/fnmatch.c 254091 2013-08-08 09:04:02Z ache $ 39 */ 40 41 42 /* 43 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. 44 * Compares a filename or pathname to a pattern. 45 */ 46 47 /* 48 * Some notes on multibyte character support: 49 * 1. Patterns with illegal byte sequences match nothing. 50 * 2. Illegal byte sequences in the "string" argument are handled by treating 51 * them as single-byte characters with a value of the first byte of the 52 * sequence cast to wchar_t. 53 * 3. Multibyte conversion state objects (mbstate_t) are passed around and 54 * used for most, but not all, conversions. Further work will be required 55 * to support state-dependent encodings. 56 */ 57 58 #include <fnmatch.h> 59 #include <limits.h> 60 #include <string.h> 61 #include <wchar.h> 62 #include <wctype.h> 63 64 #include "collate.h" 65 66 #define EOS '\0' 67 68 #define RANGE_MATCH 1 69 #define RANGE_NOMATCH 0 70 #define RANGE_ERROR (-1) 71 72 static int rangematch(const char *, wchar_t, int, char **, mbstate_t *); 73 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t, 74 mbstate_t); 75 76 int 77 fnmatch(const char *pattern, const char *string, int flags) 78 { 79 static const mbstate_t initial; 80 81 return (fnmatch1(pattern, string, string, flags, initial, initial)); 82 } 83 84 static int 85 fnmatch1(const char *pattern, const char *string, const char *stringstart, 86 int flags, mbstate_t patmbs, mbstate_t strmbs) 87 { 88 char *newp; 89 char c; 90 wchar_t pc, sc; 91 size_t pclen, sclen; 92 93 for (;;) { 94 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); 95 if (pclen == (size_t)-1 || pclen == (size_t)-2) 96 return (FNM_NOMATCH); 97 pattern += pclen; 98 sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); 99 if (sclen == (size_t)-1 || sclen == (size_t)-2) { 100 sc = (unsigned char)*string; 101 sclen = 1; 102 memset(&strmbs, 0, sizeof(strmbs)); 103 } 104 switch (pc) { 105 case EOS: 106 if ((flags & FNM_LEADING_DIR) && sc == '/') 107 return (0); 108 return (sc == EOS ? 0 : FNM_NOMATCH); 109 case '?': 110 if (sc == EOS) 111 return (FNM_NOMATCH); 112 if (sc == '/' && (flags & FNM_PATHNAME)) 113 return (FNM_NOMATCH); 114 if (sc == '.' && (flags & FNM_PERIOD) && 115 (string == stringstart || 116 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 117 return (FNM_NOMATCH); 118 string += sclen; 119 break; 120 case '*': 121 c = *pattern; 122 /* Collapse multiple stars. */ 123 while (c == '*') 124 c = *++pattern; 125 126 if (sc == '.' && (flags & FNM_PERIOD) && 127 (string == stringstart || 128 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 129 return (FNM_NOMATCH); 130 131 /* Optimize for pattern with * at end or before /. */ 132 if (c == EOS) 133 if (flags & FNM_PATHNAME) 134 return ((flags & FNM_LEADING_DIR) || 135 strchr(string, '/') == NULL ? 136 0 : FNM_NOMATCH); 137 else 138 return (0); 139 else if (c == '/' && flags & FNM_PATHNAME) { 140 if ((string = strchr(string, '/')) == NULL) 141 return (FNM_NOMATCH); 142 break; 143 } 144 145 /* General case, use recursion. */ 146 while (sc != EOS) { 147 if (!fnmatch1(pattern, string, stringstart, 148 flags, patmbs, strmbs)) 149 return (0); 150 sclen = mbrtowc(&sc, string, MB_LEN_MAX, 151 &strmbs); 152 if (sclen == (size_t)-1 || 153 sclen == (size_t)-2) { 154 sc = (unsigned char)*string; 155 sclen = 1; 156 memset(&strmbs, 0, sizeof(strmbs)); 157 } 158 if (sc == '/' && flags & FNM_PATHNAME) 159 break; 160 string += sclen; 161 } 162 return (FNM_NOMATCH); 163 case '[': 164 if (sc == EOS) 165 return (FNM_NOMATCH); 166 if (sc == '/' && (flags & FNM_PATHNAME)) 167 return (FNM_NOMATCH); 168 if (sc == '.' && (flags & FNM_PERIOD) && 169 (string == stringstart || 170 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 171 return (FNM_NOMATCH); 172 173 switch (rangematch(pattern, sc, flags, &newp, 174 &patmbs)) { 175 case RANGE_ERROR: 176 goto norm; 177 case RANGE_MATCH: 178 pattern = newp; 179 break; 180 case RANGE_NOMATCH: 181 return (FNM_NOMATCH); 182 } 183 string += sclen; 184 break; 185 case '\\': 186 if (!(flags & FNM_NOESCAPE)) { 187 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, 188 &patmbs); 189 if (pclen == (size_t)-1 || pclen == (size_t)-2) 190 return (FNM_NOMATCH); 191 pattern += pclen; 192 } 193 /* FALLTHROUGH */ 194 default: 195 norm: 196 if (pc == sc) 197 ; 198 else if ((flags & FNM_CASEFOLD) && 199 (towlower(pc) == towlower(sc))) 200 ; 201 else 202 return (FNM_NOMATCH); 203 string += sclen; 204 break; 205 } 206 } 207 /* NOTREACHED */ 208 } 209 210 static int 211 rangematch(const char *pattern, wchar_t test, int flags, char **newp, 212 mbstate_t *patmbs) 213 { 214 int negate, ok; 215 wchar_t c, c2; 216 size_t pclen; 217 const char *origpat; 218 struct xlocale_collate *table = 219 (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; 220 221 /* 222 * A bracket expression starting with an unquoted circumflex 223 * character produces unspecified results (IEEE 1003.2-1992, 224 * 3.13.2). This implementation treats it like '!', for 225 * consistency with the regular expression syntax. 226 * J.T. Conklin (conklin@ngai.kaleida.com) 227 */ 228 if ( (negate = (*pattern == '!' || *pattern == '^')) ) 229 ++pattern; 230 231 if (flags & FNM_CASEFOLD) 232 test = towlower(test); 233 234 /* 235 * A right bracket shall lose its special meaning and represent 236 * itself in a bracket expression if it occurs first in the list. 237 * -- POSIX.2 2.8.3.2 238 */ 239 ok = 0; 240 origpat = pattern; 241 for (;;) { 242 if (*pattern == ']' && pattern > origpat) { 243 pattern++; 244 break; 245 } else if (*pattern == '\0') { 246 return (RANGE_ERROR); 247 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { 248 return (RANGE_NOMATCH); 249 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) 250 pattern++; 251 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); 252 if (pclen == (size_t)-1 || pclen == (size_t)-2) 253 return (RANGE_NOMATCH); 254 pattern += pclen; 255 256 if (flags & FNM_CASEFOLD) 257 c = towlower(c); 258 259 if (*pattern == '-' && *(pattern + 1) != EOS && 260 *(pattern + 1) != ']') { 261 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) 262 if (*pattern != EOS) 263 pattern++; 264 pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs); 265 if (pclen == (size_t)-1 || pclen == (size_t)-2) 266 return (RANGE_NOMATCH); 267 pattern += pclen; 268 if (c2 == EOS) 269 return (RANGE_ERROR); 270 271 if (flags & FNM_CASEFOLD) 272 c2 = towlower(c2); 273 274 if (table->__collate_load_error ? 275 c <= test && test <= c2 : 276 __collate_range_cmp(table, c, test) <= 0 277 && __collate_range_cmp(table, test, c2) <= 0 278 ) 279 ok = 1; 280 } else if (c == test) 281 ok = 1; 282 } 283 284 *newp = (char *)pattern; 285 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 286 } 287