1 /* 2 * Copyright (c) 1989, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)fnmatch.c 8.2 (Berkeley) 4/16/94 33 * $FreeBSD: src/lib/libc/gen/fnmatch.c,v 1.18 2007/01/09 00:27:53 imp Exp $ 34 * $DragonFly: src/lib/libc/gen/fnmatch.c,v 1.6 2005/11/13 00:07:42 swildner Exp $ 35 */ 36 37 /* 38 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6. 39 * Compares a filename or pathname to a pattern. 40 */ 41 42 /* 43 * Some notes on multibyte character support: 44 * 1. Patterns with illegal byte sequences match nothing. 45 * 2. Illegal byte sequences in the "string" argument are handled by treating 46 * them as single-byte characters with a value of the first byte of the 47 * sequence cast to wchar_t. 48 * 3. Multibyte conversion state objects (mbstate_t) are passed around and 49 * used for most, but not all, conversions. Further work will be required 50 * to support state-dependent encodings. 51 */ 52 53 #include <fnmatch.h> 54 #include <limits.h> 55 #include <string.h> 56 #include <wchar.h> 57 #include <wctype.h> 58 59 #include "collate.h" 60 61 #define EOS '\0' 62 63 #define RANGE_MATCH 1 64 #define RANGE_NOMATCH 0 65 #define RANGE_ERROR (-1) 66 67 static int rangematch(const char *, wchar_t, int, char **, mbstate_t *); 68 static int fnmatch1(const char *, const char *, int, mbstate_t, mbstate_t); 69 70 int 71 fnmatch(const char *pattern, const char *string, int flags) 72 { 73 static const mbstate_t initial; 74 75 return (fnmatch1(pattern, string, flags, initial, initial)); 76 } 77 78 static int 79 fnmatch1(const char *pattern, const char *string, int flags, mbstate_t patmbs, 80 mbstate_t strmbs) 81 { 82 const char *stringstart; 83 char *newp; 84 char c; 85 wchar_t pc, sc; 86 size_t pclen, sclen; 87 88 for (stringstart = string;;) { 89 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs); 90 if (pclen == (size_t)-1 || pclen == (size_t)-2) 91 return (FNM_NOMATCH); 92 pattern += pclen; 93 sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs); 94 if (sclen == (size_t)-1 || sclen == (size_t)-2) { 95 sc = (unsigned char)*string; 96 sclen = 1; 97 memset(&strmbs, 0, sizeof(strmbs)); 98 } 99 switch (pc) { 100 case EOS: 101 if ((flags & FNM_LEADING_DIR) && sc == '/') 102 return (0); 103 return (sc == EOS ? 0 : FNM_NOMATCH); 104 case '?': 105 if (sc == EOS) 106 return (FNM_NOMATCH); 107 if (sc == '/' && (flags & FNM_PATHNAME)) 108 return (FNM_NOMATCH); 109 if (sc == '.' && (flags & FNM_PERIOD) && 110 (string == stringstart || 111 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 112 return (FNM_NOMATCH); 113 string += sclen; 114 break; 115 case '*': 116 c = *pattern; 117 /* Collapse multiple stars. */ 118 while (c == '*') 119 c = *++pattern; 120 121 if (sc == '.' && (flags & FNM_PERIOD) && 122 (string == stringstart || 123 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 124 return (FNM_NOMATCH); 125 126 /* Optimize for pattern with * at end or before /. */ 127 if (c == EOS) 128 if (flags & FNM_PATHNAME) 129 return ((flags & FNM_LEADING_DIR) || 130 strchr(string, '/') == NULL ? 131 0 : FNM_NOMATCH); 132 else 133 return (0); 134 else if (c == '/' && flags & FNM_PATHNAME) { 135 if ((string = strchr(string, '/')) == NULL) 136 return (FNM_NOMATCH); 137 break; 138 } 139 140 /* General case, use recursion. */ 141 while (sc != EOS) { 142 if (!fnmatch1(pattern, string, 143 flags & ~FNM_PERIOD, patmbs, strmbs)) 144 return (0); 145 sclen = mbrtowc(&sc, string, MB_LEN_MAX, 146 &strmbs); 147 if (sclen == (size_t)-1 || 148 sclen == (size_t)-2) { 149 sc = (unsigned char)*string; 150 sclen = 1; 151 memset(&strmbs, 0, sizeof(strmbs)); 152 } 153 if (sc == '/' && flags & FNM_PATHNAME) 154 break; 155 string += sclen; 156 } 157 return (FNM_NOMATCH); 158 case '[': 159 if (sc == EOS) 160 return (FNM_NOMATCH); 161 if (sc == '/' && (flags & FNM_PATHNAME)) 162 return (FNM_NOMATCH); 163 if (sc == '.' && (flags & FNM_PERIOD) && 164 (string == stringstart || 165 ((flags & FNM_PATHNAME) && *(string - 1) == '/'))) 166 return (FNM_NOMATCH); 167 168 switch (rangematch(pattern, sc, flags, &newp, 169 &patmbs)) { 170 case RANGE_ERROR: 171 goto norm; 172 case RANGE_MATCH: 173 pattern = newp; 174 break; 175 case RANGE_NOMATCH: 176 return (FNM_NOMATCH); 177 } 178 string += sclen; 179 break; 180 case '\\': 181 if (!(flags & FNM_NOESCAPE)) { 182 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, 183 &patmbs); 184 if (pclen == (size_t)-1 || pclen == (size_t)-2) 185 return (FNM_NOMATCH); 186 if (pclen == 0) 187 pc = '\\'; 188 pattern += pclen; 189 } 190 /* FALLTHROUGH */ 191 default: 192 norm: 193 if (pc == sc) 194 ; 195 else if ((flags & FNM_CASEFOLD) && 196 (towlower(pc) == towlower(sc))) 197 ; 198 else 199 return (FNM_NOMATCH); 200 string += sclen; 201 break; 202 } 203 } 204 /* NOTREACHED */ 205 } 206 207 static int 208 rangematch(const char *pattern, wchar_t test, int flags, char **newp, 209 mbstate_t *patmbs) 210 { 211 int negate, ok; 212 wchar_t c, c2; 213 size_t pclen; 214 const char *origpat; 215 216 /* 217 * A bracket expression starting with an unquoted circumflex 218 * character produces unspecified results (IEEE 1003.2-1992, 219 * 3.13.2). This implementation treats it like '!', for 220 * consistency with the regular expression syntax. 221 * J.T. Conklin (conklin@ngai.kaleida.com) 222 */ 223 if ((negate = (*pattern == '!' || *pattern == '^'))) 224 ++pattern; 225 226 if (flags & FNM_CASEFOLD) 227 test = towlower(test); 228 229 /* 230 * A right bracket shall lose its special meaning and represent 231 * itself in a bracket expression if it occurs first in the list. 232 * -- POSIX.2 2.8.3.2 233 */ 234 ok = 0; 235 origpat = pattern; 236 for (;;) { 237 if (*pattern == ']' && pattern > origpat) { 238 pattern++; 239 break; 240 } else if (*pattern == '\0') { 241 return (RANGE_ERROR); 242 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) { 243 return (RANGE_NOMATCH); 244 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) { 245 pattern++; 246 } 247 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs); 248 if (pclen == (size_t)-1 || pclen == (size_t)-2) 249 return (RANGE_NOMATCH); 250 pattern += pclen; 251 252 if (flags & FNM_CASEFOLD) 253 c = towlower(c); 254 255 if (*pattern == '-' && *(pattern + 1) != EOS && 256 *(pattern + 1) != ']') { 257 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE)) { 258 if (*pattern != EOS) 259 pattern++; 260 } 261 pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs); 262 if (pclen == (size_t)-1 || pclen == (size_t)-2) 263 return (RANGE_NOMATCH); 264 pattern += pclen; 265 if (c2 == EOS) 266 return (RANGE_ERROR); 267 268 if (flags & FNM_CASEFOLD) 269 c2 = towlower(c2); 270 271 if (__collate_load_error ? 272 c <= test && test <= c2 : 273 __collate_range_cmp(c, test) <= 0 274 && __collate_range_cmp(test, c2) <= 0 275 ) 276 ok = 1; 277 } else if (c == test) { 278 ok = 1; 279 } 280 } 281 282 *newp = __DECONST(char *, pattern); 283 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH); 284 } 285