1 /* @(#)fnmatch.c 8.24 17/08/30 2005-2017 J. Schilling from 8.2 (Berkeley) */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static UConst char sccsid[] =
5 "@(#)fnmatch.c 8.24 17/08/30 2005-2017 J. Schilling from 8.2 (Berkeley)";
6 #endif
7 /*
8 * Copyright (c) 1989, 1993, 1994
9 * The Regents of the University of California. All rights reserved.
10 *
11 * This code is derived from software contributed to Berkeley by
12 * Guido van Rossum.
13 *
14 * Copyright (c) 2005-2017 J. Schilling
15 * Copyright (c) 2011 The FreeBSD Foundation
16 * All rights reserved.
17 * Portions of this software were developed by David Chisnall
18 * under sponsorship from the FreeBSD Foundation.
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 * 1. Redistributions of source code must retain the above copyright
24 * notice, this list of conditions and the following disclaimer.
25 * 2. Redistributions in binary form must reproduce the above copyright
26 * notice, this list of conditions and the following disclaimer in the
27 * documentation and/or other materials provided with the distribution.
28 * 3. Neither the name of the University nor the names of its contributors
29 * may be used to endorse or promote products derived from this software
30 * without specific prior written permission.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
33 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
36 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 * SUCH DAMAGE.
43 */
44
45 #if defined(LIBC_SCCS) && !defined(lint)
46 static UConst char sccsid[] = "@(#)fnmatch.c 8.24 (Berkeley) 08/30/17";
47 #endif /* LIBC_SCCS and not lint */
48 /* "FBSD src/lib/libc/gen/fnmatch.c,v 1.19 2010/04/16 22:29:24 jilles Exp $" */
49
50 /*
51 * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
52 * Compares a filename or pathname to a pattern.
53 */
54
55 /*
56 * Some notes on multibyte character support:
57 * 1. Patterns with illegal byte sequences match nothing.
58 * 2. Illegal byte sequences in the "string" argument are handled by treating
59 * them as single-byte characters with a value of the first byte of the
60 * sequence cast to wchar_t.
61 * 3. Multibyte conversion state objects (mbstate_t) are passed around and
62 * used for most, but not all, conversions. Further work will be required
63 * to support state-dependent encodings.
64 */
65
66 #include <schily/mconfig.h>
67 #include <schily/fnmatch.h>
68 #include <schily/limits.h>
69 #include <schily/string.h>
70 #include <schily/wchar.h>
71 #include <schily/wctype.h>
72 #include <schily/libport.h> /* Define missing prototypes */
73
/*
 * String terminator, used for both the pattern and the string.
 */
#define EOS '\0'

/*
 * Result codes of rangematch():
 *
 * RANGE_MATCH		the character is accepted by the bracket expression
 * RANGE_NOMATCH	the character is rejected by the bracket expression
 * RANGE_ERROR		the bracket expression is malformed (e.g. it has no
 *			closing ']'); fnmatch1() then treats the '[' as an
 *			ordinary character
 */
#define RANGE_MATCH 1
#define RANGE_NOMATCH 0
#define RANGE_ERROR (-1)

/*
 * Longest character class name (the text between "[:" and ":]") that
 * rangematch() accepts; longer names yield RANGE_ERROR.
 */
#define CL_SIZE 32 /* Max size for '[: :]' */

/*
 * Forward declarations of the file-local helpers.
 */
static int rangematch __PR((const char *, wchar_t, int, char **, mbstate_t *));
static int fnmatch1 __PR((const char *, const char *, const char *, int,
	mbstate_t, mbstate_t));
85
#ifndef HAVE_FNMATCH
#undef fnmatch

/*
 * This section is compiled only when HAVE_FNMATCH is not defined. It makes
 * the name fnmatch available as an entry to js_fnmatch().
 *
 * If "#pragma weak" is supported and weak symbols can be linked against as
 * global symbols (HAVE_LINK_WEAK), fnmatch simply becomes a weak alias.
 * The Cygwin compile environment implements #pragma weak incorrectly: the
 * weak symbols end up as local symbols only and thus cannot be used from
 * outside the scope of this source file. For platforms of that kind, a
 * real forwarding function is compiled instead.
 */
#if defined(HAVE_PRAGMA_WEAK) && defined(HAVE_LINK_WEAK)
#pragma weak fnmatch = js_fnmatch
#else
/*
 * fnmatch() - forward all arguments unchanged to js_fnmatch() and hand
 * back its result.
 */
int
fnmatch(pattern, string, flags)
	const char	*pattern;
	const char	*string;
	int		flags;
{
	int	result;

	result = js_fnmatch(pattern, string, flags);
	return (result);
}
#endif	/* HAVE_PRAGMA_WEAK && HAVE_LINK_WEAK */
#endif	/* !HAVE_FNMATCH */
109
/*
 * js_fnmatch() - match "string" against the pattern "pattern".
 *
 * This is the public entry point. It starts fnmatch1() at the beginning of
 * both arguments, with the same zero-filled multibyte conversion state for
 * the pattern and for the string, and returns the result of fnmatch1().
 */
int
js_fnmatch(pattern, string, flags)
	const char	*pattern;
	const char	*string;
	int		flags;
{
	/*
	 * The start state has static storage duration and is therefore
	 * zero-filled without an explicit initializer. It is deliberately
	 * declared without "const" and without initializer:
	 *
	 * -	SunPro C warns about an uninitialized
	 *	"static const mbstate_t" object.
	 * -	GCC warns if we try to initialize it.
	 *
	 * As the POSIX standard forbids mbstate_t from being an array,
	 * the object is copied whenever it is passed as a parameter to
	 * fnmatch1(), so "const" is not needed to protect it.
	 */
	static mbstate_t	start_state;

	return (fnmatch1(pattern, string, string, flags,
						start_state, start_state));
}
128
129 static int
fnmatch1(pattern,string,stringstart,flags,patmbs,strmbs)130 fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs)
131 const char *pattern;
132 const char *string;
133 const char *stringstart;
134 int flags;
135 mbstate_t patmbs;
136 mbstate_t strmbs;
137 {
138 const char *bt_pattern, *bt_string;
139 mbstate_t bt_patmbs, bt_strmbs;
140 char *newp;
141 char c;
142 wchar_t pc, sc;
143 size_t pclen, sclen;
144
145 bt_pattern = bt_string = NULL;
146 for (;;) {
147 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
148 if (pclen == (size_t)-1 || pclen == (size_t)-2)
149 return (FNM_NOMATCH);
150 pattern += pclen;
151 sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
152 if (sclen == (size_t)-1 || sclen == (size_t)-2) {
153 sc = (unsigned char)*string;
154 sclen = 1;
155 memset(&strmbs, 0, sizeof (strmbs));
156 }
157 switch (pc) {
158 case EOS:
159 if ((flags & FNM_LEADING_DIR) && sc == '/')
160 return (0);
161 if (sc == EOS)
162 return (0);
163 goto backtrack;
164 case '?':
165 if (sc == EOS)
166 return (FNM_NOMATCH);
167 if (sc == '/' && (flags & FNM_PATHNAME))
168 goto backtrack;
169 if (sc == '.' && (flags & FNM_PERIOD) &&
170 (string == stringstart ||
171 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
172 goto backtrack;
173 string += sclen;
174 break;
175 case '*':
176 c = *pattern;
177 /* Collapse multiple stars. */
178 while (c == '*')
179 c = *++pattern;
180
181 if (sc == '.' && (flags & FNM_PERIOD) &&
182 (string == stringstart ||
183 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
184 goto backtrack;
185
186 /* Optimize for pattern with * at end or before /. */
187 if (c == EOS) {
188 if (flags & FNM_PATHNAME)
189 return ((flags & FNM_LEADING_DIR) ||
190 strchr(string, '/') == NULL ?
191 0 : FNM_NOMATCH);
192 else
193 return (0);
194 } else if (c == '/' && flags & FNM_PATHNAME) {
195 if ((string = strchr(string, '/')) == NULL)
196 return (FNM_NOMATCH);
197 break;
198 }
199
200 /*
201 * First try the shortest match for the '*' that
202 * could work. We can forget any earlier '*' since
203 * there is no way having it match more characters
204 * can help us, given that we are already here.
205 */
206 bt_pattern = pattern, bt_patmbs = patmbs;
207 bt_string = string, bt_strmbs = strmbs;
208 break;
209 case '[':
210 if (sc == EOS)
211 return (FNM_NOMATCH);
212 if (sc == '/' && (flags & FNM_PATHNAME))
213 goto backtrack;
214 if (sc == '.' && (flags & FNM_PERIOD) &&
215 (string == stringstart ||
216 ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
217 goto backtrack;
218
219 switch (rangematch(pattern, sc, flags, &newp,
220 &patmbs)) {
221 case RANGE_ERROR:
222 goto norm;
223 case RANGE_MATCH:
224 pattern = newp;
225 break;
226 case RANGE_NOMATCH:
227 goto backtrack;
228 }
229 string += sclen;
230 break;
231 case '\\':
232 if (!(flags & FNM_NOESCAPE)) {
233 pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
234 &patmbs);
235 if (pclen == (size_t)-1 || pclen == (size_t)-2)
236 return (FNM_NOMATCH);
237 if (pclen == 0)
238 return (FNM_NOMATCH);
239 pattern += pclen;
240 }
241 /* FALLTHROUGH */
242 default:
243 norm:
244 string += sclen;
245 if (pc == sc) {
246 ;
247 } else if ((flags & FNM_CASEFOLD) &&
248 (towlower(pc) == towlower(sc))) {
249 ;
250 } else {
251 backtrack:
252 /*
253 * If we have a mismatch (other than hitting
254 * the end of the string), go back to the last
255 * '*' seen and have it match one additional
256 * character.
257 */
258 if (bt_pattern == NULL)
259 return (FNM_NOMATCH);
260 sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX,
261 &bt_strmbs);
262 if (sclen == (size_t)-1 ||
263 sclen == (size_t)-2) {
264 sc = (unsigned char)*bt_string;
265 sclen = 1;
266 memset(&bt_strmbs, 0,
267 sizeof (bt_strmbs));
268 }
269 if (sc == EOS)
270 return (FNM_NOMATCH);
271 if (sc == '/' && flags & FNM_PATHNAME)
272 return (FNM_NOMATCH);
273 bt_string += sclen;
274 pattern = bt_pattern, patmbs = bt_patmbs;
275 string = bt_string, strmbs = bt_strmbs;
276 }
277 break;
278 }
279 }
280 /* NOTREACHED */
281 }
282
283 #ifdef PROTOTYPES
284 static int
rangematch(const char * pattern,wchar_t test,int flags,char ** newp,mbstate_t * patmbs)285 rangematch(const char *pattern, wchar_t test, int flags, char **newp,
286 mbstate_t *patmbs)
287 #else
288 static int
289 rangematch(pattern, test, flags, newp, patmbs)
290 const char *pattern;
291 wchar_t test;
292 int flags;
293 char **newp;
294 mbstate_t *patmbs;
295 #endif
296 {
297 int negate, ok;
298 wchar_t c, c2;
299 wchar_t otest = test;
300 size_t pclen;
301 const char *origpat;
302 #ifdef XXX_COLLATE
303 struct xlocale_collate *table = (struct xlocale_collate *)
304 __get_locale()->components[XLC_COLLATE];
305 #endif
306
307 /*
308 * A bracket expression starting with an unquoted circumflex
309 * character produces unspecified results (IEEE 1003.2-1992,
310 * 3.13.2). This implementation treats it like '!', for
311 * consistency with the regular expression syntax.
312 * J.T. Conklin (conklin@ngai.kaleida.com)
313 */
314 if ((negate = (*pattern == '!' || *pattern == '^')))
315 ++pattern;
316
317 if (flags & FNM_CASEFOLD)
318 test = towlower(test);
319
320 /*
321 * A right bracket shall lose its special meaning and represent
322 * itself in a bracket expression if it occurs first in the list.
323 * -- POSIX.2 2.8.3.2
324 */
325 ok = 0;
326 origpat = pattern;
327 for (;;) {
328 int quoted = 0;
329
330 if (*pattern == ']' && pattern > origpat) {
331 pattern++;
332 break;
333 } else if (*pattern == '\0') {
334 return (RANGE_ERROR);
335 } else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
336 return (RANGE_NOMATCH);
337 } else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) {
338 pattern++;
339 quoted++;
340 }
341 pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
342 if (pclen == (size_t)-1 || pclen == (size_t)-2)
343 return (RANGE_NOMATCH);
344 pattern += pclen;
345
346 if (!quoted && c == '[') {
347 if (pattern[0] == ':') {
348 char class[CL_SIZE+1];
349 char *pc = class;
350 const char *p;
351
352 p = pattern + 1; /* Eat ':' */
353 for (;;) {
354 if (*p == '\0')
355 return (RANGE_ERROR);
356 if (*p == ':' && p[1] == ']')
357 break;
358 if (pc >= &class[CL_SIZE])
359 return (RANGE_ERROR);
360 *pc++ = *p++;
361 }
362 if (pc == class)
363 return (RANGE_ERROR);
364 *pc = '\0';
365 pattern = p + 2; /* Skip ":]" */
366 if (iswctype(otest, wctype(class))) {
367 ok = 1;
368 } else if (flags & FNM_CASEFOLD) {
369 /*
370 * Convert to the other case
371 */
372 if (strcmp(class, "upper") == 0)
373 if (iswctype(otest,
374 wctype("lower")))
375 ok = 1;
376 else if (strcmp(class, "lower") == 0)
377 if (iswctype(otest,
378 wctype("upper")))
379 ok = 1;
380 }
381 continue;
382 }
383 }
384
385 if (flags & FNM_CASEFOLD)
386 c = towlower(c);
387
388 if (*pattern == '-' && *(pattern + 1) != EOS &&
389 *(pattern + 1) != ']') {
390 if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
391 if (*pattern != EOS)
392 pattern++;
393 pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
394 if (pclen == (size_t)-1 || pclen == (size_t)-2)
395 return (RANGE_NOMATCH);
396 pattern += pclen;
397 if (c2 == EOS)
398 return (RANGE_ERROR);
399
400 if (flags & FNM_CASEFOLD)
401 c2 = towlower(c2);
402
403 #ifdef XXX_COLLATE
404 if (table->__collate_load_error ?
405 c <= test && test <= c2 :
406 __wcollate_range_cmp(c, test) <= 0 &&
407 __wcollate_range_cmp(test, c2) <= 0)
408 ok = 1;
409 #else
410 if (c <= test && test <= c2)
411 ok = 1;
412 #endif
413 } else if (c == test)
414 ok = 1;
415 }
416
417 *newp = (char *)pattern;
418 return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
419 }
420