1 /* @(#)fnmatch.c	8.25 18/01/12 2005-2018 J. Schilling from 8.2 (Berkeley) */
2 #include <schily/mconfig.h>
3 #ifndef lint
4 static	UConst char sccsid[] =
5 	"@(#)fnmatch.c	8.25 18/01/12 2005-2018 J. Schilling from 8.2 (Berkeley)";
6 #endif
7 /*
8  * Copyright (c) 1989, 1993, 1994
9  *	The Regents of the University of California.  All rights reserved.
10  *
11  * This code is derived from software contributed to Berkeley by
12  * Guido van Rossum.
13  *
14  * Copyright (c) 2005-2018 J. Schilling
15  * Copyright (c) 2011 The FreeBSD Foundation
16  * All rights reserved.
17  * Portions of this software were developed by David Chisnall
18  * under sponsorship from the FreeBSD Foundation.
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  * 1. Redistributions of source code must retain the above copyright
24  *    notice, this list of conditions and the following disclaimer.
25  * 2. Redistributions in binary form must reproduce the above copyright
26  *    notice, this list of conditions and the following disclaimer in the
27  *    documentation and/or other materials provided with the distribution.
28  * 3. Neither the name of the University nor the names of its contributors
29  *    may be used to endorse or promote products derived from this software
30  *    without specific prior written permission.
31  *
32  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
33  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
36  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42  * SUCH DAMAGE.
43  */
44 
45 #if defined(LIBC_SCCS) && !defined(lint)
46 static UConst char sccsid[] = "@(#)fnmatch.c	8.25 (Berkeley) 01/12/18";
47 #endif /* LIBC_SCCS and not lint */
48 /* "FBSD src/lib/libc/gen/fnmatch.c,v 1.19 2010/04/16 22:29:24 jilles Exp $" */
49 
50 /*
51  * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
52  * Compares a filename or pathname to a pattern.
53  */
54 
55 /*
56  * Some notes on multibyte character support:
57  * 1. Patterns with illegal byte sequences match nothing.
58  * 2. Illegal byte sequences in the "string" argument are handled by treating
59  *    them as single-byte characters with a value of the first byte of the
60  *    sequence cast to wchar_t.
61  * 3. Multibyte conversion state objects (mbstate_t) are passed around and
62  *    used for most, but not all, conversions. Further work will be required
63  *    to support state-dependent encodings.
64  */
65 
66 #include <schily/mconfig.h>
67 #include <schily/fnmatch.h>
68 #include <schily/limits.h>
69 #include <schily/string.h>
70 #include <schily/wchar.h>
71 #include <schily/wctype.h>
72 #include <schily/libport.h>	/* Define missing prototypes */
73 
74 #define	EOS	'\0'
75 
76 #define	RANGE_MATCH	1
77 #define	RANGE_NOMATCH	0
78 #define	RANGE_ERROR	(-1)
79 
80 #define	CL_SIZE		32	/* Max size for '[: :]'			*/
81 
82 static int rangematch __PR((const char *, wchar_t, int, char **, mbstate_t *));
83 static int fnmatch1 __PR((const char *, const char *, const char *, int,
84 				mbstate_t, mbstate_t));
85 
86 #ifndef	HAVE_FNMATCH
87 #undef	fnmatch
88 
89 /*
90  * The Cygwin compile environment incorrectly implements #pragma weak.
91  * The weak symbols are only defined as local symbols making it impossible
92  * to use them from outside the scope of this source file.
93  * A platform that allows linking with global symbols has HAVE_LINK_WEAK
94  * defined.
95  */
96 #if defined(HAVE_PRAGMA_WEAK) && defined(HAVE_LINK_WEAK)
97 #pragma	weak fnmatch =	js_fnmatch
98 #else
/*
 * Public fnmatch() entry point, compiled only when fnmatch cannot be
 * provided as a weak alias for js_fnmatch().
 * All three arguments are handed to js_fnmatch() unchanged and its
 * result is returned as is.
 */
int
fnmatch(pattern, string, flags)
	const char	*pattern;
	const char	*string;
	int		flags;
{
	int	ret;

	ret = js_fnmatch(pattern, string, flags);
	return (ret);
}
107 #endif
108 #endif
109 
/*
 * js_fnmatch() - match "string" against the shell pattern "pattern".
 *
 * The work is done by fnmatch1(); this function only supplies the start
 * of the string (needed there for the leading period check) and a fresh
 * multibyte conversion state for the pattern and for the string.
 *
 * Returns the result of fnmatch1(): 0 for a match, FNM_NOMATCH otherwise.
 */
int
js_fnmatch(pattern, string, flags)
	const char	*pattern;
	const char	*string;
	int		flags;
{
	/*
	 * The object is static and has no initializer on purpose:
	 * SunPro C warns about "static const mbstate_t initial;" because
	 * it is not initialized, while GCC warns if we try to initialize it.
	 * As the POSIX standard forbids mbstate_t from being an array,
	 * "const" is not needed: the object is always copied when it is
	 * used as a parameter for fnmatch1().
	 */
	static mbstate_t	initial;
	int			ret;

	ret = fnmatch1(pattern, string, string, flags, initial, initial);
	return (ret);
}
128 
/*
 * fnmatch1() - the matching engine behind js_fnmatch().
 *
 * pattern	the (remaining) pattern
 * string	the (remaining) string to match
 * stringstart	the start of the whole string, used to detect a leading
 *		period for FNM_PERIOD
 * flags	the FNM_* flags
 * patmbs	multibyte conversion state for the pattern
 * strmbs	multibyte conversion state for the string
 *
 * Both conversion states are passed by value, so the function works on
 * private copies.
 *
 * The function walks pattern and string in parallel, one (wide) character
 * at a time. Instead of recursing for '*', it remembers the position behind
 * the last '*' in bt_pattern/bt_string and returns there on a mismatch.
 *
 * Returns 0 if the string matches the pattern, FNM_NOMATCH otherwise.
 */
static int
fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs)
	const char	*pattern;
	const char	*string;
	const char	*stringstart;
	int		flags;
	mbstate_t	patmbs;
	mbstate_t	strmbs;
{
	const char *bt_pattern, *bt_string;	/* Restart point behind last '*' */
	mbstate_t bt_patmbs, bt_strmbs;		/* Conversion states for restart */
	char *newp;				/* Pattern behind a matched [...] */
	char c;
	wchar_t pc, sc;				/* Current pattern/string char	*/
	size_t pclen, sclen;			/* Byte lengths of pc and sc	*/

	bt_pattern = bt_string = NULL;		/* No '*' seen yet */
	for (;;) {
		/*
		 * Fetch the next pattern character.
		 * A pattern with an illegal byte sequence matches nothing.
		 */
		pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
		if (pclen == (size_t)-1 || pclen == (size_t)-2)
			return (FNM_NOMATCH);
		pattern += pclen;
		/*
		 * Fetch the next string character, but do not consume it yet.
		 * An illegal byte sequence in the string is taken as a single
		 * byte and the conversion state is reset.
		 */
		sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
		if (sclen == (size_t)-1 || sclen == (size_t)-2) {
			sc = (unsigned char)*string;
			sclen = 1;
			memset(&strmbs, 0, sizeof (strmbs));
		}
		switch (pc) {
		case EOS:
			/*
			 * End of pattern: this is a match if the string ends
			 * here too, or - with FNM_LEADING_DIR - if the rest of
			 * the string starts with a '/'.
			 */
			if ((flags & FNM_LEADING_DIR) && sc == '/')
				return (0);
			if (sc == EOS)
				return (0);
			goto backtrack;
		case '?':
			if (sc == EOS)
				return (FNM_NOMATCH);
			/* With FNM_PATHNAME a '/' must be matched explicitly */
			if (sc == '/' && (flags & FNM_PATHNAME))
				goto backtrack;
			/*
			 * With FNM_PERIOD a '.' at the start of the string (or
			 * directly behind a '/' if FNM_PATHNAME is set as
			 * well) must be matched explicitly.
			 */
			if (sc == '.' && (flags & FNM_PERIOD) &&
			    (string == stringstart ||
			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
				goto backtrack;
			string += sclen;
			break;
		case '*':
			c = *pattern;
			/* Collapse multiple stars. */
			while (c == '*')
				c = *++pattern;

			/* Same leading period rule as for '?' above */
			if (sc == '.' && (flags & FNM_PERIOD) &&
			    (string == stringstart ||
			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
				goto backtrack;

			/* Optimize for pattern with * at end or before /. */
			if (c == EOS) {
				/*
				 * A trailing '*' matches the rest of the
				 * string. With FNM_PATHNAME the rest must not
				 * contain a '/' unless FNM_LEADING_DIR is set.
				 */
				if (flags & FNM_PATHNAME)
					return ((flags & FNM_LEADING_DIR) ||
					    strchr(string, '/') == NULL ?
					    0 : FNM_NOMATCH);
				else
					return (0);
			} else if (c == '/' && flags & FNM_PATHNAME) {
				/*
				 * The '*' cannot match a '/', so it has to
				 * match everything up to the next '/' in the
				 * string.
				 */
				if ((string = strchr(string, '/')) == NULL)
					return (FNM_NOMATCH);
				break;
			}

			/*
			 * First try the shortest match for the '*' that
			 * could work. We can forget any earlier '*' since
			 * there is no way having it match more characters
			 * can help us, given that we are already here.
			 */
			bt_pattern = pattern, bt_patmbs = patmbs;
			bt_string = string, bt_strmbs = strmbs;
			break;
		case '[':
			if (sc == EOS)
				return (FNM_NOMATCH);
			/* Same '/' and leading period rules as for '?' above */
			if (sc == '/' && (flags & FNM_PATHNAME))
				goto backtrack;
			if (sc == '.' && (flags & FNM_PERIOD) &&
			    (string == stringstart ||
			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
				goto backtrack;

			switch (rangematch(pattern, sc, flags, &newp,
			    &patmbs)) {
			case RANGE_ERROR:
				/*
				 * Not a usable bracket expression:
				 * compare the '[' like an ordinary character.
				 */
				goto norm;
			case RANGE_MATCH:
				/* Continue behind the bracket expression */
				pattern = newp;
				break;
			case RANGE_NOMATCH:
				goto backtrack;
			}
			string += sclen;
			break;
		case '\\':
			if (!(flags & FNM_NOESCAPE)) {
				/*
				 * Replace pc by the escaped character.
				 * A backslash at the end of the pattern
				 * (pclen == 0) matches nothing.
				 */
				pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
				    &patmbs);
				if (pclen == (size_t)-1 || pclen == (size_t)-2)
					return (FNM_NOMATCH);
				if (pclen == 0)
					return (FNM_NOMATCH);
				pattern += pclen;
			}
			/* FALLTHROUGH */
		default:
		norm:
			/* Ordinary character: must be equal to sc */
			string += sclen;
			if (pc == sc) {
				;
			} else if ((flags & FNM_CASEFOLD) &&
				    (towlower(pc) == towlower(sc))) {
				;
			} else {
		backtrack:
				/*
				 * If we have a mismatch (other than hitting
				 * the end of the string), go back to the last
				 * '*' seen and have it match one additional
				 * character.
				 */
				if (bt_pattern == NULL)
					return (FNM_NOMATCH);
				/*
				 * Decode the character the '*' has to swallow,
				 * using the same rule for illegal byte
				 * sequences as at the top of the loop.
				 */
				sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX,
				    &bt_strmbs);
				if (sclen == (size_t)-1 ||
				    sclen == (size_t)-2) {
					sc = (unsigned char)*bt_string;
					sclen = 1;
					memset(&bt_strmbs, 0,
					    sizeof (bt_strmbs));
				}
				/* Nothing left for the '*' to swallow */
				if (sc == EOS)
					return (FNM_NOMATCH);
				/* With FNM_PATHNAME '*' never matches a '/' */
				if (sc == '/' && flags & FNM_PATHNAME)
					return (FNM_NOMATCH);
				bt_string += sclen;
				pattern = bt_pattern, patmbs = bt_patmbs;
				string = bt_string, strmbs = bt_strmbs;
			}
			break;
		}
	}
	/* NOTREACHED */
}
282 
283 #ifdef	PROTOTYPES
284 static int
rangematch(const char * pattern,wchar_t test,int flags,char ** newp,mbstate_t * patmbs)285 rangematch(const char *pattern, wchar_t test, int flags, char **newp,
286 	    mbstate_t *patmbs)
287 #else
288 static int
289 rangematch(pattern, test, flags, newp, patmbs)
290 	const char *pattern;
291 	wchar_t test;
292 	int flags;
293 	char **newp;
294 	mbstate_t *patmbs;
295 #endif
296 {
297 	int negate, ok;
298 	wchar_t c, c2;
299 	wchar_t	otest = test;
300 	size_t pclen;
301 	const char *origpat;
302 /*#define	XXX_COLLATE*/
303 #ifdef	XXX_COLLATE
304 	locale_t	locale = __get_locale();
305 	struct xlocale_collate *table = (struct xlocale_collate *)
306 				    locale->components[XLC_COLLATE];
307 #endif
308 
309 	/*
310 	 * A bracket expression starting with an unquoted circumflex
311 	 * character produces unspecified results (IEEE 1003.2-1992,
312 	 * 3.13.2).  This implementation treats it like '!', for
313 	 * consistency with the regular expression syntax.
314 	 * J.T. Conklin (conklin@ngai.kaleida.com)
315 	 */
316 	if ((negate = (*pattern == '!' || *pattern == '^')))
317 		++pattern;
318 
319 	if (flags & FNM_CASEFOLD)
320 		test = towlower(test);
321 
322 	/*
323 	 * A right bracket shall lose its special meaning and represent
324 	 * itself in a bracket expression if it occurs first in the list.
325 	 * -- POSIX.2 2.8.3.2
326 	 */
327 	ok = 0;
328 	origpat = pattern;
329 	for (;;) {
330 		int	quoted = 0;
331 
332 		if (*pattern == ']' && pattern > origpat) {
333 			pattern++;
334 			break;
335 		} else if (*pattern == '\0') {
336 			return (RANGE_ERROR);
337 		} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
338 			return (RANGE_NOMATCH);
339 		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) {
340 			pattern++;
341 			quoted++;
342 		}
343 		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
344 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
345 			return (RANGE_NOMATCH);
346 		pattern += pclen;
347 
348 		/*
349 		 * "[" initiates a special expression
350 		 *	[: :]	A character class like [:upper:]
351 		 *	[= =]	An equivalence character class like [=o=] o-like
352 		 *	[. .]	A collating symbol like [.ch.]
353 		 */
354 		if (!quoted && c == '[') {
355 			const char	*p;
356 
357 			if (pattern[0] == ':') {	/* [: :] char class */
358 				char	class[CL_SIZE+1];
359 				char	*pc = class;
360 
361 				p = pattern + 1;	/* Eat ':' */
362 				for (;;) {
363 					if (*p == '\0')
364 						return (RANGE_ERROR);
365 					if (*p == ':' && p[1] == ']')
366 						break;
367 					if (pc >= &class[CL_SIZE])
368 						return (RANGE_ERROR);
369 					*pc++ = *p++;
370 				}
371 				if (pc == class)
372 					return (RANGE_ERROR);
373 				*pc = '\0';
374 				pattern = p + 2;	/* Skip ":]" */
375 				if (iswctype(otest, wctype(class))) {
376 					ok = 1;
377 				} else if (flags & FNM_CASEFOLD) {
378 					/*
379 					 * Convert to the other case
380 					 */
381 					if (strcmp(class, "upper") == 0) {
382 						if (iswctype(otest,
383 						    wctype("lower")))
384 							ok = 1;
385 					} else if (strcmp(class, "lower") == 0) {
386 						if (iswctype(otest,
387 						    wctype("upper")))
388 							ok = 1;
389 					}
390 				}
391 				continue;
392 			} else if (pattern[0] == '=') {	/* [= =] equ. class */
393 				p = pattern + 1;	/* Eat '=' */
394 
395 				pclen = mbrtowc(&c, p, MB_LEN_MAX, patmbs);
396 				if (pclen == (size_t)-1 || pclen == (size_t)-2)
397 					return (RANGE_NOMATCH);
398 				p += pclen;
399 
400 /*
401  * Wenn es hier nicht mit =] endet - also l�nger ist -, dann ist es ein Collating Symbol.
402  */
403 				if (*p != '=')
404 					goto is_coll;
405 #ifdef	XXX_COLLATE
406 				__collate_equiv_value(locale, &c, 1);
407 #endif
408 
409 				pattern = p + 2;	/* Skip "=]" */
410 			} else if (pattern[0] == '.') {	/* [. .] collate sym */
411 				p = pattern + 1;	/* Eat '.' */
412 is_coll:
413 
414 				pattern = p + 2;	/* Skip ".]" */
415 			}
416 		}
417 
418 		if (flags & FNM_CASEFOLD)
419 			c = towlower(c);
420 
421 		if (*pattern == '-' && *(pattern + 1) != EOS &&
422 		    *(pattern + 1) != ']') {
423 			if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
424 				if (*pattern != EOS)
425 					pattern++;
426 			pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
427 			if (pclen == (size_t)-1 || pclen == (size_t)-2)
428 				return (RANGE_NOMATCH);
429 			pattern += pclen;
430 			if (c2 == EOS)
431 				return (RANGE_ERROR);
432 
433 			if (flags & FNM_CASEFOLD)
434 				c2 = towlower(c2);
435 
436 #ifdef	XXX_COLLATE
437 			if (table->__collate_load_error ?
438 			    c <= test && test <= c2 :
439 			    __wcollate_range_cmp(c, test) <= 0 &&
440 			    __wcollate_range_cmp(test, c2) <= 0)
441 				ok = 1;
442 #else
443 			if (c <= test && test <= c2)
444 				ok = 1;
445 #endif
446 		} else if (c == test)
447 			ok = 1;
448 	}
449 
450 	*newp = (char *)pattern;
451 	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
452 }
453