1 /* @(#)fnmatch.c	8.24 17/08/30 2005-2017 J. Schilling from 8.2 (Berkeley) */
2 #include <schily/mconfig.h>
#ifndef lint
/*
 * Identification string for this source file (name, version, date and
 * origin).  It is left out completely when "lint" is defined.
 */
static	UConst char sccsid[] =
	"@(#)fnmatch.c	8.24 17/08/30 2005-2017 J. Schilling from 8.2 (Berkeley)";
#endif
7 /*
8  * Copyright (c) 1989, 1993, 1994
9  *	The Regents of the University of California.  All rights reserved.
10  *
11  * This code is derived from software contributed to Berkeley by
12  * Guido van Rossum.
13  *
14  * Copyright (c) 2005-2017 J. Schilling
15  * Copyright (c) 2011 The FreeBSD Foundation
16  * All rights reserved.
17  * Portions of this software were developed by David Chisnall
18  * under sponsorship from the FreeBSD Foundation.
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  * 1. Redistributions of source code must retain the above copyright
24  *    notice, this list of conditions and the following disclaimer.
25  * 2. Redistributions in binary form must reproduce the above copyright
26  *    notice, this list of conditions and the following disclaimer in the
27  *    documentation and/or other materials provided with the distribution.
28  * 3. Neither the name of the University nor the names of its contributors
29  *    may be used to endorse or promote products derived from this software
30  *    without specific prior written permission.
31  *
32  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
33  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
36  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
38  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
39  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
40  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
41  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42  * SUCH DAMAGE.
43  */
44 
#if defined(LIBC_SCCS) && !defined(lint)
/*
 * Berkeley style identification string, compiled only when LIBC_SCCS is
 * defined and "lint" is not.
 * NOTE(review): a static object that is also named sccsid is defined near
 * the top of this file whenever "lint" is undefined, so defining LIBC_SCCS
 * looks like it would result in a duplicate definition - confirm before
 * enabling LIBC_SCCS.
 */
static UConst char sccsid[] = "@(#)fnmatch.c	8.24 (Berkeley) 08/30/17";
#endif /* LIBC_SCCS and not lint */
48 /* "FBSD src/lib/libc/gen/fnmatch.c,v 1.19 2010/04/16 22:29:24 jilles Exp $" */
49 
50 /*
51  * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
52  * Compares a filename or pathname to a pattern.
53  */
54 
55 /*
56  * Some notes on multibyte character support:
57  * 1. Patterns with illegal byte sequences match nothing.
58  * 2. Illegal byte sequences in the "string" argument are handled by treating
59  *    them as single-byte characters with a value of the first byte of the
60  *    sequence cast to wchar_t.
61  * 3. Multibyte conversion state objects (mbstate_t) are passed around and
62  *    used for most, but not all, conversions. Further work will be required
63  *    to support state-dependent encodings.
64  */
65 
66 #include <schily/mconfig.h>
67 #include <schily/fnmatch.h>
68 #include <schily/limits.h>
69 #include <schily/string.h>
70 #include <schily/wchar.h>
71 #include <schily/wctype.h>
72 #include <schily/libport.h>	/* Define missing prototypes */
73 
/* End of string marker: the terminating null character. */
#define	EOS	'\0'

/*
 * Return codes of rangematch():
 * RANGE_MATCH	 the character is accepted by the bracket expression,
 * RANGE_NOMATCH it is rejected,
 * RANGE_ERROR	 the bracket expression could not be parsed; fnmatch1()
 *		 then compares the '[' as an ordinary character.
 */
#define	RANGE_MATCH	1
#define	RANGE_NOMATCH	0
#define	RANGE_ERROR	(-1)

/*
 * Longest character class name (the text between "[:" and ":]") that
 * rangematch() accepts; its local buffer holds CL_SIZE+1 bytes.
 */
#define	CL_SIZE		32	/* Max size for '[: :]'			*/

/*
 * Forward declarations of the file local helpers.
 * __PR() is defined outside of this file; presumably it yields the
 * parameter list only for compilers that support prototypes - verify
 * against the schily headers.
 */
static int rangematch __PR((const char *, wchar_t, int, char **, mbstate_t *));
static int fnmatch1 __PR((const char *, const char *, const char *, int,
				mbstate_t, mbstate_t));
85 
86 #ifndef	HAVE_FNMATCH
87 #undef	fnmatch
88 
89 /*
90  * The Cygwin compile environment incorrectly implements #pragma weak.
91  * The weak symbols are only defined as local symbols making it impossible
92  * to use them from outside the scope of this source file.
93  * A platform that allows linking with global symbols has HAVE_LINK_WEAK
94  * defined.
95  */
96 #if defined(HAVE_PRAGMA_WEAK) && defined(HAVE_LINK_WEAK)
97 #pragma	weak fnmatch =	js_fnmatch
98 #else
/*
 * Real fnmatch() function, compiled when fnmatch cannot be set up as a
 * weak alias for js_fnmatch().  All arguments are handed on unchanged
 * and the result of js_fnmatch() is returned as is.
 */
int
fnmatch(pattern, string, flags)
	const char	*pattern;
	const char	*string;
	int		flags;
{
	int	result;

	result = js_fnmatch(pattern, string, flags);
	return (result);
}
107 #endif
108 #endif
109 
/*
 * Match "string" against the shell pattern "pattern" under control of
 * the FNM_* bits in "flags".
 *
 * The work is done by fnmatch1(), which is started at the beginning of
 * both strings with two copies of a zeroed conversion state.
 * The result of fnmatch1() is returned unchanged.
 */
int
js_fnmatch(pattern, string, flags)
	const char	*pattern;
	const char	*string;
	int		flags;
{
	/*
	 * The start state is a plain static object without "const" and
	 * without an initializer:
	 * - SunPro C warns about "static const mbstate_t" that has no
	 *   initializer,
	 * - GCC warns when we try to write an initializer.
	 * The POSIX standard forbids mbstate_t from being an array, so the
	 * object is always copied when it is used as a parameter for
	 * fnmatch1() and "const" is not needed to protect it.
	 */
	static mbstate_t	start_state;
	int			result;

	result = fnmatch1(pattern, string, string, flags,
	    start_state, start_state);
	return (result);
}
128 
129 static int
130 fnmatch1(pattern, string, stringstart, flags, patmbs, strmbs)
131 	const char	*pattern;
132 	const char	*string;
133 	const char	*stringstart;
134 	int		flags;
135 	mbstate_t	patmbs;
136 	mbstate_t	strmbs;
137 {
138 	const char *bt_pattern, *bt_string;
139 	mbstate_t bt_patmbs, bt_strmbs;
140 	char *newp;
141 	char c;
142 	wchar_t pc, sc;
143 	size_t pclen, sclen;
144 
145 	bt_pattern = bt_string = NULL;
146 	for (;;) {
147 		pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
148 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
149 			return (FNM_NOMATCH);
150 		pattern += pclen;
151 		sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
152 		if (sclen == (size_t)-1 || sclen == (size_t)-2) {
153 			sc = (unsigned char)*string;
154 			sclen = 1;
155 			memset(&strmbs, 0, sizeof (strmbs));
156 		}
157 		switch (pc) {
158 		case EOS:
159 			if ((flags & FNM_LEADING_DIR) && sc == '/')
160 				return (0);
161 			if (sc == EOS)
162 				return (0);
163 			goto backtrack;
164 		case '?':
165 			if (sc == EOS)
166 				return (FNM_NOMATCH);
167 			if (sc == '/' && (flags & FNM_PATHNAME))
168 				goto backtrack;
169 			if (sc == '.' && (flags & FNM_PERIOD) &&
170 			    (string == stringstart ||
171 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
172 				goto backtrack;
173 			string += sclen;
174 			break;
175 		case '*':
176 			c = *pattern;
177 			/* Collapse multiple stars. */
178 			while (c == '*')
179 				c = *++pattern;
180 
181 			if (sc == '.' && (flags & FNM_PERIOD) &&
182 			    (string == stringstart ||
183 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
184 				goto backtrack;
185 
186 			/* Optimize for pattern with * at end or before /. */
187 			if (c == EOS) {
188 				if (flags & FNM_PATHNAME)
189 					return ((flags & FNM_LEADING_DIR) ||
190 					    strchr(string, '/') == NULL ?
191 					    0 : FNM_NOMATCH);
192 				else
193 					return (0);
194 			} else if (c == '/' && flags & FNM_PATHNAME) {
195 				if ((string = strchr(string, '/')) == NULL)
196 					return (FNM_NOMATCH);
197 				break;
198 			}
199 
200 			/*
201 			 * First try the shortest match for the '*' that
202 			 * could work. We can forget any earlier '*' since
203 			 * there is no way having it match more characters
204 			 * can help us, given that we are already here.
205 			 */
206 			bt_pattern = pattern, bt_patmbs = patmbs;
207 			bt_string = string, bt_strmbs = strmbs;
208 			break;
209 		case '[':
210 			if (sc == EOS)
211 				return (FNM_NOMATCH);
212 			if (sc == '/' && (flags & FNM_PATHNAME))
213 				goto backtrack;
214 			if (sc == '.' && (flags & FNM_PERIOD) &&
215 			    (string == stringstart ||
216 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
217 				goto backtrack;
218 
219 			switch (rangematch(pattern, sc, flags, &newp,
220 			    &patmbs)) {
221 			case RANGE_ERROR:
222 				goto norm;
223 			case RANGE_MATCH:
224 				pattern = newp;
225 				break;
226 			case RANGE_NOMATCH:
227 				goto backtrack;
228 			}
229 			string += sclen;
230 			break;
231 		case '\\':
232 			if (!(flags & FNM_NOESCAPE)) {
233 				pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
234 				    &patmbs);
235 				if (pclen == (size_t)-1 || pclen == (size_t)-2)
236 					return (FNM_NOMATCH);
237 				if (pclen == 0)
238 					return (FNM_NOMATCH);
239 				pattern += pclen;
240 			}
241 			/* FALLTHROUGH */
242 		default:
243 		norm:
244 			string += sclen;
245 			if (pc == sc) {
246 				;
247 			} else if ((flags & FNM_CASEFOLD) &&
248 				    (towlower(pc) == towlower(sc))) {
249 				;
250 			} else {
251 		backtrack:
252 				/*
253 				 * If we have a mismatch (other than hitting
254 				 * the end of the string), go back to the last
255 				 * '*' seen and have it match one additional
256 				 * character.
257 				 */
258 				if (bt_pattern == NULL)
259 					return (FNM_NOMATCH);
260 				sclen = mbrtowc(&sc, bt_string, MB_LEN_MAX,
261 				    &bt_strmbs);
262 				if (sclen == (size_t)-1 ||
263 				    sclen == (size_t)-2) {
264 					sc = (unsigned char)*bt_string;
265 					sclen = 1;
266 					memset(&bt_strmbs, 0,
267 					    sizeof (bt_strmbs));
268 				}
269 				if (sc == EOS)
270 					return (FNM_NOMATCH);
271 				if (sc == '/' && flags & FNM_PATHNAME)
272 					return (FNM_NOMATCH);
273 				bt_string += sclen;
274 				pattern = bt_pattern, patmbs = bt_patmbs;
275 				string = bt_string, strmbs = bt_strmbs;
276 			}
277 			break;
278 		}
279 	}
280 	/* NOTREACHED */
281 }
282 
/*
 * Check whether the character "test" is accepted by a bracket expression.
 *
 * pattern	Points behind the '[' that opened the bracket expression.
 * test		The character from the text that is to be checked.
 * flags	The FNM_* flag bits; FNM_CASEFOLD, FNM_PATHNAME and
 *		FNM_NOESCAPE are evaluated here.
 * newp		Receives a pointer behind the closing ']' once the whole
 *		expression has been parsed; it is not written on the early
 *		returns from inside the parse loop.
 * patmbs	Conversion state of the pattern, updated via mbrtowc().
 *
 * Returns:
 * RANGE_MATCH	 "test" is accepted (taking a leading '!' or '^' into
 *		 account).
 * RANGE_NOMATCH "test" is rejected, the pattern contains an illegal byte
 *		 sequence, or a '/' is found in the expression while
 *		 FNM_PATHNAME is set.
 * RANGE_ERROR	 The end of the pattern was hit before the closing ']',
 *		 or a "[:class:]" element is unterminated, empty or longer
 *		 than CL_SIZE.
 */
#ifdef	PROTOTYPES
static int
rangematch(const char *pattern, wchar_t test, int flags, char **newp,
	    mbstate_t *patmbs)
#else
static int
rangematch(pattern, test, flags, newp, patmbs)
	const char *pattern;
	wchar_t test;
	int flags;
	char **newp;
	mbstate_t *patmbs;
#endif
{
	int negate, ok;
	wchar_t c, c2;
	wchar_t	otest = test;	/* "test" before any case folding */
	size_t pclen;
	const char *origpat;
#ifdef	XXX_COLLATE
	struct xlocale_collate *table = (struct xlocale_collate *)
				    __get_locale()->components[XLC_COLLATE];
#endif

	/*
	 * A bracket expression starting with an unquoted circumflex
	 * character produces unspecified results (IEEE 1003.2-1992,
	 * 3.13.2).  This implementation treats it like '!', for
	 * consistency with the regular expression syntax.
	 * J.T. Conklin (conklin@ngai.kaleida.com)
	 */
	if ((negate = (*pattern == '!' || *pattern == '^')))
		++pattern;

	/*
	 * Single characters and ranges are compared in lower case,
	 * character classes are checked against the unmodified "otest".
	 */
	if (flags & FNM_CASEFOLD)
		test = towlower(test);

	/*
	 * A right bracket shall lose its special meaning and represent
	 * itself in a bracket expression if it occurs first in the list.
	 * -- POSIX.2 2.8.3.2
	 */
	ok = 0;
	origpat = pattern;
	for (;;) {
		int	quoted = 0;	/* Set if element follows a '\\' */

		if (*pattern == ']' && pattern > origpat) {
			pattern++;
			break;
		} else if (*pattern == '\0') {
			return (RANGE_ERROR);
		} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
			return (RANGE_NOMATCH);
		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE)) {
			pattern++;
			quoted++;
		}
		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
		if (pclen == (size_t)-1 || pclen == (size_t)-2)
			return (RANGE_NOMATCH);
		pattern += pclen;

		/*
		 * An unquoted "[:" starts a character class like [:alpha:].
		 */
		if (!quoted && c == '[') {
			if (pattern[0] == ':') {
				char	class[CL_SIZE+1];
				char	*pc = class;
				const char	*p;

				p = pattern + 1;	/* Eat ':' */
				for (;;) {
					if (*p == '\0')
						return (RANGE_ERROR);
					if (*p == ':' && p[1] == ']')
						break;
					if (pc >= &class[CL_SIZE])
						return (RANGE_ERROR);
					*pc++ = *p++;
				}
				if (pc == class)
					return (RANGE_ERROR);
				*pc = '\0';
				pattern = p + 2;	/* Skip ":]" */
				if (iswctype(otest, wctype(class))) {
					ok = 1;
				} else if (flags & FNM_CASEFOLD) {
					/*
					 * Convert to the other case:
					 * with FNM_CASEFOLD, [:upper:] also
					 * accepts lower case characters and
					 * [:lower:] also accepts upper case
					 * characters.
					 * The braces are needed, as the
					 * "else" otherwise belongs to the
					 * inner "if" and [:lower:] is never
					 * checked.
					 */
					if (strcmp(class, "upper") == 0) {
						if (iswctype(otest,
						    wctype("lower")))
							ok = 1;
					} else if (strcmp(class, "lower") == 0) {
						if (iswctype(otest,
						    wctype("upper")))
							ok = 1;
					}
				}
				continue;
			}
		}

		if (flags & FNM_CASEFOLD)
			c = towlower(c);

		/*
		 * "c-c2" is a range unless the '-' is the last character
		 * before the closing ']' or the end of the pattern.
		 */
		if (*pattern == '-' && *(pattern + 1) != EOS &&
		    *(pattern + 1) != ']') {
			/*
			 * Skip the '-' and a backslash that quotes the end
			 * of the range.  A backslash at the very end of the
			 * pattern results in c2 == EOS below.
			 */
			if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
				pattern++;
			pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
			if (pclen == (size_t)-1 || pclen == (size_t)-2)
				return (RANGE_NOMATCH);
			pattern += pclen;
			if (c2 == EOS)
				return (RANGE_ERROR);

			if (flags & FNM_CASEFOLD)
				c2 = towlower(c2);

#ifdef	XXX_COLLATE
			if (table->__collate_load_error ?
			    c <= test && test <= c2 :
			    __wcollate_range_cmp(c, test) <= 0 &&
			    __wcollate_range_cmp(test, c2) <= 0)
				ok = 1;
#else
			if (c <= test && test <= c2)
				ok = 1;
#endif
		} else if (c == test)
			ok = 1;
	}

	*newp = (char *)pattern;
	/* A leading '!' or '^' inverts the result. */
	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
}
420