xref: /dragonfly/lib/libc/gen/fnmatch.c (revision 5062ee70)
1 /*
2  * Copyright (c) 1989, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Guido van Rossum.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  * All rights reserved.
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  * @(#)fnmatch.c	8.2 (Berkeley) 4/16/94
38  * $FreeBSD: head/lib/libc/gen/fnmatch.c 254091 2013-08-08 09:04:02Z ache $
39  */
40 
41 
42 /*
43  * Function fnmatch() as specified in POSIX 1003.2-1992, section B.6.
44  * Compares a filename or pathname to a pattern.
45  */
46 
47 /*
48  * Some notes on multibyte character support:
49  * 1. Patterns with illegal byte sequences match nothing.
50  * 2. Illegal byte sequences in the "string" argument are handled by treating
51  *    them as single-byte characters with a value of the first byte of the
52  *    sequence cast to wchar_t.
53  * 3. Multibyte conversion state objects (mbstate_t) are passed around and
54  *    used for most, but not all, conversions. Further work will be required
55  *    to support state-dependent encodings.
56  */
57 
58 #include <fnmatch.h>
59 #include <limits.h>
60 #include <string.h>
61 #include <wchar.h>
62 #include <wctype.h>
63 
64 #include "collate.h"
65 
66 #define	EOS	'\0'
67 
68 #define RANGE_MATCH     1
69 #define RANGE_NOMATCH   0
70 #define RANGE_ERROR     (-1)
71 
72 static int rangematch(const char *, wchar_t, int, char **, mbstate_t *);
73 static int fnmatch1(const char *, const char *, const char *, int, mbstate_t,
74 		mbstate_t);
75 
/*
 * Public entry point: match `string' against the shell pattern `pattern'
 * under the FNM_* bits in `flags'.  Returns 0 on a match and FNM_NOMATCH
 * otherwise (the result of fnmatch1() is passed through unchanged).
 */
int
fnmatch(const char *pattern, const char *string, int flags)
{
	/*
	 * Static storage is zero-initialized; the same zeroed conversion
	 * state seeds both the pattern and the string decoders.
	 */
	static const mbstate_t init_state;

	/* The whole string is also its own start for leading-period checks. */
	return (fnmatch1(pattern, string, string, flags,
	    init_state, init_state));
}
83 
84 static int
85 fnmatch1(const char *pattern, const char *string, const char *stringstart,
86     int flags, mbstate_t patmbs, mbstate_t strmbs)
87 {
88 	char *newp;
89 	char c;
90 	wchar_t pc, sc;
91 	size_t pclen, sclen;
92 
93 	for (;;) {
94 		pclen = mbrtowc(&pc, pattern, MB_LEN_MAX, &patmbs);
95 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
96 			return (FNM_NOMATCH);
97 		pattern += pclen;
98 		sclen = mbrtowc(&sc, string, MB_LEN_MAX, &strmbs);
99 		if (sclen == (size_t)-1 || sclen == (size_t)-2) {
100 			sc = (unsigned char)*string;
101 			sclen = 1;
102 			memset(&strmbs, 0, sizeof(strmbs));
103 		}
104 		switch (pc) {
105 		case EOS:
106 			if ((flags & FNM_LEADING_DIR) && sc == '/')
107 				return (0);
108 			return (sc == EOS ? 0 : FNM_NOMATCH);
109 		case '?':
110 			if (sc == EOS)
111 				return (FNM_NOMATCH);
112 			if (sc == '/' && (flags & FNM_PATHNAME))
113 				return (FNM_NOMATCH);
114 			if (sc == '.' && (flags & FNM_PERIOD) &&
115 			    (string == stringstart ||
116 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
117 				return (FNM_NOMATCH);
118 			string += sclen;
119 			break;
120 		case '*':
121 			c = *pattern;
122 			/* Collapse multiple stars. */
123 			while (c == '*')
124 				c = *++pattern;
125 
126 			if (sc == '.' && (flags & FNM_PERIOD) &&
127 			    (string == stringstart ||
128 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
129 				return (FNM_NOMATCH);
130 
131 			/* Optimize for pattern with * at end or before /. */
132 			if (c == EOS)
133 				if (flags & FNM_PATHNAME)
134 					return ((flags & FNM_LEADING_DIR) ||
135 					    strchr(string, '/') == NULL ?
136 					    0 : FNM_NOMATCH);
137 				else
138 					return (0);
139 			else if (c == '/' && flags & FNM_PATHNAME) {
140 				if ((string = strchr(string, '/')) == NULL)
141 					return (FNM_NOMATCH);
142 				break;
143 			}
144 
145 			/* General case, use recursion. */
146 			while (sc != EOS) {
147 				if (!fnmatch1(pattern, string, stringstart,
148 				    flags, patmbs, strmbs))
149 					return (0);
150 				sclen = mbrtowc(&sc, string, MB_LEN_MAX,
151 				    &strmbs);
152 				if (sclen == (size_t)-1 ||
153 				    sclen == (size_t)-2) {
154 					sc = (unsigned char)*string;
155 					sclen = 1;
156 					memset(&strmbs, 0, sizeof(strmbs));
157 				}
158 				if (sc == '/' && flags & FNM_PATHNAME)
159 					break;
160 				string += sclen;
161 			}
162 			return (FNM_NOMATCH);
163 		case '[':
164 			if (sc == EOS)
165 				return (FNM_NOMATCH);
166 			if (sc == '/' && (flags & FNM_PATHNAME))
167 				return (FNM_NOMATCH);
168 			if (sc == '.' && (flags & FNM_PERIOD) &&
169 			    (string == stringstart ||
170 			    ((flags & FNM_PATHNAME) && *(string - 1) == '/')))
171 				return (FNM_NOMATCH);
172 
173 			switch (rangematch(pattern, sc, flags, &newp,
174 			    &patmbs)) {
175 			case RANGE_ERROR:
176 				goto norm;
177 			case RANGE_MATCH:
178 				pattern = newp;
179 				break;
180 			case RANGE_NOMATCH:
181 				return (FNM_NOMATCH);
182 			}
183 			string += sclen;
184 			break;
185 		case '\\':
186 			if (!(flags & FNM_NOESCAPE)) {
187 				pclen = mbrtowc(&pc, pattern, MB_LEN_MAX,
188 				    &patmbs);
189 				if (pclen == (size_t)-1 || pclen == (size_t)-2)
190 					return (FNM_NOMATCH);
191 				pattern += pclen;
192 			}
193 			/* FALLTHROUGH */
194 		default:
195 		norm:
196 			if (pc == sc)
197 				;
198 			else if ((flags & FNM_CASEFOLD) &&
199 				 (towlower(pc) == towlower(sc)))
200 				;
201 			else
202 				return (FNM_NOMATCH);
203 			string += sclen;
204 			break;
205 		}
206 	}
207 	/* NOTREACHED */
208 }
209 
210 static int
211 rangematch(const char *pattern, wchar_t test, int flags, char **newp,
212     mbstate_t *patmbs)
213 {
214 	int negate, ok;
215 	wchar_t c, c2;
216 	size_t pclen;
217 	const char *origpat;
218 	struct xlocale_collate *table =
219 		(struct xlocale_collate*)__get_locale()->components[XLC_COLLATE];
220 
221 	/*
222 	 * A bracket expression starting with an unquoted circumflex
223 	 * character produces unspecified results (IEEE 1003.2-1992,
224 	 * 3.13.2).  This implementation treats it like '!', for
225 	 * consistency with the regular expression syntax.
226 	 * J.T. Conklin (conklin@ngai.kaleida.com)
227 	 */
228 	if ((negate = (*pattern == '!' || *pattern == '^')))
229 		++pattern;
230 
231 	if (flags & FNM_CASEFOLD)
232 		test = towlower(test);
233 
234 	/*
235 	 * A right bracket shall lose its special meaning and represent
236 	 * itself in a bracket expression if it occurs first in the list.
237 	 * -- POSIX.2 2.8.3.2
238 	 */
239 	ok = 0;
240 	origpat = pattern;
241 	for (;;) {
242 		if (*pattern == ']' && pattern > origpat) {
243 			pattern++;
244 			break;
245 		} else if (*pattern == '\0') {
246 			return (RANGE_ERROR);
247 		} else if (*pattern == '/' && (flags & FNM_PATHNAME)) {
248 			return (RANGE_NOMATCH);
249 		} else if (*pattern == '\\' && !(flags & FNM_NOESCAPE))
250 			pattern++;
251 		pclen = mbrtowc(&c, pattern, MB_LEN_MAX, patmbs);
252 		if (pclen == (size_t)-1 || pclen == (size_t)-2)
253 			return (RANGE_NOMATCH);
254 		pattern += pclen;
255 
256 		if (flags & FNM_CASEFOLD)
257 			c = towlower(c);
258 
259 		if (*pattern == '-' && *(pattern + 1) != EOS &&
260 		    *(pattern + 1) != ']') {
261 			if (*++pattern == '\\' && !(flags & FNM_NOESCAPE))
262 				if (*pattern != EOS)
263 					pattern++;
264 			pclen = mbrtowc(&c2, pattern, MB_LEN_MAX, patmbs);
265 			if (pclen == (size_t)-1 || pclen == (size_t)-2)
266 				return (RANGE_NOMATCH);
267 			pattern += pclen;
268 			if (c2 == EOS)
269 				return (RANGE_ERROR);
270 
271 			if (flags & FNM_CASEFOLD)
272 				c2 = towlower(c2);
273 
274 			if (table->__collate_load_error ?
275 			    c <= test && test <= c2 :
276 			       __wcollate_range_cmp(c, test) <= 0
277 			    && __wcollate_range_cmp(test, c2) <= 0
278 			   )
279 				ok = 1;
280 		} else if (c == test)
281 			ok = 1;
282 	}
283 
284 	*newp = (char *)pattern;
285 	return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
286 }
287