1 /*
2 **  Do shell-style pattern matching for ?, \, [], and * characters.
3 **  It is 8bit clean.
4 **
5 **  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
6 **  Rich $alz is now <rsalz@bbn.com>.
7 **
8 **  Modified by Wayne Davison to special-case '/' matching, to make '**'
9 **  work differently than '*', and to fix the character-class code.
10 */
11 
12 #include "cache.h"
13 #include "wildmatch.h"
14 
15 typedef unsigned char uchar;
16 
17 /* What character marks an inverted character class? */
18 #define NEGATE_CLASS	'!'
19 #define NEGATE_CLASS2	'^'
20 
21 #define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
22 				    && *(class) == *(litmatch) \
23 				    && strncmp((char*)class, litmatch, len) == 0)
24 
25 #if defined STDC_HEADERS || !defined isascii
26 # define ISASCII(c) 1
27 #else
28 # define ISASCII(c) isascii(c)
29 #endif
30 
31 #ifdef isblank
32 # define ISBLANK(c) (ISASCII(c) && isblank(c))
33 #else
34 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
35 #endif
36 
37 #ifdef isgraph
38 # define ISGRAPH(c) (ISASCII(c) && isgraph(c))
39 #else
40 # define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
41 #endif
42 
43 #define ISPRINT(c) (ISASCII(c) && isprint(c))
44 #define ISDIGIT(c) (ISASCII(c) && isdigit(c))
45 #define ISALNUM(c) (ISASCII(c) && isalnum(c))
46 #define ISALPHA(c) (ISASCII(c) && isalpha(c))
47 #define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
48 #define ISLOWER(c) (ISASCII(c) && islower(c))
49 #define ISPUNCT(c) (ISASCII(c) && ispunct(c))
50 #define ISSPACE(c) (ISASCII(c) && isspace(c))
51 #define ISUPPER(c) (ISASCII(c) && isupper(c))
52 #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
53 
54 /* Match pattern "p" against "text" */
dowild(const uchar * p,const uchar * text,unsigned int flags)55 static int dowild(const uchar *p, const uchar *text, unsigned int flags)
56 {
57 	uchar p_ch;
58 	const uchar *pattern = p;
59 
60 	for ( ; (p_ch = *p) != '\0'; text++, p++) {
61 		int matched, match_slash, negated;
62 		uchar t_ch, prev_ch;
63 		if ((t_ch = *text) == '\0' && p_ch != '*')
64 			return WM_ABORT_ALL;
65 		if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
66 			t_ch = tolower(t_ch);
67 		if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
68 			p_ch = tolower(p_ch);
69 		switch (p_ch) {
70 		case '\\':
71 			/* Literal match with following character.  Note that the test
72 			 * in "default" handles the p[1] == '\0' failure case. */
73 			p_ch = *++p;
74 			/* FALLTHROUGH */
75 		default:
76 			if (t_ch != p_ch)
77 				return WM_NOMATCH;
78 			continue;
79 		case '?':
80 			/* Match anything but '/'. */
81 			if ((flags & WM_PATHNAME) && t_ch == '/')
82 				return WM_NOMATCH;
83 			continue;
84 		case '*':
85 			if (*++p == '*') {
86 				const uchar *prev_p = p - 2;
87 				while (*++p == '*') {}
88 				if (!(flags & WM_PATHNAME))
89 					/* without WM_PATHNAME, '*' == '**' */
90 					match_slash = 1;
91 				else if ((prev_p < pattern || *prev_p == '/') &&
92 				    (*p == '\0' || *p == '/' ||
93 				     (p[0] == '\\' && p[1] == '/'))) {
94 					/*
95 					 * Assuming we already match 'foo/' and are at
96 					 * <star star slash>, just assume it matches
97 					 * nothing and go ahead match the rest of the
98 					 * pattern with the remaining string. This
99 					 * helps make foo/<*><*>/bar (<> because
100 					 * otherwise it breaks C comment syntax) match
101 					 * both foo/bar and foo/a/bar.
102 					 */
103 					if (p[0] == '/' &&
104 					    dowild(p + 1, text, flags) == WM_MATCH)
105 						return WM_MATCH;
106 					match_slash = 1;
107 				} else /* WM_PATHNAME is set */
108 					match_slash = 0;
109 			} else
110 				/* without WM_PATHNAME, '*' == '**' */
111 				match_slash = flags & WM_PATHNAME ? 0 : 1;
112 			if (*p == '\0') {
113 				/* Trailing "**" matches everything.  Trailing "*" matches
114 				 * only if there are no more slash characters. */
115 				if (!match_slash) {
116 					if (strchr((char*)text, '/') != NULL)
117 						return WM_NOMATCH;
118 				}
119 				return WM_MATCH;
120 			} else if (!match_slash && *p == '/') {
121 				/*
122 				 * _one_ asterisk followed by a slash
123 				 * with WM_PATHNAME matches the next
124 				 * directory
125 				 */
126 				const char *slash = strchr((char*)text, '/');
127 				if (!slash)
128 					return WM_NOMATCH;
129 				text = (const uchar*)slash;
130 				/* the slash is consumed by the top-level for loop */
131 				break;
132 			}
133 			while (1) {
134 				if (t_ch == '\0')
135 					break;
136 				/*
137 				 * Try to advance faster when an asterisk is
138 				 * followed by a literal. We know in this case
139 				 * that the string before the literal
140 				 * must belong to "*".
141 				 * If match_slash is false, do not look past
142 				 * the first slash as it cannot belong to '*'.
143 				 */
144 				if (!is_glob_special(*p)) {
145 					p_ch = *p;
146 					if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
147 						p_ch = tolower(p_ch);
148 					while ((t_ch = *text) != '\0' &&
149 					       (match_slash || t_ch != '/')) {
150 						if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
151 							t_ch = tolower(t_ch);
152 						if (t_ch == p_ch)
153 							break;
154 						text++;
155 					}
156 					if (t_ch != p_ch)
157 						return WM_NOMATCH;
158 				}
159 				if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
160 					if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
161 						return matched;
162 				} else if (!match_slash && t_ch == '/')
163 					return WM_ABORT_TO_STARSTAR;
164 				t_ch = *++text;
165 			}
166 			return WM_ABORT_ALL;
167 		case '[':
168 			p_ch = *++p;
169 #ifdef NEGATE_CLASS2
170 			if (p_ch == NEGATE_CLASS2)
171 				p_ch = NEGATE_CLASS;
172 #endif
173 			/* Assign literal 1/0 because of "matched" comparison. */
174 			negated = p_ch == NEGATE_CLASS ? 1 : 0;
175 			if (negated) {
176 				/* Inverted character class. */
177 				p_ch = *++p;
178 			}
179 			prev_ch = 0;
180 			matched = 0;
181 			do {
182 				if (!p_ch)
183 					return WM_ABORT_ALL;
184 				if (p_ch == '\\') {
185 					p_ch = *++p;
186 					if (!p_ch)
187 						return WM_ABORT_ALL;
188 					if (t_ch == p_ch)
189 						matched = 1;
190 				} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
191 					p_ch = *++p;
192 					if (p_ch == '\\') {
193 						p_ch = *++p;
194 						if (!p_ch)
195 							return WM_ABORT_ALL;
196 					}
197 					if (t_ch <= p_ch && t_ch >= prev_ch)
198 						matched = 1;
199 					else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
200 						uchar t_ch_upper = toupper(t_ch);
201 						if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
202 							matched = 1;
203 					}
204 					p_ch = 0; /* This makes "prev_ch" get set to 0. */
205 				} else if (p_ch == '[' && p[1] == ':') {
206 					const uchar *s;
207 					int i;
208 					for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
209 					if (!p_ch)
210 						return WM_ABORT_ALL;
211 					i = p - s - 1;
212 					if (i < 0 || p[-1] != ':') {
213 						/* Didn't find ":]", so treat like a normal set. */
214 						p = s - 2;
215 						p_ch = '[';
216 						if (t_ch == p_ch)
217 							matched = 1;
218 						continue;
219 					}
220 					if (CC_EQ(s,i, "alnum")) {
221 						if (ISALNUM(t_ch))
222 							matched = 1;
223 					} else if (CC_EQ(s,i, "alpha")) {
224 						if (ISALPHA(t_ch))
225 							matched = 1;
226 					} else if (CC_EQ(s,i, "blank")) {
227 						if (ISBLANK(t_ch))
228 							matched = 1;
229 					} else if (CC_EQ(s,i, "cntrl")) {
230 						if (ISCNTRL(t_ch))
231 							matched = 1;
232 					} else if (CC_EQ(s,i, "digit")) {
233 						if (ISDIGIT(t_ch))
234 							matched = 1;
235 					} else if (CC_EQ(s,i, "graph")) {
236 						if (ISGRAPH(t_ch))
237 							matched = 1;
238 					} else if (CC_EQ(s,i, "lower")) {
239 						if (ISLOWER(t_ch))
240 							matched = 1;
241 					} else if (CC_EQ(s,i, "print")) {
242 						if (ISPRINT(t_ch))
243 							matched = 1;
244 					} else if (CC_EQ(s,i, "punct")) {
245 						if (ISPUNCT(t_ch))
246 							matched = 1;
247 					} else if (CC_EQ(s,i, "space")) {
248 						if (ISSPACE(t_ch))
249 							matched = 1;
250 					} else if (CC_EQ(s,i, "upper")) {
251 						if (ISUPPER(t_ch))
252 							matched = 1;
253 						else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
254 							matched = 1;
255 					} else if (CC_EQ(s,i, "xdigit")) {
256 						if (ISXDIGIT(t_ch))
257 							matched = 1;
258 					} else /* malformed [:class:] string */
259 						return WM_ABORT_ALL;
260 					p_ch = 0; /* This makes "prev_ch" get set to 0. */
261 				} else if (t_ch == p_ch)
262 					matched = 1;
263 			} while (prev_ch = p_ch, (p_ch = *++p) != ']');
264 			if (matched == negated ||
265 			    ((flags & WM_PATHNAME) && t_ch == '/'))
266 				return WM_NOMATCH;
267 			continue;
268 		}
269 	}
270 
271 	return *text ? WM_NOMATCH : WM_MATCH;
272 }
273 
274 /* Match the "pattern" against the "text" string. */
wildmatch(const char * pattern,const char * text,unsigned int flags)275 int wildmatch(const char *pattern, const char *text, unsigned int flags)
276 {
277 	return dowild((const uchar*)pattern, (const uchar*)text, flags);
278 }
279