1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  *
7  * Do shell-style pattern matching for ?, \, [], and * characters.
8  * It is 8bit clean.
9  *
10  * Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
11  * Rich $alz is now <rsalz@bbn.com>.
12  *
13  * Modified by Wayne Davison to special-case '/' matching, to make '**'
14  * work differently than '*', and to fix the character-class code.
15  *
16  * Imported from git.git.
17  */
18 
19 #include "wildmatch.h"
20 
21 #define GIT_SPACE 0x01
22 #define GIT_DIGIT 0x02
23 #define GIT_ALPHA 0x04
24 #define GIT_GLOB_SPECIAL 0x08
25 #define GIT_REGEX_SPECIAL 0x10
26 #define GIT_PATHSPEC_MAGIC 0x20
27 #define GIT_CNTRL 0x40
28 #define GIT_PUNCT 0x80
29 
30 enum {
31 	S = GIT_SPACE,
32 	A = GIT_ALPHA,
33 	D = GIT_DIGIT,
34 	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
35 	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | */
36 	P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
37 	X = GIT_CNTRL,
38 	U = GIT_PUNCT,
39 	Z = GIT_CNTRL | GIT_SPACE
40 };
41 
42 static const unsigned char sane_ctype[256] = {
43 	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
44 	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */
45 	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
46 	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
47 	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
48 	A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P,		/*  80.. 95 */
49 	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
50 	A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X,		/* 112..127 */
51 	/* Nothing in the 128.. range */
52 };
53 
54 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
55 #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
56 
57 typedef unsigned char uchar;
58 
59 /* What character marks an inverted character class? */
60 #define NEGATE_CLASS	'!'
61 #define NEGATE_CLASS2	'^'
62 
63 #define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
64 				    && *(class) == *(litmatch) \
65 				    && strncmp((char*)class, litmatch, len) == 0)
66 
67 #if defined STDC_HEADERS || !defined isascii
68 # define ISASCII(c) 1
69 #else
70 # define ISASCII(c) isascii(c)
71 #endif
72 
73 #ifdef isblank
74 # define ISBLANK(c) (ISASCII(c) && isblank(c))
75 #else
76 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
77 #endif
78 
79 #ifdef isgraph
80 # define ISGRAPH(c) (ISASCII(c) && isgraph(c))
81 #else
82 # define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
83 #endif
84 
85 #define ISPRINT(c) (ISASCII(c) && isprint(c))
86 #define ISDIGIT(c) (ISASCII(c) && isdigit(c))
87 #define ISALNUM(c) (ISASCII(c) && isalnum(c))
88 #define ISALPHA(c) (ISASCII(c) && isalpha(c))
89 #define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
90 #define ISLOWER(c) (ISASCII(c) && islower(c))
91 #define ISPUNCT(c) (ISASCII(c) && ispunct(c))
92 #define ISSPACE(c) (ISASCII(c) && isspace(c))
93 #define ISUPPER(c) (ISASCII(c) && isupper(c))
94 #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
95 
96 /* Match pattern "p" against "text" */
dowild(const uchar * p,const uchar * text,unsigned int flags)97 static int dowild(const uchar *p, const uchar *text, unsigned int flags)
98 {
99 	uchar p_ch;
100 	const uchar *pattern = p;
101 
102 	for ( ; (p_ch = *p) != '\0'; text++, p++) {
103 		int matched, match_slash, negated;
104 		uchar t_ch, prev_ch;
105 		if ((t_ch = *text) == '\0' && p_ch != '*')
106 			return WM_ABORT_ALL;
107 		if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
108 			t_ch = tolower(t_ch);
109 		if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
110 			p_ch = tolower(p_ch);
111 		switch (p_ch) {
112 		case '\\':
113 			/* Literal match with following character.  Note that the test
114 			 * in "default" handles the p[1] == '\0' failure case. */
115 			p_ch = *++p;
116 			/* FALLTHROUGH */
117 		default:
118 			if (t_ch != p_ch)
119 				return WM_NOMATCH;
120 			continue;
121 		case '?':
122 			/* Match anything but '/'. */
123 			if ((flags & WM_PATHNAME) && t_ch == '/')
124 				return WM_NOMATCH;
125 			continue;
126 		case '*':
127 			if (*++p == '*') {
128 				const uchar *prev_p = p - 2;
129 				while (*++p == '*') {}
130 				if (!(flags & WM_PATHNAME))
131 					/* without WM_PATHNAME, '*' == '**' */
132 					match_slash = 1;
133 				else if ((prev_p < pattern || *prev_p == '/') &&
134 				    (*p == '\0' || *p == '/' ||
135 				     (p[0] == '\\' && p[1] == '/'))) {
136 					/*
137 					 * Assuming we already match 'foo/' and are at
138 					 * <star star slash>, just assume it matches
139 					 * nothing and go ahead match the rest of the
140 					 * pattern with the remaining string. This
141 					 * helps make foo/<*><*>/bar (<> because
142 					 * otherwise it breaks C comment syntax) match
143 					 * both foo/bar and foo/a/bar.
144 					 */
145 					if (p[0] == '/' &&
146 					    dowild(p + 1, text, flags) == WM_MATCH)
147 						return WM_MATCH;
148 					match_slash = 1;
149 				} else /* WM_PATHNAME is set */
150 					match_slash = 0;
151 			} else
152 				/* without WM_PATHNAME, '*' == '**' */
153 				match_slash = flags & WM_PATHNAME ? 0 : 1;
154 			if (*p == '\0') {
155 				/* Trailing "**" matches everything.  Trailing "*" matches
156 				 * only if there are no more slash characters. */
157 				if (!match_slash) {
158 					if (strchr((char*)text, '/') != NULL)
159 						return WM_NOMATCH;
160 				}
161 				return WM_MATCH;
162 			} else if (!match_slash && *p == '/') {
163 				/*
164 				 * _one_ asterisk followed by a slash
165 				 * with WM_PATHNAME matches the next
166 				 * directory
167 				 */
168 				const char *slash = strchr((char*)text, '/');
169 				if (!slash)
170 					return WM_NOMATCH;
171 				text = (const uchar*)slash;
172 				/* the slash is consumed by the top-level for loop */
173 				break;
174 			}
175 			while (1) {
176 				if (t_ch == '\0')
177 					break;
178 				/*
179 				 * Try to advance faster when an asterisk is
180 				 * followed by a literal. We know in this case
181 				 * that the string before the literal
182 				 * must belong to "*".
183 				 * If match_slash is false, do not look past
184 				 * the first slash as it cannot belong to '*'.
185 				 */
186 				if (!is_glob_special(*p)) {
187 					p_ch = *p;
188 					if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
189 						p_ch = tolower(p_ch);
190 					while ((t_ch = *text) != '\0' &&
191 					       (match_slash || t_ch != '/')) {
192 						if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
193 							t_ch = tolower(t_ch);
194 						if (t_ch == p_ch)
195 							break;
196 						text++;
197 					}
198 					if (t_ch != p_ch)
199 						return WM_NOMATCH;
200 				}
201 				if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
202 					if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
203 						return matched;
204 				} else if (!match_slash && t_ch == '/')
205 					return WM_ABORT_TO_STARSTAR;
206 				t_ch = *++text;
207 			}
208 			return WM_ABORT_ALL;
209 		case '[':
210 			p_ch = *++p;
211 #ifdef NEGATE_CLASS2
212 			if (p_ch == NEGATE_CLASS2)
213 				p_ch = NEGATE_CLASS;
214 #endif
215 			/* Assign literal 1/0 because of "matched" comparison. */
216 			negated = p_ch == NEGATE_CLASS ? 1 : 0;
217 			if (negated) {
218 				/* Inverted character class. */
219 				p_ch = *++p;
220 			}
221 			prev_ch = 0;
222 			matched = 0;
223 			do {
224 				if (!p_ch)
225 					return WM_ABORT_ALL;
226 				if (p_ch == '\\') {
227 					p_ch = *++p;
228 					if (!p_ch)
229 						return WM_ABORT_ALL;
230 					if (t_ch == p_ch)
231 						matched = 1;
232 				} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
233 					p_ch = *++p;
234 					if (p_ch == '\\') {
235 						p_ch = *++p;
236 						if (!p_ch)
237 							return WM_ABORT_ALL;
238 					}
239 					if (t_ch <= p_ch && t_ch >= prev_ch)
240 						matched = 1;
241 					else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
242 						uchar t_ch_upper = toupper(t_ch);
243 						if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
244 							matched = 1;
245 					}
246 					p_ch = 0; /* This makes "prev_ch" get set to 0. */
247 				} else if (p_ch == '[' && p[1] == ':') {
248 					const uchar *s;
249 					int i;
250 					for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
251 					if (!p_ch)
252 						return WM_ABORT_ALL;
253 					i = (int)(p - s - 1);
254 					if (i < 0 || p[-1] != ':') {
255 						/* Didn't find ":]", so treat like a normal set. */
256 						p = s - 2;
257 						p_ch = '[';
258 						if (t_ch == p_ch)
259 							matched = 1;
260 						continue;
261 					}
262 					if (CC_EQ(s,i, "alnum")) {
263 						if (ISALNUM(t_ch))
264 							matched = 1;
265 					} else if (CC_EQ(s,i, "alpha")) {
266 						if (ISALPHA(t_ch))
267 							matched = 1;
268 					} else if (CC_EQ(s,i, "blank")) {
269 						if (ISBLANK(t_ch))
270 							matched = 1;
271 					} else if (CC_EQ(s,i, "cntrl")) {
272 						if (ISCNTRL(t_ch))
273 							matched = 1;
274 					} else if (CC_EQ(s,i, "digit")) {
275 						if (ISDIGIT(t_ch))
276 							matched = 1;
277 					} else if (CC_EQ(s,i, "graph")) {
278 						if (ISGRAPH(t_ch))
279 							matched = 1;
280 					} else if (CC_EQ(s,i, "lower")) {
281 						if (ISLOWER(t_ch))
282 							matched = 1;
283 					} else if (CC_EQ(s,i, "print")) {
284 						if (ISPRINT(t_ch))
285 							matched = 1;
286 					} else if (CC_EQ(s,i, "punct")) {
287 						if (ISPUNCT(t_ch))
288 							matched = 1;
289 					} else if (CC_EQ(s,i, "space")) {
290 						if (ISSPACE(t_ch))
291 							matched = 1;
292 					} else if (CC_EQ(s,i, "upper")) {
293 						if (ISUPPER(t_ch))
294 							matched = 1;
295 						else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
296 							matched = 1;
297 					} else if (CC_EQ(s,i, "xdigit")) {
298 						if (ISXDIGIT(t_ch))
299 							matched = 1;
300 					} else /* malformed [:class:] string */
301 						return WM_ABORT_ALL;
302 					p_ch = 0; /* This makes "prev_ch" get set to 0. */
303 				} else if (t_ch == p_ch)
304 					matched = 1;
305 			} while (prev_ch = p_ch, (p_ch = *++p) != ']');
306 			if (matched == negated ||
307 			    ((flags & WM_PATHNAME) && t_ch == '/'))
308 				return WM_NOMATCH;
309 			continue;
310 		}
311 	}
312 
313 	return *text ? WM_NOMATCH : WM_MATCH;
314 }
315 
316 /* Match the "pattern" against the "text" string. */
wildmatch(const char * pattern,const char * text,unsigned int flags)317 int wildmatch(const char *pattern, const char *text, unsigned int flags)
318 {
319 	return dowild((const uchar*)pattern, (const uchar*)text, flags);
320 }
321