1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "lafe_platform.h"
28 __FBSDID("$FreeBSD$");
29 
30 #ifdef HAVE_STRING_H
31 #include <string.h>
32 #endif
33 
34 #include "pathmatch.h"
35 
36 /*
37  * Check whether a character 'c' is matched by a list specification [...]:
38  *    * Leading '!' negates the class.
39  *    * <char>-<char> is a range of characters
40  *    * \<char> removes any special meaning for <char>
41  *
42  * Some interesting boundary cases:
43  *   a-d-e is one range (a-d) followed by two single characters - and e.
44  *   \a-\d is same as a-d
45  *   a\-d is three single characters: a, d, -
46  *   Trailing - is not special (so [a-] is two characters a and -).
47  *   Initial - is not special ([a-] is same as [-a] is same as [\\-a])
48  *   This function never sees a trailing \.
49  *   [] always fails
50  *   [!] always succeeds
51  */
52 static int
pm_list(const char * start,const char * end,const char c,int flags)53 pm_list(const char *start, const char *end, const char c, int flags)
54 {
55 	const char *p = start;
56 	char rangeStart = '\0', nextRangeStart;
57 	int match = 1, nomatch = 0;
58 
59 	/* This will be used soon... */
60 	(void)flags; /* UNUSED */
61 
62 	/* If this is a negated class, return success for nomatch. */
63 	if (*p == '!' && p < end) {
64 		match = 0;
65 		nomatch = 1;
66 		++p;
67 	}
68 
69 	while (p < end) {
70 		nextRangeStart = '\0';
71 		switch (*p) {
72 		case '-':
73 			/* Trailing or initial '-' is not special. */
74 			if ((rangeStart == '\0') || (p == end - 1)) {
75 				if (*p == c)
76 					return (match);
77 			} else {
78 				char rangeEnd = *++p;
79 				if (rangeEnd == '\\')
80 					rangeEnd = *++p;
81 				if ((rangeStart <= c) && (c <= rangeEnd))
82 					return (match);
83 			}
84 			break;
85 		case '\\':
86 			++p;
87 			/* Fall through */
88 		default:
89 			if (*p == c)
90 				return (match);
91 			nextRangeStart = *p; /* Possible start of range. */
92 		}
93 		rangeStart = nextRangeStart;
94 		++p;
95 	}
96 	return (nomatch);
97 }
98 
99 /*
100  * If s is pointing to "./", ".//", "./././" or the like, skip it.
101  */
102 static const char *
pm_slashskip(const char * s)103 pm_slashskip(const char *s) {
104 	while ((*s == '/')
105 	    || (s[0] == '.' && s[1] == '/')
106 	    || (s[0] == '.' && s[1] == '\0'))
107 		++s;
108 	return (s);
109 }
110 
111 static int
pm(const char * p,const char * s,int flags)112 pm(const char *p, const char *s, int flags)
113 {
114 	const char *end;
115 
116 	/*
117 	 * Ignore leading './', './/', '././', etc.
118 	 */
119 	if (s[0] == '.' && s[1] == '/')
120 		s = pm_slashskip(s + 1);
121 	if (p[0] == '.' && p[1] == '/')
122 		p = pm_slashskip(p + 1);
123 
124 	for (;;) {
125 		switch (*p) {
126 		case '\0':
127 			if (s[0] == '/') {
128 				if (flags & PATHMATCH_NO_ANCHOR_END)
129 					return (1);
130 				/* "dir" == "dir/" == "dir/." */
131 				s = pm_slashskip(s);
132 			}
133 			return (*s == '\0');
134 		case '?':
135 			/* ? always succeds, unless we hit end of 's' */
136 			if (*s == '\0')
137 				return (0);
138 			break;
139 		case '*':
140 			/* "*" == "**" == "***" ... */
141 			while (*p == '*')
142 				++p;
143 			/* Trailing '*' always succeeds. */
144 			if (*p == '\0')
145 				return (1);
146 			while (*s) {
147 				if (lafe_pathmatch(p, s, flags))
148 					return (1);
149 				++s;
150 			}
151 			return (0);
152 		case '[':
153 			/*
154 			 * Find the end of the [...] character class,
155 			 * ignoring \] that might occur within the class.
156 			 */
157 			end = p + 1;
158 			while (*end != '\0' && *end != ']') {
159 				if (*end == '\\' && end[1] != '\0')
160 					++end;
161 				++end;
162 			}
163 			if (*end == ']') {
164 				/* We found [...], try to match it. */
165 				if (!pm_list(p + 1, end, *s, flags))
166 					return (0);
167 				p = end; /* Jump to trailing ']' char. */
168 				break;
169 			} else
170 				/* No final ']', so just match '['. */
171 				if (*p != *s)
172 					return (0);
173 			break;
174 		case '\\':
175 			/* Trailing '\\' matches itself. */
176 			if (p[1] == '\0') {
177 				if (*s != '\\')
178 					return (0);
179 			} else {
180 				++p;
181 				if (*p != *s)
182 					return (0);
183 			}
184 			break;
185 		case '/':
186 			if (*s != '/' && *s != '\0')
187 				return (0);
188 			/* Note: pattern "/\./" won't match "/";
189 			 * pm_slashskip() correctly stops at backslash. */
190 			p = pm_slashskip(p);
191 			s = pm_slashskip(s);
192 			if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END))
193 				return (1);
194 			--p; /* Counteract the increment below. */
195 			--s;
196 			break;
197 		case '$':
198 			/* '$' is special only at end of pattern and only
199 			 * if PATHMATCH_NO_ANCHOR_END is specified. */
200 			if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){
201 				/* "dir" == "dir/" == "dir/." */
202 				return (*pm_slashskip(s) == '\0');
203 			}
204 			/* Otherwise, '$' is not special. */
205 			/* FALL THROUGH */
206 		default:
207 			if (*p != *s)
208 				return (0);
209 			break;
210 		}
211 		++p;
212 		++s;
213 	}
214 }
215 
216 /* Main entry point. */
217 int
lafe_pathmatch(const char * p,const char * s,int flags)218 lafe_pathmatch(const char *p, const char *s, int flags)
219 {
220 	/* Empty pattern only matches the empty string. */
221 	if (p == NULL || *p == '\0')
222 		return (s == NULL || *s == '\0');
223 
224 	/* Leading '^' anchors the start of the pattern. */
225 	if (*p == '^') {
226 		++p;
227 		flags &= ~PATHMATCH_NO_ANCHOR_START;
228 	}
229 
230 	if (*p == '/' && *s != '/')
231 		return (0);
232 
233 	/* Certain patterns and file names anchor implicitly. */
234 	if (*p == '*' || *p == '/' || *p == '/') {
235 		while (*p == '/')
236 			++p;
237 		while (*s == '/')
238 			++s;
239 		return (pm(p, s, flags));
240 	}
241 
242 	/* If start is unanchored, try to match start of each path element. */
243 	if (flags & PATHMATCH_NO_ANCHOR_START) {
244 		for ( ; s != NULL; s = strchr(s, '/')) {
245 			if (*s == '/')
246 				s++;
247 			if (pm(p, s, flags))
248 				return (1);
249 		}
250 		return (0);
251 	}
252 
253 	/* Default: Match from beginning. */
254 	return (pm(p, s, flags));
255 }
256