1 /* ScummVM - Graphic Adventure Engine
2  *
3  * ScummVM is the legal property of its developers, whose names
4  * are too numerous to list here. Please refer to the COPYRIGHT
5  * file distributed with this source distribution.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; either version 2
10  * of the License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #include "glk/adrift/scare.h"
24 #include "glk/adrift/sxprotos.h"
25 
26 namespace Glk {
27 namespace Adrift {
28 
29 /*
30  * Module notes:
31  *
32  * The glob matching functions in this module are derived from an original
33  * (and somewhat hairy) glob.c posted by Arjan Kenter from the University
34  * of Twente, NL, in an assortment of minor variations between 1993 and 1997.
35  * The major modifications are:
36  *
37  *  o Added checks to ensure that invalid range patterns such as "[a-" or
38  *    "[-" don't cause the loops to walk off the end of the pattern string
39  *    and (usually) result in SIGSEGV.
40  *  o Moved from plain char to unsigned char to avoid signedness problems
41  *    with range comparisons.
42  *  o Skipped the leading '[' in the range checker; the original was treating
43  *    it as a possible first value of 'r'.
44  *  o Moved the range checker while() from the bottom of the loop to the top,
45  *    to avoid problems with invalid ranges.
46  *  o Gave 'l' in the range checker an initial value that ensures that it
47  *    can never match until it's been re-assigned to 'r'.
48  *  o Used a return value rather than multiple returns in the matcher, for
49  *    better debugability.
50  *  o Applied some const-correctness, and replaced some pointers by indexing.
51  *  o Added scanf-like special cases, making ']' a valid part of a range if
52  *    first, and '-' if last.
53  *
54  * This glob accepts * and ? wild cards, and [] ranges.  It does not check
55  * whether the range string is valid (for example, terminates with ']'), but
56  * simply returns the best it can under those circumstances.
57  *
58  * Example call:
59  *    glob_match ("a*b?c[A-Za-z_0-9]d*", some_string)
60  */
61 
62 /*
63  * glob_inrange_unsigned()
64  * glob_match_unsigned()
65  *
66  * Match a "[...]" character range, and match general glob wildcards.  See
67  * above for notes on where these functions came from originally.
68  */
glob_inrange_unsigned(const unsigned char ** const pattern,unsigned char ch)69 static int glob_inrange_unsigned(const unsigned char **const pattern, unsigned char ch) {
70 	const unsigned char *const pattern_ = *pattern;
71 	int in_range = FALSE;
72 	unsigned int l = 256, r = 0, index_;
73 
74 	/* Skip the leading '[' on entry to a range check. */
75 	index_ = 1;
76 
77 	/* Special-case a range that has ']' as its first character. */
78 	if (pattern_[index_] == ']') {
79 		r = pattern_[index_++];
80 		if (ch == r)
81 			in_range = TRUE;
82 	}
83 
84 	/*
85 	 * Check at the loop top, rather than the bottom, to avoid problems with
86 	 * invalid or uncompleted ranges.
87 	 */
88 	while (pattern_[index_] && pattern_[index_] != ']') {
89 		r = pattern_[index_++];
90 		if (r == '-') {
91 			/* Special-case a range that has '-' as its last character. */
92 			if (pattern_[index_] == ']' || !pattern_[index_]) {
93 				if (ch == r)
94 					in_range = TRUE;
95 				break;
96 			}
97 
98 			/* Break the loop on unterminated range ending with '-'. */
99 			if (!pattern_[index_])
100 				break;
101 
102 			r = pattern_[index_++];
103 			if (l <= ch && ch <= r)
104 				in_range = TRUE;
105 		} else {
106 			l = r;
107 			if (ch == r)
108 				in_range = TRUE;
109 		}
110 	}
111 
112 	/* Update pattern with characters consumed, return result. */
113 	*pattern += index_;
114 	return in_range;
115 }
116 
glob_match_unsigned(const unsigned char * pattern,const unsigned char * string)117 static int glob_match_unsigned(const unsigned char *pattern, const unsigned char *string) {
118 	int is_match = FALSE;
119 
120 	if (!*string) {
121 		if (*pattern == '*')
122 			is_match = glob_match_unsigned(pattern + 1, string);
123 		else
124 			is_match = !*pattern;
125 	} else {
126 		switch (*pattern) {
127 		case '\0':
128 			is_match = !*string;
129 			break;
130 		case '*':
131 			if (glob_match_unsigned(pattern + 1, string))
132 				is_match = TRUE;
133 			else
134 				is_match = glob_match_unsigned(pattern, string + 1);
135 			break;
136 		case '?':
137 			is_match = glob_match_unsigned(pattern + 1, string + 1);
138 			break;
139 		case '[':
140 			/*
141 			 * After a range check, we need to see if we hit the end of the
142 			 * pattern before recursively matching pattern + 1.
143 			 */
144 			is_match = glob_inrange_unsigned(&pattern, *string)
145 			           && (!*pattern
146 			               || glob_match_unsigned(pattern + 1, string + 1));
147 			break;
148 		default:
149 			is_match = *pattern == *string
150 			           && glob_match_unsigned(pattern + 1, string + 1);
151 			break;
152 		}
153 	}
154 
155 	return is_match;
156 }
157 
158 
159 /* Structures and data for the self test function. */
160 struct sx_test_data_t {
161 	const sc_char *const pattern;
162 	const sc_char *const string;
163 };
164 
165 static const sx_test_data_t SHOULD_MATCH[] = {
166 	{"a", "a"}, {"abc", "abc"}, {"", ""},
167 	{"*", ""}, {"*", "abc"}, {"*", "cba"},
168 	{"*c", "c"}, {"*c", "abc"}, {"*c", "cbac"},
169 	{"a*", "a"}, {"a*", "abc"}, {"a*", "abca"},
170 	{"a*c", "ac"}, {"a*c", "abc"}, {"a*c", "abcbcbc"},
171 	{"a**c", "ac"}, {"a**c", "abc"}, {"a**c", "abcbcbc"},
172 	{"*b*", "b"}, {"*b*", "abc"}, {"*b*", "ab"}, {"*b*", "bc"},
173 	{"?", "a"}, {"?", "z"}, {"?", "?"}, {"[?]", "?"},
174 	{"a?", "aa"}, {"a?", "az"}, {"a?", "a?"},
175 	{"?c", "ac"}, {"?c", "zc"}, {"?c", "?c"},
176 	{"[abz]", "a"}, {"[abz]", "b"}, {"[abz]", "z"},
177 	{"[a-c]", "a"}, {"[a-c]", "b"}, {"[a-c]", "c"},
178 	{"[ac]b[ac]", "abc"}, {"[ac]b[ac]", "cba"},
179 
180 	{"[]]", "]"}, {"[]a-c]", "a"}, {"[]a-c]", "b"}, {"[]a-c]", "c"},
181 	{"[?]", "?" }, {"[-]", "-"}, {"[z-]", "z"}, {"[z-]", "-"},
182 	{"[][-]", "]"}, {"[][-]", "["}, {"[][-]", "-"},
183 	{"[a-c-]", "a"}, {"[a-c-]", "b"}, {"[a-c-]", "c"}, {"[a-c-]", "-"},
184 
185 	{"*[a-z]*abc?xyz", "a<star>abcQxyz"}, {"*[a-z]*abc?xyz", "<star>aabcQxyz"},
186 	{"*[a-z]*abc?xyz", "aabcQxyz"}, {"*[a-z]*abc?xyz", "<star>a<star>abcQxyz"},
187 
188 	{"???]", "abc]"}, {"[z-a]", "z"},
189 	{"[a-z", "a"}, {"[a-", "a"}, {"[a", "a"}, {"[[", "["},
190 	{NULL, NULL}
191 };
192 
193 static const sx_test_data_t SHOULD_NOT_MATCH[] = {
194 	{"a", "b"}, {"abc", "abd"}, {"a", ""}, {"", "a"},
195 	{"*c", "a"}, {"*c", "ab"}, {"*c", "abca"},
196 	{"a*", "c"}, {"a*", "cba"}, {"a*", "cbac"},
197 	{"a*c", "ca"}, {"a*c", "cba"}, {"a*c", "cbababa"},
198 	{"a**c", "ca"}, {"a**c", "cba"}, {"a**c", "cbababa"},
199 	{"*b*", ""}, {"*b*", "z"}, {"*b*", "ac"}, {"*b*", "azc"},
200 	{"?", ""}, {"?", "ab"}, {"?", "abc"}, {"[?]", "a"},
201 	{"a?", "ca"}, {"a?", "cz"}, {"a?", "??"},
202 	{"?c", "ab"}, {"?c", "zb"}, {"?c", "??"},
203 	{"[bcy]", "a"}, {"[bcy]", "d"}, {"[bcy]", "z"},
204 	{"[b-d]", "a"}, {"[b-d]", "e"}, {"[b-d]", ""}, {"[b-d]", "bc"},
205 	{"[ac]b[ac]", "aaa"}, {"[ac]b[ac]", "bbb"}, {"[ac]b[ac]", "ccc"},
206 
207 	{"[]]", "["}, {"[]]", "a"}, {"[]a-c]", "z"},
208 	{"[?]", "a" }, {"[-]", "a"}, {"[z-]", "a"},
209 	{"[][-]", "a"}, {"[][-]", "z"},
210 	{"[a-c-]", "z"},
211 
212 	{"*[a-z]*abc?xyz", "A<STAR>abcQxyz"}, {"*[a-z]*abc?xyz", "<STAR>AabcQxyz"},
213 	{"*[a-z]*abc?xyz", "AabcQxyz"}, {"*[a-z]*abc?xyz", "aabcxyz"},
214 
215 	{"[z-a]", "a"}, {"[z-a]", "b"}, {"[", "a"}, {"[[", "a"},
216 	{NULL, NULL}
217 };
218 
219 
220 /*
221  * glob_self_test()
222  *
223  * Sed quis custodiet ipsos custodes?
224  */
glob_self_test(void)225 static void glob_self_test(void) {
226 	const sx_test_data_t *test;
227 	sc_int errors;
228 
229 	/*
230 	 * Run each test case and compare against expected result.  To avoid a lot
231 	 * of ugly casting, we use the main public glob_match() function.
232 	 */
233 	errors = 0;
234 	for (test = SHOULD_MATCH; test->pattern; test++) {
235 		if (!glob_match(test->pattern, test->string)) {
236 			sx_error("glob_self_test: \"%s\", \"%s\""
237 			         " did not match, and should have matched\n",
238 			         test->pattern, test->string);
239 			errors++;
240 		}
241 	}
242 
243 	for (test = SHOULD_NOT_MATCH; test->pattern; test++) {
244 		if (glob_match(test->pattern, test->string)) {
245 			sx_error("glob_self_test: \"%s\", \"%s\""
246 			         " matched, and should not have matched\n",
247 			         test->pattern, test->string);
248 			errors++;
249 		}
250 	}
251 
252 	/*
253 	 * Abort if any error.  As befits our distrustful nature, we won't even
254 	 * trust that sx_fatal() calls abort() (though it should).
255 	 */
256 	if (errors > 0) {
257 		sx_fatal("glob_self_test: %ld self-test error%s found, aborting\n",
258 		         errors, (errors == 1) ? "" : "s");
259 	}
260 }
261 
262 
263 /*
264  * glob_match()
265  *
266  * Adapter for the above globbing functions, presenting a more standard char-
267  * based interface.  Here is where all the evil casting lives.
268  */
glob_match(const sc_char * pattern,const sc_char * string)269 sc_bool glob_match(const sc_char *pattern, const sc_char *string) {
270 	static sc_bool initialized = FALSE;
271 
272 	const unsigned char *pattern_ = (const unsigned char *) pattern;
273 	const unsigned char *string_ = (const unsigned char *) string;
274 	sc_bool retval;
275 	assert(pattern && string);
276 
277 	/* On the first call, run a self-test to verify basic glob matching. */
278 	if (!initialized) {
279 		/*
280 		 * To avoid lots of icky casting, the self-test uses the core public
281 		 * glob_match() that we're in right here to run its tests.  So set
282 		 * initialized _before_ the test, to avoid infinite recursion.
283 		 */
284 		initialized = TRUE;
285 		glob_self_test();
286 	}
287 
288 	retval = glob_match_unsigned(pattern_, string_) != 0;
289 	return retval;
290 }
291 
292 } // End of namespace Adrift
293 } // End of namespace Glk
294