1 /* ScummVM - Graphic Adventure Engine
2 *
3 * ScummVM is the legal property of its developers, whose names
4 * are too numerous to list here. Please refer to the COPYRIGHT
5 * file distributed with this source distribution.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #include "glk/adrift/scare.h"
24 #include "glk/adrift/sxprotos.h"
25
26 namespace Glk {
27 namespace Adrift {
28
29 /*
30 * Module notes:
31 *
32 * The glob matching functions in this module are derived from an original
33 * (and somewhat hairy) glob.c posted by Arjan Kenter from the University
34 * of Twente, NL, in an assortment of minor variations between 1993 and 1997.
35 * The major modifications are:
36 *
37 * o Added checks to ensure that invalid range patterns such as "[a-" or
38 * "[-" don't cause the loops to walk off the end of the pattern string
39 * and (usually) result in SIGSEGV.
40 * o Moved from plain char to unsigned char to avoid signedness problems
41 * with range comparisons.
42 * o Skipped the leading '[' in the range checker; the original was treating
43 * it as a possible first value of 'r'.
44 * o Moved the range checker while() from the bottom of the loop to the top,
45 * to avoid problems with invalid ranges.
46 * o Gave 'l' in the range checker an initial value that ensures that it
47 * can never match until it's been re-assigned to 'r'.
48 * o Used a return value rather than multiple returns in the matcher, for
49 * better debugability.
50 * o Applied some const-correctness, and replaced some pointers by indexing.
51 * o Added scanf-like special cases, making ']' a valid part of a range if
52 * first, and '-' if last.
53 *
54 * This glob accepts * and ? wild cards, and [] ranges. It does not check
55 * whether the range string is valid (for example, terminates with ']'), but
56 * simply returns the best it can under those circumstances.
57 *
58 * Example call:
59 * glob_match ("a*b?c[A-Za-z_0-9]d*", some_string)
60 */
61
62 /*
63 * glob_inrange_unsigned()
64 * glob_match_unsigned()
65 *
66 * Match a "[...]" character range, and match general glob wildcards. See
67 * above for notes on where these functions came from originally.
68 */
glob_inrange_unsigned(const unsigned char ** const pattern,unsigned char ch)69 static int glob_inrange_unsigned(const unsigned char **const pattern, unsigned char ch) {
70 const unsigned char *const pattern_ = *pattern;
71 int in_range = FALSE;
72 unsigned int l = 256, r = 0, index_;
73
74 /* Skip the leading '[' on entry to a range check. */
75 index_ = 1;
76
77 /* Special-case a range that has ']' as its first character. */
78 if (pattern_[index_] == ']') {
79 r = pattern_[index_++];
80 if (ch == r)
81 in_range = TRUE;
82 }
83
84 /*
85 * Check at the loop top, rather than the bottom, to avoid problems with
86 * invalid or uncompleted ranges.
87 */
88 while (pattern_[index_] && pattern_[index_] != ']') {
89 r = pattern_[index_++];
90 if (r == '-') {
91 /* Special-case a range that has '-' as its last character. */
92 if (pattern_[index_] == ']' || !pattern_[index_]) {
93 if (ch == r)
94 in_range = TRUE;
95 break;
96 }
97
98 /* Break the loop on unterminated range ending with '-'. */
99 if (!pattern_[index_])
100 break;
101
102 r = pattern_[index_++];
103 if (l <= ch && ch <= r)
104 in_range = TRUE;
105 } else {
106 l = r;
107 if (ch == r)
108 in_range = TRUE;
109 }
110 }
111
112 /* Update pattern with characters consumed, return result. */
113 *pattern += index_;
114 return in_range;
115 }
116
glob_match_unsigned(const unsigned char * pattern,const unsigned char * string)117 static int glob_match_unsigned(const unsigned char *pattern, const unsigned char *string) {
118 int is_match = FALSE;
119
120 if (!*string) {
121 if (*pattern == '*')
122 is_match = glob_match_unsigned(pattern + 1, string);
123 else
124 is_match = !*pattern;
125 } else {
126 switch (*pattern) {
127 case '\0':
128 is_match = !*string;
129 break;
130 case '*':
131 if (glob_match_unsigned(pattern + 1, string))
132 is_match = TRUE;
133 else
134 is_match = glob_match_unsigned(pattern, string + 1);
135 break;
136 case '?':
137 is_match = glob_match_unsigned(pattern + 1, string + 1);
138 break;
139 case '[':
140 /*
141 * After a range check, we need to see if we hit the end of the
142 * pattern before recursively matching pattern + 1.
143 */
144 is_match = glob_inrange_unsigned(&pattern, *string)
145 && (!*pattern
146 || glob_match_unsigned(pattern + 1, string + 1));
147 break;
148 default:
149 is_match = *pattern == *string
150 && glob_match_unsigned(pattern + 1, string + 1);
151 break;
152 }
153 }
154
155 return is_match;
156 }
157
158
159 /* Structures and data for the self test function. */
160 struct sx_test_data_t {
161 const sc_char *const pattern;
162 const sc_char *const string;
163 };
164
165 static const sx_test_data_t SHOULD_MATCH[] = {
166 {"a", "a"}, {"abc", "abc"}, {"", ""},
167 {"*", ""}, {"*", "abc"}, {"*", "cba"},
168 {"*c", "c"}, {"*c", "abc"}, {"*c", "cbac"},
169 {"a*", "a"}, {"a*", "abc"}, {"a*", "abca"},
170 {"a*c", "ac"}, {"a*c", "abc"}, {"a*c", "abcbcbc"},
171 {"a**c", "ac"}, {"a**c", "abc"}, {"a**c", "abcbcbc"},
172 {"*b*", "b"}, {"*b*", "abc"}, {"*b*", "ab"}, {"*b*", "bc"},
173 {"?", "a"}, {"?", "z"}, {"?", "?"}, {"[?]", "?"},
174 {"a?", "aa"}, {"a?", "az"}, {"a?", "a?"},
175 {"?c", "ac"}, {"?c", "zc"}, {"?c", "?c"},
176 {"[abz]", "a"}, {"[abz]", "b"}, {"[abz]", "z"},
177 {"[a-c]", "a"}, {"[a-c]", "b"}, {"[a-c]", "c"},
178 {"[ac]b[ac]", "abc"}, {"[ac]b[ac]", "cba"},
179
180 {"[]]", "]"}, {"[]a-c]", "a"}, {"[]a-c]", "b"}, {"[]a-c]", "c"},
181 {"[?]", "?" }, {"[-]", "-"}, {"[z-]", "z"}, {"[z-]", "-"},
182 {"[][-]", "]"}, {"[][-]", "["}, {"[][-]", "-"},
183 {"[a-c-]", "a"}, {"[a-c-]", "b"}, {"[a-c-]", "c"}, {"[a-c-]", "-"},
184
185 {"*[a-z]*abc?xyz", "a<star>abcQxyz"}, {"*[a-z]*abc?xyz", "<star>aabcQxyz"},
186 {"*[a-z]*abc?xyz", "aabcQxyz"}, {"*[a-z]*abc?xyz", "<star>a<star>abcQxyz"},
187
188 {"???]", "abc]"}, {"[z-a]", "z"},
189 {"[a-z", "a"}, {"[a-", "a"}, {"[a", "a"}, {"[[", "["},
190 {NULL, NULL}
191 };
192
193 static const sx_test_data_t SHOULD_NOT_MATCH[] = {
194 {"a", "b"}, {"abc", "abd"}, {"a", ""}, {"", "a"},
195 {"*c", "a"}, {"*c", "ab"}, {"*c", "abca"},
196 {"a*", "c"}, {"a*", "cba"}, {"a*", "cbac"},
197 {"a*c", "ca"}, {"a*c", "cba"}, {"a*c", "cbababa"},
198 {"a**c", "ca"}, {"a**c", "cba"}, {"a**c", "cbababa"},
199 {"*b*", ""}, {"*b*", "z"}, {"*b*", "ac"}, {"*b*", "azc"},
200 {"?", ""}, {"?", "ab"}, {"?", "abc"}, {"[?]", "a"},
201 {"a?", "ca"}, {"a?", "cz"}, {"a?", "??"},
202 {"?c", "ab"}, {"?c", "zb"}, {"?c", "??"},
203 {"[bcy]", "a"}, {"[bcy]", "d"}, {"[bcy]", "z"},
204 {"[b-d]", "a"}, {"[b-d]", "e"}, {"[b-d]", ""}, {"[b-d]", "bc"},
205 {"[ac]b[ac]", "aaa"}, {"[ac]b[ac]", "bbb"}, {"[ac]b[ac]", "ccc"},
206
207 {"[]]", "["}, {"[]]", "a"}, {"[]a-c]", "z"},
208 {"[?]", "a" }, {"[-]", "a"}, {"[z-]", "a"},
209 {"[][-]", "a"}, {"[][-]", "z"},
210 {"[a-c-]", "z"},
211
212 {"*[a-z]*abc?xyz", "A<STAR>abcQxyz"}, {"*[a-z]*abc?xyz", "<STAR>AabcQxyz"},
213 {"*[a-z]*abc?xyz", "AabcQxyz"}, {"*[a-z]*abc?xyz", "aabcxyz"},
214
215 {"[z-a]", "a"}, {"[z-a]", "b"}, {"[", "a"}, {"[[", "a"},
216 {NULL, NULL}
217 };
218
219
220 /*
221 * glob_self_test()
222 *
223 * Sed quis custodiet ipsos custodes?
224 */
glob_self_test(void)225 static void glob_self_test(void) {
226 const sx_test_data_t *test;
227 sc_int errors;
228
229 /*
230 * Run each test case and compare against expected result. To avoid a lot
231 * of ugly casting, we use the main public glob_match() function.
232 */
233 errors = 0;
234 for (test = SHOULD_MATCH; test->pattern; test++) {
235 if (!glob_match(test->pattern, test->string)) {
236 sx_error("glob_self_test: \"%s\", \"%s\""
237 " did not match, and should have matched\n",
238 test->pattern, test->string);
239 errors++;
240 }
241 }
242
243 for (test = SHOULD_NOT_MATCH; test->pattern; test++) {
244 if (glob_match(test->pattern, test->string)) {
245 sx_error("glob_self_test: \"%s\", \"%s\""
246 " matched, and should not have matched\n",
247 test->pattern, test->string);
248 errors++;
249 }
250 }
251
252 /*
253 * Abort if any error. As befits our distrustful nature, we won't even
254 * trust that sx_fatal() calls abort() (though it should).
255 */
256 if (errors > 0) {
257 sx_fatal("glob_self_test: %ld self-test error%s found, aborting\n",
258 errors, (errors == 1) ? "" : "s");
259 }
260 }
261
262
263 /*
264 * glob_match()
265 *
266 * Adapter for the above globbing functions, presenting a more standard char-
267 * based interface. Here is where all the evil casting lives.
268 */
glob_match(const sc_char * pattern,const sc_char * string)269 sc_bool glob_match(const sc_char *pattern, const sc_char *string) {
270 static sc_bool initialized = FALSE;
271
272 const unsigned char *pattern_ = (const unsigned char *) pattern;
273 const unsigned char *string_ = (const unsigned char *) string;
274 sc_bool retval;
275 assert(pattern && string);
276
277 /* On the first call, run a self-test to verify basic glob matching. */
278 if (!initialized) {
279 /*
280 * To avoid lots of icky casting, the self-test uses the core public
281 * glob_match() that we're in right here to run its tests. So set
282 * initialized _before_ the test, to avoid infinite recursion.
283 */
284 initialized = TRUE;
285 glob_self_test();
286 }
287
288 retval = glob_match_unsigned(pattern_, string_) != 0;
289 return retval;
290 }
291
292 } // End of namespace Adrift
293 } // End of namespace Glk
294