xref: /netbsd/games/quiz/rxp.c (revision 6550d01e)
1 /*	$NetBSD: rxp.c,v 1.13 2009/08/27 00:31:12 dholland Exp $	*/
2 
3 /*-
4  * Copyright (c) 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
9  * Commodore Business Machines.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. Neither the name of the University nor the names of its contributors
20  *    may be used to endorse or promote products derived from this software
21  *    without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #ifndef lint
38 #if 0
39 static char sccsid[] = "@(#)rxp.c	8.1 (Berkeley) 5/31/93";
40 #else
41 __RCSID("$NetBSD: rxp.c,v 1.13 2009/08/27 00:31:12 dholland Exp $");
42 #endif
43 #endif /* not lint */
44 
45 /*
46  * regular expression parser
47  *
48  * external functions and return values are:
49  * rxp_compile(s)
50  *	TRUE	success
51  *	FALSE	parse failure; error message will be in char rxperr[]
52  * metas are:
53  *	{...}	optional pattern, equialent to [...|]
54  *	|	alternate pattern
55  *	[...]	pattern delimiters
56  *
57  * rxp_match(s)
58  *	TRUE	string s matches compiled pattern
59  *	FALSE	match failure or regexp error
60  *
61  * rxp_expand()
62  *	char *	reverse-engineered regular expression string
63  *	NULL	regexp error
64  */
65 
66 #include <stdio.h>
67 #include <stdlib.h>
68 #include <ctype.h>
69 #include "quiz.h"
70 					/* regexp tokens,	arg */
71 #define	LIT	(-1)			/* literal character,	char */
72 #define	SOT	(-2)			/* start text anchor,	- */
73 #define	EOT	(-3)			/* end text anchor,	- */
74 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
75 #define	GRP_E	(-5)			/* end group,		- */
76 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
77 #define	ALT_E	(-7)			/* alternate ends,	- */
78 #define	END	(-8)			/* end of regexp,	- */
79 
80 typedef short Rxp_t;			/* type for regexp tokens */
81 
82 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
83 char rxperr[128];			/* parser error message */
84 
85 static int	 rxp__compile(const char *, int);
86 static char	*rxp__expand(int);
87 static int	 rxp__match(const char *, int, Rxp_t *, Rxp_t *, const char *);
88 
89 int
90 rxp_compile(const char *s)
91 {
92 	return (rxp__compile(s, TRUE));
93 }
94 
95 static int
96 rxp__compile(const char *s, int first)
97 {
98 	static Rxp_t *rp;
99 	static const char *sp;
100 	Rxp_t *grp_ptr;
101 	Rxp_t *alt_ptr;
102 	int esc, err;
103 
104 	esc = 0;
105 	if (first) {
106 		rp = rxpbuf;
107 		sp = s;
108 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
109 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
110 		*rp++ = 0;
111 	}
112 	*rp++ = ALT_S;
113 	alt_ptr = rp;
114 	*rp++ = 0;
115 	for (; *sp; ++sp) {
116 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
117 			(void)snprintf(rxperr, sizeof(rxperr),
118 			    "regular expression too long %s", s);
119 			return (FALSE);
120 		}
121 		if (*sp == ':' && !esc)
122 			break;
123 		if (esc) {
124 			*rp++ = LIT;
125 			*rp++ = *sp;
126 			esc = 0;
127 		}
128 		else switch (*sp) {
129 		case '\\':
130 			esc = 1;
131 			break;
132 		case '{':
133 		case '[':
134 			*rp++ = GRP_S;
135 			grp_ptr = rp;
136 			*rp++ = 0;
137 			sp++;
138 			if ((err = rxp__compile(s, FALSE)) != TRUE)
139 				return (err);
140 			*rp++ = GRP_E;
141 			*grp_ptr = rp - rxpbuf;
142 			break;
143 		case '}':
144 		case ']':
145 		case '|':
146 			*rp++ = ALT_E;
147 			*alt_ptr = rp - rxpbuf;
148 			if (*sp != ']') {
149 				*rp++ = ALT_S;
150 				alt_ptr = rp;
151 				*rp++ = 0;
152 			}
153 			if (*sp != '|') {
154 				if (*sp != ']') {
155 					*rp++ = ALT_E;
156 					*alt_ptr = rp - rxpbuf;
157 				}
158 				if (first) {
159 					(void)snprintf(rxperr, sizeof(rxperr),
160 					    "unmatched alternator in regexp %s",
161 					     s);
162 					return (FALSE);
163 				}
164 				return (TRUE);
165 			}
166 			break;
167 		default:
168 			*rp++ = LIT;
169 			*rp++ = *sp;
170 			esc = 0;
171 			break;
172 		}
173 	}
174 	if (!first) {
175 		(void)snprintf(rxperr, sizeof(rxperr),
176 		    "unmatched alternator in regexp %s", s);
177 		return (FALSE);
178 	}
179 	*rp++ = ALT_E;
180 	*alt_ptr = rp - rxpbuf;
181 	*rp++ = GRP_E;
182 	*(rxpbuf + 2) = rp - rxpbuf;
183 	*rp++ = EOT;
184 	*rp = END;
185 	return (TRUE);
186 }
187 
188 /*
189  * match string against compiled regular expression
190  */
191 int
192 rxp_match(const char *s)
193 {
194 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
195 }
196 
197 static int
198 rxp__match(const char *s,
199 	   int first,
200 	   Rxp_t *j_succ,		/* jump here on successful alt match */
201 	   Rxp_t *j_fail,		/* jump here on failed match */
202 	   const char *sp_fail)		/* reset sp to here on failed match */
203 {
204 	static Rxp_t *rp;
205 	static const char *sp;
206 	int ch;
207 	Rxp_t *grp_end = NULL;
208 
209 	if (first) {
210 		rp = rxpbuf;
211 		sp = s;
212 	}
213 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
214 		switch(*rp) {
215 		case LIT:
216 			rp++;
217 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
218 			if (ch != *sp++) {
219 				rp = j_fail;
220 				sp = sp_fail;
221 				return (FALSE);
222 			}
223 			rp++;
224 			break;
225 		case SOT:
226 			if (sp != s)
227 				return (FALSE);
228 			rp++;
229 			break;
230 		case EOT:
231 			if (*sp != 0)
232 				return (FALSE);
233 			rp++;
234 			break;
235 		case GRP_S:
236 			rp++;
237 			grp_end = rxpbuf + *rp++;
238 			break;
239 		case ALT_S:
240 			rp++;
241 			rxp__match(sp, FALSE, grp_end, rxpbuf + *rp++, sp);
242 			break;
243 		case ALT_E:
244 			rp = j_succ;
245 			return (TRUE);
246 		case GRP_E:
247 			rp = j_fail;
248 			sp = sp_fail;
249 			return (FALSE);
250 		default:
251 			abort();
252 		}
253 	return (*rp != END ? FALSE : TRUE);
254 }
255 
256 /*
257  * Reverse engineer the regular expression, by picking first of all alternates.
258  */
259 char *
260 rxp_expand(void)
261 {
262 	return (rxp__expand(TRUE));
263 }
264 
265 static char *
266 rxp__expand(int first)
267 {
268 	static char buf[RXP_LINE_SZ/2];
269 	static Rxp_t *rp;
270 	static char *bp;
271 	Rxp_t *grp_ptr;
272 	char *err;
273 
274 	if (first) {
275 		rp = rxpbuf;
276 		bp = buf;
277 	}
278 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
279 		switch(*rp) {
280 		case LIT:
281 			rp++;
282 			*bp++ = *rp++;
283 			break;
284 		case GRP_S:
285 			rp++;
286 			grp_ptr = rxpbuf + *rp;
287 			rp++;
288 			if ((err = rxp__expand(FALSE)) == NULL)
289 				return (err);
290 			rp = grp_ptr;
291 			break;
292 		case ALT_E:
293 			return (buf);
294 		case ALT_S:
295 			rp++;
296 			/* FALLTHROUGH */
297 		case SOT:
298 		case EOT:
299 		case GRP_E:
300 			rp++;
301 			break;
302 		default:
303 			return (NULL);
304 		}
305 	if (first) {
306 		if (*rp != END)
307 			return (NULL);
308 		*bp = '\0';
309 	}
310 	return (buf);
311 }
312