xref: /dragonfly/games/quiz/rxp.c (revision 984263bc)
1 /*-
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
7  * Commodore Business Machines.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #ifndef lint
39 #if 0
40 static char sccsid[] = "@(#)rxp.c	8.1 (Berkeley) 5/31/93";
41 #endif
42 static const char rcsid[] =
43  "$FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $";
44 #endif /* not lint */
45 
46 /*
47  * regular expression parser
48  *
49  * external functions and return values are:
50  * rxp_compile(s)
51  *	TRUE	success
52  *	FALSE	parse failure; error message will be in char rxperr[]
53  * metas are:
54  *	{...}	optional pattern, equialent to [...|]
55  *	|	alternate pattern
56  *	[...]	pattern delimiters
57  *
58  * rxp_match(s)
59  *	TRUE	string s matches compiled pattern
60  *	FALSE	match failure or regexp error
61  *
62  * rxp_expand()
63  *	char *	reverse-engineered regular expression string
64  *	NULL	regexp error
65  */
66 
67 #include <stdio.h>
68 #include <ctype.h>
69 #include "quiz.h"
70 					/* regexp tokens,	arg */
71 #define	LIT	(-1)			/* literal character,	char */
72 #define	SOT	(-2)			/* start text anchor,	- */
73 #define	EOT	(-3)			/* end text anchor,	- */
74 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
75 #define	GRP_E	(-5)			/* end group,		- */
76 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
77 #define	ALT_E	(-7)			/* alternate ends,	- */
78 #define	END	(-8)			/* end of regexp,	- */
79 
80 typedef short Rxp_t;			/* type for regexp tokens */
81 
82 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
83 char rxperr[128];			/* parser error message */
84 
85 static int	 rxp__compile __P((char *, int));
86 static char	*rxp__expand __P((int));
87 static int	 rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *));
88 
89 int
90 rxp_compile(s)
91 	char *	s;
92 {
93 	return (rxp__compile(s, TRUE));
94 }
95 
96 static int
97 rxp__compile(s, first)
98 	char *s;
99 	int first;
100 {
101 	static Rxp_t *rp;
102 	static char *sp;
103 	Rxp_t *grp_ptr;
104 	Rxp_t *alt_ptr;
105 	int esc, err;
106 
107 	esc = 0;
108 	if (first) {
109 		rp = rxpbuf;
110 		sp = s;
111 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
112 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
113 		*rp++ = 0;
114 	}
115 	*rp++ = ALT_S;
116 	alt_ptr = rp;
117 	*rp++ = 0;
118 	for (; *sp; ++sp) {
119 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
120 			(void)snprintf(rxperr, sizeof(rxperr),
121 			    "regular expression too long %s", s);
122 			return (FALSE);
123 		}
124 		if (*sp == ':' && !esc)
125 			break;
126 		if (esc) {
127 			*rp++ = LIT;
128 			*rp++ = *sp;
129 			esc = 0;
130 		}
131 		else switch (*sp) {
132 		case '\\':
133 			esc = 1;
134 			break;
135 		case '{':
136 		case '[':
137 			*rp++ = GRP_S;
138 			grp_ptr = rp;
139 			*rp++ = 0;
140 			sp++;
141 			if ((err = rxp__compile(s, FALSE)) != TRUE)
142 				return (err);
143 			*rp++ = GRP_E;
144 			*grp_ptr = rp - rxpbuf;
145 			break;
146 		case '}':
147 		case ']':
148 		case '|':
149 			*rp++ = ALT_E;
150 			*alt_ptr = rp - rxpbuf;
151 			if (*sp != ']') {
152 				*rp++ = ALT_S;
153 				alt_ptr = rp;
154 				*rp++ = 0;
155 			}
156 			if (*sp != '|') {
157 				if (*sp != ']') {
158 					*rp++ = ALT_E;
159 					*alt_ptr = rp - rxpbuf;
160 				}
161 				if (first) {
162 					(void)snprintf(rxperr, sizeof(rxperr),
163 					    "unmatched alternator in regexp %s",
164 					     s);
165 					return (FALSE);
166 				}
167 				return (TRUE);
168 			}
169 			break;
170 		default:
171 			*rp++ = LIT;
172 			*rp++ = *sp;
173 			esc = 0;
174 			break;
175 		}
176 	}
177 	if (!first) {
178 		(void)snprintf(rxperr, sizeof(rxperr),
179 		    "unmatched alternator in regexp %s", s);
180 		return (FALSE);
181 	}
182 	*rp++ = ALT_E;
183 	*alt_ptr = rp - rxpbuf;
184 	*rp++ = GRP_E;
185 	*(rxpbuf + 2) = rp - rxpbuf;
186 	*rp++ = EOT;
187 	*rp = END;
188 	return (TRUE);
189 }
190 
191 /*
192  * match string against compiled regular expression
193  */
194 int
195 rxp_match(s)
196 	char *	s;
197 {
198 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
199 }
200 
201 static int
202 rxp__match(s, first, j_succ, j_fail, sp_fail)
203 	char *s;
204 	int first;
205 	Rxp_t *j_succ;		/* jump here on successful alt match */
206 	Rxp_t *j_fail;		/* jump here on failed match */
207 	char *sp_fail;		/* reset sp to here on failed match */
208 {
209 	static Rxp_t *rp;
210 	static char *sp;
211 	int ch;
212 	Rxp_t *grp_end;
213 	int err;
214 
215 	grp_end = NULL;
216 	if (first) {
217 		rp = rxpbuf;
218 		sp = s;
219 	}
220 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
221 		switch(*rp) {
222 		case LIT:
223 			rp++;
224 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
225 			if (ch != *sp++) {
226 				rp = j_fail;
227 				sp = sp_fail;
228 				return (TRUE);
229 			}
230 			rp++;
231 			break;
232 		case SOT:
233 			if (sp != s)
234 				return (FALSE);
235 			rp++;
236 			break;
237 		case EOT:
238 			if (*sp != 0)
239 				return (FALSE);
240 			rp++;
241 			break;
242 		case GRP_S:
243 			rp++;
244 			grp_end = rxpbuf + *rp++;
245 			break;
246 		case ALT_S:
247 			rp++;
248 			if ((err = rxp__match(sp,
249 			    FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
250 				return (err);
251 			break;
252 		case ALT_E:
253 			rp = j_succ;
254 			return (TRUE);
255 		case GRP_E:
256 		default:
257 			return (FALSE);
258 		}
259 	return (*rp != END ? FALSE : TRUE);
260 }
261 
262 /*
263  * Reverse engineer the regular expression, by picking first of all alternates.
264  */
265 char *
266 rxp_expand()
267 {
268 	return (rxp__expand(TRUE));
269 }
270 
271 static char *
272 rxp__expand(first)
273 	int first;
274 {
275 	static char buf[RXP_LINE_SZ/2];
276 	static Rxp_t *rp;
277 	static char *bp;
278 	Rxp_t *grp_ptr;
279 	char *err;
280 
281 	if (first) {
282 		rp = rxpbuf;
283 		bp = buf;
284 	}
285 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
286 		switch(*rp) {
287 		case LIT:
288 			rp++;
289 			*bp++ = *rp++;
290 			break;
291 		case GRP_S:
292 			rp++;
293 			grp_ptr = rxpbuf + *rp;
294 			rp++;
295 			if ((err = rxp__expand(FALSE)) == NULL)
296 				return (err);
297 			rp = grp_ptr;
298 			break;
299 		case ALT_E:
300 			return (buf);
301 		case ALT_S:
302 			rp++;
303 			/* FALLTHROUGH */
304 		case SOT:
305 		case EOT:
306 		case GRP_E:
307 			rp++;
308 			break;
309 		default:
310 			return (NULL);
311 		}
312 	if (first) {
313 		if (*rp != END)
314 			return (NULL);
315 		*bp = '\0';
316 	}
317 	return (buf);
318 }
319