xref: /dragonfly/games/quiz/rxp.c (revision af79c6e5)
1 /*-
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
7  * Commodore Business Machines.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  * @(#)rxp.c	8.1 (Berkeley) 5/31/93
38  * $FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $
39  * $DragonFly: src/games/quiz/rxp.c,v 1.3 2003/11/12 14:53:54 eirikn Exp $
40  */
41 
42 /*
43  * regular expression parser
44  *
45  * external functions and return values are:
46  * rxp_compile(s)
47  *	TRUE	success
48  *	FALSE	parse failure; error message will be in char rxperr[]
49  * metas are:
50  *	{...}	optional pattern, equialent to [...|]
51  *	|	alternate pattern
52  *	[...]	pattern delimiters
53  *
54  * rxp_match(s)
55  *	TRUE	string s matches compiled pattern
56  *	FALSE	match failure or regexp error
57  *
58  * rxp_expand()
59  *	char *	reverse-engineered regular expression string
60  *	NULL	regexp error
61  */
62 
63 #include <stdio.h>
64 #include <ctype.h>
65 #include "quiz.h"
66 					/* regexp tokens,	arg */
67 #define	LIT	(-1)			/* literal character,	char */
68 #define	SOT	(-2)			/* start text anchor,	- */
69 #define	EOT	(-3)			/* end text anchor,	- */
70 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
71 #define	GRP_E	(-5)			/* end group,		- */
72 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
73 #define	ALT_E	(-7)			/* alternate ends,	- */
74 #define	END	(-8)			/* end of regexp,	- */
75 
76 typedef short Rxp_t;			/* type for regexp tokens */
77 
78 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
79 char rxperr[128];			/* parser error message */
80 
81 static int	 rxp__compile (char *, int);
82 static char	*rxp__expand (int);
83 static int	 rxp__match (char *, int, Rxp_t *, Rxp_t *, char *);
84 
85 int
86 rxp_compile(s)
87 	char *	s;
88 {
89 	return (rxp__compile(s, TRUE));
90 }
91 
92 static int
93 rxp__compile(s, first)
94 	char *s;
95 	int first;
96 {
97 	static Rxp_t *rp;
98 	static char *sp;
99 	Rxp_t *grp_ptr;
100 	Rxp_t *alt_ptr;
101 	int esc, err;
102 
103 	esc = 0;
104 	if (first) {
105 		rp = rxpbuf;
106 		sp = s;
107 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
108 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
109 		*rp++ = 0;
110 	}
111 	*rp++ = ALT_S;
112 	alt_ptr = rp;
113 	*rp++ = 0;
114 	for (; *sp; ++sp) {
115 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
116 			(void)snprintf(rxperr, sizeof(rxperr),
117 			    "regular expression too long %s", s);
118 			return (FALSE);
119 		}
120 		if (*sp == ':' && !esc)
121 			break;
122 		if (esc) {
123 			*rp++ = LIT;
124 			*rp++ = *sp;
125 			esc = 0;
126 		}
127 		else switch (*sp) {
128 		case '\\':
129 			esc = 1;
130 			break;
131 		case '{':
132 		case '[':
133 			*rp++ = GRP_S;
134 			grp_ptr = rp;
135 			*rp++ = 0;
136 			sp++;
137 			if ((err = rxp__compile(s, FALSE)) != TRUE)
138 				return (err);
139 			*rp++ = GRP_E;
140 			*grp_ptr = rp - rxpbuf;
141 			break;
142 		case '}':
143 		case ']':
144 		case '|':
145 			*rp++ = ALT_E;
146 			*alt_ptr = rp - rxpbuf;
147 			if (*sp != ']') {
148 				*rp++ = ALT_S;
149 				alt_ptr = rp;
150 				*rp++ = 0;
151 			}
152 			if (*sp != '|') {
153 				if (*sp != ']') {
154 					*rp++ = ALT_E;
155 					*alt_ptr = rp - rxpbuf;
156 				}
157 				if (first) {
158 					(void)snprintf(rxperr, sizeof(rxperr),
159 					    "unmatched alternator in regexp %s",
160 					     s);
161 					return (FALSE);
162 				}
163 				return (TRUE);
164 			}
165 			break;
166 		default:
167 			*rp++ = LIT;
168 			*rp++ = *sp;
169 			esc = 0;
170 			break;
171 		}
172 	}
173 	if (!first) {
174 		(void)snprintf(rxperr, sizeof(rxperr),
175 		    "unmatched alternator in regexp %s", s);
176 		return (FALSE);
177 	}
178 	*rp++ = ALT_E;
179 	*alt_ptr = rp - rxpbuf;
180 	*rp++ = GRP_E;
181 	*(rxpbuf + 2) = rp - rxpbuf;
182 	*rp++ = EOT;
183 	*rp = END;
184 	return (TRUE);
185 }
186 
187 /*
188  * match string against compiled regular expression
189  */
190 int
191 rxp_match(s)
192 	char *	s;
193 {
194 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
195 }
196 
197 static int
198 rxp__match(s, first, j_succ, j_fail, sp_fail)
199 	char *s;
200 	int first;
201 	Rxp_t *j_succ;		/* jump here on successful alt match */
202 	Rxp_t *j_fail;		/* jump here on failed match */
203 	char *sp_fail;		/* reset sp to here on failed match */
204 {
205 	static Rxp_t *rp;
206 	static char *sp;
207 	int ch;
208 	Rxp_t *grp_end;
209 	int err;
210 
211 	grp_end = NULL;
212 	if (first) {
213 		rp = rxpbuf;
214 		sp = s;
215 	}
216 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
217 		switch(*rp) {
218 		case LIT:
219 			rp++;
220 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
221 			if (ch != *sp++) {
222 				rp = j_fail;
223 				sp = sp_fail;
224 				return (TRUE);
225 			}
226 			rp++;
227 			break;
228 		case SOT:
229 			if (sp != s)
230 				return (FALSE);
231 			rp++;
232 			break;
233 		case EOT:
234 			if (*sp != 0)
235 				return (FALSE);
236 			rp++;
237 			break;
238 		case GRP_S:
239 			rp++;
240 			grp_end = rxpbuf + *rp++;
241 			break;
242 		case ALT_S:
243 			rp++;
244 			if ((err = rxp__match(sp,
245 			    FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
246 				return (err);
247 			break;
248 		case ALT_E:
249 			rp = j_succ;
250 			return (TRUE);
251 		case GRP_E:
252 		default:
253 			return (FALSE);
254 		}
255 	return (*rp != END ? FALSE : TRUE);
256 }
257 
258 /*
259  * Reverse engineer the regular expression, by picking first of all alternates.
260  */
261 char *
262 rxp_expand()
263 {
264 	return (rxp__expand(TRUE));
265 }
266 
267 static char *
268 rxp__expand(first)
269 	int first;
270 {
271 	static char buf[RXP_LINE_SZ/2];
272 	static Rxp_t *rp;
273 	static char *bp;
274 	Rxp_t *grp_ptr;
275 	char *err;
276 
277 	if (first) {
278 		rp = rxpbuf;
279 		bp = buf;
280 	}
281 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
282 		switch(*rp) {
283 		case LIT:
284 			rp++;
285 			*bp++ = *rp++;
286 			break;
287 		case GRP_S:
288 			rp++;
289 			grp_ptr = rxpbuf + *rp;
290 			rp++;
291 			if ((err = rxp__expand(FALSE)) == NULL)
292 				return (err);
293 			rp = grp_ptr;
294 			break;
295 		case ALT_E:
296 			return (buf);
297 		case ALT_S:
298 			rp++;
299 			/* FALLTHROUGH */
300 		case SOT:
301 		case EOT:
302 		case GRP_E:
303 			rp++;
304 			break;
305 		default:
306 			return (NULL);
307 		}
308 	if (first) {
309 		if (*rp != END)
310 			return (NULL);
311 		*bp = '\0';
312 	}
313 	return (buf);
314 }
315