xref: /dragonfly/games/quiz/rxp.c (revision 73610d44)
1 /*-
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
7  * Commodore Business Machines.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#)rxp.c	8.1 (Berkeley) 5/31/93
34  * $FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $
35  * $DragonFly: src/games/quiz/rxp.c,v 1.4 2005/04/25 16:10:24 liamfoy Exp $
36  */
37 
38 /*
39  * regular expression parser
40  *
41  * external functions and return values are:
42  * rxp_compile(s)
43  *	TRUE	success
44  *	FALSE	parse failure; error message will be in char rxperr[]
45  * metas are:
46  *	{...}	optional pattern, equialent to [...|]
47  *	|	alternate pattern
48  *	[...]	pattern delimiters
49  *
50  * rxp_match(s)
51  *	TRUE	string s matches compiled pattern
52  *	FALSE	match failure or regexp error
53  *
54  * rxp_expand()
55  *	char *	reverse-engineered regular expression string
56  *	NULL	regexp error
57  */
58 
59 #include <stdio.h>
60 #include <ctype.h>
61 #include "quiz.h"
62 					/* regexp tokens,	arg */
63 #define	LIT	(-1)			/* literal character,	char */
64 #define	SOT	(-2)			/* start text anchor,	- */
65 #define	EOT	(-3)			/* end text anchor,	- */
66 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
67 #define	GRP_E	(-5)			/* end group,		- */
68 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
69 #define	ALT_E	(-7)			/* alternate ends,	- */
70 #define	END	(-8)			/* end of regexp,	- */
71 
72 typedef short Rxp_t;			/* type for regexp tokens */
73 
74 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
75 char rxperr[128];			/* parser error message */
76 
77 static int	 rxp__compile (char *, int);
78 static char	*rxp__expand (int);
79 static int	 rxp__match (char *, int, Rxp_t *, Rxp_t *, char *);
80 
81 int
82 rxp_compile(char *s)
83 {
84 	return (rxp__compile(s, TRUE));
85 }
86 
87 static int
88 rxp__compile(char *s, int first)
89 {
90 	static Rxp_t *rp;
91 	static char *sp;
92 	Rxp_t *grp_ptr;
93 	Rxp_t *alt_ptr;
94 	int esc, err;
95 
96 	esc = 0;
97 	if (first) {
98 		rp = rxpbuf;
99 		sp = s;
100 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
101 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
102 		*rp++ = 0;
103 	}
104 	*rp++ = ALT_S;
105 	alt_ptr = rp;
106 	*rp++ = 0;
107 	for (; *sp; ++sp) {
108 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
109 			(void)snprintf(rxperr, sizeof(rxperr),
110 			    "regular expression too long %s", s);
111 			return (FALSE);
112 		}
113 		if (*sp == ':' && !esc)
114 			break;
115 		if (esc) {
116 			*rp++ = LIT;
117 			*rp++ = *sp;
118 			esc = 0;
119 		}
120 		else switch (*sp) {
121 		case '\\':
122 			esc = 1;
123 			break;
124 		case '{':
125 		case '[':
126 			*rp++ = GRP_S;
127 			grp_ptr = rp;
128 			*rp++ = 0;
129 			sp++;
130 			if ((err = rxp__compile(s, FALSE)) != TRUE)
131 				return (err);
132 			*rp++ = GRP_E;
133 			*grp_ptr = rp - rxpbuf;
134 			break;
135 		case '}':
136 		case ']':
137 		case '|':
138 			*rp++ = ALT_E;
139 			*alt_ptr = rp - rxpbuf;
140 			if (*sp != ']') {
141 				*rp++ = ALT_S;
142 				alt_ptr = rp;
143 				*rp++ = 0;
144 			}
145 			if (*sp != '|') {
146 				if (*sp != ']') {
147 					*rp++ = ALT_E;
148 					*alt_ptr = rp - rxpbuf;
149 				}
150 				if (first) {
151 					(void)snprintf(rxperr, sizeof(rxperr),
152 					    "unmatched alternator in regexp %s",
153 					     s);
154 					return (FALSE);
155 				}
156 				return (TRUE);
157 			}
158 			break;
159 		default:
160 			*rp++ = LIT;
161 			*rp++ = *sp;
162 			esc = 0;
163 			break;
164 		}
165 	}
166 	if (!first) {
167 		(void)snprintf(rxperr, sizeof(rxperr),
168 		    "unmatched alternator in regexp %s", s);
169 		return (FALSE);
170 	}
171 	*rp++ = ALT_E;
172 	*alt_ptr = rp - rxpbuf;
173 	*rp++ = GRP_E;
174 	*(rxpbuf + 2) = rp - rxpbuf;
175 	*rp++ = EOT;
176 	*rp = END;
177 	return (TRUE);
178 }
179 
180 /*
181  * match string against compiled regular expression
182  */
183 int
184 rxp_match(char *s)
185 {
186 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
187 }
188 
189 /*
190  * jump to j_succ on successful alt match
191  * jump to j_fail on failed match
192  * reset sp to sp_fail on failed match
193  */
194 static int
195 rxp__match(char *s, int first, Rxp_t *j_succ, Rxp_t *j_fail, char *sp_fail)
196 {
197 	static Rxp_t *rp;
198 	static char *sp;
199 	int ch;
200 	Rxp_t *grp_end;
201 	int err;
202 
203 	grp_end = NULL;
204 	if (first) {
205 		rp = rxpbuf;
206 		sp = s;
207 	}
208 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
209 		switch(*rp) {
210 		case LIT:
211 			rp++;
212 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
213 			if (ch != *sp++) {
214 				rp = j_fail;
215 				sp = sp_fail;
216 				return (TRUE);
217 			}
218 			rp++;
219 			break;
220 		case SOT:
221 			if (sp != s)
222 				return (FALSE);
223 			rp++;
224 			break;
225 		case EOT:
226 			if (*sp != 0)
227 				return (FALSE);
228 			rp++;
229 			break;
230 		case GRP_S:
231 			rp++;
232 			grp_end = rxpbuf + *rp++;
233 			break;
234 		case ALT_S:
235 			rp++;
236 			if ((err = rxp__match(sp,
237 			    FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
238 				return (err);
239 			break;
240 		case ALT_E:
241 			rp = j_succ;
242 			return (TRUE);
243 		case GRP_E:
244 		default:
245 			return (FALSE);
246 		}
247 	return (*rp != END ? FALSE : TRUE);
248 }
249 
250 /*
251  * Reverse engineer the regular expression, by picking first of all alternates.
252  */
253 char *
254 rxp_expand(void)
255 {
256 	return (rxp__expand(TRUE));
257 }
258 
259 static char *
260 rxp__expand(int first)
261 {
262 	static char buf[RXP_LINE_SZ/2];
263 	static Rxp_t *rp;
264 	static char *bp;
265 	Rxp_t *grp_ptr;
266 	char *err;
267 
268 	if (first) {
269 		rp = rxpbuf;
270 		bp = buf;
271 	}
272 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
273 		switch(*rp) {
274 		case LIT:
275 			rp++;
276 			*bp++ = *rp++;
277 			break;
278 		case GRP_S:
279 			rp++;
280 			grp_ptr = rxpbuf + *rp;
281 			rp++;
282 			if ((err = rxp__expand(FALSE)) == NULL)
283 				return (err);
284 			rp = grp_ptr;
285 			break;
286 		case ALT_E:
287 			return (buf);
288 		case ALT_S:
289 			rp++;
290 			/* FALLTHROUGH */
291 		case SOT:
292 		case EOT:
293 		case GRP_E:
294 			rp++;
295 			break;
296 		default:
297 			return (NULL);
298 		}
299 	if (first) {
300 		if (*rp != END)
301 			return (NULL);
302 		*bp = '\0';
303 	}
304 	return (buf);
305 }
306