xref: /dragonfly/games/quiz/rxp.c (revision f746689a)
1 /*-
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
7  * Commodore Business Machines.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  * @(#)rxp.c	8.1 (Berkeley) 5/31/93
38  * $FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $
39  * $DragonFly: src/games/quiz/rxp.c,v 1.4 2005/04/25 16:10:24 liamfoy Exp $
40  */
41 
42 /*
43  * regular expression parser
44  *
45  * external functions and return values are:
46  * rxp_compile(s)
47  *	TRUE	success
48  *	FALSE	parse failure; error message will be in char rxperr[]
49  * metas are:
50  *	{...}	optional pattern, equialent to [...|]
51  *	|	alternate pattern
52  *	[...]	pattern delimiters
53  *
54  * rxp_match(s)
55  *	TRUE	string s matches compiled pattern
56  *	FALSE	match failure or regexp error
57  *
58  * rxp_expand()
59  *	char *	reverse-engineered regular expression string
60  *	NULL	regexp error
61  */
62 
63 #include <stdio.h>
64 #include <ctype.h>
65 #include "quiz.h"
66 					/* regexp tokens,	arg */
67 #define	LIT	(-1)			/* literal character,	char */
68 #define	SOT	(-2)			/* start text anchor,	- */
69 #define	EOT	(-3)			/* end text anchor,	- */
70 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
71 #define	GRP_E	(-5)			/* end group,		- */
72 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
73 #define	ALT_E	(-7)			/* alternate ends,	- */
74 #define	END	(-8)			/* end of regexp,	- */
75 
76 typedef short Rxp_t;			/* type for regexp tokens */
77 
78 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
79 char rxperr[128];			/* parser error message */
80 
81 static int	 rxp__compile (char *, int);
82 static char	*rxp__expand (int);
83 static int	 rxp__match (char *, int, Rxp_t *, Rxp_t *, char *);
84 
85 int
86 rxp_compile(char *s)
87 {
88 	return (rxp__compile(s, TRUE));
89 }
90 
91 static int
92 rxp__compile(char *s, int first)
93 {
94 	static Rxp_t *rp;
95 	static char *sp;
96 	Rxp_t *grp_ptr;
97 	Rxp_t *alt_ptr;
98 	int esc, err;
99 
100 	esc = 0;
101 	if (first) {
102 		rp = rxpbuf;
103 		sp = s;
104 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
105 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
106 		*rp++ = 0;
107 	}
108 	*rp++ = ALT_S;
109 	alt_ptr = rp;
110 	*rp++ = 0;
111 	for (; *sp; ++sp) {
112 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
113 			(void)snprintf(rxperr, sizeof(rxperr),
114 			    "regular expression too long %s", s);
115 			return (FALSE);
116 		}
117 		if (*sp == ':' && !esc)
118 			break;
119 		if (esc) {
120 			*rp++ = LIT;
121 			*rp++ = *sp;
122 			esc = 0;
123 		}
124 		else switch (*sp) {
125 		case '\\':
126 			esc = 1;
127 			break;
128 		case '{':
129 		case '[':
130 			*rp++ = GRP_S;
131 			grp_ptr = rp;
132 			*rp++ = 0;
133 			sp++;
134 			if ((err = rxp__compile(s, FALSE)) != TRUE)
135 				return (err);
136 			*rp++ = GRP_E;
137 			*grp_ptr = rp - rxpbuf;
138 			break;
139 		case '}':
140 		case ']':
141 		case '|':
142 			*rp++ = ALT_E;
143 			*alt_ptr = rp - rxpbuf;
144 			if (*sp != ']') {
145 				*rp++ = ALT_S;
146 				alt_ptr = rp;
147 				*rp++ = 0;
148 			}
149 			if (*sp != '|') {
150 				if (*sp != ']') {
151 					*rp++ = ALT_E;
152 					*alt_ptr = rp - rxpbuf;
153 				}
154 				if (first) {
155 					(void)snprintf(rxperr, sizeof(rxperr),
156 					    "unmatched alternator in regexp %s",
157 					     s);
158 					return (FALSE);
159 				}
160 				return (TRUE);
161 			}
162 			break;
163 		default:
164 			*rp++ = LIT;
165 			*rp++ = *sp;
166 			esc = 0;
167 			break;
168 		}
169 	}
170 	if (!first) {
171 		(void)snprintf(rxperr, sizeof(rxperr),
172 		    "unmatched alternator in regexp %s", s);
173 		return (FALSE);
174 	}
175 	*rp++ = ALT_E;
176 	*alt_ptr = rp - rxpbuf;
177 	*rp++ = GRP_E;
178 	*(rxpbuf + 2) = rp - rxpbuf;
179 	*rp++ = EOT;
180 	*rp = END;
181 	return (TRUE);
182 }
183 
184 /*
185  * match string against compiled regular expression
186  */
187 int
188 rxp_match(char *s)
189 {
190 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
191 }
192 
193 /*
194  * jump to j_succ on successful alt match
195  * jump to j_fail on failed match
196  * reset sp to sp_fail on failed match
197  */
198 static int
199 rxp__match(char *s, int first, Rxp_t *j_succ, Rxp_t *j_fail, char *sp_fail)
200 {
201 	static Rxp_t *rp;
202 	static char *sp;
203 	int ch;
204 	Rxp_t *grp_end;
205 	int err;
206 
207 	grp_end = NULL;
208 	if (first) {
209 		rp = rxpbuf;
210 		sp = s;
211 	}
212 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
213 		switch(*rp) {
214 		case LIT:
215 			rp++;
216 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
217 			if (ch != *sp++) {
218 				rp = j_fail;
219 				sp = sp_fail;
220 				return (TRUE);
221 			}
222 			rp++;
223 			break;
224 		case SOT:
225 			if (sp != s)
226 				return (FALSE);
227 			rp++;
228 			break;
229 		case EOT:
230 			if (*sp != 0)
231 				return (FALSE);
232 			rp++;
233 			break;
234 		case GRP_S:
235 			rp++;
236 			grp_end = rxpbuf + *rp++;
237 			break;
238 		case ALT_S:
239 			rp++;
240 			if ((err = rxp__match(sp,
241 			    FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
242 				return (err);
243 			break;
244 		case ALT_E:
245 			rp = j_succ;
246 			return (TRUE);
247 		case GRP_E:
248 		default:
249 			return (FALSE);
250 		}
251 	return (*rp != END ? FALSE : TRUE);
252 }
253 
254 /*
255  * Reverse engineer the regular expression, by picking first of all alternates.
256  */
257 char *
258 rxp_expand(void)
259 {
260 	return (rxp__expand(TRUE));
261 }
262 
263 static char *
264 rxp__expand(int first)
265 {
266 	static char buf[RXP_LINE_SZ/2];
267 	static Rxp_t *rp;
268 	static char *bp;
269 	Rxp_t *grp_ptr;
270 	char *err;
271 
272 	if (first) {
273 		rp = rxpbuf;
274 		bp = buf;
275 	}
276 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
277 		switch(*rp) {
278 		case LIT:
279 			rp++;
280 			*bp++ = *rp++;
281 			break;
282 		case GRP_S:
283 			rp++;
284 			grp_ptr = rxpbuf + *rp;
285 			rp++;
286 			if ((err = rxp__expand(FALSE)) == NULL)
287 				return (err);
288 			rp = grp_ptr;
289 			break;
290 		case ALT_E:
291 			return (buf);
292 		case ALT_S:
293 			rp++;
294 			/* FALLTHROUGH */
295 		case SOT:
296 		case EOT:
297 		case GRP_E:
298 			rp++;
299 			break;
300 		default:
301 			return (NULL);
302 		}
303 	if (first) {
304 		if (*rp != END)
305 			return (NULL);
306 		*bp = '\0';
307 	}
308 	return (buf);
309 }
310