xref: /dragonfly/games/quiz/rxp.c (revision d9f85b33)
1 /*-
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
7  * Commodore Business Machines.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * @(#)rxp.c	8.1 (Berkeley) 5/31/93
34  * $FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $
35  */
36 
37 /*
38  * regular expression parser
39  *
40  * external functions and return values are:
41  * rxp_compile(s)
42  *	TRUE	success
43  *	FALSE	parse failure; error message will be in char rxperr[]
44  * metas are:
45  *	{...}	optional pattern, equivalent to [...|]
46  *	|	alternate pattern
47  *	[...]	pattern delimiters
48  *
49  * rxp_match(s)
50  *	TRUE	string s matches compiled pattern
51  *	FALSE	match failure or regexp error
52  *
53  * rxp_expand()
54  *	char *	reverse-engineered regular expression string
55  *	NULL	regexp error
56  */
57 
58 #include <stdio.h>
59 #include <ctype.h>
60 #include "quiz.h"
61 					/* regexp tokens,	arg */
62 #define	LIT	(-1)			/* literal character,	char */
63 #define	SOT	(-2)			/* start text anchor,	- */
64 #define	EOT	(-3)			/* end text anchor,	- */
65 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
66 #define	GRP_E	(-5)			/* end group,		- */
67 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
68 #define	ALT_E	(-7)			/* alternate ends,	- */
69 #define	END	(-8)			/* end of regexp,	- */
70 
71 typedef short Rxp_t;			/* type for regexp tokens */
72 
73 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
74 char rxperr[128];			/* parser error message */
75 
76 static int	 rxp__compile (char *, int);
77 static char	*rxp__expand (int);
78 static int	 rxp__match (char *, int, Rxp_t *, Rxp_t *, char *);
79 
80 int
rxp_compile(char * s)81 rxp_compile(char *s)
82 {
83 	return (rxp__compile(s, TRUE));
84 }
85 
86 static int
rxp__compile(char * s,int first)87 rxp__compile(char *s, int first)
88 {
89 	static Rxp_t *rp;
90 	static char *sp;
91 	Rxp_t *grp_ptr;
92 	Rxp_t *alt_ptr;
93 	int esc, err;
94 
95 	esc = 0;
96 	if (first) {
97 		rp = rxpbuf;
98 		sp = s;
99 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
100 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
101 		*rp++ = 0;
102 	}
103 	*rp++ = ALT_S;
104 	alt_ptr = rp;
105 	*rp++ = 0;
106 	for (; *sp; ++sp) {
107 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
108 			snprintf(rxperr, sizeof(rxperr),
109 			    "regular expression too long %s", s);
110 			return (FALSE);
111 		}
112 		if (*sp == ':' && !esc)
113 			break;
114 		if (esc) {
115 			*rp++ = LIT;
116 			*rp++ = *sp;
117 			esc = 0;
118 		}
119 		else switch (*sp) {
120 		case '\\':
121 			esc = 1;
122 			break;
123 		case '{':
124 		case '[':
125 			*rp++ = GRP_S;
126 			grp_ptr = rp;
127 			*rp++ = 0;
128 			sp++;
129 			if ((err = rxp__compile(s, FALSE)) != TRUE)
130 				return (err);
131 			*rp++ = GRP_E;
132 			*grp_ptr = rp - rxpbuf;
133 			break;
134 		case '}':
135 		case ']':
136 		case '|':
137 			*rp++ = ALT_E;
138 			*alt_ptr = rp - rxpbuf;
139 			if (*sp != ']') {
140 				*rp++ = ALT_S;
141 				alt_ptr = rp;
142 				*rp++ = 0;
143 			}
144 			if (*sp != '|') {
145 				if (*sp != ']') {
146 					*rp++ = ALT_E;
147 					*alt_ptr = rp - rxpbuf;
148 				}
149 				if (first) {
150 					snprintf(rxperr, sizeof(rxperr),
151 					    "unmatched alternator in regexp %s",
152 					     s);
153 					return (FALSE);
154 				}
155 				return (TRUE);
156 			}
157 			break;
158 		default:
159 			*rp++ = LIT;
160 			*rp++ = *sp;
161 			esc = 0;
162 			break;
163 		}
164 	}
165 	if (!first) {
166 		snprintf(rxperr, sizeof(rxperr),
167 		    "unmatched alternator in regexp %s", s);
168 		return (FALSE);
169 	}
170 	*rp++ = ALT_E;
171 	*alt_ptr = rp - rxpbuf;
172 	*rp++ = GRP_E;
173 	*(rxpbuf + 2) = rp - rxpbuf;
174 	*rp++ = EOT;
175 	*rp = END;
176 	return (TRUE);
177 }
178 
179 /*
180  * match string against compiled regular expression
181  */
182 int
rxp_match(char * s)183 rxp_match(char *s)
184 {
185 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
186 }
187 
188 /*
189  * jump to j_succ on successful alt match
190  * jump to j_fail on failed match
191  * reset sp to sp_fail on failed match
192  */
193 static int
rxp__match(char * s,int first,Rxp_t * j_succ,Rxp_t * j_fail,char * sp_fail)194 rxp__match(char *s, int first, Rxp_t *j_succ, Rxp_t *j_fail, char *sp_fail)
195 {
196 	static Rxp_t *rp;
197 	static char *sp;
198 	int ch;
199 	Rxp_t *grp_end;
200 	int err;
201 
202 	grp_end = NULL;
203 	if (first) {
204 		rp = rxpbuf;
205 		sp = s;
206 	}
207 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
208 		switch(*rp) {
209 		case LIT:
210 			rp++;
211 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
212 			if (ch != *sp++) {
213 				rp = j_fail;
214 				sp = sp_fail;
215 				return (TRUE);
216 			}
217 			rp++;
218 			break;
219 		case SOT:
220 			if (sp != s)
221 				return (FALSE);
222 			rp++;
223 			break;
224 		case EOT:
225 			if (*sp != 0)
226 				return (FALSE);
227 			rp++;
228 			break;
229 		case GRP_S:
230 			rp++;
231 			grp_end = rxpbuf + *rp++;
232 			break;
233 		case ALT_S:
234 			rp++;
235 			if ((err = rxp__match(sp,
236 			    FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
237 				return (err);
238 			break;
239 		case ALT_E:
240 			rp = j_succ;
241 			return (TRUE);
242 		case GRP_E:
243 		default:
244 			return (FALSE);
245 		}
246 	return (*rp != END ? FALSE : TRUE);
247 }
248 
249 /*
250  * Reverse engineer the regular expression, by picking first of all alternates.
251  */
252 char *
rxp_expand(void)253 rxp_expand(void)
254 {
255 	return (rxp__expand(TRUE));
256 }
257 
258 static char *
rxp__expand(int first)259 rxp__expand(int first)
260 {
261 	static char buf[RXP_LINE_SZ/2];
262 	static Rxp_t *rp;
263 	static char *bp;
264 	Rxp_t *grp_ptr;
265 	char *err;
266 
267 	if (first) {
268 		rp = rxpbuf;
269 		bp = buf;
270 	}
271 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
272 		switch(*rp) {
273 		case LIT:
274 			rp++;
275 			*bp++ = *rp++;
276 			break;
277 		case GRP_S:
278 			rp++;
279 			grp_ptr = rxpbuf + *rp;
280 			rp++;
281 			if ((err = rxp__expand(FALSE)) == NULL)
282 				return (err);
283 			rp = grp_ptr;
284 			break;
285 		case ALT_E:
286 			return (buf);
287 		case ALT_S:
288 			rp++;
289 			/* FALLTHROUGH */
290 		case SOT:
291 		case EOT:
292 		case GRP_E:
293 			rp++;
294 			break;
295 		default:
296 			return (NULL);
297 		}
298 	if (first) {
299 		if (*rp != END)
300 			return (NULL);
301 		*bp = '\0';
302 	}
303 	return (buf);
304 }
305