xref: /openbsd/games/quiz/rxp.c (revision 09467b48)
1 /*	$OpenBSD: rxp.c,v 1.8 2009/10/27 23:59:26 deraadt Exp $	*/
2 /*	$NetBSD: rxp.c,v 1.5 1995/04/22 10:17:00 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1991, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
10  * Commodore Business Machines.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 /*
38  * regular expression parser
39  *
40  * external functions and return values are:
41  * rxp_compile(s)
42  *	TRUE	success
43  *	FALSE	parse failure; error message will be in char rxperr[]
44  * metas are:
45  *	{...}	optional pattern, equialent to [...|]
46  *	|	alternate pattern
47  *	[...]	pattern delimiters
48  *
49  * rxp_match(s)
50  *	TRUE	string s matches compiled pattern
51  *	FALSE	match failure or regexp error
52  *
53  * rxp_expand()
54  *	char *	reverse-engineered regular expression string
55  *	NULL	regexp error
56  */
57 
58 #include <stdio.h>
59 #include <ctype.h>
60 #include "quiz.h"
61 					/* regexp tokens,	arg */
62 #define	LIT	(-1)			/* literal character,	char */
63 #define	SOT	(-2)			/* start text anchor,	- */
64 #define	EOT	(-3)			/* end text anchor,	- */
65 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
66 #define	GRP_E	(-5)			/* end group,		- */
67 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
68 #define	ALT_E	(-7)			/* alternate ends,	- */
69 #define	END	(-8)			/* end of regexp,	- */
70 
71 typedef short Rxp_t;			/* type for regexp tokens */
72 
73 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
74 char rxperr[128];			/* parser error message */
75 
76 static int	 rxp__compile(const char *, int);
77 static char	*rxp__expand(int);
78 static int	 rxp__match(const char *, int, Rxp_t *, Rxp_t *, const char *);
79 
80 int
81 rxp_compile(const char *s)
82 {
83 	return (rxp__compile(s, TRUE));
84 }
85 
86 static int
87 rxp__compile(const char *s, int first)
88 {
89 	static Rxp_t *rp;
90 	static const char *sp;
91 	Rxp_t *grp_ptr;
92 	Rxp_t *alt_ptr;
93 	int esc, err;
94 
95 	if (s == NULL) {
96 		(void)snprintf(rxperr, sizeof(rxperr),
97 		    "null string sent to rxp_compile");
98 		return(FALSE);
99 	}
100 	esc = 0;
101 	if (first) {
102 		rp = rxpbuf;
103 		sp = s;
104 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
105 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
106 		*rp++ = 0;
107 	}
108 	*rp++ = ALT_S;
109 	alt_ptr = rp;
110 	*rp++ = 0;
111 	for (; *sp; ++sp) {
112 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
113 			(void)snprintf(rxperr, sizeof(rxperr),
114 			    "regular expression too long %s", s);
115 			return (FALSE);
116 		}
117 		if (*sp == ':' && !esc)
118 			break;
119 		if (esc) {
120 			*rp++ = LIT;
121 			*rp++ = *sp;
122 			esc = 0;
123 		}
124 		else switch (*sp) {
125 		case '\\':
126 			esc = 1;
127 			break;
128 		case '{':
129 		case '[':
130 			*rp++ = GRP_S;
131 			grp_ptr = rp;
132 			*rp++ = 0;
133 			sp++;
134 			if ((err = rxp__compile(s, FALSE)) != TRUE)
135 				return (err);
136 			*rp++ = GRP_E;
137 			*grp_ptr = rp - rxpbuf;
138 			break;
139 		case '}':
140 		case ']':
141 		case '|':
142 			*rp++ = ALT_E;
143 			*alt_ptr = rp - rxpbuf;
144 			if (*sp != ']') {
145 				*rp++ = ALT_S;
146 				alt_ptr = rp;
147 				*rp++ = 0;
148 			}
149 			if (*sp != '|') {
150 				if (*sp != ']') {
151 					*rp++ = ALT_E;
152 					*alt_ptr = rp - rxpbuf;
153 				}
154 				if (first) {
155 					(void)snprintf(rxperr, sizeof(rxperr),
156 					    "unmatched alternator in regexp %s",
157 					     s);
158 					return (FALSE);
159 				}
160 				return (TRUE);
161 			}
162 			break;
163 		default:
164 			*rp++ = LIT;
165 			*rp++ = *sp;
166 			esc = 0;
167 			break;
168 		}
169 	}
170 	if (!first) {
171 		(void)snprintf(rxperr, sizeof(rxperr),
172 		    "unmatched alternator in regexp %s", s);
173 		return (FALSE);
174 	}
175 	*rp++ = ALT_E;
176 	*alt_ptr = rp - rxpbuf;
177 	*rp++ = GRP_E;
178 	*(rxpbuf + 2) = rp - rxpbuf;
179 	*rp++ = EOT;
180 	*rp = END;
181 	return (TRUE);
182 }
183 
184 /*
185  * match string against compiled regular expression
186  */
187 int
188 rxp_match(const char *s)
189 {
190 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
191 }
192 
193 /*
194  * j_succ : jump here on successful alt match
195  * j_fail : jump here on failed match
196  * sp_fail: reset sp to here on failed match
197  */
198 static int
199 rxp__match(const char *s, int first, Rxp_t *j_succ, Rxp_t *j_fail,
200            const char *sp_fail)
201 {
202 	static Rxp_t *rp;
203 	static const char *sp;
204 	int ch;
205 	Rxp_t *grp_end = NULL;
206 	int err;
207 
208 	if (first) {
209 		rp = rxpbuf;
210 		sp = s;
211 	}
212 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
213 		switch(*rp) {
214 		case LIT:
215 			rp++;
216 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
217 			if (ch != *sp++) {
218 				rp = j_fail;
219 				sp = sp_fail;
220 				return (TRUE);
221 			}
222 			rp++;
223 			break;
224 		case SOT:
225 			if (sp != s)
226 				return (FALSE);
227 			rp++;
228 			break;
229 		case EOT:
230 			if (*sp != 0)
231 				return (FALSE);
232 			rp++;
233 			break;
234 		case GRP_S:
235 			rp++;
236 			grp_end = rxpbuf + *rp++;
237 			break;
238 		case ALT_S:
239 			rp++;
240 			if ((err = rxp__match(sp,
241 			    FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
242 				return (err);
243 			break;
244 		case ALT_E:
245 			rp = j_succ;
246 			return (TRUE);
247 		case GRP_E:
248 		default:
249 			return (FALSE);
250 		}
251 	return (*rp != END ? FALSE : TRUE);
252 }
253 
254 /*
255  * Reverse engineer the regular expression, by picking first of all alternates.
256  */
257 char *
258 rxp_expand(void)
259 {
260 	return (rxp__expand(TRUE));
261 }
262 
263 static char *
264 rxp__expand(int first)
265 {
266 	static char buf[RXP_LINE_SZ/2];
267 	static Rxp_t *rp;
268 	static char *bp;
269 	Rxp_t *grp_ptr;
270 	char *err;
271 
272 	if (first) {
273 		rp = rxpbuf;
274 		bp = buf;
275 	}
276 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
277 		switch(*rp) {
278 		case LIT:
279 			rp++;
280 			*bp++ = *rp++;
281 			break;
282 		case GRP_S:
283 			rp++;
284 			grp_ptr = rxpbuf + *rp;
285 			rp++;
286 			if ((err = rxp__expand(FALSE)) == NULL)
287 				return (err);
288 			rp = grp_ptr;
289 			break;
290 		case ALT_E:
291 			return (buf);
292 		case ALT_S:
293 			rp++;
294 			/* FALLTHROUGH */
295 		case SOT:
296 		case EOT:
297 		case GRP_E:
298 			rp++;
299 			break;
300 		default:
301 			return (NULL);
302 		}
303 	if (first) {
304 		if (*rp != END)
305 			return (NULL);
306 		*bp = '\0';
307 	}
308 	return (buf);
309 }
310