xref: /original-bsd/games/quiz/rxp.c (revision 07303858)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
7  * Commodore Business Machines.
8  *
9  * %sccs.include.redist.c%
10  */
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)rxp.c	5.3 (Berkeley) 02/03/93";
14 #endif /* not lint */
15 
16 /*
17  * regular expression parser
18  *
19  * external functions and return values are:
20  * rxp_compile(s)
21  *	TRUE	success
22  *	FALSE	parse failure; error message will be in char rxperr[]
23  * metas are:
24  *	{...}	optional pattern, equialent to [...|]
25  *	|	alternate pattern
26  *	[...]	pattern delimiters
27  *
28  * rxp_match(s)
29  *	TRUE	string s matches compiled pattern
30  *	FALSE	match failure or regexp error
31  *
32  * rxp_expand()
33  *	char *	reverse-engineered regular expression string
34  *	NULL	regexp error
35  */
36 
37 #include <stdio.h>
38 #include <ctype.h>
39 #include "quiz.h"
40 					/* regexp tokens,	arg */
41 #define	LIT	(-1)			/* literal character,	char */
42 #define	SOT	(-2)			/* start text anchor,	- */
43 #define	EOT	(-3)			/* end text anchor,	- */
44 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
45 #define	GRP_E	(-5)			/* end group,		- */
46 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
47 #define	ALT_E	(-7)			/* alternate ends,	- */
48 #define	END	(-8)			/* end of regexp,	- */
49 
50 typedef short Rxp_t;			/* type for regexp tokens */
51 
52 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
53 char rxperr[128];			/* parser error message */
54 
55 static int	 rxp__compile __P((char *, int));
56 static char	*rxp__expand __P((int));
57 static int	 rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *));
58 
59 int
60 rxp_compile(s)
61 	register char *	s;
62 {
63 	return (rxp__compile(s, TRUE));
64 }
65 
66 static int
67 rxp__compile(s, first)
68 	register char *s;
69 	int first;
70 {
71 	static Rxp_t *rp;
72 	static char *sp;
73 	Rxp_t *grp_ptr;
74 	Rxp_t *alt_ptr;
75 	int esc, err;
76 
77 	esc = 0;
78 	if (first) {
79 		rp = rxpbuf;
80 		sp = s;
81 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
82 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
83 		*rp++ = 0;
84 	}
85 	*rp++ = ALT_S;
86 	alt_ptr = rp;
87 	*rp++ = 0;
88 	for (; *sp; ++sp) {
89 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
90 			(void)snprintf(rxperr, sizeof(rxperr),
91 			    "regular expression too long %s", s);
92 			return (FALSE);
93 		}
94 		if (*sp == ':' && !esc)
95 			break;
96 		if (esc) {
97 			*rp++ = LIT;
98 			*rp++ = *sp;
99 			esc = 0;
100 		}
101 		else switch (*sp) {
102 		case '\\':
103 			esc = 1;
104 			break;
105 		case '{':
106 		case '[':
107 			*rp++ = GRP_S;
108 			grp_ptr = rp;
109 			*rp++ = 0;
110 			sp++;
111 			if ((err = rxp__compile(s, FALSE)) != TRUE)
112 				return (err);
113 			*rp++ = GRP_E;
114 			*grp_ptr = rp - rxpbuf;
115 			break;
116 		case '}':
117 		case ']':
118 		case '|':
119 			*rp++ = ALT_E;
120 			*alt_ptr = rp - rxpbuf;
121 			if (*sp != ']') {
122 				*rp++ = ALT_S;
123 				alt_ptr = rp;
124 				*rp++ = 0;
125 			}
126 			if (*sp != '|') {
127 				if (*sp != ']') {
128 					*rp++ = ALT_E;
129 					*alt_ptr = rp - rxpbuf;
130 				}
131 				if (first) {
132 					(void)snprintf(rxperr, sizeof(rxperr),
133 					    "unmatched alternator in regexp %s",
134 					     s);
135 					return (FALSE);
136 				}
137 				return (TRUE);
138 			}
139 			break;
140 		default:
141 			*rp++ = LIT;
142 			*rp++ = *sp;
143 			esc = 0;
144 			break;
145 		}
146 	}
147 	if (!first) {
148 		(void)snprintf(rxperr, sizeof(rxperr),
149 		    "unmatched alternator in regexp %s", s);
150 		return (FALSE);
151 	}
152 	*rp++ = ALT_E;
153 	*alt_ptr = rp - rxpbuf;
154 	*rp++ = GRP_E;
155 	*(rxpbuf + 2) = rp - rxpbuf;
156 	*rp++ = EOT;
157 	*rp = END;
158 	return (TRUE);
159 }
160 
161 /*
162  * match string against compiled regular expression
163  */
164 int
165 rxp_match(s)
166 	register char *	s;
167 {
168 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
169 }
170 
171 static int
172 rxp__match(s, first, j_succ, j_fail, sp_fail)
173 	char *s;
174 	int first;
175 	Rxp_t *j_succ;		/* jump here on successful alt match */
176 	Rxp_t *j_fail;		/* jump here on failed match */
177 	char *sp_fail;		/* reset sp to here on failed match */
178 {
179 	static Rxp_t *rp;
180 	static char *sp;
181 	register int ch;
182 	Rxp_t *grp_end;
183 	int err;
184 
185 	if (first) {
186 		rp = rxpbuf;
187 		sp = s;
188 	}
189 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
190 		switch(*rp) {
191 		case LIT:
192 			rp++;
193 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
194 			if (ch != *sp++) {
195 				rp = j_fail;
196 				sp = sp_fail;
197 				return (TRUE);
198 			}
199 			rp++;
200 			break;
201 		case SOT:
202 			if (sp != s)
203 				return (FALSE);
204 			rp++;
205 			break;
206 		case EOT:
207 			if (*sp != 0)
208 				return (FALSE);
209 			rp++;
210 			break;
211 		case GRP_S:
212 			rp++;
213 			grp_end = rxpbuf + *rp++;
214 			break;
215 		case ALT_S:
216 			rp++;
217 			if ((err = rxp__match(sp,
218 			    FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
219 				return (err);
220 			break;
221 		case ALT_E:
222 			rp = j_succ;
223 			return (TRUE);
224 		case GRP_E:
225 		default:
226 			return (FALSE);
227 		}
228 	return (*rp != END ? FALSE : TRUE);
229 }
230 
231 /*
232  * Reverse engineer the regular expression, by picking first of all alternates.
233  */
234 char *
235 rxp_expand()
236 {
237 	return (rxp__expand(TRUE));
238 }
239 
240 static char *
241 rxp__expand(first)
242 	int first;
243 {
244 	static char buf[RXP_LINE_SZ/2];
245 	static Rxp_t *rp;
246 	static char *bp;
247 	Rxp_t *grp_ptr;
248 	char *err;
249 
250 	if (first) {
251 		rp = rxpbuf;
252 		bp = buf;
253 	}
254 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
255 		switch(*rp) {
256 		case LIT:
257 			rp++;
258 			*bp++ = *rp++;
259 			break;
260 		case GRP_S:
261 			rp++;
262 			grp_ptr = rxpbuf + *rp;
263 			rp++;
264 			if ((err = rxp__expand(FALSE)) == NULL)
265 				return (err);
266 			rp = grp_ptr;
267 			break;
268 		case ALT_E:
269 			return (buf);
270 		case ALT_S:
271 			rp++;
272 			/* FALLTHROUGH */
273 		case SOT:
274 		case EOT:
275 		case GRP_E:
276 			rp++;
277 			break;
278 		default:
279 			return (NULL);
280 		}
281 	if (first) {
282 		if (*rp != END)
283 			return (NULL);
284 		*bp = '\0';
285 	}
286 	return (buf);
287 }
288