xref: /original-bsd/games/quiz/rxp.c (revision e59fb703)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Jim R. Oldroyd at The Instruction Set.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #ifndef lint
12 static char sccsid[] = "@(#)rxp.c	5.1 (Berkeley) 11/10/91";
13 #endif /* not lint */
14 
15 /*
16  * regular expression parser
17  *
18  * external functions and return values are:
19  * rxp_compile(s)
20  *	TRUE	success
21  *	FALSE	parse failure; error message will be in char rxperr[]
22  * metas are:
23  *	{...}	optional pattern, equialent to [...|]
24  *	|	alternate pattern
25  *	[...]	pattern delimiters
26  *
27  * rxp_match(s)
28  *	TRUE	string s matches compiled pattern
29  *	FALSE	match failure or regexp error
30  *
31  * rxp_expand()
32  *	char *	reverse-engineered regular expression string
33  *	NULL	regexp error
34  */
35 
36 #include <stdio.h>
37 #include <ctype.h>
38 #include "quiz.h"
39 					/* regexp tokens,	arg */
40 #define	LIT	(-1)			/* literal character,	char */
41 #define	SOT	(-2)			/* start text anchor,	- */
42 #define	EOT	(-3)			/* end text anchor,	- */
43 #define	GRP_S	(-4)			/* start alternate grp,	ptr_to_end */
44 #define	GRP_E	(-5)			/* end group,		- */
45 #define	ALT_S	(-6)			/* alternate starts,	ptr_to_next */
46 #define	ALT_E	(-7)			/* alternate ends,	- */
47 #define	END	(-8)			/* end of regexp,	- */
48 
49 typedef short Rxp_t;			/* type for regexp tokens */
50 
51 static Rxp_t rxpbuf[RXP_LINE_SZ];	/* compiled regular expression buffer */
52 char rxperr[128];			/* parser error message */
53 
54 int	 rxp__compile __P((char *, int));
55 char	*rxp__expand __P((int));
56 int	 rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *));
57 
58 int
59 rxp_compile(s)
60 	register char *	s;
61 {
62 	return (rxp__compile(s, TRUE));
63 }
64 
65 static int
66 rxp__compile(s, first)
67 	register char *s;
68 	int first;
69 {
70 	static Rxp_t *rp;
71 	static char *sp;
72 	Rxp_t *grp_ptr;
73 	Rxp_t *alt_ptr;
74 	int esc, err;
75 
76 	esc = 0;
77 	if (first) {
78 		rp = rxpbuf;
79 		sp = s;
80 		*rp++ = SOT;	/* auto-anchor: pat is really ^pat$ */
81 		*rp++ = GRP_S;	/* auto-group: ^pat$ is really ^[pat]$ */
82 		*rp++ = 0;
83 	}
84 	*rp++ = ALT_S;
85 	alt_ptr = rp;
86 	*rp++ = 0;
87 	for (; *sp; ++sp) {
88 		if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
89 			(void)snprintf(rxperr, sizeof(rxperr),
90 			    "regular expression too long %s", s);
91 			return (FALSE);
92 		}
93 		if (*sp == ':' && !esc)
94 			break;
95 		if (esc) {
96 			*rp++ = LIT;
97 			*rp++ = *sp;
98 			esc = 0;
99 		}
100 		else switch (*sp) {
101 		case '\\':
102 			esc = 1;
103 			break;
104 		case '{':
105 		case '[':
106 			*rp++ = GRP_S;
107 			grp_ptr = rp;
108 			*rp++ = 0;
109 			sp++;
110 			if ((err = rxp__compile(s, FALSE)) != TRUE)
111 				return (err);
112 			*rp++ = GRP_E;
113 			*grp_ptr = rp - rxpbuf;
114 			break;
115 		case '}':
116 		case ']':
117 		case '|':
118 			*rp++ = ALT_E;
119 			*alt_ptr = rp - rxpbuf;
120 			if (*sp != ']') {
121 				*rp++ = ALT_S;
122 				alt_ptr = rp;
123 				*rp++ = 0;
124 			}
125 			if (*sp != '|') {
126 				if (*sp != ']') {
127 					*rp++ = ALT_E;
128 					*alt_ptr = rp - rxpbuf;
129 				}
130 				if (first) {
131 					(void)snprintf(rxperr, sizeof(rxperr),
132 					    "unmatched alternator in regexp %s",
133 					     s);
134 					return (FALSE);
135 				}
136 				return (TRUE);
137 			}
138 			break;
139 		default:
140 			*rp++ = LIT;
141 			*rp++ = *sp;
142 			esc = 0;
143 			break;
144 		}
145 	}
146 	if (!first) {
147 		(void)snprintf(rxperr, sizeof(rxperr),
148 		    "unmatched alternator in regexp %s", s);
149 		return (FALSE);
150 	}
151 	*rp++ = ALT_E;
152 	*alt_ptr = rp - rxpbuf;
153 	*rp++ = GRP_E;
154 	*(rxpbuf + 2) = rp - rxpbuf;
155 	*rp++ = EOT;
156 	*rp = END;
157 	return (TRUE);
158 }
159 
160 /*
161  * match string against compiled regular expression
162  */
163 int
164 rxp_match(s)
165 	register char *	s;
166 {
167 	return (rxp__match(s, TRUE, NULL, NULL, NULL));
168 }
169 
170 static int
171 rxp__match(s, first, j_succ, j_fail, sp_fail)
172 	char *s;
173 	int first;
174 	Rxp_t *j_succ;		/* jump here on successful alt match */
175 	Rxp_t *j_fail;		/* jump here on failed match */
176 	char *sp_fail;		/* reset sp to here on failed match */
177 {
178 	static Rxp_t *rp;
179 	static char *sp;
180 	register int ch;
181 	Rxp_t *grp_end;
182 	int err;
183 
184 	if (first) {
185 		rp = rxpbuf;
186 		sp = s;
187 	}
188 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
189 		switch(*rp) {
190 		case LIT:
191 			rp++;
192 			ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
193 			if (ch != *sp++) {
194 				rp = j_fail;
195 				sp = sp_fail;
196 				return (TRUE);
197 			}
198 			rp++;
199 			break;
200 		case SOT:
201 			if (sp != s)
202 				return (FALSE);
203 			rp++;
204 			break;
205 		case EOT:
206 			if (*sp != 0)
207 				return (FALSE);
208 			rp++;
209 			break;
210 		case GRP_S:
211 			rp++;
212 			grp_end = rxpbuf + *rp++;
213 			break;
214 		case ALT_S:
215 			rp++;
216 			if ((err = rxp__match(sp,
217 			    FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
218 				return (err);
219 			break;
220 		case ALT_E:
221 			rp = j_succ;
222 			return (TRUE);
223 		case GRP_E:
224 		default:
225 			return (FALSE);
226 		}
227 	return (*rp != END ? FALSE : TRUE);
228 }
229 
230 /*
231  * Reverse engineer the regular expression, by picking first of all alternates.
232  */
233 char *
234 rxp_expand()
235 {
236 	return (rxp__expand(TRUE));
237 }
238 
239 static char *
240 rxp__expand(first)
241 	int first;
242 {
243 	static char buf[RXP_LINE_SZ/2];
244 	static Rxp_t *rp;
245 	static char *bp;
246 	Rxp_t *grp_ptr;
247 	char *err;
248 
249 	if (first) {
250 		rp = rxpbuf;
251 		bp = buf;
252 	}
253 	while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
254 		switch(*rp) {
255 		case LIT:
256 			rp++;
257 			*bp++ = *rp++;
258 			break;
259 		case GRP_S:
260 			rp++;
261 			grp_ptr = rxpbuf + *rp;
262 			rp++;
263 			if ((err = rxp__expand(FALSE)) == NULL)
264 				return (err);
265 			rp = grp_ptr;
266 			break;
267 		case ALT_E:
268 			return (buf);
269 		case ALT_S:
270 			rp++;
271 			/* FALLTHROUGH */
272 		case SOT:
273 		case EOT:
274 		case GRP_E:
275 			rp++;
276 			break;
277 		default:
278 			return (NULL);
279 		}
280 	if (first) {
281 		if (*rp != END)
282 			return (NULL);
283 		*bp = '\0';
284 	}
285 	return (buf);
286 }
287