1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
7 * Commodore Business Machines.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)rxp.c 8.1 (Berkeley) 5/31/93
34 * $FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $
35 */
36
37 /*
38 * regular expression parser
39 *
40 * external functions and return values are:
41 * rxp_compile(s)
42 * TRUE success
43 * FALSE parse failure; error message will be in char rxperr[]
44 * metas are:
45 * {...} optional pattern, equialent to [...|]
46 * | alternate pattern
47 * [...] pattern delimiters
48 *
49 * rxp_match(s)
50 * TRUE string s matches compiled pattern
51 * FALSE match failure or regexp error
52 *
53 * rxp_expand()
54 * char * reverse-engineered regular expression string
55 * NULL regexp error
56 */
57
58 #include <stdio.h>
59 #include <ctype.h>
60 #include "quiz.h"
61 /* regexp tokens, arg */
62 #define LIT (-1) /* literal character, char */
63 #define SOT (-2) /* start text anchor, - */
64 #define EOT (-3) /* end text anchor, - */
65 #define GRP_S (-4) /* start alternate grp, ptr_to_end */
66 #define GRP_E (-5) /* end group, - */
67 #define ALT_S (-6) /* alternate starts, ptr_to_next */
68 #define ALT_E (-7) /* alternate ends, - */
69 #define END (-8) /* end of regexp, - */
70
71 typedef short Rxp_t; /* type for regexp tokens */
72
73 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */
74 char rxperr[128]; /* parser error message */
75
76 static int rxp__compile (char *, int);
77 static char *rxp__expand (int);
78 static int rxp__match (char *, int, Rxp_t *, Rxp_t *, char *);
79
80 int
rxp_compile(char * s)81 rxp_compile(char *s)
82 {
83 return (rxp__compile(s, TRUE));
84 }
85
86 static int
rxp__compile(char * s,int first)87 rxp__compile(char *s, int first)
88 {
89 static Rxp_t *rp;
90 static char *sp;
91 Rxp_t *grp_ptr;
92 Rxp_t *alt_ptr;
93 int esc, err;
94
95 esc = 0;
96 if (first) {
97 rp = rxpbuf;
98 sp = s;
99 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */
100 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */
101 *rp++ = 0;
102 }
103 *rp++ = ALT_S;
104 alt_ptr = rp;
105 *rp++ = 0;
106 for (; *sp; ++sp) {
107 if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
108 snprintf(rxperr, sizeof(rxperr),
109 "regular expression too long %s", s);
110 return (FALSE);
111 }
112 if (*sp == ':' && !esc)
113 break;
114 if (esc) {
115 *rp++ = LIT;
116 *rp++ = *sp;
117 esc = 0;
118 }
119 else switch (*sp) {
120 case '\\':
121 esc = 1;
122 break;
123 case '{':
124 case '[':
125 *rp++ = GRP_S;
126 grp_ptr = rp;
127 *rp++ = 0;
128 sp++;
129 if ((err = rxp__compile(s, FALSE)) != TRUE)
130 return (err);
131 *rp++ = GRP_E;
132 *grp_ptr = rp - rxpbuf;
133 break;
134 case '}':
135 case ']':
136 case '|':
137 *rp++ = ALT_E;
138 *alt_ptr = rp - rxpbuf;
139 if (*sp != ']') {
140 *rp++ = ALT_S;
141 alt_ptr = rp;
142 *rp++ = 0;
143 }
144 if (*sp != '|') {
145 if (*sp != ']') {
146 *rp++ = ALT_E;
147 *alt_ptr = rp - rxpbuf;
148 }
149 if (first) {
150 snprintf(rxperr, sizeof(rxperr),
151 "unmatched alternator in regexp %s",
152 s);
153 return (FALSE);
154 }
155 return (TRUE);
156 }
157 break;
158 default:
159 *rp++ = LIT;
160 *rp++ = *sp;
161 esc = 0;
162 break;
163 }
164 }
165 if (!first) {
166 snprintf(rxperr, sizeof(rxperr),
167 "unmatched alternator in regexp %s", s);
168 return (FALSE);
169 }
170 *rp++ = ALT_E;
171 *alt_ptr = rp - rxpbuf;
172 *rp++ = GRP_E;
173 *(rxpbuf + 2) = rp - rxpbuf;
174 *rp++ = EOT;
175 *rp = END;
176 return (TRUE);
177 }
178
179 /*
180 * match string against compiled regular expression
181 */
182 int
rxp_match(char * s)183 rxp_match(char *s)
184 {
185 return (rxp__match(s, TRUE, NULL, NULL, NULL));
186 }
187
188 /*
189 * jump to j_succ on successful alt match
190 * jump to j_fail on failed match
191 * reset sp to sp_fail on failed match
192 */
193 static int
rxp__match(char * s,int first,Rxp_t * j_succ,Rxp_t * j_fail,char * sp_fail)194 rxp__match(char *s, int first, Rxp_t *j_succ, Rxp_t *j_fail, char *sp_fail)
195 {
196 static Rxp_t *rp;
197 static char *sp;
198 int ch;
199 Rxp_t *grp_end;
200 int err;
201
202 grp_end = NULL;
203 if (first) {
204 rp = rxpbuf;
205 sp = s;
206 }
207 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
208 switch(*rp) {
209 case LIT:
210 rp++;
211 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
212 if (ch != *sp++) {
213 rp = j_fail;
214 sp = sp_fail;
215 return (TRUE);
216 }
217 rp++;
218 break;
219 case SOT:
220 if (sp != s)
221 return (FALSE);
222 rp++;
223 break;
224 case EOT:
225 if (*sp != 0)
226 return (FALSE);
227 rp++;
228 break;
229 case GRP_S:
230 rp++;
231 grp_end = rxpbuf + *rp++;
232 break;
233 case ALT_S:
234 rp++;
235 if ((err = rxp__match(sp,
236 FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
237 return (err);
238 break;
239 case ALT_E:
240 rp = j_succ;
241 return (TRUE);
242 case GRP_E:
243 default:
244 return (FALSE);
245 }
246 return (*rp != END ? FALSE : TRUE);
247 }
248
249 /*
250 * Reverse engineer the regular expression, by picking first of all alternates.
251 */
252 char *
rxp_expand(void)253 rxp_expand(void)
254 {
255 return (rxp__expand(TRUE));
256 }
257
258 static char *
rxp__expand(int first)259 rxp__expand(int first)
260 {
261 static char buf[RXP_LINE_SZ/2];
262 static Rxp_t *rp;
263 static char *bp;
264 Rxp_t *grp_ptr;
265 char *err;
266
267 if (first) {
268 rp = rxpbuf;
269 bp = buf;
270 }
271 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
272 switch(*rp) {
273 case LIT:
274 rp++;
275 *bp++ = *rp++;
276 break;
277 case GRP_S:
278 rp++;
279 grp_ptr = rxpbuf + *rp;
280 rp++;
281 if ((err = rxp__expand(FALSE)) == NULL)
282 return (err);
283 rp = grp_ptr;
284 break;
285 case ALT_E:
286 return (buf);
287 case ALT_S:
288 rp++;
289 /* FALLTHROUGH */
290 case SOT:
291 case EOT:
292 case GRP_E:
293 rp++;
294 break;
295 default:
296 return (NULL);
297 }
298 if (first) {
299 if (*rp != END)
300 return (NULL);
301 *bp = '\0';
302 }
303 return (buf);
304 }
305