1 /*-
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at
7 * Commodore Business Machines.
8 *
9 * %sccs.include.redist.c%
10 */
11
12 #ifndef lint
13 static char sccsid[] = "@(#)rxp.c 8.1 (Berkeley) 05/31/93";
14 #endif /* not lint */
15
16 /*
17 * regular expression parser
18 *
19 * external functions and return values are:
20 * rxp_compile(s)
21 * TRUE success
22 * FALSE parse failure; error message will be in char rxperr[]
23 * metas are:
24 * {...} optional pattern, equialent to [...|]
25 * | alternate pattern
26 * [...] pattern delimiters
27 *
28 * rxp_match(s)
29 * TRUE string s matches compiled pattern
30 * FALSE match failure or regexp error
31 *
32 * rxp_expand()
33 * char * reverse-engineered regular expression string
34 * NULL regexp error
35 */
36
37 #include <stdio.h>
38 #include <ctype.h>
39 #include "quiz.h"
40 /* regexp tokens, arg */
41 #define LIT (-1) /* literal character, char */
42 #define SOT (-2) /* start text anchor, - */
43 #define EOT (-3) /* end text anchor, - */
44 #define GRP_S (-4) /* start alternate grp, ptr_to_end */
45 #define GRP_E (-5) /* end group, - */
46 #define ALT_S (-6) /* alternate starts, ptr_to_next */
47 #define ALT_E (-7) /* alternate ends, - */
48 #define END (-8) /* end of regexp, - */
49
50 typedef short Rxp_t; /* type for regexp tokens */
51
52 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */
53 char rxperr[128]; /* parser error message */
54
55 static int rxp__compile __P((char *, int));
56 static char *rxp__expand __P((int));
57 static int rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *));
58
59 int
rxp_compile(s)60 rxp_compile(s)
61 register char * s;
62 {
63 return (rxp__compile(s, TRUE));
64 }
65
66 static int
rxp__compile(s,first)67 rxp__compile(s, first)
68 register char *s;
69 int first;
70 {
71 static Rxp_t *rp;
72 static char *sp;
73 Rxp_t *grp_ptr;
74 Rxp_t *alt_ptr;
75 int esc, err;
76
77 esc = 0;
78 if (first) {
79 rp = rxpbuf;
80 sp = s;
81 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */
82 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */
83 *rp++ = 0;
84 }
85 *rp++ = ALT_S;
86 alt_ptr = rp;
87 *rp++ = 0;
88 for (; *sp; ++sp) {
89 if (rp - rxpbuf >= RXP_LINE_SZ - 4) {
90 (void)snprintf(rxperr, sizeof(rxperr),
91 "regular expression too long %s", s);
92 return (FALSE);
93 }
94 if (*sp == ':' && !esc)
95 break;
96 if (esc) {
97 *rp++ = LIT;
98 *rp++ = *sp;
99 esc = 0;
100 }
101 else switch (*sp) {
102 case '\\':
103 esc = 1;
104 break;
105 case '{':
106 case '[':
107 *rp++ = GRP_S;
108 grp_ptr = rp;
109 *rp++ = 0;
110 sp++;
111 if ((err = rxp__compile(s, FALSE)) != TRUE)
112 return (err);
113 *rp++ = GRP_E;
114 *grp_ptr = rp - rxpbuf;
115 break;
116 case '}':
117 case ']':
118 case '|':
119 *rp++ = ALT_E;
120 *alt_ptr = rp - rxpbuf;
121 if (*sp != ']') {
122 *rp++ = ALT_S;
123 alt_ptr = rp;
124 *rp++ = 0;
125 }
126 if (*sp != '|') {
127 if (*sp != ']') {
128 *rp++ = ALT_E;
129 *alt_ptr = rp - rxpbuf;
130 }
131 if (first) {
132 (void)snprintf(rxperr, sizeof(rxperr),
133 "unmatched alternator in regexp %s",
134 s);
135 return (FALSE);
136 }
137 return (TRUE);
138 }
139 break;
140 default:
141 *rp++ = LIT;
142 *rp++ = *sp;
143 esc = 0;
144 break;
145 }
146 }
147 if (!first) {
148 (void)snprintf(rxperr, sizeof(rxperr),
149 "unmatched alternator in regexp %s", s);
150 return (FALSE);
151 }
152 *rp++ = ALT_E;
153 *alt_ptr = rp - rxpbuf;
154 *rp++ = GRP_E;
155 *(rxpbuf + 2) = rp - rxpbuf;
156 *rp++ = EOT;
157 *rp = END;
158 return (TRUE);
159 }
160
161 /*
162 * match string against compiled regular expression
163 */
164 int
rxp_match(s)165 rxp_match(s)
166 register char * s;
167 {
168 return (rxp__match(s, TRUE, NULL, NULL, NULL));
169 }
170
171 static int
rxp__match(s,first,j_succ,j_fail,sp_fail)172 rxp__match(s, first, j_succ, j_fail, sp_fail)
173 char *s;
174 int first;
175 Rxp_t *j_succ; /* jump here on successful alt match */
176 Rxp_t *j_fail; /* jump here on failed match */
177 char *sp_fail; /* reset sp to here on failed match */
178 {
179 static Rxp_t *rp;
180 static char *sp;
181 register int ch;
182 Rxp_t *grp_end;
183 int err;
184
185 if (first) {
186 rp = rxpbuf;
187 sp = s;
188 }
189 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
190 switch(*rp) {
191 case LIT:
192 rp++;
193 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp;
194 if (ch != *sp++) {
195 rp = j_fail;
196 sp = sp_fail;
197 return (TRUE);
198 }
199 rp++;
200 break;
201 case SOT:
202 if (sp != s)
203 return (FALSE);
204 rp++;
205 break;
206 case EOT:
207 if (*sp != 0)
208 return (FALSE);
209 rp++;
210 break;
211 case GRP_S:
212 rp++;
213 grp_end = rxpbuf + *rp++;
214 break;
215 case ALT_S:
216 rp++;
217 if ((err = rxp__match(sp,
218 FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE)
219 return (err);
220 break;
221 case ALT_E:
222 rp = j_succ;
223 return (TRUE);
224 case GRP_E:
225 default:
226 return (FALSE);
227 }
228 return (*rp != END ? FALSE : TRUE);
229 }
230
231 /*
232 * Reverse engineer the regular expression, by picking first of all alternates.
233 */
234 char *
rxp_expand()235 rxp_expand()
236 {
237 return (rxp__expand(TRUE));
238 }
239
240 static char *
rxp__expand(first)241 rxp__expand(first)
242 int first;
243 {
244 static char buf[RXP_LINE_SZ/2];
245 static Rxp_t *rp;
246 static char *bp;
247 Rxp_t *grp_ptr;
248 char *err;
249
250 if (first) {
251 rp = rxpbuf;
252 bp = buf;
253 }
254 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END)
255 switch(*rp) {
256 case LIT:
257 rp++;
258 *bp++ = *rp++;
259 break;
260 case GRP_S:
261 rp++;
262 grp_ptr = rxpbuf + *rp;
263 rp++;
264 if ((err = rxp__expand(FALSE)) == NULL)
265 return (err);
266 rp = grp_ptr;
267 break;
268 case ALT_E:
269 return (buf);
270 case ALT_S:
271 rp++;
272 /* FALLTHROUGH */
273 case SOT:
274 case EOT:
275 case GRP_E:
276 rp++;
277 break;
278 default:
279 return (NULL);
280 }
281 if (first) {
282 if (*rp != END)
283 return (NULL);
284 *bp = '\0';
285 }
286 return (buf);
287 }
288