1 /*- 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at 7 * Commodore Business Machines. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 #ifndef lint 39 #if 0 40 static char sccsid[] = "@(#)rxp.c 8.1 (Berkeley) 5/31/93"; 41 #endif 42 static const char rcsid[] = 43 "$FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $"; 44 #endif /* not lint */ 45 46 /* 47 * regular expression parser 48 * 49 * external functions and return values are: 50 * rxp_compile(s) 51 * TRUE success 52 * FALSE parse failure; error message will be in char rxperr[] 53 * metas are: 54 * {...} optional pattern, equialent to [...|] 55 * | alternate pattern 56 * [...] pattern delimiters 57 * 58 * rxp_match(s) 59 * TRUE string s matches compiled pattern 60 * FALSE match failure or regexp error 61 * 62 * rxp_expand() 63 * char * reverse-engineered regular expression string 64 * NULL regexp error 65 */ 66 67 #include <stdio.h> 68 #include <ctype.h> 69 #include "quiz.h" 70 /* regexp tokens, arg */ 71 #define LIT (-1) /* literal character, char */ 72 #define SOT (-2) /* start text anchor, - */ 73 #define EOT (-3) /* end text anchor, - */ 74 #define GRP_S (-4) /* start alternate grp, ptr_to_end */ 75 #define GRP_E (-5) /* end group, - */ 76 #define ALT_S (-6) /* alternate starts, ptr_to_next */ 77 #define ALT_E (-7) /* alternate ends, - */ 78 #define END (-8) /* end of regexp, - */ 79 80 typedef short Rxp_t; /* type for regexp tokens */ 81 82 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */ 83 char rxperr[128]; /* parser error message */ 84 85 static int rxp__compile __P((char *, int)); 86 static char *rxp__expand __P((int)); 87 static int rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *)); 88 89 int 90 rxp_compile(s) 91 char * s; 92 { 93 return (rxp__compile(s, TRUE)); 94 } 95 96 static int 97 rxp__compile(s, first) 98 char *s; 99 int first; 100 { 101 static Rxp_t *rp; 102 static char *sp; 103 Rxp_t *grp_ptr; 104 Rxp_t *alt_ptr; 105 int esc, err; 106 107 esc = 0; 108 if (first) { 109 rp = rxpbuf; 110 sp = s; 111 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */ 112 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */ 113 *rp++ = 0; 114 } 115 *rp++ = ALT_S; 116 alt_ptr = rp; 117 *rp++ = 0; 118 for (; *sp; ++sp) { 119 if (rp - rxpbuf >= RXP_LINE_SZ - 4) { 120 (void)snprintf(rxperr, sizeof(rxperr), 121 "regular expression too long %s", s); 122 return (FALSE); 123 } 124 if (*sp == ':' && !esc) 125 break; 126 if (esc) { 127 *rp++ = LIT; 128 *rp++ = *sp; 129 esc = 0; 130 } 131 else switch (*sp) { 132 case '\\': 133 esc = 1; 134 break; 135 case '{': 136 case '[': 137 *rp++ = GRP_S; 138 grp_ptr = rp; 139 *rp++ = 0; 140 sp++; 141 if ((err = rxp__compile(s, FALSE)) != TRUE) 142 return (err); 143 *rp++ = GRP_E; 144 *grp_ptr = rp - rxpbuf; 145 break; 146 case '}': 147 case ']': 148 case '|': 149 *rp++ = ALT_E; 150 *alt_ptr = rp - rxpbuf; 151 if (*sp != ']') { 152 *rp++ = ALT_S; 153 alt_ptr = rp; 154 *rp++ = 0; 155 } 156 if (*sp != '|') { 157 if (*sp != ']') { 158 *rp++ = ALT_E; 159 *alt_ptr = rp - rxpbuf; 160 } 161 if (first) { 162 (void)snprintf(rxperr, sizeof(rxperr), 163 "unmatched alternator in regexp %s", 164 s); 165 return (FALSE); 166 } 167 return (TRUE); 168 } 169 break; 170 default: 171 *rp++ = LIT; 172 *rp++ = *sp; 173 esc = 0; 174 break; 175 } 176 } 177 if (!first) { 178 (void)snprintf(rxperr, sizeof(rxperr), 179 "unmatched alternator in regexp %s", s); 180 return (FALSE); 181 } 182 *rp++ = ALT_E; 183 *alt_ptr = rp - rxpbuf; 184 *rp++ = GRP_E; 185 *(rxpbuf + 2) = rp - rxpbuf; 186 *rp++ = EOT; 187 *rp = END; 188 return (TRUE); 189 } 190 191 /* 192 * match string against compiled regular expression 193 */ 194 int 195 rxp_match(s) 196 char * s; 197 { 198 return (rxp__match(s, TRUE, NULL, NULL, NULL)); 199 } 200 201 static int 202 rxp__match(s, first, j_succ, j_fail, sp_fail) 203 char *s; 204 int first; 205 Rxp_t *j_succ; /* jump here on successful alt match */ 206 Rxp_t *j_fail; /* jump here on failed match */ 207 char *sp_fail; /* reset sp to here on failed match */ 208 { 209 static Rxp_t *rp; 210 static char *sp; 211 int ch; 212 Rxp_t *grp_end; 213 int err; 214 215 grp_end = NULL; 216 if (first) { 217 rp = rxpbuf; 218 sp = s; 219 } 220 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 221 switch(*rp) { 222 case LIT: 223 rp++; 224 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp; 225 if (ch != *sp++) { 226 rp = j_fail; 227 sp = sp_fail; 228 return (TRUE); 229 } 230 rp++; 231 break; 232 case SOT: 233 if (sp != s) 234 return (FALSE); 235 rp++; 236 break; 237 case EOT: 238 if (*sp != 0) 239 return (FALSE); 240 rp++; 241 break; 242 case GRP_S: 243 rp++; 244 grp_end = rxpbuf + *rp++; 245 break; 246 case ALT_S: 247 rp++; 248 if ((err = rxp__match(sp, 249 FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE) 250 return (err); 251 break; 252 case ALT_E: 253 rp = j_succ; 254 return (TRUE); 255 case GRP_E: 256 default: 257 return (FALSE); 258 } 259 return (*rp != END ? FALSE : TRUE); 260 } 261 262 /* 263 * Reverse engineer the regular expression, by picking first of all alternates. 264 */ 265 char * 266 rxp_expand() 267 { 268 return (rxp__expand(TRUE)); 269 } 270 271 static char * 272 rxp__expand(first) 273 int first; 274 { 275 static char buf[RXP_LINE_SZ/2]; 276 static Rxp_t *rp; 277 static char *bp; 278 Rxp_t *grp_ptr; 279 char *err; 280 281 if (first) { 282 rp = rxpbuf; 283 bp = buf; 284 } 285 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 286 switch(*rp) { 287 case LIT: 288 rp++; 289 *bp++ = *rp++; 290 break; 291 case GRP_S: 292 rp++; 293 grp_ptr = rxpbuf + *rp; 294 rp++; 295 if ((err = rxp__expand(FALSE)) == NULL) 296 return (err); 297 rp = grp_ptr; 298 break; 299 case ALT_E: 300 return (buf); 301 case ALT_S: 302 rp++; 303 /* FALLTHROUGH */ 304 case SOT: 305 case EOT: 306 case GRP_E: 307 rp++; 308 break; 309 default: 310 return (NULL); 311 } 312 if (first) { 313 if (*rp != END) 314 return (NULL); 315 *bp = '\0'; 316 } 317 return (buf); 318 } 319