1 /*- 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at 7 * Commodore Business Machines. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)rxp.c 8.1 (Berkeley) 5/31/93 34 * $FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $ 35 */ 36 37 /* 38 * regular expression parser 39 * 40 * external functions and return values are: 41 * rxp_compile(s) 42 * TRUE success 43 * FALSE parse failure; error message will be in char rxperr[] 44 * metas are: 45 * {...} optional pattern, equialent to [...|] 46 * | alternate pattern 47 * [...] pattern delimiters 48 * 49 * rxp_match(s) 50 * TRUE string s matches compiled pattern 51 * FALSE match failure or regexp error 52 * 53 * rxp_expand() 54 * char * reverse-engineered regular expression string 55 * NULL regexp error 56 */ 57 58 #include <stdio.h> 59 #include <ctype.h> 60 #include "quiz.h" 61 /* regexp tokens, arg */ 62 #define LIT (-1) /* literal character, char */ 63 #define SOT (-2) /* start text anchor, - */ 64 #define EOT (-3) /* end text anchor, - */ 65 #define GRP_S (-4) /* start alternate grp, ptr_to_end */ 66 #define GRP_E (-5) /* end group, - */ 67 #define ALT_S (-6) /* alternate starts, ptr_to_next */ 68 #define ALT_E (-7) /* alternate ends, - */ 69 #define END (-8) /* end of regexp, - */ 70 71 typedef short Rxp_t; /* type for regexp tokens */ 72 73 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */ 74 char rxperr[128]; /* parser error message */ 75 76 static int rxp__compile (char *, int); 77 static char *rxp__expand (int); 78 static int rxp__match (char *, int, Rxp_t *, Rxp_t *, char *); 79 80 int 81 rxp_compile(char *s) 82 { 83 return (rxp__compile(s, TRUE)); 84 } 85 86 static int 87 rxp__compile(char *s, int first) 88 { 89 static Rxp_t *rp; 90 static char *sp; 91 Rxp_t *grp_ptr; 92 Rxp_t *alt_ptr; 93 int esc, err; 94 95 esc = 0; 96 if (first) { 97 rp = rxpbuf; 98 sp = s; 99 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */ 100 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */ 101 *rp++ = 0; 102 } 103 *rp++ = ALT_S; 104 alt_ptr = rp; 105 *rp++ = 0; 106 for (; *sp; ++sp) { 107 if (rp - rxpbuf >= RXP_LINE_SZ - 4) { 108 snprintf(rxperr, sizeof(rxperr), 109 "regular expression too long %s", s); 110 return (FALSE); 111 } 112 if (*sp == ':' && !esc) 113 break; 114 if (esc) { 115 *rp++ = LIT; 116 *rp++ = *sp; 117 esc = 0; 118 } 119 else switch (*sp) { 120 case '\\': 121 esc = 1; 122 break; 123 case '{': 124 case '[': 125 *rp++ = GRP_S; 126 grp_ptr = rp; 127 *rp++ = 0; 128 sp++; 129 if ((err = rxp__compile(s, FALSE)) != TRUE) 130 return (err); 131 *rp++ = GRP_E; 132 *grp_ptr = rp - rxpbuf; 133 break; 134 case '}': 135 case ']': 136 case '|': 137 *rp++ = ALT_E; 138 *alt_ptr = rp - rxpbuf; 139 if (*sp != ']') { 140 *rp++ = ALT_S; 141 alt_ptr = rp; 142 *rp++ = 0; 143 } 144 if (*sp != '|') { 145 if (*sp != ']') { 146 *rp++ = ALT_E; 147 *alt_ptr = rp - rxpbuf; 148 } 149 if (first) { 150 snprintf(rxperr, sizeof(rxperr), 151 "unmatched alternator in regexp %s", 152 s); 153 return (FALSE); 154 } 155 return (TRUE); 156 } 157 break; 158 default: 159 *rp++ = LIT; 160 *rp++ = *sp; 161 esc = 0; 162 break; 163 } 164 } 165 if (!first) { 166 snprintf(rxperr, sizeof(rxperr), 167 "unmatched alternator in regexp %s", s); 168 return (FALSE); 169 } 170 *rp++ = ALT_E; 171 *alt_ptr = rp - rxpbuf; 172 *rp++ = GRP_E; 173 *(rxpbuf + 2) = rp - rxpbuf; 174 *rp++ = EOT; 175 *rp = END; 176 return (TRUE); 177 } 178 179 /* 180 * match string against compiled regular expression 181 */ 182 int 183 rxp_match(char *s) 184 { 185 return (rxp__match(s, TRUE, NULL, NULL, NULL)); 186 } 187 188 /* 189 * jump to j_succ on successful alt match 190 * jump to j_fail on failed match 191 * reset sp to sp_fail on failed match 192 */ 193 static int 194 rxp__match(char *s, int first, Rxp_t *j_succ, Rxp_t *j_fail, char *sp_fail) 195 { 196 static Rxp_t *rp; 197 static char *sp; 198 int ch; 199 Rxp_t *grp_end; 200 int err; 201 202 grp_end = NULL; 203 if (first) { 204 rp = rxpbuf; 205 sp = s; 206 } 207 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 208 switch(*rp) { 209 case LIT: 210 rp++; 211 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp; 212 if (ch != *sp++) { 213 rp = j_fail; 214 sp = sp_fail; 215 return (TRUE); 216 } 217 rp++; 218 break; 219 case SOT: 220 if (sp != s) 221 return (FALSE); 222 rp++; 223 break; 224 case EOT: 225 if (*sp != 0) 226 return (FALSE); 227 rp++; 228 break; 229 case GRP_S: 230 rp++; 231 grp_end = rxpbuf + *rp++; 232 break; 233 case ALT_S: 234 rp++; 235 if ((err = rxp__match(sp, 236 FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE) 237 return (err); 238 break; 239 case ALT_E: 240 rp = j_succ; 241 return (TRUE); 242 case GRP_E: 243 default: 244 return (FALSE); 245 } 246 return (*rp != END ? FALSE : TRUE); 247 } 248 249 /* 250 * Reverse engineer the regular expression, by picking first of all alternates. 251 */ 252 char * 253 rxp_expand(void) 254 { 255 return (rxp__expand(TRUE)); 256 } 257 258 static char * 259 rxp__expand(int first) 260 { 261 static char buf[RXP_LINE_SZ/2]; 262 static Rxp_t *rp; 263 static char *bp; 264 Rxp_t *grp_ptr; 265 char *err; 266 267 if (first) { 268 rp = rxpbuf; 269 bp = buf; 270 } 271 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 272 switch(*rp) { 273 case LIT: 274 rp++; 275 *bp++ = *rp++; 276 break; 277 case GRP_S: 278 rp++; 279 grp_ptr = rxpbuf + *rp; 280 rp++; 281 if ((err = rxp__expand(FALSE)) == NULL) 282 return (err); 283 rp = grp_ptr; 284 break; 285 case ALT_E: 286 return (buf); 287 case ALT_S: 288 rp++; 289 /* FALLTHROUGH */ 290 case SOT: 291 case EOT: 292 case GRP_E: 293 rp++; 294 break; 295 default: 296 return (NULL); 297 } 298 if (first) { 299 if (*rp != END) 300 return (NULL); 301 *bp = '\0'; 302 } 303 return (buf); 304 } 305