1 /*- 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at 7 * Commodore Business Machines. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)rxp.c 8.1 (Berkeley) 5/31/93 38 * $FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $ 39 * $DragonFly: src/games/quiz/rxp.c,v 1.3 2003/11/12 14:53:54 eirikn Exp $ 40 */ 41 42 /* 43 * regular expression parser 44 * 45 * external functions and return values are: 46 * rxp_compile(s) 47 * TRUE success 48 * FALSE parse failure; error message will be in char rxperr[] 49 * metas are: 50 * {...} optional pattern, equialent to [...|] 51 * | alternate pattern 52 * [...] pattern delimiters 53 * 54 * rxp_match(s) 55 * TRUE string s matches compiled pattern 56 * FALSE match failure or regexp error 57 * 58 * rxp_expand() 59 * char * reverse-engineered regular expression string 60 * NULL regexp error 61 */ 62 63 #include <stdio.h> 64 #include <ctype.h> 65 #include "quiz.h" 66 /* regexp tokens, arg */ 67 #define LIT (-1) /* literal character, char */ 68 #define SOT (-2) /* start text anchor, - */ 69 #define EOT (-3) /* end text anchor, - */ 70 #define GRP_S (-4) /* start alternate grp, ptr_to_end */ 71 #define GRP_E (-5) /* end group, - */ 72 #define ALT_S (-6) /* alternate starts, ptr_to_next */ 73 #define ALT_E (-7) /* alternate ends, - */ 74 #define END (-8) /* end of regexp, - */ 75 76 typedef short Rxp_t; /* type for regexp tokens */ 77 78 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */ 79 char rxperr[128]; /* parser error message */ 80 81 static int rxp__compile (char *, int); 82 static char *rxp__expand (int); 83 static int rxp__match (char *, int, Rxp_t *, Rxp_t *, char *); 84 85 int 86 rxp_compile(s) 87 char * s; 88 { 89 return (rxp__compile(s, TRUE)); 90 } 91 92 static int 93 rxp__compile(s, first) 94 char *s; 95 int first; 96 { 97 static Rxp_t *rp; 98 static char *sp; 99 Rxp_t *grp_ptr; 100 Rxp_t *alt_ptr; 101 int esc, err; 102 103 esc = 0; 104 if (first) { 105 rp = rxpbuf; 106 sp = s; 107 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */ 108 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */ 109 *rp++ = 0; 110 } 111 *rp++ = ALT_S; 112 alt_ptr = rp; 113 *rp++ = 0; 114 for (; *sp; ++sp) { 115 if (rp - rxpbuf >= RXP_LINE_SZ - 4) { 116 (void)snprintf(rxperr, sizeof(rxperr), 117 "regular expression too long %s", s); 118 return (FALSE); 119 } 120 if (*sp == ':' && !esc) 121 break; 122 if (esc) { 123 *rp++ = LIT; 124 *rp++ = *sp; 125 esc = 0; 126 } 127 else switch (*sp) { 128 case '\\': 129 esc = 1; 130 break; 131 case '{': 132 case '[': 133 *rp++ = GRP_S; 134 grp_ptr = rp; 135 *rp++ = 0; 136 sp++; 137 if ((err = rxp__compile(s, FALSE)) != TRUE) 138 return (err); 139 *rp++ = GRP_E; 140 *grp_ptr = rp - rxpbuf; 141 break; 142 case '}': 143 case ']': 144 case '|': 145 *rp++ = ALT_E; 146 *alt_ptr = rp - rxpbuf; 147 if (*sp != ']') { 148 *rp++ = ALT_S; 149 alt_ptr = rp; 150 *rp++ = 0; 151 } 152 if (*sp != '|') { 153 if (*sp != ']') { 154 *rp++ = ALT_E; 155 *alt_ptr = rp - rxpbuf; 156 } 157 if (first) { 158 (void)snprintf(rxperr, sizeof(rxperr), 159 "unmatched alternator in regexp %s", 160 s); 161 return (FALSE); 162 } 163 return (TRUE); 164 } 165 break; 166 default: 167 *rp++ = LIT; 168 *rp++ = *sp; 169 esc = 0; 170 break; 171 } 172 } 173 if (!first) { 174 (void)snprintf(rxperr, sizeof(rxperr), 175 "unmatched alternator in regexp %s", s); 176 return (FALSE); 177 } 178 *rp++ = ALT_E; 179 *alt_ptr = rp - rxpbuf; 180 *rp++ = GRP_E; 181 *(rxpbuf + 2) = rp - rxpbuf; 182 *rp++ = EOT; 183 *rp = END; 184 return (TRUE); 185 } 186 187 /* 188 * match string against compiled regular expression 189 */ 190 int 191 rxp_match(s) 192 char * s; 193 { 194 return (rxp__match(s, TRUE, NULL, NULL, NULL)); 195 } 196 197 static int 198 rxp__match(s, first, j_succ, j_fail, sp_fail) 199 char *s; 200 int first; 201 Rxp_t *j_succ; /* jump here on successful alt match */ 202 Rxp_t *j_fail; /* jump here on failed match */ 203 char *sp_fail; /* reset sp to here on failed match */ 204 { 205 static Rxp_t *rp; 206 static char *sp; 207 int ch; 208 Rxp_t *grp_end; 209 int err; 210 211 grp_end = NULL; 212 if (first) { 213 rp = rxpbuf; 214 sp = s; 215 } 216 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 217 switch(*rp) { 218 case LIT: 219 rp++; 220 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp; 221 if (ch != *sp++) { 222 rp = j_fail; 223 sp = sp_fail; 224 return (TRUE); 225 } 226 rp++; 227 break; 228 case SOT: 229 if (sp != s) 230 return (FALSE); 231 rp++; 232 break; 233 case EOT: 234 if (*sp != 0) 235 return (FALSE); 236 rp++; 237 break; 238 case GRP_S: 239 rp++; 240 grp_end = rxpbuf + *rp++; 241 break; 242 case ALT_S: 243 rp++; 244 if ((err = rxp__match(sp, 245 FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE) 246 return (err); 247 break; 248 case ALT_E: 249 rp = j_succ; 250 return (TRUE); 251 case GRP_E: 252 default: 253 return (FALSE); 254 } 255 return (*rp != END ? FALSE : TRUE); 256 } 257 258 /* 259 * Reverse engineer the regular expression, by picking first of all alternates. 260 */ 261 char * 262 rxp_expand() 263 { 264 return (rxp__expand(TRUE)); 265 } 266 267 static char * 268 rxp__expand(first) 269 int first; 270 { 271 static char buf[RXP_LINE_SZ/2]; 272 static Rxp_t *rp; 273 static char *bp; 274 Rxp_t *grp_ptr; 275 char *err; 276 277 if (first) { 278 rp = rxpbuf; 279 bp = buf; 280 } 281 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 282 switch(*rp) { 283 case LIT: 284 rp++; 285 *bp++ = *rp++; 286 break; 287 case GRP_S: 288 rp++; 289 grp_ptr = rxpbuf + *rp; 290 rp++; 291 if ((err = rxp__expand(FALSE)) == NULL) 292 return (err); 293 rp = grp_ptr; 294 break; 295 case ALT_E: 296 return (buf); 297 case ALT_S: 298 rp++; 299 /* FALLTHROUGH */ 300 case SOT: 301 case EOT: 302 case GRP_E: 303 rp++; 304 break; 305 default: 306 return (NULL); 307 } 308 if (first) { 309 if (*rp != END) 310 return (NULL); 311 *bp = '\0'; 312 } 313 return (buf); 314 } 315