1 /*- 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at 7 * Commodore Business Machines. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the University of 20 * California, Berkeley and its contributors. 21 * 4. Neither the name of the University nor the names of its contributors 22 * may be used to endorse or promote products derived from this software 23 * without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * @(#)rxp.c 8.1 (Berkeley) 5/31/93 38 * $FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $ 39 * $DragonFly: src/games/quiz/rxp.c,v 1.4 2005/04/25 16:10:24 liamfoy Exp $ 40 */ 41 42 /* 43 * regular expression parser 44 * 45 * external functions and return values are: 46 * rxp_compile(s) 47 * TRUE success 48 * FALSE parse failure; error message will be in char rxperr[] 49 * metas are: 50 * {...} optional pattern, equialent to [...|] 51 * | alternate pattern 52 * [...] pattern delimiters 53 * 54 * rxp_match(s) 55 * TRUE string s matches compiled pattern 56 * FALSE match failure or regexp error 57 * 58 * rxp_expand() 59 * char * reverse-engineered regular expression string 60 * NULL regexp error 61 */ 62 63 #include <stdio.h> 64 #include <ctype.h> 65 #include "quiz.h" 66 /* regexp tokens, arg */ 67 #define LIT (-1) /* literal character, char */ 68 #define SOT (-2) /* start text anchor, - */ 69 #define EOT (-3) /* end text anchor, - */ 70 #define GRP_S (-4) /* start alternate grp, ptr_to_end */ 71 #define GRP_E (-5) /* end group, - */ 72 #define ALT_S (-6) /* alternate starts, ptr_to_next */ 73 #define ALT_E (-7) /* alternate ends, - */ 74 #define END (-8) /* end of regexp, - */ 75 76 typedef short Rxp_t; /* type for regexp tokens */ 77 78 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */ 79 char rxperr[128]; /* parser error message */ 80 81 static int rxp__compile (char *, int); 82 static char *rxp__expand (int); 83 static int rxp__match (char *, int, Rxp_t *, Rxp_t *, char *); 84 85 int 86 rxp_compile(char *s) 87 { 88 return (rxp__compile(s, TRUE)); 89 } 90 91 static int 92 rxp__compile(char *s, int first) 93 { 94 static Rxp_t *rp; 95 static char *sp; 96 Rxp_t *grp_ptr; 97 Rxp_t *alt_ptr; 98 int esc, err; 99 100 esc = 0; 101 if (first) { 102 rp = rxpbuf; 103 sp = s; 104 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */ 105 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */ 106 *rp++ = 0; 107 } 108 *rp++ = ALT_S; 109 alt_ptr = rp; 110 *rp++ = 0; 111 for (; *sp; ++sp) { 112 if (rp - rxpbuf >= RXP_LINE_SZ - 4) { 113 (void)snprintf(rxperr, sizeof(rxperr), 114 "regular expression too long %s", s); 115 return (FALSE); 116 } 117 if (*sp == ':' && !esc) 118 break; 119 if (esc) { 120 *rp++ = LIT; 121 *rp++ = *sp; 122 esc = 0; 123 } 124 else switch (*sp) { 125 case '\\': 126 esc = 1; 127 break; 128 case '{': 129 case '[': 130 *rp++ = GRP_S; 131 grp_ptr = rp; 132 *rp++ = 0; 133 sp++; 134 if ((err = rxp__compile(s, FALSE)) != TRUE) 135 return (err); 136 *rp++ = GRP_E; 137 *grp_ptr = rp - rxpbuf; 138 break; 139 case '}': 140 case ']': 141 case '|': 142 *rp++ = ALT_E; 143 *alt_ptr = rp - rxpbuf; 144 if (*sp != ']') { 145 *rp++ = ALT_S; 146 alt_ptr = rp; 147 *rp++ = 0; 148 } 149 if (*sp != '|') { 150 if (*sp != ']') { 151 *rp++ = ALT_E; 152 *alt_ptr = rp - rxpbuf; 153 } 154 if (first) { 155 (void)snprintf(rxperr, sizeof(rxperr), 156 "unmatched alternator in regexp %s", 157 s); 158 return (FALSE); 159 } 160 return (TRUE); 161 } 162 break; 163 default: 164 *rp++ = LIT; 165 *rp++ = *sp; 166 esc = 0; 167 break; 168 } 169 } 170 if (!first) { 171 (void)snprintf(rxperr, sizeof(rxperr), 172 "unmatched alternator in regexp %s", s); 173 return (FALSE); 174 } 175 *rp++ = ALT_E; 176 *alt_ptr = rp - rxpbuf; 177 *rp++ = GRP_E; 178 *(rxpbuf + 2) = rp - rxpbuf; 179 *rp++ = EOT; 180 *rp = END; 181 return (TRUE); 182 } 183 184 /* 185 * match string against compiled regular expression 186 */ 187 int 188 rxp_match(char *s) 189 { 190 return (rxp__match(s, TRUE, NULL, NULL, NULL)); 191 } 192 193 /* 194 * jump to j_succ on successful alt match 195 * jump to j_fail on failed match 196 * reset sp to sp_fail on failed match 197 */ 198 static int 199 rxp__match(char *s, int first, Rxp_t *j_succ, Rxp_t *j_fail, char *sp_fail) 200 { 201 static Rxp_t *rp; 202 static char *sp; 203 int ch; 204 Rxp_t *grp_end; 205 int err; 206 207 grp_end = NULL; 208 if (first) { 209 rp = rxpbuf; 210 sp = s; 211 } 212 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 213 switch(*rp) { 214 case LIT: 215 rp++; 216 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp; 217 if (ch != *sp++) { 218 rp = j_fail; 219 sp = sp_fail; 220 return (TRUE); 221 } 222 rp++; 223 break; 224 case SOT: 225 if (sp != s) 226 return (FALSE); 227 rp++; 228 break; 229 case EOT: 230 if (*sp != 0) 231 return (FALSE); 232 rp++; 233 break; 234 case GRP_S: 235 rp++; 236 grp_end = rxpbuf + *rp++; 237 break; 238 case ALT_S: 239 rp++; 240 if ((err = rxp__match(sp, 241 FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE) 242 return (err); 243 break; 244 case ALT_E: 245 rp = j_succ; 246 return (TRUE); 247 case GRP_E: 248 default: 249 return (FALSE); 250 } 251 return (*rp != END ? FALSE : TRUE); 252 } 253 254 /* 255 * Reverse engineer the regular expression, by picking first of all alternates. 256 */ 257 char * 258 rxp_expand(void) 259 { 260 return (rxp__expand(TRUE)); 261 } 262 263 static char * 264 rxp__expand(int first) 265 { 266 static char buf[RXP_LINE_SZ/2]; 267 static Rxp_t *rp; 268 static char *bp; 269 Rxp_t *grp_ptr; 270 char *err; 271 272 if (first) { 273 rp = rxpbuf; 274 bp = buf; 275 } 276 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 277 switch(*rp) { 278 case LIT: 279 rp++; 280 *bp++ = *rp++; 281 break; 282 case GRP_S: 283 rp++; 284 grp_ptr = rxpbuf + *rp; 285 rp++; 286 if ((err = rxp__expand(FALSE)) == NULL) 287 return (err); 288 rp = grp_ptr; 289 break; 290 case ALT_E: 291 return (buf); 292 case ALT_S: 293 rp++; 294 /* FALLTHROUGH */ 295 case SOT: 296 case EOT: 297 case GRP_E: 298 rp++; 299 break; 300 default: 301 return (NULL); 302 } 303 if (first) { 304 if (*rp != END) 305 return (NULL); 306 *bp = '\0'; 307 } 308 return (buf); 309 } 310