1 /*- 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at 7 * Commodore Business Machines. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)rxp.c 8.1 (Berkeley) 5/31/93 34 * $FreeBSD: src/games/quiz/rxp.c,v 1.5 1999/12/12 02:29:54 billf Exp $ 35 * $DragonFly: src/games/quiz/rxp.c,v 1.4 2005/04/25 16:10:24 liamfoy Exp $ 36 */ 37 38 /* 39 * regular expression parser 40 * 41 * external functions and return values are: 42 * rxp_compile(s) 43 * TRUE success 44 * FALSE parse failure; error message will be in char rxperr[] 45 * metas are: 46 * {...} optional pattern, equialent to [...|] 47 * | alternate pattern 48 * [...] pattern delimiters 49 * 50 * rxp_match(s) 51 * TRUE string s matches compiled pattern 52 * FALSE match failure or regexp error 53 * 54 * rxp_expand() 55 * char * reverse-engineered regular expression string 56 * NULL regexp error 57 */ 58 59 #include <stdio.h> 60 #include <ctype.h> 61 #include "quiz.h" 62 /* regexp tokens, arg */ 63 #define LIT (-1) /* literal character, char */ 64 #define SOT (-2) /* start text anchor, - */ 65 #define EOT (-3) /* end text anchor, - */ 66 #define GRP_S (-4) /* start alternate grp, ptr_to_end */ 67 #define GRP_E (-5) /* end group, - */ 68 #define ALT_S (-6) /* alternate starts, ptr_to_next */ 69 #define ALT_E (-7) /* alternate ends, - */ 70 #define END (-8) /* end of regexp, - */ 71 72 typedef short Rxp_t; /* type for regexp tokens */ 73 74 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */ 75 char rxperr[128]; /* parser error message */ 76 77 static int rxp__compile (char *, int); 78 static char *rxp__expand (int); 79 static int rxp__match (char *, int, Rxp_t *, Rxp_t *, char *); 80 81 int 82 rxp_compile(char *s) 83 { 84 return (rxp__compile(s, TRUE)); 85 } 86 87 static int 88 rxp__compile(char *s, int first) 89 { 90 static Rxp_t *rp; 91 static char *sp; 92 Rxp_t *grp_ptr; 93 Rxp_t *alt_ptr; 94 int esc, err; 95 96 esc = 0; 97 if (first) { 98 rp = rxpbuf; 99 sp = s; 100 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */ 101 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */ 102 *rp++ = 0; 103 } 104 *rp++ = ALT_S; 105 alt_ptr = rp; 106 *rp++ = 0; 107 for (; *sp; ++sp) { 108 if (rp - rxpbuf >= RXP_LINE_SZ - 4) { 109 (void)snprintf(rxperr, sizeof(rxperr), 110 "regular expression too long %s", s); 111 return (FALSE); 112 } 113 if (*sp == ':' && !esc) 114 break; 115 if (esc) { 116 *rp++ = LIT; 117 *rp++ = *sp; 118 esc = 0; 119 } 120 else switch (*sp) { 121 case '\\': 122 esc = 1; 123 break; 124 case '{': 125 case '[': 126 *rp++ = GRP_S; 127 grp_ptr = rp; 128 *rp++ = 0; 129 sp++; 130 if ((err = rxp__compile(s, FALSE)) != TRUE) 131 return (err); 132 *rp++ = GRP_E; 133 *grp_ptr = rp - rxpbuf; 134 break; 135 case '}': 136 case ']': 137 case '|': 138 *rp++ = ALT_E; 139 *alt_ptr = rp - rxpbuf; 140 if (*sp != ']') { 141 *rp++ = ALT_S; 142 alt_ptr = rp; 143 *rp++ = 0; 144 } 145 if (*sp != '|') { 146 if (*sp != ']') { 147 *rp++ = ALT_E; 148 *alt_ptr = rp - rxpbuf; 149 } 150 if (first) { 151 (void)snprintf(rxperr, sizeof(rxperr), 152 "unmatched alternator in regexp %s", 153 s); 154 return (FALSE); 155 } 156 return (TRUE); 157 } 158 break; 159 default: 160 *rp++ = LIT; 161 *rp++ = *sp; 162 esc = 0; 163 break; 164 } 165 } 166 if (!first) { 167 (void)snprintf(rxperr, sizeof(rxperr), 168 "unmatched alternator in regexp %s", s); 169 return (FALSE); 170 } 171 *rp++ = ALT_E; 172 *alt_ptr = rp - rxpbuf; 173 *rp++ = GRP_E; 174 *(rxpbuf + 2) = rp - rxpbuf; 175 *rp++ = EOT; 176 *rp = END; 177 return (TRUE); 178 } 179 180 /* 181 * match string against compiled regular expression 182 */ 183 int 184 rxp_match(char *s) 185 { 186 return (rxp__match(s, TRUE, NULL, NULL, NULL)); 187 } 188 189 /* 190 * jump to j_succ on successful alt match 191 * jump to j_fail on failed match 192 * reset sp to sp_fail on failed match 193 */ 194 static int 195 rxp__match(char *s, int first, Rxp_t *j_succ, Rxp_t *j_fail, char *sp_fail) 196 { 197 static Rxp_t *rp; 198 static char *sp; 199 int ch; 200 Rxp_t *grp_end; 201 int err; 202 203 grp_end = NULL; 204 if (first) { 205 rp = rxpbuf; 206 sp = s; 207 } 208 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 209 switch(*rp) { 210 case LIT: 211 rp++; 212 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp; 213 if (ch != *sp++) { 214 rp = j_fail; 215 sp = sp_fail; 216 return (TRUE); 217 } 218 rp++; 219 break; 220 case SOT: 221 if (sp != s) 222 return (FALSE); 223 rp++; 224 break; 225 case EOT: 226 if (*sp != 0) 227 return (FALSE); 228 rp++; 229 break; 230 case GRP_S: 231 rp++; 232 grp_end = rxpbuf + *rp++; 233 break; 234 case ALT_S: 235 rp++; 236 if ((err = rxp__match(sp, 237 FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE) 238 return (err); 239 break; 240 case ALT_E: 241 rp = j_succ; 242 return (TRUE); 243 case GRP_E: 244 default: 245 return (FALSE); 246 } 247 return (*rp != END ? FALSE : TRUE); 248 } 249 250 /* 251 * Reverse engineer the regular expression, by picking first of all alternates. 252 */ 253 char * 254 rxp_expand(void) 255 { 256 return (rxp__expand(TRUE)); 257 } 258 259 static char * 260 rxp__expand(int first) 261 { 262 static char buf[RXP_LINE_SZ/2]; 263 static Rxp_t *rp; 264 static char *bp; 265 Rxp_t *grp_ptr; 266 char *err; 267 268 if (first) { 269 rp = rxpbuf; 270 bp = buf; 271 } 272 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 273 switch(*rp) { 274 case LIT: 275 rp++; 276 *bp++ = *rp++; 277 break; 278 case GRP_S: 279 rp++; 280 grp_ptr = rxpbuf + *rp; 281 rp++; 282 if ((err = rxp__expand(FALSE)) == NULL) 283 return (err); 284 rp = grp_ptr; 285 break; 286 case ALT_E: 287 return (buf); 288 case ALT_S: 289 rp++; 290 /* FALLTHROUGH */ 291 case SOT: 292 case EOT: 293 case GRP_E: 294 rp++; 295 break; 296 default: 297 return (NULL); 298 } 299 if (first) { 300 if (*rp != END) 301 return (NULL); 302 *bp = '\0'; 303 } 304 return (buf); 305 } 306