1 /* $NetBSD: rxp.c,v 1.13 2009/08/27 00:31:12 dholland Exp $ */ 2 3 /*- 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at 9 * Commodore Business Machines. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include <sys/cdefs.h> 37 #ifndef lint 38 #if 0 39 static char sccsid[] = "@(#)rxp.c 8.1 (Berkeley) 5/31/93"; 40 #else 41 __RCSID("$NetBSD: rxp.c,v 1.13 2009/08/27 00:31:12 dholland Exp $"); 42 #endif 43 #endif /* not lint */ 44 45 /* 46 * regular expression parser 47 * 48 * external functions and return values are: 49 * rxp_compile(s) 50 * TRUE success 51 * FALSE parse failure; error message will be in char rxperr[] 52 * metas are: 53 * {...} optional pattern, equialent to [...|] 54 * | alternate pattern 55 * [...] pattern delimiters 56 * 57 * rxp_match(s) 58 * TRUE string s matches compiled pattern 59 * FALSE match failure or regexp error 60 * 61 * rxp_expand() 62 * char * reverse-engineered regular expression string 63 * NULL regexp error 64 */ 65 66 #include <stdio.h> 67 #include <stdlib.h> 68 #include <ctype.h> 69 #include "quiz.h" 70 /* regexp tokens, arg */ 71 #define LIT (-1) /* literal character, char */ 72 #define SOT (-2) /* start text anchor, - */ 73 #define EOT (-3) /* end text anchor, - */ 74 #define GRP_S (-4) /* start alternate grp, ptr_to_end */ 75 #define GRP_E (-5) /* end group, - */ 76 #define ALT_S (-6) /* alternate starts, ptr_to_next */ 77 #define ALT_E (-7) /* alternate ends, - */ 78 #define END (-8) /* end of regexp, - */ 79 80 typedef short Rxp_t; /* type for regexp tokens */ 81 82 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */ 83 char rxperr[128]; /* parser error message */ 84 85 static int rxp__compile(const char *, int); 86 static char *rxp__expand(int); 87 static int rxp__match(const char *, int, Rxp_t *, Rxp_t *, const char *); 88 89 int 90 rxp_compile(const char *s) 91 { 92 return (rxp__compile(s, TRUE)); 93 } 94 95 static int 96 rxp__compile(const char *s, int first) 97 { 98 static Rxp_t *rp; 99 static const char *sp; 100 Rxp_t *grp_ptr; 101 Rxp_t *alt_ptr; 102 int esc, err; 103 104 esc = 0; 105 if (first) { 106 rp = rxpbuf; 107 sp = s; 108 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */ 109 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */ 110 *rp++ = 0; 111 } 112 *rp++ = ALT_S; 113 alt_ptr = rp; 114 *rp++ = 0; 115 for (; *sp; ++sp) { 116 if (rp - rxpbuf >= RXP_LINE_SZ - 4) { 117 (void)snprintf(rxperr, sizeof(rxperr), 118 "regular expression too long %s", s); 119 return (FALSE); 120 } 121 if (*sp == ':' && !esc) 122 break; 123 if (esc) { 124 *rp++ = LIT; 125 *rp++ = *sp; 126 esc = 0; 127 } 128 else switch (*sp) { 129 case '\\': 130 esc = 1; 131 break; 132 case '{': 133 case '[': 134 *rp++ = GRP_S; 135 grp_ptr = rp; 136 *rp++ = 0; 137 sp++; 138 if ((err = rxp__compile(s, FALSE)) != TRUE) 139 return (err); 140 *rp++ = GRP_E; 141 *grp_ptr = rp - rxpbuf; 142 break; 143 case '}': 144 case ']': 145 case '|': 146 *rp++ = ALT_E; 147 *alt_ptr = rp - rxpbuf; 148 if (*sp != ']') { 149 *rp++ = ALT_S; 150 alt_ptr = rp; 151 *rp++ = 0; 152 } 153 if (*sp != '|') { 154 if (*sp != ']') { 155 *rp++ = ALT_E; 156 *alt_ptr = rp - rxpbuf; 157 } 158 if (first) { 159 (void)snprintf(rxperr, sizeof(rxperr), 160 "unmatched alternator in regexp %s", 161 s); 162 return (FALSE); 163 } 164 return (TRUE); 165 } 166 break; 167 default: 168 *rp++ = LIT; 169 *rp++ = *sp; 170 esc = 0; 171 break; 172 } 173 } 174 if (!first) { 175 (void)snprintf(rxperr, sizeof(rxperr), 176 "unmatched alternator in regexp %s", s); 177 return (FALSE); 178 } 179 *rp++ = ALT_E; 180 *alt_ptr = rp - rxpbuf; 181 *rp++ = GRP_E; 182 *(rxpbuf + 2) = rp - rxpbuf; 183 *rp++ = EOT; 184 *rp = END; 185 return (TRUE); 186 } 187 188 /* 189 * match string against compiled regular expression 190 */ 191 int 192 rxp_match(const char *s) 193 { 194 return (rxp__match(s, TRUE, NULL, NULL, NULL)); 195 } 196 197 static int 198 rxp__match(const char *s, 199 int first, 200 Rxp_t *j_succ, /* jump here on successful alt match */ 201 Rxp_t *j_fail, /* jump here on failed match */ 202 const char *sp_fail) /* reset sp to here on failed match */ 203 { 204 static Rxp_t *rp; 205 static const char *sp; 206 int ch; 207 Rxp_t *grp_end = NULL; 208 209 if (first) { 210 rp = rxpbuf; 211 sp = s; 212 } 213 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 214 switch(*rp) { 215 case LIT: 216 rp++; 217 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp; 218 if (ch != *sp++) { 219 rp = j_fail; 220 sp = sp_fail; 221 return (FALSE); 222 } 223 rp++; 224 break; 225 case SOT: 226 if (sp != s) 227 return (FALSE); 228 rp++; 229 break; 230 case EOT: 231 if (*sp != 0) 232 return (FALSE); 233 rp++; 234 break; 235 case GRP_S: 236 rp++; 237 grp_end = rxpbuf + *rp++; 238 break; 239 case ALT_S: 240 rp++; 241 rxp__match(sp, FALSE, grp_end, rxpbuf + *rp++, sp); 242 break; 243 case ALT_E: 244 rp = j_succ; 245 return (TRUE); 246 case GRP_E: 247 rp = j_fail; 248 sp = sp_fail; 249 return (FALSE); 250 default: 251 abort(); 252 } 253 return (*rp != END ? FALSE : TRUE); 254 } 255 256 /* 257 * Reverse engineer the regular expression, by picking first of all alternates. 258 */ 259 char * 260 rxp_expand(void) 261 { 262 return (rxp__expand(TRUE)); 263 } 264 265 static char * 266 rxp__expand(int first) 267 { 268 static char buf[RXP_LINE_SZ/2]; 269 static Rxp_t *rp; 270 static char *bp; 271 Rxp_t *grp_ptr; 272 char *err; 273 274 if (first) { 275 rp = rxpbuf; 276 bp = buf; 277 } 278 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 279 switch(*rp) { 280 case LIT: 281 rp++; 282 *bp++ = *rp++; 283 break; 284 case GRP_S: 285 rp++; 286 grp_ptr = rxpbuf + *rp; 287 rp++; 288 if ((err = rxp__expand(FALSE)) == NULL) 289 return (err); 290 rp = grp_ptr; 291 break; 292 case ALT_E: 293 return (buf); 294 case ALT_S: 295 rp++; 296 /* FALLTHROUGH */ 297 case SOT: 298 case EOT: 299 case GRP_E: 300 rp++; 301 break; 302 default: 303 return (NULL); 304 } 305 if (first) { 306 if (*rp != END) 307 return (NULL); 308 *bp = '\0'; 309 } 310 return (buf); 311 } 312