1 /*- 2 * Copyright (c) 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Jim R. Oldroyd at The Instruction Set. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #ifndef lint 12 static char sccsid[] = "@(#)rxp.c 5.1 (Berkeley) 11/10/91"; 13 #endif /* not lint */ 14 15 /* 16 * regular expression parser 17 * 18 * external functions and return values are: 19 * rxp_compile(s) 20 * TRUE success 21 * FALSE parse failure; error message will be in char rxperr[] 22 * metas are: 23 * {...} optional pattern, equialent to [...|] 24 * | alternate pattern 25 * [...] pattern delimiters 26 * 27 * rxp_match(s) 28 * TRUE string s matches compiled pattern 29 * FALSE match failure or regexp error 30 * 31 * rxp_expand() 32 * char * reverse-engineered regular expression string 33 * NULL regexp error 34 */ 35 36 #include <stdio.h> 37 #include <ctype.h> 38 #include "quiz.h" 39 /* regexp tokens, arg */ 40 #define LIT (-1) /* literal character, char */ 41 #define SOT (-2) /* start text anchor, - */ 42 #define EOT (-3) /* end text anchor, - */ 43 #define GRP_S (-4) /* start alternate grp, ptr_to_end */ 44 #define GRP_E (-5) /* end group, - */ 45 #define ALT_S (-6) /* alternate starts, ptr_to_next */ 46 #define ALT_E (-7) /* alternate ends, - */ 47 #define END (-8) /* end of regexp, - */ 48 49 typedef short Rxp_t; /* type for regexp tokens */ 50 51 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */ 52 char rxperr[128]; /* parser error message */ 53 54 int rxp__compile __P((char *, int)); 55 char *rxp__expand __P((int)); 56 int rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *)); 57 58 int 59 rxp_compile(s) 60 register char * s; 61 { 62 return (rxp__compile(s, TRUE)); 63 } 64 65 static int 66 rxp__compile(s, first) 67 register char *s; 68 int first; 69 { 70 static Rxp_t *rp; 71 static char *sp; 72 Rxp_t *grp_ptr; 73 Rxp_t *alt_ptr; 74 int esc, err; 75 76 esc = 0; 77 if (first) { 78 rp = rxpbuf; 79 sp = s; 80 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */ 81 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */ 82 *rp++ = 0; 83 } 84 *rp++ = ALT_S; 85 alt_ptr = rp; 86 *rp++ = 0; 87 for (; *sp; ++sp) { 88 if (rp - rxpbuf >= RXP_LINE_SZ - 4) { 89 (void)snprintf(rxperr, sizeof(rxperr), 90 "regular expression too long %s", s); 91 return (FALSE); 92 } 93 if (*sp == ':' && !esc) 94 break; 95 if (esc) { 96 *rp++ = LIT; 97 *rp++ = *sp; 98 esc = 0; 99 } 100 else switch (*sp) { 101 case '\\': 102 esc = 1; 103 break; 104 case '{': 105 case '[': 106 *rp++ = GRP_S; 107 grp_ptr = rp; 108 *rp++ = 0; 109 sp++; 110 if ((err = rxp__compile(s, FALSE)) != TRUE) 111 return (err); 112 *rp++ = GRP_E; 113 *grp_ptr = rp - rxpbuf; 114 break; 115 case '}': 116 case ']': 117 case '|': 118 *rp++ = ALT_E; 119 *alt_ptr = rp - rxpbuf; 120 if (*sp != ']') { 121 *rp++ = ALT_S; 122 alt_ptr = rp; 123 *rp++ = 0; 124 } 125 if (*sp != '|') { 126 if (*sp != ']') { 127 *rp++ = ALT_E; 128 *alt_ptr = rp - rxpbuf; 129 } 130 if (first) { 131 (void)snprintf(rxperr, sizeof(rxperr), 132 "unmatched alternator in regexp %s", 133 s); 134 return (FALSE); 135 } 136 return (TRUE); 137 } 138 break; 139 default: 140 *rp++ = LIT; 141 *rp++ = *sp; 142 esc = 0; 143 break; 144 } 145 } 146 if (!first) { 147 (void)snprintf(rxperr, sizeof(rxperr), 148 "unmatched alternator in regexp %s", s); 149 return (FALSE); 150 } 151 *rp++ = ALT_E; 152 *alt_ptr = rp - rxpbuf; 153 *rp++ = GRP_E; 154 *(rxpbuf + 2) = rp - rxpbuf; 155 *rp++ = EOT; 156 *rp = END; 157 return (TRUE); 158 } 159 160 /* 161 * match string against compiled regular expression 162 */ 163 int 164 rxp_match(s) 165 register char * s; 166 { 167 return (rxp__match(s, TRUE, NULL, NULL, NULL)); 168 } 169 170 static int 171 rxp__match(s, first, j_succ, j_fail, sp_fail) 172 char *s; 173 int first; 174 Rxp_t *j_succ; /* jump here on successful alt match */ 175 Rxp_t *j_fail; /* jump here on failed match */ 176 char *sp_fail; /* reset sp to here on failed match */ 177 { 178 static Rxp_t *rp; 179 static char *sp; 180 register int ch; 181 Rxp_t *grp_end; 182 int err; 183 184 if (first) { 185 rp = rxpbuf; 186 sp = s; 187 } 188 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 189 switch(*rp) { 190 case LIT: 191 rp++; 192 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp; 193 if (ch != *sp++) { 194 rp = j_fail; 195 sp = sp_fail; 196 return (TRUE); 197 } 198 rp++; 199 break; 200 case SOT: 201 if (sp != s) 202 return (FALSE); 203 rp++; 204 break; 205 case EOT: 206 if (*sp != 0) 207 return (FALSE); 208 rp++; 209 break; 210 case GRP_S: 211 rp++; 212 grp_end = rxpbuf + *rp++; 213 break; 214 case ALT_S: 215 rp++; 216 if ((err = rxp__match(sp, 217 FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE) 218 return (err); 219 break; 220 case ALT_E: 221 rp = j_succ; 222 return (TRUE); 223 case GRP_E: 224 default: 225 return (FALSE); 226 } 227 return (*rp != END ? FALSE : TRUE); 228 } 229 230 /* 231 * Reverse engineer the regular expression, by picking first of all alternates. 232 */ 233 char * 234 rxp_expand() 235 { 236 return (rxp__expand(TRUE)); 237 } 238 239 static char * 240 rxp__expand(first) 241 int first; 242 { 243 static char buf[RXP_LINE_SZ/2]; 244 static Rxp_t *rp; 245 static char *bp; 246 Rxp_t *grp_ptr; 247 char *err; 248 249 if (first) { 250 rp = rxpbuf; 251 bp = buf; 252 } 253 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 254 switch(*rp) { 255 case LIT: 256 rp++; 257 *bp++ = *rp++; 258 break; 259 case GRP_S: 260 rp++; 261 grp_ptr = rxpbuf + *rp; 262 rp++; 263 if ((err = rxp__expand(FALSE)) == NULL) 264 return (err); 265 rp = grp_ptr; 266 break; 267 case ALT_E: 268 return (buf); 269 case ALT_S: 270 rp++; 271 /* FALLTHROUGH */ 272 case SOT: 273 case EOT: 274 case GRP_E: 275 rp++; 276 break; 277 default: 278 return (NULL); 279 } 280 if (first) { 281 if (*rp != END) 282 return (NULL); 283 *bp = '\0'; 284 } 285 return (buf); 286 } 287