1 /*- 2 * Copyright (c) 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Jim R. Oldroyd at The Instruction Set and Keith Gabryelski at 7 * Commodore Business Machines. 8 * 9 * %sccs.include.redist.c% 10 */ 11 12 #ifndef lint 13 static char sccsid[] = "@(#)rxp.c 5.3 (Berkeley) 02/03/93"; 14 #endif /* not lint */ 15 16 /* 17 * regular expression parser 18 * 19 * external functions and return values are: 20 * rxp_compile(s) 21 * TRUE success 22 * FALSE parse failure; error message will be in char rxperr[] 23 * metas are: 24 * {...} optional pattern, equialent to [...|] 25 * | alternate pattern 26 * [...] pattern delimiters 27 * 28 * rxp_match(s) 29 * TRUE string s matches compiled pattern 30 * FALSE match failure or regexp error 31 * 32 * rxp_expand() 33 * char * reverse-engineered regular expression string 34 * NULL regexp error 35 */ 36 37 #include <stdio.h> 38 #include <ctype.h> 39 #include "quiz.h" 40 /* regexp tokens, arg */ 41 #define LIT (-1) /* literal character, char */ 42 #define SOT (-2) /* start text anchor, - */ 43 #define EOT (-3) /* end text anchor, - */ 44 #define GRP_S (-4) /* start alternate grp, ptr_to_end */ 45 #define GRP_E (-5) /* end group, - */ 46 #define ALT_S (-6) /* alternate starts, ptr_to_next */ 47 #define ALT_E (-7) /* alternate ends, - */ 48 #define END (-8) /* end of regexp, - */ 49 50 typedef short Rxp_t; /* type for regexp tokens */ 51 52 static Rxp_t rxpbuf[RXP_LINE_SZ]; /* compiled regular expression buffer */ 53 char rxperr[128]; /* parser error message */ 54 55 static int rxp__compile __P((char *, int)); 56 static char *rxp__expand __P((int)); 57 static int rxp__match __P((char *, int, Rxp_t *, Rxp_t *, char *)); 58 59 int 60 rxp_compile(s) 61 register char * s; 62 { 63 return (rxp__compile(s, TRUE)); 64 } 65 66 static int 67 rxp__compile(s, first) 68 register char *s; 69 int first; 70 { 71 static Rxp_t *rp; 72 static char *sp; 73 Rxp_t *grp_ptr; 74 Rxp_t *alt_ptr; 75 int esc, err; 76 77 esc = 0; 78 if (first) { 79 rp = rxpbuf; 80 sp = s; 81 *rp++ = SOT; /* auto-anchor: pat is really ^pat$ */ 82 *rp++ = GRP_S; /* auto-group: ^pat$ is really ^[pat]$ */ 83 *rp++ = 0; 84 } 85 *rp++ = ALT_S; 86 alt_ptr = rp; 87 *rp++ = 0; 88 for (; *sp; ++sp) { 89 if (rp - rxpbuf >= RXP_LINE_SZ - 4) { 90 (void)snprintf(rxperr, sizeof(rxperr), 91 "regular expression too long %s", s); 92 return (FALSE); 93 } 94 if (*sp == ':' && !esc) 95 break; 96 if (esc) { 97 *rp++ = LIT; 98 *rp++ = *sp; 99 esc = 0; 100 } 101 else switch (*sp) { 102 case '\\': 103 esc = 1; 104 break; 105 case '{': 106 case '[': 107 *rp++ = GRP_S; 108 grp_ptr = rp; 109 *rp++ = 0; 110 sp++; 111 if ((err = rxp__compile(s, FALSE)) != TRUE) 112 return (err); 113 *rp++ = GRP_E; 114 *grp_ptr = rp - rxpbuf; 115 break; 116 case '}': 117 case ']': 118 case '|': 119 *rp++ = ALT_E; 120 *alt_ptr = rp - rxpbuf; 121 if (*sp != ']') { 122 *rp++ = ALT_S; 123 alt_ptr = rp; 124 *rp++ = 0; 125 } 126 if (*sp != '|') { 127 if (*sp != ']') { 128 *rp++ = ALT_E; 129 *alt_ptr = rp - rxpbuf; 130 } 131 if (first) { 132 (void)snprintf(rxperr, sizeof(rxperr), 133 "unmatched alternator in regexp %s", 134 s); 135 return (FALSE); 136 } 137 return (TRUE); 138 } 139 break; 140 default: 141 *rp++ = LIT; 142 *rp++ = *sp; 143 esc = 0; 144 break; 145 } 146 } 147 if (!first) { 148 (void)snprintf(rxperr, sizeof(rxperr), 149 "unmatched alternator in regexp %s", s); 150 return (FALSE); 151 } 152 *rp++ = ALT_E; 153 *alt_ptr = rp - rxpbuf; 154 *rp++ = GRP_E; 155 *(rxpbuf + 2) = rp - rxpbuf; 156 *rp++ = EOT; 157 *rp = END; 158 return (TRUE); 159 } 160 161 /* 162 * match string against compiled regular expression 163 */ 164 int 165 rxp_match(s) 166 register char * s; 167 { 168 return (rxp__match(s, TRUE, NULL, NULL, NULL)); 169 } 170 171 static int 172 rxp__match(s, first, j_succ, j_fail, sp_fail) 173 char *s; 174 int first; 175 Rxp_t *j_succ; /* jump here on successful alt match */ 176 Rxp_t *j_fail; /* jump here on failed match */ 177 char *sp_fail; /* reset sp to here on failed match */ 178 { 179 static Rxp_t *rp; 180 static char *sp; 181 register int ch; 182 Rxp_t *grp_end; 183 int err; 184 185 if (first) { 186 rp = rxpbuf; 187 sp = s; 188 } 189 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 190 switch(*rp) { 191 case LIT: 192 rp++; 193 ch = isascii(*rp) && isupper(*rp) ? tolower(*rp) : *rp; 194 if (ch != *sp++) { 195 rp = j_fail; 196 sp = sp_fail; 197 return (TRUE); 198 } 199 rp++; 200 break; 201 case SOT: 202 if (sp != s) 203 return (FALSE); 204 rp++; 205 break; 206 case EOT: 207 if (*sp != 0) 208 return (FALSE); 209 rp++; 210 break; 211 case GRP_S: 212 rp++; 213 grp_end = rxpbuf + *rp++; 214 break; 215 case ALT_S: 216 rp++; 217 if ((err = rxp__match(sp, 218 FALSE, grp_end, rxpbuf + *rp++, sp)) != TRUE) 219 return (err); 220 break; 221 case ALT_E: 222 rp = j_succ; 223 return (TRUE); 224 case GRP_E: 225 default: 226 return (FALSE); 227 } 228 return (*rp != END ? FALSE : TRUE); 229 } 230 231 /* 232 * Reverse engineer the regular expression, by picking first of all alternates. 233 */ 234 char * 235 rxp_expand() 236 { 237 return (rxp__expand(TRUE)); 238 } 239 240 static char * 241 rxp__expand(first) 242 int first; 243 { 244 static char buf[RXP_LINE_SZ/2]; 245 static Rxp_t *rp; 246 static char *bp; 247 Rxp_t *grp_ptr; 248 char *err; 249 250 if (first) { 251 rp = rxpbuf; 252 bp = buf; 253 } 254 while (rp < rxpbuf + RXP_LINE_SZ && *rp != END) 255 switch(*rp) { 256 case LIT: 257 rp++; 258 *bp++ = *rp++; 259 break; 260 case GRP_S: 261 rp++; 262 grp_ptr = rxpbuf + *rp; 263 rp++; 264 if ((err = rxp__expand(FALSE)) == NULL) 265 return (err); 266 rp = grp_ptr; 267 break; 268 case ALT_E: 269 return (buf); 270 case ALT_S: 271 rp++; 272 /* FALLTHROUGH */ 273 case SOT: 274 case EOT: 275 case GRP_E: 276 rp++; 277 break; 278 default: 279 return (NULL); 280 } 281 if (first) { 282 if (*rp != END) 283 return (NULL); 284 *bp = '\0'; 285 } 286 return (buf); 287 } 288