1 #include <ctype.h> 2 3 typedef int boolean; 4 #define TRUE 1 5 #define FALSE 0 6 #define NIL 0 7 8 boolean l_onecase; /* true if upper and lower equivalent */ 9 10 #define makelower(c) (isupper((c)) ? tolower((c)) : (c)) 11 12 /* STRNCMP - like strncmp except that we convert the 13 * first string to lower case before comparing 14 * if l_onecase is set. 15 */ 16 17 STRNCMP(s1, s2, len) 18 register char *s1,*s2; 19 register int len; 20 { 21 if (l_onecase) { 22 do 23 if (*s2 - makelower(*s1)) 24 return (*s2 - makelower(*s1)); 25 else { 26 s2++; 27 s1++; 28 } 29 while (--len); 30 } else { 31 do 32 if (*s2 - *s1) 33 return (*s2 - *s1); 34 else { 35 s2++; 36 s1++; 37 } 38 while (--len); 39 } 40 return(0); 41 } 42 43 /* The following routine converts an irregular expression to 44 * internal format. 45 * 46 * Either meta symbols (\a \d or \p) or character strings or 47 * operations ( alternation or perenthesizing ) can be 48 * specified. Each starts with a descriptor byte. The descriptor 49 * byte has STR set for strings, META set for meta symbols 50 * and OPER set for operations. 51 * The descriptor byte can also have the OPT bit set if the object 52 * defined is optional. Also ALT can be set to indicate an alternation. 53 * 54 * For metasymbols the byte following the descriptor byte identities 55 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For 56 * strings the byte after the descriptor is a character count for 57 * the string: 58 * 59 * meta symbols := descriptor 60 * symbol 61 * 62 * strings := descriptor 63 * character count 64 * the string 65 * 66 * operatins := descriptor 67 * symbol 68 * character count 69 */ 70 71 /* 72 * handy macros for accessing parts of match blocks 73 */ 74 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */ 75 #define MNEXT(A) (A+2) /* character following a metasymbol block */ 76 77 #define OSYM(A) (*(A+1)) /* symbol in an operation block */ 78 #define OCNT(A) (*(A+2)) /* character count */ 79 #define ONEXT(A) (A+3) /* next character after the operation */ 80 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */ 81 82 #define SCNT(A) (*(A+1)) /* byte count of a string */ 83 #define SSTR(A) (A+2) /* address of the string */ 84 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */ 85 86 /* 87 * bit flags in the descriptor 88 */ 89 #define OPT 1 90 #define STR 2 91 #define META 4 92 #define ALT 8 93 #define OPER 16 94 95 char *ure; /* pointer current position in unconverted exp */ 96 char *ccre; /* pointer to current position in converted exp*/ 97 char *malloc(); 98 99 char * 100 convexp(re) 101 char *re; /* unconverted irregular expression */ 102 { 103 register char *cre; /* pointer to converted regular expression */ 104 105 /* allocate room for the converted expression */ 106 if (re == NIL) 107 return (NIL); 108 if (*re == '\0') 109 return (NIL); 110 cre = malloc (4 * strlen(re) + 3); 111 ccre = cre; 112 ure = re; 113 114 /* start the conversion with a \a */ 115 *cre = META | OPT; 116 MSYM(cre) = 'a'; 117 ccre = MNEXT(cre); 118 119 /* start the conversion (its recursive) */ 120 expconv (); 121 *ccre = 0; 122 return (cre); 123 } 124 125 expconv() 126 { 127 register char *cs; /* pointer to current symbol in converted exp */ 128 register char c; /* character being processed */ 129 register char *acs; /* pinter to last alternate */ 130 register int temp; 131 132 /* let the conversion begin */ 133 acs = NIL; 134 while (*ure != NIL) { 135 switch (c = *ure++) { 136 137 case '\\': 138 switch (c = *ure++) { 139 140 /* escaped characters are just characters */ 141 default: 142 if ((*cs & STR) == 0) { 143 cs = ccre; 144 *cs = STR; 145 SCNT(cs) = 1; 146 ccre += 2; 147 } else 148 SCNT(cs)++; 149 *ccre++ = c; 150 break; 151 152 /* normal(?) metacharacters */ 153 case 'a': 154 case 'd': 155 case 'e': 156 case 'p': 157 if (acs != NIL && acs != cs) { 158 do { 159 temp = OCNT(acs); 160 OCNT(acs) = ccre - acs; 161 acs -= temp; 162 } while (temp != 0); 163 acs = NIL; 164 } 165 cs = ccre; 166 *cs = META; 167 MSYM(cs) = c; 168 ccre = MNEXT(cs); 169 break; 170 } 171 break; 172 173 /* just put the symbol in */ 174 case '^': 175 case '$': 176 if (acs != NIL && acs != cs) { 177 do { 178 temp = OCNT(acs); 179 OCNT(acs) = ccre - acs; 180 acs -= temp; 181 } while (temp != 0); 182 acs = NIL; 183 } 184 cs = ccre; 185 *cs = META; 186 MSYM(cs) = c; 187 ccre = MNEXT(cs); 188 break; 189 190 /* mark the last match sequence as optional */ 191 case '?': 192 *cs = *cs | OPT; 193 break; 194 195 /* recurse and define a subexpression */ 196 case '(': 197 if (acs != NIL && acs != cs) { 198 do { 199 temp = OCNT(acs); 200 OCNT(acs) = ccre - acs; 201 acs -= temp; 202 } while (temp != 0); 203 acs = NIL; 204 } 205 cs = ccre; 206 *cs = OPER; 207 OSYM(cs) = '('; 208 ccre = ONEXT(cs); 209 expconv (); 210 OCNT(cs) = ccre - cs; /* offset to next symbol */ 211 break; 212 213 /* return from a recursion */ 214 case ')': 215 if (acs != NIL) { 216 do { 217 temp = OCNT(acs); 218 OCNT(acs) = ccre - acs; 219 acs -= temp; 220 } while (temp != 0); 221 acs = NIL; 222 } 223 cs = ccre; 224 *cs = META; 225 MSYM(cs) = c; 226 ccre = MNEXT(cs); 227 return; 228 229 /* mark the last match sequence as having an alternate */ 230 /* the third byte will contain an offset to jump over the */ 231 /* alternate match in case the first did not fail */ 232 case '|': 233 if (acs != NIL && acs != cs) 234 OCNT(ccre) = ccre - acs; /* make a back pointer */ 235 else 236 OCNT(ccre) = 0; 237 *cs |= ALT; 238 cs = ccre; 239 *cs = OPER; 240 OSYM(cs) = '|'; 241 ccre = ONEXT(cs); 242 acs = cs; /* remember that the pointer is to be filles */ 243 break; 244 245 /* if its not a metasymbol just build a scharacter string */ 246 default: 247 if ((*cs & STR) == 0) { 248 cs = ccre; 249 *cs = STR; 250 SCNT(cs) = 1; 251 ccre = SSTR(cs); 252 } else 253 SCNT(cs)++; 254 *ccre++ = c; 255 break; 256 } 257 } 258 if (acs != NIL) { 259 do { 260 temp = OCNT(acs); 261 OCNT(acs) = ccre - acs; 262 acs -= temp; 263 } while (temp != 0); 264 acs = NIL; 265 } 266 return; 267 } 268 /* end of convertre */ 269 270 271 /* 272 * The following routine recognises an irregular expresion 273 * with the following special characters: 274 * 275 * \? - means last match was optional 276 * \a - matches any number of characters 277 * \d - matches any number of spaces and tabs 278 * \p - matches any number of alphanumeric 279 * characters. The 280 * characters matched will be copied into 281 * the area pointed to by 'name'. 282 * \| - alternation 283 * \( \) - grouping used mostly for alternation and 284 * optionality 285 * 286 * The irregular expression must be translated to internal form 287 * prior to calling this routine 288 * 289 * The value returned is the pointer to the first non \a 290 * character matched. 291 */ 292 293 boolean _escaped; /* true if we are currently _escaped */ 294 char *_start; /* start of string */ 295 296 char * 297 expmatch (s, re, mstring) 298 register char *s; /* string to check for a match in */ 299 register char *re; /* a converted irregular expression */ 300 register char *mstring; /* where to put whatever matches a \p */ 301 { 302 register char *cs; /* the current symbol */ 303 register char *ptr,*s1; /* temporary pointer */ 304 boolean matched; /* a temporary boolean */ 305 306 /* initial conditions */ 307 if (re == NIL) 308 return (NIL); 309 cs = re; 310 matched = FALSE; 311 312 /* loop till expression string is exhausted (or at least pretty tired) */ 313 while (*cs) { 314 switch (*cs & (OPER | STR | META)) { 315 316 /* try to match a string */ 317 case STR: 318 matched = !STRNCMP (s, SSTR(cs), SCNT(cs)); 319 if (matched) { 320 321 /* hoorah it matches */ 322 s += SCNT(cs); 323 cs = SNEXT(cs); 324 } else if (*cs & ALT) { 325 326 /* alternation, skip to next expression */ 327 cs = SNEXT(cs); 328 } else if (*cs & OPT) { 329 330 /* the match is optional */ 331 cs = SNEXT(cs); 332 matched = 1; /* indicate a successful match */ 333 } else { 334 335 /* no match, error return */ 336 return (NIL); 337 } 338 break; 339 340 /* an operator, do something fancy */ 341 case OPER: 342 switch (OSYM(cs)) { 343 344 /* this is an alternation */ 345 case '|': 346 if (matched) 347 348 /* last thing in the alternation was a match, skip ahead */ 349 cs = OPTR(cs); 350 else 351 352 /* no match, keep trying */ 353 cs = ONEXT(cs); 354 break; 355 356 /* this is a grouping, recurse */ 357 case '(': 358 ptr = expmatch (s, ONEXT(cs), mstring); 359 if (ptr != NIL) { 360 361 /* the subexpression matched */ 362 matched = 1; 363 s = ptr; 364 } else if (*cs & ALT) { 365 366 /* alternation, skip to next expression */ 367 matched = 0; 368 } else if (*cs & OPT) { 369 370 /* the match is optional */ 371 matched = 1; /* indicate a successful match */ 372 } else { 373 374 /* no match, error return */ 375 return (NIL); 376 } 377 cs = OPTR(cs); 378 break; 379 } 380 break; 381 382 /* try to match a metasymbol */ 383 case META: 384 switch (MSYM(cs)) { 385 386 /* try to match anything and remember what was matched */ 387 case 'p': 388 /* 389 * This is really the same as trying the match the 390 * remaining parts of the expression to any subset 391 * of the string. 392 */ 393 s1 = s; 394 do { 395 ptr = expmatch (s1, MNEXT(cs), mstring); 396 if (ptr != NIL && s1 != s) { 397 398 /* we have a match, remember the match */ 399 strncpy (mstring, s, s1 - s); 400 mstring[s1 - s] = '\0'; 401 return (ptr); 402 } else if (ptr != NIL && (*cs & OPT)) { 403 404 /* it was aoptional so no match is ok */ 405 return (ptr); 406 } else if (ptr != NIL) { 407 408 /* not optional and we still matched */ 409 return (NIL); 410 } 411 if (!isalnum(*s1) && *s1 != '_') 412 return (NIL); 413 if (*s1 == '\\') 414 _escaped = _escaped ? FALSE : TRUE; 415 else 416 _escaped = FALSE; 417 } while (*s1++); 418 return (NIL); 419 420 /* try to match anything */ 421 case 'a': 422 /* 423 * This is really the same as trying the match the 424 * remaining parts of the expression to any subset 425 * of the string. 426 */ 427 s1 = s; 428 do { 429 ptr = expmatch (s1, MNEXT(cs), mstring); 430 if (ptr != NIL && s1 != s) { 431 432 /* we have a match */ 433 return (ptr); 434 } else if (ptr != NIL && (*cs & OPT)) { 435 436 /* it was aoptional so no match is ok */ 437 return (ptr); 438 } else if (ptr != NIL) { 439 440 /* not optional and we still matched */ 441 return (NIL); 442 } 443 if (*s1 == '\\') 444 _escaped = _escaped ? FALSE : TRUE; 445 else 446 _escaped = FALSE; 447 } while (*s1++); 448 return (NIL); 449 450 /* fail if we are currently _escaped */ 451 case 'e': 452 if (_escaped) 453 return(NIL); 454 cs = MNEXT(cs); 455 break; 456 457 /* match any number of tabs and spaces */ 458 case 'd': 459 ptr = s; 460 while (*s == ' ' || *s == '\t') 461 s++; 462 if (s != ptr || s == _start) { 463 464 /* match, be happy */ 465 matched = 1; 466 cs = MNEXT(cs); 467 } else if (*s == '\n' || *s == '\0') { 468 469 /* match, be happy */ 470 matched = 1; 471 cs = MNEXT(cs); 472 } else if (*cs & ALT) { 473 474 /* try the next part */ 475 matched = 0; 476 cs = MNEXT(cs); 477 } else if (*cs & OPT) { 478 479 /* doesn't matter */ 480 matched = 1; 481 cs = MNEXT(cs); 482 } else 483 484 /* no match, error return */ 485 return (NIL); 486 break; 487 488 /* check for end of line */ 489 case '$': 490 if (*s == '\0' || *s == '\n') { 491 492 /* match, be happy */ 493 s++; 494 matched = 1; 495 cs = MNEXT(cs); 496 } else if (*cs & ALT) { 497 498 /* try the next part */ 499 matched = 0; 500 cs = MNEXT(cs); 501 } else if (*cs & OPT) { 502 503 /* doesn't matter */ 504 matched = 1; 505 cs = MNEXT(cs); 506 } else 507 508 /* no match, error return */ 509 return (NIL); 510 break; 511 512 /* check for start of line */ 513 case '^': 514 if (s == _start) { 515 516 /* match, be happy */ 517 matched = 1; 518 cs = MNEXT(cs); 519 } else if (*cs & ALT) { 520 521 /* try the next part */ 522 matched = 0; 523 cs = MNEXT(cs); 524 } else if (*cs & OPT) { 525 526 /* doesn't matter */ 527 matched = 1; 528 cs = MNEXT(cs); 529 } else 530 531 /* no match, error return */ 532 return (NIL); 533 break; 534 535 /* end of a subexpression, return success */ 536 case ')': 537 return (s); 538 } 539 break; 540 } 541 } 542 return (s); 543 } 544