1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 34 #ifndef lint 35 static const char copyright[] = 36 "@(#) Copyright (c) 1980, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38 #endif 39 40 41 #include <ctype.h> 42 #include <stdlib.h> 43 #include <stdbool.h> 44 #include <string.h> 45 46 #include "extern.h" 47 48 static void expconv(void); 49 50 bool _escaped; /* true if we are currently x_escaped */ 51 char *s_start; /* start of string */ 52 bool l_onecase; /* true if upper and lower equivalent */ 53 54 #define makelower(c) (isupper((c)) ? tolower((c)) : (c)) 55 56 /* STRNCMP - like strncmp except that we convert the 57 * first string to lower case before comparing 58 * if l_onecase is set. 59 */ 60 61 int 62 STRNCMP(register char *s1, register char *s2, register int len) 63 { 64 if (l_onecase) { 65 do 66 if (*s2 - makelower(*s1)) 67 return (*s2 - makelower(*s1)); 68 else { 69 s2++; 70 s1++; 71 } 72 while (--len); 73 } else { 74 do 75 if (*s2 - *s1) 76 return (*s2 - *s1); 77 else { 78 s2++; 79 s1++; 80 } 81 while (--len); 82 } 83 return(0); 84 } 85 86 /* The following routine converts an irregular expression to 87 * internal format. 88 * 89 * Either meta symbols (\a \d or \p) or character strings or 90 * operations ( alternation or parenthesizing ) can be 91 * specified. Each starts with a descriptor byte. The descriptor 92 * byte has STR set for strings, META set for meta symbols 93 * and OPER set for operations. 94 * The descriptor byte can also have the OPT bit set if the object 95 * defined is optional. Also ALT can be set to indicate an alternation. 96 * 97 * For metasymbols the byte following the descriptor byte identities 98 * the meta symbol (containing an ascii 'a', 'd', 'p', '|', or '('). For 99 * strings the byte after the descriptor is a character count for 100 * the string: 101 * 102 * meta symbols := descriptor 103 * symbol 104 * 105 * strings := descriptor 106 * character count 107 * the string 108 * 109 * operations := descriptor 110 * symbol 111 * character count 112 */ 113 114 /* 115 * handy macros for accessing parts of match blocks 116 */ 117 #define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */ 118 #define MNEXT(A) (A+2) /* character following a metasymbol block */ 119 120 #define OSYM(A) (*(A+1)) /* symbol in an operation block */ 121 #define OCNT(A) (*(A+2)) /* character count */ 122 #define ONEXT(A) (A+3) /* next character after the operation */ 123 #define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */ 124 125 #define SCNT(A) (*(A+1)) /* byte count of a string */ 126 #define SSTR(A) (A+2) /* address of the string */ 127 #define SNEXT(A) (A+2+*(A+1)) /* character following the string */ 128 129 /* 130 * bit flags in the descriptor 131 */ 132 #define OPT 1 133 #define STR 2 134 #define META 4 135 #define ALT 8 136 #define OPER 16 137 138 static char *ccre; /* pointer to current position in converted exp*/ 139 static char *ure; /* pointer current position in unconverted exp */ 140 141 /* re: unconverted irregular expression */ 142 char * 143 convexp(char *re) 144 { 145 register char *cre; /* pointer to converted regular expression */ 146 147 /* allocate room for the converted expression */ 148 if (re == NULL) 149 return (NULL); 150 if (*re == '\0') 151 return (NULL); 152 cre = malloc(4 * strlen(re) + 3); 153 ccre = cre; 154 ure = re; 155 156 /* start the conversion with a \a */ 157 *cre = META | OPT; 158 MSYM(cre) = 'a'; 159 ccre = MNEXT(cre); 160 161 /* start the conversion (its recursive) */ 162 expconv (); 163 *ccre = 0; 164 return (cre); 165 } 166 167 static void 168 expconv() 169 { 170 register char *cs; /* pointer to current symbol in converted exp */ 171 register char c; /* character being processed */ 172 register char *acs; /* pinter to last alternate */ 173 register int temp; 174 175 /* let the conversion begin */ 176 acs = NULL; 177 cs = NULL; 178 while (*ure) { 179 switch (c = *ure++) { 180 181 case '\\': 182 switch (c = *ure++) { 183 184 /* escaped characters are just characters */ 185 default: 186 if (cs == NULL || (*cs & STR) == 0) { 187 cs = ccre; 188 *cs = STR; 189 SCNT(cs) = 1; 190 ccre += 2; 191 } else 192 SCNT(cs)++; 193 *ccre++ = c; 194 break; 195 196 /* normal(?) metacharacters */ 197 case 'a': 198 case 'd': 199 case 'e': 200 case 'p': 201 if (acs != NULL && acs != cs) { 202 do { 203 temp = OCNT(acs); 204 OCNT(acs) = ccre - acs; 205 acs -= temp; 206 } while (temp != 0); 207 acs = NULL; 208 } 209 cs = ccre; 210 *cs = META; 211 MSYM(cs) = c; 212 ccre = MNEXT(cs); 213 break; 214 } 215 break; 216 217 /* just put the symbol in */ 218 case '^': 219 case '$': 220 if (acs != NULL && acs != cs) { 221 do { 222 temp = OCNT(acs); 223 OCNT(acs) = ccre - acs; 224 acs -= temp; 225 } while (temp != 0); 226 acs = NULL; 227 } 228 cs = ccre; 229 *cs = META; 230 MSYM(cs) = c; 231 ccre = MNEXT(cs); 232 break; 233 234 /* mark the last match sequence as optional */ 235 case '?': 236 if (cs) 237 *cs = *cs | OPT; 238 break; 239 240 /* recurse and define a subexpression */ 241 case '(': 242 if (acs != NULL && acs != cs) { 243 do { 244 temp = OCNT(acs); 245 OCNT(acs) = ccre - acs; 246 acs -= temp; 247 } while (temp != 0); 248 acs = NULL; 249 } 250 cs = ccre; 251 *cs = OPER; 252 OSYM(cs) = '('; 253 ccre = ONEXT(cs); 254 expconv(); 255 OCNT(cs) = ccre - cs; /* offset to next symbol */ 256 break; 257 258 /* return from a recursion */ 259 case ')': 260 if (acs != NULL) { 261 do { 262 temp = OCNT(acs); 263 OCNT(acs) = ccre - acs; 264 acs -= temp; 265 } while (temp != 0); 266 acs = NULL; 267 } 268 cs = ccre; 269 *cs = META; 270 MSYM(cs) = c; 271 ccre = MNEXT(cs); 272 return; 273 274 /* mark the last match sequence as having an alternate */ 275 /* the third byte will contain an offset to jump over the */ 276 /* alternate match in case the first did not fail */ 277 case '|': 278 if (acs != NULL && acs != cs) 279 OCNT(ccre) = ccre - acs; /* make a back pointer */ 280 else 281 OCNT(ccre) = 0; 282 *cs |= ALT; 283 cs = ccre; 284 *cs = OPER; 285 OSYM(cs) = '|'; 286 ccre = ONEXT(cs); 287 acs = cs; /* remember that the pointer is to be filles */ 288 break; 289 290 /* if its not a metasymbol just build a scharacter string */ 291 default: 292 if (cs == NULL || (*cs & STR) == 0) { 293 cs = ccre; 294 *cs = STR; 295 SCNT(cs) = 1; 296 ccre = SSTR(cs); 297 } else 298 SCNT(cs)++; 299 *ccre++ = c; 300 break; 301 } 302 } 303 if (acs != NULL) { 304 do { 305 temp = OCNT(acs); 306 OCNT(acs) = ccre - acs; 307 acs -= temp; 308 } while (temp != 0); 309 acs = NULL; 310 } 311 return; 312 } 313 /* end of convertre */ 314 315 316 /* 317 * The following routine recognises an irregular expression 318 * with the following special characters: 319 * 320 * \? - means last match was optional 321 * \a - matches any number of characters 322 * \d - matches any number of spaces and tabs 323 * \p - matches any number of alphanumeric 324 * characters. The 325 * characters matched will be copied into 326 * the area pointed to by 'name'. 327 * \| - alternation 328 * \( \) - grouping used mostly for alternation and 329 * optionality 330 * 331 * The irregular expression must be translated to internal form 332 * prior to calling this routine 333 * 334 * The value returned is the pointer to the first non \a 335 * character matched. 336 */ 337 338 /* 339 * s: string to check for a match in 340 * re: a converted irregular expression 341 * mstring: where to put whatever matches a \p 342 */ 343 char * 344 expmatch (register char *s, register char *re, register char *mstring) 345 { 346 register char *cs; /* the current symbol */ 347 register char *ptr,*s1; /* temporary pointer */ 348 bool matched; /* a temporary bool */ 349 350 /* initial conditions */ 351 if (re == NULL) 352 return (NULL); 353 cs = re; 354 matched = false; 355 356 /* loop till expression string is exhausted (or at least pretty tired) */ 357 while (*cs) { 358 switch (*cs & (OPER | STR | META)) { 359 360 /* try to match a string */ 361 case STR: 362 matched = !STRNCMP (s, SSTR(cs), SCNT(cs)); 363 if (matched) { 364 365 /* hoorah it matches */ 366 s += SCNT(cs); 367 cs = SNEXT(cs); 368 } else if (*cs & ALT) { 369 370 /* alternation, skip to next expression */ 371 cs = SNEXT(cs); 372 } else if (*cs & OPT) { 373 374 /* the match is optional */ 375 cs = SNEXT(cs); 376 matched = 1; /* indicate a successful match */ 377 } else { 378 379 /* no match, error return */ 380 return (NULL); 381 } 382 break; 383 384 /* an operator, do something fancy */ 385 case OPER: 386 switch (OSYM(cs)) { 387 388 /* this is an alternation */ 389 case '|': 390 if (matched) 391 392 /* last thing in the alternation was a match, skip ahead */ 393 cs = OPTR(cs); 394 else 395 396 /* no match, keep trying */ 397 cs = ONEXT(cs); 398 break; 399 400 /* this is a grouping, recurse */ 401 case '(': 402 ptr = expmatch(s, ONEXT(cs), mstring); 403 if (ptr != NULL) { 404 405 /* the subexpression matched */ 406 matched = 1; 407 s = ptr; 408 } else if (*cs & ALT) { 409 410 /* alternation, skip to next expression */ 411 matched = 0; 412 } else if (*cs & OPT) { 413 414 /* the match is optional */ 415 matched = 1; /* indicate a successful match */ 416 } else { 417 418 /* no match, error return */ 419 return (NULL); 420 } 421 cs = OPTR(cs); 422 break; 423 } 424 break; 425 426 /* try to match a metasymbol */ 427 case META: 428 switch (MSYM(cs)) { 429 430 /* try to match anything and remember what was matched */ 431 case 'p': 432 /* 433 * This is really the same as trying the match the 434 * remaining parts of the expression to any subset 435 * of the string. 436 */ 437 s1 = s; 438 do { 439 ptr = expmatch(s1, MNEXT(cs), mstring); 440 if (ptr != NULL && s1 != s) { 441 442 /* we have a match, remember the match */ 443 strncpy (mstring, s, s1 - s); 444 mstring[s1 - s] = '\0'; 445 return (ptr); 446 } else if (ptr != NULL && (*cs & OPT)) { 447 448 /* it was aoptional so no match is ok */ 449 return (ptr); 450 } else if (ptr != NULL) { 451 452 /* not optional and we still matched */ 453 return (NULL); 454 } 455 if (!(isalnum(*s1) || *s1 == '_' || 456 /* C++ destructor */ 457 *s1 == '~' || 458 /* C++ scope operator */ 459 (strlen(s1) > 1 && *s1 == ':' && s1[1] == ':' && 460 (s1++, true)))) 461 return (NULL); 462 if (*s1 == '\\') 463 _escaped = _escaped ? false : true; 464 else 465 _escaped = false; 466 } while (*s1++); 467 return (NULL); 468 469 /* try to match anything */ 470 case 'a': 471 /* 472 * This is really the same as trying the match the 473 * remaining parts of the expression to any subset 474 * of the string. 475 */ 476 s1 = s; 477 do { 478 ptr = expmatch(s1, MNEXT(cs), mstring); 479 if (ptr != NULL && s1 != s) { 480 481 /* we have a match */ 482 return (ptr); 483 } else if (ptr != NULL && (*cs & OPT)) { 484 485 /* it was aoptional so no match is ok */ 486 return (ptr); 487 } else if (ptr != NULL) { 488 489 /* not optional and we still matched */ 490 return (NULL); 491 } 492 if (*s1 == '\\') 493 _escaped = _escaped ? false : true; 494 else 495 _escaped = false; 496 } while (*s1++); 497 return (NULL); 498 499 /* fail if we are currently _escaped */ 500 case 'e': 501 if (_escaped) 502 return(NULL); 503 cs = MNEXT(cs); 504 break; 505 506 /* match any number of tabs and spaces */ 507 case 'd': 508 ptr = s; 509 while (*s == ' ' || *s == '\t') 510 s++; 511 if (s != ptr || s == s_start) { 512 513 /* match, be happy */ 514 matched = 1; 515 cs = MNEXT(cs); 516 } else if (*s == '\n' || *s == '\0') { 517 518 /* match, be happy */ 519 matched = 1; 520 cs = MNEXT(cs); 521 } else if (*cs & ALT) { 522 523 /* try the next part */ 524 matched = 0; 525 cs = MNEXT(cs); 526 } else if (*cs & OPT) { 527 528 /* doesn't matter */ 529 matched = 1; 530 cs = MNEXT(cs); 531 } else 532 533 /* no match, error return */ 534 return (NULL); 535 break; 536 537 /* check for end of line */ 538 case '$': 539 if (*s == '\0' || *s == '\n') { 540 541 /* match, be happy */ 542 s++; 543 matched = 1; 544 cs = MNEXT(cs); 545 } else if (*cs & ALT) { 546 547 /* try the next part */ 548 matched = 0; 549 cs = MNEXT(cs); 550 } else if (*cs & OPT) { 551 552 /* doesn't matter */ 553 matched = 1; 554 cs = MNEXT(cs); 555 } else 556 557 /* no match, error return */ 558 return (NULL); 559 break; 560 561 /* check for start of line */ 562 case '^': 563 if (s == s_start) { 564 565 /* match, be happy */ 566 matched = 1; 567 cs = MNEXT(cs); 568 } else if (*cs & ALT) { 569 570 /* try the next part */ 571 matched = 0; 572 cs = MNEXT(cs); 573 } else if (*cs & OPT) { 574 575 /* doesn't matter */ 576 matched = 1; 577 cs = MNEXT(cs); 578 } else 579 580 /* no match, error return */ 581 return (NULL); 582 break; 583 584 /* end of a subexpression, return success */ 585 case ')': 586 return (s); 587 } 588 break; 589 } 590 } 591 return (s); 592 } 593