1 # include "sendmail.h" 2 3 SCCSID(@(#)parseaddr.c 3.40 03/20/82); 4 5 /* 6 ** PARSE -- Parse an address 7 ** 8 ** Parses an address and breaks it up into three parts: a 9 ** net to transmit the message on, the host to transmit it 10 ** to, and a user on that host. These are loaded into an 11 ** ADDRESS header with the values squirreled away if necessary. 12 ** The "user" part may not be a real user; the process may 13 ** just reoccur on that machine. For example, on a machine 14 ** with an arpanet connection, the address 15 ** csvax.bill@berkeley 16 ** will break up to a "user" of 'csvax.bill' and a host 17 ** of 'berkeley' -- to be transmitted over the arpanet. 18 ** 19 ** Parameters: 20 ** addr -- the address to parse. 21 ** a -- a pointer to the address descriptor buffer. 22 ** If NULL, a header will be created. 23 ** copyf -- determines what shall be copied: 24 ** -1 -- don't copy anything. The printname 25 ** (q_paddr) is just addr, and the 26 ** user & host are allocated internally 27 ** to parse. 28 ** 0 -- copy out the parsed user & host, but 29 ** don't copy the printname. 30 ** +1 -- copy everything. 31 ** 32 ** Returns: 33 ** A pointer to the address descriptor header (`a' if 34 ** `a' is non-NULL). 35 ** NULL on error. 36 ** 37 ** Side Effects: 38 ** none 39 */ 40 41 # define DELIMCHARS "$()<>,;\\\"\r\n" /* word delimiters */ 42 43 ADDRESS * 44 parse(addr, a, copyf) 45 char *addr; 46 register ADDRESS *a; 47 int copyf; 48 { 49 register char **pvp; 50 register struct mailer *m; 51 extern char **prescan(); 52 extern ADDRESS *buildaddr(); 53 54 /* 55 ** Initialize and prescan address. 56 */ 57 58 To = addr; 59 # ifdef DEBUG 60 if (Debug) 61 printf("\n--parse(%s)\n", addr); 62 # endif DEBUG 63 64 pvp = prescan(addr, '\0'); 65 if (pvp == NULL) 66 return (NULL); 67 68 /* 69 ** Apply rewriting rules. 70 */ 71 72 rewrite(pvp, 0); 73 74 /* 75 ** See if we resolved to a real mailer. 76 */ 77 78 if (pvp[0][0] != CANONNET) 79 { 80 setstat(EX_USAGE); 81 usrerr("cannot resolve name"); 82 return (NULL); 83 } 84 85 /* 86 ** Build canonical address from pvp. 87 */ 88 89 a = buildaddr(pvp, a); 90 if (a == NULL) 91 return (NULL); 92 m = a->q_mailer; 93 94 /* 95 ** Make local copies of the host & user and then 96 ** transport them out. 97 */ 98 99 if (copyf > 0) 100 a->q_paddr = newstr(addr); 101 else 102 a->q_paddr = addr; 103 104 if (copyf >= 0) 105 { 106 if (a->q_host != NULL) 107 a->q_host = newstr(a->q_host); 108 else 109 a->q_host = ""; 110 if (a->q_user != a->q_paddr) 111 a->q_user = newstr(a->q_user); 112 } 113 114 /* 115 ** Do UPPER->lower case mapping unless inhibited. 116 */ 117 118 if (!bitset(M_HST_UPPER, m->m_flags)) 119 makelower(a->q_host); 120 if (!bitset(M_USR_UPPER, m->m_flags)) 121 makelower(a->q_user); 122 123 /* 124 ** Compute return value. 125 */ 126 127 # ifdef DEBUG 128 if (Debug) 129 { 130 printf("parse-->"); 131 printaddr(a, FALSE); 132 } 133 # endif DEBUG 134 135 return (a); 136 } 137 /* 138 ** PRESCAN -- Prescan name and make it canonical 139 ** 140 ** Scans a name and turns it into canonical form. This involves 141 ** deleting blanks, comments (in parentheses), and turning the 142 ** word "at" into an at-sign ("@"). The name is copied as this 143 ** is done; it is legal to copy a name onto itself, since this 144 ** process can only make things smaller. 145 ** 146 ** This routine knows about quoted strings and angle brackets. 147 ** 148 ** There are certain subtleties to this routine. The one that 149 ** comes to mind now is that backslashes on the ends of names 150 ** are silently stripped off; this is intentional. The problem 151 ** is that some versions of sndmsg (like at LBL) set the kill 152 ** character to something other than @ when reading addresses; 153 ** so people type "csvax.eric\@berkeley" -- which screws up the 154 ** berknet mailer. 155 ** 156 ** Parameters: 157 ** addr -- the name to chomp. 158 ** delim -- the delimiter for the address, normally 159 ** '\0' or ','; \0 is accepted in any case. 160 ** are moving in place; set buflim to high core. 161 ** 162 ** Returns: 163 ** A pointer to a vector of tokens. 164 ** NULL on error. 165 ** 166 ** Side Effects: 167 ** none. 168 */ 169 170 # define OPER 1 171 # define ATOM 2 172 # define EOTOK 3 173 # define QSTRING 4 174 # define SPACE 5 175 # define ONEMORE 6 176 # define GETONE 7 177 # define MACRO 8 178 179 char ** 180 prescan(addr, delim) 181 char *addr; 182 char delim; 183 { 184 register char *p; 185 static char buf[MAXNAME+MAXATOM]; 186 static char *av[MAXATOM+1]; 187 char **avp; 188 bool bslashmode; 189 int cmntcnt; 190 int brccnt; 191 register char c; 192 char *tok; 193 register char *q; 194 register int state; 195 int nstate; 196 extern char lower(); 197 198 q = buf; 199 bslashmode = FALSE; 200 cmntcnt = brccnt = 0; 201 avp = av; 202 state = OPER; 203 for (p = addr; *p != '\0' && *p != delim; ) 204 { 205 /* read a token */ 206 tok = q; 207 while ((c = *p++) != '\0' && c != delim) 208 { 209 /* chew up special characters */ 210 c &= ~0200; 211 *q = '\0'; 212 if (bslashmode) 213 { 214 c |= 0200; 215 bslashmode = FALSE; 216 } 217 else if (c == '\\') 218 { 219 bslashmode = TRUE; 220 continue; 221 } 222 else if (c == '"') 223 { 224 if (state == QSTRING) 225 state = OPER; 226 else 227 state = QSTRING; 228 break; 229 } 230 231 nstate = toktype(c); 232 switch (state) 233 { 234 case QSTRING: /* in quoted string */ 235 break; 236 237 case ATOM: /* regular atom */ 238 if (nstate != ATOM) 239 { 240 state = EOTOK; 241 p--; 242 } 243 break; 244 245 case GETONE: /* grab one character */ 246 state = OPER; 247 break; 248 249 case EOTOK: /* after atom or q-string */ 250 state = nstate; 251 if (state == SPACE) 252 continue; 253 break; 254 255 case SPACE: /* linear white space */ 256 state = nstate; 257 break; 258 259 case OPER: /* operator */ 260 if (nstate == SPACE) 261 continue; 262 state = nstate; 263 break; 264 265 case ONEMORE: /* $- etc. */ 266 state = GETONE; 267 break; 268 269 default: 270 syserr("prescan: unknown state %d", state); 271 } 272 273 if (state == EOTOK || state == SPACE) 274 break; 275 276 /* squirrel it away */ 277 if (q >= &buf[sizeof buf - 5]) 278 { 279 usrerr("Address too long"); 280 return (NULL); 281 } 282 *q++ = c; 283 284 /* decide whether this represents end of token */ 285 if (state == OPER || state == GETONE) 286 break; 287 } 288 if (c == '\0' || c == delim) 289 p--; 290 291 /* new token */ 292 if (tok == q) 293 continue; 294 *q++ = '\0'; 295 296 c = tok[0]; 297 if (c == '(') 298 { 299 cmntcnt++; 300 continue; 301 } 302 else if (c == ')') 303 { 304 if (cmntcnt <= 0) 305 { 306 usrerr("Unbalanced ')'"); 307 return (NULL); 308 } 309 else 310 { 311 cmntcnt--; 312 continue; 313 } 314 } 315 else if (cmntcnt > 0) 316 continue; 317 318 /* we prefer <> specs */ 319 if (c == '<') 320 { 321 if (brccnt < 0) 322 { 323 usrerr("multiple < spec"); 324 return (NULL); 325 } 326 brccnt++; 327 if (brccnt == 1) 328 { 329 /* we prefer using machine readable name */ 330 q = buf; 331 *q = '\0'; 332 avp = av; 333 continue; 334 } 335 } 336 else if (c == '>') 337 { 338 if (brccnt <= 0) 339 { 340 usrerr("Unbalanced `>'"); 341 return (NULL); 342 } 343 else 344 brccnt--; 345 if (brccnt <= 0) 346 { 347 brccnt = -1; 348 continue; 349 } 350 } 351 352 if (avp >= &av[MAXATOM]) 353 { 354 syserr("prescan: too many tokens"); 355 return (NULL); 356 } 357 *avp++ = tok; 358 } 359 *avp = NULL; 360 if (cmntcnt > 0) 361 usrerr("Unbalanced '('"); 362 else if (brccnt > 0) 363 usrerr("Unbalanced '<'"); 364 else if (state == QSTRING) 365 usrerr("Unbalanced '\"'"); 366 else if (av[0] != NULL) 367 return (av); 368 return (NULL); 369 } 370 /* 371 ** TOKTYPE -- return token type 372 ** 373 ** Parameters: 374 ** c -- the character in question. 375 ** 376 ** Returns: 377 ** Its type. 378 ** 379 ** Side Effects: 380 ** none. 381 */ 382 383 toktype(c) 384 register char c; 385 { 386 static char buf[50]; 387 static bool firstime = TRUE; 388 389 if (firstime) 390 { 391 firstime = FALSE; 392 (void) expand("$o", buf, &buf[sizeof buf - 1]); 393 strcat(buf, DELIMCHARS); 394 } 395 if (c == MATCHCLASS || c == MATCHREPL) 396 return (ONEMORE); 397 if (!isascii(c)) 398 return (ATOM); 399 if (isspace(c)) 400 return (SPACE); 401 if (iscntrl(c) || index(buf, c) != NULL) 402 return (OPER); 403 return (ATOM); 404 } 405 /* 406 ** REWRITE -- apply rewrite rules to token vector. 407 ** 408 ** This routine is an ordered production system. Each rewrite 409 ** rule has a LHS (called the pattern) and a RHS (called the 410 ** rewrite); 'rwr' points the the current rewrite rule. 411 ** 412 ** For each rewrite rule, 'avp' points the address vector we 413 ** are trying to match against, and 'pvp' points to the pattern. 414 ** If pvp points to a special match value (MATCHANY, MATCHONE, 415 ** MATCHCLASS) then the address in avp matched is saved away 416 ** in the match vector (pointed to by 'mvp'). 417 ** 418 ** When a match between avp & pvp does not match, we try to 419 ** back out. If we back up over a MATCHONE or a MATCHCLASS 420 ** we must also back out the match in mvp. If we reach a 421 ** MATCHANY we just extend the match and start over again. 422 ** 423 ** When we finally match, we rewrite the address vector 424 ** and try over again. 425 ** 426 ** Parameters: 427 ** pvp -- pointer to token vector. 428 ** 429 ** Returns: 430 ** none. 431 ** 432 ** Side Effects: 433 ** pvp is modified. 434 */ 435 436 struct match 437 { 438 char **first; /* first token matched */ 439 char **last; /* last token matched */ 440 }; 441 442 # define MAXMATCH 9 /* max params per rewrite */ 443 444 445 rewrite(pvp, ruleset) 446 char **pvp; 447 int ruleset; 448 { 449 register char *ap; /* address pointer */ 450 register char *rp; /* rewrite pointer */ 451 register char **avp; /* address vector pointer */ 452 register char **rvp; /* rewrite vector pointer */ 453 struct rewrite *rwr; /* pointer to current rewrite rule */ 454 struct match mlist[MAXMATCH]; /* stores match on LHS */ 455 struct match *mlp; /* cur ptr into mlist */ 456 char *npvp[MAXATOM+1]; /* temporary space for rebuild */ 457 extern bool sameword(); 458 459 # ifdef DEBUG 460 if (Debug > 9) 461 { 462 printf("rewrite: original pvp:\n"); 463 printav(pvp); 464 } 465 # endif DEBUG 466 467 /* 468 ** Run through the list of rewrite rules, applying 469 ** any that match. 470 */ 471 472 for (rwr = RewriteRules[ruleset]; rwr != NULL; ) 473 { 474 # ifdef DEBUG 475 if (Debug > 10) 476 { 477 printf("-----trying rule:\n"); 478 printav(rwr->r_lhs); 479 } 480 # endif DEBUG 481 482 /* try to match on this rule */ 483 mlp = mlist; 484 for (rvp = rwr->r_lhs, avp = pvp; *avp != NULL; ) 485 { 486 ap = *avp; 487 rp = *rvp; 488 489 if (rp == NULL) 490 { 491 /* end-of-pattern before end-of-address */ 492 goto fail; 493 } 494 495 switch (*rp) 496 { 497 register STAB *s; 498 register int class; 499 500 case MATCHCLASS: 501 /* match any token in a class */ 502 class = rp[1]; 503 if (!isalpha(class)) 504 goto fail; 505 if (isupper(class)) 506 class -= 'A'; 507 else 508 class -= 'a'; 509 s = stab(ap, ST_CLASS, ST_FIND); 510 if (s == NULL || (s->s_class & (1L << class)) == 0) 511 goto fail; 512 513 /* explicit fall-through */ 514 515 case MATCHONE: 516 case MATCHANY: 517 /* match exactly one token */ 518 mlp->first = mlp->last = avp++; 519 mlp++; 520 break; 521 522 default: 523 /* must have exact match */ 524 if (!sameword(rp, ap)) 525 goto fail; 526 avp++; 527 break; 528 } 529 530 /* successful match on this token */ 531 rvp++; 532 continue; 533 534 fail: 535 /* match failed -- back up */ 536 while (--rvp >= rwr->r_lhs) 537 { 538 rp = *rvp; 539 if (*rp == MATCHANY) 540 { 541 /* extend binding and continue */ 542 mlp[-1].last = avp++; 543 rvp++; 544 break; 545 } 546 avp--; 547 if (*rp == MATCHONE || *rp == MATCHCLASS) 548 { 549 /* back out binding */ 550 mlp--; 551 } 552 } 553 554 if (rvp < rwr->r_lhs) 555 { 556 /* total failure to match */ 557 break; 558 } 559 } 560 561 /* 562 ** See if we successfully matched 563 */ 564 565 if (rvp >= rwr->r_lhs && *rvp == NULL) 566 { 567 # ifdef DEBUG 568 if (Debug > 10) 569 { 570 printf("-----rule matches:\n"); 571 printav(rwr->r_rhs); 572 } 573 # endif DEBUG 574 575 /* substitute */ 576 for (rvp = rwr->r_rhs, avp = npvp; *rvp != NULL; rvp++) 577 { 578 rp = *rvp; 579 if (*rp == MATCHREPL) 580 { 581 register struct match *m; 582 register char **pp; 583 584 m = &mlist[rp[1] - '1']; 585 # ifdef DEBUG 586 if (Debug > 13) 587 { 588 printf("$%c:", rp[1]); 589 pp = m->first; 590 do 591 { 592 printf(" %x=\"", *pp); 593 (void) fflush(stdout); 594 printf("%s\"", *pp); 595 } while (pp++ != m->last); 596 printf("\n"); 597 } 598 # endif DEBUG 599 pp = m->first; 600 do 601 { 602 if (avp >= &npvp[MAXATOM]) 603 { 604 syserr("rewrite: expansion too long"); 605 return; 606 } 607 *avp++ = *pp; 608 } while (pp++ != m->last); 609 } 610 else 611 { 612 if (avp >= &npvp[MAXATOM]) 613 { 614 syserr("rewrite: expansion too long"); 615 return; 616 } 617 *avp++ = rp; 618 } 619 } 620 *avp++ = NULL; 621 bmove((char *) npvp, (char *) pvp, (avp - npvp) * sizeof *avp); 622 # ifdef DEBUG 623 if (Debug > 3) 624 { 625 char **vp; 626 627 printf("rewritten as `"); 628 for (vp = pvp; *vp != NULL; vp++) 629 { 630 if (vp != pvp) 631 printf("_"); 632 xputs(*vp); 633 } 634 printf("'\n"); 635 } 636 # endif DEBUG 637 if (pvp[0][0] == CANONNET) 638 break; 639 } 640 else 641 { 642 # ifdef DEBUG 643 if (Debug > 10) 644 printf("----- rule fails\n"); 645 # endif DEBUG 646 rwr = rwr->r_next; 647 } 648 } 649 } 650 /* 651 ** BUILDADDR -- build address from token vector. 652 ** 653 ** Parameters: 654 ** tv -- token vector. 655 ** a -- pointer to address descriptor to fill. 656 ** If NULL, one will be allocated. 657 ** 658 ** Returns: 659 ** NULL if there was an error. 660 ** 'a' otherwise. 661 ** 662 ** Side Effects: 663 ** fills in 'a' 664 */ 665 666 ADDRESS * 667 buildaddr(tv, a) 668 register char **tv; 669 register ADDRESS *a; 670 { 671 static char buf[MAXNAME]; 672 struct mailer **mp; 673 register struct mailer *m; 674 extern bool sameword(); 675 676 if (a == NULL) 677 a = (ADDRESS *) xalloc(sizeof *a); 678 clear((char *) a, sizeof *a); 679 680 /* figure out what net/mailer to use */ 681 if (**tv != CANONNET) 682 { 683 syserr("buildaddr: no net"); 684 return (NULL); 685 } 686 tv++; 687 if (sameword(*tv, "error")) 688 { 689 if (**++tv != CANONUSER) 690 syserr("buildaddr: error: no user"); 691 buf[0] = '\0'; 692 while (*++tv != NULL) 693 { 694 if (buf[0] != '\0') 695 strcat(buf, " "); 696 strcat(buf, *tv); 697 } 698 usrerr(buf); 699 return (NULL); 700 } 701 for (mp = Mailer; (m = *mp++) != NULL; ) 702 { 703 if (sameword(m->m_name, *tv)) 704 break; 705 } 706 if (m == NULL) 707 { 708 syserr("buildaddr: unknown net %s", *tv); 709 return (NULL); 710 } 711 a->q_mailer = m; 712 713 /* figure out what host (if any) */ 714 tv++; 715 if (!bitset(M_LOCAL, m->m_flags)) 716 { 717 if (**tv++ != CANONHOST) 718 { 719 syserr("buildaddr: no host"); 720 return (NULL); 721 } 722 buf[0] = '\0'; 723 while (*tv != NULL && **tv != CANONUSER) 724 strcat(buf, *tv++); 725 a->q_host = newstr(buf); 726 } 727 else 728 a->q_host = NULL; 729 730 /* figure out the user */ 731 if (**tv != CANONUSER) 732 { 733 syserr("buildaddr: no user"); 734 return (NULL); 735 } 736 cataddr(++tv, buf, sizeof buf); 737 a->q_user = buf; 738 739 return (a); 740 } 741 /* 742 ** CATADDR -- concatenate pieces of addresses (putting in <LWSP> subs) 743 ** 744 ** Parameters: 745 ** pvp -- parameter vector to rebuild. 746 ** buf -- buffer to build the string into. 747 ** sz -- size of buf. 748 ** 749 ** Returns: 750 ** none. 751 ** 752 ** Side Effects: 753 ** Destroys buf. 754 */ 755 756 cataddr(pvp, buf, sz) 757 char **pvp; 758 char *buf; 759 register int sz; 760 { 761 bool oatomtok = FALSE; 762 bool natomtok = FALSE; 763 register int i; 764 register char *p; 765 766 p = buf; 767 sz--; 768 while (*pvp != NULL && (i = strlen(*pvp)) < sz) 769 { 770 natomtok = (toktype(**pvp) == ATOM); 771 if (oatomtok && natomtok) 772 *p++ = SPACESUB; 773 (void) strcpy(p, *pvp); 774 oatomtok = natomtok; 775 p += i; 776 sz -= i; 777 pvp++; 778 } 779 *p = '\0'; 780 } 781 /* 782 ** SAMEADDR -- Determine if two addresses are the same 783 ** 784 ** This is not just a straight comparison -- if the mailer doesn't 785 ** care about the host we just ignore it, etc. 786 ** 787 ** Parameters: 788 ** a, b -- pointers to the internal forms to compare. 789 ** wildflg -- if TRUE, 'a' may have no user specified, 790 ** in which case it is to match anything. 791 ** 792 ** Returns: 793 ** TRUE -- they represent the same mailbox. 794 ** FALSE -- they don't. 795 ** 796 ** Side Effects: 797 ** none. 798 */ 799 800 bool 801 sameaddr(a, b, wildflg) 802 register ADDRESS *a; 803 register ADDRESS *b; 804 bool wildflg; 805 { 806 /* if they don't have the same mailer, forget it */ 807 if (a->q_mailer != b->q_mailer) 808 return (FALSE); 809 810 /* if the user isn't the same, we can drop out */ 811 if ((!wildflg || a->q_user[0] != '\0') && strcmp(a->q_user, b->q_user) != 0) 812 return (FALSE); 813 814 /* if the mailer ignores hosts, we have succeeded! */ 815 if (bitset(M_LOCAL, a->q_mailer->m_flags)) 816 return (TRUE); 817 818 /* otherwise compare hosts (but be careful for NULL ptrs) */ 819 if (a->q_host == NULL || b->q_host == NULL) 820 return (FALSE); 821 if (strcmp(a->q_host, b->q_host) != 0) 822 return (FALSE); 823 824 return (TRUE); 825 } 826 /* 827 ** PRINTADDR -- print address (for debugging) 828 ** 829 ** Parameters: 830 ** a -- the address to print 831 ** follow -- follow the q_next chain. 832 ** 833 ** Returns: 834 ** none. 835 ** 836 ** Side Effects: 837 ** none. 838 */ 839 840 # ifdef DEBUG 841 842 printaddr(a, follow) 843 register ADDRESS *a; 844 bool follow; 845 { 846 bool first = TRUE; 847 848 while (a != NULL) 849 { 850 first = FALSE; 851 printf("%x=", a); 852 (void) fflush(stdout); 853 printf("%s: mailer %d (%s), host `%s', user `%s'\n", a->q_paddr, 854 a->q_mailer->m_mno, a->q_mailer->m_name, a->q_host, a->q_user); 855 printf("\tnext=%x, flags=%o, rmailer %d, alias %x\n", a->q_next, 856 a->q_flags, a->q_rmailer, a->q_alias); 857 printf("\thome=\"%s\", fullname=\"%s\"\n", a->q_home, a->q_fullname); 858 859 if (!follow) 860 return; 861 a = a->q_next; 862 } 863 if (first) 864 printf("[NULL]\n"); 865 } 866 867 # endif DEBUG 868