1 # include "sendmail.h"
2 
3 SCCSID(@(#)parseaddr.c	3.40		03/20/82);
4 
5 /*
6 **  PARSE -- Parse an address
7 **
8 **	Parses an address and breaks it up into three parts: a
9 **	net to transmit the message on, the host to transmit it
10 **	to, and a user on that host.  These are loaded into an
11 **	ADDRESS header with the values squirreled away if necessary.
12 **	The "user" part may not be a real user; the process may
13 **	just reoccur on that machine.  For example, on a machine
14 **	with an arpanet connection, the address
15 **		csvax.bill@berkeley
16 **	will break up to a "user" of 'csvax.bill' and a host
17 **	of 'berkeley' -- to be transmitted over the arpanet.
18 **
19 **	Parameters:
20 **		addr -- the address to parse.
21 **		a -- a pointer to the address descriptor buffer.
22 **			If NULL, a header will be created.
23 **		copyf -- determines what shall be copied:
24 **			-1 -- don't copy anything.  The printname
25 **				(q_paddr) is just addr, and the
26 **				user & host are allocated internally
27 **				to parse.
28 **			0 -- copy out the parsed user & host, but
29 **				don't copy the printname.
30 **			+1 -- copy everything.
31 **
32 **	Returns:
33 **		A pointer to the address descriptor header (`a' if
34 **			`a' is non-NULL).
35 **		NULL on error.
36 **
37 **	Side Effects:
38 **		none
39 */
40 
41 # define DELIMCHARS	"$()<>,;\\\"\r\n"	/* word delimiters */
42 
43 ADDRESS *
44 parse(addr, a, copyf)
45 	char *addr;
46 	register ADDRESS *a;
47 	int copyf;
48 {
49 	register char **pvp;
50 	register struct mailer *m;
51 	extern char **prescan();
52 	extern ADDRESS *buildaddr();
53 
54 	/*
55 	**  Initialize and prescan address.
56 	*/
57 
58 	To = addr;
59 # ifdef DEBUG
60 	if (Debug)
61 		printf("\n--parse(%s)\n", addr);
62 # endif DEBUG
63 
64 	pvp = prescan(addr, '\0');
65 	if (pvp == NULL)
66 		return (NULL);
67 
68 	/*
69 	**  Apply rewriting rules.
70 	*/
71 
72 	rewrite(pvp, 0);
73 
74 	/*
75 	**  See if we resolved to a real mailer.
76 	*/
77 
78 	if (pvp[0][0] != CANONNET)
79 	{
80 		setstat(EX_USAGE);
81 		usrerr("cannot resolve name");
82 		return (NULL);
83 	}
84 
85 	/*
86 	**  Build canonical address from pvp.
87 	*/
88 
89 	a = buildaddr(pvp, a);
90 	if (a == NULL)
91 		return (NULL);
92 	m = a->q_mailer;
93 
94 	/*
95 	**  Make local copies of the host & user and then
96 	**  transport them out.
97 	*/
98 
99 	if (copyf > 0)
100 		a->q_paddr = newstr(addr);
101 	else
102 		a->q_paddr = addr;
103 
104 	if (copyf >= 0)
105 	{
106 		if (a->q_host != NULL)
107 			a->q_host = newstr(a->q_host);
108 		else
109 			a->q_host = "";
110 		if (a->q_user != a->q_paddr)
111 			a->q_user = newstr(a->q_user);
112 	}
113 
114 	/*
115 	**  Do UPPER->lower case mapping unless inhibited.
116 	*/
117 
118 	if (!bitset(M_HST_UPPER, m->m_flags))
119 		makelower(a->q_host);
120 	if (!bitset(M_USR_UPPER, m->m_flags))
121 		makelower(a->q_user);
122 
123 	/*
124 	**  Compute return value.
125 	*/
126 
127 # ifdef DEBUG
128 	if (Debug)
129 	{
130 		printf("parse-->");
131 		printaddr(a, FALSE);
132 	}
133 # endif DEBUG
134 
135 	return (a);
136 }
137 /*
138 **  PRESCAN -- Prescan name and make it canonical
139 **
140 **	Scans a name and turns it into canonical form.  This involves
141 **	deleting blanks, comments (in parentheses), and turning the
142 **	word "at" into an at-sign ("@").  The name is copied as this
143 **	is done; it is legal to copy a name onto itself, since this
144 **	process can only make things smaller.
145 **
146 **	This routine knows about quoted strings and angle brackets.
147 **
148 **	There are certain subtleties to this routine.  The one that
149 **	comes to mind now is that backslashes on the ends of names
150 **	are silently stripped off; this is intentional.  The problem
151 **	is that some versions of sndmsg (like at LBL) set the kill
152 **	character to something other than @ when reading addresses;
153 **	so people type "csvax.eric\@berkeley" -- which screws up the
154 **	berknet mailer.
155 **
156 **	Parameters:
157 **		addr -- the name to chomp.
158 **		delim -- the delimiter for the address, normally
159 **			'\0' or ','; \0 is accepted in any case.
160 **			are moving in place; set buflim to high core.
161 **
162 **	Returns:
163 **		A pointer to a vector of tokens.
164 **		NULL on error.
165 **
166 **	Side Effects:
167 **		none.
168 */
169 
170 # define OPER		1
171 # define ATOM		2
172 # define EOTOK		3
173 # define QSTRING	4
174 # define SPACE		5
175 # define ONEMORE	6
176 # define GETONE		7
177 # define MACRO		8
178 
179 char **
180 prescan(addr, delim)
181 	char *addr;
182 	char delim;
183 {
184 	register char *p;
185 	static char buf[MAXNAME+MAXATOM];
186 	static char *av[MAXATOM+1];
187 	char **avp;
188 	bool bslashmode;
189 	int cmntcnt;
190 	int brccnt;
191 	register char c;
192 	char *tok;
193 	register char *q;
194 	register int state;
195 	int nstate;
196 	extern char lower();
197 
198 	q = buf;
199 	bslashmode = FALSE;
200 	cmntcnt = brccnt = 0;
201 	avp = av;
202 	state = OPER;
203 	for (p = addr; *p != '\0' && *p != delim; )
204 	{
205 		/* read a token */
206 		tok = q;
207 		while ((c = *p++) != '\0' && c != delim)
208 		{
209 			/* chew up special characters */
210 			c &= ~0200;
211 			*q = '\0';
212 			if (bslashmode)
213 			{
214 				c |= 0200;
215 				bslashmode = FALSE;
216 			}
217 			else if (c == '\\')
218 			{
219 				bslashmode = TRUE;
220 				continue;
221 			}
222 			else if (c == '"')
223 			{
224 				if (state == QSTRING)
225 					state = OPER;
226 				else
227 					state = QSTRING;
228 				break;
229 			}
230 
231 			nstate = toktype(c);
232 			switch (state)
233 			{
234 			  case QSTRING:		/* in quoted string */
235 				break;
236 
237 			  case ATOM:		/* regular atom */
238 				if (nstate != ATOM)
239 				{
240 					state = EOTOK;
241 					p--;
242 				}
243 				break;
244 
245 			  case GETONE:		/* grab one character */
246 				state = OPER;
247 				break;
248 
249 			  case EOTOK:		/* after atom or q-string */
250 				state = nstate;
251 				if (state == SPACE)
252 					continue;
253 				break;
254 
255 			  case SPACE:		/* linear white space */
256 				state = nstate;
257 				break;
258 
259 			  case OPER:		/* operator */
260 				if (nstate == SPACE)
261 					continue;
262 				state = nstate;
263 				break;
264 
265 			  case ONEMORE:		/* $- etc. */
266 				state = GETONE;
267 				break;
268 
269 			  default:
270 				syserr("prescan: unknown state %d", state);
271 			}
272 
273 			if (state == EOTOK || state == SPACE)
274 				break;
275 
276 			/* squirrel it away */
277 			if (q >= &buf[sizeof buf - 5])
278 			{
279 				usrerr("Address too long");
280 				return (NULL);
281 			}
282 			*q++ = c;
283 
284 			/* decide whether this represents end of token */
285 			if (state == OPER || state == GETONE)
286 				break;
287 		}
288 		if (c == '\0' || c == delim)
289 			p--;
290 
291 		/* new token */
292 		if (tok == q)
293 			continue;
294 		*q++ = '\0';
295 
296 		c = tok[0];
297 		if (c == '(')
298 		{
299 			cmntcnt++;
300 			continue;
301 		}
302 		else if (c == ')')
303 		{
304 			if (cmntcnt <= 0)
305 			{
306 				usrerr("Unbalanced ')'");
307 				return (NULL);
308 			}
309 			else
310 			{
311 				cmntcnt--;
312 				continue;
313 			}
314 		}
315 		else if (cmntcnt > 0)
316 			continue;
317 
318 		/* we prefer <> specs */
319 		if (c == '<')
320 		{
321 			if (brccnt < 0)
322 			{
323 				usrerr("multiple < spec");
324 				return (NULL);
325 			}
326 			brccnt++;
327 			if (brccnt == 1)
328 			{
329 				/* we prefer using machine readable name */
330 				q = buf;
331 				*q = '\0';
332 				avp = av;
333 				continue;
334 			}
335 		}
336 		else if (c == '>')
337 		{
338 			if (brccnt <= 0)
339 			{
340 				usrerr("Unbalanced `>'");
341 				return (NULL);
342 			}
343 			else
344 				brccnt--;
345 			if (brccnt <= 0)
346 			{
347 				brccnt = -1;
348 				continue;
349 			}
350 		}
351 
352 		if (avp >= &av[MAXATOM])
353 		{
354 			syserr("prescan: too many tokens");
355 			return (NULL);
356 		}
357 		*avp++ = tok;
358 	}
359 	*avp = NULL;
360 	if (cmntcnt > 0)
361 		usrerr("Unbalanced '('");
362 	else if (brccnt > 0)
363 		usrerr("Unbalanced '<'");
364 	else if (state == QSTRING)
365 		usrerr("Unbalanced '\"'");
366 	else if (av[0] != NULL)
367 		return (av);
368 	return (NULL);
369 }
370 /*
371 **  TOKTYPE -- return token type
372 **
373 **	Parameters:
374 **		c -- the character in question.
375 **
376 **	Returns:
377 **		Its type.
378 **
379 **	Side Effects:
380 **		none.
381 */
382 
383 toktype(c)
384 	register char c;
385 {
386 	static char buf[50];
387 	static bool firstime = TRUE;
388 
389 	if (firstime)
390 	{
391 		firstime = FALSE;
392 		(void) expand("$o", buf, &buf[sizeof buf - 1]);
393 		strcat(buf, DELIMCHARS);
394 	}
395 	if (c == MATCHCLASS || c == MATCHREPL)
396 		return (ONEMORE);
397 	if (!isascii(c))
398 		return (ATOM);
399 	if (isspace(c))
400 		return (SPACE);
401 	if (iscntrl(c) || index(buf, c) != NULL)
402 		return (OPER);
403 	return (ATOM);
404 }
405 /*
406 **  REWRITE -- apply rewrite rules to token vector.
407 **
408 **	This routine is an ordered production system.  Each rewrite
409 **	rule has a LHS (called the pattern) and a RHS (called the
410 **	rewrite); 'rwr' points the the current rewrite rule.
411 **
412 **	For each rewrite rule, 'avp' points the address vector we
413 **	are trying to match against, and 'pvp' points to the pattern.
414 **	If pvp points to a special match value (MATCHANY, MATCHONE,
415 **	MATCHCLASS) then the address in avp matched is saved away
416 **	in the match vector (pointed to by 'mvp').
417 **
418 **	When a match between avp & pvp does not match, we try to
419 **	back out.  If we back up over a MATCHONE or a MATCHCLASS
420 **	we must also back out the match in mvp.  If we reach a
421 **	MATCHANY we just extend the match and start over again.
422 **
423 **	When we finally match, we rewrite the address vector
424 **	and try over again.
425 **
426 **	Parameters:
427 **		pvp -- pointer to token vector.
428 **
429 **	Returns:
430 **		none.
431 **
432 **	Side Effects:
433 **		pvp is modified.
434 */
435 
436 struct match
437 {
438 	char	**first;	/* first token matched */
439 	char	**last;		/* last token matched */
440 };
441 
442 # define MAXMATCH	9	/* max params per rewrite */
443 
444 
445 rewrite(pvp, ruleset)
446 	char **pvp;
447 	int ruleset;
448 {
449 	register char *ap;		/* address pointer */
450 	register char *rp;		/* rewrite pointer */
451 	register char **avp;		/* address vector pointer */
452 	register char **rvp;		/* rewrite vector pointer */
453 	struct rewrite *rwr;		/* pointer to current rewrite rule */
454 	struct match mlist[MAXMATCH];	/* stores match on LHS */
455 	struct match *mlp;		/* cur ptr into mlist */
456 	char *npvp[MAXATOM+1];		/* temporary space for rebuild */
457 	extern bool sameword();
458 
459 # ifdef DEBUG
460 	if (Debug > 9)
461 	{
462 		printf("rewrite: original pvp:\n");
463 		printav(pvp);
464 	}
465 # endif DEBUG
466 
467 	/*
468 	**  Run through the list of rewrite rules, applying
469 	**	any that match.
470 	*/
471 
472 	for (rwr = RewriteRules[ruleset]; rwr != NULL; )
473 	{
474 # ifdef DEBUG
475 		if (Debug > 10)
476 		{
477 			printf("-----trying rule:\n");
478 			printav(rwr->r_lhs);
479 		}
480 # endif DEBUG
481 
482 		/* try to match on this rule */
483 		mlp = mlist;
484 		for (rvp = rwr->r_lhs, avp = pvp; *avp != NULL; )
485 		{
486 			ap = *avp;
487 			rp = *rvp;
488 
489 			if (rp == NULL)
490 			{
491 				/* end-of-pattern before end-of-address */
492 				goto fail;
493 			}
494 
495 			switch (*rp)
496 			{
497 				register STAB *s;
498 				register int class;
499 
500 			  case MATCHCLASS:
501 				/* match any token in a class */
502 				class = rp[1];
503 				if (!isalpha(class))
504 					goto fail;
505 				if (isupper(class))
506 					class -= 'A';
507 				else
508 					class -= 'a';
509 				s = stab(ap, ST_CLASS, ST_FIND);
510 				if (s == NULL || (s->s_class & (1L << class)) == 0)
511 					goto fail;
512 
513 				/* explicit fall-through */
514 
515 			  case MATCHONE:
516 			  case MATCHANY:
517 				/* match exactly one token */
518 				mlp->first = mlp->last = avp++;
519 				mlp++;
520 				break;
521 
522 			  default:
523 				/* must have exact match */
524 				if (!sameword(rp, ap))
525 					goto fail;
526 				avp++;
527 				break;
528 			}
529 
530 			/* successful match on this token */
531 			rvp++;
532 			continue;
533 
534 		  fail:
535 			/* match failed -- back up */
536 			while (--rvp >= rwr->r_lhs)
537 			{
538 				rp = *rvp;
539 				if (*rp == MATCHANY)
540 				{
541 					/* extend binding and continue */
542 					mlp[-1].last = avp++;
543 					rvp++;
544 					break;
545 				}
546 				avp--;
547 				if (*rp == MATCHONE || *rp == MATCHCLASS)
548 				{
549 					/* back out binding */
550 					mlp--;
551 				}
552 			}
553 
554 			if (rvp < rwr->r_lhs)
555 			{
556 				/* total failure to match */
557 				break;
558 			}
559 		}
560 
561 		/*
562 		**  See if we successfully matched
563 		*/
564 
565 		if (rvp >= rwr->r_lhs && *rvp == NULL)
566 		{
567 # ifdef DEBUG
568 			if (Debug > 10)
569 			{
570 				printf("-----rule matches:\n");
571 				printav(rwr->r_rhs);
572 			}
573 # endif DEBUG
574 
575 			/* substitute */
576 			for (rvp = rwr->r_rhs, avp = npvp; *rvp != NULL; rvp++)
577 			{
578 				rp = *rvp;
579 				if (*rp == MATCHREPL)
580 				{
581 					register struct match *m;
582 					register char **pp;
583 
584 					m = &mlist[rp[1] - '1'];
585 # ifdef DEBUG
586 					if (Debug > 13)
587 					{
588 						printf("$%c:", rp[1]);
589 						pp = m->first;
590 						do
591 						{
592 							printf(" %x=\"", *pp);
593 							(void) fflush(stdout);
594 							printf("%s\"", *pp);
595 						} while (pp++ != m->last);
596 						printf("\n");
597 					}
598 # endif DEBUG
599 					pp = m->first;
600 					do
601 					{
602 						if (avp >= &npvp[MAXATOM])
603 						{
604 							syserr("rewrite: expansion too long");
605 							return;
606 						}
607 						*avp++ = *pp;
608 					} while (pp++ != m->last);
609 				}
610 				else
611 				{
612 					if (avp >= &npvp[MAXATOM])
613 					{
614 						syserr("rewrite: expansion too long");
615 						return;
616 					}
617 					*avp++ = rp;
618 				}
619 			}
620 			*avp++ = NULL;
621 			bmove((char *) npvp, (char *) pvp, (avp - npvp) * sizeof *avp);
622 # ifdef DEBUG
623 			if (Debug > 3)
624 			{
625 				char **vp;
626 
627 				printf("rewritten as `");
628 				for (vp = pvp; *vp != NULL; vp++)
629 				{
630 					if (vp != pvp)
631 						printf("_");
632 					xputs(*vp);
633 				}
634 				printf("'\n");
635 			}
636 # endif DEBUG
637 			if (pvp[0][0] == CANONNET)
638 				break;
639 		}
640 		else
641 		{
642 # ifdef DEBUG
643 			if (Debug > 10)
644 				printf("----- rule fails\n");
645 # endif DEBUG
646 			rwr = rwr->r_next;
647 		}
648 	}
649 }
650 /*
651 **  BUILDADDR -- build address from token vector.
652 **
653 **	Parameters:
654 **		tv -- token vector.
655 **		a -- pointer to address descriptor to fill.
656 **			If NULL, one will be allocated.
657 **
658 **	Returns:
659 **		NULL if there was an error.
660 **		'a' otherwise.
661 **
662 **	Side Effects:
663 **		fills in 'a'
664 */
665 
666 ADDRESS *
667 buildaddr(tv, a)
668 	register char **tv;
669 	register ADDRESS *a;
670 {
671 	static char buf[MAXNAME];
672 	struct mailer **mp;
673 	register struct mailer *m;
674 	extern bool sameword();
675 
676 	if (a == NULL)
677 		a = (ADDRESS *) xalloc(sizeof *a);
678 	clear((char *) a, sizeof *a);
679 
680 	/* figure out what net/mailer to use */
681 	if (**tv != CANONNET)
682 	{
683 		syserr("buildaddr: no net");
684 		return (NULL);
685 	}
686 	tv++;
687 	if (sameword(*tv, "error"))
688 	{
689 		if (**++tv != CANONUSER)
690 			syserr("buildaddr: error: no user");
691 		buf[0] = '\0';
692 		while (*++tv != NULL)
693 		{
694 			if (buf[0] != '\0')
695 				strcat(buf, " ");
696 			strcat(buf, *tv);
697 		}
698 		usrerr(buf);
699 		return (NULL);
700 	}
701 	for (mp = Mailer; (m = *mp++) != NULL; )
702 	{
703 		if (sameword(m->m_name, *tv))
704 			break;
705 	}
706 	if (m == NULL)
707 	{
708 		syserr("buildaddr: unknown net %s", *tv);
709 		return (NULL);
710 	}
711 	a->q_mailer = m;
712 
713 	/* figure out what host (if any) */
714 	tv++;
715 	if (!bitset(M_LOCAL, m->m_flags))
716 	{
717 		if (**tv++ != CANONHOST)
718 		{
719 			syserr("buildaddr: no host");
720 			return (NULL);
721 		}
722 		buf[0] = '\0';
723 		while (*tv != NULL && **tv != CANONUSER)
724 			strcat(buf, *tv++);
725 		a->q_host = newstr(buf);
726 	}
727 	else
728 		a->q_host = NULL;
729 
730 	/* figure out the user */
731 	if (**tv != CANONUSER)
732 	{
733 		syserr("buildaddr: no user");
734 		return (NULL);
735 	}
736 	cataddr(++tv, buf, sizeof buf);
737 	a->q_user = buf;
738 
739 	return (a);
740 }
741 /*
742 **  CATADDR -- concatenate pieces of addresses (putting in <LWSP> subs)
743 **
744 **	Parameters:
745 **		pvp -- parameter vector to rebuild.
746 **		buf -- buffer to build the string into.
747 **		sz -- size of buf.
748 **
749 **	Returns:
750 **		none.
751 **
752 **	Side Effects:
753 **		Destroys buf.
754 */
755 
756 cataddr(pvp, buf, sz)
757 	char **pvp;
758 	char *buf;
759 	register int sz;
760 {
761 	bool oatomtok = FALSE;
762 	bool natomtok = FALSE;
763 	register int i;
764 	register char *p;
765 
766 	p = buf;
767 	sz--;
768 	while (*pvp != NULL && (i = strlen(*pvp)) < sz)
769 	{
770 		natomtok = (toktype(**pvp) == ATOM);
771 		if (oatomtok && natomtok)
772 			*p++ = SPACESUB;
773 		(void) strcpy(p, *pvp);
774 		oatomtok = natomtok;
775 		p += i;
776 		sz -= i;
777 		pvp++;
778 	}
779 	*p = '\0';
780 }
781 /*
782 **  SAMEADDR -- Determine if two addresses are the same
783 **
784 **	This is not just a straight comparison -- if the mailer doesn't
785 **	care about the host we just ignore it, etc.
786 **
787 **	Parameters:
788 **		a, b -- pointers to the internal forms to compare.
789 **		wildflg -- if TRUE, 'a' may have no user specified,
790 **			in which case it is to match anything.
791 **
792 **	Returns:
793 **		TRUE -- they represent the same mailbox.
794 **		FALSE -- they don't.
795 **
796 **	Side Effects:
797 **		none.
798 */
799 
800 bool
801 sameaddr(a, b, wildflg)
802 	register ADDRESS *a;
803 	register ADDRESS *b;
804 	bool wildflg;
805 {
806 	/* if they don't have the same mailer, forget it */
807 	if (a->q_mailer != b->q_mailer)
808 		return (FALSE);
809 
810 	/* if the user isn't the same, we can drop out */
811 	if ((!wildflg || a->q_user[0] != '\0') && strcmp(a->q_user, b->q_user) != 0)
812 		return (FALSE);
813 
814 	/* if the mailer ignores hosts, we have succeeded! */
815 	if (bitset(M_LOCAL, a->q_mailer->m_flags))
816 		return (TRUE);
817 
818 	/* otherwise compare hosts (but be careful for NULL ptrs) */
819 	if (a->q_host == NULL || b->q_host == NULL)
820 		return (FALSE);
821 	if (strcmp(a->q_host, b->q_host) != 0)
822 		return (FALSE);
823 
824 	return (TRUE);
825 }
/*
**  PRINTADDR -- print address (for debugging)
**
**	Dumps the fields of an address descriptor on the standard
**	output.  A NULL descriptor is shown as "[NULL]".
**
**	Parameters:
**		a -- the address to print
**		follow -- if set, keep going down the q_next chain;
**			otherwise print only 'a' itself.
**
**	Returns:
**		none.
**
**	Side Effects:
**		none.
*/

# ifdef DEBUG

printaddr(a, follow)
	register ADDRESS *a;
	bool follow;
{
	/* nothing at all to show */
	if (a == NULL)
	{
		printf("[NULL]\n");
		return;
	}

	for (; a != NULL; a = a->q_next)
	{
		/* flush the pointer first, before touching the strings */
		printf("%x=", a);
		(void) fflush(stdout);
		printf("%s: mailer %d (%s), host `%s', user `%s'\n", a->q_paddr,
		       a->q_mailer->m_mno, a->q_mailer->m_name, a->q_host, a->q_user);
		printf("\tnext=%x, flags=%o, rmailer %d, alias %x\n", a->q_next,
		       a->q_flags, a->q_rmailer, a->q_alias);
		printf("\thome=\"%s\", fullname=\"%s\"\n", a->q_home, a->q_fullname);

		/* only the head was asked for */
		if (!follow)
			break;
	}
}

# endif DEBUG
868