1 /*
2  * Copyright (c) 1983 Eric P. Allman
3  * Copyright (c) 1988, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 #ifndef lint
10 static char sccsid[] = "@(#)collect.c	8.31 (Berkeley) 03/21/95";
11 #endif /* not lint */
12 
13 # include <errno.h>
14 # include "sendmail.h"
15 
16 /*
17 **  COLLECT -- read & parse message header & make temp file.
18 **
19 **	Creates a temporary file name and copies the standard
20 **	input to that file.  Leading UNIX-style "From" lines are
21 **	stripped off (after important information is extracted).
22 **
23 **	Parameters:
24 **		fp -- file to read.
25 **		smtpmode -- if set, we are running SMTP: give an RFC821
26 **			style message to say we are ready to collect
27 **			input, and never ignore a single dot to mean
28 **			end of message.
29 **		requeueflag -- this message will be requeued later, so
30 **			don't do final processing on it.
31 **		hdrp -- the location to stash the header.
32 **		e -- the current envelope.
33 **
34 **	Returns:
35 **		none.
36 **
37 **	Side Effects:
38 **		Temp file is created and filled.
39 **		The from person may be set.
40 */
41 
42 char	*CollectErrorMessage;
43 bool	CollectErrno;
44 
45 static jmp_buf	CtxCollectTimeout;
46 static void	collecttimeout();
47 static bool	CollectProgress;
48 static EVENT	*CollectTimeout;
49 
50 /* values for input state machine */
51 #define IS_NORM		0	/* middle of line */
52 #define IS_BOL		1	/* beginning of line */
53 #define IS_DOT		2	/* read a dot at beginning of line */
54 #define IS_DOTCR	3	/* read ".\r" at beginning of line */
55 #define IS_CR		4	/* read a carriage return */
56 
57 /* values for message state machine */
58 #define MS_UFROM	0	/* reading Unix from line */
59 #define MS_HEADER	1	/* reading message header */
60 #define MS_BODY		2	/* reading message body */
61 
62 
63 void
64 collect(fp, smtpmode, requeueflag, hdrp, e)
65 	FILE *fp;
66 	bool smtpmode;
67 	bool requeueflag;
68 	HDR **hdrp;
69 	register ENVELOPE *e;
70 {
71 	register FILE *tf;
72 	bool ignrdot = smtpmode ? FALSE : IgnrDot;
73 	time_t dbto = smtpmode ? TimeOuts.to_datablock : 0;
74 	register char *bp;
75 	int c = '\0';
76 	bool inputerr = FALSE;
77 	bool headeronly = FALSE;
78 	char *buf;
79 	int buflen;
80 	int istate;
81 	int mstate;
82 	char *pbp;
83 	char peekbuf[8];
84 	char dfname[20];
85 	char bufbuf[MAXLINE];
86 	extern bool isheader();
87 	extern void eatheader();
88 	extern void tferror();
89 
90 	CollectErrorMessage = NULL;
91 	CollectErrno = 0;
92 	if (hdrp == NULL)
93 		hdrp = &e->e_header;
94 	else
95 		headeronly = TRUE;
96 
97 	/*
98 	**  Create the temp file name and create the file.
99 	*/
100 
101 	if (!headeronly)
102 	{
103 		struct stat stbuf;
104 
105 		strcpy(dfname, queuename(e, 'd'));
106 		if ((tf = dfopen(dfname, O_WRONLY|O_CREAT|O_TRUNC, FileMode)) == NULL)
107 		{
108 			syserr("Cannot create %s", dfname);
109 			e->e_flags |= EF_NO_BODY_RETN;
110 			finis();
111 		}
112 		if (fstat(fileno(tf), &stbuf) < 0)
113 			e->e_dfino = -1;
114 		else
115 		{
116 			e->e_dfdev = stbuf.st_dev;
117 			e->e_dfino = stbuf.st_ino;
118 		}
119 		HasEightBits = FALSE;
120 		e->e_msgsize = 0;
121 		e->e_flags |= EF_HAS_DF;
122 	}
123 
124 	/*
125 	**  Tell ARPANET to go ahead.
126 	*/
127 
128 	if (smtpmode)
129 		message("354 Enter mail, end with \".\" on a line by itself");
130 
131 	/*
132 	**  Read the message.
133 	**
134 	**	This is done using two interleaved state machines.
135 	**	The input state machine is looking for things like
136 	**	hidden dots; the message state machine is handling
137 	**	the larger picture (e.g., header versus body).
138 	*/
139 
140 	buf = bp = bufbuf;
141 	buflen = sizeof bufbuf;
142 	pbp = peekbuf;
143 	istate = IS_BOL;
144 	mstate = SaveFrom ? MS_HEADER : MS_UFROM;
145 	CollectProgress = FALSE;
146 
147 	/* if transmitting binary, don't map NL to EOL */
148 	if (e->e_bodytype != NULL && strcasecmp(e->e_bodytype, "8BITMIME") == 0)
149 		e->e_flags |= EF_NL_NOT_EOL;
150 
151 	if (dbto != 0)
152 	{
153 		/* handle possible input timeout */
154 		if (setjmp(CtxCollectTimeout) != 0)
155 		{
156 #ifdef LOG
157 			syslog(LOG_NOTICE,
158 			    "timeout waiting for input from %s during message collect",
159 			    CurHostName ? CurHostName : "<local machine>");
160 #endif
161 			errno = 0;
162 			usrerr("451 timeout waiting for input during message collect");
163 			goto readerr;
164 		}
165 		CollectTimeout = setevent(dbto, collecttimeout, dbto);
166 	}
167 
168 	for (;;)
169 	{
170 		if (tTd(30, 35))
171 			printf("top, istate=%d, mstate=%d\n", istate, mstate);
172 		for (;;)
173 		{
174 			if (pbp > peekbuf)
175 				c = *--pbp;
176 			else
177 			{
178 				while (!feof(InChannel) && !ferror(InChannel))
179 				{
180 					errno = 0;
181 					c = fgetc(InChannel);
182 					if (errno != EINTR)
183 						break;
184 					clearerr(InChannel);
185 				}
186 				CollectProgress = TRUE;
187 				if (TrafficLogFile != NULL)
188 				{
189 					if (istate == IS_BOL)
190 						fprintf(TrafficLogFile, "%05d <<< ",
191 							getpid());
192 					if (c == EOF)
193 						fprintf(TrafficLogFile, "[EOF]\n");
194 					else
195 						fputc(c, TrafficLogFile);
196 				}
197 				if (c == EOF)
198 					goto readerr;
199 				if (SevenBitInput)
200 					c &= 0x7f;
201 				else
202 					HasEightBits |= bitset(0x80, c);
203 				if (!headeronly)
204 					e->e_msgsize++;
205 			}
206 			if (tTd(30, 94))
207 				printf("istate=%d, c=%c (0x%x)\n",
208 					istate, c, c);
209 			switch (istate)
210 			{
211 			  case IS_BOL:
212 				if (c == '.')
213 				{
214 					istate = IS_DOT;
215 					continue;
216 				}
217 				break;
218 
219 			  case IS_DOT:
220 				if (c == '\n' && !ignrdot &&
221 				    !bitset(EF_NL_NOT_EOL, e->e_flags))
222 					goto readerr;
223 				else if (c == '\r' &&
224 					 !bitset(EF_CRLF_NOT_EOL, e->e_flags))
225 				{
226 					istate = IS_DOTCR;
227 					continue;
228 				}
229 				else if (c != '.' ||
230 					 (OpMode != MD_SMTP &&
231 					  OpMode != MD_DAEMON &&
232 					  OpMode != MD_ARPAFTP))
233 				{
234 					*pbp++ = c;
235 					c = '.';
236 				}
237 				break;
238 
239 			  case IS_DOTCR:
240 				if (c == '\n')
241 					goto readerr;
242 				else
243 				{
244 					/* push back the ".\rx" */
245 					*pbp++ = c;
246 					*pbp++ = '\r';
247 					c = '.';
248 				}
249 				break;
250 
251 			  case IS_CR:
252 				if (c == '\n')
253 					istate = IS_BOL;
254 				else
255 				{
256 					ungetc(c, InChannel);
257 					c = '\r';
258 					istate = IS_NORM;
259 				}
260 				goto bufferchar;
261 			}
262 
263 			if (c == '\r' && !bitset(EF_CRLF_NOT_EOL, e->e_flags))
264 			{
265 				istate = IS_CR;
266 				continue;
267 			}
268 			else if (c == '\n' && !bitset(EF_NL_NOT_EOL, e->e_flags))
269 				istate = IS_BOL;
270 			else
271 				istate = IS_NORM;
272 
273 bufferchar:
274 			if (mstate == MS_BODY)
275 			{
276 				/* just put the character out */
277 				if (MaxMessageSize <= 0 ||
278 				    e->e_msgsize <= MaxMessageSize)
279 					fputc(c, tf);
280 				continue;
281 			}
282 
283 			/* header -- buffer up */
284 			if (bp >= &buf[buflen - 2])
285 			{
286 				char *obuf;
287 
288 				if (mstate != MS_HEADER)
289 					break;
290 
291 				/* out of space for header */
292 				obuf = buf;
293 				if (buflen < MEMCHUNKSIZE)
294 					buflen *= 2;
295 				else
296 					buflen += MEMCHUNKSIZE;
297 				buf = xalloc(buflen);
298 				bcopy(obuf, buf, bp - obuf);
299 				bp = &buf[bp - obuf];
300 				if (obuf != bufbuf)
301 					free(obuf);
302 			}
303 			*bp++ = c;
304 			if (istate == IS_BOL)
305 				break;
306 		}
307 		*bp = '\0';
308 
309 nextstate:
310 		if (tTd(30, 35))
311 			printf("nextstate, istate=%d, mstate=%d, line = \"%s\"\n",
312 				istate, mstate, buf);
313 		switch (mstate)
314 		{
315 			extern int chompheader();
316 
317 		  case MS_UFROM:
318 			mstate = MS_HEADER;
319 			if (strncmp(buf, "From ", 5) == 0)
320 			{
321 				extern void eatfrom();
322 
323 				bp = buf;
324 				eatfrom(buf, e);
325 				continue;
326 			}
327 			/* fall through */
328 
329 		  case MS_HEADER:
330 			if (!isheader(buf))
331 			{
332 				mstate = MS_BODY;
333 				goto nextstate;
334 			}
335 
336 			/* check for possible continuation line */
337 			do
338 			{
339 				clearerr(InChannel);
340 				errno = 0;
341 				c = fgetc(InChannel);
342 			} while (errno == EINTR);
343 			if (c != EOF)
344 				ungetc(c, InChannel);
345 			if (c == ' ' || c == '\t')
346 			{
347 				/* yep -- defer this */
348 				continue;
349 			}
350 
351 			/* trim off trailing CRLF or NL */
352 			if (*--bp != '\n' || *--bp != '\r')
353 				bp++;
354 			*bp = '\0';
355 			if (bitset(H_EOH, chompheader(buf, FALSE, e)))
356 				mstate = MS_BODY;
357 			break;
358 
359 		  case MS_BODY:
360 			if (tTd(30, 1))
361 				printf("EOH\n");
362 			if (headeronly)
363 				goto readerr;
364 			bp = buf;
365 
366 			/* toss blank line */
367 			if ((!bitset(EF_CRLF_NOT_EOL, e->e_flags) &&
368 				bp[0] == '\r' && bp[1] == '\n') ||
369 			    (!bitset(EF_NL_NOT_EOL, e->e_flags) &&
370 				bp[0] == '\n'))
371 			{
372 				break;
373 			}
374 
375 			/* if not a blank separator, write it out */
376 			if (MaxMessageSize <= 0 ||
377 			    e->e_msgsize <= MaxMessageSize)
378 			{
379 				while (*bp != '\0')
380 					fputc(*bp++, tf);
381 			}
382 			break;
383 		}
384 		bp = buf;
385 	}
386 
387 readerr:
388 	if ((feof(fp) && smtpmode) || ferror(fp))
389 	{
390 		if (tTd(30, 1))
391 			printf("collect: read error\n");
392 		inputerr = TRUE;
393 	}
394 
395 	/* reset global timer */
396 	clrevent(CollectTimeout);
397 
398 	if (headeronly)
399 		return;
400 
401 	if (tf != NULL)
402 	{
403 		if (fflush(tf) != 0)
404 			tferror(tf, e);
405 		if (fsync(fileno(tf)) < 0 || fclose(tf) < 0)
406 		{
407 			tferror(tf, e);
408 			finis();
409 		}
410 	}
411 
412 	if (CollectErrorMessage != NULL && Errors <= 0)
413 	{
414 		if (CollectErrno != 0)
415 		{
416 			errno = CollectErrno;
417 			syserr(CollectErrorMessage, dfname);
418 			finis();
419 		}
420 		usrerr(CollectErrorMessage);
421 	}
422 	else if (inputerr && (OpMode == MD_SMTP || OpMode == MD_DAEMON))
423 	{
424 		/* An EOF when running SMTP is an error */
425 		char *host;
426 		char *problem;
427 
428 		host = RealHostName;
429 		if (host == NULL)
430 			host = "localhost";
431 
432 		if (feof(fp))
433 			problem = "unexpected close";
434 		else if (ferror(fp))
435 			problem = "I/O error";
436 		else
437 			problem = "read timeout";
438 # ifdef LOG
439 		if (LogLevel > 0 && feof(fp))
440 			syslog(LOG_NOTICE,
441 			    "collect: %s on connection from %s, sender=%s: %s\n",
442 			    problem, host, e->e_from.q_paddr, errstring(errno));
443 # endif
444 		if (feof(fp))
445 			usrerr("451 collect: %s on connection from %s, from=%s",
446 				problem, host, e->e_from.q_paddr);
447 		else
448 			syserr("451 collect: %s on connection from %s, from=%s",
449 				problem, host, e->e_from.q_paddr);
450 
451 		/* don't return an error indication */
452 		e->e_to = NULL;
453 		e->e_flags &= ~EF_FATALERRS;
454 		e->e_flags |= EF_CLRQUEUE;
455 
456 		/* and don't try to deliver the partial message either */
457 		if (InChild)
458 			ExitStat = EX_QUIT;
459 		finis();
460 	}
461 
462 	/*
463 	**  Find out some information from the headers.
464 	**	Examples are who is the from person & the date.
465 	*/
466 
467 	eatheader(e, !requeueflag);
468 
469 	/* collect statistics */
470 	if (OpMode != MD_VERIFY)
471 	{
472 		extern void markstats();
473 
474 		markstats(e, (ADDRESS *) NULL);
475 	}
476 
477 	/*
478 	**  Add an Apparently-To: line if we have no recipient lines.
479 	*/
480 
481 	if (hvalue("to", e->e_header) == NULL &&
482 	    hvalue("cc", e->e_header) == NULL &&
483 	    hvalue("bcc", e->e_header) == NULL &&
484 	    hvalue("apparently-to", e->e_header) == NULL)
485 	{
486 		register ADDRESS *q;
487 		char *hdr = NULL;
488 		extern void addheader();
489 
490 		/* create an Apparently-To: field */
491 		/*    that or reject the message.... */
492 		switch (NoRecipientAction)
493 		{
494 		  case NRA_ADD_APPARENTLY_TO:
495 			hdr = "Apparently-To";
496 			break;
497 
498 		  case NRA_ADD_TO:
499 			hdr = "To";
500 			break;
501 
502 		  case NRA_ADD_BCC:
503 			addheader("Bcc", "", &e->e_header);
504 			break;
505 
506 		  case NRA_ADD_TO_UNDISCLOSED:
507 			addheader("To", "undisclosed-recipients:;", &e->e_header);
508 			break;
509 		}
510 
511 		if (hdr != NULL)
512 		{
513 			for (q = e->e_sendqueue; q != NULL; q = q->q_next)
514 			{
515 				if (q->q_alias != NULL)
516 					continue;
517 				if (tTd(30, 3))
518 					printf("Adding %s: %s\n",
519 						hdr, q->q_paddr);
520 				addheader(hdr, q->q_paddr, &e->e_header);
521 			}
522 		}
523 	}
524 
525 	/* check for message too large */
526 	if (MaxMessageSize > 0 && e->e_msgsize > MaxMessageSize)
527 	{
528 		usrerr("552 Message exceeds maximum fixed size (%ld)",
529 			MaxMessageSize);
530 	}
531 
532 	/* check for illegal 8-bit data */
533 	if (HasEightBits)
534 	{
535 		e->e_flags |= EF_HAS8BIT;
536 		if (!bitset(MM_PASS8BIT|MM_MIME8BIT, MimeMode))
537 			usrerr("554 Eight bit data not allowed");
538 	}
539 
540 	if ((e->e_dfp = fopen(dfname, "r")) == NULL)
541 	{
542 		/* we haven't acked receipt yet, so just chuck this */
543 		syserr("Cannot reopen %s", dfname);
544 		finis();
545 	}
546 }
547 
548 
549 static void
550 collecttimeout(timeout)
551 	time_t timeout;
552 {
553 	/* if no progress was made, die now */
554 	if (!CollectProgress)
555 		longjmp(CtxCollectTimeout, 1);
556 
557 	/* otherwise reset the timeout */
558 	CollectTimeout = setevent(timeout, collecttimeout, timeout);
559 	CollectProgress = FALSE;
560 }
561 /*
562 **  TFERROR -- signal error on writing the temporary file.
563 **
**	Parameters:
**		tf -- the file pointer for the temporary file.
**		e -- the current envelope.
566 **
567 **	Returns:
568 **		none.
569 **
570 **	Side Effects:
571 **		Gives an error message.
572 **		Arranges for following output to go elsewhere.
573 */
574 
575 void
576 tferror(tf, e)
577 	FILE *tf;
578 	register ENVELOPE *e;
579 {
580 	CollectErrno = errno;
581 	if (errno == ENOSPC)
582 	{
583 		struct stat st;
584 		long avail;
585 		long bsize;
586 
587 		e->e_flags |= EF_NO_BODY_RETN;
588 		if (fstat(fileno(tf), &st) < 0)
589 			st.st_size = 0;
590 		(void) freopen(queuename(e, 'd'), "w", tf);
591 		if (st.st_size <= 0)
592 			fprintf(tf, "\n*** Mail could not be accepted");
593 		else if (sizeof st.st_size > sizeof (long))
594 			fprintf(tf, "\n*** Mail of at least %qd bytes could not be accepted\n",
595 				st.st_size);
596 		else
597 			fprintf(tf, "\n*** Mail of at least %ld bytes could not be accepted\n",
598 				st.st_size);
599 		fprintf(tf, "*** at %s due to lack of disk space for temp file.\n",
600 			MyHostName);
601 		avail = freespace(QueueDir, &bsize);
602 		if (avail > 0)
603 		{
604 			if (bsize > 1024)
605 				avail *= bsize / 1024;
606 			else if (bsize < 1024)
607 				avail /= 1024 / bsize;
608 			fprintf(tf, "*** Currently, %ld kilobytes are available for mail temp files.\n",
609 				avail);
610 		}
611 		CollectErrorMessage = "452 Out of disk space for temp file";
612 	}
613 	else
614 	{
615 		CollectErrorMessage = "cannot write message body to disk (%s)";
616 	}
617 	(void) freopen("/dev/null", "w", tf);
618 }
619 /*
620 **  EATFROM -- chew up a UNIX style from line and process
621 **
622 **	This does indeed make some assumptions about the format
623 **	of UNIX messages.
624 **
**	Parameters:
**		fm -- the from line.
**		e -- the current envelope.
627 **
628 **	Returns:
629 **		none.
630 **
631 **	Side Effects:
632 **		extracts what information it can from the header,
633 **		such as the date.
634 */
635 
636 # ifndef NOTUNIX
637 
638 char	*DowList[] =
639 {
640 	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
641 };
642 
643 char	*MonthList[] =
644 {
645 	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
646 	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
647 	NULL
648 };
649 
650 void
651 eatfrom(fm, e)
652 	char *fm;
653 	register ENVELOPE *e;
654 {
655 	register char *p;
656 	register char **dt;
657 
658 	if (tTd(30, 2))
659 		printf("eatfrom(%s)\n", fm);
660 
661 	/* find the date part */
662 	p = fm;
663 	while (*p != '\0')
664 	{
665 		/* skip a word */
666 		while (*p != '\0' && *p != ' ')
667 			p++;
668 		while (*p == ' ')
669 			p++;
670 		if (!(isascii(*p) && isupper(*p)) ||
671 		    p[3] != ' ' || p[13] != ':' || p[16] != ':')
672 			continue;
673 
674 		/* we have a possible date */
675 		for (dt = DowList; *dt != NULL; dt++)
676 			if (strncmp(*dt, p, 3) == 0)
677 				break;
678 		if (*dt == NULL)
679 			continue;
680 
681 		for (dt = MonthList; *dt != NULL; dt++)
682 			if (strncmp(*dt, &p[4], 3) == 0)
683 				break;
684 		if (*dt != NULL)
685 			break;
686 	}
687 
688 	if (*p != '\0')
689 	{
690 		char *q;
691 		extern char *arpadate();
692 
693 		/* we have found a date */
694 		q = xalloc(25);
695 		(void) strncpy(q, p, 25);
696 		q[24] = '\0';
697 		q = arpadate(q);
698 		define('a', newstr(q), e);
699 	}
700 }
701 
702 # endif /* NOTUNIX */
703