/*
 * Copyright (c) 1983, 1995 Eric P. Allman
 * Copyright (c) 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 */
8 
#ifndef lint
/* SCCS identification string for this file; left out when "lint" is defined. */
static char sccsid[] = "@(#)collect.c	8.44 (Berkeley) 06/10/95";
#endif /* not lint */
12 
# include <errno.h>
# include <string.h>
# include "sendmail.h"
15 
/*
**  COLLECT -- read & parse message header & make temp file.
**
**	Creates a temporary file name and copies the standard
**	input to that file.  Leading UNIX-style "From" lines are
**	stripped off (after important information is extracted).
**
**	Parameters:
**		fp -- file to read.
**		smtpmode -- if set, we are running SMTP: give an RFC821
**			style message to say we are ready to collect
**			input, and never ignore a single dot to mean
**			end of message.
**		requeueflag -- this message will be requeued later, so
**			don't do final processing on it.
**		hdrp -- the location to stash the header.  If this is
**			non-NULL only the header is collected: no temp
**			file is created and collection stops at the
**			end of the header.
**		e -- the current envelope.
**
**	Returns:
**		none.
**
**	Side Effects:
**		Temp file is created and filled.
**		The from person may be set.
*/
41 
42 static jmp_buf	CtxCollectTimeout;
43 static void	collecttimeout();
44 static bool	CollectProgress;
45 static EVENT	*CollectTimeout;
46 
47 /* values for input state machine */
48 #define IS_NORM		0	/* middle of line */
49 #define IS_BOL		1	/* beginning of line */
50 #define IS_DOT		2	/* read a dot at beginning of line */
51 #define IS_DOTCR	3	/* read ".\r" at beginning of line */
52 #define IS_CR		4	/* read a carriage return */
53 
54 /* values for message state machine */
55 #define MS_UFROM	0	/* reading Unix from line */
56 #define MS_HEADER	1	/* reading message header */
57 #define MS_BODY		2	/* reading message body */
58 
59 void
collect(fp,smtpmode,requeueflag,hdrp,e)60 collect(fp, smtpmode, requeueflag, hdrp, e)
61 	FILE *fp;
62 	bool smtpmode;
63 	bool requeueflag;
64 	HDR **hdrp;
65 	register ENVELOPE *e;
66 {
67 	register FILE *tf;
68 	bool ignrdot = smtpmode ? FALSE : IgnrDot;
69 	time_t dbto = smtpmode ? TimeOuts.to_datablock : 0;
70 	register char *bp;
71 	int c = '\0';
72 	bool inputerr = FALSE;
73 	bool headeronly;
74 	char *buf;
75 	int buflen;
76 	int istate;
77 	int mstate;
78 	char *pbp;
79 	char peekbuf[8];
80 	char dfname[20];
81 	char bufbuf[MAXLINE];
82 	extern bool isheader();
83 	extern void eatheader();
84 	extern void tferror();
85 
86 	headeronly = hdrp != NULL;
87 
88 	/*
89 	**  Create the temp file name and create the file.
90 	*/
91 
92 	if (!headeronly)
93 	{
94 		struct stat stbuf;
95 
96 		strcpy(dfname, queuename(e, 'd'));
97 		if ((tf = dfopen(dfname, O_WRONLY|O_CREAT|O_TRUNC, FileMode)) == NULL)
98 		{
99 			syserr("Cannot create %s", dfname);
100 			e->e_flags |= EF_NO_BODY_RETN;
101 			finis();
102 		}
103 		if (fstat(fileno(tf), &stbuf) < 0)
104 			e->e_dfino = -1;
105 		else
106 		{
107 			e->e_dfdev = stbuf.st_dev;
108 			e->e_dfino = stbuf.st_ino;
109 		}
110 		HasEightBits = FALSE;
111 		e->e_msgsize = 0;
112 		e->e_flags |= EF_HAS_DF;
113 	}
114 
115 	/*
116 	**  Tell ARPANET to go ahead.
117 	*/
118 
119 	if (smtpmode)
120 		message("354 Enter mail, end with \".\" on a line by itself");
121 
122 	if (tTd(30, 2))
123 		printf("collect\n");
124 
125 	/*
126 	**  Read the message.
127 	**
128 	**	This is done using two interleaved state machines.
129 	**	The input state machine is looking for things like
130 	**	hidden dots; the message state machine is handling
131 	**	the larger picture (e.g., header versus body).
132 	*/
133 
134 	buf = bp = bufbuf;
135 	buflen = sizeof bufbuf;
136 	pbp = peekbuf;
137 	istate = IS_BOL;
138 	mstate = SaveFrom ? MS_HEADER : MS_UFROM;
139 	CollectProgress = FALSE;
140 
141 	if (dbto != 0)
142 	{
143 		/* handle possible input timeout */
144 		if (setjmp(CtxCollectTimeout) != 0)
145 		{
146 #ifdef LOG
147 			syslog(LOG_NOTICE,
148 			    "timeout waiting for input from %s during message collect",
149 			    CurHostName ? CurHostName : "<local machine>");
150 #endif
151 			errno = 0;
152 			usrerr("451 timeout waiting for input during message collect");
153 			goto readerr;
154 		}
155 		CollectTimeout = setevent(dbto, collecttimeout, dbto);
156 	}
157 
158 	for (;;)
159 	{
160 		if (tTd(30, 35))
161 			printf("top, istate=%d, mstate=%d\n", istate, mstate);
162 		for (;;)
163 		{
164 			if (pbp > peekbuf)
165 				c = *--pbp;
166 			else
167 			{
168 				while (!feof(fp) && !ferror(fp))
169 				{
170 					errno = 0;
171 					c = getc(fp);
172 					if (errno != EINTR)
173 						break;
174 					clearerr(fp);
175 				}
176 				CollectProgress = TRUE;
177 				if (TrafficLogFile != NULL && !headeronly)
178 				{
179 					if (istate == IS_BOL)
180 						fprintf(TrafficLogFile, "%05d <<< ",
181 							getpid());
182 					if (c == EOF)
183 						fprintf(TrafficLogFile, "[EOF]\n");
184 					else
185 						putc(c, TrafficLogFile);
186 				}
187 				if (c == EOF)
188 					goto readerr;
189 				if (SevenBitInput)
190 					c &= 0x7f;
191 				else
192 					HasEightBits |= bitset(0x80, c);
193 				if (!headeronly)
194 					e->e_msgsize++;
195 			}
196 			if (tTd(30, 94))
197 				printf("istate=%d, c=%c (0x%x)\n",
198 					istate, c, c);
199 			switch (istate)
200 			{
201 			  case IS_BOL:
202 				if (c == '.')
203 				{
204 					istate = IS_DOT;
205 					continue;
206 				}
207 				break;
208 
209 			  case IS_DOT:
210 				if (c == '\n' && !ignrdot &&
211 				    !bitset(EF_NL_NOT_EOL, e->e_flags))
212 					goto readerr;
213 				else if (c == '\r' &&
214 					 !bitset(EF_CRLF_NOT_EOL, e->e_flags))
215 				{
216 					istate = IS_DOTCR;
217 					continue;
218 				}
219 				else if (c != '.' ||
220 					 (OpMode != MD_SMTP &&
221 					  OpMode != MD_DAEMON &&
222 					  OpMode != MD_ARPAFTP))
223 				{
224 					*pbp++ = c;
225 					c = '.';
226 				}
227 				break;
228 
229 			  case IS_DOTCR:
230 				if (c == '\n')
231 					goto readerr;
232 				else
233 				{
234 					/* push back the ".\rx" */
235 					*pbp++ = c;
236 					*pbp++ = '\r';
237 					c = '.';
238 				}
239 				break;
240 
241 			  case IS_CR:
242 				if (c == '\n')
243 					istate = IS_BOL;
244 				else
245 				{
246 					ungetc(c, fp);
247 					c = '\r';
248 					istate = IS_NORM;
249 				}
250 				goto bufferchar;
251 			}
252 
253 			if (c == '\r' && !bitset(EF_CRLF_NOT_EOL, e->e_flags))
254 			{
255 				istate = IS_CR;
256 				continue;
257 			}
258 			else if (c == '\n' && !bitset(EF_NL_NOT_EOL, e->e_flags))
259 				istate = IS_BOL;
260 			else
261 				istate = IS_NORM;
262 
263 bufferchar:
264 			if (mstate == MS_BODY)
265 			{
266 				/* just put the character out */
267 				if (MaxMessageSize <= 0 ||
268 				    e->e_msgsize <= MaxMessageSize)
269 					putc(c, tf);
270 				continue;
271 			}
272 
273 			/* header -- buffer up */
274 			if (bp >= &buf[buflen - 2])
275 			{
276 				char *obuf;
277 
278 				if (mstate != MS_HEADER)
279 					break;
280 
281 				/* out of space for header */
282 				obuf = buf;
283 				if (buflen < MEMCHUNKSIZE)
284 					buflen *= 2;
285 				else
286 					buflen += MEMCHUNKSIZE;
287 				buf = xalloc(buflen);
288 				bcopy(obuf, buf, bp - obuf);
289 				bp = &buf[bp - obuf];
290 				if (obuf != bufbuf)
291 					free(obuf);
292 			}
293 			if (c != '\0')
294 				*bp++ = c;
295 			if (istate == IS_BOL)
296 				break;
297 		}
298 		*bp = '\0';
299 
300 nextstate:
301 		if (tTd(30, 35))
302 			printf("nextstate, istate=%d, mstate=%d, line = \"%s\"\n",
303 				istate, mstate, buf);
304 		switch (mstate)
305 		{
306 			extern int chompheader();
307 
308 		  case MS_UFROM:
309 			mstate = MS_HEADER;
310 			if (strncmp(buf, "From ", 5) == 0)
311 			{
312 				extern void eatfrom();
313 
314 				bp = buf;
315 				eatfrom(buf, e);
316 				continue;
317 			}
318 			/* fall through */
319 
320 		  case MS_HEADER:
321 			if (!isheader(buf))
322 			{
323 				mstate = MS_BODY;
324 				goto nextstate;
325 			}
326 
327 			/* check for possible continuation line */
328 			do
329 			{
330 				clearerr(fp);
331 				errno = 0;
332 				c = getc(fp);
333 			} while (errno == EINTR);
334 			if (c != EOF)
335 				ungetc(c, fp);
336 			if (c == ' ' || c == '\t')
337 			{
338 				/* yep -- defer this */
339 				continue;
340 			}
341 
342 			/* trim off trailing CRLF or NL */
343 			if (*--bp != '\n' || *--bp != '\r')
344 				bp++;
345 			*bp = '\0';
346 			if (bitset(H_EOH, chompheader(buf, FALSE, hdrp, e)))
347 				mstate = MS_BODY;
348 			break;
349 
350 		  case MS_BODY:
351 			if (tTd(30, 1))
352 				printf("EOH\n");
353 			if (headeronly)
354 				goto readerr;
355 			bp = buf;
356 
357 			/* toss blank line */
358 			if ((!bitset(EF_CRLF_NOT_EOL, e->e_flags) &&
359 				bp[0] == '\r' && bp[1] == '\n') ||
360 			    (!bitset(EF_NL_NOT_EOL, e->e_flags) &&
361 				bp[0] == '\n'))
362 			{
363 				break;
364 			}
365 
366 			/* if not a blank separator, write it out */
367 			if (MaxMessageSize <= 0 ||
368 			    e->e_msgsize <= MaxMessageSize)
369 			{
370 				while (*bp != '\0')
371 					putc(*bp++, tf);
372 			}
373 			break;
374 		}
375 		bp = buf;
376 	}
377 
378 readerr:
379 	if ((feof(fp) && smtpmode) || ferror(fp))
380 	{
381 		const char *errmsg = errstring(errno);
382 
383 		if (tTd(30, 1))
384 			printf("collect: premature EOM: %s\n", errmsg);
385 #ifdef LOG
386 		if (LogLevel >= 2)
387 			syslog(LOG_WARNING, "collect: premature EOM: %s", errmsg);
388 #endif
389 		inputerr = TRUE;
390 	}
391 
392 	/* reset global timer */
393 	clrevent(CollectTimeout);
394 
395 	if (headeronly)
396 		return;
397 
398 	if (tf != NULL)
399 	{
400 		if (fflush(tf) != 0)
401 			tferror(tf, e);
402 		if (fsync(fileno(tf)) < 0 || fclose(tf) < 0)
403 		{
404 			tferror(tf, e);
405 			finis();
406 		}
407 	}
408 
409 	/* An EOF when running SMTP is an error */
410 	if (inputerr && (OpMode == MD_SMTP || OpMode == MD_DAEMON))
411 	{
412 		char *host;
413 		char *problem;
414 
415 		host = RealHostName;
416 		if (host == NULL)
417 			host = "localhost";
418 
419 		if (feof(fp))
420 			problem = "unexpected close";
421 		else if (ferror(fp))
422 			problem = "I/O error";
423 		else
424 			problem = "read timeout";
425 # ifdef LOG
426 		if (LogLevel > 0 && feof(fp))
427 			syslog(LOG_NOTICE,
428 			    "collect: %s on connection from %s, sender=%s: %s\n",
429 			    problem, host, e->e_from.q_paddr, errstring(errno));
430 # endif
431 		if (feof(fp))
432 			usrerr("451 collect: %s on connection from %s, from=%s",
433 				problem, host, e->e_from.q_paddr);
434 		else
435 			syserr("451 collect: %s on connection from %s, from=%s",
436 				problem, host, e->e_from.q_paddr);
437 
438 		/* don't return an error indication */
439 		e->e_to = NULL;
440 		e->e_flags &= ~EF_FATALERRS;
441 		e->e_flags |= EF_CLRQUEUE;
442 
443 		/* and don't try to deliver the partial message either */
444 		if (InChild)
445 			ExitStat = EX_QUIT;
446 		finis();
447 	}
448 
449 	/*
450 	**  Find out some information from the headers.
451 	**	Examples are who is the from person & the date.
452 	*/
453 
454 	eatheader(e, !requeueflag);
455 
456 	if (GrabTo && e->e_sendqueue == NULL)
457 		usrerr("No recipient addresses found in header");
458 
459 	/* collect statistics */
460 	if (OpMode != MD_VERIFY)
461 	{
462 		extern void markstats();
463 
464 		markstats(e, (ADDRESS *) NULL);
465 	}
466 
467 	/*
468 	**  Add an Apparently-To: line if we have no recipient lines.
469 	*/
470 
471 	if (hvalue("to", e->e_header) == NULL &&
472 	    hvalue("cc", e->e_header) == NULL &&
473 	    hvalue("bcc", e->e_header) == NULL &&
474 	    hvalue("apparently-to", e->e_header) == NULL)
475 	{
476 		register ADDRESS *q;
477 		char *hdr = NULL;
478 		extern void addheader();
479 
480 		/* create an Apparently-To: field */
481 		/*    that or reject the message.... */
482 		switch (NoRecipientAction)
483 		{
484 		  case NRA_ADD_APPARENTLY_TO:
485 			hdr = "Apparently-To";
486 			break;
487 
488 		  case NRA_ADD_TO:
489 			hdr = "To";
490 			break;
491 
492 		  case NRA_ADD_BCC:
493 			addheader("Bcc", "", &e->e_header);
494 			break;
495 
496 		  case NRA_ADD_TO_UNDISCLOSED:
497 			addheader("To", "undisclosed-recipients:;", &e->e_header);
498 			break;
499 		}
500 
501 		if (hdr != NULL)
502 		{
503 			for (q = e->e_sendqueue; q != NULL; q = q->q_next)
504 			{
505 				if (q->q_alias != NULL)
506 					continue;
507 				if (tTd(30, 3))
508 					printf("Adding %s: %s\n",
509 						hdr, q->q_paddr);
510 				addheader(hdr, q->q_paddr, &e->e_header);
511 			}
512 		}
513 	}
514 
515 	/* check for message too large */
516 	if (MaxMessageSize > 0 && e->e_msgsize > MaxMessageSize)
517 	{
518 		e->e_status = "5.2.3";
519 		usrerr("552 Message exceeds maximum fixed size (%ld)",
520 			MaxMessageSize);
521 # ifdef LOG
522 		if (LogLevel > 6)
523 			syslog(LOG_NOTICE, "%s: message size (%ld) exceeds maximum (%ld)",
524 				e->e_id, e->e_msgsize, MaxMessageSize);
525 # endif
526 	}
527 
528 	/* check for illegal 8-bit data */
529 	if (HasEightBits)
530 	{
531 		e->e_flags |= EF_HAS8BIT;
532 		if (!bitset(MM_PASS8BIT|MM_MIME8BIT, MimeMode))
533 		{
534 			e->e_status = "5.6.1";
535 			usrerr("554 Eight bit data not allowed");
536 		}
537 	}
538 	else
539 	{
540 		/* if it claimed to be 8 bits, well, it lied.... */
541 		if (e->e_bodytype != NULL &&
542 		    strcasecmp(e->e_bodytype, "8BITMIME") == 0)
543 			e->e_bodytype = "7BIT";
544 	}
545 
546 	if ((e->e_dfp = fopen(dfname, "r")) == NULL)
547 	{
548 		/* we haven't acked receipt yet, so just chuck this */
549 		syserr("Cannot reopen %s", dfname);
550 		finis();
551 	}
552 }
553 
554 
555 static void
collecttimeout(timeout)556 collecttimeout(timeout)
557 	time_t timeout;
558 {
559 	/* if no progress was made, die now */
560 	if (!CollectProgress)
561 		longjmp(CtxCollectTimeout, 1);
562 
563 	/* otherwise reset the timeout */
564 	CollectTimeout = setevent(timeout, collecttimeout, timeout);
565 	CollectProgress = FALSE;
566 }
/*
**  TFERROR -- signal error on writing the temporary file.
**
**	Parameters:
**		tf -- the file pointer for the temporary file.
**		e -- the current envelope.
**
**	Returns:
**		none.
**
**	Side Effects:
**		Gives an error message.
**		Arranges for following output to go elsewhere.
*/
581 
582 void
tferror(tf,e)583 tferror(tf, e)
584 	FILE *tf;
585 	register ENVELOPE *e;
586 {
587 	if (errno == ENOSPC)
588 	{
589 		struct stat st;
590 		long avail;
591 		long bsize;
592 
593 		e->e_flags |= EF_NO_BODY_RETN;
594 		if (fstat(fileno(tf), &st) < 0)
595 			st.st_size = 0;
596 		(void) freopen(queuename(e, 'd'), "w", tf);
597 		if (st.st_size <= 0)
598 			fprintf(tf, "\n*** Mail could not be accepted");
599 		else if (sizeof st.st_size > sizeof (long))
600 			fprintf(tf, "\n*** Mail of at least %qd bytes could not be accepted\n",
601 				st.st_size);
602 		else
603 			fprintf(tf, "\n*** Mail of at least %ld bytes could not be accepted\n",
604 				st.st_size);
605 		fprintf(tf, "*** at %s due to lack of disk space for temp file.\n",
606 			MyHostName);
607 		avail = freediskspace(QueueDir, &bsize);
608 		if (avail > 0)
609 		{
610 			if (bsize > 1024)
611 				avail *= bsize / 1024;
612 			else if (bsize < 1024)
613 				avail /= 1024 / bsize;
614 			fprintf(tf, "*** Currently, %ld kilobytes are available for mail temp files.\n",
615 				avail);
616 		}
617 		e->e_status = "4.3.1";
618 		usrerr("452 Out of disk space for temp file");
619 	}
620 	else
621 		syserr("collect: Cannot write tf%s", e->e_id);
622 	(void) freopen("/dev/null", "w", tf);
623 }
/*
**  EATFROM -- chew up a UNIX style from line and process
**
**	This does indeed make some assumptions about the format
**	of UNIX messages.
**
**	Parameters:
**		fm -- the from line.
**		e -- the current envelope.
**
**	Returns:
**		none.
**
**	Side Effects:
**		extracts what information it can from the header,
**		such as the date.
*/
640 
641 # ifndef NOTUNIX
642 
643 char	*DowList[] =
644 {
645 	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
646 };
647 
648 char	*MonthList[] =
649 {
650 	"Jan", "Feb", "Mar", "Apr", "May", "Jun",
651 	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
652 	NULL
653 };
654 
655 void
eatfrom(fm,e)656 eatfrom(fm, e)
657 	char *fm;
658 	register ENVELOPE *e;
659 {
660 	register char *p;
661 	register char **dt;
662 
663 	if (tTd(30, 2))
664 		printf("eatfrom(%s)\n", fm);
665 
666 	/* find the date part */
667 	p = fm;
668 	while (*p != '\0')
669 	{
670 		/* skip a word */
671 		while (*p != '\0' && *p != ' ')
672 			p++;
673 		while (*p == ' ')
674 			p++;
675 		if (!(isascii(*p) && isupper(*p)) ||
676 		    p[3] != ' ' || p[13] != ':' || p[16] != ':')
677 			continue;
678 
679 		/* we have a possible date */
680 		for (dt = DowList; *dt != NULL; dt++)
681 			if (strncmp(*dt, p, 3) == 0)
682 				break;
683 		if (*dt == NULL)
684 			continue;
685 
686 		for (dt = MonthList; *dt != NULL; dt++)
687 			if (strncmp(*dt, &p[4], 3) == 0)
688 				break;
689 		if (*dt != NULL)
690 			break;
691 	}
692 
693 	if (*p != '\0')
694 	{
695 		char *q;
696 		extern char *arpadate();
697 
698 		/* we have found a date */
699 		q = xalloc(25);
700 		(void) strncpy(q, p, 25);
701 		q[24] = '\0';
702 		q = arpadate(q);
703 		define('a', newstr(q), e);
704 	}
705 }
706 
707 # endif /* NOTUNIX */
708