xref: /freebsd/contrib/sendmail/src/mime.c (revision 605302a5)
1 /*
2  * Copyright (c) 1998-2002 Sendmail, Inc. and its suppliers.
3  *	All rights reserved.
4  * Copyright (c) 1994, 1996-1997 Eric P. Allman.  All rights reserved.
5  * Copyright (c) 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * By using this file, you agree to the terms and conditions set
9  * forth in the LICENSE file which can be found at the top level of
10  * the sendmail distribution.
11  *
12  */
13 
14 #include <sendmail.h>
15 #include <string.h>
16 
17 SM_RCSID("@(#)$Id: mime.c,v 8.129 2002/03/13 07:28:05 gshapiro Exp $")
18 
19 /*
20 **  MIME support.
21 **
22 **	I am indebted to John Beck of Hewlett-Packard, who contributed
23 **	his code to me for inclusion.  As it turns out, I did not use
24 **	his code since he used a "minimum change" approach that used
25 **	several temp files, and I wanted a "minimum impact" approach
26 **	that would avoid copying.  However, looking over his code
27 **	helped me cement my understanding of the problem.
28 **
29 **	I also looked at, but did not directly use, Nathaniel
30 **	Borenstein's "code.c" module.  Again, it functioned as
31 **	a file-to-file translator, which did not fit within my
32 **	design bounds, but it was a useful base for understanding
33 **	the problem.
34 */
35 
36 #if MIME8TO7
/*
**  Forward declarations for the file-local MIME8TO7 helpers that are
**  defined further down in this file.
*/
37 static int	isboundary __P((char *, char **));
38 static int	mimeboundary __P((char *, char **));
39 static int	mime_getchar __P((SM_FILE_T *, char **, int *));
40 static int	mime_getchar_crlf __P((SM_FILE_T *, char **, int *));
41 
/*
**  Character sets for hex and base64 encoding.  Both tables are only
**  ever indexed, never written, so they are declared const.
*/
static const char	Base16Code[] =	"0123456789ABCDEF";
static const char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* types of MIME boundaries */
# define MBT_SYNTAX	0	/* syntax error */
# define MBT_NOTSEP	1	/* not a boundary */
# define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
# define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* printable names for the MBT_* values, indexed by boundary type */
static const char	*const MimeBoundaryNames[] =
{
	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
};

/* if set, mime_getchar_crlf() expands each NL into CR LF */
static bool	MapNLtoCRLF;
58 
59 /*
60 **  MIME8TO7 -- output 8 bit body in 7 bit format
61 **
62 **	The header has already been output -- this has to do the
63 **	8 to 7 bit conversion.  It would be easy if we didn't have
64 **	to deal with nested formats (multipart/xxx and message/rfc822).
65 **
66 **	We won't be called if we don't have to do a conversion, and
67 **	appropriate MIME-Version: and Content-Type: fields have been
68 **	output.  Any Content-Transfer-Encoding: field has not been
69 **	output, and we can add it here.
70 **
71 **	Parameters:
72 **		mci -- mailer connection information.
73 **		header -- the header for this body part.
74 **		e -- envelope.
75 **		boundaries -- the currently pending message boundaries.
76 **			NULL if we are processing the outer portion.
77 **		flags -- to tweak processing.
78 **
79 **	Returns:
80 **		An indicator of what terminated the message part:
81 **		  MBT_FINAL -- the final boundary
82 **		  MBT_INTERMED -- an intermediate boundary
83 **		  MBT_NOTSEP -- an end of file
84 */
85 
86 struct args
87 {
88 	char	*a_field;	/* name of field */
89 	char	*a_value;	/* value of that field */
90 };
91 
92 int
93 mime8to7(mci, header, e, boundaries, flags)
94 	register MCI *mci;
95 	HDR *header;
96 	register ENVELOPE *e;
97 	char **boundaries;
98 	int flags;
99 {
100 	register char *p;
101 	int linelen;
102 	int bt;
103 	off_t offset;
104 	size_t sectionsize, sectionhighbits;
105 	int i;
106 	char *type;
107 	char *subtype;
108 	char *cte;
109 	char **pvp;
110 	int argc = 0;
111 	char *bp;
112 	bool use_qp = false;
113 	struct args argv[MAXMIMEARGS];
114 	char bbuf[128];
115 	char buf[MAXLINE];
116 	char pvpbuf[MAXLINE];
117 	extern unsigned char MimeTokenTab[256];
118 
119 	if (tTd(43, 1))
120 	{
121 		sm_dprintf("mime8to7: flags = %x, boundaries =", flags);
122 		if (boundaries[0] == NULL)
123 			sm_dprintf(" <none>");
124 		else
125 		{
126 			for (i = 0; boundaries[i] != NULL; i++)
127 				sm_dprintf(" %s", boundaries[i]);
128 		}
129 		sm_dprintf("\n");
130 	}
131 	MapNLtoCRLF = true;
132 	p = hvalue("Content-Transfer-Encoding", header);
133 	if (p == NULL ||
134 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
135 			   MimeTokenTab)) == NULL ||
136 	    pvp[0] == NULL)
137 	{
138 		cte = NULL;
139 	}
140 	else
141 	{
142 		cataddr(pvp, NULL, buf, sizeof buf, '\0');
143 		cte = sm_rpool_strdup_x(e->e_rpool, buf);
144 	}
145 
146 	type = subtype = NULL;
147 	p = hvalue("Content-Type", header);
148 	if (p == NULL)
149 	{
150 		if (bitset(M87F_DIGEST, flags))
151 			p = "message/rfc822";
152 		else
153 			p = "text/plain";
154 	}
155 	if (p != NULL &&
156 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
157 			   MimeTokenTab)) != NULL &&
158 	    pvp[0] != NULL)
159 	{
160 		if (tTd(43, 40))
161 		{
162 			for (i = 0; pvp[i] != NULL; i++)
163 				sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]);
164 		}
165 		type = *pvp++;
166 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
167 		    *++pvp != NULL)
168 		{
169 			subtype = *pvp++;
170 		}
171 
172 		/* break out parameters */
173 		while (*pvp != NULL && argc < MAXMIMEARGS)
174 		{
175 			/* skip to semicolon separator */
176 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
177 				pvp++;
178 			if (*pvp++ == NULL || *pvp == NULL)
179 				break;
180 
181 			/* complain about empty values */
182 			if (strcmp(*pvp, ";") == 0)
183 			{
184 				usrerr("mime8to7: Empty parameter in Content-Type header");
185 
186 				/* avoid bounce loops */
187 				e->e_flags |= EF_DONT_MIME;
188 				continue;
189 			}
190 
191 			/* extract field name */
192 			argv[argc].a_field = *pvp++;
193 
194 			/* see if there is a value */
195 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
196 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
197 			{
198 				argv[argc].a_value = *pvp;
199 				argc++;
200 			}
201 		}
202 	}
203 
204 	/* check for disaster cases */
205 	if (type == NULL)
206 		type = "-none-";
207 	if (subtype == NULL)
208 		subtype = "-none-";
209 
210 	/* don't propogate some flags more than one level into the message */
211 	flags &= ~M87F_DIGEST;
212 
213 	/*
214 	**  Check for cases that can not be encoded.
215 	**
216 	**	For example, you can't encode certain kinds of types
217 	**	or already-encoded messages.  If we find this case,
218 	**	just copy it through.
219 	*/
220 
221 	(void) sm_snprintf(buf, sizeof buf, "%.100s/%.100s", type, subtype);
222 	if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
223 		flags |= M87F_NO8BIT;
224 
225 # ifdef USE_B_CLASS
226 	if (wordinclass(buf, 'b') || wordinclass(type, 'b'))
227 		MapNLtoCRLF = false;
228 # endif /* USE_B_CLASS */
229 	if (wordinclass(buf, 'q') || wordinclass(type, 'q'))
230 		use_qp = true;
231 
232 	/*
233 	**  Multipart requires special processing.
234 	**
235 	**	Do a recursive descent into the message.
236 	*/
237 
238 	if (sm_strcasecmp(type, "multipart") == 0 &&
239 	    (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags)))
240 	{
241 
242 		if (sm_strcasecmp(subtype, "digest") == 0)
243 			flags |= M87F_DIGEST;
244 
245 		for (i = 0; i < argc; i++)
246 		{
247 			if (sm_strcasecmp(argv[i].a_field, "boundary") == 0)
248 				break;
249 		}
250 		if (i >= argc || argv[i].a_value == NULL)
251 		{
252 			usrerr("mime8to7: Content-Type: \"%s\": %s boundary",
253 				i >= argc ? "missing" : "bogus", p);
254 			p = "---";
255 
256 			/* avoid bounce loops */
257 			e->e_flags |= EF_DONT_MIME;
258 		}
259 		else
260 		{
261 			p = argv[i].a_value;
262 			stripquotes(p);
263 		}
264 		if (sm_strlcpy(bbuf, p, sizeof bbuf) >= sizeof bbuf)
265 		{
266 			usrerr("mime8to7: multipart boundary \"%s\" too long",
267 				p);
268 
269 			/* avoid bounce loops */
270 			e->e_flags |= EF_DONT_MIME;
271 		}
272 
273 		if (tTd(43, 1))
274 			sm_dprintf("mime8to7: multipart boundary \"%s\"\n",
275 				bbuf);
276 		for (i = 0; i < MAXMIMENESTING; i++)
277 		{
278 			if (boundaries[i] == NULL)
279 				break;
280 		}
281 		if (i >= MAXMIMENESTING)
282 		{
283 			usrerr("mime8to7: multipart nesting boundary too deep");
284 
285 			/* avoid bounce loops */
286 			e->e_flags |= EF_DONT_MIME;
287 		}
288 		else
289 		{
290 			boundaries[i] = bbuf;
291 			boundaries[i + 1] = NULL;
292 		}
293 		mci->mci_flags |= MCIF_INMIME;
294 
295 		/* skip the early "comment" prologue */
296 		putline("", mci);
297 		mci->mci_flags &= ~MCIF_INHEADER;
298 		bt = MBT_FINAL;
299 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
300 			!= NULL)
301 		{
302 			bt = mimeboundary(buf, boundaries);
303 			if (bt != MBT_NOTSEP)
304 				break;
305 			putxline(buf, strlen(buf), mci,
306 				 PXLF_MAPFROM|PXLF_STRIP8BIT);
307 			if (tTd(43, 99))
308 				sm_dprintf("  ...%s", buf);
309 		}
310 		if (sm_io_eof(e->e_dfp))
311 			bt = MBT_FINAL;
312 		while (bt != MBT_FINAL)
313 		{
314 			auto HDR *hdr = NULL;
315 
316 			(void) sm_strlcpyn(buf, sizeof buf, 2, "--", bbuf);
317 			putline(buf, mci);
318 			if (tTd(43, 35))
319 				sm_dprintf("  ...%s\n", buf);
320 			collect(e->e_dfp, false, &hdr, e);
321 			if (tTd(43, 101))
322 				putline("+++after collect", mci);
323 			putheader(mci, hdr, e, flags);
324 			if (tTd(43, 101))
325 				putline("+++after putheader", mci);
326 			bt = mime8to7(mci, hdr, e, boundaries, flags);
327 		}
328 		(void) sm_strlcpyn(buf, sizeof buf, 3, "--", bbuf, "--");
329 		putline(buf, mci);
330 		if (tTd(43, 35))
331 			sm_dprintf("  ...%s\n", buf);
332 		boundaries[i] = NULL;
333 		mci->mci_flags &= ~MCIF_INMIME;
334 
335 		/* skip the late "comment" epilogue */
336 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
337 			!= NULL)
338 		{
339 			bt = mimeboundary(buf, boundaries);
340 			if (bt != MBT_NOTSEP)
341 				break;
342 			putxline(buf, strlen(buf), mci,
343 				 PXLF_MAPFROM|PXLF_STRIP8BIT);
344 			if (tTd(43, 99))
345 				sm_dprintf("  ...%s", buf);
346 		}
347 		if (sm_io_eof(e->e_dfp))
348 			bt = MBT_FINAL;
349 		if (tTd(43, 3))
350 			sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n",
351 				MimeBoundaryNames[bt]);
352 		return bt;
353 	}
354 
355 	/*
356 	**  Message/xxx types -- recurse exactly once.
357 	**
358 	**	Class 's' is predefined to have "rfc822" only.
359 	*/
360 
361 	if (sm_strcasecmp(type, "message") == 0)
362 	{
363 		if (!wordinclass(subtype, 's'))
364 		{
365 			flags |= M87F_NO8BIT;
366 		}
367 		else
368 		{
369 			auto HDR *hdr = NULL;
370 
371 			putline("", mci);
372 
373 			mci->mci_flags |= MCIF_INMIME;
374 			collect(e->e_dfp, false, &hdr, e);
375 			if (tTd(43, 101))
376 				putline("+++after collect", mci);
377 			putheader(mci, hdr, e, flags);
378 			if (tTd(43, 101))
379 				putline("+++after putheader", mci);
380 			if (hvalue("MIME-Version", hdr) == NULL)
381 				putline("MIME-Version: 1.0", mci);
382 			bt = mime8to7(mci, hdr, e, boundaries, flags);
383 			mci->mci_flags &= ~MCIF_INMIME;
384 			return bt;
385 		}
386 	}
387 
388 	/*
389 	**  Non-compound body type
390 	**
391 	**	Compute the ratio of seven to eight bit characters;
392 	**	use that as a heuristic to decide how to do the
393 	**	encoding.
394 	*/
395 
396 	sectionsize = sectionhighbits = 0;
397 	if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags))
398 	{
399 		/* remember where we were */
400 		offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT);
401 		if (offset == -1)
402 			syserr("mime8to7: cannot sm_io_tell on %cf%s",
403 			       DATAFL_LETTER, e->e_id);
404 
405 		/* do a scan of this body type to count character types */
406 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
407 			!= NULL)
408 		{
409 			if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
410 				break;
411 			for (p = buf; *p != '\0'; p++)
412 			{
413 				/* count bytes with the high bit set */
414 				sectionsize++;
415 				if (bitset(0200, *p))
416 					sectionhighbits++;
417 			}
418 
419 			/*
420 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
421 			**  assume base64.  This heuristic avoids double-reading
422 			**  large graphics or video files.
423 			*/
424 
425 			if (sectionsize >= 4096 &&
426 			    sectionhighbits > sectionsize / 4)
427 				break;
428 		}
429 
430 		/* return to the original offset for processing */
431 		/* XXX use relative seeks to handle >31 bit file sizes? */
432 		if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0)
433 			syserr("mime8to7: cannot sm_io_fseek on %cf%s",
434 			       DATAFL_LETTER, e->e_id);
435 		else
436 			sm_io_clearerr(e->e_dfp);
437 	}
438 
439 	/*
440 	**  Heuristically determine encoding method.
441 	**	If more than 1/8 of the total characters have the
442 	**	eighth bit set, use base64; else use quoted-printable.
443 	**	However, only encode binary encoded data as base64,
444 	**	since otherwise the NL=>CRLF mapping will be a problem.
445 	*/
446 
447 	if (tTd(43, 8))
448 	{
449 		sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n",
450 			(long) sectionhighbits, (long) sectionsize,
451 			cte == NULL ? "[none]" : cte,
452 			type == NULL ? "[none]" : type,
453 			subtype == NULL ? "[none]" : subtype);
454 	}
455 	if (cte != NULL && sm_strcasecmp(cte, "binary") == 0)
456 		sectionsize = sectionhighbits;
457 	linelen = 0;
458 	bp = buf;
459 	if (sectionhighbits == 0)
460 	{
461 		/* no encoding necessary */
462 		if (cte != NULL &&
463 		    bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME,
464 			   mci->mci_flags) &&
465 		    !bitset(M87F_NO8TO7, flags))
466 		{
467 			/*
468 			**  Skip _unless_ in MIME mode and potentially
469 			**  converting from 8 bit to 7 bit MIME.  See
470 			**  putheader() for the counterpart where the
471 			**  CTE header is skipped in the opposite
472 			**  situation.
473 			*/
474 
475 			(void) sm_snprintf(buf, sizeof buf,
476 				"Content-Transfer-Encoding: %.200s", cte);
477 			putline(buf, mci);
478 			if (tTd(43, 36))
479 				sm_dprintf("  ...%s\n", buf);
480 		}
481 		putline("", mci);
482 		mci->mci_flags &= ~MCIF_INHEADER;
483 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
484 			!= NULL)
485 		{
486 			bt = mimeboundary(buf, boundaries);
487 			if (bt != MBT_NOTSEP)
488 				break;
489 			putline(buf, mci);
490 		}
491 		if (sm_io_eof(e->e_dfp))
492 			bt = MBT_FINAL;
493 	}
494 	else if (!MapNLtoCRLF ||
495 		 (sectionsize / 8 < sectionhighbits && !use_qp))
496 	{
497 		/* use base64 encoding */
498 		int c1, c2;
499 
500 		if (tTd(43, 36))
501 			sm_dprintf("  ...Content-Transfer-Encoding: base64\n");
502 		putline("Content-Transfer-Encoding: base64", mci);
503 		(void) sm_snprintf(buf, sizeof buf,
504 			"X-MIME-Autoconverted: from 8bit to base64 by %s id %s",
505 			MyHostName, e->e_id);
506 		putline(buf, mci);
507 		putline("", mci);
508 		mci->mci_flags &= ~MCIF_INHEADER;
509 		while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) !=
510 			SM_IO_EOF)
511 		{
512 			if (linelen > 71)
513 			{
514 				*bp = '\0';
515 				putline(buf, mci);
516 				linelen = 0;
517 				bp = buf;
518 			}
519 			linelen += 4;
520 			*bp++ = Base64Code[(c1 >> 2)];
521 			c1 = (c1 & 0x03) << 4;
522 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
523 			if (c2 == SM_IO_EOF)
524 			{
525 				*bp++ = Base64Code[c1];
526 				*bp++ = '=';
527 				*bp++ = '=';
528 				break;
529 			}
530 			c1 |= (c2 >> 4) & 0x0f;
531 			*bp++ = Base64Code[c1];
532 			c1 = (c2 & 0x0f) << 2;
533 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
534 			if (c2 == SM_IO_EOF)
535 			{
536 				*bp++ = Base64Code[c1];
537 				*bp++ = '=';
538 				break;
539 			}
540 			c1 |= (c2 >> 6) & 0x03;
541 			*bp++ = Base64Code[c1];
542 			*bp++ = Base64Code[c2 & 0x3f];
543 		}
544 		*bp = '\0';
545 		putline(buf, mci);
546 	}
547 	else
548 	{
549 		/* use quoted-printable encoding */
550 		int c1, c2;
551 		int fromstate;
552 		BITMAP256 badchars;
553 
554 		/* set up map of characters that must be mapped */
555 		clrbitmap(badchars);
556 		for (c1 = 0x00; c1 < 0x20; c1++)
557 			setbitn(c1, badchars);
558 		clrbitn('\t', badchars);
559 		for (c1 = 0x7f; c1 < 0x100; c1++)
560 			setbitn(c1, badchars);
561 		setbitn('=', badchars);
562 		if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags))
563 			for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++)
564 				setbitn(*p, badchars);
565 
566 		if (tTd(43, 36))
567 			sm_dprintf("  ...Content-Transfer-Encoding: quoted-printable\n");
568 		putline("Content-Transfer-Encoding: quoted-printable", mci);
569 		(void) sm_snprintf(buf, sizeof buf,
570 			"X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s",
571 			MyHostName, e->e_id);
572 		putline(buf, mci);
573 		putline("", mci);
574 		mci->mci_flags &= ~MCIF_INHEADER;
575 		fromstate = 0;
576 		c2 = '\n';
577 		while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) !=
578 			SM_IO_EOF)
579 		{
580 			if (c1 == '\n')
581 			{
582 				if (c2 == ' ' || c2 == '\t')
583 				{
584 					*bp++ = '=';
585 					*bp++ = Base16Code[(c2 >> 4) & 0x0f];
586 					*bp++ = Base16Code[c2 & 0x0f];
587 				}
588 				if (buf[0] == '.' && bp == &buf[1])
589 				{
590 					buf[0] = '=';
591 					*bp++ = Base16Code[('.' >> 4) & 0x0f];
592 					*bp++ = Base16Code['.' & 0x0f];
593 				}
594 				*bp = '\0';
595 				putline(buf, mci);
596 				linelen = fromstate = 0;
597 				bp = buf;
598 				c2 = c1;
599 				continue;
600 			}
601 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
602 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
603 			{
604 				*bp++ = '=';
605 				*bp++ = '2';
606 				*bp++ = '0';
607 				linelen += 3;
608 			}
609 			else if (c2 == ' ' || c2 == '\t')
610 			{
611 				*bp++ = c2;
612 				linelen++;
613 			}
614 			if (linelen > 72 &&
615 			    (linelen > 75 || c1 != '.' ||
616 			     (linelen > 73 && c2 == '.')))
617 			{
618 				if (linelen > 73 && c2 == '.')
619 					bp--;
620 				else
621 					c2 = '\n';
622 				*bp++ = '=';
623 				*bp = '\0';
624 				putline(buf, mci);
625 				linelen = fromstate = 0;
626 				bp = buf;
627 				if (c2 == '.')
628 				{
629 					*bp++ = '.';
630 					linelen++;
631 				}
632 			}
633 			if (bitnset(bitidx(c1), badchars))
634 			{
635 				*bp++ = '=';
636 				*bp++ = Base16Code[(c1 >> 4) & 0x0f];
637 				*bp++ = Base16Code[c1 & 0x0f];
638 				linelen += 3;
639 			}
640 			else if (c1 != ' ' && c1 != '\t')
641 			{
642 				if (linelen < 4 && c1 == "From"[linelen])
643 					fromstate++;
644 				*bp++ = c1;
645 				linelen++;
646 			}
647 			c2 = c1;
648 		}
649 
650 		/* output any saved character */
651 		if (c2 == ' ' || c2 == '\t')
652 		{
653 			*bp++ = '=';
654 			*bp++ = Base16Code[(c2 >> 4) & 0x0f];
655 			*bp++ = Base16Code[c2 & 0x0f];
656 			linelen += 3;
657 		}
658 
659 		if (linelen > 0 || boundaries[0] != NULL)
660 		{
661 			*bp = '\0';
662 			putline(buf, mci);
663 		}
664 
665 	}
666 	if (tTd(43, 3))
667 		sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
668 	return bt;
669 }
670 /*
671 **  MIME_GETCHAR -- get a character for MIME processing
672 **
673 **	Treats boundaries as SM_IO_EOF.
674 **
675 **	Parameters:
676 **		fp -- the input file.
677 **		boundaries -- the current MIME boundaries.
678 **		btp -- if the return value is SM_IO_EOF, *btp is set to
679 **			the type of the boundary.
680 **
681 **	Returns:
682 **		The next character in the input stream.
683 */
684 
685 static int
686 mime_getchar(fp, boundaries, btp)
687 	register SM_FILE_T *fp;
688 	char **boundaries;
689 	int *btp;
690 {
691 	int c;
692 	static unsigned char *bp = NULL;
693 	static int buflen = 0;
694 	static bool atbol = true;	/* at beginning of line */
695 	static int bt = MBT_SYNTAX;	/* boundary type of next SM_IO_EOF */
696 	static unsigned char buf[128];	/* need not be a full line */
697 	int start = 0;			/* indicates position of - in buffer */
698 
699 	if (buflen == 1 && *bp == '\n')
700 	{
701 		/* last \n in buffer may be part of next MIME boundary */
702 		c = *bp;
703 	}
704 	else if (buflen > 0)
705 	{
706 		buflen--;
707 		return *bp++;
708 	}
709 	else
710 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
711 	bp = buf;
712 	buflen = 0;
713 	if (c == '\n')
714 	{
715 		/* might be part of a MIME boundary */
716 		*bp++ = c;
717 		atbol = true;
718 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
719 		if (c == '\n')
720 		{
721 			(void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c);
722 			return c;
723 		}
724 		start = 1;
725 	}
726 	if (c != SM_IO_EOF)
727 		*bp++ = c;
728 	else
729 		bt = MBT_FINAL;
730 	if (atbol && c == '-')
731 	{
732 		/* check for a message boundary */
733 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
734 		if (c != '-')
735 		{
736 			if (c != SM_IO_EOF)
737 				*bp++ = c;
738 			else
739 				bt = MBT_FINAL;
740 			buflen = bp - buf - 1;
741 			bp = buf;
742 			return *bp++;
743 		}
744 
745 		/* got "--", now check for rest of separator */
746 		*bp++ = '-';
747 		while (bp < &buf[sizeof buf - 2] &&
748 		       (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF &&
749 		       c != '\n')
750 		{
751 			*bp++ = c;
752 		}
753 		*bp = '\0';	/* XXX simply cut off? */
754 		bt = mimeboundary((char *) &buf[start], boundaries);
755 		switch (bt)
756 		{
757 		  case MBT_FINAL:
758 		  case MBT_INTERMED:
759 			/* we have a message boundary */
760 			buflen = 0;
761 			*btp = bt;
762 			return SM_IO_EOF;
763 		}
764 
765 		atbol = c == '\n';
766 		if (c != SM_IO_EOF)
767 			*bp++ = c;
768 	}
769 
770 	buflen = bp - buf - 1;
771 	if (buflen < 0)
772 	{
773 		*btp = bt;
774 		return SM_IO_EOF;
775 	}
776 	bp = buf;
777 	return *bp++;
778 }
779 /*
780 **  MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
781 **
782 **	Parameters:
783 **		fp -- the input file.
784 **		boundaries -- the current MIME boundaries.
785 **		btp -- if the return value is SM_IO_EOF, *btp is set to
786 **			the type of the boundary.
787 **
788 **	Returns:
789 **		The next character in the input stream.
790 */
791 
792 static int
793 mime_getchar_crlf(fp, boundaries, btp)
794 	register SM_FILE_T *fp;
795 	char **boundaries;
796 	int *btp;
797 {
798 	static bool sendlf = false;
799 	int c;
800 
801 	if (sendlf)
802 	{
803 		sendlf = false;
804 		return '\n';
805 	}
806 	c = mime_getchar(fp, boundaries, btp);
807 	if (c == '\n' && MapNLtoCRLF)
808 	{
809 		sendlf = true;
810 		return '\r';
811 	}
812 	return c;
813 }
814 /*
815 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
816 **
817 **	Parameters:
818 **		line -- the input line.
819 **		boundaries -- the set of currently pending boundaries.
820 **
821 **	Returns:
822 **		MBT_NOTSEP -- if this is not a separator line
823 **		MBT_INTERMED -- if this is an intermediate separator
824 **		MBT_FINAL -- if this is a final boundary
825 **		MBT_SYNTAX -- if this is a boundary for the wrong
826 **			enclosure -- i.e., a syntax error.
827 */
828 
829 static int
830 mimeboundary(line, boundaries)
831 	register char *line;
832 	char **boundaries;
833 {
834 	int type = MBT_NOTSEP;
835 	int i;
836 	int savec;
837 
838 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
839 		return MBT_NOTSEP;
840 	i = strlen(line);
841 	if (i > 0 && line[i - 1] == '\n')
842 		i--;
843 
844 	/* strip off trailing whitespace */
845 	while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t'))
846 		i--;
847 	savec = line[i];
848 	line[i] = '\0';
849 
850 	if (tTd(43, 5))
851 		sm_dprintf("mimeboundary: line=\"%s\"... ", line);
852 
853 	/* check for this as an intermediate boundary */
854 	if (isboundary(&line[2], boundaries) >= 0)
855 		type = MBT_INTERMED;
856 	else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
857 	{
858 		/* check for a final boundary */
859 		line[i - 2] = '\0';
860 		if (isboundary(&line[2], boundaries) >= 0)
861 			type = MBT_FINAL;
862 		line[i - 2] = '-';
863 	}
864 
865 	line[i] = savec;
866 	if (tTd(43, 5))
867 		sm_dprintf("%s\n", MimeBoundaryNames[type]);
868 	return type;
869 }
870 /*
871 **  DEFCHARSET -- return default character set for message
872 **
873 **	The first choice for character set is for the mailer
874 **	corresponding to the envelope sender.  If neither that
875 **	nor the global configuration file has a default character
876 **	set defined, return "unknown-8bit" as recommended by
877 **	RFC 1428 section 3.
878 **
879 **	Parameters:
880 **		e -- the envelope for this message.
881 **
882 **	Returns:
883 **		The default character set for that mailer.
884 */
885 
886 char *
887 defcharset(e)
888 	register ENVELOPE *e;
889 {
890 	if (e != NULL && e->e_from.q_mailer != NULL &&
891 	    e->e_from.q_mailer->m_defcharset != NULL)
892 		return e->e_from.q_mailer->m_defcharset;
893 	if (DefaultCharSet != NULL)
894 		return DefaultCharSet;
895 	return "unknown-8bit";
896 }
897 /*
898 **  ISBOUNDARY -- is a given string a currently valid boundary?
899 **
900 **	Parameters:
901 **		line -- the current input line.
902 **		boundaries -- the list of valid boundaries.
903 **
904 **	Returns:
905 **		The index number in boundaries if the line is found.
906 **		-1 -- otherwise.
907 **
908 */
909 
910 static int
911 isboundary(line, boundaries)
912 	char *line;
913 	char **boundaries;
914 {
915 	register int i;
916 
917 	for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++)
918 	{
919 		if (strcmp(line, boundaries[i]) == 0)
920 			return i;
921 	}
922 	return -1;
923 }
924 #endif /* MIME8TO7 */
925 
926 #if MIME7TO8
/* forward declaration: quoted-printable decoder, defined at the end of this file */
927 static int	mime_fromqp __P((unsigned char *, unsigned char **, int));
928 
929 /*
930 **  MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format
931 **
932 **  This is a hack. Supports translating the two 7-bit body-encodings
933 **  (quoted-printable and base64) to 8-bit coded bodies.
934 **
935 **  There is not much point in supporting multipart here, as the UA
936 **  will be able to deal with encoded MIME bodies if it can parse MIME
937 **  multipart messages.
938 **
939 **  Note also that we won't be called unless it is a text/plain MIME
940 **  message, encoded base64 or QP and mailer flag '9' has been defined
941 **  on mailer.
942 **
943 **  Contributed by Marius Olaffson <marius@rhi.hi.is>.
944 **
945 **	Parameters:
946 **		mci -- mailer connection information.
947 **		header -- the header for this body part.
948 **		e -- envelope.
949 **
950 **	Returns:
951 **		none.
952 */
953 
/*
**  Base64 decoding table, indexed by 7-bit character code: the
**  6-bit value of that character, or -1 if the character is not in
**  the base64 alphabet.
**
**  The element type is explicitly signed: whether plain char is
**  signed is implementation-defined, and with an unsigned char the
**  -1 entries would read back as 255.
*/

static const signed char index_64[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* base64 value of c, or -1 if c is outside 0..127 or not in the alphabet */
# define CHAR64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])
967 
968 void
969 mime7to8(mci, header, e)
970 	register MCI *mci;
971 	HDR *header;
972 	register ENVELOPE *e;
973 {
974 	int pxflags;
975 	register char *p;
976 	char *cte;
977 	char **pvp;
978 	unsigned char *fbufp;
979 	char buf[MAXLINE];
980 	unsigned char fbuf[MAXLINE + 1];
981 	char pvpbuf[MAXLINE];
982 	extern unsigned char MimeTokenTab[256];
983 
984 	p = hvalue("Content-Transfer-Encoding", header);
985 	if (p == NULL ||
986 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
987 			   MimeTokenTab)) == NULL ||
988 	    pvp[0] == NULL)
989 	{
990 		/* "can't happen" -- upper level should have caught this */
991 		syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p);
992 
993 		/* avoid bounce loops */
994 		e->e_flags |= EF_DONT_MIME;
995 
996 		/* cheap failsafe algorithm -- should work on text/plain */
997 		if (p != NULL)
998 		{
999 			(void) sm_snprintf(buf, sizeof buf,
1000 				"Content-Transfer-Encoding: %s", p);
1001 			putline(buf, mci);
1002 		}
1003 		putline("", mci);
1004 		mci->mci_flags &= ~MCIF_INHEADER;
1005 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
1006 			!= NULL)
1007 			putline(buf, mci);
1008 		return;
1009 	}
1010 	cataddr(pvp, NULL, buf, sizeof buf, '\0');
1011 	cte = sm_rpool_strdup_x(e->e_rpool, buf);
1012 
1013 	mci->mci_flags |= MCIF_INHEADER;
1014 	putline("Content-Transfer-Encoding: 8bit", mci);
1015 	(void) sm_snprintf(buf, sizeof buf,
1016 		"X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
1017 		cte, MyHostName, e->e_id);
1018 	putline(buf, mci);
1019 	putline("", mci);
1020 	mci->mci_flags &= ~MCIF_INHEADER;
1021 
1022 	/*
1023 	**  Translate body encoding to 8-bit.  Supports two types of
1024 	**  encodings; "base64" and "quoted-printable". Assume qp if
1025 	**  it is not base64.
1026 	*/
1027 
1028 	pxflags = PXLF_MAPFROM;
1029 	if (sm_strcasecmp(cte, "base64") == 0)
1030 	{
1031 		int c1, c2, c3, c4;
1032 
1033 		fbufp = fbuf;
1034 		while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) !=
1035 			SM_IO_EOF)
1036 		{
1037 			if (isascii(c1) && isspace(c1))
1038 				continue;
1039 
1040 			do
1041 			{
1042 				c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1043 			} while (isascii(c2) && isspace(c2));
1044 			if (c2 == SM_IO_EOF)
1045 				break;
1046 
1047 			do
1048 			{
1049 				c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1050 			} while (isascii(c3) && isspace(c3));
1051 			if (c3 == SM_IO_EOF)
1052 				break;
1053 
1054 			do
1055 			{
1056 				c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1057 			} while (isascii(c4) && isspace(c4));
1058 			if (c4 == SM_IO_EOF)
1059 				break;
1060 
1061 			if (c1 == '=' || c2 == '=')
1062 				continue;
1063 			c1 = CHAR64(c1);
1064 			c2 = CHAR64(c2);
1065 
1066 			*fbufp = (c1 << 2) | ((c2 & 0x30) >> 4);
1067 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1068 			{
1069 				if (*--fbufp != '\n' ||
1070 				    (fbufp > fbuf && *--fbufp != '\r'))
1071 				{
1072 					pxflags |= PXLF_NOADDEOL;
1073 					fbufp++;
1074 				}
1075 				putxline((char *) fbuf, fbufp - fbuf,
1076 					 mci, pxflags);
1077 				pxflags &= ~PXLF_NOADDEOL;
1078 				fbufp = fbuf;
1079 			}
1080 			if (c3 == '=')
1081 				continue;
1082 			c3 = CHAR64(c3);
1083 			*fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
1084 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1085 			{
1086 				if (*--fbufp != '\n' ||
1087 				    (fbufp > fbuf && *--fbufp != '\r'))
1088 				{
1089 					pxflags |= PXLF_NOADDEOL;
1090 					fbufp++;
1091 				}
1092 				putxline((char *) fbuf, fbufp - fbuf,
1093 					 mci, pxflags);
1094 				pxflags &= ~PXLF_NOADDEOL;
1095 				fbufp = fbuf;
1096 			}
1097 			if (c4 == '=')
1098 				continue;
1099 			c4 = CHAR64(c4);
1100 			*fbufp = ((c3 & 0x03) << 6) | c4;
1101 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1102 			{
1103 				if (*--fbufp != '\n' ||
1104 				    (fbufp > fbuf && *--fbufp != '\r'))
1105 				{
1106 					pxflags |= PXLF_NOADDEOL;
1107 					fbufp++;
1108 				}
1109 				putxline((char *) fbuf, fbufp - fbuf,
1110 					 mci, pxflags);
1111 				pxflags &= ~PXLF_NOADDEOL;
1112 				fbufp = fbuf;
1113 			}
1114 		}
1115 	}
1116 	else
1117 	{
1118 		int off;
1119 
1120 		/* quoted-printable */
1121 		pxflags |= PXLF_NOADDEOL;
1122 		fbufp = fbuf;
1123 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
1124 				   sizeof buf) != NULL)
1125 		{
1126 			off = mime_fromqp((unsigned char *) buf, &fbufp,
1127 					  &fbuf[MAXLINE] - fbufp);
1128 again:
1129 			if (off < -1)
1130 				continue;
1131 
1132 			if (fbufp - fbuf > 0)
1133 				putxline((char *) fbuf, fbufp - fbuf - 1, mci,
1134 					 pxflags);
1135 			fbufp = fbuf;
1136 			if (off >= 0 && buf[off] != '\0')
1137 			{
1138 				off = mime_fromqp((unsigned char *) (buf + off),
1139 						  &fbufp,
1140 						  &fbuf[MAXLINE] - fbufp);
1141 				goto again;
1142 			}
1143 		}
1144 	}
1145 
1146 	/* force out partial last line */
1147 	if (fbufp > fbuf)
1148 	{
1149 		*fbufp = '\0';
1150 		putxline((char *) fbuf, fbufp - fbuf, mci, pxflags);
1151 	}
1152 
1153 	/*
1154 	**  The decoded text may end without an EOL.  Since this function
1155 	**  is only called for text/plain MIME messages, it is safe to
1156 	**  add an extra one at the end just in case.  This is a hack,
1157 	**  but so is auto-converting MIME in the first place.
1158 	*/
1159 
1160 	putline("", mci);
1161 
1162 	if (tTd(43, 3))
1163 		sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte);
1164 }
1165 /*
1166 **  The following is based on Borenstein's "codes.c" module, with simplifying
1167 **  changes as we do not deal with multipart, and to do the translation in-core,
1168 **  with an attempt to prevent overrun of output buffers.
1169 **
1170 **  What is needed here are changes to defend this code better against
1171 **  bad encodings. Questionable to always return 0xFF for bad mappings.
1172 */
1173 
/*
**  Hex digit decoding table, indexed by 7-bit character code: the
**  value of the hex digit (upper or lower case), or -1 if the
**  character is not a hex digit.
**
**  The element type is explicitly signed: whether plain char is
**  signed is implementation-defined, and with an unsigned char the
**  -1 entries would read back as 255 and never compare equal to -1.
*/

static const signed char index_hex[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* value of hex digit c, or -1 if c is outside 0..127 or not a hex digit */
# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])

/*
**  MIME_FROMQP -- decode quoted printable string
**
**	Decodes up to and including the first newline of infile.
**	Decoded bytes are stored through *outfile, which is advanced
**	past them.  Except when -2 or 0 is returned, a '\0' is
**	appended and *outfile is left pointing just past it.
**
**	Parameters:
**		infile -- input (encoded) string
**		outfile -- output string
**		maxlen -- size of output buffer
**
**	Returns:
**		-2 if "=" is followed by a newline (soft line break)
**			or by a character that is not a hex digit;
**			the rest of infile is ignored and no '\0'
**			is appended
**		-1 if infile completely decoded into outfile
**		>= 0 is the position in infile decoding
**			reached before maxlen was reached (0 if
**			there was no room to decode anything)
*/

static int
mime_fromqp(infile, outfile, maxlen)
	unsigned char *infile;
	unsigned char **outfile;
	int maxlen;		/* Max # of chars allowed in outfile */
{
	int c1, c2;
	int nchar = 0;
	bool ateol = false;	/* stopped after copying a newline */
	unsigned char *b;

	/* decrement by one for trailing '\0', at least one other char */
	if (--maxlen < 1)
		return 0;

	b = infile;
	while ((c1 = *infile++) != '\0' && nchar < maxlen)
	{
		if (c1 == '=')
		{
			if ((c1 = *infile++) == '\0')
				break;

			if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1)
			{
				/* ignore it and the rest of the buffer */
				return -2;
			}
			else
			{
				/* skip ahead to the next hex digit, if any */
				do
				{
					if ((c2 = *infile++) == '\0')
					{
						c2 = -1;
						break;
					}
				} while ((c2 = HEXCHAR(c2)) == -1);

				if (c2 == -1)
					break;
				nchar++;
				*(*outfile)++ = c1 << 4 | c2;
			}
		}
		else
		{
			nchar++;
			*(*outfile)++ = c1;
			if (c1 == '\n')
			{
				ateol = true;
				break;
			}
		}
	}
	*(*outfile)++ = '\0';

	/*
	**  Report a resume position only if input was really left
	**  undecoded.  If the newline was the character that used up
	**  the last free slot, the line is complete; returning its
	**  position would make the caller decode the newline again.
	*/

	if (nchar >= maxlen && !ateol)
		return (int) (infile - b - 1);
	return -1;
}
1260 #endif /* MIME7TO8 */
1261