xref: /original-bsd/usr.sbin/sendmail/src/mime.c (revision 1957deeb)
1 /*
2  * Copyright (c) 1994 Eric P. Allman
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 # include "sendmail.h"
10 # include <string.h>
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)mime.c	8.19.1.1 (Berkeley) 04/29/95";
14 #endif /* not lint */
15 
16 /*
17 **  MIME support.
18 **
19 **	I am indebted to John Beck of Hewlett-Packard, who contributed
20 **	his code to me for inclusion.  As it turns out, I did not use
21 **	his code since he used a "minimum change" approach that used
22 **	several temp files, and I wanted a "minimum impact" approach
23 **	that would avoid copying.  However, looking over his code
24 **	helped me cement my understanding of the problem.
25 **
26 **	I also looked at, but did not directly use, Nathaniel
27 **	Borenstein's "code.c" module.  Again, it functioned as
28 **	a file-to-file translator, which did not fit within my
29 **	design bounds, but it was a useful base for understanding
30 **	the problem.
31 **
32 **	Also, I looked at John G. Meyer's "part.[ch]" code.  Some
33 **	ideas have been loosely borrowed.
34 */
35 
36 
/*
**  Digit alphabets used by the encoders: Base16Code maps a 4-bit
**  value to its upper-case hex digit, Base64Code maps a 6-bit value
**  to its base64 character.
*/

char	Base16Code[] =
	"0123456789ABCDEF";
char	Base64Code[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
40 
/*
**  Classification of an input line with respect to the pending
**  MIME boundaries.  The values double as indices into
**  MimeBoundaryNames, so the two must be kept in step.
*/

#define MBT_SYNTAX	0	/* syntax error */
#define MBT_NOTSEP	1	/* not a boundary */
#define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
#define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* printable names for the MBT_* codes (used in debug output) */
static char	*MimeBoundaryNames[] =
{
	"SYNTAX",	/* MBT_SYNTAX */
	"NOTSEP",	/* MBT_NOTSEP */
	"INTERMED",	/* MBT_INTERMED */
	"FINAL"		/* MBT_FINAL */
};
51 
52 /*
53 **  Pseudo-file structure for reading up to MIME boundaries.
54 */
55 
56 #define MFILE	struct _mfile
57 
58 MFILE
59 {
60 	FILE	*mf_fp;		/* underlying file */
61 	u_char	*mf_buf;	/* a cache buffer */
62 	u_char	*mf_bp;		/* current pointer into mf_buf */
63 	int	mf_cnt;		/* number of characters left in mf_buf */
64 	int	mf_bsize;	/* size of mf_buf */
65 	int	(*mf_fill)();	/* buffer fill routine */
66 	char	mf_boundaries[MAXMIMENESTING + 1];
67 				/* current boundaries */
68 };
69 
/*
**  Fetch the next byte from an MFILE: take it from the cache while
**  any is left, otherwise call the fill routine, which returns the
**  next byte or EOF.  The argument is evaluated more than once, so
**  it must not have side effects.
*/
#define mime_getc(mf)	((mf)->mf_cnt-- > 0 ? *(mf)->mf_bp++ : (*(mf)->mf_fill)(mf))
71 /*
72 **  MIME8TO7 -- output 8 bit body in 7 bit format
73 **
74 **	The header has already been output -- this has to do the
75 **	8 to 7 bit conversion.  It would be easy if we didn't have
76 **	to deal with nested formats (multipart/xxx and message/rfc822).
77 **
78 **	We won't be called if we don't have to do a conversion, and
79 **	appropriate MIME-Version: and Content-Type: fields have been
80 **	output.  Any Content-Transfer-Encoding: field has not been
81 **	output, and we can add it here.
82 **
83 **	Parameters:
84 **		mci -- mailer connection information.
85 **		header -- the header for this body part.
86 **		e -- envelope.
87 **		flags -- to tweak processing.
88 **
89 **	Returns:
90 **		none.
91 */
92 
93 int
94 mime8to7(mci, header, e, flags)
95 	register MCI *mci;
96 	HDR *header;
97 	register ENVELOPE *e;
98 	int flags;
99 {
100 	MFILE mfile;
101 
102 	mfile.mf_fp = e->e_dfp;
103 	mfile.mf_cnt = mfile.mf_size = 0;
104 
105 	(void) mime8to7x(mci, header, e, &mfile, flags);
106 	if (mfile.mf_buf != NULL)
107 		free(mfile.mf_buf);
108 }
109 /*
110 **  MIME8TO7X -- internal interface for mime8to7.
111 **
112 **	Parameters:
113 **		mci -- mailer connection information.
114 **		header -- the header for this body part.
115 **		e -- envelope.
116 **		mf -- the file from which to read.
117 **		flags -- to tweak processing.
118 **
119 **	Returns:
120 **		An indicator of what terminated the message part:
121 **		  MBT_FINAL -- the final boundary
122 **		  MBT_INTERMED -- an intermediate boundary
123 **		  MBT_NOTSEP -- an end of file
124 */
125 
/* one "name=value" parameter taken from a Content-Type: field */
struct args
{
	char	*field;		/* parameter name */
	char	*value;		/* parameter value */
};
131 
132 int
133 mime8to7x(mci, header, e, boundaries, flags)
134 	register MCI *mci;
135 	HDR *header;
136 	register ENVELOPE *e;
137 	char **boundaries;
138 	int flags;
139 {
140 	register char *p;
141 	int linelen;
142 	int bt;
143 	off_t offset;
144 	size_t sectionsize, sectionhighbits;
145 	int i;
146 	char *type;
147 	char *subtype;
148 	char *cte;
149 	char **pvp;
150 	int argc = 0;
151 	char *bp;
152 	struct args argv[MAXMIMEARGS];
153 	char bbuf[128];
154 	char buf[MAXLINE];
155 	char pvpbuf[MAXLINE];
156 	extern char MimeTokenTab[256];
157 
158 	if (tTd(43, 1))
159 	{
160 		printf("mime8to7: flags = %x, boundaries =", flags);
161 		if (boundaries[0] == NULL)
162 			printf(" <none>");
163 		else
164 		{
165 			for (i = 0; boundaries[i] != NULL; i++)
166 				printf(" %s", boundaries[i]);
167 		}
168 		printf("\n");
169 	}
170 	p = hvalue("Content-Transfer-Encoding", header);
171 	if (p == NULL ||
172 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
173 			   MimeTokenTab)) == NULL ||
174 	    pvp[0] == NULL)
175 	{
176 		cte = NULL;
177 	}
178 	else
179 	{
180 		cataddr(pvp, NULL, buf, sizeof buf, '\0');
181 		cte = newstr(buf);
182 	}
183 
184 	type = subtype = NULL;
185 	p = hvalue("Content-Type", header);
186 	if (p == NULL)
187 	{
188 		if (bitset(M87F_DIGEST, flags))
189 			p = "message/rfc822";
190 		else
191 			p = "text/plain";
192 	}
193 	if (p != NULL &&
194 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
195 			   MimeTokenTab)) != NULL &&
196 	    pvp[0] != NULL)
197 	{
198 		if (tTd(43, 40))
199 		{
200 			for (i = 0; pvp[i] != NULL; i++)
201 				printf("pvp[%d] = \"%s\"\n", i, pvp[i]);
202 		}
203 		type = *pvp++;
204 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
205 		    *++pvp != NULL)
206 		{
207 			subtype = *pvp++;
208 		}
209 
210 		/* break out parameters */
211 		while (*pvp != NULL && argc < MAXMIMEARGS)
212 		{
213 			/* skip to semicolon separator */
214 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
215 				pvp++;
216 			if (*pvp++ == NULL || *pvp == NULL)
217 				break;
218 
219 			/* extract field name */
220 			argv[argc].field = *pvp++;
221 
222 			/* see if there is a value */
223 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
224 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
225 			{
226 				argv[argc].value = *pvp;
227 				argc++;
228 			}
229 		}
230 	}
231 
232 	/* check for disaster cases */
233 	if (type == NULL)
234 		type = "-none-";
235 	if (subtype == NULL)
236 		subtype = "-none-";
237 
238 	/* don't propogate some flags more than one level into the message */
239 	flags &= ~M87F_DIGEST;
240 
241 	/*
242 	**  Check for cases that can not be encoded.
243 	**
244 	**	For example, you can't encode certain kinds of types
245 	**	or already-encoded messages.  If we find this case,
246 	**	just copy it through.
247 	*/
248 
249 	sprintf(buf, "%s/%s", type, subtype);
250 	if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
251 		flags |= M87F_NO8BIT;
252 
253 	/*
254 	**  Multipart requires special processing.
255 	**
256 	**	Do a recursive descent into the message.
257 	*/
258 
259 	if (strcasecmp(type, "multipart") == 0 && !bitset(M87F_NO8BIT, flags))
260 	{
261 		int blen;
262 
263 		if (strcasecmp(subtype, "digest") == 0)
264 			flags |= M87F_DIGEST;
265 
266 		for (i = 0; i < argc; i++)
267 		{
268 			if (strcasecmp(argv[i].field, "boundary") == 0)
269 				break;
270 		}
271 		if (i >= argc)
272 		{
273 			syserr("mime8to7: Content-Type: %s missing boundary", p);
274 			p = "---";
275 		}
276 		else
277 		{
278 			p = argv[i].value;
279 			stripquotes(p);
280 		}
281 		blen = strlen(p);
282 		if (blen > sizeof bbuf - 1)
283 		{
284 			syserr("mime8to7: multipart boundary \"%s\" too long",
285 				p);
286 			blen = sizeof bbuf - 1;
287 		}
288 		strncpy(bbuf, p, blen);
289 		bbuf[blen] = '\0';
290 		if (tTd(43, 1))
291 			printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
292 		for (i = 0; i < MAXMIMENESTING; i++)
293 			if (boundaries[i] == NULL)
294 				break;
295 		if (i >= MAXMIMENESTING)
296 			syserr("mime8to7: multipart nesting boundary too deep");
297 		else
298 		{
299 			boundaries[i] = bbuf;
300 			boundaries[i + 1] = NULL;
301 		}
302 
303 		/* skip the early "comment" prologue */
304 		putline("", mci);
305 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
306 		{
307 			bt = mimeboundary(buf, boundaries);
308 			if (bt != MBT_NOTSEP)
309 				break;
310 			putxline(buf, mci, PXLF_MAPFROM|PXLF_STRIP8BIT);
311 			if (tTd(43, 99))
312 				printf("  ...%s", buf);
313 		}
314 		if (feof(e->e_dfp))
315 			bt = MBT_FINAL;
316 		while (bt != MBT_FINAL)
317 		{
318 			auto HDR *hdr = NULL;
319 
320 			sprintf(buf, "--%s", bbuf);
321 			putline(buf, mci);
322 			if (tTd(43, 35))
323 				printf("  ...%s\n", buf);
324 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
325 			if (tTd(43, 101))
326 				putline("+++after collect", mci);
327 			putheader(mci, hdr, e);
328 			if (tTd(43, 101))
329 				putline("+++after putheader", mci);
330 			bt = mime8to7x(mci, hdr, e, boundaries, flags);
331 		}
332 		sprintf(buf, "--%s--", bbuf);
333 		putline(buf, mci);
334 		if (tTd(43, 35))
335 			printf("  ...%s\n", buf);
336 		boundaries[i] = NULL;
337 
338 		/* skip the late "comment" epilogue */
339 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
340 		{
341 			bt = mimeboundary(buf, boundaries);
342 			if (bt != MBT_NOTSEP)
343 				break;
344 			putxline(buf, mci, PXLF_MAPFROM|PXLF_STRIP8BIT);
345 			if (tTd(43, 99))
346 				printf("  ...%s", buf);
347 		}
348 		if (feof(e->e_dfp))
349 			bt = MBT_FINAL;
350 		if (tTd(43, 3))
351 			printf("\t\t\tmime8to7=>%s (multipart)\n",
352 				MimeBoundaryNames[bt]);
353 		return bt;
354 	}
355 
356 	/*
357 	**  Message/* types -- recurse exactly once.
358 	*/
359 
360 	if (strcasecmp(type, "message") == 0)
361 	{
362 		if (strcasecmp(subtype, "rfc822") != 0)
363 		{
364 			flags |= M87F_NO8BIT;
365 		}
366 		else
367 		{
368 			register char *q;
369 			auto HDR *hdr = NULL;
370 
371 			putline("", mci);
372 
373 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
374 			if (tTd(43, 101))
375 				putline("+++after collect", mci);
376 			putheader(mci, hdr, e);
377 			if (tTd(43, 101))
378 				putline("+++after putheader", mci);
379 			bt = mime8to7(mci, hdr, e, boundaries, flags);
380 			return bt;
381 		}
382 	}
383 
384 	/*
385 	**  Non-compound body type
386 	**
387 	**	Compute the ratio of seven to eight bit characters;
388 	**	use that as a heuristic to decide how to do the
389 	**	encoding.
390 	*/
391 
392 	sectionsize = sectionhighbits = 0;
393 	if (!bitset(M87F_NO8BIT, flags))
394 	{
395 		/* remember where we were */
396 		offset = ftell(e->e_dfp);
397 		if (offset == -1)
398 			syserr("mime8to7: cannot ftell on df%s", e->e_id);
399 
400 		/* do a scan of this body type to count character types */
401 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
402 		{
403 			if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
404 				break;
405 			for (p = buf; *p != '\0'; p++)
406 			{
407 				/* count bytes with the high bit set */
408 				sectionsize++;
409 				if (bitset(0200, *p))
410 					sectionhighbits++;
411 			}
412 
413 			/*
414 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
415 			**  assume base64.  This heuristic avoids double-reading
416 			**  large graphics or video files.
417 			*/
418 
419 			if (sectionsize >= 4096 &&
420 			    sectionhighbits > sectionsize / 4)
421 				break;
422 		}
423 
424 		/* return to the original offset for processing */
425 		/* XXX use relative seeks to handle >31 bit file sizes? */
426 		if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
427 			syserr("mime8to7: cannot fseek on df%s", e->e_id);
428 		else
429 			clearerr(e->e_dfp);
430 	}
431 
432 	/*
433 	**  Heuristically determine encoding method.
434 	**	If more than 1/8 of the total characters have the
435 	**	eighth bit set, use base64; else use quoted-printable.
436 	**	However, only encode binary encoded data as base64,
437 	**	since otherwise the NL=>CRLF mapping will be a problem.
438 	*/
439 
440 	if (tTd(43, 8))
441 	{
442 		printf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s\n",
443 			sectionhighbits, sectionsize,
444 			cte == NULL ? "[none]" : cte);
445 	}
446 	if (cte != NULL && strcasecmp(cte, "binary") == 0)
447 		sectionsize = sectionhighbits;
448 	linelen = 0;
449 	bp = buf;
450 	if (sectionhighbits == 0)
451 	{
452 		/* no encoding necessary */
453 		if (cte != NULL)
454 		{
455 			sprintf(buf, "Content-Transfer-Encoding: %s", cte);
456 			putline(buf, mci);
457 			if (tTd(43, 36))
458 				printf("  ...%s\n", buf);
459 		}
460 		putline("", mci);
461 		mci->mci_flags &= ~MCIF_INHEADER;
462 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
463 		{
464 			bt = mimeboundary(buf, boundaries);
465 			if (bt != MBT_NOTSEP)
466 				break;
467 			putline(buf, mci);
468 		}
469 		if (feof(e->e_dfp))
470 			bt = MBT_FINAL;
471 	}
472 	else if (sectionsize / 8 < sectionhighbits)
473 	{
474 		/* use base64 encoding */
475 		int c1, c2;
476 
477 		putline("Content-Transfer-Encoding: base64", mci);
478 		if (tTd(43, 36))
479 			printf("  ...Content-Transfer-Encoding: base64\n");
480 		putline("", mci);
481 		mci->mci_flags &= ~MCIF_INHEADER;
482 		mf->mf_fill = mime_fill_crlf;
483 		while ((c1 = mime_getc(mf)) != EOF)
484 		{
485 			if (linelen > 71)
486 			{
487 				*bp = '\0';
488 				putline(buf, mci);
489 				linelen = 0;
490 				bp = buf;
491 			}
492 			linelen += 4;
493 			*bp++ = Base64Code[(c1 >> 2)];
494 			c1 = (c1 & 0x03) << 4;
495 			c2 = mime_getc(mf);
496 			if (c2 == EOF)
497 			{
498 				*bp++ = Base64Code[c1];
499 				*bp++ = '=';
500 				*bp++ = '=';
501 				break;
502 			}
503 			c1 |= (c2 >> 4) & 0x0f;
504 			*bp++ = Base64Code[c1];
505 			c1 = (c2 & 0x0f) << 2;
506 			c2 = mime_getc(mf);
507 			if (c2 == EOF)
508 			{
509 				*bp++ = Base64Code[c1];
510 				*bp++ = '=';
511 				break;
512 			}
513 			c1 |= (c2 >> 6) & 0x03;
514 			*bp++ = Base64Code[c1];
515 			*bp++ = Base64Code[c2 & 0x3f];
516 		}
517 	}
518 	else
519 	{
520 		/* use quoted-printable encoding */
521 		int c1, c2;
522 		int fromstate;
523 		BITMAP badchars;
524 
525 		/* set up map of characters that must be mapped */
526 		clrbitmap(badchars);
527 		for (c1 = 0x00; c1 < 0x20; c1++)
528 			setbitn(c1, badchars);
529 		clrbitn('\t', badchars);
530 		for (c1 = 0x7f; c1 < 0x100; c1++)
531 			setbitn(c1, badchars);
532 		setbitn('=', badchars);
533 		if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags))
534 			for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++)
535 				setbitn(*p, badchars);
536 
537 		putline("Content-Transfer-Encoding: quoted-printable", mci);
538 		if (tTd(43, 36))
539 			printf("  ...Content-Transfer-Encoding: quoted-printable\n");
540 		putline("", mci);
541 		mci->mci_flags &= ~MCIF_INHEADER;
542 		fromstate = 0;
543 		c2 = '\n';
544 		mf->mf_fill = mime_fill_nl;
545 		while ((c1 = mime_getc(mf)) != EOF)
546 		{
547 			if (c1 == '\n')
548 			{
549 				if (c2 == ' ' || c2 == '\t')
550 				{
551 					*bp++ = '=';
552 					*bp++ = Base16Code[(c2 >> 4) & 0x0f];
553 					*bp++ = Base16Code[c2 & 0x0f];
554 					*bp = '\0';
555 					putline(buf, mci);
556 					bp = buf;
557 				}
558 				*bp = '\0';
559 				putline(buf, mci);
560 				linelen = fromstate = 0;
561 				bp = buf;
562 				c2 = c1;
563 				continue;
564 			}
565 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
566 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
567 			{
568 				*bp++ = '=';
569 				*bp++ = '2';
570 				*bp++ = '0';
571 				linelen += 3;
572 			}
573 			else if (c2 == ' ' || c2 == '\t')
574 			{
575 				*bp++ = c2;
576 				linelen++;
577 			}
578 			if (linelen > 72)
579 			{
580 				*bp++ = '=';
581 				*bp = '\0';
582 				putline(buf, mci);
583 				linelen = fromstate = 0;
584 				bp = buf;
585 				c2 = '\n';
586 			}
587 			if (bitnset(c1 & 0xff, badchars))
588 			{
589 				*bp++ = '=';
590 				*bp++ = Base16Code[(c1 >> 4) & 0x0f];
591 				*bp++ = Base16Code[c1 & 0x0f];
592 				linelen += 3;
593 			}
594 			else if (c1 != ' ' && c1 != '\t')
595 			{
596 				if (linelen < 4 && c1 == "From"[linelen])
597 					fromstate++;
598 				*bp++ = c1;
599 				linelen++;
600 			}
601 			c2 = c1;
602 		}
603 
604 		/* output any saved character */
605 		if (c2 == ' ' || c2 == '\t')
606 		{
607 			*bp++ = '=';
608 			*bp++ = Base16Code[(c2 >> 4) & 0x0f];
609 			*bp++ = Base16Code[c2 & 0x0f];
610 			linelen += 3;
611 		}
612 	}
613 	if (linelen > 0)
614 	{
615 		*bp = '\0';
616 		putline(buf, mci);
617 	}
618 	if (tTd(43, 3))
619 		printf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
620 	return bt;
621 }
622 /*
623 **  MIME_GETCHAR -- get a character for MIME processing
624 **
625 **	Treats boundaries as EOF.
626 **
627 **	Parameters:
628 **		fp -- the input file.
629 **		boundaries -- the current MIME boundaries.
630 **		btp -- if the return value is EOF, *btp is set to
631 **			the type of the boundary.
632 **
633 **	Returns:
634 **		The next character in the input stream.
635 */
636 
637 int
638 mime_getchar(fp, boundaries, btp)
639 	register FILE *fp;
640 	char **boundaries;
641 	int *btp;
642 {
643 	int c;
644 	static u_char *bp = NULL;
645 	static int buflen = 0;
646 	static bool atbol = TRUE;	/* at beginning of line */
647 	static int bt = MBT_SYNTAX;	/* boundary type of next EOF */
648 	static u_char buf[128];		/* need not be a full line */
649 
650 	if (buflen > 0)
651 	{
652 		buflen--;
653 		return *bp++;
654 	}
655 	bp = buf;
656 	buflen = 0;
657 	c = getc(fp);
658 	if (c == '\n')
659 	{
660 		/* might be part of a MIME boundary */
661 		*bp++ = c;
662 		atbol = TRUE;
663 		c = getc(fp);
664 		if (c == '\n')
665 		{
666 			ungetc(c, fp);
667 			return c;
668 		}
669 	}
670 	if (c != EOF)
671 		*bp++ = c;
672 	else
673 		bt = MBT_FINAL;
674 	if (atbol && c == '-')
675 	{
676 		/* check for a message boundary */
677 		c = getc(fp);
678 		if (c != '-')
679 		{
680 			if (c != EOF)
681 				*bp++ = c;
682 			else
683 				bt = MBT_FINAL;
684 			buflen = bp - buf - 1;
685 			bp = buf;
686 			return *bp++;
687 		}
688 
689 		/* got "--", now check for rest of separator */
690 		*bp++ = '-';
691 		while (bp < &buf[sizeof buf - 2] &&
692 		       (c = getc(fp)) != EOF && c != '\n')
693 		{
694 			*bp++ = c;
695 		}
696 		*bp = '\0';
697 		bt = mimeboundary(&buf[1], boundaries);
698 		switch (bt)
699 		{
700 		  case MBT_FINAL:
701 		  case MBT_INTERMED:
702 			/* we have a message boundary */
703 			buflen = 0;
704 			*btp = bt;
705 			return EOF;
706 		}
707 
708 		atbol = c == '\n';
709 		if (c != EOF)
710 			*bp++ = c;
711 	}
712 
713 	buflen = bp - buf - 1;
714 	if (buflen < 0)
715 	{
716 		*btp = bt;
717 		return EOF;
718 	}
719 	bp = buf;
720 	return *bp++;
721 }
722 /*
723 **  MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
724 **
725 **	Parameters:
726 **		fp -- the input file.
727 **		boundaries -- the current MIME boundaries.
728 **		btp -- if the return value is EOF, *btp is set to
729 **			the type of the boundary.
730 **
731 **	Returns:
732 **		The next character in the input stream.
733 */
734 
735 int
736 mime_getchar_crlf(fp, boundaries, btp)
737 	register FILE *fp;
738 	char **boundaries;
739 	int *btp;
740 {
741 	static bool sendlf = FALSE;
742 	int c;
743 
744 	if (sendlf)
745 	{
746 		sendlf = FALSE;
747 		return '\n';
748 	}
749 	c = mime_getchar(fp, boundaries, btp);
750 	if (c == '\n')
751 	{
752 		sendlf = TRUE;
753 		return '\r';
754 	}
755 	return c;
756 }
757 /*
758 **  MIME7TO8 -- convert 7 bit MIME message to 8 bit
759 */
760 
/*
**  Lookup table indexed by character code: hex digits (either case)
**  give their value 0-15, NL and CR give 16, everything else,
**  including all codes with the high bit set, gives -1.
*/

char	QPdecoder[256] =
{
	/* 0x00 - 0x0f: control characters; nl (0x0a) and cr (0x0d) are 16 */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, 16, -1, -1,
	/* 0x10 - 0x1f: control characters */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 - 0x2f: space and punctuation */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 - 0x3f: digits 0-9, then : ; < = > ? */
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
	/* 0x40 - 0x4f: @, A-F, then G-O */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x50 - 0x5f: P-Z and punctuation */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x60 - 0x6f: `, a-f, then g-o */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x70 - 0x7f: p-z, punctuation and del */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

	/* 0x80 - 0xff: high bit set */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
797 
798 
799 int
800 mime7to8(mci, header, e, boundaries, flags)
801 	register MCI *mci;
802 	HDR *header;
803 	register ENVELOPE *e;
804 	char **boundaries;
805 	int flags;
806 {
807 	register char *p;
808 	int linelen;
809 	int bt;
810 	off_t offset;
811 	int i;
812 	char *type;
813 	char *subtype;
814 	char *cte;
815 	char **pvp;
816 	int argc = 0;
817 	char *bp;
818 	struct args argv[MAXMIMEARGS];
819 	char bbuf[128];
820 	char buf[MAXLINE];
821 	char pvpbuf[MAXLINE];
822 	extern char MimeTokenTab[256];
823 
824 	if (tTd(43, 1))
825 	{
826 		printf("mime7to8: flags = %x, boundaries =", flags);
827 		if (boundaries[0] == NULL)
828 			printf(" <none>");
829 		else
830 		{
831 			for (i = 0; boundaries[i] != NULL; i++)
832 				printf(" %s", boundaries[i]);
833 		}
834 		printf("\n");
835 	}
836 	p = hvalue("Content-Transfer-Encoding", header);
837 	if (p == NULL ||
838 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
839 			   MimeTokenTab)) == NULL ||
840 	    pvp[0] == NULL)
841 	{
842 		cte = NULL;
843 	}
844 	else
845 	{
846 		cataddr(pvp, NULL, buf, sizeof buf, '\0');
847 		cte = newstr(buf);
848 	}
849 
850 	type = subtype = NULL;
851 	p = hvalue("Content-Type", header);
852 	if (p == NULL)
853 	{
854 		if (bitset(M87F_DIGEST, flags))
855 			p = "message/rfc822";
856 		else
857 			p = "text/plain";
858 	}
859 	if (p != NULL &&
860 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
861 			   MimeTokenTab)) != NULL &&
862 	    pvp[0] != NULL)
863 	{
864 		if (tTd(43, 40))
865 		{
866 			for (i = 0; pvp[i] != NULL; i++)
867 				printf("pvp[%d] = \"%s\"\n", i, pvp[i]);
868 		}
869 		type = *pvp++;
870 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
871 		    *++pvp != NULL)
872 		{
873 			subtype = *pvp++;
874 		}
875 
876 		/* break out parameters */
877 		while (*pvp != NULL && argc < MAXMIMEARGS)
878 		{
879 			/* skip to semicolon separator */
880 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
881 				pvp++;
882 			if (*pvp++ == NULL || *pvp == NULL)
883 				break;
884 
885 			/* extract field name */
886 			argv[argc].field = *pvp++;
887 
888 			/* see if there is a value */
889 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
890 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
891 			{
892 				argv[argc].value = *pvp;
893 				argc++;
894 			}
895 		}
896 	}
897 
898 	/* check for disaster cases */
899 	if (type == NULL)
900 		type = "-none-";
901 	if (subtype == NULL)
902 		subtype = "-none-";
903 
904 	/* don't propogate some flags more than one level into the message */
905 	flags &= ~M87F_DIGEST;
906 
907 	/*
908 	**  Check for cases that can not be encoded.
909 	**
910 	**	For example, you can't encode certain kinds of types
911 	**	or already-encoded messages.  If we find this case,
912 	**	just copy it through.
913 	*/
914 
915 	sprintf(buf, "%s/%s", type, subtype);
916 	if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
917 		flags |= M87F_NO8BIT;
918 
919 	/*
920 	**  Multipart requires special processing.
921 	**
922 	**	Do a recursive descent into the message.
923 	*/
924 
925 	if (strcasecmp(type, "multipart") == 0 && !bitset(M87F_NO8BIT, flags))
926 	{
927 		int blen;
928 
929 		if (strcasecmp(subtype, "digest") == 0)
930 			flags |= M87F_DIGEST;
931 
932 		for (i = 0; i < argc; i++)
933 		{
934 			if (strcasecmp(argv[i].field, "boundary") == 0)
935 				break;
936 		}
937 		if (i >= argc)
938 		{
939 			syserr("mime7to8: Content-Type: %s missing boundary", p);
940 			p = "---";
941 		}
942 		else
943 		{
944 			p = argv[i].value;
945 			stripquotes(p);
946 		}
947 		blen = strlen(p);
948 		if (blen > sizeof bbuf - 1)
949 		{
950 			syserr("mime7to8: multipart boundary \"%s\" too long",
951 				p);
952 			blen = sizeof bbuf - 1;
953 		}
954 		strncpy(bbuf, p, blen);
955 		bbuf[blen] = '\0';
956 		if (tTd(43, 1))
957 			printf("mime7to8: multipart boundary \"%s\"\n", bbuf);
958 		for (i = 0; i < MAXMIMENESTING; i++)
959 			if (boundaries[i] == NULL)
960 				break;
961 		if (i >= MAXMIMENESTING)
962 			syserr("mime7to8: multipart nesting boundary too deep");
963 		else
964 		{
965 			boundaries[i] = bbuf;
966 			boundaries[i + 1] = NULL;
967 		}
968 
969 		/* skip the early "comment" prologue */
970 		putline("", mci);
971 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
972 		{
973 			bt = mimeboundary(buf, boundaries);
974 			if (bt != MBT_NOTSEP)
975 				break;
976 			putxline(buf, mci, PXLF_MAPFROM|PXLF_STRIP8BIT);
977 			if (tTd(43, 99))
978 				printf("  ...%s", buf);
979 		}
980 		if (feof(e->e_dfp))
981 			bt = MBT_FINAL;
982 		while (bt != MBT_FINAL)
983 		{
984 			auto HDR *hdr = NULL;
985 
986 			sprintf(buf, "--%s", bbuf);
987 			putline(buf, mci);
988 			if (tTd(43, 35))
989 				printf("  ...%s\n", buf);
990 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
991 			if (tTd(43, 101))
992 				putline("+++after collect", mci);
993 			putheader(mci, hdr, e);
994 			if (tTd(43, 101))
995 				putline("+++after putheader", mci);
996 			bt = mime7to8(mci, hdr, e, boundaries, flags);
997 		}
998 		sprintf(buf, "--%s--", bbuf);
999 		putline(buf, mci);
1000 		if (tTd(43, 35))
1001 			printf("  ...%s\n", buf);
1002 		boundaries[i] = NULL;
1003 
1004 		/* skip the late "comment" epilogue */
1005 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
1006 		{
1007 			bt = mimeboundary(buf, boundaries);
1008 			if (bt != MBT_NOTSEP)
1009 				break;
1010 			putxline(buf, mci, PXLF_MAPFROM|PXLF_STRIP8BIT);
1011 			if (tTd(43, 99))
1012 				printf("  ...%s", buf);
1013 		}
1014 		if (feof(e->e_dfp))
1015 			bt = MBT_FINAL;
1016 		if (tTd(43, 3))
1017 			printf("\t\t\tmime7to8=>%s (multipart)\n",
1018 				MimeBoundaryNames[bt]);
1019 		return bt;
1020 	}
1021 
1022 	/*
1023 	**  Message/* types -- recurse exactly once.
1024 	*/
1025 
1026 	if (strcasecmp(type, "message") == 0)
1027 	{
1028 		if (strcasecmp(subtype, "rfc822") != 0)
1029 		{
1030 			flags |= M87F_NO8BIT;
1031 		}
1032 		else
1033 		{
1034 			register char *q;
1035 			auto HDR *hdr = NULL;
1036 
1037 			putline("", mci);
1038 
1039 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
1040 			if (tTd(43, 101))
1041 				putline("+++after collect", mci);
1042 			putheader(mci, hdr, e);
1043 			if (tTd(43, 101))
1044 				putline("+++after putheader", mci);
1045 			bt = mime7to8(mci, hdr, e, boundaries, flags);
1046 			return bt;
1047 		}
1048 	}
1049 
1050 	/*
1051 	**  Non-compound body type.
1052 	**
1053 	**	First scan to see if it is legally translatable.  This
1054 	**	means:
1055 	**
1056 	**	  * No long lines.
1057 	**	  * No bare CR or NL bytes.
1058 	**
1059 	**	We'll also check to see if there are any 8-bit characters
1060 	**	so we can make a good choice of final encoding method.
1061 	*/
1062 
1063 	nodecode = FALSE;
1064 	has8bit = FALSE;
1065 	if (cte != NULL && strcasecmp(cte, "base64") == 0)
1066 		mf->mf_fill = mime_fill_base64;
1067 	else if (cte != NULL && strcasecmp(cte, "quoted-printable") != 0)
1068 		mf->mf_fill = mime_fill_qp;
1069 	else
1070 		nodecode = TRUE;
1071 
1072 	maxlinelen = 0;
1073 	if (!nodecode)
1074 	{
1075 		offset = mime_tell(mfp);
1076 		if (offset == -1)
1077 			syserr("mime7to8: cannot ftell on df%s", e->e_id);
1078 
1079 		linelen = 0;
1080 		while (!nodecode && (c = mime_getc(mfp)) != EOF)
1081 		{
1082 			if (bitset(0x80, c))
1083 				has8bit = TRUE;
1084 			else if (c == '\n' || c == '\0')
1085 				nodecode = TRUE;
1086 			else if (c == '\r')
1087 			{
1088 				c = mime_getc(mfp);
1089 				if (c == '\n')
1090 				{
1091 					if (linelen > maxlinelen)
1092 						maxlinelen = linelen;
1093 					linelen = 0;
1094 					continue;
1095 				}
1096 				nodecode = TRUE;
1097 			}
1098 			else
1099 				linelen++;
1100 		}
1101 		if (linelen != 0 || maxlinelen > mci->mci_mailer->m_maxline)
1102 			nodecode = TRUE;
1103 		if (mime_seek(mfp, offset, SEEK_SET) < 0)
1104 			syserr("mime7to8: cannot fseek on df%s", e->e_id)
1105 		else
1106 			mime_clearerr(mfp);
1107 		mime_flush(mfp);
1108 	}
1109 
1110 	if (tTd(43, 8))
1111 	{
1112 		printf("mime7to8: cte=%s, maxlinelen=%d, nodecode=%d, has8bit=%d\n",
1113 			cte == NULL ? "[none]" : cte,
1114 			maxlinelen, nodecode, has8bit);
1115 	}
1116 
1117 	if (!nodecode)
1118 	{
1119 		/*
1120 		**  Convert to 8-bit.  We know it will be well-behaved.
1121 		*/
1122 
1123 		if (has8bit)
1124 			p = "8bit";
1125 		else
1126 			p = "7bit";
1127 	}
1128 	else
1129 	{
1130 		p = cte;
1131 		mfp->mf_fill = mime_fill;
1132 	}
1133 	if (p != NULL)
1134 	{
1135 		sprintf(buf, "Content-Transfer-Encoding: %s", p);
1136 		putline(buf, mci);
1137 	}
1138 	putline("", mci);
1139 	mci->mci_flags &= ~MCIF_INHEADER;
1140 
1141 	p = buf;
1142 	linelen = 0;
1143 	while ((c = mime_getc(mfp)) != EOF)
1144 	{
1145 		while (c == '\r')
1146 		{
1147 			c = mime_getc(mfp);
1148 			if (c == '\n')
1149 			{
1150 				*p = '\0';
1151 				putline(buf, mci);
1152 				linelen = fromstate = 0;
1153 				p = buf;
1154 				continue;
1155 			}
1156 			*p++ = '\r';
1157 		}
1158 		if (mci->mci_mailer->m_linelimit > 0 &&
1159 		    linelen > mci->mci_mailer->m_linelimit - 1)
1160 		{
1161 			/* line wrap */
1162 			putc('!', mci->mci_out);
1163 			fputs(mci->mci_mailer->m_eol, mci->mci_out);
1164 			linelen = 0;
1165 		}
1166 		if (linelen == 0 && c == '.' &&
1167 		    bitnset(M_XDOT, mci->mci_mailer->m_flags))
1168 		{
1169 			putc('.', mci->mci_out);
1170 			linelen++;
1171 		}
1172 		if (linelen == fromstate && c == "From "[fromstate])
1173 		{
1174 			if (fromstate == 4)
1175 			{
1176 				/* hide From_? */
1177 				if (bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
1178 				{
1179 					putc('>', mci->mci_out);
1180 					linelen++;
1181 				}
1182 				fputs("From ", mci->mci_out);
1183 				fromstate = -1;
1184 			}
1185 			fromstate++;
1186 			linelen++;
1187 			continue;
1188 		}
1189 		if (fromstate != 0)
1190 		{
1191 			p = "From ";
1192 			while (--fromstate >= 0)
1193 				putc(*p++, mci->mci_out);
1194 		}
1195 		putc(c, mci->mci_out);
1196 		linelen++;
1197 	}
1198 	if (tTd(43, 3))
1199 		printf("\t\t\tmime7to8=>%s (basic)\n", MimeBoundaryNames[bt]);
1200 	return bt;
1201 }
1202 /*
1203 **  MIME_FILL_QP -- fill buffer doing Quoted-Printable decoding
1204 **
1205 **	Parameters:
1206 **		mfp -- MIME file pointer.
1207 **
1208 **	Returns:
1209 **		The next character in the input stream.
1210 */
1211 
1212 int
1213 mime_fill_qp(mfp)
1214 	register MFILE;
1215 {
1216 	int c;
1217 
1218 	/* first do the raw buffer fill */
1219 	if (mime_fill_nl(mfp) == EOF)
1220 		return EOF;
1221 
1222 	/* now scan the buffer converting =XX */
1223 	ip = op = mfp->mf_buf;
1224 	il = mfp->mf_cnt;
1225 	while (--il >= 0)
1226 	{
1227 		if (*ip != '=')
1228 		{
1229 			*op++ = *ip++;
1230 			continue;
1231 		}
1232 
1233 		/* special handling for "=" sign */
1234 		if (il >= 2 && !bitset(0x80, ip[1] | ip[2]) &&
1235 		    isxdigit(ip[1]) && isxdigit(ip[2]))
1236 		{
1237 			/* this is an escape sequence */
1238 			/* ASCII dependence ahead.... */
1239 			if (isdigit(*++ip))
1240 				c = *ip - '0';
1241 			else
1242 				c = (*ip & 07) + 9;
1243 			c <<= 4;
1244 			if (isdigit(*++ip))
1245 				c |= *ip - '0';
1246 			else
1247 				c |= (*ip & 07) + 9;
1248 			*op++ = c;
1249 		}
1250 		else if ((il == 2 && ip[1] == '\r' && ip[2] == '\n') ||
1251 			 (il == 1 && ip[1] == '\n'))
1252 		{
1253 			/* this is a hidden newline */
1254 			break;
1255 		}
1256 
1257 		/* bogus `=' -- what to do?  treat it as normal */
1258 		*op++ = *ip++;
1259 	}
1260 
1261 	/* adjust buffer count to match reality */
1262 	if ((mfp->mf_cnt = op - mfp->mf_buf - 1) <= 0)
1263 		return EOF;
1264 	return *mfp->mf_bp++;
1265 }
1266 /*
1267 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
1268 **
1269 **	Parameters:
1270 **		line -- the input line.
1271 **		boundaries -- the set of currently pending boundaries.
1272 **
1273 **	Returns:
1274 **		MBT_NOTSEP -- if this is not a separator line
1275 **		MBT_INTERMED -- if this is an intermediate separator
1276 **		MBT_FINAL -- if this is a final boundary
1277 **		MBT_SYNTAX -- if this is a boundary for the wrong
1278 **			enclosure -- i.e., a syntax error.
1279 */
1280 
1281 int
1282 mimeboundary(line, boundaries)
1283 	register char *line;
1284 	char **boundaries;
1285 {
1286 	int type;
1287 	int i;
1288 	int savec;
1289 
1290 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
1291 		return MBT_NOTSEP;
1292 	i = strlen(line);
1293 	if (line[i - 1] == '\n')
1294 		i--;
1295 	if (tTd(43, 5))
1296 		printf("mimeboundary: line=\"%.*s\"... ", i, line);
1297 	while (line[i - 1] == ' ' || line[i - 1] == '\t')
1298 		i--;
1299 	if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
1300 	{
1301 		type = MBT_FINAL;
1302 		i -= 2;
1303 	}
1304 	else
1305 		type = MBT_INTERMED;
1306 
1307 	savec = line[i];
1308 	line[i] = '\0';
1309 	/* XXX should check for improper nesting here */
1310 	if (isboundary(&line[2], boundaries) < 0)
1311 		type = MBT_NOTSEP;
1312 	line[i] = savec;
1313 	if (tTd(43, 5))
1314 		printf("%s\n", MimeBoundaryNames[type]);
1315 	return type;
1316 }
1317 /*
1318 **  DEFCHARSET -- return default character set for message
1319 **
1320 **	The first choice for character set is for the mailer
1321 **	corresponding to the envelope sender.  If neither that
1322 **	nor the global configuration file has a default character
1323 **	set defined, return "unknown-8bit" as recommended by
1324 **	RFC 1428 section 3.
1325 **
1326 **	Parameters:
1327 **		e -- the envelope for this message.
1328 **
1329 **	Returns:
1330 **		The default character set for that mailer.
1331 */
1332 
1333 char *
1334 defcharset(e)
1335 	register ENVELOPE *e;
1336 {
1337 	if (e != NULL && e->e_from.q_mailer != NULL &&
1338 	    e->e_from.q_mailer->m_defcharset != NULL)
1339 		return e->e_from.q_mailer->m_defcharset;
1340 	if (DefaultCharSet != NULL)
1341 		return DefaultCharSet;
1342 	return "unknown-8bit";
1343 }
/*
**  ISBOUNDARY -- is a given string a currently valid boundary?
**
**	Does an exact, case-sensitive comparison of the string against
**	each entry of the NULL-terminated boundary list, in order.
**
**	Parameters:
**		line -- the current input line.
**		boundaries -- the list of valid boundaries
**			(terminated by a NULL entry).
**
**	Returns:
**		The index number in boundaries of the first entry
**			equal to line.
**		-1 -- if no entry matches.
*/

int
isboundary(line, boundaries)
	char *line;
	char **boundaries;
{
	register char **bp;

	/* walk the list until the NULL sentinel */
	bp = boundaries;
	while (*bp != NULL)
	{
		if (strcmp(*bp, line) == 0)
			return (int) (bp - boundaries);
		bp++;
	}
	return -1;
}
1371