xref: /original-bsd/usr.sbin/sendmail/src/mime.c (revision fac0c393)
1 /*
2  * Copyright (c) 1994 Eric P. Allman
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 # include "sendmail.h"
10 # include <string.h>
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)mime.c	8.12 (Berkeley) 03/21/95";
14 #endif /* not lint */
15 
16 /*
17 **  MIME support.
18 **
19 **	I am indebted to John Beck of Hewlett-Packard, who contributed
20 **	his code to me for inclusion.  As it turns out, I did not use
21 **	his code since he used a "minimum change" approach that used
22 **	several temp files, and I wanted a "minimum impact" approach
23 **	that would avoid copying.  However, looking over his code
24 **	helped me cement my understanding of the problem.
25 **
26 **	I also looked at, but did not directly use, Nathaniel
27 **	Borenstein's "code.c" module.  Again, it functioned as
28 **	a file-to-file translator, which did not fit within my
29 **	design bounds, but it was a useful base for understanding
30 **	the problem.
31 */
32 
33 
34 /*
**  Digit alphabets for the two transfer encodings produced in this file.
**  Base16Code[n] is the hex digit for n (0-15); it is used for the "=XX"
**  escapes of quoted-printable.  Base64Code[n] is the base64 digit for
**  the six-bit value n (0-63).
*/
35 char	Base16Code[] =	"0123456789ABCDEF";
36 char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
37 
38 /* types of MIME boundaries; used as the result of mimeboundary() and mime8to7() */
39 #define MBT_SYNTAX	0	/* syntax error */
40 #define MBT_NOTSEP	1	/* not a boundary */
41 #define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
42 #define MBT_FINAL	3	/* final boundary (trailing -- included) */
43 
44 static int	MimeBoundaryType;	/* internal linkage; stored by mime_getchar() each time it classifies a "--" line, read back by mime8to7() */
45 /*
46 **  MIME8TO7 -- output 8 bit body in 7 bit format
47 **
48 **	The header has already been output -- this has to do the
49 **	8 to 7 bit conversion.  It would be easy if we didn't have
50 **	to deal with nested formats (multipart/xxx and message/rfc822).
51 **
52 **	We won't be called if we don't have to do a conversion, and
53 **	appropriate MIME-Version: and Content-Type: fields have been
54 **	output.  Any Content-Transfer-Encoding: field has not been
55 **	output, and we can add it here.
56 **
57 **	Parameters:
58 **		mci -- mailer connection information.
59 **		header -- the header for this body part.
60 **		e -- envelope.
61 **		boundaries -- the currently pending message boundaries.
62 **			NULL if we are processing the outer portion.
63 **		flags -- to tweak processing.
64 **
65 **	Returns:
66 **		An indicator of what terminated the message part:
67 **		  MBT_FINAL -- the final boundary
68 **		  MBT_INTERMED -- an intermediate boundary
69 **		  MBT_NOTSEP -- an end of file
70 */
71 
72 struct args
73 {
74 	char	*field;		/* name of field */
75 	char	*value;		/* value of that field */
76 };
77 
78 int
79 mime8to7(mci, header, e, boundaries, flags)
80 	register MCI *mci;
81 	HDR *header; register ENVELOPE *e;
82 	char **boundaries;
83 	int flags;
84 {
85 	register char *p;
86 	int linelen;
87 	int bt;
88 	off_t offset;
89 	size_t sectionsize, sectionhighbits;
90 	int i;
91 	char *type;
92 	char *subtype;
93 	char **pvp;
94 	int argc = 0;
95 	struct args argv[MAXMIMEARGS];
96 	char bbuf[128];
97 	char buf[MAXLINE];
98 	char pvpbuf[MAXLINE];
99 
100 	if (tTd(43, 1))
101 	{
102 		printf("mime8to7: boundary=%s\n",
103 			boundaries[0] == NULL ? "<none>" : boundaries[0]);
104 		for (i = 1; boundaries[i] != NULL; i++)
105 			printf("\tboundaries[i]\n");
106 	}
107 	type = subtype = "-none-";
108 	p = hvalue("Content-Type", header);
109 	if (p != NULL &&
110 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL)) != NULL &&
111 	    pvp[0] != NULL)
112 	{
113 		type = *pvp++;
114 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
115 		    *++pvp != NULL)
116 		{
117 			subtype = *pvp++;
118 		}
119 
120 		/* break out parameters */
121 		while (*pvp != NULL && argc < MAXMIMEARGS)
122 		{
123 			/* skip to semicolon separator */
124 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
125 				pvp++;
126 			if (*pvp++ == NULL || *pvp == NULL)
127 				break;
128 
129 			/* extract field name */
130 			argv[argc].field = *pvp++;
131 
132 			/* see if there is a value */
133 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
134 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
135 			{
136 				argv[argc].value = *pvp;
137 				argc++;
138 			}
139 		}
140 	}
141 	if (strcasecmp(type, "multipart") == 0)
142 	{
143 		register char *q;
144 
145 		for (i = 0; i < argc; i++)
146 		{
147 			if (strcasecmp(argv[i].field, "boundary") == 0)
148 				break;
149 		}
150 		if (i >= argc)
151 		{
152 			syserr("mime8to7: Content-Type: %s missing boundary", p);
153 			p = "---";
154 		}
155 		else
156 			p = argv[i].value;
157 		if (*p == '"')
158 			q = strchr(p, '"');
159 		else
160 			q = p + strlen(p);
161 		if (q - p > sizeof bbuf - 1)
162 		{
163 			syserr("mime8to7: multipart boundary \"%.*s\" too long",
164 				q - p, p);
165 			q = p + sizeof bbuf - 1;
166 		}
167 		strncpy(bbuf, p, q - p);
168 		bbuf[q - p] = '\0';
169 		if (tTd(43, 1))
170 		{
171 			printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
172 		}
173 		for (i = 0; i < MAXMIMENESTING; i++)
174 			if (boundaries[i] == NULL)
175 				break;
176 		if (i >= MAXMIMENESTING)
177 			syserr("mime8to7: multipart nesting boundary too deep");
178 		else
179 		{
180 			boundaries[i] = bbuf;
181 			boundaries[i + 1] = NULL;
182 		}
183 
184 		/* flag subtypes that can't have any 8-bit data */
185 		if (strcasecmp(subtype, "signed") == 0)
186 			flags |= M87F_NO8BIT;
187 
188 		/* skip the early "comment" prologue */
189 		bt = MBT_FINAL;
190 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
191 		{
192 			bt = mimeboundary(buf, boundaries);
193 			if (bt != MBT_NOTSEP)
194 				break;
195 			putline(buf, mci);
196 		}
197 		while (bt != MBT_FINAL)
198 		{
199 			auto HDR *hdr = NULL;
200 
201 			sprintf(buf, "--%s", bbuf);
202 			putline(buf, mci);
203 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
204 			putheader(mci, hdr, e, 0);
205 			bt = mime8to7(mci, hdr, e, boundaries, flags);
206 		}
207 		sprintf(buf, "--%s--", bbuf);
208 		putline(buf, mci);
209 
210 		/* skip the late "comment" epilogue */
211 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
212 		{
213 			putline(buf, mci);
214 			bt = mimeboundary(buf, boundaries);
215 			if (bt != MBT_NOTSEP)
216 				break;
217 		}
218 		boundaries[i] = NULL;
219 		return bt;
220 	}
221 
222 	/*
223 	**  Non-compound body type
224 	**
225 	**	Compute the ratio of seven to eight bit characters;
226 	**	use that as a heuristic to decide how to do the
227 	**	encoding.
228 	*/
229 
230 	/* handle types that cannot have 8-bit data internally */
231 	sprintf(buf, "%s/%s", type, subtype);
232 	if (wordinclass(buf, 'n'))
233 		flags |= M87F_NO8BIT;
234 
235 	sectionsize = sectionhighbits = 0;
236 	if (!bitset(M87F_NO8BIT, flags))
237 	{
238 		/* remember where we were */
239 		offset = ftell(e->e_dfp);
240 		if (offset == -1)
241 			syserr("mime8to7: cannot ftell on df%s", e->e_id);
242 
243 		/* do a scan of this body type to count character types */
244 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
245 		{
246 			bt = mimeboundary(buf, boundaries);
247 			if (bt != MBT_NOTSEP)
248 				break;
249 			for (p = buf; *p != '\0'; p++)
250 			{
251 				/* count bytes with the high bit set */
252 				sectionsize++;
253 				if (bitset(0200, *p))
254 					sectionhighbits++;
255 			}
256 
257 			/*
258 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
259 			**  assume base64.  This heuristic avoids double-reading
260 			**  large graphics or video files.
261 			*/
262 
263 			if (sectionsize >= 4096 &&
264 			    sectionhighbits > sectionsize / 4)
265 				break;
266 		}
267 		if (feof(e->e_dfp))
268 			bt = MBT_FINAL;
269 
270 		/* return to the original offset for processing */
271 		/* XXX use relative seeks to handle >31 bit file sizes? */
272 		if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
273 			syserr("mime8to7: cannot fseek on df%s", e->e_id);
274 	}
275 
276 	/*
277 	**  Heuristically determine encoding method.
278 	**	If more than 1/8 of the total characters have the
279 	**	eighth bit set, use base64; else use quoted-printable.
280 	*/
281 
282 	if (tTd(43, 8))
283 	{
284 		printf("mime8to7: %ld high bits in %ld bytes\n",
285 			sectionhighbits, sectionsize);
286 	}
287 	if (sectionhighbits == 0)
288 	{
289 		/* no encoding necessary */
290 		p = hvalue("content-transfer-encoding", header);
291 		if (p != NULL)
292 		{
293 			sprintf(buf, "Content-Transfer-Encoding: %s", p);
294 			putline(buf, mci);
295 		}
296 		putline("", mci);
297 		mci->mci_flags &= ~MCIF_INHEADER;
298 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
299 		{
300 			bt = mimeboundary(buf, boundaries);
301 			if (bt != MBT_NOTSEP)
302 				break;
303 			if (buf[0] == 'F' &&
304 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags) &&
305 			    strncmp(buf, "From ", 5) == 0)
306 				(void) putc('>', mci->mci_out);
307 			putline(buf, mci);
308 		}
309 	}
310 	else if (sectionsize / 8 < sectionhighbits)
311 	{
312 		/* use base64 encoding */
313 		int c1, c2;
314 
315 		putline("Content-Transfer-Encoding: base64", mci);
316 		putline("", mci);
317 		mci->mci_flags &= ~MCIF_INHEADER;
318 		linelen = 0;
319 		while ((c1 = mime_getchar(e->e_dfp, boundaries)) != EOF)
320 		{
321 			if (linelen > 71)
322 			{
323 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
324 				linelen = 0;
325 			}
326 			linelen += 4;
327 			fputc(Base64Code[c1 >> 2], mci->mci_out);
328 			c1 = (c1 & 0x03) << 4;
329 			c2 = mime_getchar(e->e_dfp, boundaries);
330 			if (c2 == EOF)
331 			{
332 				fputc(Base64Code[c1], mci->mci_out);
333 				fputc('=', mci->mci_out);
334 				fputc('=', mci->mci_out);
335 				break;
336 			}
337 			c1 |= (c2 >> 4) & 0x0f;
338 			fputc(Base64Code[c1], mci->mci_out);
339 			c1 = (c2 & 0x0f) << 2;
340 			c2 = mime_getchar(e->e_dfp, boundaries);
341 			if (c2 == EOF)
342 			{
343 				fputc(Base64Code[c1], mci->mci_out);
344 				fputc('=', mci->mci_out);
345 				break;
346 			}
347 			c1 |= (c2 >> 6) & 0x03;
348 			fputc(Base64Code[c1], mci->mci_out);
349 			fputc(Base64Code[c2 & 0x3f], mci->mci_out);
350 		}
351 	}
352 	else
353 	{
354 		/* use quoted-printable encoding */
355 		int c1, c2;
356 		int fromstate;
357 
358 		putline("Content-Transfer-Encoding: quoted-printable", mci);
359 		putline("", mci);
360 		mci->mci_flags &= ~MCIF_INHEADER;
361 		linelen = fromstate = 0;
362 		c2 = '\n';
363 		while ((c1 = mime_getchar(e->e_dfp, boundaries)) != EOF)
364 		{
365 			if (c1 == '\n')
366 			{
367 				if (c2 == ' ' || c2 == '\t')
368 				{
369 					fputc('=', mci->mci_out);
370 					fputc(Base16Code[(c2 >> 4) & 0x0f],
371 								mci->mci_out);
372 					fputc(Base16Code[c2 & 0x0f],
373 								mci->mci_out);
374 					fputs(mci->mci_mailer->m_eol,
375 								mci->mci_out);
376 				}
377 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
378 				linelen = fromstate = 0;
379 				c2 = c1;
380 				continue;
381 			}
382 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
383 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
384 			{
385 				fputs("=20", mci->mci_out);
386 				linelen += 3;
387 			}
388 			else if (c2 == ' ' || c2 == '\t')
389 			{
390 				fputc(c2, mci->mci_out);
391 				linelen++;
392 			}
393 			if (linelen > 72)
394 			{
395 				fputc('=', mci->mci_out);
396 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
397 				linelen = fromstate = 0;
398 				c2 = '\n';
399 			}
400 			if (c2 == '\n' && c1 == '.' &&
401 				 bitnset(M_XDOT, mci->mci_mailer->m_flags))
402 			{
403 				fputc('.', mci->mci_out);
404 				linelen++;
405 			}
406 			if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=')
407 			{
408 				fputc('=', mci->mci_out);
409 				fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out);
410 				fputc(Base16Code[c1 & 0x0f], mci->mci_out);
411 				linelen += 3;
412 			}
413 			else if (c1 != ' ' && c1 != '\t')
414 			{
415 				if (linelen < 4 && c1 == "From"[linelen])
416 					fromstate++;
417 				fputc(c1, mci->mci_out);
418 				linelen++;
419 			}
420 			c2 = c1;
421 		}
422 
423 		/* output any saved character */
424 		if (c2 == ' ' || c2 == '\t')
425 		{
426 			fputc('=', mci->mci_out);
427 			fputc(Base16Code[(c2 >> 4) & 0x0f], mci->mci_out);
428 			fputc(Base16Code[c2 & 0x0f], mci->mci_out);
429 			linelen += 3;
430 		}
431 	}
432 	if (linelen > 0)
433 		fputs(mci->mci_mailer->m_eol, mci->mci_out);
434 	return MimeBoundaryType;
435 }
436 /*
437 **  MIME_GETCHAR -- get a character for MIME processing
438 **
439 **	Treats boundaries as EOF.
440 **
441 **	Parameters:
442 **		fp -- the input file.
443 **		boundaries -- the current MIME boundaries.
444 **
445 **	Returns:
446 **		The next character in the input stream.
447 */
448 
449 int
450 mime_getchar(fp, boundaries)
451 	register FILE *fp;
452 	char **boundaries;
453 {
454 	int c;
455 	static char *bp = NULL;
456 	static int buflen = 0;
457 	static bool atbol = TRUE;	/* at beginning of line */
458 	static char buf[128];		/* need not be a full line */
459 
460 	if (buflen > 0)
461 	{
462 		buflen--;
463 		return *bp++;
464 	}
465 	bp = buf;
466 	buflen = 0;
467 	c = fgetc(fp);
468 	if (c == '\n')
469 	{
470 		/* might be part of a MIME boundary */
471 		*bp++ = c;
472 		atbol = TRUE;
473 		c = fgetc(fp);
474 	}
475 	if (c != EOF)
476 		*bp++ = c;
477 	if (atbol && c == '-')
478 	{
479 		/* check for a message boundary */
480 		c = fgetc(fp);
481 		if (c != '-')
482 		{
483 			if (c != EOF)
484 				*bp++ = c;
485 			buflen = bp - buf - 1;
486 			bp = buf;
487 			return *bp++;
488 		}
489 
490 		/* got "--", now check for rest of separator */
491 		*bp++ = '-';
492 		while (bp < &buf[sizeof buf - 1] &&
493 		       (c = fgetc(fp)) != EOF && c != '\n')
494 		{
495 			*bp++ = c;
496 		}
497 		*bp = '\0';
498 		MimeBoundaryType = mimeboundary(buf, boundaries);
499 		switch (MimeBoundaryType)
500 		{
501 		  case MBT_FINAL:
502 		  case MBT_INTERMED:
503 			/* we have a message boundary */
504 			buflen = 0;
505 			return EOF;
506 		}
507 
508 		atbol = c == '\n';
509 		if (c != EOF)
510 			*bp++ = c;
511 	}
512 
513 	buflen = bp - buf - 1;
514 	if (buflen < 0)
515 		return EOF;
516 	bp = buf;
517 	return *bp++;
518 }
519 /*
520 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
521 **
522 **	Parameters:
523 **		line -- the input line.
524 **		boundaries -- the set of currently pending boundaries.
525 **
526 **	Returns:
527 **		MBT_NOTSEP -- if this is not a separator line
528 **		MBT_INTERMED -- if this is an intermediate separator
529 **		MBT_FINAL -- if this is a final boundary
530 **		MBT_SYNTAX -- if this is a boundary for the wrong
531 **			enclosure -- i.e., a syntax error.
532 */
533 
534 int
535 mimeboundary(line, boundaries)
536 	register char *line;
537 	char **boundaries;
538 {
539 	int type;
540 	int i;
541 	int savec;
542 
543 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
544 		return MBT_NOTSEP;
545 	if (tTd(43, 5))
546 		printf("mimeboundary: line=\"%s\"... ", line);
547 	i = strlen(line);
548 	if (line[i - 1] == '\n')
549 		i--;
550 	while (line[i - 1] == ' ' || line[i - 1] == '\t')
551 		i--;
552 	if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
553 	{
554 		type = MBT_FINAL;
555 		i -= 2;
556 	}
557 	else
558 		type = MBT_INTERMED;
559 
560 	savec = line[i];
561 	line[i] = '\0';
562 	/* XXX should check for improper nesting here */
563 	if (isboundary(&line[2], boundaries) < 0)
564 		type = MBT_NOTSEP;
565 	line[i] = savec;
566 	if (tTd(43, 5))
567 		printf("%d\n", type);
568 	return type;
569 }
570 /*
571 **  DEFCHARSET -- return default character set for message
572 **
573 **	The first choice for character set is for the mailer
574 **	corresponding to the envelope sender.  If neither that
575 **	nor the global configuration file has a default character
576 **	set defined, return "unknown-8bit" as recommended by
577 **	RFC 1428 section 3.
578 **
579 **	Parameters:
580 **		e -- the envelope for this message.
581 **
582 **	Returns:
583 **		The default character set for that mailer.
584 */
585 
586 char *
587 defcharset(e)
588 	register ENVELOPE *e;
589 {
590 	if (e != NULL && e->e_from.q_mailer != NULL &&
591 	    e->e_from.q_mailer->m_defcharset != NULL)
592 		return e->e_from.q_mailer->m_defcharset;
593 	if (DefaultCharSet != NULL)
594 		return DefaultCharSet;
595 	return "unknown-8bit";
596 }
/*
**  ISBOUNDARY -- is a given string a currently valid boundary?
**
**	The comparison is exact and case sensitive; the caller is
**	expected to have removed the leading "--" and any trailing
**	"--" or white space already.
**
**	Parameters:
**		line -- the current input line.
**		boundaries -- the list of valid boundaries, terminated
**			by a NULL entry (a NULL list matches nothing).
**
**	Returns:
**		The index number in boundaries if the line is found.
**		-1 -- otherwise.
**
*/

int
isboundary(line, boundaries)
	char *line;
	char **boundaries;
{
	register int i;

	if (boundaries == NULL)
		return -1;

	/* step through every entry; the index must advance on a mismatch */
	for (i = 0; boundaries[i] != NULL; i++)
	{
		if (strcmp(line, boundaries[i]) == 0)
			return i;
	}
	return -1;
}
625