xref: /original-bsd/usr.sbin/sendmail/src/mime.c (revision 948d00a2)
1 /*
2  * Copyright (c) 1994 Eric P. Allman
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 # include "sendmail.h"
10 # include <string.h>
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)mime.c	8.9 (Berkeley) 11/19/94";
14 #endif /* not lint */
15 
16 /*
17 **  MIME support.
18 **
19 **	I am indebted to John Beck of Hewlett-Packard, who contributed
20 **	his code to me for inclusion.  As it turns out, I did not use
21 **	his code since he used a "minimum change" approach that used
22 **	several temp files, and I wanted a "minimum impact" approach
23 **	that would avoid copying.  However, looking over his code
24 **	helped me cement my understanding of the problem.
25 **
26 **	I also looked at, but did not directly use, Nathaniel
27 **	Borenstein's "code.c" module.  Again, it functioned as
28 **	a file-to-file translator, which did not fit within my
29 **	design bounds, but it was a useful base for understanding
30 **	the problem.
31 */
32 
33 
34 /* digit alphabets: Base16Code supplies the hex digits of quoted-printable "=XX" escapes; Base64Code is indexed by a 6-bit value to get its base64 character */
35 char	Base16Code[] =	"0123456789ABCDEF";
36 char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
37 
38 /* types of MIME boundaries, as reported by mimeboundary() */
39 #define MBT_SYNTAX	0	/* syntax error */
40 #define MBT_NOTSEP	1	/* not a boundary */
41 #define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
42 #define MBT_FINAL	3	/* final boundary (trailing -- included) */
43 
44 static int	MimeBoundaryType;	/* internal linkage; result of the last boundary check made by mime_getchar(), read back by mime8to7() */
45 /*
46 **  MIME8TO7 -- output 8 bit body in 7 bit format
47 **
48 **	The header has already been output -- this has to do the
49 **	8 to 7 bit conversion.  It would be easy if we didn't have
50 **	to deal with nested formats (multipart/xxx and message/rfc822).
51 **
52 **	We won't be called if we don't have to do a conversion, and
53 **	appropriate MIME-Version: and Content-Type: fields have been
54 **	output.  Any Content-Transfer-Encoding: field has not been
55 **	output, and we can add it here.
56 **
57 **	Parameters:
58 **		mci -- mailer connection information.
59 **		header -- the header for this body part.
60 **		e -- envelope.
61 **		boundary -- the message boundary -- NULL if we are
62 **			processing the outer portion.
63 **
64 **	Returns:
65 **		An indicator of what terminated the message part:
66 **		  MBT_FINAL -- the final boundary
67 **		  MBT_INTERMED -- an intermediate boundary
68 **		  MBT_NOTSEP -- an end of file
69 */
70 
71 int
72 mime8to7(mci, header, e, boundary)
73 	register MCI *mci;
74 	HDR *header;
75 	register ENVELOPE *e;
76 	char *boundary;
77 {
78 	register char *p;
79 	int linelen;
80 	int bt;
81 	off_t offset;
82 	size_t sectionsize, sectionhighbits;
83 	char bbuf[128];
84 	char buf[MAXLINE];
85 
86 	if (tTd(43, 1))
87 	{
88 		printf("mime8to7: boundary=%s\n",
89 			boundary == NULL ? "<none>" : boundary);
90 	}
91 	p = hvalue("Content-Type", header);
92 	if (p != NULL && strncasecmp(p, "multipart/", 10) == 0)
93 	{
94 		register char *q;
95 
96 		/* oh dear -- this part is hard */
97 		p = strstr(p, "boundary=");		/*XXX*/
98 		if (p == NULL)
99 		{
100 			syserr("mime8to7: Content-Type: %s missing boundary", p);
101 			p = "---";
102 		}
103 		else
104 			p += 9;
105 		if (*p == '"')
106 			q = strchr(p, '"');
107 		else
108 			q = strchr(p, ',');
109 		if (q == NULL)
110 			q = p + strlen(p);
111 		if (q - p > sizeof bbuf - 1)
112 		{
113 			syserr("mime8to7: multipart boundary \"%.*s\" too long",
114 				q - p, p);
115 			q = p + sizeof bbuf - 1;
116 		}
117 		strncpy(bbuf, p, q - p);
118 		bbuf[q - p] = '\0';
119 		if (tTd(43, 1))
120 		{
121 			printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
122 		}
123 
124 		/* skip the early "comment" prologue */
125 		bt = MBT_FINAL;
126 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
127 		{
128 			bt = mimeboundary(buf, bbuf);
129 			if (bt != MBT_NOTSEP)
130 				break;
131 			putline(buf, mci);
132 		}
133 		while (bt != MBT_FINAL)
134 		{
135 			auto HDR *hdr = NULL;
136 
137 			sprintf(buf, "--%s", bbuf);
138 			putline(buf, mci);
139 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
140 			putheader(mci, hdr, e, 0);
141 			bt = mime8to7(mci, hdr, e, bbuf);
142 		}
143 		sprintf(buf, "--%s--", bbuf);
144 		putline(buf, mci);
145 
146 		/* skip the late "comment" epilogue */
147 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
148 		{
149 			putline(buf, mci);
150 			bt = mimeboundary(buf, boundary);
151 			if (bt != MBT_NOTSEP)
152 				break;
153 		}
154 		return bt;
155 	}
156 
157 	/*
158 	**  Non-compound body type
159 	**
160 	**	Compute the ratio of seven to eight bit characters;
161 	**	use that as a heuristic to decide how to do the
162 	**	encoding.
163 	*/
164 
165 	/* remember where we were */
166 	offset = ftell(e->e_dfp);
167 	if (offset == -1)
168 		syserr("mime8to7: cannot ftell on %s", e->e_df);
169 
170 	/* do a scan of this body type to count character types */
171 	sectionsize = sectionhighbits = 0;
172 	while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
173 	{
174 		bt = mimeboundary(buf, boundary);
175 		if (bt != MBT_NOTSEP)
176 			break;
177 		for (p = buf; *p != '\0'; p++)
178 		{
179 			/* count bytes with the high bit set */
180 			sectionsize++;
181 			if (bitset(0200, *p))
182 				sectionhighbits++;
183 		}
184 
185 		/*
186 		**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
187 		**  assume base64.  This heuristic avoids double-reading
188 		**  large graphics or video files.
189 		*/
190 
191 		if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4)
192 			break;
193 	}
194 	if (feof(e->e_dfp))
195 		bt = MBT_FINAL;
196 
197 	/* return to the original offset for processing */
198 	/* XXX use relative seeks to handle >31 bit file sizes? */
199 	if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
200 		syserr("mime8to7: cannot fseek on %s", e->e_df);
201 
202 	/*
203 	**  Heuristically determine encoding method.
204 	**	If more than 1/8 of the total characters have the
205 	**	eighth bit set, use base64; else use quoted-printable.
206 	*/
207 
208 	if (tTd(43, 8))
209 	{
210 		printf("mime8to7: %ld high bits in %ld bytes\n",
211 			sectionhighbits, sectionsize);
212 	}
213 	if (sectionhighbits == 0)
214 	{
215 		/* no encoding necessary */
216 		p = hvalue("content-transfer-encoding", header);
217 		if (p != NULL)
218 		{
219 			sprintf(buf, "Content-Transfer-Encoding: %s", p);
220 			putline(buf, mci);
221 		}
222 		putline("", mci);
223 		mci->mci_flags &= ~MCIF_INHEADER;
224 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
225 		{
226 			bt = mimeboundary(buf, boundary);
227 			if (bt != MBT_NOTSEP)
228 				break;
229 			if (buf[0] == 'F' &&
230 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags) &&
231 			    strncmp(buf, "From ", 5) == 0)
232 				(void) putc('>', mci->mci_out);
233 			putline(buf, mci);
234 		}
235 	}
236 	else if (sectionsize / 8 < sectionhighbits)
237 	{
238 		/* use base64 encoding */
239 		int c1, c2;
240 
241 		putline("Content-Transfer-Encoding: base64", mci);
242 		putline("", mci);
243 		mci->mci_flags &= ~MCIF_INHEADER;
244 		linelen = 0;
245 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
246 		{
247 			if (linelen > 71)
248 			{
249 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
250 				linelen = 0;
251 			}
252 			linelen += 4;
253 			fputc(Base64Code[c1 >> 2], mci->mci_out);
254 			c1 = (c1 & 0x03) << 4;
255 			c2 = mime_getchar(e->e_dfp, boundary);
256 			if (c2 == EOF)
257 			{
258 				fputc(Base64Code[c1], mci->mci_out);
259 				fputc('=', mci->mci_out);
260 				fputc('=', mci->mci_out);
261 				break;
262 			}
263 			c1 |= (c2 >> 4) & 0x0f;
264 			fputc(Base64Code[c1], mci->mci_out);
265 			c1 = (c2 & 0x0f) << 2;
266 			c2 = mime_getchar(e->e_dfp, boundary);
267 			if (c2 == EOF)
268 			{
269 				fputc(Base64Code[c1], mci->mci_out);
270 				fputc('=', mci->mci_out);
271 				break;
272 			}
273 			c1 |= (c2 >> 6) & 0x03;
274 			fputc(Base64Code[c1], mci->mci_out);
275 			fputc(Base64Code[c2 & 0x3f], mci->mci_out);
276 		}
277 	}
278 	else
279 	{
280 		/* use quoted-printable encoding */
281 		int c1, c2;
282 
283 		putline("Content-Transfer-Encoding: quoted-printable", mci);
284 		putline("", mci);
285 		mci->mci_flags &= ~MCIF_INHEADER;
286 		linelen = 0;
287 		c2 = '\n';
288 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
289 		{
290 			if (c1 == '\n')
291 			{
292 				if (c2 == ' ' || c2 == '\t')
293 				{
294 					fputc('=', mci->mci_out);
295 					fputc(Base16Code[(c2 >> 4) & 0x0f],
296 								mci->mci_out);
297 					fputc(Base16Code[c2 & 0x0f],
298 								mci->mci_out);
299 					fputs(mci->mci_mailer->m_eol,
300 								mci->mci_out);
301 				}
302 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
303 				linelen = 0;
304 				c2 = c1;
305 				continue;
306 			}
307 			if (c2 == ' ' || c2 == '\t')
308 			{
309 				fputc(c2, mci->mci_out);
310 				linelen++;
311 			}
312 			if (linelen > 72)
313 			{
314 				fputc('=', mci->mci_out);
315 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
316 				linelen = 0;
317 				c2 = '\n';
318 			}
319 			if (c2 == '\n' && c1 == '.' &&
320 				 bitnset(M_XDOT, mci->mci_mailer->m_flags))
321 			{
322 				fputc('.', mci->mci_out);
323 				linelen++;
324 			}
325 			if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=')
326 			{
327 				fputc('=', mci->mci_out);
328 				fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out);
329 				fputc(Base16Code[c1 & 0x0f], mci->mci_out);
330 				linelen += 3;
331 			}
332 			else if (c1 != ' ' && c1 != '\t')
333 			{
334 				fputc(c1, mci->mci_out);
335 				linelen++;
336 			}
337 			c2 = c1;
338 		}
339 
340 		/* output any saved character */
341 		if (c2 == ' ' || c2 == '\t')
342 		{
343 			fputc(c2, mci->mci_out);
344 			linelen++;
345 		}
346 	}
347 	if (linelen > 0)
348 		fputs(mci->mci_mailer->m_eol, mci->mci_out);
349 	return MimeBoundaryType;
350 }
351 
352 
353 int
354 mime_getchar(fp, boundary)
355 	register FILE *fp;
356 	char *boundary;
357 {
358 	int c;
359 	static char *bp = NULL;
360 	static int buflen = 0;
361 	static bool atbol = TRUE;	/* at beginning of line */
362 	static char buf[128];		/* need not be a full line */
363 
364 	if (buflen > 0)
365 	{
366 		buflen--;
367 		return *bp++;
368 	}
369 	c = fgetc(fp);
370 	if (atbol && c == '-' && boundary != NULL)
371 	{
372 		/* check for a message boundary */
373 		bp = buf;
374 		c = fgetc(fp);
375 		if (c != '-')
376 		{
377 			if (c != EOF)
378 			{
379 				*bp = c;
380 				buflen++;
381 			}
382 			return '-';
383 		}
384 
385 		/* got "--", now check for rest of separator */
386 		*bp++ = '-';
387 		*bp++ = '-';
388 		while (bp < &buf[sizeof buf - 1] &&
389 		       (c = fgetc(fp)) != EOF && c != '\n')
390 		{
391 			*bp++ = c;
392 		}
393 		*bp = '\0';
394 		MimeBoundaryType = mimeboundary(buf, boundary);
395 		switch (MimeBoundaryType)
396 		{
397 		  case MBT_FINAL:
398 		  case MBT_INTERMED:
399 			/* we have a message boundary */
400 			buflen = 0;
401 			return EOF;
402 		}
403 
404 		atbol = c == '\n';
405 		if (c != EOF)
406 			*bp++ = c;
407 		buflen = bp - buf - 1;
408 		bp = buf;
409 		return *bp++;
410 	}
411 
412 	atbol = c == '\n';
413 	return c;
414 }
415 /*
416 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
417 **
418 **	Parameters:
419 **		line -- the input line.
420 **		boundary -- the expected boundary.
421 **
422 **	Returns:
423 **		MBT_NOTSEP -- if this is not a separator line
424 **		MBT_INTERMED -- if this is an intermediate separator
425 **		MBT_FINAL -- if this is a final boundary
426 **		MBT_SYNTAX -- if this is a boundary for the wrong
427 **			enclosure -- i.e., a syntax error.
428 */
429 
430 int
431 mimeboundary(line, boundary)
432 	register char *line;
433 	char *boundary;
434 {
435 	int type;
436 	int i;
437 
438 	if (line[0] != '-' || line[1] != '-' || boundary == NULL)
439 		return MBT_NOTSEP;
440 	if (tTd(43, 5))
441 		printf("mimeboundary: bound=\"%s\", line=\"%s\"... ",
442 			boundary, line);
443 	i = strlen(line);
444 	if (line[i - 1] == '\n')
445 		i--;
446 	if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
447 	{
448 		type = MBT_FINAL;
449 		i -= 2;
450 	}
451 	else
452 		type = MBT_INTERMED;
453 
454 	/* XXX should check for improper nesting here */
455 	if (strncmp(boundary, &line[2], i - 2) != 0 ||
456 	    strlen(boundary) != i - 2)
457 		type = MBT_NOTSEP;
458 	if (tTd(43, 5))
459 		printf("%d\n", type);
460 	return type;
461 }
462 /*
463 **  DEFCHARSET -- return default character set for message
464 **
465 **	The first choice for character set is for the mailer
466 **	corresponding to the envelope sender.  If neither that
467 **	nor the global configuration file has a default character
468 **	set defined, return "unknown-8bit" as recommended by
469 **	RFC 1428 section 3.
470 **
471 **	Parameters:
472 **		e -- the envelope for this message.
473 **
474 **	Returns:
475 **		The default character set for that mailer.
476 */
477 
478 char *
479 defcharset(e)
480 	register ENVELOPE *e;
481 {
482 	if (e != NULL && e->e_from.q_mailer != NULL &&
483 	    e->e_from.q_mailer->m_defcharset != NULL)
484 		return e->e_from.q_mailer->m_defcharset;
485 	if (DefaultCharSet != NULL)
486 		return DefaultCharSet;
487 	return "unknown-8bit";
488 }
489