/* xref: /original-bsd/usr.sbin/sendmail/src/mime.c (revision cf2e4d33) */
/*
 * Copyright (c) 1994 Eric P. Allman
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 */
8 
# include "sendmail.h"
# include <string.h>
11 
#ifndef lint
/* SCCS version identification string embedded in the object file */
static char sccsid[] = "@(#)mime.c	8.5 (Berkeley) 08/17/94";
#endif /* not lint */
15 
/*
**  MIME support.
**
**	I am indebted to John Beck of Hewlett-Packard, who contributed
**	his code to me for inclusion.  As it turns out, I did not use
**	his code since he used a "minimum change" approach that used
**	several temp files, and I wanted a "minimum impact" approach
**	that would avoid copying.  However, looking over his code
**	helped me cement my understanding of the problem.
**
**	I also looked at, but did not directly use, Nathaniel
**	Borenstein's "code.c" module.  Again, it functioned as
**	a file-to-file translator, which did not fit within my
**	design bounds, but it was a useful base for understanding
**	the problem.
*/
32 
33 
/* character set for hex and base64 encoding */
char	Base16Code[] =	"0123456789ABCDEF";
char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* types of MIME boundaries */
#define MBT_SYNTAX	0	/* syntax error (boundary for the wrong enclosure) */
#define MBT_NOTSEP	1	/* not a boundary */
#define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
#define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* boundary type most recently determined by mime_getchar(); file-local */
static int	MimeBoundaryType;
45 /*
46 **  MIME8TO7 -- output 8 bit body in 7 bit format
47 **
48 **	The header has already been output -- this has to do the
49 **	8 to 7 bit conversion.  It would be easy if we didn't have
50 **	to deal with nested formats (multipart/xxx and message/rfc822).
51 **
52 **	We won't be called if we don't have to do a conversion, and
53 **	appropriate MIME-Version: and Content-Type: fields have been
54 **	output.  Any Content-Transfer-Encoding: field has not been
55 **	output, and we can add it here.
56 **
57 **	Parameters:
58 **		mci -- mailer connection information.
59 **		header -- the header for this body part.
60 **		e -- envelope.
61 **		boundary -- the message boundary -- NULL if we are
62 **			processing the outer portion.
63 **
64 **	Returns:
65 **		An indicator of what terminated the message part:
66 **		  MBT_FINAL -- the final boundary
67 **		  MBT_INTERMED -- an intermediate boundary
68 **		  MBT_NOTSEP -- an end of file
69 */
70 
71 int
72 mime8to7(mci, header, e, boundary)
73 	register MCI *mci;
74 	HDR *header;
75 	register ENVELOPE *e;
76 	char *boundary;
77 {
78 	register char *p;
79 	int linelen;
80 	int bt;
81 	off_t offset;
82 	size_t sectionsize, sectionhighbits;
83 	char bbuf[128];
84 	char buf[MAXLINE];
85 
86 	if (tTd(43, 1))
87 	{
88 		printf("mime8to7: boundary=%s\n",
89 			boundary == NULL ? "<none>" : boundary);
90 	}
91 	p = hvalue("Content-Type", header);
92 	if (p != NULL && strncasecmp(p, "multipart/", 10) == 0)
93 	{
94 		register char *q;
95 
96 		/* oh dear -- this part is hard */
97 		p = strstr(p, "boundary=");		/*XXX*/
98 		if (p == NULL)
99 		{
100 			syserr("mime8to7: Content-Type: %s missing boundary", p);
101 			p = "---";
102 		}
103 		else
104 			p += 9;
105 		if (*p == '"')
106 			q = strchr(p, '"');
107 		else
108 			q = strchr(p, ',');
109 		if (q == NULL)
110 			q = p + strlen(p);
111 		if (q - p > sizeof bbuf - 1)
112 		{
113 			syserr("mime8to7: multipart boundary \"%.*s\" too long",
114 				q - p, p);
115 			q = p + sizeof bbuf - 1;
116 		}
117 		strncpy(bbuf, p, q - p);
118 		bbuf[q - p] = '\0';
119 		if (tTd(43, 1))
120 		{
121 			printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
122 		}
123 
124 		/* skip the early "comment" prologue */
125 		bt = MBT_FINAL;
126 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
127 		{
128 			bt = mimeboundary(buf, bbuf);
129 			if (bt != MBT_NOTSEP)
130 				break;
131 			putline(buf, mci);
132 		}
133 		while (bt != MBT_FINAL)
134 		{
135 			auto HDR *hdr = NULL;
136 
137 			sprintf(buf, "--%s", bbuf);
138 			putline(buf, mci);
139 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
140 			putheader(mci, hdr, e);
141 			bt = mime8to7(mci, hdr, e, bbuf);
142 		}
143 		sprintf(buf, "--%s--", bbuf);
144 		putline(buf, mci);
145 
146 		/* skip the late "comment" epilogue */
147 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
148 		{
149 			putline(buf, mci);
150 			bt = mimeboundary(buf, boundary);
151 			if (bt != MBT_NOTSEP)
152 				break;
153 		}
154 		return bt;
155 	}
156 
157 	/*
158 	**  Non-compound body type
159 	**
160 	**	Compute the ratio of seven to eight bit characters;
161 	**	use that as a heuristic to decide how to do the
162 	**	encoding.
163 	*/
164 
165 	/* remember where we were */
166 	offset = ftell(e->e_dfp);
167 	if (offset == -1)
168 		syserr("mime8to7: cannot ftell on %s", e->e_df);
169 
170 	/* do a scan of this body type to count character types */
171 	sectionsize = sectionhighbits = 0;
172 	while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
173 	{
174 		bt = mimeboundary(buf, boundary);
175 		if (bt != MBT_NOTSEP)
176 			break;
177 		for (p = buf; *p != '\0'; p++)
178 		{
179 			/* count bytes with the high bit set */
180 			sectionsize++;
181 			if (bitset(0200, *p))
182 				sectionhighbits++;
183 		}
184 
185 		/*
186 		**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
187 		**  assume base64.  This heuristic avoids double-reading
188 		**  large graphics or video files.
189 		*/
190 
191 		if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4)
192 			break;
193 	}
194 	if (feof(e->e_dfp))
195 		bt = MBT_FINAL;
196 
197 	/* return to the original offset for processing */
198 	/* XXX use relative seeks to handle >31 bit file sizes? */
199 	if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
200 		syserr("mime8to7: cannot fseek on %s", e->e_df);
201 
202 	/*
203 	**  Heuristically determine encoding method.
204 	**	If more than 1/8 of the total characters have the
205 	**	eighth bit set, use base64; else use quoted-printable.
206 	*/
207 
208 	if (tTd(43, 8))
209 	{
210 		printf("mime8to7: %ld high bits in %ld bytes\n",
211 			sectionhighbits, sectionsize);
212 	}
213 	if (sectionhighbits == 0)
214 	{
215 		/* no encoding necessary */
216 		p = hvalue("content-transfer-encoding", header);
217 		if (p != NULL)
218 		{
219 			sprintf(buf, "Content-Transfer-Encoding: %s", p);
220 			putline(buf, mci);
221 		}
222 		putline("", mci);
223 		mci->mci_flags &= ~MCIF_INHEADER;
224 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
225 		{
226 			bt = mimeboundary(buf, boundary);
227 			if (bt != MBT_NOTSEP)
228 				break;
229 			if (buf[0] == 'F' &&
230 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags) &&
231 			    strncmp(buf, "From ", 5) == 0)
232 				(void) putc('>', mci->mci_out);
233 			putline(buf, mci);
234 		}
235 	}
236 	else if (sectionsize / 8 < sectionhighbits)
237 	{
238 		/* use base64 encoding */
239 		int c1, c2;
240 
241 		putline("Content-Transfer-Encoding: base64", mci);
242 		putline("", mci);
243 		mci->mci_flags &= ~MCIF_INHEADER;
244 		linelen = 0;
245 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
246 		{
247 			if (linelen > 71)
248 			{
249 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
250 				linelen = 0;
251 			}
252 			linelen += 4;
253 			fputc(Base64Code[c1 >> 2], mci->mci_out);
254 			c1 = (c1 & 0x03) << 4;
255 			c2 = mime_getchar(e->e_dfp, boundary);
256 			if (c2 == EOF)
257 			{
258 				fputc(Base64Code[c1], mci->mci_out);
259 				fputc('=', mci->mci_out);
260 				fputc('=', mci->mci_out);
261 				break;
262 			}
263 			c1 |= (c2 >> 4) & 0x0f;
264 			fputc(Base64Code[c1], mci->mci_out);
265 			c1 = (c2 & 0x0f) << 2;
266 			c2 = mime_getchar(e->e_dfp, boundary);
267 			if (c2 == EOF)
268 			{
269 				fputc(Base64Code[c1], mci->mci_out);
270 				fputc('=', mci->mci_out);
271 				break;
272 			}
273 			c1 |= (c2 >> 6) & 0x03;
274 			fputc(Base64Code[c1], mci->mci_out);
275 			fputc(Base64Code[c2 & 0x3f], mci->mci_out);
276 		}
277 	}
278 	else
279 	{
280 		/* use quoted-printable encoding */
281 		int c1, c2;
282 
283 		putline("Content-Transfer-Encoding: quoted-printable", mci);
284 		putline("", mci);
285 		mci->mci_flags &= ~MCIF_INHEADER;
286 		linelen = 0;
287 		c2 = '\n';
288 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
289 		{
290 			if (c1 == '\n')
291 			{
292 				if (c2 == ' ' || c2 == '\t')
293 				{
294 					fputc('=', mci->mci_out);
295 					fputs(mci->mci_mailer->m_eol, mci->mci_out);
296 				}
297 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
298 				linelen = 0;
299 				c2 = c1;
300 				continue;
301 			}
302 			else if (c2 == '\n' && c1 == '.' &&
303 				 bitnset(M_XDOT, mci->mci_mailer->m_flags))
304 			{
305 				fputc('.', mci->mci_out);
306 				linelen++;
307 			}
308 			if (linelen > 72)
309 			{
310 				fputc('=', mci->mci_out);
311 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
312 				linelen = 0;
313 				c2 = '\n';
314 			}
315 			if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=')
316 			{
317 				fputc('=', mci->mci_out);
318 				fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out);
319 				fputc(Base16Code[c1 & 0x0f], mci->mci_out);
320 				linelen += 3;
321 			}
322 			else
323 			{
324 				fputc(c1, mci->mci_out);
325 				linelen++;
326 			}
327 			c2 = c1;
328 		}
329 	}
330 	if (linelen > 0)
331 		fputs(mci->mci_mailer->m_eol, mci->mci_out);
332 	return MimeBoundaryType;
333 }
334 
335 
336 int
337 mime_getchar(fp, boundary)
338 	register FILE *fp;
339 	char *boundary;
340 {
341 	int c;
342 	static char *bp = NULL;
343 	static int buflen = 0;
344 	static bool atbol = TRUE;	/* at beginning of line */
345 	static char buf[128];		/* need not be a full line */
346 
347 	if (buflen > 0)
348 	{
349 		buflen--;
350 		return *bp++;
351 	}
352 	c = fgetc(fp);
353 	if (atbol && c == '-' && boundary != NULL)
354 	{
355 		/* check for a message boundary */
356 		bp = buf;
357 		c = fgetc(fp);
358 		if (c != '-')
359 		{
360 			if (c != EOF)
361 			{
362 				*bp = c;
363 				buflen++;
364 			}
365 			return '-';
366 		}
367 
368 		/* got "--", now check for rest of separator */
369 		*bp++ = '-';
370 		*bp++ = '-';
371 		while (bp < &buf[sizeof buf - 1] &&
372 		       (c = fgetc(fp)) != EOF && c != '\n')
373 		{
374 			*bp++ = c;
375 		}
376 		*bp = '\0';
377 		MimeBoundaryType = mimeboundary(buf, boundary);
378 		switch (MimeBoundaryType)
379 		{
380 		  case MBT_FINAL:
381 		  case MBT_INTERMED:
382 			/* we have a message boundary */
383 			buflen = 0;
384 			return EOF;
385 		}
386 
387 		atbol = c == '\n';
388 		if (c != EOF)
389 			*bp++ = c;
390 		buflen = bp - buf - 1;
391 		bp = buf;
392 		return *bp++;
393 	}
394 
395 	atbol = c == '\n';
396 	return c;
397 }
398 /*
399 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
400 **
401 **	Parameters:
402 **		line -- the input line.
403 **		boundary -- the expected boundary.
404 **
405 **	Returns:
406 **		MBT_NOTSEP -- if this is not a separator line
407 **		MBT_INTERMED -- if this is an intermediate separator
408 **		MBT_FINAL -- if this is a final boundary
409 **		MBT_SYNTAX -- if this is a boundary for the wrong
410 **			enclosure -- i.e., a syntax error.
411 */
412 
413 int
414 mimeboundary(line, boundary)
415 	register char *line;
416 	char *boundary;
417 {
418 	int type;
419 	int i;
420 
421 	if (line[0] != '-' || line[1] != '-' || boundary == NULL)
422 		return MBT_NOTSEP;
423 	if (tTd(43, 5))
424 		printf("mimeboundary: bound=\"%s\", line=\"%s\"... ",
425 			boundary, line);
426 	i = strlen(line);
427 	if (line[i - 1] == '\n')
428 		i--;
429 	if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
430 	{
431 		type = MBT_FINAL;
432 		i -= 2;
433 	}
434 	else
435 		type = MBT_INTERMED;
436 
437 	/* XXX should check for improper nesting here */
438 	if (strncmp(boundary, &line[2], i - 2) != 0 ||
439 	    strlen(boundary) != i - 2)
440 		type = MBT_NOTSEP;
441 	if (tTd(43, 5))
442 		printf("%d\n", type);
443 	return type;
444 }
445