xref: /original-bsd/usr.sbin/sendmail/src/mime.c (revision 4ba124f7)
1 /*
2  * Copyright (c) 1994 Eric P. Allman
3  * Copyright (c) 1994
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * %sccs.include.redist.c%
7  */
8 
9 # include "sendmail.h"
10 # include <string.h>
11 
12 #ifndef lint
13 static char sccsid[] = "@(#)mime.c	8.3 (Berkeley) 07/23/94";
14 #endif /* not lint */
15 
16 /*
17 **  MIME support.
18 **
19 **	I am indebted to John Beck of Hewlett-Packard, who contributed
20 **	his code to me for inclusion.  As it turns out, I did not use
21 **	his code since he used a "minimum change" approach that used
22 **	several temp files, and I wanted a "minimum impact" approach
23 **	that would avoid copying.  However, looking over his code
24 **	helped me cement my understanding of the problem.
25 **
26 **	I also looked at, but did not directly use, Nathaniel
27 **	Borenstein's "code.c" module.  Again, it functioned as
28 **	a file-to-file translator, which did not fit within my
29 **	design bounds, but it was a useful base for understanding
30 **	the problem.
31 */
32 
33 
34 /* digit alphabets indexed by digit value: hex (quoted-printable =XX) and base64 */
35 char	Base16Code[] =	"0123456789ABCDEF";
36 char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
37 
38 /* types of MIME boundaries, as classified by mimeboundary() */
39 #define MBT_SYNTAX	0	/* syntax error (no code visible in this file returns it) */
40 #define MBT_NOTSEP	1	/* not a boundary */
41 #define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
42 #define MBT_FINAL	3	/* final boundary (trailing -- included) */
43 
44 static int	MimeBoundaryType;	/* internal linkage; set by mime_getchar(), read by mime8to7() */
45 /*
46 **  MIME8TO7 -- output 8 bit body in 7 bit format
47 **
48 **	The header has already been output -- this has to do the
49 **	8 to 7 bit conversion.  It would be easy if we didn't have
50 **	to deal with nested formats (multipart/xxx and message/rfc822).
51 **
52 **	We won't be called if we don't have to do a conversion, and
53 **	appropriate MIME-Version: and Content-Type: fields have been
54 **	output.  Any Content-Transfer-Encoding: field has not been
55 **	output, and we can add it here.
56 **
57 **	Parameters:
58 **		mci -- mailer connection information.
59 **		header -- the header for this body part.
60 **		e -- envelope.
61 **		boundary -- the message boundary -- NULL if we are
62 **			processing the outer portion.
63 **
64 **	Returns:
65 **		An indicator of what terminated the message part:
66 **		  MBT_FINAL -- the final boundary
67 **		  MBT_INTERMED -- an intermediate boundary
68 **		  MBT_NOTSEP -- an end of file
69 */
70 
71 int
72 mime8to7(mci, header, e, boundary)
73 	register MCI *mci;
74 	HDR *header;
75 	register ENVELOPE *e;
76 	char *boundary;
77 {
78 	register char *p;
79 	int linelen;
80 	int bt;
81 	off_t offset;
82 	size_t sectionsize, sectionhighbits;
83 	char bbuf[128];
84 	char buf[MAXLINE];
85 	extern char *hvalue();
86 
87 	if (tTd(43, 1))
88 	{
89 		printf("mime8to7: boundary=%s\n",
90 			boundary == NULL ? "<none>" : boundary);
91 	}
92 	p = hvalue("Content-Type", header);
93 	if (p != NULL && strncasecmp(p, "multipart/", 10) == 0)
94 	{
95 		register char *q;
96 
97 		/* oh dear -- this part is hard */
98 		p = strstr(p, "boundary=");		/*XXX*/
99 		if (p == NULL)
100 		{
101 			syserr("mime8to7: Content-Type: %s missing boundary", p);
102 			p = "---";
103 		}
104 		else
105 			p += 9;
106 		if (*p == '"')
107 			q = strchr(p, '"');
108 		else
109 			q = strchr(p, ',');
110 		if (q == NULL)
111 			q = p + strlen(p);
112 		if (q - p > sizeof bbuf - 1)
113 		{
114 			syserr("mime8to7: multipart boundary \"%.*s\" too long",
115 				q - p, p);
116 			q = p + sizeof bbuf - 1;
117 		}
118 		strncpy(bbuf, p, q - p);
119 		bbuf[q - p] = '\0';
120 		if (tTd(43, 1))
121 		{
122 			printf("mime8to7: multipart boundary \"%s\"\n", bbuf);
123 		}
124 
125 		/* skip the early "comment" prologue */
126 		bt = MBT_FINAL;
127 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
128 		{
129 			bt = mimeboundary(buf, bbuf);
130 			if (bt != MBT_NOTSEP)
131 				break;
132 			putline(buf, mci);
133 		}
134 		while (bt != MBT_FINAL)
135 		{
136 			auto HDR *hdr = NULL;
137 
138 			sprintf(buf, "--%s", bbuf);
139 			putline(buf, mci);
140 			collect(e->e_dfp, FALSE, FALSE, &hdr, e);
141 			putheader(mci, hdr, e);
142 			bt = mime8to7(mci, hdr, e, bbuf);
143 		}
144 		sprintf(buf, "--%s--", bbuf);
145 		putline(buf, mci);
146 
147 		/* skip the late "comment" epilogue */
148 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
149 		{
150 			putline(buf, mci);
151 			bt = mimeboundary(buf, boundary);
152 			if (bt != MBT_NOTSEP)
153 				break;
154 		}
155 		return bt;
156 	}
157 
158 	/*
159 	**  Non-compound body type
160 	**
161 	**	Compute the ratio of seven to eight bit characters;
162 	**	use that as a heuristic to decide how to do the
163 	**	encoding.
164 	*/
165 
166 	/* remember where we were */
167 	offset = ftell(e->e_dfp);
168 	if (offset == -1)
169 		syserr("mime8to7: cannot ftell on %s", e->e_df);
170 
171 	/* do a scan of this body type to count character types */
172 	sectionsize = sectionhighbits = 0;
173 	while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
174 	{
175 		bt = mimeboundary(buf, boundary);
176 		if (bt != MBT_NOTSEP)
177 			break;
178 		for (p = buf; *p != '\0'; p++)
179 		{
180 			/* count bytes with the high bit set */
181 			sectionsize++;
182 			if (bitset(0200, *p))
183 				sectionhighbits++;
184 		}
185 
186 		/*
187 		**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
188 		**  assume base64.  This heuristic avoids double-reading
189 		**  large graphics or video files.
190 		*/
191 
192 		if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4)
193 			break;
194 	}
195 	if (feof(e->e_dfp))
196 		bt = MBT_FINAL;
197 
198 	/* return to the original offset for processing */
199 	/* XXX use relative seeks to handle >31 bit file sizes? */
200 	if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
201 		syserr("mime8to7: cannot fseek on %s", e->e_df);
202 
203 	/*
204 	**  Heuristically determine encoding method.
205 	**	If more than 1/8 of the total characters have the
206 	**	eighth bit set, use base64; else use quoted-printable.
207 	*/
208 
209 	if (tTd(43, 8))
210 	{
211 		printf("mime8to7: %ld high bits in %ld bytes\n",
212 			sectionhighbits, sectionsize);
213 	}
214 	if (sectionhighbits == 0)
215 	{
216 		/* no encoding necessary */
217 		putline("", mci);
218 		mci->mci_flags &= ~MCIF_INHEADER;
219 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
220 		{
221 			bt = mimeboundary(buf, boundary);
222 			if (bt != MBT_NOTSEP)
223 				break;
224 			if (buf[0] == 'F' &&
225 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags) &&
226 			    strncmp(buf, "From ", 5) == 0)
227 				(void) putc('>', mci->mci_out);
228 			putline(buf, mci);
229 		}
230 	}
231 	else if (sectionsize / 8 < sectionhighbits)
232 	{
233 		/* use base64 encoding */
234 		int c1, c2;
235 
236 		putline("Content-Transfer-Encoding: base64", mci);
237 		putline("", mci);
238 		mci->mci_flags &= ~MCIF_INHEADER;
239 		linelen = 0;
240 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
241 		{
242 			if (linelen > 71)
243 			{
244 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
245 				linelen = 0;
246 			}
247 			linelen += 4;
248 			fputc(Base64Code[c1 >> 2], mci->mci_out);
249 			c1 = (c1 & 0x03) << 4;
250 			c2 = mime_getchar(e->e_dfp, boundary);
251 			if (c2 == EOF)
252 			{
253 				fputc(Base64Code[c1], mci->mci_out);
254 				fputc('=', mci->mci_out);
255 				fputc('=', mci->mci_out);
256 				break;
257 			}
258 			c1 |= (c2 >> 4) & 0x0f;
259 			fputc(Base64Code[c1], mci->mci_out);
260 			c1 = (c2 & 0x0f) << 2;
261 			c2 = mime_getchar(e->e_dfp, boundary);
262 			if (c2 == EOF)
263 			{
264 				fputc(Base64Code[c1], mci->mci_out);
265 				fputc('=', mci->mci_out);
266 				break;
267 			}
268 			c1 |= (c2 >> 6) & 0x03;
269 			fputc(Base64Code[c1], mci->mci_out);
270 			fputc(Base64Code[c2 & 0x3f], mci->mci_out);
271 		}
272 	}
273 	else
274 	{
275 		/* use quoted-printable encoding */
276 		int c1, c2;
277 
278 		putline("Content-Transfer-Encoding: quoted-printable", mci);
279 		putline("", mci);
280 		mci->mci_flags &= ~MCIF_INHEADER;
281 		linelen = 0;
282 		c2 = '\n';
283 		while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF)
284 		{
285 			if (c1 == '\n')
286 			{
287 				if (c2 == ' ' || c2 == '\t')
288 				{
289 					fputc('=', mci->mci_out);
290 					fputs(mci->mci_mailer->m_eol, mci->mci_out);
291 				}
292 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
293 				linelen = 0;
294 				c2 = c1;
295 				continue;
296 			}
297 			else if (c2 == '\n' && c1 == '.' &&
298 				 bitnset(M_XDOT, mci->mci_mailer->m_flags))
299 			{
300 				fputc('.', mci->mci_out);
301 				linelen++;
302 			}
303 			if (linelen > 72)
304 			{
305 				fputc('=', mci->mci_out);
306 				fputs(mci->mci_mailer->m_eol, mci->mci_out);
307 				linelen = 0;
308 				c2 = '\n';
309 			}
310 			if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=')
311 			{
312 				fputc('=', mci->mci_out);
313 				fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out);
314 				fputc(Base16Code[c1 & 0x0f], mci->mci_out);
315 				linelen += 3;
316 			}
317 			else
318 			{
319 				fputc(c1, mci->mci_out);
320 				linelen++;
321 			}
322 			c2 = c1;
323 		}
324 	}
325 	if (linelen > 0)
326 		fputs(mci->mci_mailer->m_eol, mci->mci_out);
327 	return MimeBoundaryType;
328 }
329 
330 
331 int
332 mime_getchar(fp, boundary)
333 	register FILE *fp;
334 	char *boundary;
335 {
336 	int c;
337 	static char *bp = NULL;
338 	static int buflen = 0;
339 	static bool atbol = TRUE;	/* at beginning of line */
340 	static char buf[128];		/* need not be a full line */
341 
342 	if (buflen > 0)
343 	{
344 		buflen--;
345 		return *bp++;
346 	}
347 	c = fgetc(fp);
348 	if (atbol && c == '-' && boundary != NULL)
349 	{
350 		/* check for a message boundary */
351 		bp = buf;
352 		c = fgetc(fp);
353 		if (c != '-')
354 		{
355 			if (c != EOF)
356 			{
357 				*bp = c;
358 				buflen++;
359 			}
360 			return '-';
361 		}
362 
363 		/* got "--", now check for rest of separator */
364 		*bp++ = '-';
365 		*bp++ = '-';
366 		while (bp < &buf[sizeof buf - 1] &&
367 		       (c = fgetc(fp)) != EOF && c != '\n')
368 		{
369 			*bp++ = c;
370 		}
371 		*bp = '\0';
372 		MimeBoundaryType = mimeboundary(buf, boundary);
373 		switch (MimeBoundaryType)
374 		{
375 		  case MBT_FINAL:
376 		  case MBT_INTERMED:
377 			/* we have a message boundary */
378 			buflen = 0;
379 			return EOF;
380 		}
381 
382 		atbol = c == '\n';
383 		if (c != EOF)
384 			*bp++ = c;
385 		buflen = bp - buf - 1;
386 		bp = buf;
387 		return *bp++;
388 	}
389 
390 	atbol = c == '\n';
391 	return c;
392 }
393 /*
394 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
395 **
396 **	Parameters:
397 **		line -- the input line.
398 **		boundary -- the expected boundary.
399 **
400 **	Returns:
401 **		MBT_NOTSEP -- if this is not a separator line
402 **		MBT_INTERMED -- if this is an intermediate separator
403 **		MBT_FINAL -- if this is a final boundary
404 **		MBT_SYNTAX -- if this is a boundary for the wrong
405 **			enclosure -- i.e., a syntax error.
406 */
407 
408 int
409 mimeboundary(line, boundary)
410 	register char *line;
411 	char *boundary;
412 {
413 	int type;
414 	int i;
415 
416 	if (line[0] != '-' || line[1] != '-' || boundary == NULL)
417 		return MBT_NOTSEP;
418 	if (tTd(43, 5))
419 		printf("mimeboundary: bound=\"%s\", line=\"%s\"... ",
420 			boundary, line);
421 	i = strlen(line);
422 	if (line[i - 1] == '\n')
423 		i--;
424 	if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
425 	{
426 		type = MBT_FINAL;
427 		i -= 2;
428 	}
429 	else
430 		type = MBT_INTERMED;
431 
432 	/* XXX should check for improper nesting here */
433 	if (strncmp(boundary, &line[2], i - 2) != 0 ||
434 	    strlen(boundary) != i - 2)
435 		type = MBT_NOTSEP;
436 	if (tTd(43, 5))
437 		printf("%d\n", type);
438 	return type;
439 }
440