1 /* 2 * Copyright (c) 1994 Eric P. Allman 3 * Copyright (c) 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * %sccs.include.redist.c% 7 */ 8 9 # include "sendmail.h" 10 # include <string.h> 11 12 #ifndef lint 13 static char sccsid[] = "@(#)mime.c 8.3 (Berkeley) 07/23/94"; 14 #endif /* not lint */ 15 16 /* 17 ** MIME support. 18 ** 19 ** I am indebted to John Beck of Hewlett-Packard, who contributed 20 ** his code to me for inclusion. As it turns out, I did not use 21 ** his code since he used a "minimum change" approach that used 22 ** several temp files, and I wanted a "minimum impact" approach 23 ** that would avoid copying. However, looking over his code 24 ** helped me cement my understanding of the problem. 25 ** 26 ** I also looked at, but did not directly use, Nathaniel 27 ** Borenstein's "code.c" module. Again, it functioned as 28 ** a file-to-file translator, which did not fit within my 29 ** design bounds, but it was a useful base for understanding 30 ** the problem. 31 */ 32 33 34 /* character set for hex and base64 encoding */ 35 char Base16Code[] = "0123456789ABCDEF"; 36 char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 37 38 /* types of MIME boundaries */ 39 #define MBT_SYNTAX 0 /* syntax error */ 40 #define MBT_NOTSEP 1 /* not a boundary */ 41 #define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ 42 #define MBT_FINAL 3 /* final boundary (trailing -- included) */ 43 44 static int MimeBoundaryType; /* internal linkage */ 45 /* 46 ** MIME8TO7 -- output 8 bit body in 7 bit format 47 ** 48 ** The header has already been output -- this has to do the 49 ** 8 to 7 bit conversion. It would be easy if we didn't have 50 ** to deal with nested formats (multipart/xxx and message/rfc822). 51 ** 52 ** We won't be called if we don't have to do a conversion, and 53 ** appropriate MIME-Version: and Content-Type: fields have been 54 ** output. Any Content-Transfer-Encoding: field has not been 55 ** output, and we can add it here. 56 ** 57 ** Parameters: 58 ** mci -- mailer connection information. 59 ** header -- the header for this body part. 60 ** e -- envelope. 61 ** boundary -- the message boundary -- NULL if we are 62 ** processing the outer portion. 63 ** 64 ** Returns: 65 ** An indicator of what terminated the message part: 66 ** MBT_FINAL -- the final boundary 67 ** MBT_INTERMED -- an intermediate boundary 68 ** MBT_NOTSEP -- an end of file 69 */ 70 71 int 72 mime8to7(mci, header, e, boundary) 73 register MCI *mci; 74 HDR *header; 75 register ENVELOPE *e; 76 char *boundary; 77 { 78 register char *p; 79 int linelen; 80 int bt; 81 off_t offset; 82 size_t sectionsize, sectionhighbits; 83 char bbuf[128]; 84 char buf[MAXLINE]; 85 extern char *hvalue(); 86 87 if (tTd(43, 1)) 88 { 89 printf("mime8to7: boundary=%s\n", 90 boundary == NULL ? "<none>" : boundary); 91 } 92 p = hvalue("Content-Type", header); 93 if (p != NULL && strncasecmp(p, "multipart/", 10) == 0) 94 { 95 register char *q; 96 97 /* oh dear -- this part is hard */ 98 p = strstr(p, "boundary="); /*XXX*/ 99 if (p == NULL) 100 { 101 syserr("mime8to7: Content-Type: %s missing boundary", p); 102 p = "---"; 103 } 104 else 105 p += 9; 106 if (*p == '"') 107 q = strchr(p, '"'); 108 else 109 q = strchr(p, ','); 110 if (q == NULL) 111 q = p + strlen(p); 112 if (q - p > sizeof bbuf - 1) 113 { 114 syserr("mime8to7: multipart boundary \"%.*s\" too long", 115 q - p, p); 116 q = p + sizeof bbuf - 1; 117 } 118 strncpy(bbuf, p, q - p); 119 bbuf[q - p] = '\0'; 120 if (tTd(43, 1)) 121 { 122 printf("mime8to7: multipart boundary \"%s\"\n", bbuf); 123 } 124 125 /* skip the early "comment" prologue */ 126 bt = MBT_FINAL; 127 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 128 { 129 bt = mimeboundary(buf, bbuf); 130 if (bt != MBT_NOTSEP) 131 break; 132 putline(buf, mci); 133 } 134 while (bt != MBT_FINAL) 135 { 136 auto HDR *hdr = NULL; 137 138 sprintf(buf, "--%s", bbuf); 139 putline(buf, mci); 140 collect(e->e_dfp, FALSE, FALSE, &hdr, e); 141 putheader(mci, hdr, e); 142 bt = mime8to7(mci, hdr, e, bbuf); 143 } 144 sprintf(buf, "--%s--", bbuf); 145 putline(buf, mci); 146 147 /* skip the late "comment" epilogue */ 148 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 149 { 150 putline(buf, mci); 151 bt = mimeboundary(buf, boundary); 152 if (bt != MBT_NOTSEP) 153 break; 154 } 155 return bt; 156 } 157 158 /* 159 ** Non-compound body type 160 ** 161 ** Compute the ratio of seven to eight bit characters; 162 ** use that as a heuristic to decide how to do the 163 ** encoding. 164 */ 165 166 /* remember where we were */ 167 offset = ftell(e->e_dfp); 168 if (offset == -1) 169 syserr("mime8to7: cannot ftell on %s", e->e_df); 170 171 /* do a scan of this body type to count character types */ 172 sectionsize = sectionhighbits = 0; 173 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 174 { 175 bt = mimeboundary(buf, boundary); 176 if (bt != MBT_NOTSEP) 177 break; 178 for (p = buf; *p != '\0'; p++) 179 { 180 /* count bytes with the high bit set */ 181 sectionsize++; 182 if (bitset(0200, *p)) 183 sectionhighbits++; 184 } 185 186 /* 187 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 188 ** assume base64. This heuristic avoids double-reading 189 ** large graphics or video files. 190 */ 191 192 if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4) 193 break; 194 } 195 if (feof(e->e_dfp)) 196 bt = MBT_FINAL; 197 198 /* return to the original offset for processing */ 199 /* XXX use relative seeks to handle >31 bit file sizes? */ 200 if (fseek(e->e_dfp, offset, SEEK_SET) < 0) 201 syserr("mime8to7: cannot fseek on %s", e->e_df); 202 203 /* 204 ** Heuristically determine encoding method. 205 ** If more than 1/8 of the total characters have the 206 ** eighth bit set, use base64; else use quoted-printable. 207 */ 208 209 if (tTd(43, 8)) 210 { 211 printf("mime8to7: %ld high bits in %ld bytes\n", 212 sectionhighbits, sectionsize); 213 } 214 if (sectionhighbits == 0) 215 { 216 /* no encoding necessary */ 217 putline("", mci); 218 mci->mci_flags &= ~MCIF_INHEADER; 219 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 220 { 221 bt = mimeboundary(buf, boundary); 222 if (bt != MBT_NOTSEP) 223 break; 224 if (buf[0] == 'F' && 225 bitnset(M_ESCFROM, mci->mci_mailer->m_flags) && 226 strncmp(buf, "From ", 5) == 0) 227 (void) putc('>', mci->mci_out); 228 putline(buf, mci); 229 } 230 } 231 else if (sectionsize / 8 < sectionhighbits) 232 { 233 /* use base64 encoding */ 234 int c1, c2; 235 236 putline("Content-Transfer-Encoding: base64", mci); 237 putline("", mci); 238 mci->mci_flags &= ~MCIF_INHEADER; 239 linelen = 0; 240 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 241 { 242 if (linelen > 71) 243 { 244 fputs(mci->mci_mailer->m_eol, mci->mci_out); 245 linelen = 0; 246 } 247 linelen += 4; 248 fputc(Base64Code[c1 >> 2], mci->mci_out); 249 c1 = (c1 & 0x03) << 4; 250 c2 = mime_getchar(e->e_dfp, boundary); 251 if (c2 == EOF) 252 { 253 fputc(Base64Code[c1], mci->mci_out); 254 fputc('=', mci->mci_out); 255 fputc('=', mci->mci_out); 256 break; 257 } 258 c1 |= (c2 >> 4) & 0x0f; 259 fputc(Base64Code[c1], mci->mci_out); 260 c1 = (c2 & 0x0f) << 2; 261 c2 = mime_getchar(e->e_dfp, boundary); 262 if (c2 == EOF) 263 { 264 fputc(Base64Code[c1], mci->mci_out); 265 fputc('=', mci->mci_out); 266 break; 267 } 268 c1 |= (c2 >> 6) & 0x03; 269 fputc(Base64Code[c1], mci->mci_out); 270 fputc(Base64Code[c2 & 0x3f], mci->mci_out); 271 } 272 } 273 else 274 { 275 /* use quoted-printable encoding */ 276 int c1, c2; 277 278 putline("Content-Transfer-Encoding: quoted-printable", mci); 279 putline("", mci); 280 mci->mci_flags &= ~MCIF_INHEADER; 281 linelen = 0; 282 c2 = '\n'; 283 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 284 { 285 if (c1 == '\n') 286 { 287 if (c2 == ' ' || c2 == '\t') 288 { 289 fputc('=', mci->mci_out); 290 fputs(mci->mci_mailer->m_eol, mci->mci_out); 291 } 292 fputs(mci->mci_mailer->m_eol, mci->mci_out); 293 linelen = 0; 294 c2 = c1; 295 continue; 296 } 297 else if (c2 == '\n' && c1 == '.' && 298 bitnset(M_XDOT, mci->mci_mailer->m_flags)) 299 { 300 fputc('.', mci->mci_out); 301 linelen++; 302 } 303 if (linelen > 72) 304 { 305 fputc('=', mci->mci_out); 306 fputs(mci->mci_mailer->m_eol, mci->mci_out); 307 linelen = 0; 308 c2 = '\n'; 309 } 310 if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=') 311 { 312 fputc('=', mci->mci_out); 313 fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out); 314 fputc(Base16Code[c1 & 0x0f], mci->mci_out); 315 linelen += 3; 316 } 317 else 318 { 319 fputc(c1, mci->mci_out); 320 linelen++; 321 } 322 c2 = c1; 323 } 324 } 325 if (linelen > 0) 326 fputs(mci->mci_mailer->m_eol, mci->mci_out); 327 return MimeBoundaryType; 328 } 329 330 331 int 332 mime_getchar(fp, boundary) 333 register FILE *fp; 334 char *boundary; 335 { 336 int c; 337 static char *bp = NULL; 338 static int buflen = 0; 339 static bool atbol = TRUE; /* at beginning of line */ 340 static char buf[128]; /* need not be a full line */ 341 342 if (buflen > 0) 343 { 344 buflen--; 345 return *bp++; 346 } 347 c = fgetc(fp); 348 if (atbol && c == '-' && boundary != NULL) 349 { 350 /* check for a message boundary */ 351 bp = buf; 352 c = fgetc(fp); 353 if (c != '-') 354 { 355 if (c != EOF) 356 { 357 *bp = c; 358 buflen++; 359 } 360 return '-'; 361 } 362 363 /* got "--", now check for rest of separator */ 364 *bp++ = '-'; 365 *bp++ = '-'; 366 while (bp < &buf[sizeof buf - 1] && 367 (c = fgetc(fp)) != EOF && c != '\n') 368 { 369 *bp++ = c; 370 } 371 *bp = '\0'; 372 MimeBoundaryType = mimeboundary(buf, boundary); 373 switch (MimeBoundaryType) 374 { 375 case MBT_FINAL: 376 case MBT_INTERMED: 377 /* we have a message boundary */ 378 buflen = 0; 379 return EOF; 380 } 381 382 atbol = c == '\n'; 383 if (c != EOF) 384 *bp++ = c; 385 buflen = bp - buf - 1; 386 bp = buf; 387 return *bp++; 388 } 389 390 atbol = c == '\n'; 391 return c; 392 } 393 /* 394 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 395 ** 396 ** Parameters: 397 ** line -- the input line. 398 ** boundary -- the expected boundary. 399 ** 400 ** Returns: 401 ** MBT_NOTSEP -- if this is not a separator line 402 ** MBT_INTERMED -- if this is an intermediate separator 403 ** MBT_FINAL -- if this is a final boundary 404 ** MBT_SYNTAX -- if this is a boundary for the wrong 405 ** enclosure -- i.e., a syntax error. 406 */ 407 408 int 409 mimeboundary(line, boundary) 410 register char *line; 411 char *boundary; 412 { 413 int type; 414 int i; 415 416 if (line[0] != '-' || line[1] != '-' || boundary == NULL) 417 return MBT_NOTSEP; 418 if (tTd(43, 5)) 419 printf("mimeboundary: bound=\"%s\", line=\"%s\"... ", 420 boundary, line); 421 i = strlen(line); 422 if (line[i - 1] == '\n') 423 i--; 424 if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 425 { 426 type = MBT_FINAL; 427 i -= 2; 428 } 429 else 430 type = MBT_INTERMED; 431 432 /* XXX should check for improper nesting here */ 433 if (strncmp(boundary, &line[2], i - 2) != 0 || 434 strlen(boundary) != i - 2) 435 type = MBT_NOTSEP; 436 if (tTd(43, 5)) 437 printf("%d\n", type); 438 return type; 439 } 440