1 /* 2 * Copyright (c) 1994 Eric P. Allman 3 * Copyright (c) 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * %sccs.include.redist.c% 7 */ 8 9 # include "sendmail.h" 10 # include <string.h> 11 12 #ifndef lint 13 static char sccsid[] = "@(#)mime.c 8.12 (Berkeley) 03/21/95"; 14 #endif /* not lint */ 15 16 /* 17 ** MIME support. 18 ** 19 ** I am indebted to John Beck of Hewlett-Packard, who contributed 20 ** his code to me for inclusion. As it turns out, I did not use 21 ** his code since he used a "minimum change" approach that used 22 ** several temp files, and I wanted a "minimum impact" approach 23 ** that would avoid copying. However, looking over his code 24 ** helped me cement my understanding of the problem. 25 ** 26 ** I also looked at, but did not directly use, Nathaniel 27 ** Borenstein's "code.c" module. Again, it functioned as 28 ** a file-to-file translator, which did not fit within my 29 ** design bounds, but it was a useful base for understanding 30 ** the problem. 31 */ 32 33 34 /* character set for hex and base64 encoding */ 35 char Base16Code[] = "0123456789ABCDEF"; 36 char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 37 38 /* types of MIME boundaries */ 39 #define MBT_SYNTAX 0 /* syntax error */ 40 #define MBT_NOTSEP 1 /* not a boundary */ 41 #define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ 42 #define MBT_FINAL 3 /* final boundary (trailing -- included) */ 43 44 static int MimeBoundaryType; /* internal linkage */ 45 /* 46 ** MIME8TO7 -- output 8 bit body in 7 bit format 47 ** 48 ** The header has already been output -- this has to do the 49 ** 8 to 7 bit conversion. It would be easy if we didn't have 50 ** to deal with nested formats (multipart/xxx and message/rfc822). 51 ** 52 ** We won't be called if we don't have to do a conversion, and 53 ** appropriate MIME-Version: and Content-Type: fields have been 54 ** output. 
Any Content-Transfer-Encoding: field has not been 55 ** output, and we can add it here. 56 ** 57 ** Parameters: 58 ** mci -- mailer connection information. 59 ** header -- the header for this body part. 60 ** e -- envelope. 61 ** boundaries -- the currently pending message boundaries. 62 ** NULL if we are processing the outer portion. 63 ** flags -- to tweak processing. 64 ** 65 ** Returns: 66 ** An indicator of what terminated the message part: 67 ** MBT_FINAL -- the final boundary 68 ** MBT_INTERMED -- an intermediate boundary 69 ** MBT_NOTSEP -- an end of file 70 */ 71 72 struct args 73 { 74 char *field; /* name of field */ 75 char *value; /* value of that field */ 76 }; 77 78 int 79 mime8to7(mci, header, e, boundaries, flags) 80 register MCI *mci; 81 HDR *header; register ENVELOPE *e; 82 char **boundaries; 83 int flags; 84 { 85 register char *p; 86 int linelen; 87 int bt; 88 off_t offset; 89 size_t sectionsize, sectionhighbits; 90 int i; 91 char *type; 92 char *subtype; 93 char **pvp; 94 int argc = 0; 95 struct args argv[MAXMIMEARGS]; 96 char bbuf[128]; 97 char buf[MAXLINE]; 98 char pvpbuf[MAXLINE]; 99 100 if (tTd(43, 1)) 101 { 102 printf("mime8to7: boundary=%s\n", 103 boundaries[0] == NULL ? 
"<none>" : boundaries[0]); 104 for (i = 1; boundaries[i] != NULL; i++) 105 printf("\tboundaries[i]\n"); 106 } 107 type = subtype = "-none-"; 108 p = hvalue("Content-Type", header); 109 if (p != NULL && 110 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL)) != NULL && 111 pvp[0] != NULL) 112 { 113 type = *pvp++; 114 if (*pvp != NULL && strcmp(*pvp, "/") == 0 && 115 *++pvp != NULL) 116 { 117 subtype = *pvp++; 118 } 119 120 /* break out parameters */ 121 while (*pvp != NULL && argc < MAXMIMEARGS) 122 { 123 /* skip to semicolon separator */ 124 while (*pvp != NULL && strcmp(*pvp, ";") != 0) 125 pvp++; 126 if (*pvp++ == NULL || *pvp == NULL) 127 break; 128 129 /* extract field name */ 130 argv[argc].field = *pvp++; 131 132 /* see if there is a value */ 133 if (*pvp != NULL && strcmp(*pvp, "=") == 0 && 134 (*++pvp == NULL || strcmp(*pvp, ";") != 0)) 135 { 136 argv[argc].value = *pvp; 137 argc++; 138 } 139 } 140 } 141 if (strcasecmp(type, "multipart") == 0) 142 { 143 register char *q; 144 145 for (i = 0; i < argc; i++) 146 { 147 if (strcasecmp(argv[i].field, "boundary") == 0) 148 break; 149 } 150 if (i >= argc) 151 { 152 syserr("mime8to7: Content-Type: %s missing boundary", p); 153 p = "---"; 154 } 155 else 156 p = argv[i].value; 157 if (*p == '"') 158 q = strchr(p, '"'); 159 else 160 q = p + strlen(p); 161 if (q - p > sizeof bbuf - 1) 162 { 163 syserr("mime8to7: multipart boundary \"%.*s\" too long", 164 q - p, p); 165 q = p + sizeof bbuf - 1; 166 } 167 strncpy(bbuf, p, q - p); 168 bbuf[q - p] = '\0'; 169 if (tTd(43, 1)) 170 { 171 printf("mime8to7: multipart boundary \"%s\"\n", bbuf); 172 } 173 for (i = 0; i < MAXMIMENESTING; i++) 174 if (boundaries[i] == NULL) 175 break; 176 if (i >= MAXMIMENESTING) 177 syserr("mime8to7: multipart nesting boundary too deep"); 178 else 179 { 180 boundaries[i] = bbuf; 181 boundaries[i + 1] = NULL; 182 } 183 184 /* flag subtypes that can't have any 8-bit data */ 185 if (strcasecmp(subtype, "signed") == 0) 186 flags |= M87F_NO8BIT; 187 
188 /* skip the early "comment" prologue */ 189 bt = MBT_FINAL; 190 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 191 { 192 bt = mimeboundary(buf, boundaries); 193 if (bt != MBT_NOTSEP) 194 break; 195 putline(buf, mci); 196 } 197 while (bt != MBT_FINAL) 198 { 199 auto HDR *hdr = NULL; 200 201 sprintf(buf, "--%s", bbuf); 202 putline(buf, mci); 203 collect(e->e_dfp, FALSE, FALSE, &hdr, e); 204 putheader(mci, hdr, e, 0); 205 bt = mime8to7(mci, hdr, e, boundaries, flags); 206 } 207 sprintf(buf, "--%s--", bbuf); 208 putline(buf, mci); 209 210 /* skip the late "comment" epilogue */ 211 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 212 { 213 putline(buf, mci); 214 bt = mimeboundary(buf, boundaries); 215 if (bt != MBT_NOTSEP) 216 break; 217 } 218 boundaries[i] = NULL; 219 return bt; 220 } 221 222 /* 223 ** Non-compound body type 224 ** 225 ** Compute the ratio of seven to eight bit characters; 226 ** use that as a heuristic to decide how to do the 227 ** encoding. 228 */ 229 230 /* handle types that cannot have 8-bit data internally */ 231 sprintf(buf, "%s/%s", type, subtype); 232 if (wordinclass(buf, 'n')) 233 flags |= M87F_NO8BIT; 234 235 sectionsize = sectionhighbits = 0; 236 if (!bitset(M87F_NO8BIT, flags)) 237 { 238 /* remember where we were */ 239 offset = ftell(e->e_dfp); 240 if (offset == -1) 241 syserr("mime8to7: cannot ftell on df%s", e->e_id); 242 243 /* do a scan of this body type to count character types */ 244 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 245 { 246 bt = mimeboundary(buf, boundaries); 247 if (bt != MBT_NOTSEP) 248 break; 249 for (p = buf; *p != '\0'; p++) 250 { 251 /* count bytes with the high bit set */ 252 sectionsize++; 253 if (bitset(0200, *p)) 254 sectionhighbits++; 255 } 256 257 /* 258 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 259 ** assume base64. This heuristic avoids double-reading 260 ** large graphics or video files. 
261 */ 262 263 if (sectionsize >= 4096 && 264 sectionhighbits > sectionsize / 4) 265 break; 266 } 267 if (feof(e->e_dfp)) 268 bt = MBT_FINAL; 269 270 /* return to the original offset for processing */ 271 /* XXX use relative seeks to handle >31 bit file sizes? */ 272 if (fseek(e->e_dfp, offset, SEEK_SET) < 0) 273 syserr("mime8to7: cannot fseek on df%s", e->e_id); 274 } 275 276 /* 277 ** Heuristically determine encoding method. 278 ** If more than 1/8 of the total characters have the 279 ** eighth bit set, use base64; else use quoted-printable. 280 */ 281 282 if (tTd(43, 8)) 283 { 284 printf("mime8to7: %ld high bits in %ld bytes\n", 285 sectionhighbits, sectionsize); 286 } 287 if (sectionhighbits == 0) 288 { 289 /* no encoding necessary */ 290 p = hvalue("content-transfer-encoding", header); 291 if (p != NULL) 292 { 293 sprintf(buf, "Content-Transfer-Encoding: %s", p); 294 putline(buf, mci); 295 } 296 putline("", mci); 297 mci->mci_flags &= ~MCIF_INHEADER; 298 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 299 { 300 bt = mimeboundary(buf, boundaries); 301 if (bt != MBT_NOTSEP) 302 break; 303 if (buf[0] == 'F' && 304 bitnset(M_ESCFROM, mci->mci_mailer->m_flags) && 305 strncmp(buf, "From ", 5) == 0) 306 (void) putc('>', mci->mci_out); 307 putline(buf, mci); 308 } 309 } 310 else if (sectionsize / 8 < sectionhighbits) 311 { 312 /* use base64 encoding */ 313 int c1, c2; 314 315 putline("Content-Transfer-Encoding: base64", mci); 316 putline("", mci); 317 mci->mci_flags &= ~MCIF_INHEADER; 318 linelen = 0; 319 while ((c1 = mime_getchar(e->e_dfp, boundaries)) != EOF) 320 { 321 if (linelen > 71) 322 { 323 fputs(mci->mci_mailer->m_eol, mci->mci_out); 324 linelen = 0; 325 } 326 linelen += 4; 327 fputc(Base64Code[c1 >> 2], mci->mci_out); 328 c1 = (c1 & 0x03) << 4; 329 c2 = mime_getchar(e->e_dfp, boundaries); 330 if (c2 == EOF) 331 { 332 fputc(Base64Code[c1], mci->mci_out); 333 fputc('=', mci->mci_out); 334 fputc('=', mci->mci_out); 335 break; 336 } 337 c1 |= (c2 >> 4) & 0x0f; 
338 fputc(Base64Code[c1], mci->mci_out); 339 c1 = (c2 & 0x0f) << 2; 340 c2 = mime_getchar(e->e_dfp, boundaries); 341 if (c2 == EOF) 342 { 343 fputc(Base64Code[c1], mci->mci_out); 344 fputc('=', mci->mci_out); 345 break; 346 } 347 c1 |= (c2 >> 6) & 0x03; 348 fputc(Base64Code[c1], mci->mci_out); 349 fputc(Base64Code[c2 & 0x3f], mci->mci_out); 350 } 351 } 352 else 353 { 354 /* use quoted-printable encoding */ 355 int c1, c2; 356 int fromstate; 357 358 putline("Content-Transfer-Encoding: quoted-printable", mci); 359 putline("", mci); 360 mci->mci_flags &= ~MCIF_INHEADER; 361 linelen = fromstate = 0; 362 c2 = '\n'; 363 while ((c1 = mime_getchar(e->e_dfp, boundaries)) != EOF) 364 { 365 if (c1 == '\n') 366 { 367 if (c2 == ' ' || c2 == '\t') 368 { 369 fputc('=', mci->mci_out); 370 fputc(Base16Code[(c2 >> 4) & 0x0f], 371 mci->mci_out); 372 fputc(Base16Code[c2 & 0x0f], 373 mci->mci_out); 374 fputs(mci->mci_mailer->m_eol, 375 mci->mci_out); 376 } 377 fputs(mci->mci_mailer->m_eol, mci->mci_out); 378 linelen = fromstate = 0; 379 c2 = c1; 380 continue; 381 } 382 if (c2 == ' ' && linelen == 4 && fromstate == 4 && 383 bitnset(M_ESCFROM, mci->mci_mailer->m_flags)) 384 { 385 fputs("=20", mci->mci_out); 386 linelen += 3; 387 } 388 else if (c2 == ' ' || c2 == '\t') 389 { 390 fputc(c2, mci->mci_out); 391 linelen++; 392 } 393 if (linelen > 72) 394 { 395 fputc('=', mci->mci_out); 396 fputs(mci->mci_mailer->m_eol, mci->mci_out); 397 linelen = fromstate = 0; 398 c2 = '\n'; 399 } 400 if (c2 == '\n' && c1 == '.' 
&& 401 bitnset(M_XDOT, mci->mci_mailer->m_flags)) 402 { 403 fputc('.', mci->mci_out); 404 linelen++; 405 } 406 if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=') 407 { 408 fputc('=', mci->mci_out); 409 fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out); 410 fputc(Base16Code[c1 & 0x0f], mci->mci_out); 411 linelen += 3; 412 } 413 else if (c1 != ' ' && c1 != '\t') 414 { 415 if (linelen < 4 && c1 == "From"[linelen]) 416 fromstate++; 417 fputc(c1, mci->mci_out); 418 linelen++; 419 } 420 c2 = c1; 421 } 422 423 /* output any saved character */ 424 if (c2 == ' ' || c2 == '\t') 425 { 426 fputc('=', mci->mci_out); 427 fputc(Base16Code[(c2 >> 4) & 0x0f], mci->mci_out); 428 fputc(Base16Code[c2 & 0x0f], mci->mci_out); 429 linelen += 3; 430 } 431 } 432 if (linelen > 0) 433 fputs(mci->mci_mailer->m_eol, mci->mci_out); 434 return MimeBoundaryType; 435 } 436 /* 437 ** MIME_GETCHAR -- get a character for MIME processing 438 ** 439 ** Treats boundaries as EOF. 440 ** 441 ** Parameters: 442 ** fp -- the input file. 443 ** boundaries -- the current MIME boundaries. 444 ** 445 ** Returns: 446 ** The next character in the input stream. 
447 */ 448 449 int 450 mime_getchar(fp, boundaries) 451 register FILE *fp; 452 char **boundaries; 453 { 454 int c; 455 static char *bp = NULL; 456 static int buflen = 0; 457 static bool atbol = TRUE; /* at beginning of line */ 458 static char buf[128]; /* need not be a full line */ 459 460 if (buflen > 0) 461 { 462 buflen--; 463 return *bp++; 464 } 465 bp = buf; 466 buflen = 0; 467 c = fgetc(fp); 468 if (c == '\n') 469 { 470 /* might be part of a MIME boundary */ 471 *bp++ = c; 472 atbol = TRUE; 473 c = fgetc(fp); 474 } 475 if (c != EOF) 476 *bp++ = c; 477 if (atbol && c == '-') 478 { 479 /* check for a message boundary */ 480 c = fgetc(fp); 481 if (c != '-') 482 { 483 if (c != EOF) 484 *bp++ = c; 485 buflen = bp - buf - 1; 486 bp = buf; 487 return *bp++; 488 } 489 490 /* got "--", now check for rest of separator */ 491 *bp++ = '-'; 492 while (bp < &buf[sizeof buf - 1] && 493 (c = fgetc(fp)) != EOF && c != '\n') 494 { 495 *bp++ = c; 496 } 497 *bp = '\0'; 498 MimeBoundaryType = mimeboundary(buf, boundaries); 499 switch (MimeBoundaryType) 500 { 501 case MBT_FINAL: 502 case MBT_INTERMED: 503 /* we have a message boundary */ 504 buflen = 0; 505 return EOF; 506 } 507 508 atbol = c == '\n'; 509 if (c != EOF) 510 *bp++ = c; 511 } 512 513 buflen = bp - buf - 1; 514 if (buflen < 0) 515 return EOF; 516 bp = buf; 517 return *bp++; 518 } 519 /* 520 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 521 ** 522 ** Parameters: 523 ** line -- the input line. 524 ** boundaries -- the set of currently pending boundaries. 525 ** 526 ** Returns: 527 ** MBT_NOTSEP -- if this is not a separator line 528 ** MBT_INTERMED -- if this is an intermediate separator 529 ** MBT_FINAL -- if this is a final boundary 530 ** MBT_SYNTAX -- if this is a boundary for the wrong 531 ** enclosure -- i.e., a syntax error. 
532 */ 533 534 int 535 mimeboundary(line, boundaries) 536 register char *line; 537 char **boundaries; 538 { 539 int type; 540 int i; 541 int savec; 542 543 if (line[0] != '-' || line[1] != '-' || boundaries == NULL) 544 return MBT_NOTSEP; 545 if (tTd(43, 5)) 546 printf("mimeboundary: line=\"%s\"... ", line); 547 i = strlen(line); 548 if (line[i - 1] == '\n') 549 i--; 550 while (line[i - 1] == ' ' || line[i - 1] == '\t') 551 i--; 552 if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 553 { 554 type = MBT_FINAL; 555 i -= 2; 556 } 557 else 558 type = MBT_INTERMED; 559 560 savec = line[i]; 561 line[i] = '\0'; 562 /* XXX should check for improper nesting here */ 563 if (isboundary(&line[2], boundaries) < 0) 564 type = MBT_NOTSEP; 565 line[i] = savec; 566 if (tTd(43, 5)) 567 printf("%d\n", type); 568 return type; 569 } 570 /* 571 ** DEFCHARSET -- return default character set for message 572 ** 573 ** The first choice for character set is for the mailer 574 ** corresponding to the envelope sender. If neither that 575 ** nor the global configuration file has a default character 576 ** set defined, return "unknown-8bit" as recommended by 577 ** RFC 1428 section 3. 578 ** 579 ** Parameters: 580 ** e -- the envelope for this message. 581 ** 582 ** Returns: 583 ** The default character set for that mailer. 584 */ 585 586 char * 587 defcharset(e) 588 register ENVELOPE *e; 589 { 590 if (e != NULL && e->e_from.q_mailer != NULL && 591 e->e_from.q_mailer->m_defcharset != NULL) 592 return e->e_from.q_mailer->m_defcharset; 593 if (DefaultCharSet != NULL) 594 return DefaultCharSet; 595 return "unknown-8bit"; 596 } 597 /* 598 ** ISBOUNDARY -- is a given string a currently valid boundary? 599 ** 600 ** Parameters: 601 ** line -- the current input line. 602 ** boundaries -- the list of valid boundaries. 603 ** 604 ** Returns: 605 ** The index number in boundaries if the line is found. 606 ** -1 -- otherwise. 
/*
**	The boundaries array is NULL-terminated and is scanned in
**	order, so the index of the first exact match is returned.
**	A NULL line or a NULL list never matches.
*/

int
isboundary(line, boundaries)
	char *line;
	char **boundaries;
{
	register int i;

	if (line == NULL || boundaries == NULL)
		return -1;

	/* the index must advance, or a non-matching first entry spins forever */
	for (i = 0; boundaries[i] != NULL; i++)
	{
		if (strcmp(line, boundaries[i]) == 0)
			return i;
	}
	return -1;
}