1 /* 2 * Copyright (c) 1994 Eric P. Allman 3 * Copyright (c) 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * %sccs.include.redist.c% 7 */ 8 9 # include "sendmail.h" 10 # include <string.h> 11 12 #ifndef lint 13 static char sccsid[] = "@(#)mime.c 8.9 (Berkeley) 11/19/94"; 14 #endif /* not lint */ 15 16 /* 17 ** MIME support. 18 ** 19 ** I am indebted to John Beck of Hewlett-Packard, who contributed 20 ** his code to me for inclusion. As it turns out, I did not use 21 ** his code since he used a "minimum change" approach that used 22 ** several temp files, and I wanted a "minimum impact" approach 23 ** that would avoid copying. However, looking over his code 24 ** helped me cement my understanding of the problem. 25 ** 26 ** I also looked at, but did not directly use, Nathaniel 27 ** Borenstein's "code.c" module. Again, it functioned as 28 ** a file-to-file translator, which did not fit within my 29 ** design bounds, but it was a useful base for understanding 30 ** the problem. 31 */ 32 33 34 /* character set for hex and base64 encoding */ 35 char Base16Code[] = "0123456789ABCDEF"; 36 char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 37 38 /* types of MIME boundaries */ 39 #define MBT_SYNTAX 0 /* syntax error */ 40 #define MBT_NOTSEP 1 /* not a boundary */ 41 #define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ 42 #define MBT_FINAL 3 /* final boundary (trailing -- included) */ 43 44 static int MimeBoundaryType; /* internal linkage */ 45 /* 46 ** MIME8TO7 -- output 8 bit body in 7 bit format 47 ** 48 ** The header has already been output -- this has to do the 49 ** 8 to 7 bit conversion. It would be easy if we didn't have 50 ** to deal with nested formats (multipart/xxx and message/rfc822). 51 ** 52 ** We won't be called if we don't have to do a conversion, and 53 ** appropriate MIME-Version: and Content-Type: fields have been 54 ** output. Any Content-Transfer-Encoding: field has not been 55 ** output, and we can add it here. 56 ** 57 ** Parameters: 58 ** mci -- mailer connection information. 59 ** header -- the header for this body part. 60 ** e -- envelope. 61 ** boundary -- the message boundary -- NULL if we are 62 ** processing the outer portion. 63 ** 64 ** Returns: 65 ** An indicator of what terminated the message part: 66 ** MBT_FINAL -- the final boundary 67 ** MBT_INTERMED -- an intermediate boundary 68 ** MBT_NOTSEP -- an end of file 69 */ 70 71 int 72 mime8to7(mci, header, e, boundary) 73 register MCI *mci; 74 HDR *header; 75 register ENVELOPE *e; 76 char *boundary; 77 { 78 register char *p; 79 int linelen; 80 int bt; 81 off_t offset; 82 size_t sectionsize, sectionhighbits; 83 char bbuf[128]; 84 char buf[MAXLINE]; 85 86 if (tTd(43, 1)) 87 { 88 printf("mime8to7: boundary=%s\n", 89 boundary == NULL ? "<none>" : boundary); 90 } 91 p = hvalue("Content-Type", header); 92 if (p != NULL && strncasecmp(p, "multipart/", 10) == 0) 93 { 94 register char *q; 95 96 /* oh dear -- this part is hard */ 97 p = strstr(p, "boundary="); /*XXX*/ 98 if (p == NULL) 99 { 100 syserr("mime8to7: Content-Type: %s missing boundary", p); 101 p = "---"; 102 } 103 else 104 p += 9; 105 if (*p == '"') 106 q = strchr(p, '"'); 107 else 108 q = strchr(p, ','); 109 if (q == NULL) 110 q = p + strlen(p); 111 if (q - p > sizeof bbuf - 1) 112 { 113 syserr("mime8to7: multipart boundary \"%.*s\" too long", 114 q - p, p); 115 q = p + sizeof bbuf - 1; 116 } 117 strncpy(bbuf, p, q - p); 118 bbuf[q - p] = '\0'; 119 if (tTd(43, 1)) 120 { 121 printf("mime8to7: multipart boundary \"%s\"\n", bbuf); 122 } 123 124 /* skip the early "comment" prologue */ 125 bt = MBT_FINAL; 126 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 127 { 128 bt = mimeboundary(buf, bbuf); 129 if (bt != MBT_NOTSEP) 130 break; 131 putline(buf, mci); 132 } 133 while (bt != MBT_FINAL) 134 { 135 auto HDR *hdr = NULL; 136 137 sprintf(buf, "--%s", bbuf); 138 putline(buf, mci); 139 collect(e->e_dfp, FALSE, FALSE, &hdr, e); 140 putheader(mci, hdr, e, 0); 141 bt = mime8to7(mci, hdr, e, bbuf); 142 } 143 sprintf(buf, "--%s--", bbuf); 144 putline(buf, mci); 145 146 /* skip the late "comment" epilogue */ 147 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 148 { 149 putline(buf, mci); 150 bt = mimeboundary(buf, boundary); 151 if (bt != MBT_NOTSEP) 152 break; 153 } 154 return bt; 155 } 156 157 /* 158 ** Non-compound body type 159 ** 160 ** Compute the ratio of seven to eight bit characters; 161 ** use that as a heuristic to decide how to do the 162 ** encoding. 163 */ 164 165 /* remember where we were */ 166 offset = ftell(e->e_dfp); 167 if (offset == -1) 168 syserr("mime8to7: cannot ftell on %s", e->e_df); 169 170 /* do a scan of this body type to count character types */ 171 sectionsize = sectionhighbits = 0; 172 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 173 { 174 bt = mimeboundary(buf, boundary); 175 if (bt != MBT_NOTSEP) 176 break; 177 for (p = buf; *p != '\0'; p++) 178 { 179 /* count bytes with the high bit set */ 180 sectionsize++; 181 if (bitset(0200, *p)) 182 sectionhighbits++; 183 } 184 185 /* 186 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 187 ** assume base64. This heuristic avoids double-reading 188 ** large graphics or video files. 189 */ 190 191 if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4) 192 break; 193 } 194 if (feof(e->e_dfp)) 195 bt = MBT_FINAL; 196 197 /* return to the original offset for processing */ 198 /* XXX use relative seeks to handle >31 bit file sizes? */ 199 if (fseek(e->e_dfp, offset, SEEK_SET) < 0) 200 syserr("mime8to7: cannot fseek on %s", e->e_df); 201 202 /* 203 ** Heuristically determine encoding method. 204 ** If more than 1/8 of the total characters have the 205 ** eighth bit set, use base64; else use quoted-printable. 206 */ 207 208 if (tTd(43, 8)) 209 { 210 printf("mime8to7: %ld high bits in %ld bytes\n", 211 sectionhighbits, sectionsize); 212 } 213 if (sectionhighbits == 0) 214 { 215 /* no encoding necessary */ 216 p = hvalue("content-transfer-encoding", header); 217 if (p != NULL) 218 { 219 sprintf(buf, "Content-Transfer-Encoding: %s", p); 220 putline(buf, mci); 221 } 222 putline("", mci); 223 mci->mci_flags &= ~MCIF_INHEADER; 224 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 225 { 226 bt = mimeboundary(buf, boundary); 227 if (bt != MBT_NOTSEP) 228 break; 229 if (buf[0] == 'F' && 230 bitnset(M_ESCFROM, mci->mci_mailer->m_flags) && 231 strncmp(buf, "From ", 5) == 0) 232 (void) putc('>', mci->mci_out); 233 putline(buf, mci); 234 } 235 } 236 else if (sectionsize / 8 < sectionhighbits) 237 { 238 /* use base64 encoding */ 239 int c1, c2; 240 241 putline("Content-Transfer-Encoding: base64", mci); 242 putline("", mci); 243 mci->mci_flags &= ~MCIF_INHEADER; 244 linelen = 0; 245 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 246 { 247 if (linelen > 71) 248 { 249 fputs(mci->mci_mailer->m_eol, mci->mci_out); 250 linelen = 0; 251 } 252 linelen += 4; 253 fputc(Base64Code[c1 >> 2], mci->mci_out); 254 c1 = (c1 & 0x03) << 4; 255 c2 = mime_getchar(e->e_dfp, boundary); 256 if (c2 == EOF) 257 { 258 fputc(Base64Code[c1], mci->mci_out); 259 fputc('=', mci->mci_out); 260 fputc('=', mci->mci_out); 261 break; 262 } 263 c1 |= (c2 >> 4) & 0x0f; 264 fputc(Base64Code[c1], mci->mci_out); 265 c1 = (c2 & 0x0f) << 2; 266 c2 = mime_getchar(e->e_dfp, boundary); 267 if (c2 == EOF) 268 { 269 fputc(Base64Code[c1], mci->mci_out); 270 fputc('=', mci->mci_out); 271 break; 272 } 273 c1 |= (c2 >> 6) & 0x03; 274 fputc(Base64Code[c1], mci->mci_out); 275 fputc(Base64Code[c2 & 0x3f], mci->mci_out); 276 } 277 } 278 else 279 { 280 /* use quoted-printable encoding */ 281 int c1, c2; 282 283 putline("Content-Transfer-Encoding: quoted-printable", mci); 284 putline("", mci); 285 mci->mci_flags &= ~MCIF_INHEADER; 286 linelen = 0; 287 c2 = '\n'; 288 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 289 { 290 if (c1 == '\n') 291 { 292 if (c2 == ' ' || c2 == '\t') 293 { 294 fputc('=', mci->mci_out); 295 fputc(Base16Code[(c2 >> 4) & 0x0f], 296 mci->mci_out); 297 fputc(Base16Code[c2 & 0x0f], 298 mci->mci_out); 299 fputs(mci->mci_mailer->m_eol, 300 mci->mci_out); 301 } 302 fputs(mci->mci_mailer->m_eol, mci->mci_out); 303 linelen = 0; 304 c2 = c1; 305 continue; 306 } 307 if (c2 == ' ' || c2 == '\t') 308 { 309 fputc(c2, mci->mci_out); 310 linelen++; 311 } 312 if (linelen > 72) 313 { 314 fputc('=', mci->mci_out); 315 fputs(mci->mci_mailer->m_eol, mci->mci_out); 316 linelen = 0; 317 c2 = '\n'; 318 } 319 if (c2 == '\n' && c1 == '.' && 320 bitnset(M_XDOT, mci->mci_mailer->m_flags)) 321 { 322 fputc('.', mci->mci_out); 323 linelen++; 324 } 325 if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=') 326 { 327 fputc('=', mci->mci_out); 328 fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out); 329 fputc(Base16Code[c1 & 0x0f], mci->mci_out); 330 linelen += 3; 331 } 332 else if (c1 != ' ' && c1 != '\t') 333 { 334 fputc(c1, mci->mci_out); 335 linelen++; 336 } 337 c2 = c1; 338 } 339 340 /* output any saved character */ 341 if (c2 == ' ' || c2 == '\t') 342 { 343 fputc(c2, mci->mci_out); 344 linelen++; 345 } 346 } 347 if (linelen > 0) 348 fputs(mci->mci_mailer->m_eol, mci->mci_out); 349 return MimeBoundaryType; 350 } 351 352 353 int 354 mime_getchar(fp, boundary) 355 register FILE *fp; 356 char *boundary; 357 { 358 int c; 359 static char *bp = NULL; 360 static int buflen = 0; 361 static bool atbol = TRUE; /* at beginning of line */ 362 static char buf[128]; /* need not be a full line */ 363 364 if (buflen > 0) 365 { 366 buflen--; 367 return *bp++; 368 } 369 c = fgetc(fp); 370 if (atbol && c == '-' && boundary != NULL) 371 { 372 /* check for a message boundary */ 373 bp = buf; 374 c = fgetc(fp); 375 if (c != '-') 376 { 377 if (c != EOF) 378 { 379 *bp = c; 380 buflen++; 381 } 382 return '-'; 383 } 384 385 /* got "--", now check for rest of separator */ 386 *bp++ = '-'; 387 *bp++ = '-'; 388 while (bp < &buf[sizeof buf - 1] && 389 (c = fgetc(fp)) != EOF && c != '\n') 390 { 391 *bp++ = c; 392 } 393 *bp = '\0'; 394 MimeBoundaryType = mimeboundary(buf, boundary); 395 switch (MimeBoundaryType) 396 { 397 case MBT_FINAL: 398 case MBT_INTERMED: 399 /* we have a message boundary */ 400 buflen = 0; 401 return EOF; 402 } 403 404 atbol = c == '\n'; 405 if (c != EOF) 406 *bp++ = c; 407 buflen = bp - buf - 1; 408 bp = buf; 409 return *bp++; 410 } 411 412 atbol = c == '\n'; 413 return c; 414 } 415 /* 416 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 417 ** 418 ** Parameters: 419 ** line -- the input line. 420 ** boundary -- the expected boundary. 421 ** 422 ** Returns: 423 ** MBT_NOTSEP -- if this is not a separator line 424 ** MBT_INTERMED -- if this is an intermediate separator 425 ** MBT_FINAL -- if this is a final boundary 426 ** MBT_SYNTAX -- if this is a boundary for the wrong 427 ** enclosure -- i.e., a syntax error. 428 */ 429 430 int 431 mimeboundary(line, boundary) 432 register char *line; 433 char *boundary; 434 { 435 int type; 436 int i; 437 438 if (line[0] != '-' || line[1] != '-' || boundary == NULL) 439 return MBT_NOTSEP; 440 if (tTd(43, 5)) 441 printf("mimeboundary: bound=\"%s\", line=\"%s\"... ", 442 boundary, line); 443 i = strlen(line); 444 if (line[i - 1] == '\n') 445 i--; 446 if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 447 { 448 type = MBT_FINAL; 449 i -= 2; 450 } 451 else 452 type = MBT_INTERMED; 453 454 /* XXX should check for improper nesting here */ 455 if (strncmp(boundary, &line[2], i - 2) != 0 || 456 strlen(boundary) != i - 2) 457 type = MBT_NOTSEP; 458 if (tTd(43, 5)) 459 printf("%d\n", type); 460 return type; 461 } 462 /* 463 ** DEFCHARSET -- return default character set for message 464 ** 465 ** The first choice for character set is for the mailer 466 ** corresponding to the envelope sender. If neither that 467 ** nor the global configuration file has a default character 468 ** set defined, return "unknown-8bit" as recommended by 469 ** RFC 1428 section 3. 470 ** 471 ** Parameters: 472 ** e -- the envelope for this message. 473 ** 474 ** Returns: 475 ** The default character set for that mailer. 476 */ 477 478 char * 479 defcharset(e) 480 register ENVELOPE *e; 481 { 482 if (e != NULL && e->e_from.q_mailer != NULL && 483 e->e_from.q_mailer->m_defcharset != NULL) 484 return e->e_from.q_mailer->m_defcharset; 485 if (DefaultCharSet != NULL) 486 return DefaultCharSet; 487 return "unknown-8bit"; 488 } 489