1 /* 2 * Copyright (c) 1994 Eric P. Allman 3 * Copyright (c) 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * %sccs.include.redist.c% 7 */ 8 9 # include "sendmail.h" 10 # include <string.h> 11 12 #ifndef lint 13 static char sccsid[] = "@(#)mime.c 8.5 (Berkeley) 08/17/94"; 14 #endif /* not lint */ 15 16 /* 17 ** MIME support. 18 ** 19 ** I am indebted to John Beck of Hewlett-Packard, who contributed 20 ** his code to me for inclusion. As it turns out, I did not use 21 ** his code since he used a "minimum change" approach that used 22 ** several temp files, and I wanted a "minimum impact" approach 23 ** that would avoid copying. However, looking over his code 24 ** helped me cement my understanding of the problem. 25 ** 26 ** I also looked at, but did not directly use, Nathaniel 27 ** Borenstein's "code.c" module. Again, it functioned as 28 ** a file-to-file translator, which did not fit within my 29 ** design bounds, but it was a useful base for understanding 30 ** the problem. 31 */ 32 33 34 /* character set for hex and base64 encoding */ 35 char Base16Code[] = "0123456789ABCDEF"; 36 char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 37 38 /* types of MIME boundaries */ 39 #define MBT_SYNTAX 0 /* syntax error */ 40 #define MBT_NOTSEP 1 /* not a boundary */ 41 #define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ 42 #define MBT_FINAL 3 /* final boundary (trailing -- included) */ 43 44 static int MimeBoundaryType; /* internal linkage */ 45 /* 46 ** MIME8TO7 -- output 8 bit body in 7 bit format 47 ** 48 ** The header has already been output -- this has to do the 49 ** 8 to 7 bit conversion. It would be easy if we didn't have 50 ** to deal with nested formats (multipart/xxx and message/rfc822). 51 ** 52 ** We won't be called if we don't have to do a conversion, and 53 ** appropriate MIME-Version: and Content-Type: fields have been 54 ** output. Any Content-Transfer-Encoding: field has not been 55 ** output, and we can add it here. 56 ** 57 ** Parameters: 58 ** mci -- mailer connection information. 59 ** header -- the header for this body part. 60 ** e -- envelope. 61 ** boundary -- the message boundary -- NULL if we are 62 ** processing the outer portion. 63 ** 64 ** Returns: 65 ** An indicator of what terminated the message part: 66 ** MBT_FINAL -- the final boundary 67 ** MBT_INTERMED -- an intermediate boundary 68 ** MBT_NOTSEP -- an end of file 69 */ 70 71 int 72 mime8to7(mci, header, e, boundary) 73 register MCI *mci; 74 HDR *header; 75 register ENVELOPE *e; 76 char *boundary; 77 { 78 register char *p; 79 int linelen; 80 int bt; 81 off_t offset; 82 size_t sectionsize, sectionhighbits; 83 char bbuf[128]; 84 char buf[MAXLINE]; 85 86 if (tTd(43, 1)) 87 { 88 printf("mime8to7: boundary=%s\n", 89 boundary == NULL ? "<none>" : boundary); 90 } 91 p = hvalue("Content-Type", header); 92 if (p != NULL && strncasecmp(p, "multipart/", 10) == 0) 93 { 94 register char *q; 95 96 /* oh dear -- this part is hard */ 97 p = strstr(p, "boundary="); /*XXX*/ 98 if (p == NULL) 99 { 100 syserr("mime8to7: Content-Type: %s missing boundary", p); 101 p = "---"; 102 } 103 else 104 p += 9; 105 if (*p == '"') 106 q = strchr(p, '"'); 107 else 108 q = strchr(p, ','); 109 if (q == NULL) 110 q = p + strlen(p); 111 if (q - p > sizeof bbuf - 1) 112 { 113 syserr("mime8to7: multipart boundary \"%.*s\" too long", 114 q - p, p); 115 q = p + sizeof bbuf - 1; 116 } 117 strncpy(bbuf, p, q - p); 118 bbuf[q - p] = '\0'; 119 if (tTd(43, 1)) 120 { 121 printf("mime8to7: multipart boundary \"%s\"\n", bbuf); 122 } 123 124 /* skip the early "comment" prologue */ 125 bt = MBT_FINAL; 126 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 127 { 128 bt = mimeboundary(buf, bbuf); 129 if (bt != MBT_NOTSEP) 130 break; 131 putline(buf, mci); 132 } 133 while (bt != MBT_FINAL) 134 { 135 auto HDR *hdr = NULL; 136 137 sprintf(buf, "--%s", bbuf); 138 putline(buf, mci); 139 collect(e->e_dfp, FALSE, FALSE, &hdr, e); 140 putheader(mci, hdr, e); 141 bt = mime8to7(mci, hdr, e, bbuf); 142 } 143 sprintf(buf, "--%s--", bbuf); 144 putline(buf, mci); 145 146 /* skip the late "comment" epilogue */ 147 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 148 { 149 putline(buf, mci); 150 bt = mimeboundary(buf, boundary); 151 if (bt != MBT_NOTSEP) 152 break; 153 } 154 return bt; 155 } 156 157 /* 158 ** Non-compound body type 159 ** 160 ** Compute the ratio of seven to eight bit characters; 161 ** use that as a heuristic to decide how to do the 162 ** encoding. 163 */ 164 165 /* remember where we were */ 166 offset = ftell(e->e_dfp); 167 if (offset == -1) 168 syserr("mime8to7: cannot ftell on %s", e->e_df); 169 170 /* do a scan of this body type to count character types */ 171 sectionsize = sectionhighbits = 0; 172 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 173 { 174 bt = mimeboundary(buf, boundary); 175 if (bt != MBT_NOTSEP) 176 break; 177 for (p = buf; *p != '\0'; p++) 178 { 179 /* count bytes with the high bit set */ 180 sectionsize++; 181 if (bitset(0200, *p)) 182 sectionhighbits++; 183 } 184 185 /* 186 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 187 ** assume base64. This heuristic avoids double-reading 188 ** large graphics or video files. 189 */ 190 191 if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4) 192 break; 193 } 194 if (feof(e->e_dfp)) 195 bt = MBT_FINAL; 196 197 /* return to the original offset for processing */ 198 /* XXX use relative seeks to handle >31 bit file sizes? */ 199 if (fseek(e->e_dfp, offset, SEEK_SET) < 0) 200 syserr("mime8to7: cannot fseek on %s", e->e_df); 201 202 /* 203 ** Heuristically determine encoding method. 204 ** If more than 1/8 of the total characters have the 205 ** eighth bit set, use base64; else use quoted-printable. 206 */ 207 208 if (tTd(43, 8)) 209 { 210 printf("mime8to7: %ld high bits in %ld bytes\n", 211 sectionhighbits, sectionsize); 212 } 213 if (sectionhighbits == 0) 214 { 215 /* no encoding necessary */ 216 p = hvalue("content-transfer-encoding", header); 217 if (p != NULL) 218 { 219 sprintf(buf, "Content-Transfer-Encoding: %s", p); 220 putline(buf, mci); 221 } 222 putline("", mci); 223 mci->mci_flags &= ~MCIF_INHEADER; 224 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 225 { 226 bt = mimeboundary(buf, boundary); 227 if (bt != MBT_NOTSEP) 228 break; 229 if (buf[0] == 'F' && 230 bitnset(M_ESCFROM, mci->mci_mailer->m_flags) && 231 strncmp(buf, "From ", 5) == 0) 232 (void) putc('>', mci->mci_out); 233 putline(buf, mci); 234 } 235 } 236 else if (sectionsize / 8 < sectionhighbits) 237 { 238 /* use base64 encoding */ 239 int c1, c2; 240 241 putline("Content-Transfer-Encoding: base64", mci); 242 putline("", mci); 243 mci->mci_flags &= ~MCIF_INHEADER; 244 linelen = 0; 245 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 246 { 247 if (linelen > 71) 248 { 249 fputs(mci->mci_mailer->m_eol, mci->mci_out); 250 linelen = 0; 251 } 252 linelen += 4; 253 fputc(Base64Code[c1 >> 2], mci->mci_out); 254 c1 = (c1 & 0x03) << 4; 255 c2 = mime_getchar(e->e_dfp, boundary); 256 if (c2 == EOF) 257 { 258 fputc(Base64Code[c1], mci->mci_out); 259 fputc('=', mci->mci_out); 260 fputc('=', mci->mci_out); 261 break; 262 } 263 c1 |= (c2 >> 4) & 0x0f; 264 fputc(Base64Code[c1], mci->mci_out); 265 c1 = (c2 & 0x0f) << 2; 266 c2 = mime_getchar(e->e_dfp, boundary); 267 if (c2 == EOF) 268 { 269 fputc(Base64Code[c1], mci->mci_out); 270 fputc('=', mci->mci_out); 271 break; 272 } 273 c1 |= (c2 >> 6) & 0x03; 274 fputc(Base64Code[c1], mci->mci_out); 275 fputc(Base64Code[c2 & 0x3f], mci->mci_out); 276 } 277 } 278 else 279 { 280 /* use quoted-printable encoding */ 281 int c1, c2; 282 283 putline("Content-Transfer-Encoding: quoted-printable", mci); 284 putline("", mci); 285 mci->mci_flags &= ~MCIF_INHEADER; 286 linelen = 0; 287 c2 = '\n'; 288 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 289 { 290 if (c1 == '\n') 291 { 292 if (c2 == ' ' || c2 == '\t') 293 { 294 fputc('=', mci->mci_out); 295 fputs(mci->mci_mailer->m_eol, mci->mci_out); 296 } 297 fputs(mci->mci_mailer->m_eol, mci->mci_out); 298 linelen = 0; 299 c2 = c1; 300 continue; 301 } 302 else if (c2 == '\n' && c1 == '.' && 303 bitnset(M_XDOT, mci->mci_mailer->m_flags)) 304 { 305 fputc('.', mci->mci_out); 306 linelen++; 307 } 308 if (linelen > 72) 309 { 310 fputc('=', mci->mci_out); 311 fputs(mci->mci_mailer->m_eol, mci->mci_out); 312 linelen = 0; 313 c2 = '\n'; 314 } 315 if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=') 316 { 317 fputc('=', mci->mci_out); 318 fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out); 319 fputc(Base16Code[c1 & 0x0f], mci->mci_out); 320 linelen += 3; 321 } 322 else 323 { 324 fputc(c1, mci->mci_out); 325 linelen++; 326 } 327 c2 = c1; 328 } 329 } 330 if (linelen > 0) 331 fputs(mci->mci_mailer->m_eol, mci->mci_out); 332 return MimeBoundaryType; 333 } 334 335 336 int 337 mime_getchar(fp, boundary) 338 register FILE *fp; 339 char *boundary; 340 { 341 int c; 342 static char *bp = NULL; 343 static int buflen = 0; 344 static bool atbol = TRUE; /* at beginning of line */ 345 static char buf[128]; /* need not be a full line */ 346 347 if (buflen > 0) 348 { 349 buflen--; 350 return *bp++; 351 } 352 c = fgetc(fp); 353 if (atbol && c == '-' && boundary != NULL) 354 { 355 /* check for a message boundary */ 356 bp = buf; 357 c = fgetc(fp); 358 if (c != '-') 359 { 360 if (c != EOF) 361 { 362 *bp = c; 363 buflen++; 364 } 365 return '-'; 366 } 367 368 /* got "--", now check for rest of separator */ 369 *bp++ = '-'; 370 *bp++ = '-'; 371 while (bp < &buf[sizeof buf - 1] && 372 (c = fgetc(fp)) != EOF && c != '\n') 373 { 374 *bp++ = c; 375 } 376 *bp = '\0'; 377 MimeBoundaryType = mimeboundary(buf, boundary); 378 switch (MimeBoundaryType) 379 { 380 case MBT_FINAL: 381 case MBT_INTERMED: 382 /* we have a message boundary */ 383 buflen = 0; 384 return EOF; 385 } 386 387 atbol = c == '\n'; 388 if (c != EOF) 389 *bp++ = c; 390 buflen = bp - buf - 1; 391 bp = buf; 392 return *bp++; 393 } 394 395 atbol = c == '\n'; 396 return c; 397 } 398 /* 399 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 400 ** 401 ** Parameters: 402 ** line -- the input line. 403 ** boundary -- the expected boundary. 404 ** 405 ** Returns: 406 ** MBT_NOTSEP -- if this is not a separator line 407 ** MBT_INTERMED -- if this is an intermediate separator 408 ** MBT_FINAL -- if this is a final boundary 409 ** MBT_SYNTAX -- if this is a boundary for the wrong 410 ** enclosure -- i.e., a syntax error. 411 */ 412 413 int 414 mimeboundary(line, boundary) 415 register char *line; 416 char *boundary; 417 { 418 int type; 419 int i; 420 421 if (line[0] != '-' || line[1] != '-' || boundary == NULL) 422 return MBT_NOTSEP; 423 if (tTd(43, 5)) 424 printf("mimeboundary: bound=\"%s\", line=\"%s\"... ", 425 boundary, line); 426 i = strlen(line); 427 if (line[i - 1] == '\n') 428 i--; 429 if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 430 { 431 type = MBT_FINAL; 432 i -= 2; 433 } 434 else 435 type = MBT_INTERMED; 436 437 /* XXX should check for improper nesting here */ 438 if (strncmp(boundary, &line[2], i - 2) != 0 || 439 strlen(boundary) != i - 2) 440 type = MBT_NOTSEP; 441 if (tTd(43, 5)) 442 printf("%d\n", type); 443 return type; 444 } 445