1 /* 2 ** Copyright 1998 - 2018 Double Precision, Inc. See COPYING for 3 ** distribution information. 4 */ 5 6 /* 7 */ 8 #ifndef rfc2045_h 9 #define rfc2045_h 10 11 #include "rfc2045/rfc2045_config.h" /* VPATH build */ 12 #include "numlib/numlib.h" 13 #include <sys/types.h> 14 #include <string.h> 15 #include <stdio.h> 16 17 #ifdef __cplusplus 18 extern "C" { 19 #endif 20 21 #if 0 22 } 23 #endif 24 25 #define RFC2045_MIME_MESSAGE_RFC822 "message/rfc822" 26 #define RFC2045_MIME_MESSAGE_GLOBAL "message/global" 27 28 #define RFC2045_MIME_MESSAGE_DELIVERY_STATUS "message/delivery-status" 29 #define RFC2045_MIME_MESSAGE_GLOBAL_DELIVERY_STATUS \ 30 "message/global-delivery-status" 31 32 #define RFC2045_MIME_MESSAGE_HEADERS "text/rfc822-headers" 33 #define RFC2045_MIME_MESSAGE_GLOBAL_HEADERS "message/global-headers" 34 35 int rfc2045_message_content_type(const char *); 36 int rfc2045_delivery_status_content_type(const char *); 37 int rfc2045_message_headers_content_type(const char *); 38 39 #define RFC2045_ISMIME1(p) ((p) && atoi(p) == 1) 40 #define RFC2045_ISMIME1DEF(p) (!(p) || atoi(p) == 1) 41 42 struct rfc2045 { 43 struct rfc2045 *parent; 44 unsigned pindex; 45 struct rfc2045 *next; 46 47 off_t startpos, /* At which offset in msg this section starts */ 48 endpos, /* Where it ends */ 49 startbody, /* Where the body of the msg starts */ 50 endbody; /* endpos - trailing CRLF terminator */ 51 off_t nlines; /* Number of lines in message */ 52 off_t nbodylines; /* Number of lines only in the body */ 53 char *mime_version; 54 char *content_type; 55 struct rfc2045attr *content_type_attr; /* Content-Type: attributes */ 56 57 char *content_disposition; 58 char *boundary; 59 struct rfc2045attr *content_disposition_attr; 60 char *content_transfer_encoding; 61 int content_8bit; /* 62 ** Set if content_transfer_encoding is 63 ** 8bit 64 */ 65 char *content_id; 66 char *content_description; 67 char *content_language; 68 char *content_md5; 69 char *content_base; 70 char *content_location; 71 struct rfc2045ac *rfc2045acptr; 72 int has8bitchars; /* For rewriting */ 73 int haslongline; /* For rewriting */ 74 unsigned rfcviolation; /* Boo-boos */ 75 76 #define RFC2045_ERR8BITHEADER 1 /* 8 bit characters in headers */ 77 /* But this is now OK, in UTF8 mode */ 78 79 #define RFC2045_ERR8BITCONTENT 2 /* 8 bit contents, but no 8bit 80 content-transfer-encoding */ 81 #define RFC2045_ERR2COMPLEX 4 /* Too many nested contents */ 82 #define RFC2045_ERRBADBOUNDARY 8 /* Overlapping MIME boundaries */ 83 84 unsigned numparts; /* # of parts allocated */ 85 86 char *rw_transfer_encoding; /* For rewriting */ 87 88 /* Use quoted-printable for 8bit content */ 89 #define RFC2045_RW_7BIT 1 90 91 /* 92 ** Convert quoted-printable, if the resulting line length is not 93 ** excessive. 94 */ 95 96 #define RFC2045_RW_8BIT 2 97 98 /* 99 ** Convert quoted printable without checking for maximum resulting 100 ** line length. 101 */ 102 #define RFC2045_RW_8BIT_ALWAYS 3 103 104 /* Subsections */ 105 106 struct rfc2045 *firstpart, *lastpart; 107 108 /* Working area */ 109 110 char *workbuf; 111 size_t workbufsize; 112 size_t workbuflen; 113 int workinheader; 114 int workclosed; 115 int isdummy; 116 int informdata; /* In a middle of a long form-data part */ 117 char *header; 118 size_t headersize; 119 size_t headerlen; 120 121 int (*decode_func)(struct rfc2045 *, const char *, size_t); 122 void *misc_decode_ptr; 123 int (*udecode_func)(const char *, size_t, void *); 124 } ; 125 126 struct rfc2045attr { 127 struct rfc2045attr *next; 128 char *name; 129 char *value; 130 } ; 131 132 struct rfc2045 *rfc2045_alloc(); 133 void rfc2045_parse(struct rfc2045 *, const char *, size_t); 134 void rfc2045_parse_partial(struct rfc2045 *); 135 void rfc2045_free(struct rfc2045 *); 136 137 void rfc2045_mimeinfo(const struct rfc2045 *, 138 const char **, 139 const char **, 140 const char **); 141 142 const char *rfc2045_boundary(const struct rfc2045 *); 143 int rfc2045_isflowed(const struct rfc2045 *); 144 int rfc2045_isdelsp(const struct rfc2045 *); 145 char *rfc2045_related_start(const struct rfc2045 *); 146 const char *rfc2045_content_id(const struct rfc2045 *); 147 const char *rfc2045_content_description(const struct rfc2045 *); 148 const char *rfc2045_content_language(const struct rfc2045 *); 149 const char *rfc2045_content_md5(const struct rfc2045 *); 150 151 void rfc2045_mimepos(const struct rfc2045 *, off_t *, off_t *, off_t *, 152 off_t *, off_t *); 153 unsigned rfc2045_mimepartcount(const struct rfc2045 *); 154 155 void rfc2045_xdump(struct rfc2045 *); 156 157 struct rfc2045id { 158 struct rfc2045id *next; 159 int idnum; 160 } ; 161 162 void rfc2045_decode(struct rfc2045 *, 163 void (*)(struct rfc2045 *, struct rfc2045id *, void *), 164 void *); 165 166 struct rfc2045 *rfc2045_find(struct rfc2045 *, const char *); 167 168 169 /* 170 ** Source of an rfc2045-formatted content (internal) 171 */ 172 173 struct rfc2045src { 174 void (*deinit_func)(void *); 175 176 int (*seek_func)(off_t pos, void *); 177 ssize_t (*read_func)(char *buf, size_t cnt, void *); 178 179 void *arg; 180 }; 181 /* Read from a filedesc, returns a malloced buffer */ 182 183 struct rfc2045src *rfc2045src_init_fd(int fd); 184 185 /* Destroy a rfc2045src */ 186 187 void rfc2045src_deinit(struct rfc2045src *); 188 189 /************************/ 190 191 void rfc2045_cdecode_start(struct rfc2045 *, 192 int (*)(const char *, size_t, void *), void *); 193 int rfc2045_cdecode(struct rfc2045 *, const char *, size_t); 194 int rfc2045_cdecode_end(struct rfc2045 *); 195 196 const char *rfc2045_getdefaultcharset(); 197 void rfc2045_setdefaultcharset(const char *); 198 struct rfc2045 *rfc2045_fromfd(int); 199 #define rfc2045_fromfp(f) (rfc2045_fromfd(fileno((f)))) 200 struct rfc2045 *rfc2045header_fromfd(int); 201 #define rfc2045header_fromfp(f) (rfc2045header_fromfd(fileno((f)))) 202 203 extern void rfc2045_error(const char *); 204 205 206 struct rfc2045ac { 207 void (*start_section)(struct rfc2045 *); 208 void (*section_contents)(const char *, size_t); 209 void (*end_section)(); 210 } ; 211 212 struct rfc2045 *rfc2045_alloc_ac(); 213 int rfc2045_ac_check(struct rfc2045 *, int); 214 int rfc2045_rewrite(struct rfc2045 *p, struct rfc2045src *src, int fdout_arg, 215 const char *appname); 216 int rfc2045_rewrite_func(struct rfc2045 *p, struct rfc2045src *src, 217 int (*funcarg)(const char *, int, void *), 218 void *funcargarg, 219 const char *appname); 220 221 /* Internal functions */ 222 223 int rfc2045_try_boundary(struct rfc2045 *, struct rfc2045src *, const char *); 224 char *rfc2045_mk_boundary(struct rfc2045 *, struct rfc2045src *); 225 const char *rfc2045_getattr(const struct rfc2045attr *, const char *); 226 int rfc2045_attrset(struct rfc2045attr **, const char *, const char *); 227 228 /* MIME content base/location */ 229 230 char *rfc2045_content_base(struct rfc2045 *p); 231 /* This joins Content-Base: and Content-Location:, as best as I 232 ** can figure it out. 233 */ 234 235 char *rfc2045_append_url(const char *, const char *); 236 /* Do this with two arbitrary URLs */ 237 238 /* MISC mime functions */ 239 240 struct rfc2045 *rfc2045_searchcontenttype(struct rfc2045 *, const char *); 241 /* Assume that the "real" message text is the first MIME section here 242 ** with the given content type. 243 */ 244 245 int rfc2045_decodemimesection(struct rfc2045src *, /* Message to decode */ 246 struct rfc2045 *, /* MIME section to decode */ 247 int (*)(const char *, size_t, void *), 248 /* 249 ** Callback function that receives decoded 250 ** content. 251 */ 252 void * /* 3rd arg to the callback function */ 253 ); 254 /* 255 ** Decode a given MIME section. 256 */ 257 258 int rfc2045_decodetextmimesection(struct rfc2045src *, /* Message to decode */ 259 struct rfc2045 *, /* MIME section */ 260 const char *, /* Convert to this character set */ 261 int *, /* Set to non-0 if MIME section contained chars that could not be converted to the requested charset */ 262 int (*)(const char *, size_t, void *), 263 /* 264 ** Callback function that receives decoded 265 ** content. 266 */ 267 void * /* 3rd arg to the callback function */ 268 ); 269 /* 270 ** Like decodemimesction(), except that the text is automatically 271 ** convert to the specified character set (this function falls back 272 ** to decodemimesection() if libunicode.a is not available, or if 273 ** either the specified character set, or the MIME character set 274 ** is not supported by libunicode.a 275 */ 276 277 278 /* 279 ** READ HEADERS FROM A MIME SECTION. 280 ** 281 ** Call rfc2045header_start() to allocate a structure for the given 282 ** MIME section. 283 ** 284 ** Call rfc2045header_get() to repeatedly get the next header. 285 ** Function returns < 0 for a failure (out of memory, or something 286 ** like that). Function returns 0 for a success. Example: 287 ** 288 ** rfc2045header_get(ptr, &header, &value, 0); 289 ** 290 ** If success: check if header is NULL - end of headers, else 291 ** "header" and "value" will contain the RFC 822 header. 292 ** 293 ** Last argument is flags: 294 */ 295 296 #define RFC2045H_NOLC 1 /* Do not convert header to lowercase */ 297 #define RFC2045H_KEEPNL 2 /* Preserve newlines in the value string 298 ** of multiline headers. 299 */ 300 301 struct rfc2045headerinfo * 302 rfc2045header_start(struct rfc2045src *,/* Readonly source */ 303 struct rfc2045 * /* MIME section to read */ 304 ); 305 306 int rfc2045header_get(struct rfc2045headerinfo *, 307 char **, /* Header return */ 308 char **, /* Value return */ 309 int); /* Flags */ 310 311 void rfc2045header_end(struct rfc2045headerinfo *); 312 313 314 /* 315 ** Generic MIME header parsing code. 316 ** 317 ** header - something like "text/plain; charset=us-ascii; format=flowed". 318 ** 319 ** header_type_cb - callback function, receives the "text/plain" parameter. 320 ** 321 ** header_param_cb - callback function, repeatedly invoked to process the 322 ** additional parameters. In this example, receives "charset" and "us-ascii". 323 ** Note -t he first parameter will always be in lowercase. 324 ** 325 ** void_arg - passthrough parameter to the callback functions. 326 */ 327 328 int rfc2045_parse_mime_header(const char *header, 329 void (*header_type_cb)(const char *, void *), 330 void (*header_param_cb)(const char *, 331 const char *, 332 void *), 333 void *void_arg); 334 335 /* 336 ** The rfc2045_makereply function is used to generate an initial 337 ** reply to a MIME message. rfc2045_makereply takes the following 338 ** structure: 339 */ 340 341 struct rfc2045_mkreplyinfo { 342 343 struct rfc2045src *src; /* Original message source */ 344 345 struct rfc2045 *rfc2045partp; 346 /* 347 ** rfc2045 structure for the message to reply. This may actually 348 ** represent a single message/rfc822 section within a larger MIME 349 ** message digest, in which case we format a reply to this message. 350 */ 351 352 void *voidarg; /* Transparent argument passed to the callback 353 ** functions. 354 */ 355 356 /* 357 ** The following callback functions are called to generate the reply 358 ** message. They must be initialized. 359 */ 360 361 void (*write_func)(const char *, size_t, void *); 362 /* Called to write out the content of the message */ 363 364 void (*writesig_func)(void *); 365 /* Called to write out the sender's signature */ 366 367 int (*myaddr_func)(const char *, void *); 368 /* myaddr_func receives a pointer to an RFC 822 address, and it 369 ** should return non-zero if the address is the sender's address 370 */ 371 372 const char *replymode; 373 /* 374 ** replymode must be initialized to one of the following. It sets 375 ** the actual template for the generated response. 376 ** 377 ** "forward" - forward original message. 378 ** "forwardatt" - forward original message as an RFC822 attachment 379 ** "reply" - a standard reply to the original message's sender 380 ** "replydsn" - a DSN reply to the original message's sender 381 ** "feedback" - generate a feedback report (RFC 5965) 382 ** "replyfeedback" - "feedback" to the sender's address. 383 ** "replyall" - a "reply to all" response. 384 ** "replylist" - "reply to mailing list" response. This is a reply 385 ** that's addressed to the mailing list the original message was sent 386 ** to. 387 */ 388 389 int replytoenvelope; 390 /* 391 ** If non-zero, the "reply" or "replydsn" message gets addressed to the 392 ** "Return-Path" or "Errors-To" address, if available. 393 */ 394 395 int donotquote; 396 397 /* 398 ** If donotquote is set, the contents of the original message are not 399 ** quoted by any of the "reply" modes, and replysalut (below) does not 400 ** get emitted. 401 */ 402 403 int fullmsg; 404 /* 405 ** For replydsn, feedback, replyfeedback, attach the entire message 406 ** instead of just its headers. 407 */ 408 409 const char *replysalut; 410 /* 411 ** This should be set to the salutation to be used for the reply. 412 ** The following %-formats may appear in this string: 413 ** 414 ** %% - an explicit % character 415 ** 416 ** %n - a newline character 417 ** 418 ** %C - the X-Newsgroup: header from the original message 419 ** 420 ** %N - the Newsgroups: header from the original message 421 ** 422 ** %i - the Message-ID: header from the original message 423 ** 424 ** %f - the original message's sender's address 425 ** 426 ** %F - the original message's sender's name 427 ** 428 ** %S - the Subject: header from the original message 429 ** 430 ** %d - the original message's date, in the local timezone 431 ** 432 ** %{...}d - use strftime() to format the original message's date. 433 ** A plain %d is equivalent to %{%a, %d %b %Y %H:%M:%S %z}d. 434 ** 435 ** Example: "%F writes:" 436 */ 437 438 const char *forwarddescr; 439 /* 440 ** For forwardatt, this is the Content-Description: header, 441 ** (typically "Forwarded message"). 442 */ 443 444 /* 445 ** If not NULL, overrides the Subject: header 446 */ 447 448 const char *subject; 449 450 /* 451 ** When reply mode is 'replydsn', dsnfrom must be set to a valid 452 ** email address that's specified as the address that's generating 453 ** the DSN. 454 */ 455 const char *dsnfrom; 456 457 /* 458 ** When reply mode is 'replyfeedback', feedbacktype must be set to 459 ** one of the registered feedback types: 460 ** "abuse", "fraud", "other", "virus". 461 */ 462 const char *feedbacktype; 463 464 /* 465 ** Feedback report headers. 466 ** 467 ** NOTE: rfc2045_makereply() automatically inserts the 468 ** Feedback-Type: (from feedbacktype), User-Agent:, Version:, and 469 ** Arrival-Date: headers. 470 ** 471 ** This is an array of alternating header name and header value 472 ** strings. The header name string does not contain a colon, 473 ** rfc2045_makereply supplies one. And, basically, generates 474 ** "name: value" from this list. 475 ** 476 ** For convenience-sake, the capitalization of the headers get 477 ** adjusted to match the convention in RFC 5965. 478 ** 479 ** The list, which must contain an even number of strings, is terminated 480 ** by a NULL pointer. 481 */ 482 const char * const *feedbackheaders; 483 484 /* 485 ** Set the reply/fwd MIME headers. If this is a NULL pointer, 486 ** write_func() receives ``Content-Type: text/plain; format=flowed; 487 ** delsp=yes; charset="charset" '' with the charset specified below, 488 ** and "Content-Transfer-Encoding: 8bit". 489 ** 490 ** If this is not a NULL pointer, the effect of 491 ** this function should be invocation of write_func() to perform the 492 ** analogous purpose. 493 ** 494 ** The output of content_set_charset() should be consistent with the 495 ** contents of the charset field. 496 */ 497 498 void (*content_set_charset)(void *); 499 500 /* 501 ** Set the reply/fwd content. 502 ** 503 ** This function gets called at the point where the additional contents 504 ** of the reply/fwd should go. 505 ** 506 ** If this is not a NULL pointer, the effect of this function should 507 ** be invocation of write_func() with the additional contents of the 508 ** reply/fwd. The added content should be consistent with the 509 ** charset field. 510 ** 511 ** Note -- this content is likely to end up in a multipart MIME 512 ** message, as such it should not contain any lines that look like 513 ** MIME boundaries. 514 */ 515 516 void (*content_specify)(void *); 517 518 const char *mailinglists; 519 /* 520 ** This should be set to a whitespace-delimited list of mailing list 521 ** RFC 822 addresses that the respondent is subscribed to. It is used 522 ** to figure out which mailing list the original message was sent to 523 ** (all addresses in the original message are compared against this 524 ** list). In the event that we can't find a mailing list address on 525 ** the original message, "replylist" will fall back to "replyall". 526 */ 527 528 const char *charset; 529 /* The respondent's local charset */ 530 531 const char *forwardsep; 532 /* This is used instead of replysalut for forwards. */ 533 } ; 534 535 int rfc2045_makereply(struct rfc2045_mkreplyinfo *); 536 537 /********** Search message content **********/ 538 539 /* 540 ** Callback passed rfc2045_decodemsgtoutf8() 541 */ 542 543 struct rfc2045_decodemsgtoutf8_cb { 544 545 int flags; /* Optional flags, see below */ 546 547 /* Define a non-null function pointer. It gets the name of a header, 548 ** and the raw, unformatted, header contents. 549 ** If returns non-0, the header gets converted and sent to output. 550 ** If null, all headers are sent 551 */ 552 553 int (*headerfilter_func)(const char *name, const char *raw, void *arg); 554 555 /* The output function */ 556 int (*output_func)(const char *data, size_t cnt, void *arg); 557 558 /* If not null, gets invoked after decoding a single header */ 559 int (*headerdone_func)(const char *headername, void *arg); 560 561 void *arg; /* Passthrough arg to _funcs */ 562 }; 563 564 #define RFC2045_DECODEMSG_NOBODY 0x01 565 /* Do not decode MIME content, headers only */ 566 567 #define RFC2045_DECODEMSG_NOHEADERS 0x02 568 /* 569 ** Do not decode MIME headers, only body. This is the same as using a 570 ** headerfilter_func that always returns 0 571 */ 572 573 #define RFC2045_DECODEMSG_NOHEADERNAME 0x04 574 /* 575 ** Do not prepend name: to converted header content. 576 */ 577 578 579 #define RFC2045_DECODEMSG_NOATTACHHEADERS 0x08 580 /* 581 ** Do not decode MIME headers of attachments. Decode only the message's 582 ** main headers. 583 */ 584 585 /* 586 ** Convert a message into a utf8 bytestream. The output produced by this 587 ** function is a catentation of decoded header and text content data, converted 588 ** to utf8. 589 ** 590 ** This is fed into an output function. The output function takes a single 591 ** octet, and returns 0 if the octet was processed, or a negative value if 592 ** the output was aborted. 593 */ 594 595 int rfc2045_decodemsgtoutf8(struct rfc2045src *src, /* The message */ 596 struct rfc2045 *p, /* The parsed message */ 597 598 /* The callback */ 599 struct rfc2045_decodemsgtoutf8_cb *callback); 600 601 602 /********** Decode RFC 2231 attributes ***********/ 603 604 /* 605 ** rfc2231_decodeType() decodes an RFC 2231-encoded Content-Type: header 606 ** attribute, and rfc2231_decodeDisposition() decodes the attribute in the 607 ** Content-Disposition: header. 608 ** 609 ** chsetPtr, langPtr, and textPtr should point to a char ptr. These 610 ** functions automatically allocate the memory, the caller's responsible for 611 ** freeing it. A NULL argument may be provided if the corresponding 612 ** information is not wanted. 613 */ 614 615 int rfc2231_decodeType(struct rfc2045 *rfc, const char *name, 616 char **chsetPtr, 617 char **langPtr, 618 char **textPtr); 619 620 int rfc2231_decodeDisposition(struct rfc2045 *rfc, const char *name, 621 char **chsetPtr, 622 char **langPtr, 623 char **textPtr); 624 625 /* 626 ** The following two functions convert the decoded string to the local 627 ** charset via unicodelib. textPtr cannot be null, this time, because this 628 ** is the only return value. A NULL myChset is an alias for the default 629 ** charset. 630 */ 631 632 int rfc2231_udecodeType(struct rfc2045 *rfc, const char *name, 633 const char *myChset, 634 char **textPtr); 635 636 int rfc2231_udecodeDisposition(struct rfc2045 *rfc, const char *name, 637 const char *myChset, 638 char **textPtr); 639 640 /* 641 ** Build an RFC 2231-encoded name*=value. 642 ** 643 ** name, value, charset, language: see RFC 2231. 644 ** 645 ** (*cb_func) gets invoked 1 or more time, receives a "name=value" pair 646 ** each time. 647 ** 648 ** cb_func must return 0; a non-0 return terminates rfc2231_attrCreate, which 649 ** passes through the return code. 650 ** 651 */ 652 int rfc2231_attrCreate(const char *name, const char *value, 653 const char *charset, 654 const char *language, 655 int (*cb_func)(const char *param, 656 const char *value, 657 void *void_arg), 658 void *cb_arg); 659 660 /** NON-PUBLIC DATA **/ 661 662 struct rfc2231param { 663 struct rfc2231param *next; 664 665 int paramnum; 666 int encoded; 667 668 const char *value; 669 }; 670 671 void rfc2231_paramDestroy(struct rfc2231param *paramList); 672 int rfc2231_buildAttrList(struct rfc2231param **paramList, 673 const char *name, 674 675 const char *attrName, 676 const char *attrValue); 677 678 void rfc2231_paramDecode(struct rfc2231param *paramList, 679 char *charsetPtr, 680 char *langPtr, 681 char *textPtr, 682 int *charsetLen, 683 int *langLen, 684 int *textLen); 685 686 /* 687 ** Encode an E-mail address as utf-8 address type specified in RFC 6533. 688 ** The e-mail address parameter must be encoded in UTF-8. 689 ** 690 ** The E-mail address is encoded as "rfc822" address type if it has only 691 ** ASCII characters, or if use_rfc822 is set to non0. 692 ** 693 ** A malloc-ed address gets returned. 694 */ 695 696 char *rfc6533_encode(const char *address, int use_rfc822); 697 698 /* 699 ** Decode a utf-8 or an rfc-822 address type. Returns a malloc-ed buffer, 700 ** or NULL if the address cannot be decoded. 701 ** 702 ** Assumes valid UTF-8 coding, and does not verify it. 703 ** 704 ** Does verify, for both rfc-822 and utf-8 formats, that the returned address 705 ** does not contain control characters. 706 */ 707 708 char *rfc6533_decode(const char *address); 709 710 #if 0 711 { 712 #endif 713 714 #ifdef __cplusplus 715 } 716 #endif 717 718 #endif 719