1 /*
2 ** Copyright 1998 - 2018 Double Precision, Inc. See COPYING for
3 ** distribution information.
4 */
5
6 /*
7 */
8 #if HAVE_CONFIG_H
9 #include "rfc2045_config.h"
10 #endif
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <string.h>
14 #if HAVE_STRINGS_H
15 #include <strings.h>
16 #endif
17 #include <ctype.h>
18 #include "rfc2045.h"
19 #include "rfc822/rfc822.h"
20 #include "rfc2045charset.h"
21
/* Default character set override; while null, RFC2045CHARSET is used. */
static char *rfc2045_defcharset = NULL;

/* When nonzero, Content-* parameters are parsed loosely (reformime mode). */
int rfc2045_in_reformime = 0;

/* Out-of-memory handler; declared here, defined elsewhere. */
extern void rfc2045_enomem();

/*
** Complexity limits.  doline() stops parsing once more than MAXPARTS
** sections exist.  MAXLEVELS is not referenced in the code visible here.
*/
#define MAXLEVELS	20
#define MAXPARTS	300
30
31 /*
32 New RFC2045 structure.
33 */
34
rfc2045_alloc()35 struct rfc2045 *rfc2045_alloc()
36 {
37 struct rfc2045 *p=(struct rfc2045 *)malloc(sizeof(struct rfc2045));
38
39 if (!p)
40 {
41 rfc2045_enomem();
42 return (0);
43 }
44
45 /* Initialize everything to nulls, except for one thing */
46
47 memset(p, '\0', sizeof(*p));
48
49 p->pindex=1; /* Start with part #1 */
50 p->workinheader=1;
51 /* Most of the time, we're about to read a header */
52
53 return (p);
54 }
55
rfc2045_getattr(const struct rfc2045attr * p,const char * name)56 const char *rfc2045_getattr(const struct rfc2045attr *p, const char *name)
57 {
58 while (p)
59 {
60 if (p->name && strcmp(p->name, name) == 0)
61 return (p->value);
62 p=p->next;
63 }
64 return (0);
65 }
66
rfc2045_attrset(struct rfc2045attr ** p,const char * name,const char * val)67 int rfc2045_attrset(struct rfc2045attr **p, const char *name, const char *val)
68 {
69 char *v;
70
71 while (*p)
72 {
73 if (strcmp( (*p)->name, name) == 0) break;
74 p=&(*p)->next;
75 }
76 if (val == 0)
77 {
78 struct rfc2045attr *q= *p;
79
80 if (q)
81 {
82 *p=q->next;
83 if (q->name) free(q->name);
84 if (q->value) free(q->value);
85 free(q);
86 }
87 return 0;
88 }
89
90 v=strdup(val);
91 if (!v)
92 return -1;
93
94 if (!*p)
95 {
96 if (((*p)=(struct rfc2045attr *)malloc(sizeof(**p))) == 0)
97 {
98 free(v);
99 return -1;
100 }
101 memset( (*p), 0, sizeof(**p));
102 if ( ((*p)->name=strdup(name)) == 0)
103 {
104 free( *p );
105 *p=0;
106 free(v);
107 return -1;
108 }
109 }
110 if ( (*p)->value ) free ( (*p)->value );
111 (*p)->value=v;
112 return 0;
113 }
114
/*
** ContentBoundary(p) evaluates to the boundary string stored in section p's
** ->boundary member (a null pointer when none is set).
**
** An earlier variant, kept here for reference only, looked the value up in
** the Content-Type attribute list instead:
**
**   static const char cb_name[]="boundary";
**   #define ContentBoundary(p) (rfc2045_getattr( (p)->content_type_attr, cb_name))
*/

#define ContentBoundary(p)	((p)->boundary)
120
121 /*
122 Unallocate the RFC2045 structure. Recursively unallocate
123 all sub-structures. Unallocate all associated buffers.
124 */
125
rfc2045_freeattr(struct rfc2045attr * p)126 static void rfc2045_freeattr(struct rfc2045attr *p)
127 {
128 while (p)
129 {
130 struct rfc2045attr *q=p->next;
131
132 if (p->name) free(p->name);
133 if (p->value) free(p->value);
134 free(p);
135 p=q;
136 }
137 }
138
rfc2045_free(struct rfc2045 * p)139 void rfc2045_free(struct rfc2045 *p)
140 {
141 struct rfc2045 *q, *r;
142
143 for (q=p->firstpart; q; )
144 {
145 r=q->next;
146 rfc2045_free(q);
147 q=r;
148 }
149 rfc2045_freeattr(p->content_type_attr);
150 rfc2045_freeattr(p->content_disposition_attr);
151
152 if (p->header) free(p->header);
153 if (p->content_md5) free(p->content_md5);
154 if (p->content_base) free(p->content_base);
155 if (p->content_location) free(p->content_location);
156 if (p->content_language) free(p->content_language);
157 if (p->content_id) free(p->content_id);
158 if (p->content_description) free(p->content_description);
159 if (p->content_transfer_encoding) free(p->content_transfer_encoding);
160 if (p->boundary) free(p->boundary);
161 if (p->content_type) free(p->content_type);
162 if (p->mime_version) free(p->mime_version);
163 if (p->workbuf) free(p->workbuf);
164 if (p->content_disposition) free(p->content_disposition);
165 if (p->rw_transfer_encoding) free(p->rw_transfer_encoding);
166 free(p);
167 }
168
/*
** Generic dynamic buffer append.
**
** Appends len bytes from p to the buffer *bufptr, which currently holds
** *buflen bytes out of a capacity of *bufsize.  If the data does not fit,
** the buffer is reallocated to the required size plus 256 spare bytes and
** *bufptr / *bufsize are updated.
**
** If memory cannot be obtained, or the requested size cannot be represented
** in a size_t, rfc2045_enomem() is called and the buffer is left unchanged.
*/

void rfc2045_add_buf(
	char **bufptr,		/* Buffer */
	size_t *bufsize,	/* Buffer's maximum size */
	size_t *buflen,		/* Buffer's current size */

	const char *p, size_t len)	/* Append this data */
{
	const size_t slack = 256;
	const size_t maxsize = (size_t)-1;
	size_t needed;

	/* Refuse sizes that would wrap around instead of under-allocating. */
	if (len > maxsize - *buflen)
	{
		rfc2045_enomem();
		return;
	}
	needed = len + *buflen;

	if (needed > *bufsize)
	{
		char *newbuf;

		if (needed > maxsize - slack)
		{
			rfc2045_enomem();
			return;
		}

		/* realloc() of a null pointer behaves like malloc() */
		newbuf = realloc(*bufptr, needed + slack);
		if (newbuf == NULL)
		{
			rfc2045_enomem();
			return;
		}
		*bufptr = newbuf;
		*bufsize = needed + slack;
	}

	/* Nothing to copy for an empty append; *bufptr may still be null. */
	if (len > 0)
		memcpy(*bufptr + *buflen, p, len);
	*buflen = needed;
}
198
199 /* Append to the work buffer */
200
rfc2045_add_workbuf(struct rfc2045 * h,const char * p,size_t len)201 void rfc2045_add_workbuf(struct rfc2045 *h, const char *p, size_t len)
202 {
203 rfc2045_add_buf( &h->workbuf, &h->workbufsize, &h->workbuflen, p, len);
204 }
205
206 /* Append one character to the work buffer */
207
rfc2045_add_workbufch(struct rfc2045 * h,int c)208 void rfc2045_add_workbufch(struct rfc2045 *h, int c)
209 {
210 char cc= (char)c;
211
212 rfc2045_add_workbuf(h, &cc, 1);
213 }
214
/*
** Replace the string held in *p with a freshly allocated copy of q.
**
** Whatever *p held before is freed first.  If q is null, *p is simply left
** null.  If the copy cannot be allocated, rfc2045_enomem() is called and
** *p is left null.
*/

static void set_string(char **p,
		       const char *q)
{
	size_t nbytes;
	char *copy;

	free(*p);
	*p = NULL;

	if (q == NULL)
		return;

	nbytes = strlen(q) + 1;	/* include the terminating NUL */
	copy = malloc(nbytes);
	if (copy == NULL)
	{
		rfc2045_enomem();
		return;
	}

	memcpy(copy, q, nbytes);
	*p = copy;
}
237
238 /* Update byte counts for this structure, and all the superstructures */
239
update_counts(struct rfc2045 * p,size_t newcnt,size_t newendcnt,unsigned nlines)240 static void update_counts(struct rfc2045 *p, size_t newcnt, size_t newendcnt,
241 unsigned nlines)
242 {
243 while (p)
244 {
245 p->endpos = newcnt;
246 p->endbody = newendcnt;
247 p->nlines += nlines;
248 if (!p->workinheader)
249 p->nbodylines += nlines;
250 p=p->parent;
251 }
252 }
253
254 /*
255 Main entry point for RFC2045 parsing. External data is fed
256 by repetitively calling rfc2045_parse().
257
258 rfc2045_parse() breaks up input into lines, and calls doline()
259 to process each line.
260 */
261
262 static void doline(struct rfc2045 *);
263
264 void rfc2045_parse_partial(struct rfc2045 *h);
265
rfc2045_parse(struct rfc2045 * h,const char * buf,size_t s)266 void rfc2045_parse(struct rfc2045 *h, const char *buf, size_t s)
267 {
268 size_t l;
269
270 while (s)
271 {
272 for (l=0; l<s; l++)
273 if (buf[l] == '\n') break;
274 if (l < s && buf[l] == '\n')
275 {
276 ++l;
277 rfc2045_add_workbuf(h, buf, l);
278 doline(h);
279 h->workbuflen=0;
280 }
281 else
282 rfc2045_add_workbuf(h, buf, l);
283 buf += l;
284 s -= l;
285 }
286
287 if (h->workbuflen > 1024)
288 rfc2045_parse_partial(h);
289 }
290
rfc2045_parse_partial(struct rfc2045 * h)291 void rfc2045_parse_partial(struct rfc2045 *h)
292 {
293 /*
294 ** Our buffer's getting pretty big. Let's see if we can
295 ** partially handle it.
296 */
297
298 if (h->workbuflen > 0)
299 {
300 struct rfc2045 *p;
301 int l, i;
302
303 for (p=h; p->lastpart && !p->lastpart->workclosed;
304 p=p->lastpart)
305 ;
306
307 /* If p->workinheader, we've got a mother of all headers
308 ** here. Well, that's just too bad, we'll end up garbling
309 ** it.
310 */
311
312 l=h->workbuflen;
313
314 /* We do need to make sure that the final \r\n gets
315 ** stripped off, so don't gobble up everything if
316 ** the last character we see is a \r
317 */
318
319 if (h->workbuf[l-1] == '\r')
320 --l;
321
322 /* If we'll be rewriting, make sure rwprep knows about
323 ** stuff that was skipped just now. */
324
325 if (h->rfc2045acptr && !p->workinheader &&
326 (!p->lastpart || !p->lastpart->workclosed))
327 (*h->rfc2045acptr->section_contents)(h->workbuf, l);
328
329 update_counts(p, p->endpos+l, p->endpos+l, 0);
330 p->informdata=1;
331 for (i=0; l<h->workbuflen; l++)
332 h->workbuf[i++]=h->workbuf[l];
333 h->workbuflen=i;
334 }
335 }
336
337 /*
338 Append a new RFC2045 subpart. Adds new RFC2045 structure to the
339 end of the list of existing RFC2045 substructures.
340 */
341
append_part_noinherit(struct rfc2045 * p,size_t startpos)342 static struct rfc2045 *append_part_noinherit(struct rfc2045 *p, size_t startpos){
343 struct rfc2045 *newp;
344
345 newp=rfc2045_alloc();
346 if (p->lastpart)
347 {
348 p->lastpart->next=newp;
349 newp->pindex=p->lastpart->pindex+1;
350 }
351 else
352 {
353 p->firstpart=newp;
354 newp->pindex=0;
355 }
356 p->lastpart=newp;
357 newp->parent=p;
358
359 /* Initialize source pointers */
360 newp->startpos=newp->endpos=newp->startbody=newp->endbody=startpos;
361
362 while (p->parent)
363 p=p->parent;
364 ++p->numparts;
365
366 return (newp);
367 }
368
append_part(struct rfc2045 * p,size_t startpos)369 static struct rfc2045 *append_part(struct rfc2045 *p, size_t startpos)
370 {
371 struct rfc2045 *newp=append_part_noinherit(p, startpos);
372
373 /* Substructures inherit content transfer encoding and character set */
374
375 set_string(&newp->content_transfer_encoding,
376 p->content_transfer_encoding);
377
378 if (rfc2045_attrset(&newp->content_type_attr, "charset",
379 rfc2045_getattr(p->content_type_attr, "charset"))
380 < 0)
381 rfc2045_enomem();
382
383 return (newp);
384 }
385
386 /*
387 doline() processes next line in the RFC2045 message.
388
389 Drills down the list of all the multipart messages currently open,
390 and checks if the line is a boundary line for the given multipart.
391 In theory the boundary line, if there is one, should be the boundary
392 line only for the inner multipart only, but, this takes into account
393 broken MIME messages.
394 */
395
396 static void do_header(struct rfc2045 *);
397
doline(struct rfc2045 * p)398 static void doline(struct rfc2045 *p)
399 {
400 size_t cnt=p->workbuflen;
401 char *c=p->workbuf;
402 size_t n=cnt-1; /* Strip \n (we always get at least a \n here) */
403 struct rfc2045 *newp;
404 struct rfc2045ac *rwp=p->rfc2045acptr;
405 unsigned num_levels=0;
406
407 size_t k;
408 int bit8=0;
409
410 if (p->numparts > MAXPARTS)
411 {
412 p->rfcviolation |= RFC2045_ERR2COMPLEX;
413 return;
414 }
415
416 for (k=0; k<cnt; k++)
417 {
418 if (c[k] == 0)
419 c[k]=' ';
420 if (c[k] & 0x80) bit8=1;
421 }
422
423 if (n && c[n-1] == '\r') /* Strip trailing \r */
424 --n;
425
426 /* Before the main drill down loop before, look ahead and see if we're
427 ** in a middle of a form-data section. */
428
429 for (newp=p; newp->lastpart &&
430 !newp->lastpart->workclosed; newp=newp->lastpart,
431 ++num_levels)
432 {
433 if (ContentBoundary(newp) == 0 || newp->workinheader)
434 continue;
435
436 if (newp->lastpart->informdata)
437 {
438 p=newp->lastpart;
439 p->informdata=0;
440 break;
441 }
442 }
443
444 /* Drill down until we match a boundary, or until we've reached
445 the last RFC2045 section that has been opened.
446 */
447
448 while (p->lastpart)
449 {
450 size_t l;
451 const char *cb;
452
453 if (p->lastpart->workclosed)
454 {
455 update_counts(p, p->endpos+cnt, p->endpos+n, 1);
456 return;
457 }
458 /* Leftover trash -- workclosed is set when the final
459 ** terminating boundary has been seen */
460
461 /* content_boundary may be set before the entire header
462 ** has been seen, so continue drilling down in that case
463 */
464
465 cb=ContentBoundary(p);
466
467 if (cb == 0 || p->workinheader)
468 {
469 p=p->lastpart;
470 ++num_levels;
471 continue;
472 }
473
474 l=strlen(cb);
475
476 if (c[0] == '-' && c[1] == '-' && n >= 2+l &&
477 strncasecmp(cb, c+2, l) == 0)
478 {
479
480 if (rwp && (!p->lastpart || !p->lastpart->isdummy))
481 (*rwp->end_section)();
482
483 /* Ok, we've found a boundary */
484
485 if (n >= 4+l && strncmp(c+2+l, "--", 2) == 0)
486 {
487 /* Last boundary */
488
489 p->lastpart->workclosed=1;
490 update_counts(p, p->endpos+cnt, p->endpos+cnt,
491 1);
492 return;
493 }
494
495 /* Create new RFC2045 section */
496
497 newp=append_part(p, p->endpos+cnt);
498 update_counts(p, p->endpos+cnt, p->endpos+n, 1);
499
500 /* The new RFC2045 section is MIME compliant */
501
502 if ((newp->mime_version=strdup(p->mime_version)) == 0)
503 rfc2045_enomem();
504 return;
505 }
506 p=p->lastpart;
507 ++num_levels;
508 }
509
510 /* Ok, we've found the RFC2045 section that we're working with.
511 ** No what?
512 */
513
514 if (! p->workinheader)
515 {
516 /* Processing body, just update the counts. */
517
518 size_t cnt_update=cnt;
519
520 if (bit8 && !p->content_8bit &&
521 (p->rfcviolation & RFC2045_ERR8BITCONTENT) == 0)
522 {
523 struct rfc2045 *q;
524
525 for (q=p; q; q=q->parent)
526 q->rfcviolation |= RFC2045_ERR8BITCONTENT;
527 }
528
529 /*
530 ** In multiparts, the final newline in a part belongs to the
531 ** boundary, otherwise, include it in the text.
532 */
533 if (p->parent && p->parent->content_type &&
534 strncasecmp(p->parent->content_type,
535 "multipart/", 10) == 0)
536 cnt_update=n;
537
538 if (!p->lastpart || !p->lastpart->workclosed)
539 {
540 if (rwp && !p->isdummy)
541 (*rwp->section_contents)(c, cnt);
542
543 update_counts(p, p->endpos+cnt, p->endpos+cnt_update,
544 1);
545 }
546 return;
547 }
548
549 if (bit8 && (p->rfcviolation & RFC2045_ERR8BITHEADER) == 0)
550 {
551 struct rfc2045 *q;
552
553 for (q=p; q; q=q->parent)
554 q->rfcviolation |= RFC2045_ERR8BITHEADER;
555 }
556
557 /* In the header */
558
559 if ( n == 0 ) /* End of header, body begins. Parse header. */
560 {
561 do_header(p); /* Clean up any left over header line */
562 p->workinheader=0;
563
564 /* Message body starts right here */
565
566 p->startbody=p->endpos+cnt;
567 update_counts(p, p->startbody, p->startbody, 1);
568 --p->nbodylines; /* Don't count the blank line */
569
570 /* Discard content type and boundary if I don't understand
571 ** this MIME flavor.
572 */
573
574 if (!RFC2045_ISMIME1(p->mime_version))
575 {
576 set_string(&p->content_type, 0);
577
578 rfc2045_freeattr(p->content_type_attr);
579 p->content_type_attr=0;
580 set_string(&p->content_disposition, 0);
581 rfc2045_freeattr(p->content_disposition_attr);
582 p->content_disposition_attr=0;
583 if (p->boundary)
584 {
585 free(p->boundary);
586 p->boundary=0;
587 }
588 }
589
590 /* Normally, if we don't have a content_type, default it
591 ** to text/plain. However, if the multipart type is
592 ** multipart/digest, it is message/rfc822.
593 */
594
595 if (RFC2045_ISMIME1(p->mime_version) && !p->content_type)
596 {
597 char *q="text/plain";
598
599 if (p->parent && p->parent->content_type &&
600 strcmp(p->parent->content_type,
601 "multipart/digest") == 0)
602 q="message/rfc822";
603 set_string(&p->content_type, q);
604 }
605
606 /* If this is not a multipart section, we don't want to
607 ** hear about any boundaries
608 */
609
610 if (!p->content_type ||
611 strncmp(p->content_type, "multipart/", 10))
612 {
613 if (p->boundary)
614 free(p->boundary);
615 p->boundary=0;
616 }
617
618 /* If this section's a message, we will expect to see
619 ** more RFC2045 stuff, so create a nested RFC2045 structure,
620 ** and indicate that we expect to see headers.
621 */
622
623 if (p->content_type &&
624 rfc2045_message_content_type(p->content_type))
625 {
626 newp=append_part_noinherit(p, p->startbody);
627 newp->workinheader=1;
628 return;
629 }
630
631 /*
632 ** If this is a multipart message (boundary defined),
633 ** create a RFC2045 structure for the pseudo-section
634 ** that precedes the first boundary line.
635 */
636
637 if (ContentBoundary(p))
638 {
639 newp=append_part(p, p->startbody);
640 newp->workinheader=0;
641 newp->isdummy=1;
642 /* It's easier just to create it. */
643 return;
644 }
645
646 if (rwp)
647 (*rwp->start_section)(p);
648 return;
649 }
650
651 /* RFC822 header continues */
652
653 update_counts(p, p->endpos + cnt, p->endpos+n, 1);
654
655 /*
656 ** Until we see an official start of message body, the body starts
657 ** right after what we just read.
658 */
659 p->startbody=p->endbody;
660
661 /* If this header line starts with a space, append one space
662 ** to the saved contents of the previous line, and append this
663 ** line to it.
664 */
665
666 if (isspace((int)(unsigned char)*c))
667 {
668 rfc2045_add_buf(&p->header, &p->headersize, &p->headerlen, " ", 1);
669 }
670 else
671 {
672 /* Otherwise the previous header line is complete, so process it */
673
674 do_header(p);
675 p->headerlen=0;
676 }
677
678 /* Save this line in the header buffer, because the next line
679 ** could be a continuation.
680 */
681
682 rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, c, n);
683 }
684
685 /***********************************************************************/
686
687 /*
688 ** paste_tokens() - recombine an array of RFC822 tokens back as a string.
689 ** (Comments) are ignored.
690 */
691
paste_tokens(struct rfc822t * h,int start,int cnt)692 static char *paste_tokens(struct rfc822t *h, int start, int cnt)
693 {
694 int l;
695 int i;
696 char *p;
697
698 /* Calculate string size */
699
700 l=1;
701 for (i=0; i<cnt; i++)
702 {
703 if (h->tokens[start+i].token == '(')
704 continue;
705
706 if (rfc822_is_atom(h->tokens[start+i].token))
707 l += h->tokens[start+i].len;
708 else
709 l++;
710 }
711
712 /* Do it */
713
714 p=( char *)malloc(l);
715 if (!p)
716 {
717 rfc2045_enomem();
718 return (0);
719 }
720 l=0;
721
722 for (i=0; i<cnt; i++)
723 {
724 if (h->tokens[start+i].token == '(')
725 continue;
726
727 if (rfc822_is_atom(h->tokens[start+i].token))
728 {
729 int l2=h->tokens[start+i].len;
730
731 memcpy(p+l, h->tokens[start+i].ptr, l2);
732 l += l2;
733 }
734 else p[l++]=h->tokens[start+i].token;
735 }
736 p[l]=0;
737 return (p);
738 }
739
740 /*
741 ** Whether this MIME content type is a nested MIME message.
742 */
743
rfc2045_message_content_type(const char * content_type)744 int rfc2045_message_content_type(const char *content_type)
745 {
746 return strcasecmp(content_type, RFC2045_MIME_MESSAGE_RFC822) == 0 ||
747 strcasecmp(content_type, RFC2045_MIME_MESSAGE_GLOBAL) == 0;
748 }
749
750 /*
751 ** Whether this MIME content type is a delivery status notification.
752 */
753
rfc2045_delivery_status_content_type(const char * content_type)754 int rfc2045_delivery_status_content_type(const char *content_type)
755 {
756 return strcasecmp(content_type,
757 RFC2045_MIME_MESSAGE_DELIVERY_STATUS) == 0 ||
758 strcasecmp(content_type,
759 RFC2045_MIME_MESSAGE_GLOBAL_DELIVERY_STATUS) == 0;
760 }
761
rfc2045_message_headers_content_type(const char * content_type)762 int rfc2045_message_headers_content_type(const char *content_type)
763 {
764 return strcasecmp(content_type,
765 RFC2045_MIME_MESSAGE_HEADERS) == 0 ||
766 strcasecmp(content_type,
767 RFC2045_MIME_MESSAGE_GLOBAL_HEADERS) == 0;
768 }
769
/*
** Same as paste_tokens(), with the resulting string forced to lowercase.
** Returns a null pointer if paste_tokens() does.
*/

static char *lower_paste_tokens(struct rfc822t *h, int start, int cnt)
{
	char *p = paste_tokens(h, start, cnt);
	char *q;

	/* tolower() is only defined for values representable as unsigned
	** char (or EOF), so bytes >= 0x80 must not be passed as negative
	** plain chars. */
	for (q = p; q && *q; q++)
		*q = (char)tolower((int)(unsigned char)*q);
	return (p);
}
783
paste_token(struct rfc822t * h,int i)784 static char *paste_token(struct rfc822t *h, int i)
785 {
786 if (i >= h->ntokens) return (0);
787 return (paste_tokens(h, i, 1));
788 }
789
/*
** Same as paste_token(), with the resulting string forced to lowercase.
** Returns a null pointer if paste_token() does.
*/

static char *lower_paste_token(struct rfc822t *h, int i)
{
	char *p = paste_token(h, i);
	char *q;

	/* tolower() is only defined for values representable as unsigned
	** char (or EOF), so bytes >= 0x80 must not be passed as negative
	** plain chars. */
	for (q = p; q && *q; q++)
		*q = (char)tolower((int)(unsigned char)*q);
	return (p);
}
799
800 /*
801 do_header() - process completed RFC822 header.
802 */
803
804 static void mime_version(struct rfc2045 *, struct rfc822t *);
805 static void content_type(struct rfc2045 *, struct rfc822t *);
806 static void content_transfer_encoding(struct rfc2045 *, struct rfc822t *);
807 static void content_disposition(struct rfc2045 *, struct rfc822t *);
808 static void content_id(struct rfc2045 *, struct rfc822t *);
809 static void content_description(struct rfc2045 *, const char *);
810 static void content_language(struct rfc2045 *, const char *);
811 static void content_md5(struct rfc2045 *, const char *);
812 static void content_base(struct rfc2045 *, struct rfc822t *);
813 static void content_location(struct rfc2045 *, struct rfc822t *);
814
do_header(struct rfc2045 * p)815 static void do_header(struct rfc2045 *p)
816 {
817 struct rfc822t *header;
818 char *t;
819
820 if (p->headerlen == 0) return;
821 rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, "", 1);
822 /* 0 terminate */
823
824 /* Parse the header line according to RFC822 */
825
826 header=rfc822t_alloc_new(p->header, NULL, NULL);
827
828 if (!header) return; /* Broken header */
829
830 if (header->ntokens < 2 ||
831 header->tokens[0].token ||
832 header->tokens[1].token != ':')
833 {
834 rfc822t_free(header);
835 return; /* Broken header */
836 }
837
838 t=lower_paste_token(header, 0);
839
840 if (t == 0)
841 ;
842 else if (strcmp(t, "mime-version") == 0)
843 {
844 free(t);
845 mime_version(p, header);
846 }
847 else if (strcmp(t, "content-type") == 0)
848 {
849 free(t);
850 content_type(p, header);
851 } else if (strcmp(t, "content-transfer-encoding") == 0)
852 {
853 free(t);
854 content_transfer_encoding(p, header);
855 } else if (strcmp(t, "content-disposition") == 0)
856 {
857 free(t);
858 content_disposition(p, header);
859 } else if (strcmp(t, "content-id") == 0)
860 {
861 free(t);
862 content_id(p, header);
863 } else if (strcmp(t, "content-description") == 0)
864 {
865 free(t);
866 t=strchr(p->header, ':');
867 if (t) ++t;
868 while (t && isspace((int)(unsigned char)*t))
869 ++t;
870 content_description(p, t);
871 } else if (strcmp(t, "content-language") == 0)
872 {
873 free(t);
874 t=strchr(p->header, ':');
875 if (t) ++t;
876 while (t && isspace((int)(unsigned char)*t))
877 ++t;
878 content_language(p, t);
879 } else if (strcmp(t, "content-base") == 0)
880 {
881 free(t);
882 content_base(p, header);
883 } else if (strcmp(t, "content-location") == 0)
884 {
885 free(t);
886 content_location(p, header);
887 } else if (strcmp(t, "content-md5") == 0)
888 {
889 free(t);
890 t=strchr(p->header, ':');
891 if (t) ++t;
892 while (t && isspace((int)(unsigned char)*t))
893 ++t;
894 content_md5(p, t);
895 }
896 else free(t);
897 rfc822t_free(header);
898 }
899
900 /* Mime-Version: and Content-Transfer-Encoding: headers are easy */
901
mime_version(struct rfc2045 * p,struct rfc822t * header)902 static void mime_version(struct rfc2045 *p, struct rfc822t *header)
903 {
904 char *vers=paste_tokens(header, 2, header->ntokens-2);
905
906 if (!vers) return;
907
908 if (p->mime_version) free(p->mime_version);
909 p->mime_version=vers;
910 }
911
content_transfer_encoding(struct rfc2045 * r,struct rfc822t * header)912 static void content_transfer_encoding(struct rfc2045 *r,
913 struct rfc822t *header)
914 {
915 char *p;
916
917 p=lower_paste_tokens(header, 2, header->ntokens-2);
918 if (!p) return;
919
920 if (r->content_transfer_encoding)
921 free(r->content_transfer_encoding);
922 r->content_transfer_encoding=p;
923
924 if (strcmp(p, "8bit") == 0)
925 r->content_8bit=1;
926 }
927
928 /* Dig into the content_type header */
929
parse_content_header(struct rfc822t * header,int init_start,void (* init_token)(char *,void *),void (* init_parameter)(const char *,struct rfc822t *,int,int,void *),void * void_arg)930 static void parse_content_header(struct rfc822t *header,
931 int init_start,
932 void (*init_token)(char *, void *),
933 void (*init_parameter)(const char *,
934 struct rfc822t *,
935 int, int,
936 void *),
937 void *void_arg)
938 {
939 int start;
940 int i, j;
941 char *p;
942
943 /* Look for the 1st ; */
944
945 for (start=init_start; start < header->ntokens; start++)
946 if (header->tokens[start].token == ';')
947 break;
948
949 /* Everything up to the 1st ; is the content type */
950
951 p=lower_paste_tokens(header, init_start, start-init_start);
952 if (!p) return;
953
954 (*init_token)(p, void_arg);
955 if (start < header->ntokens) start++;
956
957 /* Handle the remainder of the Content-Type: header */
958
959 while (start < header->ntokens)
960 {
961 /* Look for next ; */
962
963 for (i=start; i<header->ntokens; i++)
964 if (header->tokens[i].token == ';')
965 break;
966 j=start;
967 if (j < i)
968 {
969 ++j;
970
971 /* We only understand <atom>= */
972
973 while (j < i && header->tokens[j].token == '(')
974 ++j;
975 if (j < i && header->tokens[j].token == '=')
976 {
977 ++j;
978
979 /*
980 ** reformime: loose parsing due to loose
981 ** parsing in MSOE, leading to viruses slipping
982 ** through virus scanners if we strictly
983 ** parsed the content-type header.
984 */
985 if (rfc2045_in_reformime && j < i
986 && header->tokens[j].token == '"')
987 i=j+1;
988
989 p=lower_paste_token(header, start);
990 if (!p) return;
991 (*init_parameter)(p, header, j, i-j, void_arg);
992 free(p);
993 }
994 }
995 if ( i<header->ntokens ) ++i; /* Skip over ; */
996 start=i;
997 }
998 }
999
/*
** Content-Type: header.  Parsing is delegated to parse_content_header(),
** which reports the type to save_content_type() and each parameter to
** save_content_type_parameter().
*/

static void save_content_type(char *, void *);
static void save_content_type_parameter( const char *,
	struct rfc822t *, int, int, void *);

static void content_type(struct rfc2045 *r, struct rfc822t *header)
{
	void *arg = r;

	parse_content_header(header, 2, &save_content_type,
			     &save_content_type_parameter, arg);
}
1011
save_content_type(char * content_type,void * void_arg)1012 static void save_content_type(char *content_type, void *void_arg)
1013 {
1014 struct rfc2045 *r=(struct rfc2045 *)void_arg;
1015
1016 if (r->content_type) free(r->content_type);
1017 r->content_type=content_type;
1018 }
1019
save_content_type_parameter(const char * name,struct rfc822t * header,int start,int len,void * void_arg)1020 static void save_content_type_parameter(const char *name,
1021 struct rfc822t *header, int start,
1022 int len, void *void_arg)
1023 {
1024 struct rfc2045 *r=(struct rfc2045 *)void_arg;
1025 char *p;
1026
1027 p=strcmp(name, "charset") == 0 ?
1028 lower_paste_tokens(header, start, len):
1029 paste_tokens(header, start, len);
1030 if (!p) return;
1031
1032 if (rfc2045_attrset(&r->content_type_attr, name, p) < 0)
1033 {
1034 free(p);
1035 rfc2045_enomem();
1036 return;
1037 }
1038
1039 free(p);
1040
1041 if (strcmp(name, "boundary") == 0)
1042 {
1043 struct rfc2045 *q;
1044
1045 if (r->boundary)
1046 free(r->boundary);
1047 p=lower_paste_tokens(header, start, len);
1048 r->boundary=p;
1049
1050 /*
1051 ** Check all the outer MIME boundaries. If this is a
1052 ** substring of an outer MIME boundary, or the outer
1053 ** boundary is a substring of the inner boundary, we
1054 ** have an ambiguity - see "IMPLEMENTOR'S NOTE" in
1055 ** section 5.1.1 of RFC 2046.
1056 */
1057
1058 for (q=r->parent; q; q=q->parent)
1059 {
1060 const char *a, *b;
1061
1062 if (!q->boundary)
1063 continue;
1064
1065 for (a=q->boundary, b=p; *a && *b; a++, b++)
1066 if (*a != *b)
1067 break;
1068
1069 if (!*a || !*b)
1070 {
1071 while (q->parent)
1072 q=q->parent;
1073 q->rfcviolation |= RFC2045_ERRBADBOUNDARY;
1074 break;
1075 }
1076 }
1077 }
1078 }
1079
/*
** Content-Disposition: header.  Parsing is delegated to
** parse_content_header(), which reports the disposition to
** save_content_disposition() and each parameter to
** save_content_disposition_parameter().
*/

static void save_content_disposition(char *, void *);
static void save_content_disposition_parameter( const char *,
	struct rfc822t *, int, int,
	void *);

static void content_disposition(struct rfc2045 *r, struct rfc822t *header)
{
	void *arg = r;

	parse_content_header(header, 2, &save_content_disposition,
			     &save_content_disposition_parameter, arg);
}
1092
save_content_disposition(char * content_disposition,void * void_arg)1093 static void save_content_disposition(char *content_disposition, void *void_arg)
1094 {
1095 struct rfc2045 *r=(struct rfc2045 *)void_arg;
1096
1097 if (r->content_disposition) free(r->content_disposition);
1098 r->content_disposition=content_disposition;
1099 }
1100
save_content_disposition_parameter(const char * name,struct rfc822t * header,int start,int len,void * void_arg)1101 static void save_content_disposition_parameter(const char *name,
1102 struct rfc822t *header,
1103 int start, int len,
1104 void *void_arg)
1105 {
1106 struct rfc2045 *r=(struct rfc2045 *)void_arg;
1107 char *p;
1108
1109 p=paste_tokens(header, start, len);
1110 if (!p) return;
1111
1112 if (rfc2045_attrset(&r->content_disposition_attr, name, p) < 0)
1113 {
1114 free(p);
1115 rfc2045_enomem();
1116 return;
1117 }
1118 free(p);
1119 }
1120
rfc2045_related_start(const struct rfc2045 * p)1121 char *rfc2045_related_start(const struct rfc2045 *p)
1122 {
1123 const char *cb=rfc2045_getattr( p->content_type_attr, "start");
1124 struct rfc822t *t;
1125 struct rfc822a *a;
1126 int i;
1127
1128 if (!cb || !*cb) return (0);
1129
1130 t=rfc822t_alloc_new(cb, 0, NULL);
1131 if (!t)
1132 {
1133 rfc2045_enomem();
1134 return(0);
1135 }
1136
1137 a=rfc822a_alloc(t);
1138 if (!a)
1139 {
1140 rfc822t_free(t);
1141 rfc2045_enomem();
1142 return (0);
1143 }
1144 for (i=0; i<a->naddrs; i++)
1145 if (a->addrs[i].tokens)
1146 {
1147 char *s=rfc822_getaddr(a, i);
1148
1149 rfc822a_free(a);
1150 rfc822t_free(t);
1151 if (!s)
1152 rfc2045_enomem();
1153 return (s);
1154 }
1155
1156 rfc822a_free(a);
1157 rfc822t_free(t);
1158 return (0);
1159 }
1160
content_id(struct rfc2045 * p,struct rfc822t * t)1161 static void content_id(struct rfc2045 *p, struct rfc822t *t)
1162 {
1163 struct rfc822a *a=rfc822a_alloc(t);
1164 int i;
1165
1166 if (!a)
1167 {
1168 rfc2045_enomem();
1169 return;
1170 }
1171
1172 for (i=0; i<a->naddrs; i++)
1173 if (a->addrs[i].tokens)
1174 {
1175 char *s=rfc822_getaddr(a, i);
1176
1177 if (!s)
1178 {
1179 rfc822a_free(a);
1180 rfc2045_enomem();
1181 return;
1182 }
1183 if (p->content_id)
1184 free(p->content_id);
1185 p->content_id=s;
1186 break;
1187 }
1188
1189 rfc822a_free(a);
1190 }
1191
content_description(struct rfc2045 * p,const char * s)1192 static void content_description(struct rfc2045 *p, const char *s)
1193 {
1194 if (s && *s)
1195 set_string(&p->content_description, s);
1196 }
1197
content_language(struct rfc2045 * p,const char * s)1198 static void content_language(struct rfc2045 *p, const char *s)
1199 {
1200 if (s && *s)
1201 set_string(&p->content_language, s);
1202 }
1203
content_md5(struct rfc2045 * p,const char * s)1204 static void content_md5(struct rfc2045 *p, const char *s)
1205 {
1206 if (s && *s)
1207 set_string(&p->content_md5, s);
1208 }
1209
content_base(struct rfc2045 * p,struct rfc822t * t)1210 static void content_base(struct rfc2045 *p, struct rfc822t *t)
1211 {
1212 char *s;
1213 int i;
1214
1215 for (i=0; i<t->ntokens; i++)
1216 if (t->tokens[i].token == '"')
1217 t->tokens[i].token=0;
1218
1219 s=paste_tokens(t, 2, t->ntokens-2);
1220 set_string(&p->content_base, s);
1221 }
1222
content_location(struct rfc2045 * p,struct rfc822t * t)1223 static void content_location(struct rfc2045 *p, struct rfc822t *t)
1224 {
1225 char *s;
1226 int i;
1227
1228 for (i=0; i<t->ntokens; i++)
1229 if (t->tokens[i].token == '"')
1230 t->tokens[i].token=0;
1231
1232 s=paste_tokens(t, 2, t->ntokens-2);
1233 set_string(&p->content_location, s);
1234 free(s);
1235 }
1236
1237 /* -------------------- */
1238
1239 #define GETINFO(s, def) ( (s) && (*s) ? (s):def)
1240
rfc2045_mimeinfo(const struct rfc2045 * p,const char ** content_type_s,const char ** content_transfer_encoding_s,const char ** charset_s)1241 void rfc2045_mimeinfo(const struct rfc2045 *p,
1242 const char **content_type_s,
1243 const char **content_transfer_encoding_s,
1244 const char **charset_s)
1245 {
1246 const char *c;
1247
1248 *content_type_s=GETINFO(p->content_type, "text/plain");
1249 *content_transfer_encoding_s=GETINFO(p->content_transfer_encoding,
1250 "8bit");
1251
1252 c=rfc2045_getattr(p->content_type_attr, "charset");
1253 if (!c) c=rfc2045_getdefaultcharset();
1254
1255 *charset_s=c;
1256 }
1257
rfc2045_getdefaultcharset()1258 const char *rfc2045_getdefaultcharset()
1259 {
1260 const char *p=rfc2045_defcharset;
1261
1262 if (!p) p=RFC2045CHARSET;
1263 return (p);
1264 }
1265
rfc2045_setdefaultcharset(const char * charset)1266 void rfc2045_setdefaultcharset(const char *charset)
1267 {
1268 char *p=strdup(charset);
1269
1270 if (!p)
1271 {
1272 rfc2045_enomem();
1273 return;
1274 }
1275
1276 if (rfc2045_defcharset) free(rfc2045_defcharset);
1277 rfc2045_defcharset=p;
1278 }
1279
rfc2045_boundary(const struct rfc2045 * p)1280 const char *rfc2045_boundary(const struct rfc2045 *p)
1281 {
1282 const char *cb=rfc2045_getattr( p->content_type_attr, "boundary");
1283
1284 if (!cb) cb="";
1285 return (cb);
1286 }
1287
rfc2045_isflowed(const struct rfc2045 * p)1288 int rfc2045_isflowed(const struct rfc2045 *p)
1289 {
1290 const char *cb=rfc2045_getattr(p->content_type_attr, "format");
1291
1292 return (cb && strcmp(cb, "flowed") == 0);
1293 }
1294
rfc2045_isdelsp(const struct rfc2045 * p)1295 int rfc2045_isdelsp(const struct rfc2045 *p)
1296 {
1297 const char *cb=rfc2045_getattr(p->content_type_attr, "delsp");
1298
1299 return (cb && strcmp(cb, "yes") == 0);
1300 }
1301
rfc2045_content_id(const struct rfc2045 * p)1302 const char *rfc2045_content_id(const struct rfc2045 *p)
1303 {
1304 return (p->content_id ? p->content_id:"");
1305 }
1306
rfc2045_content_description(const struct rfc2045 * p)1307 const char *rfc2045_content_description(const struct rfc2045 *p)
1308 {
1309 return (p->content_description ? p->content_description:"");
1310 }
1311
rfc2045_content_language(const struct rfc2045 * p)1312 const char *rfc2045_content_language(const struct rfc2045 *p)
1313 {
1314 return (p->content_language ? p->content_language:"");
1315 }
1316
rfc2045_content_md5(const struct rfc2045 * p)1317 const char *rfc2045_content_md5(const struct rfc2045 *p)
1318 {
1319 return (p->content_md5 ? p->content_md5:"");
1320 }
1321
rfc2045_mimepos(const struct rfc2045 * p,off_t * start_pos,off_t * end_pos,off_t * start_body,off_t * nlines,off_t * nbodylines)1322 void rfc2045_mimepos(const struct rfc2045 *p,
1323 off_t *start_pos, off_t *end_pos, off_t *start_body,
1324 off_t *nlines, off_t *nbodylines)
1325 {
1326 *start_pos=p->startpos;
1327 *end_pos=p->endpos;
1328
1329 *nlines=p->nlines;
1330 *nbodylines=p->nbodylines;
1331 if (p->parent) /* MIME parts do not have the trailing CRLF */
1332 {
1333 *end_pos=p->endbody;
1334 if (*nlines) --*nlines;
1335 if (*nbodylines) --*nbodylines;
1336 }
1337 *start_body=p->startbody;
1338
1339 if (*start_body == *start_pos) /* No header */
1340 {
1341 *start_body= *end_pos;
1342 }
1343 }
1344
rfc2045_mimepartcount(const struct rfc2045 * p)1345 unsigned rfc2045_mimepartcount(const struct rfc2045 *p)
1346 {
1347 const struct rfc2045 *q;
1348 unsigned n=0;
1349
1350 for (q=p->firstpart; q; q=q->next) ++n;
1351 return (n);
1352 }
1353
1354 /*
1355 ** Generic interface into parse_content_header
1356 */
1357
/*
** Bundles the caller's callbacks and opaque argument, so that
** rfc2045_parse_mime_header() can pass them through
** parse_content_header() as a single pointer.
*/
struct rfc2045_parse_mime_info {
	/* Called by parse_mime_cb() with the string it received and void_arg */
	void (*header_type_cb)(const char *, void *);
	/* Called by parse_param_cb() with the name, pasted value and void_arg */
	void (*header_param_cb)(const char *, const char *, void *);
	/* Handed back unchanged as the last argument of both callbacks */
	void *void_arg;
};
1363
/*
** Adapter callbacks given to parse_content_header(). Each one receives a
** struct rfc2045_parse_mime_info through its final void * argument.
*/
static void parse_mime_cb(char *, void *);
static void parse_param_cb(const char *, struct rfc822t *,
			   int, int, void *);
1367
rfc2045_parse_mime_header(const char * header,void (* header_type_cb)(const char *,void *),void (* header_param_cb)(const char *,const char *,void *),void * void_arg)1368 int rfc2045_parse_mime_header(const char *header,
1369 void (*header_type_cb)(const char *, void *),
1370 void (*header_param_cb)(const char *,
1371 const char *,
1372 void *),
1373 void *void_arg)
1374 {
1375 struct rfc2045_parse_mime_info mi;
1376 struct rfc822t *h=rfc822t_alloc_new(header, NULL, NULL);
1377
1378 mi.header_type_cb=header_type_cb;
1379 mi.header_param_cb=header_param_cb;
1380 mi.void_arg=void_arg;
1381
1382 if (!h)
1383 return -1;
1384
1385 parse_content_header(h, 0, parse_mime_cb, parse_param_cb, &mi);
1386 rfc822t_free(h);
1387 return 0;
1388 }
1389
parse_mime_cb(char * t,void * void_arg)1390 static void parse_mime_cb(char *t, void *void_arg)
1391 {
1392 struct rfc2045_parse_mime_info *mi=
1393 (struct rfc2045_parse_mime_info *)void_arg;
1394
1395 (*mi->header_type_cb)(t, mi->void_arg);
1396 free(t);
1397 }
1398
1399
parse_param_cb(const char * name,struct rfc822t * header,int start,int len,void * void_arg)1400 static void parse_param_cb(const char *name,
1401 struct rfc822t *header, int start,
1402 int len, void *void_arg)
1403 {
1404 struct rfc2045_parse_mime_info *mi=
1405 (struct rfc2045_parse_mime_info *)void_arg;
1406 char *p=paste_tokens(header, start, len);
1407
1408 if (!p)
1409 return;
1410
1411 (*mi->header_param_cb)(name, p, mi->void_arg);
1412 free(p);
1413 }
1414