1 /*
2 ** Copyright 1998 - 2018 Double Precision, Inc.  See COPYING for
3 ** distribution information.
4 */
5 
6 /*
7 */
8 #if    HAVE_CONFIG_H
9 #include       "rfc2045_config.h"
10 #endif
11 #include       <stdlib.h>
12 #include       <stdio.h>
13 #include       <string.h>
14 #if    HAVE_STRINGS_H
15 #include       <strings.h>
16 #endif
17 #include	<ctype.h>
18 #include	"rfc2045.h"
19 #include	"rfc822/rfc822.h"
20 #include	"rfc2045charset.h"
21 
/*
** Default character set override.  While this is a null pointer,
** rfc2045_getdefaultcharset() falls back to RFC2045CHARSET.
*/
static char	*rfc2045_defcharset = NULL;

/*
** When non-zero, parse_content_header() switches to its loose handling of
** quoted parameter values.
*/
int rfc2045_in_reformime = 0;

/* Out-of-memory handler, defined elsewhere */
extern void rfc2045_enomem();

/* Nesting limit (not referenced in the code visible in this part of the file) */
#define	MAXLEVELS	20

/* doline() stops parsing once the message has more than this many parts */
#define	MAXPARTS	300
30 
31 /*
32 	New RFC2045 structure.
33 */
34 
rfc2045_alloc()35 struct rfc2045 *rfc2045_alloc()
36 {
37 struct rfc2045 *p=(struct rfc2045 *)malloc(sizeof(struct rfc2045));
38 
39 	if (!p)
40 	{
41 		rfc2045_enomem();
42 		return (0);
43 	}
44 
45 	/* Initialize everything to nulls, except for one thing */
46 
47 	memset(p, '\0', sizeof(*p));
48 
49 	p->pindex=1;	/* Start with part #1 */
50 	p->workinheader=1;
51 	/* Most of the time, we're about to read a header */
52 
53 	return (p);
54 }
55 
rfc2045_getattr(const struct rfc2045attr * p,const char * name)56 const char *rfc2045_getattr(const struct rfc2045attr *p, const char *name)
57 {
58 	while (p)
59 	{
60 		if (p->name && strcmp(p->name, name) == 0)
61 			return (p->value);
62 		p=p->next;
63 	}
64 	return (0);
65 }
66 
rfc2045_attrset(struct rfc2045attr ** p,const char * name,const char * val)67 int rfc2045_attrset(struct rfc2045attr **p, const char *name, const char *val)
68 {
69 char	*v;
70 
71 	while (*p)
72 	{
73 		if (strcmp( (*p)->name, name) == 0)	break;
74 		p=&(*p)->next;
75 	}
76 	if (val == 0)
77 	{
78 	struct rfc2045attr *q= *p;
79 
80 		if (q)
81 		{
82 			*p=q->next;
83 			if (q->name)	free(q->name);
84 			if (q->value)	free(q->value);
85 			free(q);
86 		}
87 		return 0;
88 	}
89 
90 	v=strdup(val);
91 	if (!v)
92 		return -1;
93 
94 	if (!*p)
95 	{
96 		if (((*p)=(struct rfc2045attr *)malloc(sizeof(**p))) == 0)
97 		{
98 			free(v);
99 			return -1;
100 		}
101 		memset( (*p), 0, sizeof(**p));
102 		if ( ((*p)->name=strdup(name)) == 0)
103 		{
104 			free( *p );
105 			*p=0;
106 			free(v);
107 			return -1;
108 		}
109 	}
110 	if ( (*p)->value )	free ( (*p)->value );
111 	(*p)->value=v;
112 	return 0;
113 }
114 
/*
** The boundary string of a section, or a null pointer if the section has
** none.  It is read from the section's own boundary field; it is not looked
** up in content_type_attr.
*/

#define	ContentBoundary(p)	( (p)->boundary )
120 
121 /*
122 	Unallocate the RFC2045 structure.  Recursively unallocate
123 	all sub-structures.  Unallocate all associated buffers.
124 */
125 
rfc2045_freeattr(struct rfc2045attr * p)126 static void rfc2045_freeattr(struct rfc2045attr *p)
127 {
128 	while (p)
129 	{
130 	struct rfc2045attr *q=p->next;
131 
132 		if (p->name)	free(p->name);
133 		if (p->value)	free(p->value);
134 		free(p);
135 		p=q;
136 	}
137 }
138 
rfc2045_free(struct rfc2045 * p)139 void rfc2045_free(struct rfc2045 *p)
140 {
141 struct rfc2045 *q, *r;
142 
143 	for (q=p->firstpart; q; )
144 	{
145 		r=q->next;
146 		rfc2045_free(q);
147 		q=r;
148 	}
149 	rfc2045_freeattr(p->content_type_attr);
150 	rfc2045_freeattr(p->content_disposition_attr);
151 
152 	if (p->header)		free(p->header);
153 	if (p->content_md5)	free(p->content_md5);
154 	if (p->content_base)	free(p->content_base);
155 	if (p->content_location)	free(p->content_location);
156 	if (p->content_language)	free(p->content_language);
157 	if (p->content_id)	free(p->content_id);
158 	if (p->content_description)	free(p->content_description);
159 	if (p->content_transfer_encoding) free(p->content_transfer_encoding);
160 	if (p->boundary) free(p->boundary);
161 	if (p->content_type)	free(p->content_type);
162 	if (p->mime_version)	free(p->mime_version);
163 	if (p->workbuf)		free(p->workbuf);
164 	if (p->content_disposition) free(p->content_disposition);
165 	if (p->rw_transfer_encoding) free(p->rw_transfer_encoding);
166 	free(p);
167 }
168 
/*
	Generic dynamic buffer append.

	bufptr	- address of the buffer pointer; the pointer may be null
		  when nothing has been allocated yet
	bufsize	- address of the buffer's allocated size
	buflen	- address of the number of bytes currently in the buffer
	p, len	- the data to append

	When the data does not fit, the buffer is grown to the required size
	plus 256 bytes of slack.  If memory cannot be obtained (or the
	required size does not fit in a size_t), rfc2045_enomem() is called
	and the buffer is left as it was, without the new data.
*/

void rfc2045_add_buf(
	char **bufptr,	/* Buffer */
	size_t *bufsize,	/* Buffer's maximum size */
	size_t *buflen,		/* Buffer's current size */

	const char *p, size_t len)	/* Append this data */
{
	size_t	needed;

	/*
	** Nothing to append.  Returning here also keeps memcpy() away from
	** a buffer that has not been allocated yet.
	*/
	if (len == 0)
		return;

	needed = *buflen + len;
	if (needed < len)	/* size_t wrapped around */
	{
		rfc2045_enomem();
		return;
	}

	if (needed > *bufsize)
	{
		size_t	newsize = needed + 256;
		char	*newbuf;

		if (newsize < needed)	/* size_t wrapped around */
		{
			rfc2045_enomem();
			return;
		}

		newbuf = *bufptr ? (char *)realloc(*bufptr, newsize):
				(char *)malloc(newsize);

		if (!newbuf)
		{
			/* The old buffer, if any, is still valid */
			rfc2045_enomem();
			return;
		}
		*bufptr = newbuf;
		*bufsize = newsize;
	}

	memcpy(*bufptr + *buflen, p, len);
	*buflen = needed;
}
198 
199 /* Append to the work buffer */
200 
rfc2045_add_workbuf(struct rfc2045 * h,const char * p,size_t len)201 void rfc2045_add_workbuf(struct rfc2045 *h, const char *p, size_t len)
202 {
203 	rfc2045_add_buf( &h->workbuf, &h->workbufsize, &h->workbuflen, p, len);
204 }
205 
/* Append the single character c to the work buffer of h */

void rfc2045_add_workbufch(struct rfc2045 *h, int c)
{
	char	byte;

	byte = (char)c;
	rfc2045_add_workbuf(h, &byte, 1);
}
214 
/*
	Replace a dynamically allocated string with a copy of another one.

	Whatever *p points to is freed first.  If q is a null pointer, *p is
	left null.  Otherwise *p receives a newly allocated copy of q; if
	the copy cannot be allocated, rfc2045_enomem() is called and *p
	stays null.
*/

static void set_string(char **p,
	const char *q)
{
	size_t	nbytes;

	free(*p);	/* free() ignores null pointers */
	*p = NULL;

	if (q == NULL)
		return;

	nbytes = strlen(q) + 1;		/* Include the terminating null */

	*p = (char *)malloc(nbytes);
	if (*p == NULL)
	{
		rfc2045_enomem();
		return;
	}

	memcpy(*p, q, nbytes);
}
237 
238 /* Update byte counts for this structure, and all the superstructures */
239 
update_counts(struct rfc2045 * p,size_t newcnt,size_t newendcnt,unsigned nlines)240 static void update_counts(struct rfc2045 *p, size_t newcnt, size_t newendcnt,
241 	unsigned nlines)
242 {
243 	while (p)
244 	{
245 		p->endpos = newcnt;
246 		p->endbody = newendcnt;
247 		p->nlines += nlines;
248 		if (!p->workinheader)
249 			p->nbodylines += nlines;
250 		p=p->parent;
251 	}
252 }
253 
254 /*
255 	Main entry point for RFC2045 parsing.  External data is fed
256 	by repetitively calling rfc2045_parse().
257 
258 	rfc2045_parse() breaks up input into lines, and calls doline()
259 	to process each line.
260 */
261 
262 static void doline(struct rfc2045 *);
263 
264 void rfc2045_parse_partial(struct rfc2045 *h);
265 
rfc2045_parse(struct rfc2045 * h,const char * buf,size_t s)266 void rfc2045_parse(struct rfc2045 *h, const char *buf, size_t s)
267 {
268 	size_t	l;
269 
270 	while (s)
271 	{
272 		for (l=0; l<s; l++)
273 			if (buf[l] == '\n')	break;
274 		if (l < s && buf[l] == '\n')
275 		{
276 			++l;
277 			rfc2045_add_workbuf(h, buf, l);
278 			doline(h);
279 			h->workbuflen=0;
280 		}
281 		else
282 			rfc2045_add_workbuf(h, buf, l);
283 		buf += l;
284 		s -= l;
285 	}
286 
287 	if (h->workbuflen > 1024)
288 		rfc2045_parse_partial(h);
289 }
290 
rfc2045_parse_partial(struct rfc2045 * h)291 void rfc2045_parse_partial(struct rfc2045 *h)
292 {
293 	/*
294 	** Our buffer's getting pretty big.  Let's see if we can
295 	** partially handle it.
296 	*/
297 
298 	if (h->workbuflen > 0)
299 	{
300 	struct	rfc2045 *p;
301 	int	l, i;
302 
303 		for (p=h; p->lastpart && !p->lastpart->workclosed;
304 				p=p->lastpart)
305 			;
306 
307 		/* If p->workinheader, we've got a mother of all headers
308 		** here.  Well, that's just too bad, we'll end up garbling
309 		** it.
310 		*/
311 
312 		l=h->workbuflen;
313 
314 		/* We do need to make sure that the final \r\n gets
315 		** stripped off, so don't gobble up everything if
316 		** the last character we see is a \r
317 		*/
318 
319 		if (h->workbuf[l-1] == '\r')
320 			--l;
321 
322 		/* If we'll be rewriting, make sure rwprep knows about
323 		** stuff that was skipped just now. */
324 
325 		if (h->rfc2045acptr && !p->workinheader &&
326 			(!p->lastpart || !p->lastpart->workclosed))
327 			(*h->rfc2045acptr->section_contents)(h->workbuf, l);
328 
329 		update_counts(p, p->endpos+l, p->endpos+l, 0);
330 		p->informdata=1;
331 		for (i=0; l<h->workbuflen; l++)
332 			h->workbuf[i++]=h->workbuf[l];
333 		h->workbuflen=i;
334 	}
335 }
336 
337 /*
338 	Append a new RFC2045 subpart.  Adds new RFC2045 structure to the
339 	end of the list of existing RFC2045 substructures.
340 */
341 
append_part_noinherit(struct rfc2045 * p,size_t startpos)342 static struct rfc2045 *append_part_noinherit(struct rfc2045 *p, size_t startpos){
343 struct rfc2045 *newp;
344 
345 	newp=rfc2045_alloc();
346 	if (p->lastpart)
347 	{
348 		p->lastpart->next=newp;
349 		newp->pindex=p->lastpart->pindex+1;
350 	}
351 	else
352 	{
353 		p->firstpart=newp;
354 		newp->pindex=0;
355 	}
356 	p->lastpart=newp;
357 	newp->parent=p;
358 
359 	/* Initialize source pointers */
360 	newp->startpos=newp->endpos=newp->startbody=newp->endbody=startpos;
361 
362 	while (p->parent)
363 		p=p->parent;
364 	++p->numparts;
365 
366 	return (newp);
367 }
368 
append_part(struct rfc2045 * p,size_t startpos)369 static struct rfc2045 *append_part(struct rfc2045 *p, size_t startpos)
370 {
371 struct rfc2045 *newp=append_part_noinherit(p, startpos);
372 
373 	/* Substructures inherit content transfer encoding and character set */
374 
375 	set_string(&newp->content_transfer_encoding,
376 			p->content_transfer_encoding);
377 
378 	if (rfc2045_attrset(&newp->content_type_attr, "charset",
379 			    rfc2045_getattr(p->content_type_attr, "charset"))
380 	    < 0)
381 		rfc2045_enomem();
382 
383 	return (newp);
384 }
385 
386 /*
387 	doline() processes next line in the RFC2045 message.
388 
389 	Drills down the list of all the multipart messages currently open,
390 	and checks if the line is a boundary line for the given multipart.
391 	In theory the boundary line, if there is one, should be the boundary
392 	line only for the inner multipart only, but, this takes into account
393 	broken MIME messages.
394 */
395 
396 static void do_header(struct rfc2045 *);
397 
doline(struct rfc2045 * p)398 static void doline(struct rfc2045 *p)
399 {
400 size_t	cnt=p->workbuflen;
401 char *c=p->workbuf;
402 size_t	n=cnt-1;	/* Strip \n (we always get at least a \n here) */
403 struct rfc2045 *newp;
404 struct rfc2045ac *rwp=p->rfc2045acptr;
405 unsigned num_levels=0;
406 
407 size_t	k;
408 int	bit8=0;
409 
410 	if (p->numparts > MAXPARTS)
411 	{
412 		p->rfcviolation |= RFC2045_ERR2COMPLEX;
413 		return;
414 	}
415 
416 	for (k=0; k<cnt; k++)
417 	{
418 		if (c[k] == 0)
419 			c[k]=' ';
420 		if (c[k] & 0x80)	bit8=1;
421 	}
422 
423 	if (n && c[n-1] == '\r')	/* Strip trailing \r */
424 		--n;
425 
426 	/* Before the main drill down loop before, look ahead and see if we're
427 	** in a middle of a form-data section.  */
428 
429 	for (newp=p; newp->lastpart &&
430 			!newp->lastpart->workclosed; newp=newp->lastpart,
431 			++num_levels)
432 	{
433 		if (ContentBoundary(newp) == 0 || newp->workinheader)
434 			continue;
435 
436 		if (newp->lastpart->informdata)
437 		{
438 			p=newp->lastpart;
439 			p->informdata=0;
440 			break;
441 		}
442 	}
443 
444 	/* Drill down until we match a boundary, or until we've reached
445 	the last RFC2045 section that has been opened.
446 	*/
447 
448 	while (p->lastpart)
449 	{
450 	size_t l;
451 	const char *cb;
452 
453 		if (p->lastpart->workclosed)
454 		{
455 			update_counts(p, p->endpos+cnt, p->endpos+n, 1);
456 			return;
457 		}
458 		/* Leftover trash -- workclosed is set when the final
459 		** terminating boundary has been seen */
460 
461 		/* content_boundary may be set before the entire header
462 		** has been seen, so continue drilling down in that case
463 		*/
464 
465 		cb=ContentBoundary(p);
466 
467 		if (cb == 0 || p->workinheader)
468 		{
469 			p=p->lastpart;
470 			++num_levels;
471 			continue;
472 		}
473 
474 		l=strlen(cb);
475 
476 		if (c[0] == '-' && c[1] == '-' && n >= 2+l &&
477 			strncasecmp(cb, c+2, l) == 0)
478 		{
479 
480 			if (rwp && (!p->lastpart || !p->lastpart->isdummy))
481 				(*rwp->end_section)();
482 
483 		/* Ok, we've found a boundary */
484 
485 			if (n >= 4+l && strncmp(c+2+l, "--", 2) == 0)
486 			{
487 			/* Last boundary */
488 
489 				p->lastpart->workclosed=1;
490 				update_counts(p, p->endpos+cnt, p->endpos+cnt,
491 					1);
492 				return;
493 			}
494 
495 		/* Create new RFC2045 section */
496 
497 			newp=append_part(p, p->endpos+cnt);
498 			update_counts(p, p->endpos+cnt, p->endpos+n, 1);
499 
500 			/* The new RFC2045 section is MIME compliant */
501 
502 			if ((newp->mime_version=strdup(p->mime_version)) == 0)
503 				rfc2045_enomem();
504 			return;
505 		}
506 		p=p->lastpart;
507 		++num_levels;
508 	}
509 
510 	/* Ok, we've found the RFC2045 section that we're working with.
511 	** No what?
512 	*/
513 
514 	if (! p->workinheader)
515 	{
516 		/* Processing body, just update the counts. */
517 
518 	size_t cnt_update=cnt;
519 
520 		if (bit8 && !p->content_8bit &&
521 			(p->rfcviolation & RFC2045_ERR8BITCONTENT) == 0)
522 		{
523 		struct rfc2045 *q;
524 
525 			for (q=p; q; q=q->parent)
526 				q->rfcviolation |= RFC2045_ERR8BITCONTENT;
527 		}
528 
529 		/*
530 		** In multiparts, the final newline in a part belongs to the
531 		** boundary, otherwise, include it in the text.
532 		*/
533 		if (p->parent && p->parent->content_type &&
534 				strncasecmp(p->parent->content_type,
535 						"multipart/", 10) == 0)
536 			cnt_update=n;
537 
538 		if (!p->lastpart || !p->lastpart->workclosed)
539 		{
540 			if (rwp && !p->isdummy)
541 				(*rwp->section_contents)(c, cnt);
542 
543 			update_counts(p, p->endpos+cnt, p->endpos+cnt_update,
544 				1);
545 		}
546 		return;
547 	}
548 
549 	if (bit8 && (p->rfcviolation & RFC2045_ERR8BITHEADER) == 0)
550 	{
551 	struct rfc2045 *q;
552 
553 		for (q=p; q; q=q->parent)
554 			q->rfcviolation |= RFC2045_ERR8BITHEADER;
555 	}
556 
557 	/* In the header */
558 
559 	if ( n == 0 )	/* End of header, body begins.  Parse header. */
560 	{
561 		do_header(p);	/* Clean up any left over header line */
562 		p->workinheader=0;
563 
564 		/* Message body starts right here */
565 
566 		p->startbody=p->endpos+cnt;
567 		update_counts(p, p->startbody, p->startbody, 1);
568 		--p->nbodylines;	/* Don't count the blank line */
569 
570 		/* Discard content type and boundary if I don't understand
571 		** this MIME flavor.
572 		*/
573 
574 		if (!RFC2045_ISMIME1(p->mime_version))
575 		{
576 			set_string(&p->content_type, 0);
577 
578 			rfc2045_freeattr(p->content_type_attr);
579 			p->content_type_attr=0;
580 			set_string(&p->content_disposition, 0);
581 			rfc2045_freeattr(p->content_disposition_attr);
582 			p->content_disposition_attr=0;
583 			if (p->boundary)
584 			{
585 				free(p->boundary);
586 				p->boundary=0;
587 			}
588 		}
589 
590 		/* Normally, if we don't have a content_type, default it
591 		** to text/plain.  However, if the multipart type is
592 		** multipart/digest, it is message/rfc822.
593 		*/
594 
595 		if (RFC2045_ISMIME1(p->mime_version) && !p->content_type)
596 		{
597 		char	*q="text/plain";
598 
599 			if (p->parent && p->parent->content_type &&
600 				strcmp(p->parent->content_type,
601 					"multipart/digest") == 0)
602 				q="message/rfc822";
603 			set_string(&p->content_type, q);
604 		}
605 
606 		/* If this is not a multipart section, we don't want to
607 		** hear about any boundaries
608 		*/
609 
610 		if (!p->content_type ||
611 			strncmp(p->content_type, "multipart/", 10))
612 		{
613 			if (p->boundary)
614 				free(p->boundary);
615 			p->boundary=0;
616 		}
617 
618 		/* If this section's a message, we will expect to see
619 		** more RFC2045 stuff, so create a nested RFC2045 structure,
620 		** and indicate that we expect to see headers.
621 		*/
622 
623 		if (p->content_type &&
624 		    rfc2045_message_content_type(p->content_type))
625 		{
626 			newp=append_part_noinherit(p, p->startbody);
627 			newp->workinheader=1;
628 			return;
629 		}
630 
631 		/*
632 		** If this is a multipart message (boundary defined),
633 		** create a RFC2045 structure for the pseudo-section
634 		** that precedes the first boundary line.
635 		*/
636 
637 		if (ContentBoundary(p))
638 		{
639 			newp=append_part(p, p->startbody);
640 			newp->workinheader=0;
641 			newp->isdummy=1;
642 				/* It's easier just to create it. */
643 			return;
644 		}
645 
646 		if (rwp)
647 			(*rwp->start_section)(p);
648 		return;
649 	}
650 
651 	/* RFC822 header continues */
652 
653 	update_counts(p, p->endpos + cnt, p->endpos+n, 1);
654 
655 	/*
656 	** Until we see an official start of message body, the body starts
657 	** right after what we just read.
658 	*/
659 	p->startbody=p->endbody;
660 
661 	/* If this header line starts with a space, append one space
662 	** to the saved contents of the previous line, and append this
663 	** line to it.
664 	*/
665 
666 	if (isspace((int)(unsigned char)*c))
667 	{
668 		rfc2045_add_buf(&p->header, &p->headersize, &p->headerlen, " ", 1);
669 	}
670 	else
671 	{
672 	/* Otherwise the previous header line is complete, so process it */
673 
674 		do_header(p);
675 		p->headerlen=0;
676 	}
677 
678 	/* Save this line in the header buffer, because the next line
679 	** could be a continuation.
680 	*/
681 
682 	rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, c, n);
683 }
684 
685 /***********************************************************************/
686 
687 /*
688 ** paste_tokens() - recombine an array of RFC822 tokens back as a string.
689 ** (Comments) are ignored.
690 */
691 
paste_tokens(struct rfc822t * h,int start,int cnt)692 static char *paste_tokens(struct rfc822t *h, int start, int cnt)
693 {
694 int	l;
695 int	i;
696 char	*p;
697 
698 	/* Calculate string size */
699 
700 	l=1;
701 	for (i=0; i<cnt; i++)
702 	{
703 		if (h->tokens[start+i].token == '(')
704 			continue;
705 
706 		if (rfc822_is_atom(h->tokens[start+i].token))
707 			l += h->tokens[start+i].len;
708 		else
709 			l++;
710 	}
711 
712 	/* Do it */
713 
714 	p=( char *)malloc(l);
715 	if (!p)
716 	{
717 		rfc2045_enomem();
718 		return (0);
719 	}
720 	l=0;
721 
722 	for (i=0; i<cnt; i++)
723 	{
724 		if (h->tokens[start+i].token == '(')
725 			continue;
726 
727 		if (rfc822_is_atom(h->tokens[start+i].token))
728 		{
729 		int l2=h->tokens[start+i].len;
730 
731 			memcpy(p+l, h->tokens[start+i].ptr, l2);
732 			l += l2;
733 		}
734 		else	p[l++]=h->tokens[start+i].token;
735 	}
736 	p[l]=0;
737 	return (p);
738 }
739 
740 /*
741 ** Whether this MIME content type is a nested MIME message.
742 */
743 
rfc2045_message_content_type(const char * content_type)744 int rfc2045_message_content_type(const char *content_type)
745 {
746 	return strcasecmp(content_type, RFC2045_MIME_MESSAGE_RFC822) == 0 ||
747 		strcasecmp(content_type, RFC2045_MIME_MESSAGE_GLOBAL) == 0;
748 }
749 
750 /*
751 ** Whether this MIME content type is a delivery status notification.
752 */
753 
rfc2045_delivery_status_content_type(const char * content_type)754 int rfc2045_delivery_status_content_type(const char *content_type)
755 {
756 	return strcasecmp(content_type,
757 		      RFC2045_MIME_MESSAGE_DELIVERY_STATUS) == 0 ||
758 		strcasecmp(content_type,
759 		       RFC2045_MIME_MESSAGE_GLOBAL_DELIVERY_STATUS) == 0;
760 }
761 
rfc2045_message_headers_content_type(const char * content_type)762 int rfc2045_message_headers_content_type(const char *content_type)
763 {
764 	return strcasecmp(content_type,
765 			  RFC2045_MIME_MESSAGE_HEADERS) == 0 ||
766 		strcasecmp(content_type,
767 			   RFC2045_MIME_MESSAGE_GLOBAL_HEADERS) == 0;
768 }
769 
/*
** Same as paste_tokens(), with the resulting string forced to lowercase.
**
** Returns a newly allocated string that the caller has to free, or a null
** pointer if paste_tokens() failed.
*/

static char *lower_paste_tokens(struct rfc822t *h, int start, int cnt)
{
	char	*p=paste_tokens(h, start, cnt);
	char	*q;

	/*
	** tolower() is only defined for values that are representable as
	** unsigned char, and header text may hold 8-bit characters, so go
	** through unsigned char first.
	*/
	for (q=p; q && *q; q++)
		*q=(char)tolower((int)(unsigned char)*q);
	return (p);
}
783 
paste_token(struct rfc822t * h,int i)784 static char *paste_token(struct rfc822t *h, int i)
785 {
786 	if (i >= h->ntokens)	return (0);
787 	return (paste_tokens(h, i, 1));
788 }
789 
/*
** Same as paste_token(), with the resulting string forced to lowercase.
**
** Returns a newly allocated string that the caller has to free, or a null
** pointer if paste_token() returned none.
*/
static char *lower_paste_token(struct rfc822t *h, int i)
{
	char	*p=paste_token(h, i);
	char	*q;

	/*
	** tolower() is only defined for values that are representable as
	** unsigned char, and header text may hold 8-bit characters, so go
	** through unsigned char first.
	*/
	for (q=p; q && *q; q++)
		*q=(char)tolower((int)(unsigned char)*q);
	return (p);
}
799 
800 /*
801 	do_header() - process completed RFC822 header.
802 */
803 
804 static void mime_version(struct rfc2045 *, struct rfc822t *);
805 static void content_type(struct rfc2045 *, struct rfc822t *);
806 static void content_transfer_encoding(struct rfc2045 *, struct rfc822t *);
807 static void content_disposition(struct rfc2045 *, struct rfc822t *);
808 static void content_id(struct rfc2045 *, struct rfc822t *);
809 static void content_description(struct rfc2045 *, const char *);
810 static void content_language(struct rfc2045 *, const char *);
811 static void content_md5(struct rfc2045 *, const char *);
812 static void content_base(struct rfc2045 *, struct rfc822t *);
813 static void content_location(struct rfc2045 *, struct rfc822t *);
814 
do_header(struct rfc2045 * p)815 static void do_header(struct rfc2045 *p)
816 {
817 struct rfc822t *header;
818 char	*t;
819 
820 	if (p->headerlen == 0)	return;
821 	rfc2045_add_buf( &p->header, &p->headersize, &p->headerlen, "", 1);
822 				/* 0 terminate */
823 
824 	/* Parse the header line according to RFC822 */
825 
826 	header=rfc822t_alloc_new(p->header, NULL, NULL);
827 
828 	if (!header)	return;	/* Broken header */
829 
830 	if (header->ntokens < 2 ||
831 		header->tokens[0].token ||
832 		header->tokens[1].token != ':')
833 	{
834 		rfc822t_free(header);
835 		return;	/* Broken header */
836 	}
837 
838 	t=lower_paste_token(header, 0);
839 
840 	if (t == 0)
841 		;
842 	else if (strcmp(t, "mime-version") == 0)
843 	{
844 		free(t);
845 		mime_version(p, header);
846 	}
847 	else if (strcmp(t, "content-type") == 0)
848 	{
849 		free(t);
850 		content_type(p, header);
851 	} else if (strcmp(t, "content-transfer-encoding") == 0)
852 	{
853 		free(t);
854 		content_transfer_encoding(p, header);
855 	} else if (strcmp(t, "content-disposition") == 0)
856 	{
857 		free(t);
858 		content_disposition(p, header);
859 	} else if (strcmp(t, "content-id") == 0)
860 	{
861 		free(t);
862 		content_id(p, header);
863 	} else if (strcmp(t, "content-description") == 0)
864 	{
865 		free(t);
866 		t=strchr(p->header, ':');
867 		if (t)	++t;
868 		while (t && isspace((int)(unsigned char)*t))
869 			++t;
870 		content_description(p, t);
871 	} else if (strcmp(t, "content-language") == 0)
872 	{
873 		free(t);
874 		t=strchr(p->header, ':');
875 		if (t)	++t;
876 		while (t && isspace((int)(unsigned char)*t))
877 			++t;
878 		content_language(p, t);
879 	} else if (strcmp(t, "content-base") == 0)
880 	{
881 		free(t);
882 		content_base(p, header);
883 	} else if (strcmp(t, "content-location") == 0)
884 	{
885 		free(t);
886 		content_location(p, header);
887 	} else if (strcmp(t, "content-md5") == 0)
888 	{
889 		free(t);
890 		t=strchr(p->header, ':');
891 		if (t)	++t;
892 		while (t && isspace((int)(unsigned char)*t))
893 			++t;
894 		content_md5(p, t);
895 	}
896 	else	free(t);
897 	rfc822t_free(header);
898 }
899 
900 /* Mime-Version: and Content-Transfer-Encoding: headers are easy */
901 
mime_version(struct rfc2045 * p,struct rfc822t * header)902 static void mime_version(struct rfc2045 *p, struct rfc822t *header)
903 {
904 char	*vers=paste_tokens(header, 2, header->ntokens-2);
905 
906 	if (!vers)	return;
907 
908 	if (p->mime_version)	free(p->mime_version);
909 	p->mime_version=vers;
910 }
911 
content_transfer_encoding(struct rfc2045 * r,struct rfc822t * header)912 static void content_transfer_encoding(struct rfc2045 *r,
913 				struct rfc822t *header)
914 {
915 char	*p;
916 
917 	p=lower_paste_tokens(header, 2, header->ntokens-2);
918 	if (!p)	return;
919 
920 	if (r->content_transfer_encoding)
921 		free(r->content_transfer_encoding);
922 	r->content_transfer_encoding=p;
923 
924 	if (strcmp(p, "8bit") == 0)
925 		r->content_8bit=1;
926 }
927 
928 /* Dig into the content_type header */
929 
parse_content_header(struct rfc822t * header,int init_start,void (* init_token)(char *,void *),void (* init_parameter)(const char *,struct rfc822t *,int,int,void *),void * void_arg)930 static void parse_content_header(struct rfc822t *header,
931 				 int init_start,
932 				 void (*init_token)(char *, void *),
933 				 void (*init_parameter)(const char *,
934 							struct rfc822t *,
935 							int, int,
936 							void *),
937 				 void *void_arg)
938 {
939 int	start;
940 int	i, j;
941 char	*p;
942 
943 	/* Look for the 1st ; */
944 
945 	for (start=init_start; start < header->ntokens; start++)
946 		if (header->tokens[start].token == ';')
947 			break;
948 
949 	/* Everything up to the 1st ; is the content type */
950 
951 	p=lower_paste_tokens(header, init_start, start-init_start);
952 	if (!p)	return;
953 
954 	(*init_token)(p, void_arg);
955 	if (start < header->ntokens) start++;
956 
957 	/* Handle the remainder of the Content-Type: header */
958 
959 	while (start < header->ntokens)
960 	{
961 		/* Look for next ; */
962 
963 		for (i=start; i<header->ntokens; i++)
964 			if (header->tokens[i].token == ';')
965 				break;
966 		j=start;
967 		if (j < i)
968 		{
969 			++j;
970 
971 			/* We only understand <atom>= */
972 
973 			while (j < i && header->tokens[j].token == '(')
974 				++j;
975 			if (j < i && header->tokens[j].token == '=')
976 			{
977 				++j;
978 
979 				/*
980 				** reformime: loose parsing due to loose
981 				** parsing in MSOE, leading to viruses slipping
982 				** through virus scanners if we strictly
983 				** parsed the content-type header.
984 				*/
985 				if (rfc2045_in_reformime && j < i
986 				    && header->tokens[j].token == '"')
987 					i=j+1;
988 
989 				p=lower_paste_token(header, start);
990 				if (!p)	return;
991 				(*init_parameter)(p, header, j, i-j, void_arg);
992 				free(p);
993 			}
994 		}
995 		if ( i<header->ntokens ) ++i;	/* Skip over ; */
996 		start=i;
997 	}
998 }
999 
/*
** Content-Type: header.  The type itself goes to save_content_type(),
** each parameter goes to save_content_type_parameter().
*/

static void save_content_type(char *, void *);
static void save_content_type_parameter( const char *,
					 struct rfc822t *, int, int, void *);

static void content_type(struct rfc2045 *r, struct rfc822t *header)
{
	/* Token 0 is the header name, token 1 the colon */

	parse_content_header(header, 2,
			     save_content_type,
			     save_content_type_parameter,
			     r);
}
1011 
save_content_type(char * content_type,void * void_arg)1012 static void save_content_type(char *content_type, void *void_arg)
1013 {
1014 	struct rfc2045 *r=(struct rfc2045 *)void_arg;
1015 
1016 	if (r->content_type)	free(r->content_type);
1017 	r->content_type=content_type;
1018 }
1019 
save_content_type_parameter(const char * name,struct rfc822t * header,int start,int len,void * void_arg)1020 static void save_content_type_parameter(const char *name,
1021 					struct rfc822t *header, int start,
1022 					int len, void *void_arg)
1023 {
1024 	struct rfc2045 *r=(struct rfc2045 *)void_arg;
1025 	char	*p;
1026 
1027 	p=strcmp(name, "charset") == 0 ?
1028 			lower_paste_tokens(header, start, len):
1029 			paste_tokens(header, start, len);
1030 	if (!p)	return;
1031 
1032 	if (rfc2045_attrset(&r->content_type_attr, name, p) < 0)
1033 	{
1034 		free(p);
1035 		rfc2045_enomem();
1036 		return;
1037 	}
1038 
1039 	free(p);
1040 
1041 	if (strcmp(name, "boundary") == 0)
1042 	{
1043 		struct rfc2045 *q;
1044 
1045 		if (r->boundary)
1046 			free(r->boundary);
1047 		p=lower_paste_tokens(header, start, len);
1048 		r->boundary=p;
1049 
1050 		/*
1051 		** Check all the outer MIME boundaries.  If this is a
1052 		** substring of an outer MIME boundary, or the outer
1053 		** boundary is a substring of the inner boundary, we
1054 		** have an ambiguity - see "IMPLEMENTOR'S NOTE" in
1055 		** section 5.1.1 of RFC 2046.
1056 		*/
1057 
1058 		for (q=r->parent; q; q=q->parent)
1059 		{
1060 			const char *a, *b;
1061 
1062 			if (!q->boundary)
1063 				continue;
1064 
1065 			for (a=q->boundary, b=p; *a && *b; a++, b++)
1066 				if (*a != *b)
1067 					break;
1068 
1069 			if (!*a || !*b)
1070 			{
1071 				while (q->parent)
1072 					q=q->parent;
1073 				q->rfcviolation |= RFC2045_ERRBADBOUNDARY;
1074 				break;
1075 			}
1076 		}
1077 	}
1078 }
1079 
/*
** Content-Disposition: header.  The disposition itself goes to
** save_content_disposition(), each parameter goes to
** save_content_disposition_parameter().
*/

static void save_content_disposition(char *, void *);
static void save_content_disposition_parameter( const char *,
						struct rfc822t *, int, int,
						void *);

static void content_disposition(struct rfc2045 *r, struct rfc822t *header)
{
	/* Token 0 is the header name, token 1 the colon */

	parse_content_header(header, 2,
			     save_content_disposition,
			     save_content_disposition_parameter,
			     r);
}
1092 
save_content_disposition(char * content_disposition,void * void_arg)1093 static void save_content_disposition(char *content_disposition, void *void_arg)
1094 {
1095 	struct rfc2045 *r=(struct rfc2045 *)void_arg;
1096 
1097 	if (r->content_disposition)	free(r->content_disposition);
1098 	r->content_disposition=content_disposition;
1099 }
1100 
save_content_disposition_parameter(const char * name,struct rfc822t * header,int start,int len,void * void_arg)1101 static void save_content_disposition_parameter(const char *name,
1102 					       struct rfc822t *header,
1103 					       int start, int len,
1104 					       void *void_arg)
1105 {
1106 	struct rfc2045 *r=(struct rfc2045 *)void_arg;
1107 	char	*p;
1108 
1109 	p=paste_tokens(header, start, len);
1110 	if (!p)	return;
1111 
1112 	if (rfc2045_attrset(&r->content_disposition_attr, name, p) < 0)
1113 	{
1114 		free(p);
1115 		rfc2045_enomem();
1116 		return;
1117 	}
1118 	free(p);
1119 }
1120 
rfc2045_related_start(const struct rfc2045 * p)1121 char *rfc2045_related_start(const struct rfc2045 *p)
1122 {
1123 const char *cb=rfc2045_getattr( p->content_type_attr, "start");
1124 struct	rfc822t *t;
1125 struct	rfc822a	*a;
1126 int	i;
1127 
1128 	if (!cb || !*cb)	return (0);
1129 
1130 	t=rfc822t_alloc_new(cb, 0, NULL);
1131 	if (!t)
1132 	{
1133 		rfc2045_enomem();
1134 		return(0);
1135 	}
1136 
1137 	a=rfc822a_alloc(t);
1138 	if (!a)
1139 	{
1140 		rfc822t_free(t);
1141 		rfc2045_enomem();
1142 		return (0);
1143 	}
1144 	for (i=0; i<a->naddrs; i++)
1145 		if (a->addrs[i].tokens)
1146 		{
1147 		char	*s=rfc822_getaddr(a, i);
1148 
1149 			rfc822a_free(a);
1150 			rfc822t_free(t);
1151 			if (!s)
1152 				rfc2045_enomem();
1153 			return (s);
1154 		}
1155 
1156 	rfc822a_free(a);
1157 	rfc822t_free(t);
1158 	return (0);
1159 }
1160 
content_id(struct rfc2045 * p,struct rfc822t * t)1161 static void content_id(struct rfc2045 *p, struct rfc822t *t)
1162 {
1163 struct	rfc822a	*a=rfc822a_alloc(t);
1164 int	i;
1165 
1166 	if (!a)
1167 	{
1168 		rfc2045_enomem();
1169 		return;
1170 	}
1171 
1172 	for (i=0; i<a->naddrs; i++)
1173 		if (a->addrs[i].tokens)
1174 		{
1175 		char	*s=rfc822_getaddr(a, i);
1176 
1177 			if (!s)
1178 			{
1179 				rfc822a_free(a);
1180 				rfc2045_enomem();
1181 				return;
1182 			}
1183 			if (p->content_id)
1184 				free(p->content_id);
1185 			p->content_id=s;
1186 			break;
1187 		}
1188 
1189 	rfc822a_free(a);
1190 }
1191 
content_description(struct rfc2045 * p,const char * s)1192 static void content_description(struct rfc2045 *p, const char *s)
1193 {
1194 	if (s && *s)
1195 		set_string(&p->content_description, s);
1196 }
1197 
content_language(struct rfc2045 * p,const char * s)1198 static void content_language(struct rfc2045 *p, const char *s)
1199 {
1200 	if (s && *s)
1201 		set_string(&p->content_language, s);
1202 }
1203 
content_md5(struct rfc2045 * p,const char * s)1204 static void content_md5(struct rfc2045 *p, const char *s)
1205 {
1206 	if (s && *s)
1207 		set_string(&p->content_md5, s);
1208 }
1209 
content_base(struct rfc2045 * p,struct rfc822t * t)1210 static void content_base(struct rfc2045 *p, struct rfc822t *t)
1211 {
1212 char	*s;
1213 int	i;
1214 
1215 	for (i=0; i<t->ntokens; i++)
1216 		if (t->tokens[i].token == '"')
1217 			t->tokens[i].token=0;
1218 
1219 	s=paste_tokens(t, 2, t->ntokens-2);
1220 	set_string(&p->content_base, s);
1221 }
1222 
content_location(struct rfc2045 * p,struct rfc822t * t)1223 static void content_location(struct rfc2045 *p, struct rfc822t *t)
1224 {
1225 char	*s;
1226 int	i;
1227 
1228 	for (i=0; i<t->ntokens; i++)
1229 		if (t->tokens[i].token == '"')
1230 			t->tokens[i].token=0;
1231 
1232 	s=paste_tokens(t, 2, t->ntokens-2);
1233 	set_string(&p->content_location, s);
1234 	free(s);
1235 }
1236 
1237 /* -------------------- */
1238 
/*
** GETINFO(s, def): evaluates to s when s is a non-null pointer to a
** non-empty string, and to def otherwise.  Both arguments are fully
** parenthesized so that expressions such as "ptr + 1" expand correctly.
** s is evaluated more than once: do not pass an expression with side
** effects.
*/
#define	GETINFO(s, def) ( (s) && *(s) ? (s):(def))
1240 
rfc2045_mimeinfo(const struct rfc2045 * p,const char ** content_type_s,const char ** content_transfer_encoding_s,const char ** charset_s)1241 void rfc2045_mimeinfo(const struct rfc2045 *p,
1242 	const char **content_type_s,
1243 	const char **content_transfer_encoding_s,
1244 	const char **charset_s)
1245 {
1246 const char *c;
1247 
1248 	*content_type_s=GETINFO(p->content_type, "text/plain");
1249 	*content_transfer_encoding_s=GETINFO(p->content_transfer_encoding,
1250 						"8bit");
1251 
1252 	c=rfc2045_getattr(p->content_type_attr, "charset");
1253 	if (!c)	c=rfc2045_getdefaultcharset();
1254 
1255 	*charset_s=c;
1256 }
1257 
rfc2045_getdefaultcharset()1258 const char *rfc2045_getdefaultcharset()
1259 {
1260 const char *p=rfc2045_defcharset;
1261 
1262 	if (!p)	p=RFC2045CHARSET;
1263 	return (p);
1264 }
1265 
rfc2045_setdefaultcharset(const char * charset)1266 void rfc2045_setdefaultcharset(const char *charset)
1267 {
1268 char	*p=strdup(charset);
1269 
1270 	if (!p)
1271 	{
1272 		rfc2045_enomem();
1273 		return;
1274 	}
1275 
1276 	if (rfc2045_defcharset)	free(rfc2045_defcharset);
1277 	rfc2045_defcharset=p;
1278 }
1279 
rfc2045_boundary(const struct rfc2045 * p)1280 const char *rfc2045_boundary(const struct rfc2045 *p)
1281 {
1282 const char *cb=rfc2045_getattr( p->content_type_attr, "boundary");
1283 
1284 	if (!cb)	cb="";
1285 	return (cb);
1286 }
1287 
rfc2045_isflowed(const struct rfc2045 * p)1288 int rfc2045_isflowed(const struct rfc2045 *p)
1289 {
1290 	const char *cb=rfc2045_getattr(p->content_type_attr, "format");
1291 
1292 	return (cb && strcmp(cb, "flowed") == 0);
1293 }
1294 
rfc2045_isdelsp(const struct rfc2045 * p)1295 int rfc2045_isdelsp(const struct rfc2045 *p)
1296 {
1297 	const char *cb=rfc2045_getattr(p->content_type_attr, "delsp");
1298 
1299 	return (cb && strcmp(cb, "yes") == 0);
1300 }
1301 
rfc2045_content_id(const struct rfc2045 * p)1302 const char *rfc2045_content_id(const struct rfc2045 *p)
1303 {
1304 	return (p->content_id ? p->content_id:"");
1305 }
1306 
rfc2045_content_description(const struct rfc2045 * p)1307 const char *rfc2045_content_description(const struct rfc2045 *p)
1308 {
1309 	return (p->content_description ? p->content_description:"");
1310 }
1311 
rfc2045_content_language(const struct rfc2045 * p)1312 const char *rfc2045_content_language(const struct rfc2045 *p)
1313 {
1314 	return (p->content_language ? p->content_language:"");
1315 }
1316 
rfc2045_content_md5(const struct rfc2045 * p)1317 const char *rfc2045_content_md5(const struct rfc2045 *p)
1318 {
1319 	return (p->content_md5 ? p->content_md5:"");
1320 }
1321 
rfc2045_mimepos(const struct rfc2045 * p,off_t * start_pos,off_t * end_pos,off_t * start_body,off_t * nlines,off_t * nbodylines)1322 void rfc2045_mimepos(const struct rfc2045 *p,
1323 	off_t *start_pos, off_t *end_pos, off_t *start_body,
1324 	off_t *nlines, off_t *nbodylines)
1325 {
1326 	*start_pos=p->startpos;
1327 	*end_pos=p->endpos;
1328 
1329 	*nlines=p->nlines;
1330 	*nbodylines=p->nbodylines;
1331 	if (p->parent)	/* MIME parts do not have the trailing CRLF */
1332 	{
1333 		*end_pos=p->endbody;
1334 		if (*nlines)	--*nlines;
1335 		if (*nbodylines) --*nbodylines;
1336 	}
1337 	*start_body=p->startbody;
1338 
1339 	if (*start_body == *start_pos)	/* No header */
1340 	{
1341 		*start_body= *end_pos;
1342 	}
1343 }
1344 
rfc2045_mimepartcount(const struct rfc2045 * p)1345 unsigned rfc2045_mimepartcount(const struct rfc2045 *p)
1346 {
1347 const struct rfc2045 *q;
1348 unsigned n=0;
1349 
1350 	for (q=p->firstpart; q; q=q->next)	++n;
1351 	return (n);
1352 }
1353 
1354 /*
1355 ** Generic interface into parse_content_header
1356 */
1357 
/*
** Context passed (as the opaque argument) to parse_mime_cb() and
** parse_param_cb(): the callbacks supplied by the caller of
** rfc2045_parse_mime_header(), plus the caller's own opaque pointer.
*/
struct rfc2045_parse_mime_info {
	/* Called by parse_mime_cb() with the string it was given */
	void (*header_type_cb)(const char *, void *);
	/* Called by parse_param_cb() with a parameter name and its value */
	void (*header_param_cb)(const char *, const char *, void *);
	/* Caller's pointer, forwarded as the last argument of both callbacks */
	void *void_arg;
};

/* Adapters handed to parse_content_header(); defined further down */
static void parse_mime_cb(char *, void *);
static void parse_param_cb(const char *, struct rfc822t *,
			   int, int, void *);
1367 
rfc2045_parse_mime_header(const char * header,void (* header_type_cb)(const char *,void *),void (* header_param_cb)(const char *,const char *,void *),void * void_arg)1368 int rfc2045_parse_mime_header(const char *header,
1369 			      void (*header_type_cb)(const char *, void *),
1370 			      void (*header_param_cb)(const char *,
1371 						      const char *,
1372 						      void *),
1373 			      void *void_arg)
1374 {
1375 	struct rfc2045_parse_mime_info mi;
1376 	struct rfc822t *h=rfc822t_alloc_new(header, NULL, NULL);
1377 
1378 	mi.header_type_cb=header_type_cb;
1379 	mi.header_param_cb=header_param_cb;
1380 	mi.void_arg=void_arg;
1381 
1382 	if (!h)
1383 		return -1;
1384 
1385 	parse_content_header(h, 0, parse_mime_cb, parse_param_cb, &mi);
1386 	rfc822t_free(h);
1387 	return 0;
1388 }
1389 
parse_mime_cb(char * t,void * void_arg)1390 static void parse_mime_cb(char *t, void *void_arg)
1391 {
1392 	struct rfc2045_parse_mime_info *mi=
1393 		(struct rfc2045_parse_mime_info *)void_arg;
1394 
1395 	(*mi->header_type_cb)(t, mi->void_arg);
1396 	free(t);
1397 }
1398 
1399 
parse_param_cb(const char * name,struct rfc822t * header,int start,int len,void * void_arg)1400 static void parse_param_cb(const char *name,
1401 			   struct rfc822t *header, int start,
1402 			   int len, void *void_arg)
1403 {
1404 	struct rfc2045_parse_mime_info *mi=
1405 		(struct rfc2045_parse_mime_info *)void_arg;
1406 	char *p=paste_tokens(header, start, len);
1407 
1408 	if (!p)
1409 		return;
1410 
1411 	(*mi->header_param_cb)(name, p, mi->void_arg);
1412 	free(p);
1413 }
1414