1 /* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
2 
3 #include "lib.h"
4 #include "array.h"
5 #include "str.h"
6 #include "istream.h"
7 #include "rfc822-parser.h"
8 #include "rfc2231-parser.h"
9 #include "message-parser-private.h"
10 
/* A header callback pointer whose value is NULL.
   NOTE(review): presumably exists so that callers can pass a correctly
   typed "no callback" value to the parser API - confirm against the
   public header. */
message_part_header_callback_t *null_message_part_header_callback = NULL;
12 
/* Forward declarations for block parsers that are referenced (called or
   stored into ctx->parse_next_block) before they are defined below. */
static int parse_next_header_init(struct message_parser_ctx *ctx,
				  struct message_block *block_r);
static int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
				       struct message_block *block_r);
static int parse_next_body_to_eof(struct message_parser_ctx *ctx,
				  struct message_block *block_r);
19 
20 static struct message_boundary *
boundary_find(struct message_boundary * boundaries,const unsigned char * data,size_t len,bool trailing_dashes)21 boundary_find(struct message_boundary *boundaries,
22 	      const unsigned char *data, size_t len, bool trailing_dashes)
23 {
24 	struct message_boundary *best = NULL;
25 
26 	/* As MIME spec says: search from latest one to oldest one so that we
27 	   don't break if the same boundary is used in nested parts. Also the
28 	   full message line doesn't have to match the boundary, only the
29 	   beginning. However, if there are multiple prefixes whose beginning
30 	   matches, use the longest matching one. */
31 	while (boundaries != NULL) {
32 		if (boundaries->len <= len &&
33 		    memcmp(boundaries->boundary, data, boundaries->len) == 0 &&
34 		    (best == NULL || best->len < boundaries->len)) {
35 			best = boundaries;
36 			/* If we see "foo--", it could either mean that there
37 			   is a boundary named "foo" that ends now or there's
38 			   a boundary "foo--" which continues. */
39 			if (best->len == len ||
40 			    (best->len == len-2 && trailing_dashes)) {
41 				/* This is exactly the wanted boundary. There
42 				   can't be a better one. */
43 				break;
44 			}
45 		}
46 
47 		boundaries = boundaries->next;
48 	}
49 
50 	return best;
51 }
52 
parse_body_add_block(struct message_parser_ctx * ctx,struct message_block * block)53 static void parse_body_add_block(struct message_parser_ctx *ctx,
54 				 struct message_block *block)
55 {
56 	unsigned int missing_cr_count = 0;
57 	const unsigned char *cur, *next, *data = block->data;
58 
59 	i_assert(block->size > 0);
60 
61 	block->hdr = NULL;
62 
63 	/* check if we have NULs */
64 	if (memchr(data, '\0', block->size) != NULL)
65 		ctx->part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
66 
67 	/* count number of lines and missing CRs */
68 	if (*data == '\n') {
69 		ctx->part->body_size.lines++;
70 		if (ctx->last_chr != '\r')
71 			missing_cr_count++;
72 	}
73 
74 	cur = data + 1;
75 	while ((next = memchr(cur, '\n', block->size - (cur - data))) != NULL) {
76 		ctx->part->body_size.lines++;
77 		if (next[-1] != '\r')
78 			missing_cr_count++;
79 
80 		cur = next + 1;
81 	}
82 	ctx->last_chr = data[block->size - 1];
83 	ctx->skip += block->size;
84 
85 	ctx->part->body_size.physical_size += block->size;
86 	ctx->part->body_size.virtual_size += block->size + missing_cr_count;
87 }
88 
message_parser_read_more(struct message_parser_ctx * ctx,struct message_block * block_r,bool * full_r)89 int message_parser_read_more(struct message_parser_ctx *ctx,
90 			     struct message_block *block_r, bool *full_r)
91 {
92 	int ret;
93 
94 	if (ctx->skip > 0) {
95 		i_stream_skip(ctx->input, ctx->skip);
96 		ctx->skip = 0;
97 	}
98 
99 	*full_r = FALSE;
100 	ret = i_stream_read_bytes(ctx->input, &block_r->data,
101 				  &block_r->size, ctx->want_count + 1);
102 	if (ret <= 0) {
103 		switch (ret) {
104 		case 0:
105 			if (!ctx->input->eof) {
106 				i_assert(!ctx->input->blocking);
107 				return 0;
108 			}
109 			break;
110 		case -1:
111 			i_assert(ctx->input->eof ||
112 				 ctx->input->stream_errno != 0);
113 			ctx->eof = TRUE;
114 			if (block_r->size != 0) {
115 				/* EOF, but we still have some data.
116 				   return it. */
117 				return 1;
118 			}
119 			return -1;
120 		case -2:
121 			*full_r = TRUE;
122 			break;
123 		default:
124 			i_unreached();
125 		}
126 	}
127 
128 	if (!*full_r) {
129 		/* reset number of wanted characters if we actually got them */
130 		ctx->want_count = 1;
131 	}
132 	return 1;
133 }
134 
135 static void
message_part_append(struct message_parser_ctx * ctx)136 message_part_append(struct message_parser_ctx *ctx)
137 {
138 	struct message_part *parent = ctx->part;
139 	struct message_part *part;
140 
141 	i_assert(!ctx->preparsed);
142 	i_assert(parent != NULL);
143 	i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART |
144 				   MESSAGE_PART_FLAG_MESSAGE_RFC822)) != 0);
145 
146 	part = p_new(ctx->part_pool, struct message_part, 1);
147 	part->parent = parent;
148 
149 	/* set child position */
150 	part->physical_pos =
151 		parent->physical_pos +
152 		parent->body_size.physical_size +
153 		parent->header_size.physical_size;
154 
155 	/* add to parent's linked list */
156 	*ctx->next_part = part;
157 	/* update the parent's end-of-linked-list pointer */
158 	struct message_part **next_part = &part->next;
159 	array_push_back(&ctx->next_part_stack, &next_part);
160 	/* This part is now the new parent for the next message_part_append()
161 	   call. Its linked list begins with the children pointer. */
162 	ctx->next_part = &part->children;
163 
164 	ctx->part = part;
165 	ctx->nested_parts_count++;
166 	ctx->total_parts_count++;
167 	i_assert(ctx->nested_parts_count < ctx->max_nested_mime_parts);
168 	i_assert(ctx->total_parts_count <= ctx->max_total_mime_parts);
169 }
170 
message_part_finish(struct message_parser_ctx * ctx)171 static void message_part_finish(struct message_parser_ctx *ctx)
172 {
173 	struct message_part **const *parent_next_partp;
174 
175 	if (!ctx->preparsed) {
176 		i_assert(ctx->nested_parts_count > 0);
177 		ctx->nested_parts_count--;
178 
179 		parent_next_partp = array_back(&ctx->next_part_stack);
180 		array_pop_back(&ctx->next_part_stack);
181 		ctx->next_part = *parent_next_partp;
182 	}
183 
184 	message_size_add(&ctx->part->parent->body_size, &ctx->part->body_size);
185 	message_size_add(&ctx->part->parent->body_size, &ctx->part->header_size);
186 	ctx->part->parent->children_count += 1 + ctx->part->children_count;
187 	ctx->part = ctx->part->parent;
188 }
189 
message_boundary_free(struct message_boundary * b)190 static void message_boundary_free(struct message_boundary *b)
191 {
192 	i_free(b->boundary);
193 	i_free(b);
194 }
195 
196 static void
boundary_remove_until(struct message_parser_ctx * ctx,struct message_boundary * boundary)197 boundary_remove_until(struct message_parser_ctx *ctx,
198 		      struct message_boundary *boundary)
199 {
200 	while (ctx->boundaries != boundary) {
201 		struct message_boundary *cur = ctx->boundaries;
202 
203 		i_assert(cur != NULL);
204 		ctx->boundaries = cur->next;
205 		message_boundary_free(cur);
206 
207 	}
208 	ctx->boundaries = boundary;
209 }
210 
parse_next_body_multipart_init(struct message_parser_ctx * ctx)211 static void parse_next_body_multipart_init(struct message_parser_ctx *ctx)
212 {
213 	struct message_boundary *b;
214 
215 	b = i_new(struct message_boundary, 1);
216 	b->part = ctx->part;
217 	b->boundary = ctx->last_boundary;
218 	ctx->last_boundary = NULL;
219 	b->len = strlen(b->boundary);
220 
221 	b->next = ctx->boundaries;
222 	ctx->boundaries = b;
223 }
224 
/* Block parser used when the body of a message/rfc822 part begins: the
   embedded message becomes a new child part and its headers are parsed
   next. Returns what parse_next_header_init() returns. */
static int parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx,
					       struct message_block *block_r)
{
	int ret;

	message_part_append(ctx);
	ret = parse_next_header_init(ctx, block_r);
	return ret;
}
231 
/* Check whether the line beginning at data[0] is a "--boundary" line of
   one of the boundaries in ctx->boundaries.

   data/size is the input available from the beginning of the line. full
   tells that the caller can't provide any more data for this line.

   Returns:
     1  = boundary found. *boundary_r points to it and its epilogue_found
          is updated to tell whether the boundary is directly followed
          by "--".
     0  = can't decide yet. ctx->want_count is set to the number of bytes
          wanted before retrying.
     -1 = not a boundary line (or the total MIME part limit was already
          reached, in which case the current part gets the OVERFLOW flag).
   *boundary_r is NULL unless 1 is returned. */
static int
boundary_line_find(struct message_parser_ctx *ctx,
		   const unsigned char *data, size_t size, bool full,
		   struct message_boundary **boundary_r)
{
	*boundary_r = NULL;

	if (size < 2) {
		/* not even the leading "--" can be checked yet */
		i_assert(!full);

		if (ctx->input->eof)
			return -1;
		ctx->want_count = 2;
		return 0;
	}

	if (data[0] != '-' || data[1] != '-') {
		/* not a boundary, just skip this line */
		return -1;
	}

	if (ctx->total_parts_count >= ctx->max_total_mime_parts) {
		/* can't add any more MIME parts. just stop trying to find
		   more boundaries. */
		ctx->part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
		return -1;
	}

	/* need to find the end of line */
	/* from here on data/size exclude the leading "--" */
	data += 2;
	size -= 2;
	const unsigned char *lf_pos = memchr(data, '\n', size);
	if (lf_pos == NULL &&
	    size+2 < BOUNDARY_END_MAX_LEN &&
	    !ctx->input->eof && !full) {
		/* no LF found */
		/* ..and more input can still arrive, so ask for more */
		ctx->want_count = BOUNDARY_END_MAX_LEN;
		return 0;
	}
	size_t find_size = size;
	bool trailing_dashes = FALSE;

	if (lf_pos != NULL) {
		/* match against the line contents without the [CR]LF */
		find_size = lf_pos - data;
		if (find_size > 0 && data[find_size-1] == '\r')
			find_size--;
		if (find_size > 2 && data[find_size-1] == '-' &&
		    data[find_size-2] == '-')
			trailing_dashes = TRUE;
	} else if (find_size > BOUNDARY_END_MAX_LEN)
		find_size = BOUNDARY_END_MAX_LEN;

	*boundary_r = boundary_find(ctx->boundaries, data, find_size,
				    trailing_dashes);
	if (*boundary_r == NULL)
		return -1;

	/* "--boundary--" closes the multipart; what follows is epilogue */
	(*boundary_r)->epilogue_found =
		size >= (*boundary_r)->len + 2 &&
		memcmp(data + (*boundary_r)->len, "--", 2) == 0;
	return 1;
}
294 
parse_next_mime_header_init(struct message_parser_ctx * ctx,struct message_block * block_r)295 static int parse_next_mime_header_init(struct message_parser_ctx *ctx,
296 				       struct message_block *block_r)
297 {
298 	message_part_append(ctx);
299 	ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME;
300 
301 	return parse_next_header_init(ctx, block_r);
302 }
303 
parse_next_body_skip_boundary_line(struct message_parser_ctx * ctx,struct message_block * block_r)304 static int parse_next_body_skip_boundary_line(struct message_parser_ctx *ctx,
305 					      struct message_block *block_r)
306 {
307 	const unsigned char *ptr;
308 	int ret;
309 	bool full;
310 
311 	if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
312 		return ret;
313 
314 	ptr = memchr(block_r->data, '\n', block_r->size);
315 	if (ptr == NULL) {
316 		parse_body_add_block(ctx, block_r);
317 		if (block_r->size > 0 &&
318 		    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
319 			return 1;
320 		return 0;
321 	}
322 
323 	/* found the LF */
324 	block_r->size = (ptr - block_r->data) + 1;
325 	parse_body_add_block(ctx, block_r);
326 
327 	if (ctx->boundaries == NULL || ctx->boundaries->part != ctx->part) {
328 		/* epilogue */
329 		if (ctx->boundaries != NULL)
330 			ctx->parse_next_block = parse_next_body_to_boundary;
331 		else
332 			ctx->parse_next_block = parse_next_body_to_eof;
333 	} else {
334 		/* a new MIME part begins */
335 		ctx->parse_next_block = parse_next_mime_header_init;
336 	}
337 	if (block_r->size > 0 &&
338 	    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
339 		return 1;
340 	return ctx->parse_next_block(ctx, block_r);
341 }
342 
/* Handle a boundary line that was found in the input buffer at offset
   ctx->skip.

   All parts below boundary->part are finished, the boundaries nested
   deeper than `boundary` are dropped (and `boundary` itself as well, if
   it was followed by "--"), the boundary line's beginning
   ([[CR]LF]--boundary[--]) is accounted to the body of boundary->part and
   the parser is switched to skip the rest of the boundary line.

   first_line is TRUE when the "--" begins directly at the buffer position,
   i.e. there is no preceding [CR]LF to be counted as part of the boundary.

   Returns 1 with the boundary bytes in block_r if
   MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES is set, otherwise whatever
   parse_next_body_skip_boundary_line() returns. */
static int parse_part_finish(struct message_parser_ctx *ctx,
			     struct message_boundary *boundary,
			     struct message_block *block_r, bool first_line)
{
	size_t line_size;
	/* copied, because `boundary` may get freed below */
	size_t boundary_len = boundary->len;
	bool boundary_epilogue_found = boundary->epilogue_found;

	i_assert(ctx->last_boundary == NULL);

	/* get back to parent MIME part, summing the child MIME part sizes
	   into parent's body sizes */
	while (ctx->part != boundary->part) {
		message_part_finish(ctx);
		i_assert(ctx->part != NULL);
	}

	if (boundary->epilogue_found) {
		/* this boundary isn't needed anymore */
		boundary_remove_until(ctx, boundary->next);
	} else {
		/* forget about the boundaries we possibly skipped */
		boundary_remove_until(ctx, boundary);
	}

	/* the boundary itself should already be in buffer. add that. */
	block_r->data = i_stream_get_data(ctx->input, &block_r->size);
	i_assert(block_r->size >= ctx->skip);
	block_r->data += ctx->skip;
	/* [[\r]\n]--<boundary>[--] */
	if (first_line)
		line_size = 0;
	else if (block_r->data[0] == '\r') {
		i_assert(block_r->data[1] == '\n');
		line_size = 2;
	} else {
		i_assert(block_r->data[0] == '\n');
		line_size = 1;
	}
	line_size += 2 + boundary_len + (boundary_epilogue_found ? 2 : 0);
	i_assert(block_r->size >= ctx->skip + line_size);
	block_r->size = line_size;
	parse_body_add_block(ctx, block_r);

	/* whatever else is on the boundary line gets skipped next */
	ctx->parse_next_block = parse_next_body_skip_boundary_line;

	if ((ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
		return 1;
	return ctx->parse_next_block(ctx, block_r);
}
393 
/* Block parser for body data while at least one boundary is being looked
   for. Body data is consumed up to (but not including) the [CR]LF that
   precedes a line which is, or may still turn out to be, a boundary line.

   Returns:
     <= 0 from message_parser_read_more() as is,
     1    = block_r contains body data for the caller,
     0    = data was consumed but isn't given to the caller (body of a
            multipart part without
            MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS), or more input
            is needed before anything can be decided,
     or the return value of parse_part_finish() once a boundary is found
     and there is no body data left in front of it. */
static int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
				       struct message_block *block_r)
{
	struct message_boundary *boundary = NULL;
	const unsigned char *data, *cur, *next, *end;
	size_t boundary_start;
	int ret;
	bool full;

	if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
		return ret;

	data = block_r->data;
	if (ctx->last_chr == '\n') {
		/* handle boundary in first line of message. alternatively
		   it's an empty line. */
		ret = boundary_line_find(ctx, block_r->data,
					 block_r->size, full, &boundary);
		if (ret >= 0) {
			/* 0 = need more data, 1 = boundary found */
			return ret == 0 ? 0 :
				parse_part_finish(ctx, boundary, block_r, TRUE);
		}
	}

	i_assert(block_r->size > 0);
	/* offset of the [CR]LF preceding the line being checked */
	boundary_start = 0;

	/* skip to beginning of the next line. the first line was
	   handled already. */
	cur = data; end = data + block_r->size;
	while ((next = memchr(cur, '\n', end - cur)) != NULL) {
		cur = next + 1;

		boundary_start = next - data;
		if (next > data && next[-1] == '\r')
			boundary_start--;

		if (boundary_start != 0) {
			/* we can at least skip data until the first [CR]LF.
			   input buffer can't be full anymore. */
			full = FALSE;
		}

		ret = boundary_line_find(ctx, cur, end - cur, full, &boundary);
		if (ret >= 0) {
			/* found / need more data */
			/* nothing in front of the [CR]LF gets consumed, so
			   the newline bytes have to be included in the
			   wanted amount as well */
			if (ret == 0 && boundary_start == 0)
				ctx->want_count += cur - block_r->data;
			break;
		}
	}

	if (next != NULL) {
		/* found / need more data */
		i_assert(ret >= 0);
		i_assert(!(ret == 0 && full));
	} else if (boundary_start == 0) {
		/* no linefeeds in this block. we can just skip it. */
		ret = 0;
		if (block_r->data[block_r->size-1] == '\r' && !ctx->eof) {
			/* this may be the beginning of the \r\n--boundary */
			block_r->size--;
		}
		boundary_start = block_r->size;
	} else {
		/* the boundary wasn't found from this data block,
		   we'll need more data. */
		ret = 0;
		ctx->want_count = (block_r->size - boundary_start) + 1;
	}

	if (ret > 0 || (ret == 0 && !ctx->eof)) {
		/* a) we found the boundary
		   b) we need more data and haven't reached EOF yet
		   so leave CR+LF + last line to buffer */
		block_r->size = boundary_start;
	}
	if (block_r->size != 0) {
		/* there is body data in front of the (possible) boundary.
		   consume that first; a found boundary is seen again on
		   the next call. */
		parse_body_add_block(ctx, block_r);

		if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
		    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
			return 0;

		return 1;
	}
	return ret <= 0 ? ret :
		parse_part_finish(ctx, boundary, block_r, FALSE);
}
483 
parse_next_body_to_eof(struct message_parser_ctx * ctx,struct message_block * block_r)484 static int parse_next_body_to_eof(struct message_parser_ctx *ctx,
485 				  struct message_block *block_r)
486 {
487 	bool full;
488 	int ret;
489 
490 	if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
491 		return ret;
492 
493 	parse_body_add_block(ctx, block_r);
494 
495 	if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
496 	    (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
497 		return 0;
498 
499 	return 1;
500 }
501 
parse_content_type(struct message_parser_ctx * ctx,struct message_header_line * hdr)502 static void parse_content_type(struct message_parser_ctx *ctx,
503 			       struct message_header_line *hdr)
504 {
505 	struct rfc822_parser_context parser;
506 	const char *const *results;
507 	string_t *content_type;
508 	int ret;
509 
510 	if (ctx->part_seen_content_type)
511 		return;
512 	ctx->part_seen_content_type = TRUE;
513 
514 	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
515 	rfc822_skip_lwsp(&parser);
516 
517 	content_type = t_str_new(64);
518 	ret = rfc822_parse_content_type(&parser, content_type);
519 
520 	if (strcasecmp(str_c(content_type), "message/rfc822") == 0)
521 		ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
522 	else if (strncasecmp(str_c(content_type), "text", 4) == 0 &&
523 		 (str_len(content_type) == 4 ||
524 		  str_data(content_type)[4] == '/'))
525 		ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
526 	else if (strncasecmp(str_c(content_type), "multipart/", 10) == 0) {
527 		ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART;
528 
529 		if (strcasecmp(str_c(content_type)+10, "digest") == 0)
530 			ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART_DIGEST;
531 	}
532 
533 	if (ret < 0 ||
534 	    (ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
535 	    ctx->last_boundary != NULL) {
536 		rfc822_parser_deinit(&parser);
537 		return;
538 	}
539 
540 	rfc2231_parse(&parser, &results);
541 	for (; *results != NULL; results += 2) {
542 		if (strcasecmp(results[0], "boundary") == 0) {
543 			/* truncate excessively long boundaries */
544 			i_free(ctx->last_boundary);
545 			ctx->last_boundary =
546 				i_strndup(results[1], BOUNDARY_STRING_MAX_LEN);
547 			break;
548 		}
549 	}
550 	rfc822_parser_deinit(&parser);
551 }
552 
block_is_at_eoh(const struct message_block * block)553 static bool block_is_at_eoh(const struct message_block *block)
554 {
555 	if (block->size < 1)
556 		return FALSE;
557 	if (block->data[0] == '\n')
558 		return TRUE;
559 	if (block->data[0] == '\r') {
560 		if (block->size < 2)
561 			return FALSE;
562 		if (block->data[1] == '\n')
563 			return TRUE;
564 	}
565 	return FALSE;
566 }
567 
parse_too_many_nested_mime_parts(struct message_parser_ctx * ctx)568 static bool parse_too_many_nested_mime_parts(struct message_parser_ctx *ctx)
569 {
570 	return ctx->nested_parts_count+1 >= ctx->max_nested_mime_parts;
571 }
572 
/* Part flags that must never both be set on the same part: a part is
   either message/rfc822 or multipart, not both (asserted at the end of
   header parsing). */
#define MUTEX_FLAGS \
	(MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
575 
/* Block parser for the headers of the current part.

   Each call returns either one header line (1, with block_r->hdr set and
   block_r->size 0), or the end of headers (1, with block_r->hdr NULL and
   block_r->size 0), after which ctx->parse_next_block has been switched to
   the parser that handles the part's body. Returns 0 when more input is
   needed, and the header parser's negative return value as is when it
   fails with a stream error.

   Before each header line is parsed, the beginning of the line is checked
   for a boundary of an enclosing multipart. Such a boundary ends the
   headers immediately. */
static int parse_next_header(struct message_parser_ctx *ctx,
			     struct message_block *block_r)
{
	struct message_part *part = ctx->part;
	struct message_header_line *hdr;
	struct message_boundary *boundary;
	bool full;
	int ret;

	/* a negative return value isn't returned here; it makes the code
	   below continue with the header parser */
	if ((ret = message_parser_read_more(ctx, block_r, &full)) == 0)
		return ret;

	if (ret > 0 && block_is_at_eoh(block_r) &&
	    ctx->last_boundary != NULL &&
	    (part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0) {
		/* we are at the end of headers and we've determined that we're
		   going to start a multipart. add the boundary already here
		   at this point so we can reliably determine whether the
		   "\n--boundary" belongs to us or to a previous boundary.
		   this is a problem if the boundary prefixes are identical,
		   because MIME requires only the prefix to match. */
		if (!parse_too_many_nested_mime_parts(ctx)) {
			parse_next_body_multipart_init(ctx);
			ctx->multipart = TRUE;
		} else {
			/* nesting limit reached: don't treat the part as
			   a multipart */
			part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
			part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MULTIPART);
		}
	}

	/* before parsing the header see if we can find a --boundary from here.
	   we're guaranteed to be at the beginning of the line here. */
	if (ret > 0) {
		ret = ctx->boundaries == NULL ? -1 :
			boundary_line_find(ctx, block_r->data,
					   block_r->size, full, &boundary);
		if (ret > 0 && boundary->part == ctx->part) {
			/* our own body begins with our own --boundary.
			   we don't want to handle that yet. */
			ret = -1;
		}
	}
	if (ret < 0) {
		/* no boundary */
		ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr);
		if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) {
			/* ask for more than what is currently buffered */
			ctx->want_count = i_stream_get_data_size(ctx->input) + 1;
			return ret;
		}
	} else if (ret == 0) {
		/* need more data */
		return 0;
	} else {
		/* boundary found. stop parsing headers here. The previous
		   [CR]LF belongs to the MIME boundary though. */
		if (ctx->prev_hdr_newline_size > 0) {
			i_assert(ctx->part->header_size.lines > 0);
			/* remove the newline size from the MIME header */
			ctx->part->header_size.lines--;
			ctx->part->header_size.physical_size -=
				ctx->prev_hdr_newline_size;
			ctx->part->header_size.virtual_size -= 2;
			/* add the newline size to the parent's body */
			ctx->part->parent->body_size.lines++;
			ctx->part->parent->body_size.physical_size +=
				ctx->prev_hdr_newline_size;
			ctx->part->parent->body_size.virtual_size += 2;
		}
		hdr = NULL;
	}

	if (hdr != NULL) {
		if (hdr->eoh)
			;
		else if (strcasecmp(hdr->name, "Mime-Version") == 0) {
			/* it's MIME. Content-* headers are valid */
			part->flags |= MESSAGE_PART_FLAG_IS_MIME;
		} else if (strcasecmp(hdr->name, "Content-Type") == 0) {
			if ((ctx->flags &
			     MESSAGE_PARSER_FLAG_MIME_VERSION_STRICT) == 0)
				part->flags |= MESSAGE_PART_FLAG_IS_MIME;

			/* a multi-line value is parsed only once it's
			   complete */
			if (hdr->continues)
				hdr->use_full_value = TRUE;
			else T_BEGIN {
				parse_content_type(ctx, hdr);
			} T_END;
		}

		block_r->hdr = hdr;
		block_r->size = 0;
		/* remembered in case the next line is a boundary, whose
		   preceding newline must be moved out of the header size */
		ctx->prev_hdr_newline_size = hdr->no_newline ? 0 :
			(hdr->crlf_newline ? 2 : 1);
		return 1;
	}

	/* end of headers */
	if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
		/* It's not MIME. Reset everything we found from
		   Content-Type. */
		i_assert(!ctx->multipart);
		part->flags = 0;
	}
	i_free(ctx->last_boundary);

	if (!ctx->part_seen_content_type ||
	    (part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
		if (part->parent != NULL &&
		    (part->parent->flags &
		     MESSAGE_PART_FLAG_MULTIPART_DIGEST) != 0) {
			/* when there's no content-type specified and we're
			   below multipart/digest, assume message/rfc822
			   content-type */
			part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
		} else {
			/* otherwise we default to text/plain */
			part->flags |= MESSAGE_PART_FLAG_TEXT;
		}
	}

	if (message_parse_header_has_nuls(ctx->hdr_parser_ctx))
		part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
	message_parse_header_deinit(&ctx->hdr_parser_ctx);

	i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);

	/* the body begins at the beginning of a line */
	ctx->last_chr = '\n';
	if (ctx->multipart) {
		/* our boundary was already added to ctx->boundaries above */
		i_assert(ctx->last_boundary == NULL);
		ctx->multipart = FALSE;
		ctx->parse_next_block = parse_next_body_to_boundary;
	} else if ((part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) == 0) {
		/* Not message/rfc822 */
		if (ctx->boundaries != NULL)
			ctx->parse_next_block = parse_next_body_to_boundary;
		else
			ctx->parse_next_block = parse_next_body_to_eof;
	} else if (!parse_too_many_nested_mime_parts(ctx) &&
		   ctx->total_parts_count < ctx->max_total_mime_parts) {
		/* message/rfc822 - not reached MIME part limits yet */
		ctx->parse_next_block = parse_next_body_message_rfc822_init;
	} else {
		/* message/rfc822 - already reached MIME part limits */
		part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
		part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MESSAGE_RFC822);
		if (ctx->boundaries != NULL)
			ctx->parse_next_block = parse_next_body_to_boundary;
		else
			ctx->parse_next_block = parse_next_body_to_eof;
	}

	ctx->want_count = 1;

	/* return empty block as end of headers */
	block_r->hdr = NULL;
	block_r->size = 0;
	return 1;
}
734 
parse_next_header_init(struct message_parser_ctx * ctx,struct message_block * block_r)735 static int parse_next_header_init(struct message_parser_ctx *ctx,
736 				  struct message_block *block_r)
737 {
738 	i_assert(ctx->hdr_parser_ctx == NULL);
739 
740 	ctx->hdr_parser_ctx =
741 		message_parse_header_init(ctx->input, &ctx->part->header_size,
742 					  ctx->hdr_flags);
743 	ctx->part_seen_content_type = FALSE;
744 	ctx->prev_hdr_newline_size = 0;
745 
746 	ctx->parse_next_block = parse_next_header;
747 	return parse_next_header(ctx, block_r);
748 }
749 
750 struct message_parser_ctx *
message_parser_init_int(struct istream * input,const struct message_parser_settings * set)751 message_parser_init_int(struct istream *input,
752 			const struct message_parser_settings *set)
753 {
754 	struct message_parser_ctx *ctx;
755 
756 	ctx = i_new(struct message_parser_ctx, 1);
757 	ctx->hdr_flags = set->hdr_flags;
758 	ctx->flags = set->flags;
759 	ctx->max_nested_mime_parts = set->max_nested_mime_parts != 0 ?
760 		set->max_nested_mime_parts :
761 		MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS;
762 	ctx->max_total_mime_parts = set->max_total_mime_parts != 0 ?
763 		set->max_total_mime_parts :
764 		MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS;
765 	ctx->input = input;
766 	i_stream_ref(input);
767 	return ctx;
768 }
769 
770 struct message_parser_ctx *
message_parser_init(pool_t part_pool,struct istream * input,const struct message_parser_settings * set)771 message_parser_init(pool_t part_pool, struct istream *input,
772 		    const struct message_parser_settings *set)
773 {
774 	struct message_parser_ctx *ctx;
775 
776 	ctx = message_parser_init_int(input, set);
777 	ctx->part_pool = part_pool;
778 	ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1);
779 	ctx->next_part = &ctx->part->children;
780 	ctx->parse_next_block = parse_next_header_init;
781 	ctx->total_parts_count = 1;
782 	i_array_init(&ctx->next_part_stack, 4);
783 	return ctx;
784 }
785 
message_parser_deinit(struct message_parser_ctx ** _ctx,struct message_part ** parts_r)786 void message_parser_deinit(struct message_parser_ctx **_ctx,
787 			  struct message_part **parts_r)
788 {
789 	const char *error;
790 
791 	i_assert((**_ctx).preparsed == FALSE);
792 	if (message_parser_deinit_from_parts(_ctx, parts_r, &error) < 0)
793 		i_panic("message_parser_deinit_from_parts: %s", error);
794 }
795 
message_parser_deinit_from_parts(struct message_parser_ctx ** _ctx,struct message_part ** parts_r,const char ** error_r)796 int message_parser_deinit_from_parts(struct message_parser_ctx **_ctx,
797 				     struct message_part **parts_r,
798 				     const char **error_r)
799 {
800         struct message_parser_ctx *ctx = *_ctx;
801 	int ret = ctx->broken_reason != NULL ? -1 : 0;
802 
803 	*_ctx = NULL;
804 	*parts_r = ctx->parts;
805 	*error_r = ctx->broken_reason;
806 
807 	if (ctx->hdr_parser_ctx != NULL)
808 		message_parse_header_deinit(&ctx->hdr_parser_ctx);
809 	if (ctx->part != NULL) {
810 		/* If the whole message has been parsed, the parts are
811 		   usually finished in message_parser_parse_next_block().
812 		   However, it's possible that the caller finishes reading
813 		   through the istream without calling
814 		   message_parser_parse_next_block() afterwards. In that case
815 		   we still need to finish these parts. */
816 		while (ctx->part->parent != NULL)
817 			message_part_finish(ctx);
818 	}
819 	boundary_remove_until(ctx, NULL);
820 	i_assert(ctx->nested_parts_count == 0);
821 
822 	i_stream_unref(&ctx->input);
823 	array_free(&ctx->next_part_stack);
824 	i_free(ctx->last_boundary);
825 	i_free(ctx);
826 	i_assert(ret < 0 || *parts_r != NULL);
827 	return ret;
828 }
829 
message_parser_parse_next_block(struct message_parser_ctx * ctx,struct message_block * block_r)830 int message_parser_parse_next_block(struct message_parser_ctx *ctx,
831 				    struct message_block *block_r)
832 {
833 	int ret;
834 	bool eof = FALSE, full;
835 
836 	i_zero(block_r);
837 
838 	while ((ret = ctx->parse_next_block(ctx, block_r)) == 0) {
839 		ret = message_parser_read_more(ctx, block_r, &full);
840 		if (ret == 0) {
841 			i_assert(!ctx->input->blocking);
842 			return 0;
843 		}
844 		if (ret == -1) {
845 			i_assert(!eof);
846 			eof = TRUE;
847 		}
848 	}
849 
850 	block_r->part = ctx->part;
851 
852 	if (ret < 0 && ctx->part != NULL) {
853 		/* Successful EOF or unexpected failure */
854 		i_assert(ctx->input->eof || ctx->input->closed ||
855 			 ctx->input->stream_errno != 0 ||
856 			 ctx->broken_reason != NULL);
857 		while (ctx->part->parent != NULL)
858 			message_part_finish(ctx);
859 	}
860 
861 	if (block_r->size == 0) {
862 		/* data isn't supposed to be read, so make sure it's NULL */
863 		block_r->data = NULL;
864 	}
865 	return ret;
866 }
867 
868 #undef message_parser_parse_header
message_parser_parse_header(struct message_parser_ctx * ctx,struct message_size * hdr_size,message_part_header_callback_t * callback,void * context)869 void message_parser_parse_header(struct message_parser_ctx *ctx,
870 				 struct message_size *hdr_size,
871 				 message_part_header_callback_t *callback,
872 				 void *context)
873 {
874 	struct message_block block;
875 	int ret;
876 
877 	while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) {
878 		callback(block.part, block.hdr, context);
879 
880 		if (block.hdr == NULL)
881 			break;
882 	}
883 	i_assert(ret != 0);
884 	i_assert(ctx->part != NULL);
885 
886 	if (ret < 0) {
887 		/* well, can't return error so fake end of headers */
888 		callback(ctx->part, NULL, context);
889 	}
890 
891         *hdr_size = ctx->part->header_size;
892 }
893 
894 #undef message_parser_parse_body
message_parser_parse_body(struct message_parser_ctx * ctx,message_part_header_callback_t * hdr_callback,void * context)895 void message_parser_parse_body(struct message_parser_ctx *ctx,
896 			       message_part_header_callback_t *hdr_callback,
897 			       void *context)
898 {
899 	struct message_block block;
900 	int ret;
901 
902 	while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) {
903 		if (block.size == 0 && hdr_callback != NULL)
904 			hdr_callback(block.part, block.hdr, context);
905 	}
906 	i_assert(ret != 0);
907 }
908