1 /* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */
2
3 #include "lib.h"
4 #include "array.h"
5 #include "str.h"
6 #include "istream.h"
7 #include "rfc822-parser.h"
8 #include "rfc2231-parser.h"
9 #include "message-parser-private.h"
10
11 message_part_header_callback_t *null_message_part_header_callback = NULL;
12
13 static int parse_next_header_init(struct message_parser_ctx *ctx,
14 struct message_block *block_r);
15 static int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
16 struct message_block *block_r);
17 static int parse_next_body_to_eof(struct message_parser_ctx *ctx,
18 struct message_block *block_r);
19
20 static struct message_boundary *
boundary_find(struct message_boundary * boundaries,const unsigned char * data,size_t len,bool trailing_dashes)21 boundary_find(struct message_boundary *boundaries,
22 const unsigned char *data, size_t len, bool trailing_dashes)
23 {
24 struct message_boundary *best = NULL;
25
26 /* As MIME spec says: search from latest one to oldest one so that we
27 don't break if the same boundary is used in nested parts. Also the
28 full message line doesn't have to match the boundary, only the
29 beginning. However, if there are multiple prefixes whose beginning
30 matches, use the longest matching one. */
31 while (boundaries != NULL) {
32 if (boundaries->len <= len &&
33 memcmp(boundaries->boundary, data, boundaries->len) == 0 &&
34 (best == NULL || best->len < boundaries->len)) {
35 best = boundaries;
36 /* If we see "foo--", it could either mean that there
37 is a boundary named "foo" that ends now or there's
38 a boundary "foo--" which continues. */
39 if (best->len == len ||
40 (best->len == len-2 && trailing_dashes)) {
41 /* This is exactly the wanted boundary. There
42 can't be a better one. */
43 break;
44 }
45 }
46
47 boundaries = boundaries->next;
48 }
49
50 return best;
51 }
52
parse_body_add_block(struct message_parser_ctx * ctx,struct message_block * block)53 static void parse_body_add_block(struct message_parser_ctx *ctx,
54 struct message_block *block)
55 {
56 unsigned int missing_cr_count = 0;
57 const unsigned char *cur, *next, *data = block->data;
58
59 i_assert(block->size > 0);
60
61 block->hdr = NULL;
62
63 /* check if we have NULs */
64 if (memchr(data, '\0', block->size) != NULL)
65 ctx->part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
66
67 /* count number of lines and missing CRs */
68 if (*data == '\n') {
69 ctx->part->body_size.lines++;
70 if (ctx->last_chr != '\r')
71 missing_cr_count++;
72 }
73
74 cur = data + 1;
75 while ((next = memchr(cur, '\n', block->size - (cur - data))) != NULL) {
76 ctx->part->body_size.lines++;
77 if (next[-1] != '\r')
78 missing_cr_count++;
79
80 cur = next + 1;
81 }
82 ctx->last_chr = data[block->size - 1];
83 ctx->skip += block->size;
84
85 ctx->part->body_size.physical_size += block->size;
86 ctx->part->body_size.virtual_size += block->size + missing_cr_count;
87 }
88
message_parser_read_more(struct message_parser_ctx * ctx,struct message_block * block_r,bool * full_r)89 int message_parser_read_more(struct message_parser_ctx *ctx,
90 struct message_block *block_r, bool *full_r)
91 {
92 int ret;
93
94 if (ctx->skip > 0) {
95 i_stream_skip(ctx->input, ctx->skip);
96 ctx->skip = 0;
97 }
98
99 *full_r = FALSE;
100 ret = i_stream_read_bytes(ctx->input, &block_r->data,
101 &block_r->size, ctx->want_count + 1);
102 if (ret <= 0) {
103 switch (ret) {
104 case 0:
105 if (!ctx->input->eof) {
106 i_assert(!ctx->input->blocking);
107 return 0;
108 }
109 break;
110 case -1:
111 i_assert(ctx->input->eof ||
112 ctx->input->stream_errno != 0);
113 ctx->eof = TRUE;
114 if (block_r->size != 0) {
115 /* EOF, but we still have some data.
116 return it. */
117 return 1;
118 }
119 return -1;
120 case -2:
121 *full_r = TRUE;
122 break;
123 default:
124 i_unreached();
125 }
126 }
127
128 if (!*full_r) {
129 /* reset number of wanted characters if we actually got them */
130 ctx->want_count = 1;
131 }
132 return 1;
133 }
134
135 static void
message_part_append(struct message_parser_ctx * ctx)136 message_part_append(struct message_parser_ctx *ctx)
137 {
138 struct message_part *parent = ctx->part;
139 struct message_part *part;
140
141 i_assert(!ctx->preparsed);
142 i_assert(parent != NULL);
143 i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART |
144 MESSAGE_PART_FLAG_MESSAGE_RFC822)) != 0);
145
146 part = p_new(ctx->part_pool, struct message_part, 1);
147 part->parent = parent;
148
149 /* set child position */
150 part->physical_pos =
151 parent->physical_pos +
152 parent->body_size.physical_size +
153 parent->header_size.physical_size;
154
155 /* add to parent's linked list */
156 *ctx->next_part = part;
157 /* update the parent's end-of-linked-list pointer */
158 struct message_part **next_part = &part->next;
159 array_push_back(&ctx->next_part_stack, &next_part);
160 /* This part is now the new parent for the next message_part_append()
161 call. Its linked list begins with the children pointer. */
162 ctx->next_part = &part->children;
163
164 ctx->part = part;
165 ctx->nested_parts_count++;
166 ctx->total_parts_count++;
167 i_assert(ctx->nested_parts_count < ctx->max_nested_mime_parts);
168 i_assert(ctx->total_parts_count <= ctx->max_total_mime_parts);
169 }
170
message_part_finish(struct message_parser_ctx * ctx)171 static void message_part_finish(struct message_parser_ctx *ctx)
172 {
173 struct message_part **const *parent_next_partp;
174
175 if (!ctx->preparsed) {
176 i_assert(ctx->nested_parts_count > 0);
177 ctx->nested_parts_count--;
178
179 parent_next_partp = array_back(&ctx->next_part_stack);
180 array_pop_back(&ctx->next_part_stack);
181 ctx->next_part = *parent_next_partp;
182 }
183
184 message_size_add(&ctx->part->parent->body_size, &ctx->part->body_size);
185 message_size_add(&ctx->part->parent->body_size, &ctx->part->header_size);
186 ctx->part->parent->children_count += 1 + ctx->part->children_count;
187 ctx->part = ctx->part->parent;
188 }
189
message_boundary_free(struct message_boundary * b)190 static void message_boundary_free(struct message_boundary *b)
191 {
192 i_free(b->boundary);
193 i_free(b);
194 }
195
196 static void
boundary_remove_until(struct message_parser_ctx * ctx,struct message_boundary * boundary)197 boundary_remove_until(struct message_parser_ctx *ctx,
198 struct message_boundary *boundary)
199 {
200 while (ctx->boundaries != boundary) {
201 struct message_boundary *cur = ctx->boundaries;
202
203 i_assert(cur != NULL);
204 ctx->boundaries = cur->next;
205 message_boundary_free(cur);
206
207 }
208 ctx->boundaries = boundary;
209 }
210
parse_next_body_multipart_init(struct message_parser_ctx * ctx)211 static void parse_next_body_multipart_init(struct message_parser_ctx *ctx)
212 {
213 struct message_boundary *b;
214
215 b = i_new(struct message_boundary, 1);
216 b->part = ctx->part;
217 b->boundary = ctx->last_boundary;
218 ctx->last_boundary = NULL;
219 b->len = strlen(b->boundary);
220
221 b->next = ctx->boundaries;
222 ctx->boundaries = b;
223 }
224
/* Parser state: the body of a message/rfc822 part begins. Creates the
   child part for the embedded message and continues directly with
   parsing its headers. Returns whatever parse_next_header_init()
   returns. */
static int
parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx,
				    struct message_block *block_r)
{
	int ret;

	message_part_append(ctx);
	ret = parse_next_header_init(ctx, block_r);
	return ret;
}
231
232 static int
boundary_line_find(struct message_parser_ctx * ctx,const unsigned char * data,size_t size,bool full,struct message_boundary ** boundary_r)233 boundary_line_find(struct message_parser_ctx *ctx,
234 const unsigned char *data, size_t size, bool full,
235 struct message_boundary **boundary_r)
236 {
237 *boundary_r = NULL;
238
239 if (size < 2) {
240 i_assert(!full);
241
242 if (ctx->input->eof)
243 return -1;
244 ctx->want_count = 2;
245 return 0;
246 }
247
248 if (data[0] != '-' || data[1] != '-') {
249 /* not a boundary, just skip this line */
250 return -1;
251 }
252
253 if (ctx->total_parts_count >= ctx->max_total_mime_parts) {
254 /* can't add any more MIME parts. just stop trying to find
255 more boundaries. */
256 ctx->part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
257 return -1;
258 }
259
260 /* need to find the end of line */
261 data += 2;
262 size -= 2;
263 const unsigned char *lf_pos = memchr(data, '\n', size);
264 if (lf_pos == NULL &&
265 size+2 < BOUNDARY_END_MAX_LEN &&
266 !ctx->input->eof && !full) {
267 /* no LF found */
268 ctx->want_count = BOUNDARY_END_MAX_LEN;
269 return 0;
270 }
271 size_t find_size = size;
272 bool trailing_dashes = FALSE;
273
274 if (lf_pos != NULL) {
275 find_size = lf_pos - data;
276 if (find_size > 0 && data[find_size-1] == '\r')
277 find_size--;
278 if (find_size > 2 && data[find_size-1] == '-' &&
279 data[find_size-2] == '-')
280 trailing_dashes = TRUE;
281 } else if (find_size > BOUNDARY_END_MAX_LEN)
282 find_size = BOUNDARY_END_MAX_LEN;
283
284 *boundary_r = boundary_find(ctx->boundaries, data, find_size,
285 trailing_dashes);
286 if (*boundary_r == NULL)
287 return -1;
288
289 (*boundary_r)->epilogue_found =
290 size >= (*boundary_r)->len + 2 &&
291 memcmp(data + (*boundary_r)->len, "--", 2) == 0;
292 return 1;
293 }
294
parse_next_mime_header_init(struct message_parser_ctx * ctx,struct message_block * block_r)295 static int parse_next_mime_header_init(struct message_parser_ctx *ctx,
296 struct message_block *block_r)
297 {
298 message_part_append(ctx);
299 ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME;
300
301 return parse_next_header_init(ctx, block_r);
302 }
303
parse_next_body_skip_boundary_line(struct message_parser_ctx * ctx,struct message_block * block_r)304 static int parse_next_body_skip_boundary_line(struct message_parser_ctx *ctx,
305 struct message_block *block_r)
306 {
307 const unsigned char *ptr;
308 int ret;
309 bool full;
310
311 if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
312 return ret;
313
314 ptr = memchr(block_r->data, '\n', block_r->size);
315 if (ptr == NULL) {
316 parse_body_add_block(ctx, block_r);
317 if (block_r->size > 0 &&
318 (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
319 return 1;
320 return 0;
321 }
322
323 /* found the LF */
324 block_r->size = (ptr - block_r->data) + 1;
325 parse_body_add_block(ctx, block_r);
326
327 if (ctx->boundaries == NULL || ctx->boundaries->part != ctx->part) {
328 /* epilogue */
329 if (ctx->boundaries != NULL)
330 ctx->parse_next_block = parse_next_body_to_boundary;
331 else
332 ctx->parse_next_block = parse_next_body_to_eof;
333 } else {
334 /* a new MIME part begins */
335 ctx->parse_next_block = parse_next_mime_header_init;
336 }
337 if (block_r->size > 0 &&
338 (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
339 return 1;
340 return ctx->parse_next_block(ctx, block_r);
341 }
342
parse_part_finish(struct message_parser_ctx * ctx,struct message_boundary * boundary,struct message_block * block_r,bool first_line)343 static int parse_part_finish(struct message_parser_ctx *ctx,
344 struct message_boundary *boundary,
345 struct message_block *block_r, bool first_line)
346 {
347 size_t line_size;
348 size_t boundary_len = boundary->len;
349 bool boundary_epilogue_found = boundary->epilogue_found;
350
351 i_assert(ctx->last_boundary == NULL);
352
353 /* get back to parent MIME part, summing the child MIME part sizes
354 into parent's body sizes */
355 while (ctx->part != boundary->part) {
356 message_part_finish(ctx);
357 i_assert(ctx->part != NULL);
358 }
359
360 if (boundary->epilogue_found) {
361 /* this boundary isn't needed anymore */
362 boundary_remove_until(ctx, boundary->next);
363 } else {
364 /* forget about the boundaries we possibly skipped */
365 boundary_remove_until(ctx, boundary);
366 }
367
368 /* the boundary itself should already be in buffer. add that. */
369 block_r->data = i_stream_get_data(ctx->input, &block_r->size);
370 i_assert(block_r->size >= ctx->skip);
371 block_r->data += ctx->skip;
372 /* [[\r]\n]--<boundary>[--] */
373 if (first_line)
374 line_size = 0;
375 else if (block_r->data[0] == '\r') {
376 i_assert(block_r->data[1] == '\n');
377 line_size = 2;
378 } else {
379 i_assert(block_r->data[0] == '\n');
380 line_size = 1;
381 }
382 line_size += 2 + boundary_len + (boundary_epilogue_found ? 2 : 0);
383 i_assert(block_r->size >= ctx->skip + line_size);
384 block_r->size = line_size;
385 parse_body_add_block(ctx, block_r);
386
387 ctx->parse_next_block = parse_next_body_skip_boundary_line;
388
389 if ((ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES) != 0)
390 return 1;
391 return ctx->parse_next_block(ctx, block_r);
392 }
393
parse_next_body_to_boundary(struct message_parser_ctx * ctx,struct message_block * block_r)394 static int parse_next_body_to_boundary(struct message_parser_ctx *ctx,
395 struct message_block *block_r)
396 {
397 struct message_boundary *boundary = NULL;
398 const unsigned char *data, *cur, *next, *end;
399 size_t boundary_start;
400 int ret;
401 bool full;
402
403 if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
404 return ret;
405
406 data = block_r->data;
407 if (ctx->last_chr == '\n') {
408 /* handle boundary in first line of message. alternatively
409 it's an empty line. */
410 ret = boundary_line_find(ctx, block_r->data,
411 block_r->size, full, &boundary);
412 if (ret >= 0) {
413 return ret == 0 ? 0 :
414 parse_part_finish(ctx, boundary, block_r, TRUE);
415 }
416 }
417
418 i_assert(block_r->size > 0);
419 boundary_start = 0;
420
421 /* skip to beginning of the next line. the first line was
422 handled already. */
423 cur = data; end = data + block_r->size;
424 while ((next = memchr(cur, '\n', end - cur)) != NULL) {
425 cur = next + 1;
426
427 boundary_start = next - data;
428 if (next > data && next[-1] == '\r')
429 boundary_start--;
430
431 if (boundary_start != 0) {
432 /* we can at least skip data until the first [CR]LF.
433 input buffer can't be full anymore. */
434 full = FALSE;
435 }
436
437 ret = boundary_line_find(ctx, cur, end - cur, full, &boundary);
438 if (ret >= 0) {
439 /* found / need more data */
440 if (ret == 0 && boundary_start == 0)
441 ctx->want_count += cur - block_r->data;
442 break;
443 }
444 }
445
446 if (next != NULL) {
447 /* found / need more data */
448 i_assert(ret >= 0);
449 i_assert(!(ret == 0 && full));
450 } else if (boundary_start == 0) {
451 /* no linefeeds in this block. we can just skip it. */
452 ret = 0;
453 if (block_r->data[block_r->size-1] == '\r' && !ctx->eof) {
454 /* this may be the beginning of the \r\n--boundary */
455 block_r->size--;
456 }
457 boundary_start = block_r->size;
458 } else {
459 /* the boundary wasn't found from this data block,
460 we'll need more data. */
461 ret = 0;
462 ctx->want_count = (block_r->size - boundary_start) + 1;
463 }
464
465 if (ret > 0 || (ret == 0 && !ctx->eof)) {
466 /* a) we found the boundary
467 b) we need more data and haven't reached EOF yet
468 so leave CR+LF + last line to buffer */
469 block_r->size = boundary_start;
470 }
471 if (block_r->size != 0) {
472 parse_body_add_block(ctx, block_r);
473
474 if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
475 (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
476 return 0;
477
478 return 1;
479 }
480 return ret <= 0 ? ret :
481 parse_part_finish(ctx, boundary, block_r, FALSE);
482 }
483
parse_next_body_to_eof(struct message_parser_ctx * ctx,struct message_block * block_r)484 static int parse_next_body_to_eof(struct message_parser_ctx *ctx,
485 struct message_block *block_r)
486 {
487 bool full;
488 int ret;
489
490 if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0)
491 return ret;
492
493 parse_body_add_block(ctx, block_r);
494
495 if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 &&
496 (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) == 0)
497 return 0;
498
499 return 1;
500 }
501
parse_content_type(struct message_parser_ctx * ctx,struct message_header_line * hdr)502 static void parse_content_type(struct message_parser_ctx *ctx,
503 struct message_header_line *hdr)
504 {
505 struct rfc822_parser_context parser;
506 const char *const *results;
507 string_t *content_type;
508 int ret;
509
510 if (ctx->part_seen_content_type)
511 return;
512 ctx->part_seen_content_type = TRUE;
513
514 rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
515 rfc822_skip_lwsp(&parser);
516
517 content_type = t_str_new(64);
518 ret = rfc822_parse_content_type(&parser, content_type);
519
520 if (strcasecmp(str_c(content_type), "message/rfc822") == 0)
521 ctx->part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
522 else if (strncasecmp(str_c(content_type), "text", 4) == 0 &&
523 (str_len(content_type) == 4 ||
524 str_data(content_type)[4] == '/'))
525 ctx->part->flags |= MESSAGE_PART_FLAG_TEXT;
526 else if (strncasecmp(str_c(content_type), "multipart/", 10) == 0) {
527 ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART;
528
529 if (strcasecmp(str_c(content_type)+10, "digest") == 0)
530 ctx->part->flags |= MESSAGE_PART_FLAG_MULTIPART_DIGEST;
531 }
532
533 if (ret < 0 ||
534 (ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 ||
535 ctx->last_boundary != NULL) {
536 rfc822_parser_deinit(&parser);
537 return;
538 }
539
540 rfc2231_parse(&parser, &results);
541 for (; *results != NULL; results += 2) {
542 if (strcasecmp(results[0], "boundary") == 0) {
543 /* truncate excessively long boundaries */
544 i_free(ctx->last_boundary);
545 ctx->last_boundary =
546 i_strndup(results[1], BOUNDARY_STRING_MAX_LEN);
547 break;
548 }
549 }
550 rfc822_parser_deinit(&parser);
551 }
552
block_is_at_eoh(const struct message_block * block)553 static bool block_is_at_eoh(const struct message_block *block)
554 {
555 if (block->size < 1)
556 return FALSE;
557 if (block->data[0] == '\n')
558 return TRUE;
559 if (block->data[0] == '\r') {
560 if (block->size < 2)
561 return FALSE;
562 if (block->data[1] == '\n')
563 return TRUE;
564 }
565 return FALSE;
566 }
567
parse_too_many_nested_mime_parts(struct message_parser_ctx * ctx)568 static bool parse_too_many_nested_mime_parts(struct message_parser_ctx *ctx)
569 {
570 return ctx->nested_parts_count+1 >= ctx->max_nested_mime_parts;
571 }
572
573 #define MUTEX_FLAGS \
574 (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART)
575
parse_next_header(struct message_parser_ctx * ctx,struct message_block * block_r)576 static int parse_next_header(struct message_parser_ctx *ctx,
577 struct message_block *block_r)
578 {
579 struct message_part *part = ctx->part;
580 struct message_header_line *hdr;
581 struct message_boundary *boundary;
582 bool full;
583 int ret;
584
585 if ((ret = message_parser_read_more(ctx, block_r, &full)) == 0)
586 return ret;
587
588 if (ret > 0 && block_is_at_eoh(block_r) &&
589 ctx->last_boundary != NULL &&
590 (part->flags & MESSAGE_PART_FLAG_IS_MIME) != 0) {
591 /* we are at the end of headers and we've determined that we're
592 going to start a multipart. add the boundary already here
593 at this point so we can reliably determine whether the
594 "\n--boundary" belongs to us or to a previous boundary.
595 this is a problem if the boundary prefixes are identical,
596 because MIME requires only the prefix to match. */
597 if (!parse_too_many_nested_mime_parts(ctx)) {
598 parse_next_body_multipart_init(ctx);
599 ctx->multipart = TRUE;
600 } else {
601 part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
602 part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MULTIPART);
603 }
604 }
605
606 /* before parsing the header see if we can find a --boundary from here.
607 we're guaranteed to be at the beginning of the line here. */
608 if (ret > 0) {
609 ret = ctx->boundaries == NULL ? -1 :
610 boundary_line_find(ctx, block_r->data,
611 block_r->size, full, &boundary);
612 if (ret > 0 && boundary->part == ctx->part) {
613 /* our own body begins with our own --boundary.
614 we don't want to handle that yet. */
615 ret = -1;
616 }
617 }
618 if (ret < 0) {
619 /* no boundary */
620 ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr);
621 if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) {
622 ctx->want_count = i_stream_get_data_size(ctx->input) + 1;
623 return ret;
624 }
625 } else if (ret == 0) {
626 /* need more data */
627 return 0;
628 } else {
629 /* boundary found. stop parsing headers here. The previous
630 [CR]LF belongs to the MIME boundary though. */
631 if (ctx->prev_hdr_newline_size > 0) {
632 i_assert(ctx->part->header_size.lines > 0);
633 /* remove the newline size from the MIME header */
634 ctx->part->header_size.lines--;
635 ctx->part->header_size.physical_size -=
636 ctx->prev_hdr_newline_size;
637 ctx->part->header_size.virtual_size -= 2;
638 /* add the newline size to the parent's body */
639 ctx->part->parent->body_size.lines++;
640 ctx->part->parent->body_size.physical_size +=
641 ctx->prev_hdr_newline_size;
642 ctx->part->parent->body_size.virtual_size += 2;
643 }
644 hdr = NULL;
645 }
646
647 if (hdr != NULL) {
648 if (hdr->eoh)
649 ;
650 else if (strcasecmp(hdr->name, "Mime-Version") == 0) {
651 /* it's MIME. Content-* headers are valid */
652 part->flags |= MESSAGE_PART_FLAG_IS_MIME;
653 } else if (strcasecmp(hdr->name, "Content-Type") == 0) {
654 if ((ctx->flags &
655 MESSAGE_PARSER_FLAG_MIME_VERSION_STRICT) == 0)
656 part->flags |= MESSAGE_PART_FLAG_IS_MIME;
657
658 if (hdr->continues)
659 hdr->use_full_value = TRUE;
660 else T_BEGIN {
661 parse_content_type(ctx, hdr);
662 } T_END;
663 }
664
665 block_r->hdr = hdr;
666 block_r->size = 0;
667 ctx->prev_hdr_newline_size = hdr->no_newline ? 0 :
668 (hdr->crlf_newline ? 2 : 1);
669 return 1;
670 }
671
672 /* end of headers */
673 if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
674 /* It's not MIME. Reset everything we found from
675 Content-Type. */
676 i_assert(!ctx->multipart);
677 part->flags = 0;
678 }
679 i_free(ctx->last_boundary);
680
681 if (!ctx->part_seen_content_type ||
682 (part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
683 if (part->parent != NULL &&
684 (part->parent->flags &
685 MESSAGE_PART_FLAG_MULTIPART_DIGEST) != 0) {
686 /* when there's no content-type specified and we're
687 below multipart/digest, assume message/rfc822
688 content-type */
689 part->flags |= MESSAGE_PART_FLAG_MESSAGE_RFC822;
690 } else {
691 /* otherwise we default to text/plain */
692 part->flags |= MESSAGE_PART_FLAG_TEXT;
693 }
694 }
695
696 if (message_parse_header_has_nuls(ctx->hdr_parser_ctx))
697 part->flags |= MESSAGE_PART_FLAG_HAS_NULS;
698 message_parse_header_deinit(&ctx->hdr_parser_ctx);
699
700 i_assert((part->flags & MUTEX_FLAGS) != MUTEX_FLAGS);
701
702 ctx->last_chr = '\n';
703 if (ctx->multipart) {
704 i_assert(ctx->last_boundary == NULL);
705 ctx->multipart = FALSE;
706 ctx->parse_next_block = parse_next_body_to_boundary;
707 } else if ((part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) == 0) {
708 /* Not message/rfc822 */
709 if (ctx->boundaries != NULL)
710 ctx->parse_next_block = parse_next_body_to_boundary;
711 else
712 ctx->parse_next_block = parse_next_body_to_eof;
713 } else if (!parse_too_many_nested_mime_parts(ctx) &&
714 ctx->total_parts_count < ctx->max_total_mime_parts) {
715 /* message/rfc822 - not reached MIME part limits yet */
716 ctx->parse_next_block = parse_next_body_message_rfc822_init;
717 } else {
718 /* message/rfc822 - already reached MIME part limits */
719 part->flags |= MESSAGE_PART_FLAG_OVERFLOW;
720 part->flags &= ENUM_NEGATE(MESSAGE_PART_FLAG_MESSAGE_RFC822);
721 if (ctx->boundaries != NULL)
722 ctx->parse_next_block = parse_next_body_to_boundary;
723 else
724 ctx->parse_next_block = parse_next_body_to_eof;
725 }
726
727 ctx->want_count = 1;
728
729 /* return empty block as end of headers */
730 block_r->hdr = NULL;
731 block_r->size = 0;
732 return 1;
733 }
734
parse_next_header_init(struct message_parser_ctx * ctx,struct message_block * block_r)735 static int parse_next_header_init(struct message_parser_ctx *ctx,
736 struct message_block *block_r)
737 {
738 i_assert(ctx->hdr_parser_ctx == NULL);
739
740 ctx->hdr_parser_ctx =
741 message_parse_header_init(ctx->input, &ctx->part->header_size,
742 ctx->hdr_flags);
743 ctx->part_seen_content_type = FALSE;
744 ctx->prev_hdr_newline_size = 0;
745
746 ctx->parse_next_block = parse_next_header;
747 return parse_next_header(ctx, block_r);
748 }
749
750 struct message_parser_ctx *
message_parser_init_int(struct istream * input,const struct message_parser_settings * set)751 message_parser_init_int(struct istream *input,
752 const struct message_parser_settings *set)
753 {
754 struct message_parser_ctx *ctx;
755
756 ctx = i_new(struct message_parser_ctx, 1);
757 ctx->hdr_flags = set->hdr_flags;
758 ctx->flags = set->flags;
759 ctx->max_nested_mime_parts = set->max_nested_mime_parts != 0 ?
760 set->max_nested_mime_parts :
761 MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS;
762 ctx->max_total_mime_parts = set->max_total_mime_parts != 0 ?
763 set->max_total_mime_parts :
764 MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS;
765 ctx->input = input;
766 i_stream_ref(input);
767 return ctx;
768 }
769
770 struct message_parser_ctx *
message_parser_init(pool_t part_pool,struct istream * input,const struct message_parser_settings * set)771 message_parser_init(pool_t part_pool, struct istream *input,
772 const struct message_parser_settings *set)
773 {
774 struct message_parser_ctx *ctx;
775
776 ctx = message_parser_init_int(input, set);
777 ctx->part_pool = part_pool;
778 ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1);
779 ctx->next_part = &ctx->part->children;
780 ctx->parse_next_block = parse_next_header_init;
781 ctx->total_parts_count = 1;
782 i_array_init(&ctx->next_part_stack, 4);
783 return ctx;
784 }
785
message_parser_deinit(struct message_parser_ctx ** _ctx,struct message_part ** parts_r)786 void message_parser_deinit(struct message_parser_ctx **_ctx,
787 struct message_part **parts_r)
788 {
789 const char *error;
790
791 i_assert((**_ctx).preparsed == FALSE);
792 if (message_parser_deinit_from_parts(_ctx, parts_r, &error) < 0)
793 i_panic("message_parser_deinit_from_parts: %s", error);
794 }
795
message_parser_deinit_from_parts(struct message_parser_ctx ** _ctx,struct message_part ** parts_r,const char ** error_r)796 int message_parser_deinit_from_parts(struct message_parser_ctx **_ctx,
797 struct message_part **parts_r,
798 const char **error_r)
799 {
800 struct message_parser_ctx *ctx = *_ctx;
801 int ret = ctx->broken_reason != NULL ? -1 : 0;
802
803 *_ctx = NULL;
804 *parts_r = ctx->parts;
805 *error_r = ctx->broken_reason;
806
807 if (ctx->hdr_parser_ctx != NULL)
808 message_parse_header_deinit(&ctx->hdr_parser_ctx);
809 if (ctx->part != NULL) {
810 /* If the whole message has been parsed, the parts are
811 usually finished in message_parser_parse_next_block().
812 However, it's possible that the caller finishes reading
813 through the istream without calling
814 message_parser_parse_next_block() afterwards. In that case
815 we still need to finish these parts. */
816 while (ctx->part->parent != NULL)
817 message_part_finish(ctx);
818 }
819 boundary_remove_until(ctx, NULL);
820 i_assert(ctx->nested_parts_count == 0);
821
822 i_stream_unref(&ctx->input);
823 array_free(&ctx->next_part_stack);
824 i_free(ctx->last_boundary);
825 i_free(ctx);
826 i_assert(ret < 0 || *parts_r != NULL);
827 return ret;
828 }
829
message_parser_parse_next_block(struct message_parser_ctx * ctx,struct message_block * block_r)830 int message_parser_parse_next_block(struct message_parser_ctx *ctx,
831 struct message_block *block_r)
832 {
833 int ret;
834 bool eof = FALSE, full;
835
836 i_zero(block_r);
837
838 while ((ret = ctx->parse_next_block(ctx, block_r)) == 0) {
839 ret = message_parser_read_more(ctx, block_r, &full);
840 if (ret == 0) {
841 i_assert(!ctx->input->blocking);
842 return 0;
843 }
844 if (ret == -1) {
845 i_assert(!eof);
846 eof = TRUE;
847 }
848 }
849
850 block_r->part = ctx->part;
851
852 if (ret < 0 && ctx->part != NULL) {
853 /* Successful EOF or unexpected failure */
854 i_assert(ctx->input->eof || ctx->input->closed ||
855 ctx->input->stream_errno != 0 ||
856 ctx->broken_reason != NULL);
857 while (ctx->part->parent != NULL)
858 message_part_finish(ctx);
859 }
860
861 if (block_r->size == 0) {
862 /* data isn't supposed to be read, so make sure it's NULL */
863 block_r->data = NULL;
864 }
865 return ret;
866 }
867
868 #undef message_parser_parse_header
message_parser_parse_header(struct message_parser_ctx * ctx,struct message_size * hdr_size,message_part_header_callback_t * callback,void * context)869 void message_parser_parse_header(struct message_parser_ctx *ctx,
870 struct message_size *hdr_size,
871 message_part_header_callback_t *callback,
872 void *context)
873 {
874 struct message_block block;
875 int ret;
876
877 while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) {
878 callback(block.part, block.hdr, context);
879
880 if (block.hdr == NULL)
881 break;
882 }
883 i_assert(ret != 0);
884 i_assert(ctx->part != NULL);
885
886 if (ret < 0) {
887 /* well, can't return error so fake end of headers */
888 callback(ctx->part, NULL, context);
889 }
890
891 *hdr_size = ctx->part->header_size;
892 }
893
894 #undef message_parser_parse_body
message_parser_parse_body(struct message_parser_ctx * ctx,message_part_header_callback_t * hdr_callback,void * context)895 void message_parser_parse_body(struct message_parser_ctx *ctx,
896 message_part_header_callback_t *hdr_callback,
897 void *context)
898 {
899 struct message_block block;
900 int ret;
901
902 while ((ret = message_parser_parse_next_block(ctx, &block)) > 0) {
903 if (block.size == 0 && hdr_callback != NULL)
904 hdr_callback(block.part, block.hdr, context);
905 }
906 i_assert(ret != 0);
907 }
908