1 /* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */
2 
3 /* MD5 header summing logic was pretty much copy&pasted from popa3d by
4    Solar Designer */
5 
6 #include "lib.h"
7 #include "ioloop.h"
8 #include "array.h"
9 #include "buffer.h"
10 #include "istream.h"
11 #include "str.h"
12 #include "write-full.h"
13 #include "message-parser.h"
14 #include "mail-index.h"
15 #include "mbox-storage.h"
16 #include "mbox-md5.h"
17 #include "mbox-sync-private.h"
18 
19 
/* TRUE if c satisfies IS_LWSP() (defined outside this file) or is a line
   feed. Used when scanning header values that may span folded lines. */
#define IS_LWSP_LF(c) (IS_LWSP(c) || (c) == '\n')
21 
/* Associates a header name with the function that parses its value. */
struct mbox_sync_header_func {
	/* header name; looked up case-insensitively */
	const char *header;
	/* parser for the header. When it returns FALSE,
	   mbox_sync_parse_next_mail() drops the header from the rebuilt
	   header buffer and flags the mail for rewriting. */
	bool (*func)(struct mbox_sync_mail_context *ctx,
		     struct message_header_line *hdr);
};
27 
/* Flag characters recognized in the Status: header (see parse_status()).
   The list is terminated by an all-zero entry, which mbox_flag_find() uses
   as its end marker. */
struct mbox_flag_type mbox_status_flags[] = {
	{ 'R', MAIL_SEEN },
	{ 'O', MBOX_NONRECENT_KLUDGE },
	{ 0, 0 }
};
33 
/* Flag characters recognized in the X-Status: header (see parse_x_status()).
   The list is terminated by an all-zero entry, which mbox_flag_find() uses
   as its end marker. */
struct mbox_flag_type mbox_xstatus_flags[] = {
	{ 'A', MAIL_ANSWERED },
	{ 'F', MAIL_FLAGGED },
	{ 'T', MAIL_DRAFT },
	{ 'D', MAIL_DELETED },
	{ 0, 0 }
};
41 
parse_trailing_whitespace(struct mbox_sync_mail_context * ctx,struct message_header_line * hdr)42 static void parse_trailing_whitespace(struct mbox_sync_mail_context *ctx,
43 				      struct message_header_line *hdr)
44 {
45 	size_t i, space = 0;
46 
47 	/* the value may contain newlines. we can't count whitespace before
48 	   and after it as a single contiguous whitespace block, as that may
49 	   get us into situation where removing whitespace goes eg.
50 	   " \n \n" -> " \n\n" which would then be treated as end of headers.
51 
52 	   that could probably be avoided by being careful, but as newlines
53 	   should never be there (we don't generate them), it's not worth the
54 	   trouble. */
55 
56 	for (i = hdr->full_value_len; i > 0; i--) {
57 		if (!IS_LWSP(hdr->full_value[i-1]))
58 			break;
59 		space++;
60 	}
61 
62 	if ((ssize_t)space > ctx->mail.space) {
63 		i_assert(space != 0);
64 		ctx->mail.offset = ctx->hdr_offset + str_len(ctx->header) + i;
65 		ctx->mail.space = space;
66 	}
67 }
68 
mbox_flag_find(struct mbox_flag_type * flags,char chr)69 static enum mail_flags mbox_flag_find(struct mbox_flag_type *flags, char chr)
70 {
71 	int i;
72 
73 	for (i = 0; flags[i].chr != 0; i++) {
74 		if (flags[i].chr == chr)
75 			return flags[i].flag;
76 	}
77 
78 	return 0;
79 }
80 
parse_status_flags(struct mbox_sync_mail_context * ctx,struct message_header_line * hdr,struct mbox_flag_type * flags_list)81 static bool parse_status_flags(struct mbox_sync_mail_context *ctx,
82 			       struct message_header_line *hdr,
83 			       struct mbox_flag_type *flags_list)
84 {
85 	enum mail_flags flag;
86 	size_t i;
87 	bool duplicates = FALSE;
88 
89 	ctx->mail.flags ^= MBOX_NONRECENT_KLUDGE;
90 	for (i = 0; i < hdr->full_value_len; i++) {
91 		flag = mbox_flag_find(flags_list, hdr->full_value[i]);
92 		if ((ctx->mail.flags & flag) != 0)
93 			duplicates = TRUE;
94 		else
95 			ctx->mail.flags |= flag;
96 	}
97 	ctx->mail.flags ^= MBOX_NONRECENT_KLUDGE;
98 	return duplicates;
99 }
100 
parse_status(struct mbox_sync_mail_context * ctx,struct message_header_line * hdr)101 static bool parse_status(struct mbox_sync_mail_context *ctx,
102 			 struct message_header_line *hdr)
103 {
104 	if (parse_status_flags(ctx, hdr, mbox_status_flags))
105 		ctx->mail.status_broken = TRUE;
106 	ctx->hdr_pos[MBOX_HDR_STATUS] = str_len(ctx->header);
107 	return TRUE;
108 }
109 
parse_x_status(struct mbox_sync_mail_context * ctx,struct message_header_line * hdr)110 static bool parse_x_status(struct mbox_sync_mail_context *ctx,
111 			   struct message_header_line *hdr)
112 {
113 	if (parse_status_flags(ctx, hdr, mbox_xstatus_flags))
114 		ctx->mail.xstatus_broken = TRUE;
115 	ctx->hdr_pos[MBOX_HDR_X_STATUS] = str_len(ctx->header);
116 	return TRUE;
117 }
118 
119 static void
parse_imap_keywords_list(struct mbox_sync_mail_context * ctx,struct message_header_line * hdr,size_t pos)120 parse_imap_keywords_list(struct mbox_sync_mail_context *ctx,
121                          struct message_header_line *hdr, size_t pos)
122 {
123 	struct mailbox *box = &ctx->sync_ctx->mbox->box;
124 	struct index_mailbox_context *ibox = INDEX_STORAGE_CONTEXT(box);
125 	const char *keyword, *error;
126 	size_t keyword_start;
127 	unsigned int idx, count;
128 
129 	count = 0;
130 	while (pos < hdr->full_value_len) {
131 		if (IS_LWSP_LF(hdr->full_value[pos])) {
132                         pos++;
133 			continue;
134 		}
135 
136 		/* read the keyword */
137 		keyword_start = pos;
138 		for (; pos < hdr->full_value_len; pos++) {
139 			if (IS_LWSP_LF(hdr->full_value[pos]))
140 				break;
141 		}
142 
143 		/* add it to index's keyword list if it's not there already */
144 		keyword = t_strndup(hdr->full_value + keyword_start,
145 				    pos - keyword_start);
146 		if (mailbox_keyword_is_valid(&ctx->sync_ctx->mbox->box,
147 					     keyword, &error)) {
148 			mail_index_keyword_lookup_or_create(box->index,
149 							    keyword, &idx);
150 		}
151 		count++;
152 	}
153 
154 	if (count != array_count(ibox->keyword_names)) {
155 		/* need to update this list */
156 		ctx->imapbase_rewrite = TRUE;
157 		ctx->need_rewrite = TRUE;
158 	}
159 }
160 
parse_x_imap_base(struct mbox_sync_mail_context * ctx,struct message_header_line * hdr)161 static bool parse_x_imap_base(struct mbox_sync_mail_context *ctx,
162 			      struct message_header_line *hdr)
163 {
164 	size_t i, j, uid_last_pos;
165 	uint32_t uid_validity, uid_last;
166 
167 	if (ctx->seq != 1 || ctx->seen_imapbase ||
168 	    ctx->sync_ctx->renumber_uids) {
169 		/* Valid only in first message */
170 		return FALSE;
171 	}
172 
173 	/* <uid-validity> 10x<uid-last> */
174 	for (i = 0, uid_validity = 0; i < hdr->full_value_len; i++) {
175 		if (hdr->full_value[i] < '0' || hdr->full_value[i] > '9') {
176 			if (hdr->full_value[i] != ' ')
177 				return FALSE;
178 			break;
179 		}
180 		uid_validity = uid_validity * 10 + (hdr->full_value[i] - '0');
181 	}
182 
183 	if (uid_validity == 0) {
184 		/* broken */
185 		return FALSE;
186 	}
187 
188 	for (; i < hdr->full_value_len; i++) {
189 		if (!IS_LWSP_LF(hdr->full_value[i]))
190 			break;
191 	}
192 	uid_last_pos = i;
193 
194 	for (uid_last = 0, j = 0; i < hdr->full_value_len; i++, j++) {
195 		if (hdr->full_value[i] < '0' || hdr->full_value[i] > '9') {
196 			if (!IS_LWSP_LF(hdr->full_value[i]))
197 				return FALSE;
198 			break;
199 		}
200 		uid_last = uid_last * 10 + (hdr->full_value[i] - '0');
201 	}
202 
203 	if (j != 10 ||
204 	    hdr->full_value_offset != ctx->hdr_offset + str_len(ctx->header)) {
205 		/* uid-last field must be exactly 10 characters to make
206 		   rewriting it easier. also don't try to do this if some
207 		   headers have been removed */
208 		ctx->imapbase_rewrite = TRUE;
209 		ctx->need_rewrite = TRUE;
210 	} else {
211 		ctx->last_uid_value_start_pos = uid_last_pos;
212 		ctx->sync_ctx->base_uid_last_offset =
213 			hdr->full_value_offset + uid_last_pos;
214 	}
215 
216 	if (ctx->sync_ctx->base_uid_validity == 0) {
217 		/* first time parsing this (ie. we're not rewriting).
218 		   save the values. */
219 		ctx->sync_ctx->base_uid_validity = uid_validity;
220 		ctx->sync_ctx->base_uid_last = uid_last;
221 
222 		if (ctx->sync_ctx->next_uid-1 <= uid_last) {
223 			/* new messages have been added since our last sync.
224 			   just update our internal next_uid. */
225 			ctx->sync_ctx->next_uid = uid_last+1;
226 		} else {
227 			/* we need to rewrite the next-uid */
228 			ctx->need_rewrite = TRUE;
229 		}
230 		i_assert(ctx->sync_ctx->next_uid > ctx->sync_ctx->prev_msg_uid);
231 	}
232 
233 	ctx->hdr_pos[MBOX_HDR_X_IMAPBASE] = str_len(ctx->header);
234 	ctx->seen_imapbase = TRUE;
235 
236 	T_BEGIN {
237 		parse_imap_keywords_list(ctx, hdr, i);
238 	} T_END;
239 	parse_trailing_whitespace(ctx, hdr);
240 	return TRUE;
241 }
242 
parse_x_imap(struct mbox_sync_mail_context * ctx,struct message_header_line * hdr)243 static bool parse_x_imap(struct mbox_sync_mail_context *ctx,
244 			 struct message_header_line *hdr)
245 {
246 	if (!parse_x_imap_base(ctx, hdr))
247 		return FALSE;
248 
249 	/* this is the c-client style "FOLDER INTERNAL DATA" message.
250 	   skip it. */
251 	ctx->mail.pseudo = TRUE;
252 	return TRUE;
253 }
254 
parse_x_keywords_real(struct mbox_sync_mail_context * ctx,struct message_header_line * hdr)255 static bool parse_x_keywords_real(struct mbox_sync_mail_context *ctx,
256 				  struct message_header_line *hdr)
257 {
258 	struct mailbox *box = &ctx->sync_ctx->mbox->box;
259 	ARRAY_TYPE(keyword_indexes) keyword_list;
260 	const unsigned int *list;
261 	string_t *keyword;
262 	size_t keyword_start;
263 	unsigned int i, idx, count;
264 	size_t pos;
265 
266 	if (array_is_created(&ctx->mail.keywords))
267 		return FALSE; /* duplicate header, delete */
268 
269 	/* read keyword indexes to temporary array first */
270 	keyword = t_str_new(128);
271 	t_array_init(&keyword_list, 16);
272 
273 	for (pos = 0; pos < hdr->full_value_len; ) {
274 		if (IS_LWSP_LF(hdr->full_value[pos])) {
275                         pos++;
276 			continue;
277 		}
278 
279 		/* read the keyword string */
280 		keyword_start = pos;
281 		for (; pos < hdr->full_value_len; pos++) {
282 			if (IS_LWSP_LF(hdr->full_value[pos]))
283 				break;
284 		}
285 
286 		str_truncate(keyword, 0);
287 		str_append_data(keyword, hdr->full_value + keyword_start,
288 				pos - keyword_start);
289 		if (!mail_index_keyword_lookup(box->index, str_c(keyword),
290 					       &idx)) {
291 			/* keyword wasn't found. that means the sent mail
292 			   originally contained X-Keywords header. Delete it. */
293 			return FALSE;
294 		}
295 
296 		/* check that the keyword isn't already added there.
297 		   we don't want duplicates. */
298 		list = array_get(&keyword_list, &count);
299 		for (i = 0; i < count; i++) {
300 			if (list[i] == idx)
301 				break;
302 		}
303 
304 		if (i == count)
305 			array_push_back(&keyword_list, &idx);
306 	}
307 
308 	/* once we know how many keywords there are, we can allocate the array
309 	   from mail_keyword_pool without wasting memory. */
310 	if (array_count(&keyword_list) > 0) {
311 		p_array_init(&ctx->mail.keywords,
312 			     ctx->sync_ctx->mail_keyword_pool,
313 			     array_count(&keyword_list));
314 		array_append_array(&ctx->mail.keywords, &keyword_list);
315 	}
316 
317 	ctx->hdr_pos[MBOX_HDR_X_KEYWORDS] = str_len(ctx->header);
318 	parse_trailing_whitespace(ctx, hdr);
319 	return TRUE;
320 }
321 
parse_x_keywords(struct mbox_sync_mail_context * ctx,struct message_header_line * hdr)322 static bool parse_x_keywords(struct mbox_sync_mail_context *ctx,
323 			     struct message_header_line *hdr)
324 {
325 	bool ret;
326 
327 	T_BEGIN {
328 		ret = parse_x_keywords_real(ctx, hdr);
329 	} T_END;
330 	return ret;
331 }
332 
parse_x_uid(struct mbox_sync_mail_context * ctx,struct message_header_line * hdr)333 static bool parse_x_uid(struct mbox_sync_mail_context *ctx,
334 			struct message_header_line *hdr)
335 {
336 	uint32_t value = 0;
337 	size_t i;
338 
339 	if (ctx->mail.uid != 0) {
340 		/* duplicate */
341 		return FALSE;
342 	}
343 
344 	for (i = 0; i < hdr->full_value_len; i++) {
345 		if (hdr->full_value[i] < '0' || hdr->full_value[i] > '9')
346 			break;
347 		value = value*10 + (hdr->full_value[i] - '0');
348 	}
349 
350 	for (; i < hdr->full_value_len; i++) {
351 		if (!IS_LWSP_LF(hdr->full_value[i])) {
352 			/* broken value */
353 			return FALSE;
354 		}
355 	}
356 
357 	if (ctx->sync_ctx == NULL) {
358 		/* we're in mbox_sync_parse_match_mail().
359 		   don't do any extra checks. */
360 		ctx->mail.uid = value;
361 		return TRUE;
362 	}
363 
364 	if (ctx->seq == 1 && !ctx->seen_imapbase) {
365 		/* Don't bother allowing X-UID before X-IMAPbase
366 		   header. c-client doesn't allow it either, and this
367 		   way the UID doesn't have to be reset if X-IMAPbase
368 		   header isn't what we expect it to be. */
369 		return FALSE;
370 	}
371 
372 	if (value == ctx->sync_ctx->next_uid) {
373 		/* X-UID is the next expected one. allow it because
374 		   we'd just use this UID anyway. X-IMAPbase header
375 		   still needs to be updated for this. */
376 		ctx->sync_ctx->next_uid++;
377 	} else if (value > ctx->sync_ctx->next_uid) {
378 		/* UID is larger than expected. Don't allow it because
379 		   incoming mails can contain untrusted X-UID fields,
380 		   causing possibly DoS if the UIDs get large enough. */
381 		ctx->mail.uid_broken = TRUE;
382 		return FALSE;
383 	}
384 
385 	if (value <= ctx->sync_ctx->prev_msg_uid) {
386 		/* broken - UIDs must be growing */
387 		ctx->mail.uid_broken = TRUE;
388 		return FALSE;
389 	}
390 
391 	ctx->mail.uid = value;
392 	/* if we had multiple X-UID headers, we could have
393 	   uid_broken=TRUE here. */
394 	ctx->mail.uid_broken = FALSE;
395 
396 	if (ctx->sync_ctx->dest_first_mail && ctx->seq != 1) {
397 		/* if we're expunging the first mail, delete this header since
398 		   otherwise X-IMAPbase header would be added after this, which
399 		   we don't like */
400 		return FALSE;
401 	}
402 
403 	ctx->hdr_pos[MBOX_HDR_X_UID] = str_len(ctx->header);
404 	ctx->parsed_uid = value;
405 	parse_trailing_whitespace(ctx, hdr);
406 	return TRUE;
407 }
408 
parse_content_length(struct mbox_sync_mail_context * ctx,struct message_header_line * hdr)409 static bool parse_content_length(struct mbox_sync_mail_context *ctx,
410 				 struct message_header_line *hdr)
411 {
412 	uoff_t value = 0;
413 	size_t i;
414 
415 	if (ctx->content_length != UOFF_T_MAX) {
416 		/* duplicate */
417 		return FALSE;
418 	}
419 
420 	for (i = 0; i < hdr->full_value_len; i++) {
421 		if (hdr->full_value[i] < '0' || hdr->full_value[i] > '9')
422 			break;
423 		value = value*10 + (hdr->full_value[i] - '0');
424 	}
425 
426 	for (; i < hdr->full_value_len; i++) {
427 		if (!IS_LWSP_LF(hdr->full_value[i])) {
428 			/* broken value */
429 			return FALSE;
430 		}
431 	}
432 
433 	ctx->content_length = value;
434 	return TRUE;
435 }
436 
437 static struct mbox_sync_header_func header_funcs[] = {
438 	{ "Content-Length", parse_content_length },
439 	{ "Status", parse_status },
440 	{ "X-IMAP", parse_x_imap },
441 	{ "X-IMAPbase", parse_x_imap_base },
442 	{ "X-Keywords", parse_x_keywords },
443 	{ "X-Status", parse_x_status },
444 	{ "X-UID", parse_x_uid }
445 };
446 
mbox_sync_bsearch_header_func_cmp(const void * p1,const void * p2)447 static int mbox_sync_bsearch_header_func_cmp(const void *p1, const void *p2)
448 {
449 	const char *key = p1;
450 	const struct mbox_sync_header_func *func = p2;
451 
452 	return strcasecmp(key, func->header);
453 }
454 
mbox_sync_parse_next_mail(struct istream * input,struct mbox_sync_mail_context * ctx)455 int mbox_sync_parse_next_mail(struct istream *input,
456 			      struct mbox_sync_mail_context *ctx)
457 {
458 	struct mbox_sync_context *sync_ctx = ctx->sync_ctx;
459 	struct message_header_parser_ctx *hdr_ctx;
460 	struct message_header_line *hdr;
461 	struct mbox_sync_header_func *func;
462 	struct mbox_md5_context *mbox_md5_ctx;
463 	size_t line_start_pos;
464 	int i, ret;
465 
466 	ctx->hdr_offset = ctx->mail.offset;
467 	ctx->mail.flags = MAIL_RECENT; /* default to having recent flag */
468 
469         ctx->header_first_change = SIZE_MAX;
470 	ctx->header_last_change = 0;
471 
472 	for (i = 0; i < MBOX_HDR_COUNT; i++)
473 		ctx->hdr_pos[i] = SIZE_MAX;
474 
475 	ctx->content_length = UOFF_T_MAX;
476 	str_truncate(ctx->header, 0);
477 
478         mbox_md5_ctx = ctx->sync_ctx->mbox->md5_v.init();
479 
480         line_start_pos = 0;
481 	hdr_ctx = message_parse_header_init(input, NULL, 0);
482 	while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0) {
483 		if (hdr->eoh) {
484 			ctx->have_eoh = TRUE;
485 			break;
486 		}
487 
488 		if (!hdr->continued) {
489 			line_start_pos = str_len(ctx->header);
490 			str_append(ctx->header, hdr->name);
491 			str_append_data(ctx->header, hdr->middle, hdr->middle_len);
492 		}
493 
494 		func = bsearch(hdr->name, header_funcs,
495 			       N_ELEMENTS(header_funcs), sizeof(*header_funcs),
496 			       mbox_sync_bsearch_header_func_cmp);
497 
498 		if (func != NULL) {
499 			if (hdr->continues) {
500 				hdr->use_full_value = TRUE;
501 				continue;
502 			}
503 
504 			if (!func->func(ctx, hdr)) {
505 				/* this header is broken, remove it */
506 				ctx->need_rewrite = TRUE;
507 				str_truncate(ctx->header, line_start_pos);
508 				if (ctx->header_first_change == SIZE_MAX) {
509 					ctx->header_first_change =
510 						line_start_pos;
511 				}
512 				continue;
513 			}
514 			buffer_append(ctx->header, hdr->full_value,
515 				      hdr->full_value_len);
516 		} else {
517 			ctx->sync_ctx->mbox->md5_v.more(mbox_md5_ctx, hdr);
518 			buffer_append(ctx->header, hdr->value,
519 				      hdr->value_len);
520 		}
521 		if (!hdr->no_newline) {
522 			if (hdr->crlf_newline)
523 				str_append_c(ctx->header, '\r');
524 			str_append_c(ctx->header, '\n');
525 		}
526 	}
527 	i_assert(ret != 0);
528 	message_parse_header_deinit(&hdr_ctx);
529 
530 	ctx->sync_ctx->mbox->md5_v.finish(mbox_md5_ctx, ctx->hdr_md5_sum);
531 
532 	if ((ctx->seq == 1 && !ctx->seen_imapbase) ||
533 	    (ctx->seq > 1 && sync_ctx->dest_first_mail)) {
534 		/* missing X-IMAPbase */
535 		ctx->need_rewrite = TRUE;
536 		if (sync_ctx->base_uid_validity == 0) {
537 			/* figure out a new UIDVALIDITY for us. */
538 			sync_ctx->base_uid_validity =
539 				sync_ctx->hdr->uid_validity != 0 &&
540 				!sync_ctx->renumber_uids ?
541 				sync_ctx->hdr->uid_validity :
542 				I_MAX((uint32_t)ioloop_time, 1);
543 		}
544 	}
545 
546 	ctx->body_offset = input->v_offset;
547 	if (input->stream_errno != 0) {
548 		mbox_sync_set_critical(ctx->sync_ctx, "read(%s) failed: %s",
549 			i_stream_get_name(input), i_stream_get_error(input));
550 		return -1;
551 	}
552 	return 0;
553 }
554 
mbox_sync_parse_match_mail(struct mbox_mailbox * mbox,struct mail_index_view * view,uint32_t seq)555 bool mbox_sync_parse_match_mail(struct mbox_mailbox *mbox,
556 				struct mail_index_view *view, uint32_t seq)
557 {
558         struct mbox_sync_mail_context ctx;
559 	struct message_header_parser_ctx *hdr_ctx;
560 	struct message_header_line *hdr;
561 	struct header_func *func;
562 	struct mbox_md5_context *mbox_md5_ctx;
563 	const void *data;
564 	bool expunged;
565 	uint32_t uid;
566 	int ret;
567 
568 	/* we only wish to be sure that this mail actually is what we expect
569 	   it to be. If there's X-UID header and it matches our UID, we use it.
570 	   Otherwise it could mean that the X-UID header is invalid and it's
571 	   just not yet been rewritten. In that case use MD5 sum, if it
572 	   exists. */
573 
574 	mail_index_lookup_uid(view, seq, &uid);
575 	i_zero(&ctx);
576         mbox_md5_ctx = mbox->md5_v.init();
577 
578 	hdr_ctx = message_parse_header_init(mbox->mbox_stream, NULL, 0);
579 	while ((ret = message_parse_header_next(hdr_ctx, &hdr)) > 0) {
580 		if (hdr->eoh)
581 			break;
582 
583 		func = bsearch(hdr->name, header_funcs,
584 			       N_ELEMENTS(header_funcs), sizeof(*header_funcs),
585 			       mbox_sync_bsearch_header_func_cmp);
586 		if (func != NULL) {
587 			if (strcasecmp(hdr->name, "X-UID") == 0) {
588 				if (hdr->continues) {
589 					hdr->use_full_value = TRUE;
590 					continue;
591 				}
592 				(void)parse_x_uid(&ctx, hdr);
593 
594 				if (ctx.mail.uid == uid)
595 					break;
596 			}
597 		} else {
598 			mbox->md5_v.more(mbox_md5_ctx, hdr);
599 		}
600 	}
601 	i_assert(ret != 0);
602 	message_parse_header_deinit(&hdr_ctx);
603 
604 	mbox->md5_v.finish(mbox_md5_ctx, ctx.hdr_md5_sum);
605 
606 	if (ctx.mail.uid == uid)
607 		return TRUE;
608 
609 	/* match by MD5 sum */
610 	mbox->mbox_save_md5 = TRUE;
611 
612 	mail_index_lookup_ext(view, seq, mbox->md5hdr_ext_idx,
613 			      &data, &expunged);
614 	return data == NULL ? 0 :
615 		memcmp(data, ctx.hdr_md5_sum, 16) == 0;
616 }
617