1 /* Copyright (c) 2003-2018 Dovecot authors, see the included COPYING file */
2 
3 #include "lib.h"
4 #include "istream.h"
5 #include "array.h"
6 #include "buffer.h"
7 #include "str.h"
8 #include "message-date.h"
9 #include "message-part-data.h"
10 #include "message-parser.h"
11 #include "message-header-decode.h"
12 #include "istream-tee.h"
13 #include "istream-header-filter.h"
14 #include "imap-envelope.h"
15 #include "imap-bodystructure.h"
16 #include "index-storage.h"
17 #include "index-mail.h"
18 
19 static const struct message_parser_settings msg_parser_set = {
20 	.hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP |
21 		MESSAGE_HEADER_PARSER_FLAG_DROP_CR,
22 	.flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK,
23 };
24 
25 static void index_mail_filter_stream_destroy(struct index_mail *mail);
26 
header_line_cmp(const struct index_mail_line * l1,const struct index_mail_line * l2)27 static int header_line_cmp(const struct index_mail_line *l1,
28 			   const struct index_mail_line *l2)
29 {
30 	int diff;
31 
32 	diff = (int)l1->field_idx - (int)l2->field_idx;
33 	return diff != 0 ? diff :
34 		(int)l1->line_num - (int)l2->line_num;
35 }
36 
index_mail_parse_header_deinit(struct index_mail * mail)37 void index_mail_parse_header_deinit(struct index_mail *mail)
38 {
39 	mail->data.header_parser_initialized = FALSE;
40 }
41 
index_mail_parse_header_finish(struct index_mail * mail)42 static void index_mail_parse_header_finish(struct index_mail *mail)
43 {
44 	struct mail *_mail = &mail->mail.mail;
45 	const struct index_mail_line *lines;
46 	const unsigned char *header;
47 	const uint8_t *match;
48 	buffer_t *buf;
49 	unsigned int i, j, count, match_idx, match_count;
50 	bool noncontiguous;
51 
52 	/* sort it first so fields are grouped together and ordered by
53 	   line number */
54 	array_sort(&mail->header_lines, header_line_cmp);
55 
56 	lines = array_get(&mail->header_lines, &count);
57 	match = array_get(&mail->header_match, &match_count);
58 	header = mail->header_data->data;
59 	buf = t_buffer_create(256);
60 
61 	/* go through all the header lines we found */
62 	for (i = match_idx = 0; i < count; i = j) {
63 		/* matches and header lines are both sorted, all matches
64 		   until lines[i] weren't found */
65 		while (match_idx < lines[i].field_idx &&
66 		       match_idx < match_count) {
67 			if (HEADER_MATCH_USABLE(mail, match[match_idx]) &&
68 			    mail_cache_field_can_add(_mail->transaction->cache_trans,
69 						     _mail->seq, match_idx)) {
70 				/* this header doesn't exist. remember that. */
71 				i_assert((match[match_idx] &
72 					  HEADER_MATCH_FLAG_FOUND) == 0);
73 				index_mail_cache_add_idx(mail, match_idx,
74 							 "", 0);
75 			}
76 			match_idx++;
77 		}
78 
79 		if (match_idx < match_count) {
80 			/* save index to first header line */
81 			i_assert(match_idx == lines[i].field_idx);
82 			j = i + 1;
83 			array_idx_set(&mail->header_match_lines, match_idx, &j);
84 			match_idx++;
85 		}
86 
87 		if (!mail_cache_field_can_add(_mail->transaction->cache_trans,
88 					      _mail->seq, lines[i].field_idx)) {
89 			/* header is already cached. skip over all the
90 			   header lines. */
91 			for (j = i+1; j < count; j++) {
92 				if (lines[j].field_idx != lines[i].field_idx)
93 					break;
94 			}
95 			continue;
96 		}
97 
98 		/* buffer contains: { uint32_t line_num[], 0, header texts }
99 		   noncontiguous is just a small optimization.. */
100 		buffer_set_used_size(buf, 0);
101 		buffer_append(buf, &lines[i].line_num,
102 			      sizeof(lines[i].line_num));
103 
104 		noncontiguous = FALSE;
105 		for (j = i+1; j < count; j++) {
106 			if (lines[j].field_idx != lines[i].field_idx)
107 				break;
108 
109 			if (lines[j].start_pos != lines[j-1].end_pos)
110 				noncontiguous = TRUE;
111 			buffer_append(buf, &lines[j].line_num,
112 				      sizeof(lines[j].line_num));
113 		}
114 		buffer_append_zero(buf, sizeof(uint32_t));
115 
116 		if (noncontiguous) {
117 			for (; i < j; i++) {
118 				buffer_append(buf, header + lines[i].start_pos,
119 					      lines[i].end_pos -
120 					      lines[i].start_pos);
121 			}
122 			i--;
123 		} else {
124 			buffer_append(buf, header + lines[i].start_pos,
125 				      lines[j-1].end_pos - lines[i].start_pos);
126 		}
127 
128 		index_mail_cache_add_idx(mail, lines[i].field_idx,
129 					 buf->data, buf->used);
130 	}
131 
132 	for (; match_idx < match_count; match_idx++) {
133 		if (HEADER_MATCH_USABLE(mail, match[match_idx]) &&
134 		    mail_cache_field_can_add(_mail->transaction->cache_trans,
135 					     _mail->seq, match_idx)) {
136 			/* this header doesn't exist. remember that. */
137 			i_assert((match[match_idx] &
138 				  HEADER_MATCH_FLAG_FOUND) == 0);
139 			index_mail_cache_add_idx(mail, match_idx, "", 0);
140 		}
141 	}
142 
143 	mail->data.dont_cache_field_idx = UINT_MAX;
144 	index_mail_parse_header_deinit(mail);
145 }
146 
147 static unsigned int
get_header_field_idx(struct mailbox * box,const char * field)148 get_header_field_idx(struct mailbox *box, const char *field)
149 {
150 	struct mail_cache_field header_field;
151 
152 	i_zero(&header_field);
153 	header_field.type = MAIL_CACHE_FIELD_HEADER;
154 	/* Always register with NO decision. The field should be added soon
155 	   with mail_cache_add(), which changes the decision to TEMP. Most
156 	   importantly doing it this way emits mail_cache_decision event. */
157 	header_field.decision = MAIL_CACHE_DECISION_NO;
158 	T_BEGIN {
159 		header_field.name = t_strconcat("hdr.", field, NULL);
160 		mail_cache_register_fields(box->cache, &header_field, 1);
161 	} T_END;
162 	return header_field.idx;
163 }
164 
index_mail_want_parse_headers(struct index_mail * mail)165 bool index_mail_want_parse_headers(struct index_mail *mail)
166 {
167 	if (mail->data.wanted_headers != NULL ||
168 	    mail->data.save_bodystructure_header)
169 		return TRUE;
170 
171 	if ((mail->data.cache_fetch_fields & MAIL_FETCH_DATE) != 0 &&
172 	    !mail->data.sent_date_parsed)
173 		return TRUE;
174 	return FALSE;
175 }
176 
index_mail_parse_header_register_all_wanted(struct index_mail * mail)177 static void index_mail_parse_header_register_all_wanted(struct index_mail *mail)
178 {
179 	struct mail *_mail = &mail->mail.mail;
180 	const struct mail_cache_field *all_cache_fields;
181 	unsigned int i, count;
182 
183 	all_cache_fields =
184 		mail_cache_register_get_list(_mail->box->cache,
185 					     pool_datastack_create(), &count);
186 	for (i = 0; i < count; i++) {
187 		if (strncasecmp(all_cache_fields[i].name, "hdr.", 4) != 0)
188 			continue;
189 		if (!mail_cache_field_want_add(_mail->transaction->cache_trans,
190 					       _mail->seq, i))
191 			continue;
192 
193 		array_idx_set(&mail->header_match, all_cache_fields[i].idx,
194 			      &mail->header_match_value);
195 	}
196 }
197 
index_mail_parse_header_init(struct index_mail * mail,struct mailbox_header_lookup_ctx * headers)198 void index_mail_parse_header_init(struct index_mail *mail,
199 				  struct mailbox_header_lookup_ctx *headers)
200 {
201 	struct index_mail_data *data = &mail->data;
202 	const uint8_t *match;
203 	unsigned int i, field_idx, match_count;
204 
205 	index_mail_filter_stream_destroy(mail);
206 	i_assert(!mail->data.header_parser_initialized);
207 
208 	mail->header_seq = data->seq;
209 	if (mail->header_data == NULL) {
210 		mail->header_data = buffer_create_dynamic(default_pool, 4096);
211 		i_array_init(&mail->header_lines, 32);
212 		i_array_init(&mail->header_match, 32);
213 		i_array_init(&mail->header_match_lines, 32);
214 		mail->header_match_value = HEADER_MATCH_SKIP_COUNT;
215 	} else {
216 		buffer_set_used_size(mail->header_data, 0);
217 		array_clear(&mail->header_lines);
218 		array_clear(&mail->header_match_lines);
219 
220 		i_assert((mail->header_match_value &
221 			  (HEADER_MATCH_SKIP_COUNT-1)) == 0);
222 		if (mail->header_match_value + HEADER_MATCH_SKIP_COUNT <= UINT8_MAX)
223 			mail->header_match_value += HEADER_MATCH_SKIP_COUNT;
224 		else {
225 			/* wrapped, we'll have to clear the buffer */
226 			array_clear(&mail->header_match);
227 			mail->header_match_value = HEADER_MATCH_SKIP_COUNT;
228 		}
229 	}
230 
231 	if (headers != NULL) {
232 		for (i = 0; i < headers->count; i++) {
233 			array_idx_set(&mail->header_match, headers->idx[i],
234 				      &mail->header_match_value);
235 		}
236 	}
237 
238 	if (data->wanted_headers != NULL && data->wanted_headers != headers) {
239 		headers = data->wanted_headers;
240 		for (i = 0; i < headers->count; i++) {
241 			array_idx_set(&mail->header_match, headers->idx[i],
242 				      &mail->header_match_value);
243 		}
244 	}
245 
246 	/* register also all the other headers that exist in cache file */
247 	T_BEGIN {
248 		index_mail_parse_header_register_all_wanted(mail);
249 	} T_END;
250 
251 	/* if we want sent date, it doesn't mean that we also want to cache
252 	   Date: header. if we have Date field's index set at this point we
253 	   know that we want it. otherwise add it and remember that we don't
254 	   want it cached. */
255 	field_idx = get_header_field_idx(mail->mail.mail.box, "Date");
256 	match = array_get(&mail->header_match, &match_count);
257 	if (field_idx < match_count &&
258 	    match[field_idx] == mail->header_match_value) {
259 		/* cache Date: header */
260 	} else if ((data->cache_fetch_fields & MAIL_FETCH_DATE) != 0 ||
261 		   data->save_sent_date) {
262 		/* parse Date: header, but don't cache it. */
263 		data->dont_cache_field_idx = field_idx;
264 		array_idx_set(&mail->header_match, field_idx,
265 			      &mail->header_match_value);
266 	}
267 	mail->data.header_parser_initialized = TRUE;
268 	mail->data.parse_line_num = 0;
269 	i_zero(&mail->data.parse_line);
270 }
271 
index_mail_parse_finish_imap_envelope(struct index_mail * mail)272 static void index_mail_parse_finish_imap_envelope(struct index_mail *mail)
273 {
274 	struct mail *_mail = &mail->mail.mail;
275 	const unsigned int cache_field_envelope =
276 		mail->ibox->cache_fields[MAIL_CACHE_IMAP_ENVELOPE].idx;
277 	string_t *str;
278 
279 	str = str_new(mail->mail.data_pool, 256);
280 	imap_envelope_write(mail->data.envelope_data, str);
281 	mail->data.envelope = str_c(str);
282 	mail->data.save_envelope = FALSE;
283 
284 	if (mail_cache_field_can_add(_mail->transaction->cache_trans,
285 				     _mail->seq, cache_field_envelope)) {
286 		index_mail_cache_add_idx(mail, cache_field_envelope,
287 					 str_data(str), str_len(str));
288 	}
289 }
290 
index_mail_parse_header(struct message_part * part,struct message_header_line * hdr,struct index_mail * mail)291 void index_mail_parse_header(struct message_part *part,
292 			     struct message_header_line *hdr,
293 			     struct index_mail *mail)
294 {
295 	struct mail *_mail = &mail->mail.mail;
296 	struct index_mail_data *data = &mail->data;
297 	unsigned int field_idx, count;
298 	uint8_t *match;
299 
300 	i_assert(data->header_parser_initialized);
301 
302         data->parse_line_num++;
303 
304 	if (data->save_bodystructure_header &&
305 	    !data->parsed_bodystructure_header) {
306 		i_assert(part != NULL);
307 		message_part_data_parse_from_header(mail->mail.data_pool, part, hdr);
308 	}
309 
310 	if (data->save_envelope) {
311 		message_part_envelope_parse_from_header(mail->mail.data_pool,
312 					   &data->envelope_data, hdr);
313 
314 		if (hdr == NULL)
315                         index_mail_parse_finish_imap_envelope(mail);
316 	}
317 
318 	if (hdr == NULL) {
319 		/* end of headers */
320 		if (mail->data.save_sent_date)
321 			mail->data.sent_date_parsed = TRUE;
322 		T_BEGIN {
323 			index_mail_parse_header_finish(mail);
324 		} T_END;
325 		if (data->save_bodystructure_header) {
326 			i_assert(data->parser_ctx != NULL);
327 			data->parsed_bodystructure_header = TRUE;
328 		}
329 		return;
330 	}
331 
332 	if (!hdr->continued) {
333 		T_BEGIN {
334 			const char *cache_field_name =
335 				t_strconcat("hdr.", hdr->name, NULL);
336 			data->parse_line.field_idx =
337 				mail_cache_register_lookup(_mail->box->cache,
338 							   cache_field_name);
339 		} T_END;
340 	}
341 	field_idx = data->parse_line.field_idx;
342 	match = array_get_modifiable(&mail->header_match, &count);
343 	if (field_idx >= count ||
344 	    !HEADER_MATCH_USABLE(mail, match[field_idx])) {
345 		/* we don't want this header. */
346 		return;
347 	}
348 
349 	if (!hdr->continued) {
350 		/* beginning of a line. add the header name. */
351 		data->parse_line.start_pos = str_len(mail->header_data);
352 		data->parse_line.line_num = data->parse_line_num;
353 		str_append(mail->header_data, hdr->name);
354 		str_append_data(mail->header_data, hdr->middle, hdr->middle_len);
355 
356 		/* remember that we saw this header so we don't add it to
357 		   cache as nonexistent. */
358 		match[field_idx] |= HEADER_MATCH_FLAG_FOUND;
359 	}
360 	str_append_data(mail->header_data, hdr->value, hdr->value_len);
361 	if (!hdr->no_newline)
362 		str_append(mail->header_data, "\n");
363 	if (!hdr->continues) {
364 		data->parse_line.end_pos = str_len(mail->header_data);
365 		array_push_back(&mail->header_lines, &data->parse_line);
366 	}
367 }
368 
/* Message parser header callback: passes the part and header line straight
   to index_mail_parse_header(). */
static void
index_mail_parse_part_header_cb(struct message_part *part,
				struct message_header_line *hdr,
				struct index_mail *mail)
{
	index_mail_parse_header(part, hdr, mail);
}
376 
377 static void
index_mail_parse_header_cb(struct message_header_line * hdr,struct index_mail * mail)378 index_mail_parse_header_cb(struct message_header_line *hdr,
379 			   struct index_mail *mail)
380 {
381 	index_mail_parse_header(mail->data.parts, hdr, mail);
382 }
383 
384 struct istream *
index_mail_cache_parse_init(struct mail * _mail,struct istream * input)385 index_mail_cache_parse_init(struct mail *_mail, struct istream *input)
386 {
387 	struct index_mail *mail = INDEX_MAIL(_mail);
388 	struct istream *input2;
389 
390 	i_assert(mail->data.tee_stream == NULL);
391 	i_assert(mail->data.parser_ctx == NULL);
392 
393 	/* we're doing everything for now, figure out later if we want to
394 	   save them. */
395 	mail->data.save_sent_date = TRUE;
396 	mail->data.save_bodystructure_header = TRUE;
397 	mail->data.save_bodystructure_body = TRUE;
398 	/* Don't unnecessarily waste time generating a snippet, since it's
399 	   not as cheap as the others to generate. */
400 	if (index_mail_want_cache(mail, MAIL_CACHE_BODY_SNIPPET))
401 		mail->data.save_body_snippet = TRUE;
402 
403 	mail->data.tee_stream = tee_i_stream_create(input);
404 	input = tee_i_stream_create_child(mail->data.tee_stream);
405 	input2 = tee_i_stream_create_child(mail->data.tee_stream);
406 
407 	index_mail_parse_header_init(mail, NULL);
408 	mail->data.parser_input = input;
409 	mail->data.parser_ctx =
410 		message_parser_init(mail->mail.data_pool, input,
411 				    &msg_parser_set);
412 	i_stream_unref(&input);
413 	return input2;
414 }
415 
index_mail_init_parser(struct index_mail * mail)416 static void index_mail_init_parser(struct index_mail *mail)
417 {
418 	struct index_mail_data *data = &mail->data;
419 	struct message_part *parts;
420 	const char *error;
421 
422 	if (data->parser_ctx != NULL) {
423 		data->parser_input = NULL;
424 		if (message_parser_deinit_from_parts(&data->parser_ctx, &parts, &error) < 0) {
425 			index_mail_set_message_parts_corrupted(&mail->mail.mail, error);
426 			data->parts = NULL;
427 		}
428 		if (data->parts == NULL || data->parts != parts) {
429 			/* The previous parsing didn't finish, so we're
430 			   re-parsing the header. The new parts don't have data
431 			   filled anymore. */
432 			data->parsed_bodystructure_header = FALSE;
433 		}
434 	}
435 
436 	/* make sure parsing starts from the beginning of the stream */
437 	i_stream_seek(mail->data.stream, 0);
438 	if (data->parts == NULL) {
439 		data->parser_input = data->stream;
440 		data->parser_ctx = message_parser_init(mail->mail.data_pool,
441 						       data->stream,
442 						       &msg_parser_set);
443 	} else {
444 		data->parser_ctx =
445 			message_parser_init_from_parts(data->parts,
446 						       data->stream,
447 						       &msg_parser_set);
448 	}
449 }
450 
index_mail_parse_headers_internal(struct index_mail * mail,struct mailbox_header_lookup_ctx * headers)451 int index_mail_parse_headers_internal(struct index_mail *mail,
452 				      struct mailbox_header_lookup_ctx *headers)
453 {
454 	struct index_mail_data *data = &mail->data;
455 
456 	i_assert(data->stream != NULL);
457 
458 	index_mail_parse_header_init(mail, headers);
459 
460 	if (data->parts == NULL || data->save_bodystructure_header ||
461 	    (data->access_part & PARSE_BODY) != 0) {
462 		/* initialize bodystructure parsing in case we read the whole
463 		   message. */
464 		index_mail_init_parser(mail);
465 		message_parser_parse_header(data->parser_ctx, &data->hdr_size,
466 					    index_mail_parse_part_header_cb,
467 					    mail);
468 	} else {
469 		/* just read the header */
470 		i_assert(!data->save_bodystructure_body ||
471 			 data->parser_ctx != NULL);
472 		message_parse_header(data->stream, &data->hdr_size,
473 				     msg_parser_set.hdr_flags,
474 				     index_mail_parse_header_cb, mail);
475 	}
476 	if (index_mail_stream_check_failure(mail) < 0) {
477 		index_mail_parse_header_deinit(mail);
478 		return -1;
479 	}
480 	i_assert(!mail->data.header_parser_initialized);
481 	data->hdr_size_set = TRUE;
482 	data->access_part &= ENUM_NEGATE(PARSE_HDR);
483 	return 0;
484 }
485 
index_mail_parse_headers(struct index_mail * mail,struct mailbox_header_lookup_ctx * headers,const char * reason)486 int index_mail_parse_headers(struct index_mail *mail,
487 			     struct mailbox_header_lookup_ctx *headers,
488 			     const char *reason)
489 {
490 	struct index_mail_data *data = &mail->data;
491 	struct istream *input;
492 	uoff_t old_offset;
493 
494 	old_offset = data->stream == NULL ? 0 : data->stream->v_offset;
495 
496 	if (mail_get_hdr_stream_because(&mail->mail.mail, NULL, reason, &input) < 0)
497 		return -1;
498 
499 	int ret = index_mail_parse_headers_internal(mail, headers);
500 	i_stream_seek(data->stream, old_offset);
501 	return ret;
502 }
503 
504 static void
imap_envelope_parse_callback(struct message_header_line * hdr,struct index_mail * mail)505 imap_envelope_parse_callback(struct message_header_line *hdr,
506 			     struct index_mail *mail)
507 {
508 	message_part_envelope_parse_from_header(mail->mail.data_pool,
509 				   &mail->data.envelope_data, hdr);
510 
511 	if (hdr == NULL)
512 		index_mail_parse_finish_imap_envelope(mail);
513 }
514 
index_mail_headers_get_envelope(struct index_mail * mail)515 int index_mail_headers_get_envelope(struct index_mail *mail)
516 {
517 	const unsigned int cache_field_envelope =
518 		mail->ibox->cache_fields[MAIL_CACHE_IMAP_ENVELOPE].idx;
519 	struct mailbox_header_lookup_ctx *header_ctx;
520 	struct istream *stream;
521 	uoff_t old_offset;
522 	string_t *str;
523 
524 	str = str_new(mail->mail.data_pool, 256);
525 	if (index_mail_cache_lookup_field(mail, str,
526 					  cache_field_envelope) > 0) {
527 		mail->data.envelope = str_c(str);
528 		return 0;
529 	}
530 	str_free(&str);
531 
532 	old_offset = mail->data.stream == NULL ? 0 :
533 		mail->data.stream->v_offset;
534 
535 	/* Make sure header_cache_callback() isn't also parsing the ENVELOPE.
536 	   Otherwise two callbacks are doing it and mixing up results. */
537 	mail->data.save_envelope = FALSE;
538 
539 	header_ctx = mailbox_header_lookup_init(mail->mail.mail.box,
540 						message_part_envelope_headers);
541 	if (mail_get_header_stream(&mail->mail.mail, header_ctx, &stream) < 0) {
542 		mailbox_header_lookup_unref(&header_ctx);
543 		return -1;
544 	}
545 	mailbox_header_lookup_unref(&header_ctx);
546 
547 	if (mail->data.envelope == NULL) {
548 		/* we got the headers from cache - parse them to get the
549 		   envelope */
550 		message_parse_header(stream, NULL, msg_parser_set.hdr_flags,
551 				     imap_envelope_parse_callback, mail);
552 		if (stream->stream_errno != 0) {
553 			index_mail_stream_log_failure_for(mail, stream);
554 			return -1;
555 		}
556 		i_assert(mail->data.envelope != NULL);
557 	}
558 
559 	if (mail->data.stream != NULL)
560 		i_stream_seek(mail->data.stream, old_offset);
561 	return 0;
562 }
563 
/* Returns the length of the header value starting at pos in buffer. The
   value ends at the first LF that isn't followed by a space or tab (the LF
   isn't counted), or at the end of the buffer. */
static size_t get_header_size(buffer_t *buffer, size_t pos)
{
	const unsigned char *bytes = buffer->data;
	size_t end = buffer->used;
	size_t cur;

	i_assert(pos <= end);

	for (cur = pos; cur < end; cur++) {
		if (bytes[cur] != '\n')
			continue;
		/* LF at the very end of the buffer terminates the value */
		if (cur + 1 == end)
			return cur - pos;
		/* LF followed by space/tab is a folded line that continues */
		if (bytes[cur+1] != ' ' && bytes[cur+1] != '\t')
			return cur - pos;
	}
	return end - pos;
}
580 
index_mail_header_is_parsed(struct index_mail * mail,unsigned int field_idx)581 static int index_mail_header_is_parsed(struct index_mail *mail,
582 				       unsigned int field_idx)
583 {
584 	const uint8_t *match;
585 	unsigned int count;
586 
587 	match = array_get(&mail->header_match, &count);
588 	if (field_idx < count && HEADER_MATCH_USABLE(mail, match[field_idx]))
589 		return (match[field_idx] & HEADER_MATCH_FLAG_FOUND) != 0 ? 1 : 0;
590 	return -1;
591 }
592 
skip_header(const unsigned char ** data,size_t len)593 static bool skip_header(const unsigned char **data, size_t len)
594 {
595 	const unsigned char *p = *data;
596 	size_t i;
597 
598 	for (i = 0; i < len; i++) {
599 		if (p[i] == ':')
600 			break;
601 	}
602 	if (i == len)
603 		return FALSE;
604 
605 	for (i++; i < len; i++) {
606 		if (!IS_LWSP(p[i]))
607 			break;
608 	}
609 
610 	*data = p + i;
611 	return TRUE;
612 }
613 
614 static const char *const *
index_mail_get_parsed_header(struct index_mail * mail,unsigned int field_idx)615 index_mail_get_parsed_header(struct index_mail *mail, unsigned int field_idx)
616 {
617 	ARRAY(const char *) header_values;
618         const struct index_mail_line *lines;
619 	const unsigned char *header, *value_start, *value_end;
620 	const unsigned int *line_idx;
621 	const char *value;
622 	unsigned int i, lines_count, first_line_idx;
623 
624 	line_idx = array_idx(&mail->header_match_lines, field_idx);
625 	i_assert(*line_idx != 0);
626 	first_line_idx = *line_idx - 1;
627 
628 	p_array_init(&header_values, mail->mail.data_pool, 4);
629 	header = mail->header_data->data;
630 
631 	lines = array_get(&mail->header_lines, &lines_count);
632 	for (i = first_line_idx; i < lines_count; i++) {
633 		if (lines[i].field_idx != lines[first_line_idx].field_idx)
634 			break;
635 
636 		/* skip header: and drop ending LF */
637 		value_start = header + lines[i].start_pos;
638 		value_end = header + lines[i].end_pos;
639 		if (skip_header(&value_start, value_end - value_start)) {
640 			if (value_start != value_end && value_end[-1] == '\n')
641 				value_end--;
642 			value = message_header_strdup(mail->mail.data_pool,
643 						      value_start,
644 						      value_end - value_start);
645 			array_push_back(&header_values, &value);
646 		}
647 	}
648 
649 	array_append_zero(&header_values);
650 	return array_front(&header_values);
651 }
652 
653 static int
index_mail_get_raw_headers(struct index_mail * mail,const char * field,const char * const ** value_r)654 index_mail_get_raw_headers(struct index_mail *mail, const char *field,
655 			   const char *const **value_r)
656 {
657 	struct mail *_mail = &mail->mail.mail;
658 	const char *headers[2], *value;
659 	struct mailbox_header_lookup_ctx *headers_ctx;
660 	const unsigned char *data;
661 	unsigned int field_idx;
662 	string_t *dest;
663 	size_t i, len, len2;
664 	int ret;
665 	ARRAY(const char *) header_values;
666 
667 	i_assert(field != NULL);
668 
669 	field_idx = get_header_field_idx(_mail->box, field);
670 
671 	dest = t_str_new(128);
672 	if (mail_cache_lookup_headers(_mail->transaction->cache_view, dest,
673 				      _mail->seq, &field_idx, 1) <= 0) {
674 		/* not in cache / error - first see if it's already parsed */
675 		p_free(mail->mail.data_pool, dest);
676 		if (mail->data.header_parser_initialized) {
677 			/* don't try to parse headers recursively. we're here
678 			   because message size was wrong and istream-mail
679 			   wants to log some cached headers. */
680 			i_assert(mail->mail.mail.lookup_abort >= MAIL_LOOKUP_ABORT_NOT_IN_CACHE);
681 			mail_set_aborted(&mail->mail.mail);
682 			return -1;
683 		}
684 		if (mail->header_seq != mail->data.seq ||
685 		    index_mail_header_is_parsed(mail, field_idx) < 0) {
686 			/* parse */
687 			const char *reason = index_mail_cache_reason(_mail,
688 				t_strdup_printf("header %s", field));
689 			headers[0] = field; headers[1] = NULL;
690 			headers_ctx = mailbox_header_lookup_init(_mail->box,
691 								 headers);
692 			ret = index_mail_parse_headers(mail, headers_ctx, reason);
693 			mailbox_header_lookup_unref(&headers_ctx);
694 			if (ret < 0)
695 				return -1;
696 		}
697 
698 		if ((ret = index_mail_header_is_parsed(mail, field_idx)) <= 0) {
699 			/* not found */
700 			i_assert(ret != -1);
701 			*value_r = p_new(mail->mail.data_pool, const char *, 1);
702 			return 0;
703 		}
704 		*value_r = index_mail_get_parsed_header(mail, field_idx);
705 		return 0;
706 	}
707 	_mail->transaction->stats.cache_hit_count++;
708 	data = buffer_get_data(dest, &len);
709 
710 	if (len == 0) {
711 		/* cached as nonexistent. */
712 		*value_r = p_new(mail->mail.data_pool, const char *, 1);
713 		return 0;
714 	}
715 
716 	p_array_init(&header_values, mail->mail.data_pool, 4);
717 
718 	/* cached. skip "header name: " parts in dest. */
719 	for (i = 0; i < len; i++) {
720 		if (data[i] == ':') {
721 			i++;
722 			while (i < len && IS_LWSP(data[i])) i++;
723 
724 			/* @UNSAFE */
725 			len2 = get_header_size(dest, i);
726 			value = message_header_strdup(mail->mail.data_pool,
727 						     data + i, len2);
728 			i += len2 + 1;
729 
730 			array_push_back(&header_values, &value);
731 		}
732 	}
733 
734 	array_append_zero(&header_values);
735 	*value_r = array_front(&header_values);
736 	return 0;
737 }
738 
/* Unfolds a header value so it becomes a single line: each LF and the
   space/tab following it are replaced by one space. A value without any LF
   is left as it is. Otherwise *_str is pointed to a new string allocated
   from pool. Returns 0 on success, or -1 if an LF in the middle of the
   value isn't followed by a space or tab (*_str isn't changed then). */
static int unfold_header(pool_t pool, const char **_str)
{
	const char *src = *_str;
	char *unfolded;
	unsigned int in, out;

	/* find the first LF - nothing to do if there isn't one */
	in = 0;
	while (src[in] != '\0' && src[in] != '\n')
		in++;
	if (src[in] == '\0')
		return 0;

	/* @UNSAFE: unfolding never makes the string longer than the
	   original */
	unfolded = p_malloc(pool, in + strlen(src+in) + 1);
	memcpy(unfolded, src, in);
	out = in;
	while (src[in] != '\0') {
		if (src[in] != '\n') {
			unfolded[out++] = src[in++];
			continue;
		}

		unfolded[out++] = ' ';
		in++;
		if (src[in] == '\0')
			break;

		if (src[in] != ' ' && src[in] != '\t') {
			/* corrupted */
			return -1;
		}
		/* the whitespace starting the continuation line is dropped */
		in++;
	}
	unfolded[out] = '\0';
	*_str = unfolded;
	return 0;
}
774 
/* Overwrites every NUL byte inside the string with a space. */
static void str_replace_nuls(string_t *str)
{
	char *p = str_c_modifiable(str);
	char *end = p + str_len(str);

	for (; p != end; p++) {
		if (*p == '\0')
			*p = ' ';
	}
}
785 
786 static int
index_mail_headers_decode(struct index_mail * mail,const char * const ** _list,unsigned int max_count)787 index_mail_headers_decode(struct index_mail *mail, const char *const **_list,
788 			  unsigned int max_count)
789 {
790 	const char *const *list = *_list;
791 	const char **decoded_list, *input;
792 	unsigned int i, count;
793 	string_t *str;
794 
795 	count = str_array_length(list);
796 	if (count > max_count)
797 		count = max_count;
798 	decoded_list = p_new(mail->mail.data_pool, const char *, count + 1);
799 
800 	str = t_str_new(512);
801 	for (i = 0; i < count; i++) {
802 		str_truncate(str, 0);
803 		input = list[i];
804 		/* unfold all lines into a single line */
805 		if (unfold_header(mail->mail.data_pool, &input) < 0)
806 			return -1;
807 
808 		/* decode MIME encoded-words. decoding may also add new LFs. */
809 		message_header_decode_utf8((const unsigned char *)input,
810 					   strlen(input), str, NULL);
811 		if (strcmp(str_c(str), input) != 0) {
812 			if (strlen(str_c(str)) != str_len(str)) {
813 				/* replace NULs with spaces */
814 				str_replace_nuls(str);
815 			}
816 			input = p_strdup(mail->mail.data_pool, str_c(str));
817 		}
818 		decoded_list[i] = input;
819 	}
820 	*_list = decoded_list;
821 	return 0;
822 }
823 
index_mail_get_headers(struct mail * _mail,const char * field,bool decode_to_utf8,const char * const ** value_r)824 int index_mail_get_headers(struct mail *_mail, const char *field,
825 			   bool decode_to_utf8, const char *const **value_r)
826 {
827 	struct index_mail *mail = INDEX_MAIL(_mail);
828 	bool retry = TRUE;
829 	int ret;
830 
831 	for (;; retry = FALSE) {
832 		if (index_mail_get_raw_headers(mail, field, value_r) < 0)
833 			return -1;
834 		if (**value_r == NULL)
835 			return 0;
836 		if (!decode_to_utf8)
837 			return 1;
838 
839 		T_BEGIN {
840 			ret = index_mail_headers_decode(mail, value_r, UINT_MAX);
841 		} T_END;
842 
843 		if (ret < 0 && retry) {
844 			mail_set_mail_cache_corrupted(_mail, "Broken header %s",
845 						      field);
846 		} else {
847 			break;
848 		}
849 	}
850 	if (ret < 0) {
851 		i_panic("BUG: Broken header %s for mail UID %u "
852 			"wasn't fixed by re-parsing the header",
853 			field, _mail->uid);
854 	}
855 	return 1;
856 }
857 
index_mail_get_first_header(struct mail * _mail,const char * field,bool decode_to_utf8,const char ** value_r)858 int index_mail_get_first_header(struct mail *_mail, const char *field,
859 				bool decode_to_utf8, const char **value_r)
860 {
861 	struct index_mail *mail = INDEX_MAIL(_mail);
862 	const char *const *list;
863 	bool retry = TRUE;
864 	int ret;
865 
866 	for (;; retry = FALSE) {
867 		if (index_mail_get_raw_headers(mail, field, &list) < 0)
868 			return -1;
869 		if (!decode_to_utf8 || list[0] == NULL) {
870 			ret = 0;
871 			break;
872 		}
873 
874 		T_BEGIN {
875 			ret = index_mail_headers_decode(mail, &list, 1);
876 		} T_END;
877 
878 		if (ret < 0 && retry) {
879 			mail_set_mail_cache_corrupted(_mail, "Broken header %s",
880 						      field);
881 			/* retry by parsing the full header */
882 		} else {
883 			break;
884 		}
885 	}
886 	if (ret < 0) {
887 		i_panic("BUG: Broken header %s for mail UID %u "
888 			"wasn't fixed by re-parsing the header",
889 			field, _mail->uid);
890 	}
891 	*value_r = list[0];
892 	return list[0] != NULL ? 1 : 0;
893 }
894 
895 static void
header_cache_callback(struct header_filter_istream * input ATTR_UNUSED,struct message_header_line * hdr,bool * matched ATTR_UNUSED,struct index_mail * mail)896 header_cache_callback(struct header_filter_istream *input ATTR_UNUSED,
897 		      struct message_header_line *hdr,
898 		      bool *matched ATTR_UNUSED, struct index_mail *mail)
899 {
900 	index_mail_parse_header(NULL, hdr, mail);
901 }
902 
index_mail_filter_stream_destroy(struct index_mail * mail)903 static void index_mail_filter_stream_destroy(struct index_mail *mail)
904 {
905 	if (mail->data.filter_stream == NULL)
906 		return;
907 
908 	const unsigned char *data;
909 	size_t size;
910 
911 	/* read through the previous filter_stream. this makes sure that the
912 	   fields are added to cache, and most importantly it resets
913 	   header_parser_initialized=FALSE so we don't assert on it. */
914 	while (i_stream_read_more(mail->data.filter_stream, &data, &size) > 0)
915 		i_stream_skip(mail->data.filter_stream, size);
916 	if (mail->data.header_parser_initialized) {
917 		/* istream failed while reading the header */
918 		i_assert(mail->data.filter_stream->stream_errno != 0);
919 		index_mail_parse_header_deinit(mail);
920 	}
921 	i_stream_destroy(&mail->data.filter_stream);
922 }
923 
index_mail_get_header_stream(struct mail * _mail,struct mailbox_header_lookup_ctx * headers,struct istream ** stream_r)924 int index_mail_get_header_stream(struct mail *_mail,
925 				 struct mailbox_header_lookup_ctx *headers,
926 				 struct istream **stream_r)
927 {
928 	struct index_mail *mail = INDEX_MAIL(_mail);
929 	struct istream *input;
930 	string_t *dest;
931 
932 	index_mail_filter_stream_destroy(mail);
933 
934 	if (mail->data.save_bodystructure_header) {
935 		/* we have to parse the header. */
936 		const char *reason =
937 			index_mail_cache_reason(_mail, "bodystructure");
938 		if (index_mail_parse_headers(mail, headers, reason) < 0)
939 			return -1;
940 	}
941 
942 	dest = str_new(mail->mail.data_pool, 256);
943 	if (mail_cache_lookup_headers(_mail->transaction->cache_view, dest,
944 				      _mail->seq, headers->idx,
945 				      headers->count) > 0) {
946 		str_append(dest, "\n");
947 		_mail->transaction->stats.cache_hit_count++;
948 		mail->data.filter_stream =
949 			i_stream_create_from_data(str_data(dest),
950 						  str_len(dest));
951 		*stream_r = mail->data.filter_stream;
952 		return 0;
953 	}
954 	/* not in cache / error */
955 	p_free(mail->mail.data_pool, dest);
956 
957 	unsigned int first_not_found = UINT_MAX, not_found_count = 0;
958 	for (unsigned int i = 0; i < headers->count; i++) {
959 		if (mail_cache_field_exists(_mail->transaction->cache_view,
960 					    _mail->seq, headers->idx[i]) <= 0) {
961 			if (not_found_count++ == 0)
962 				first_not_found = i;
963 		}
964 	}
965 
966 	const char *reason;
967 	if (not_found_count == 0)
968 		reason = "BUG: all headers seem to exist in cache";
969 	else {
970 		i_assert(first_not_found != UINT_MAX);
971 		reason = index_mail_cache_reason(_mail, t_strdup_printf(
972 			"%u/%u headers not cached (first=%s)",
973 			not_found_count, headers->count, headers->name[first_not_found]));
974 	}
975 	if (mail_get_hdr_stream_because(_mail, NULL, reason, &input) < 0)
976 		return -1;
977 
978 	index_mail_parse_header_init(mail, headers);
979 	mail->data.filter_stream =
980 		i_stream_create_header_filter(mail->data.stream,
981 					      HEADER_FILTER_INCLUDE |
982 					      HEADER_FILTER_ADD_MISSING_EOH |
983 					      HEADER_FILTER_HIDE_BODY,
984 					      headers->name, headers->count,
985 					      header_cache_callback, mail);
986 	*stream_r = mail->data.filter_stream;
987 	return 0;
988 }
989