1 /* Copyright (c) 2014-2018 Dovecot authors, see the included COPYING file */
2 
3 #include "lib.h"
4 #include "str.h"
5 #include "wildcard-match.h"
6 #include "array.h"
7 #include "rfc822-parser.h"
8 #include "rfc2231-parser.h"
9 #include "message-address.h"
10 #include "message-header-parser.h"
11 
12 #include "message-part-data.h"
13 
/* Header names that make up a message envelope. These are the same ten
   names that envelope_get_field() recognizes. The list is terminated by a
   NULL entry. */
const char *message_part_envelope_headers[] = {
	"Date",
	"Subject",
	"From",
	"Sender",
	"Reply-To",
	"To",
	"Cc",
	"Bcc",
	"In-Reply-To",
	"Message-ID",
	NULL
};
19 
/*
 * Message part data queries
 */
23 
message_part_data_is_plain_7bit(const struct message_part * part)24 bool message_part_data_is_plain_7bit(const struct message_part *part)
25 {
26 	const struct message_part_data *data = part->data;
27 
28 	i_assert(data != NULL);
29 	i_assert(part->parent == NULL);
30 
31 	/* if content-type is text/xxx we don't have to check any
32 	   multipart stuff */
33 	if ((part->flags & MESSAGE_PART_FLAG_TEXT) == 0)
34 		return FALSE;
35 	if (part->next != NULL || part->children != NULL)
36 		return FALSE; /* shouldn't happen normally.. */
37 
38 	/* must be text/plain */
39 	if (data->content_subtype != NULL &&
40 	    strcasecmp(data->content_subtype, "plain") != 0)
41 		return FALSE;
42 
43 	/* only allowed parameter is charset=us-ascii, which is also default */
44 	if (data->content_type_params_count == 0) {
45 		/* charset defaults to us-ascii */
46 	} else if (data->content_type_params_count != 1 ||
47 		   strcasecmp(data->content_type_params[0].name, "charset") != 0 ||
48 		   strcasecmp(data->content_type_params[0].value,
49 			      MESSAGE_PART_DEFAULT_CHARSET) != 0)
50 		return FALSE;
51 
52 	if (data->content_id != NULL ||
53 	    data->content_description != NULL)
54 		return FALSE;
55 
56 	if (data->content_transfer_encoding != NULL &&
57 	    strcasecmp(data->content_transfer_encoding, "7bit") != 0)
58 		return FALSE;
59 
60 	/* BODYSTRUCTURE checks: */
61 	if (data->content_md5 != NULL ||
62 	    data->content_disposition != NULL ||
63 	    data->content_language != NULL ||
64 	    data->content_location != NULL)
65 		return FALSE;
66 
67 	return TRUE;
68 }
69 
message_part_data_get_filename(const struct message_part * part,const char ** filename_r)70 bool message_part_data_get_filename(const struct message_part *part,
71 	const char **filename_r)
72 {
73 	const struct message_part_data *data = part->data;
74 	const struct message_part_param *params;
75 	unsigned int params_count, i;
76 
77 	i_assert(data != NULL);
78 
79 	params = data->content_disposition_params;
80 	params_count = data->content_disposition_params_count;
81 
82 	if (data->content_disposition != NULL &&
83 		strcasecmp(data->content_disposition, "attachment") != 0) {
84 		return FALSE;
85 	}
86 	for (i = 0; i < params_count; i++) {
87 		if (strcasecmp(params[i].name, "filename") == 0 &&
88 			params[i].value != NULL) {
89 			*filename_r = params[i].value;
90 			return TRUE;
91 		}
92 	}
93 	return FALSE;
94 }
95 
96 /*
97  * Header parsing
98  */
99 
100 /* Message part envelope */
101 
/* Identifies which envelope header a header line belongs to.
   ENVELOPE_FIELD_UNKNOWN is the catch-all for every other header name. */
enum envelope_field {
	/* numbering starts at 0 */
	ENVELOPE_FIELD_DATE,
	ENVELOPE_FIELD_SUBJECT,
	ENVELOPE_FIELD_FROM,
	ENVELOPE_FIELD_SENDER,
	ENVELOPE_FIELD_REPLY_TO,
	ENVELOPE_FIELD_TO,
	ENVELOPE_FIELD_CC,
	ENVELOPE_FIELD_BCC,
	ENVELOPE_FIELD_IN_REPLY_TO,
	ENVELOPE_FIELD_MESSAGE_ID,

	/* not an envelope header */
	ENVELOPE_FIELD_UNKNOWN
};
116 
117 static enum envelope_field
envelope_get_field(const char * name)118 envelope_get_field(const char *name)
119 {
120 	switch (*name) {
121 	case 'B':
122 	case 'b':
123 		if (strcasecmp(name, "Bcc") == 0)
124 			return ENVELOPE_FIELD_BCC;
125 		break;
126 	case 'C':
127 	case 'c':
128 		if (strcasecmp(name, "Cc") == 0)
129 			return ENVELOPE_FIELD_CC;
130 		break;
131 	case 'D':
132 	case 'd':
133 		if (strcasecmp(name, "Date") == 0)
134 			return ENVELOPE_FIELD_DATE;
135 		break;
136 	case 'F':
137 	case 'f':
138 		if (strcasecmp(name, "From") == 0)
139 			return ENVELOPE_FIELD_FROM;
140 		break;
141 	case 'I':
142 	case 'i':
143 		if (strcasecmp(name, "In-reply-to") == 0)
144 			return ENVELOPE_FIELD_IN_REPLY_TO;
145 		break;
146 	case 'M':
147 	case 'm':
148 		if (strcasecmp(name, "Message-id") == 0)
149 			return ENVELOPE_FIELD_MESSAGE_ID;
150 		break;
151 	case 'R':
152 	case 'r':
153 		if (strcasecmp(name, "Reply-to") == 0)
154 			return ENVELOPE_FIELD_REPLY_TO;
155 		break;
156 	case 'S':
157 	case 's':
158 		if (strcasecmp(name, "Subject") == 0)
159 			return ENVELOPE_FIELD_SUBJECT;
160 		if (strcasecmp(name, "Sender") == 0)
161 			return ENVELOPE_FIELD_SENDER;
162 		break;
163 	case 'T':
164 	case 't':
165 		if (strcasecmp(name, "To") == 0)
166 			return ENVELOPE_FIELD_TO;
167 		break;
168 	}
169 
170 	return ENVELOPE_FIELD_UNKNOWN;
171 }
172 
/* Feeds one header line into the envelope at *data, allocating the
   envelope from pool first if *data is still NULL.

   A NULL hdr only ensures that the envelope exists. Headers that aren't
   envelope headers are ignored. For a header that continues on further
   lines, the full value is requested and nothing is stored yet.

   Text fields (Date, Subject, Message-ID, In-Reply-To) are overwritten with
   a copy of the value. Address fields get the parsed addresses appended to
   what is already there. */
void message_part_envelope_parse_from_header(pool_t pool,
	struct message_part_envelope **data,
	struct message_header_line *hdr)
{
	struct message_part_envelope *env;
	struct message_address **list_tail = NULL, *parsed;
	const char **text_dest = NULL;
	enum envelope_field field;

	if (*data == NULL)
		*data = p_new(pool, struct message_part_envelope, 1);

	if (hdr == NULL)
		return;

	field = envelope_get_field(hdr->name);
	if (field == ENVELOPE_FIELD_UNKNOWN)
		return;

	if (hdr->continues) {
		/* wait for full value */
		hdr->use_full_value = TRUE;
		return;
	}

	/* pick the destination: exactly one of list_tail and text_dest
	   gets set */
	env = *data;
	switch (field) {
	case ENVELOPE_FIELD_FROM:
		list_tail = &env->from;
		break;
	case ENVELOPE_FIELD_SENDER:
		list_tail = &env->sender;
		break;
	case ENVELOPE_FIELD_REPLY_TO:
		list_tail = &env->reply_to;
		break;
	case ENVELOPE_FIELD_TO:
		list_tail = &env->to;
		break;
	case ENVELOPE_FIELD_CC:
		list_tail = &env->cc;
		break;
	case ENVELOPE_FIELD_BCC:
		list_tail = &env->bcc;
		break;

	case ENVELOPE_FIELD_DATE:
		text_dest = &env->date;
		break;
	case ENVELOPE_FIELD_SUBJECT:
		text_dest = &env->subject;
		break;
	case ENVELOPE_FIELD_IN_REPLY_TO:
		text_dest = &env->in_reply_to;
		break;
	case ENVELOPE_FIELD_MESSAGE_ID:
		text_dest = &env->message_id;
		break;

	case ENVELOPE_FIELD_UNKNOWN:
		i_unreached();
	}

	if (list_tail == NULL) {
		if (text_dest != NULL) {
			*text_dest = message_header_strdup(pool,
					hdr->full_value, hdr->full_value_len);
		}
		return;
	}

	parsed = message_address_parse(pool, hdr->full_value,
				       hdr->full_value_len, UINT_MAX,
				       MESSAGE_ADDRESS_PARSE_FLAG_FILL_MISSING);
	/* Repeated headers are merged as if the addresses had been comma
	   separated in a single header. This is the safer choice: otherwise
	   an attacker could craft a message where e.g. the first From header
	   gets validated while the MUA displays only the second one. */
	while (*list_tail != NULL)
		list_tail = &(*list_tail)->next;
	*list_tail = parsed;
}
254 
255 /* Message part data */
256 
257 static void
parse_mime_parameters(struct rfc822_parser_context * parser,pool_t pool,const struct message_part_param ** params_r,unsigned int * params_count_r)258 parse_mime_parameters(struct rfc822_parser_context *parser,
259 	pool_t pool, const struct message_part_param **params_r,
260 	unsigned int *params_count_r)
261 {
262 	const char *const *results;
263 	struct message_part_param *params;
264 	unsigned int params_count, i;
265 
266 	rfc2231_parse(parser, &results);
267 
268 	params_count = str_array_length(results);
269 	i_assert((params_count % 2) == 0);
270 	params_count /= 2;
271 
272 	if (params_count > 0) {
273 		params = p_new(pool, struct message_part_param, params_count);
274 		for (i = 0; i < params_count; i++) {
275 			params[i].name = p_strdup(pool, results[i*2+0]);
276 			params[i].value = p_strdup(pool, results[i*2+1]);
277 		}
278 		*params_r = params;
279 	}
280 
281 	*params_count_r = params_count;
282 }
283 
284 static void
parse_content_type(struct message_part_data * data,pool_t pool,struct message_header_line * hdr)285 parse_content_type(struct message_part_data *data,
286 	pool_t pool, struct message_header_line *hdr)
287 {
288 	struct rfc822_parser_context parser;
289 	string_t *str;
290 	const char *value;
291 	unsigned int i;
292 	int ret;
293 
294 	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
295 	rfc822_skip_lwsp(&parser);
296 
297 	str = t_str_new(256);
298 	ret = rfc822_parse_content_type(&parser, str);
299 
300 	/* Save content type and subtype */
301 	value = str_c(str);
302 	for (i = 0; value[i] != '\0'; i++) {
303 		if (value[i] == '/') {
304 			data->content_subtype = p_strdup(pool, value + i+1);
305 			break;
306 		}
307 	}
308 	str_truncate(str, i);
309 	data->content_type = p_strdup(pool, str_c(str));
310 	if (data->content_subtype == NULL) {
311 		/* The Content-Type is invalid. Don't leave it NULL so that
312 		   callers can assume that if content_type != NULL,
313 		   content_subtype != NULL also. */
314 		data->content_subtype = p_strdup(pool, "");
315 	}
316 
317 	if (ret < 0) {
318 		/* Content-Type is broken, but we wanted to get it as well as
319 		   we could. Don't try to read the parameters anymore though.
320 
321 		   We don't completely ignore a broken Content-Type, because
322 		   then it would be written as text/plain. This would cause a
323 		   mismatch with the message_part's MESSAGE_PART_FLAG_TEXT. */
324 		return;
325 	}
326 
327 	parse_mime_parameters(&parser, pool,
328 		&data->content_type_params,
329 		&data->content_type_params_count);
330 	rfc822_parser_deinit(&parser);
331 }
332 
333 static void
parse_content_transfer_encoding(struct message_part_data * data,pool_t pool,struct message_header_line * hdr)334 parse_content_transfer_encoding(struct message_part_data *data,
335 	pool_t pool, struct message_header_line *hdr)
336 {
337 	struct rfc822_parser_context parser;
338 	string_t *str;
339 
340 	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
341 	rfc822_skip_lwsp(&parser);
342 
343 	str = t_str_new(256);
344 	if (rfc822_parse_mime_token(&parser, str) >= 0 &&
345 	    rfc822_skip_lwsp(&parser) == 0 && str_len(str) > 0) {
346 		data->content_transfer_encoding =
347 			p_strdup(pool, str_c(str));
348 	}
349 	rfc822_parser_deinit(&parser);
350 }
351 
352 static void
parse_content_disposition(struct message_part_data * data,pool_t pool,struct message_header_line * hdr)353 parse_content_disposition(struct message_part_data *data,
354 	pool_t pool, struct message_header_line *hdr)
355 {
356 	struct rfc822_parser_context parser;
357 	string_t *str;
358 
359 	rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
360 	rfc822_skip_lwsp(&parser);
361 
362 	str = t_str_new(256);
363 	if (rfc822_parse_mime_token(&parser, str) < 0) {
364 		rfc822_parser_deinit(&parser);
365 		return;
366 	}
367 	data->content_disposition = p_strdup(pool, str_c(str));
368 
369 	parse_mime_parameters(&parser, pool,
370 		&data->content_disposition_params,
371 		&data->content_disposition_params_count);
372 	rfc822_parser_deinit(&parser);
373 }
374 
375 static void
parse_content_language(struct message_part_data * data,pool_t pool,const unsigned char * value,size_t value_len)376 parse_content_language(struct message_part_data *data,
377 	pool_t pool, const unsigned char *value, size_t value_len)
378 {
379 	struct rfc822_parser_context parser;
380 	ARRAY_TYPE(const_string) langs;
381 	string_t *str;
382 
383 	/* Language-Header = "Content-Language" ":" 1#Language-tag
384 	   Language-Tag = Primary-tag *( "-" Subtag )
385 	   Primary-tag = 1*8ALPHA
386 	   Subtag = 1*8ALPHA */
387 
388 	rfc822_parser_init(&parser, value, value_len, NULL);
389 
390 	t_array_init(&langs, 16);
391 	str = t_str_new(128);
392 
393 	rfc822_skip_lwsp(&parser);
394 	while (rfc822_parse_atom(&parser, str) >= 0) {
395 		const char *lang = p_strdup(pool, str_c(str));
396 
397 		array_push_back(&langs, &lang);
398 		str_truncate(str, 0);
399 
400 		if (parser.data >= parser.end || *parser.data != ',')
401 			break;
402 		parser.data++;
403 		rfc822_skip_lwsp(&parser);
404 	}
405 	rfc822_parser_deinit(&parser);
406 
407 	if (array_count(&langs) > 0) {
408 		array_append_zero(&langs);
409 		data->content_language =
410 			p_strarray_dup(pool, array_front(&langs));
411 	}
412 }
413 
414 static void
parse_content_header(struct message_part_data * data,pool_t pool,struct message_header_line * hdr)415 parse_content_header(struct message_part_data *data,
416 	pool_t pool, struct message_header_line *hdr)
417 {
418 	const char *name = hdr->name + strlen("Content-");
419 
420 	if (hdr->continues) {
421 		hdr->use_full_value = TRUE;
422 		return;
423 	}
424 
425 	switch (*name) {
426 	case 'i':
427 	case 'I':
428 		if (strcasecmp(name, "ID") == 0 && data->content_id == NULL)
429 			data->content_id =
430 				message_header_strdup(pool, hdr->full_value,
431 						      hdr->full_value_len);
432 		break;
433 
434 	case 'm':
435 	case 'M':
436 		if (strcasecmp(name, "MD5") == 0 && data->content_md5 == NULL)
437 			data->content_md5 =
438 				message_header_strdup(pool, hdr->full_value,
439 						      hdr->full_value_len);
440 		break;
441 
442 	case 't':
443 	case 'T':
444 		if (strcasecmp(name, "Type") == 0 && data->content_type == NULL)
445 			parse_content_type(data, pool, hdr);
446 		else if (strcasecmp(name, "Transfer-Encoding") == 0 &&
447 			 data->content_transfer_encoding == NULL)
448 			parse_content_transfer_encoding(data, pool, hdr);
449 		break;
450 
451 	case 'l':
452 	case 'L':
453 		if (strcasecmp(name, "Language") == 0 &&
454 		    data->content_language == NULL) {
455 			parse_content_language(data, pool,
456 				hdr->full_value, hdr->full_value_len);
457 		} else if (strcasecmp(name, "Location") == 0 &&
458 			   data->content_location == NULL) {
459 			data->content_location =
460 				message_header_strdup(pool, hdr->full_value,
461 						      hdr->full_value_len);
462 		}
463 		break;
464 
465 	case 'd':
466 	case 'D':
467 		if (strcasecmp(name, "Description") == 0 &&
468 		    data->content_description == NULL)
469 			data->content_description =
470 				message_header_strdup(pool, hdr->full_value,
471 						      hdr->full_value_len);
472 		else if (strcasecmp(name, "Disposition") == 0 &&
473 			 data->content_disposition_params == NULL)
474 			parse_content_disposition(data, pool, hdr);
475 		break;
476 	}
477 }
478 
/* Feeds one header line of part into part->data, allocating the data from
   pool when needed.

   hdr == NULL is the finishing call: it makes sure part->data exists and,
   if the part isn't flagged MESSAGE_PART_FLAG_IS_MIME, clears everything
   in it except the envelope.

   Otherwise "Content-*" headers are parsed into the part data. If the
   parent part is a message/rfc822 part, the header is also given to the
   envelope parser. All other headers are ignored. */
void message_part_data_parse_from_header(pool_t pool,
	struct message_part *part,
	struct message_header_line *hdr)
{
	struct message_part_data *pd;
	bool in_rfc822, content_hdr;

	if (hdr == NULL) {
		pd = part->data;
		if (pd == NULL) {
			/* no Content-* headers. add an empty context
			   structure anyway. */
			part->data = p_new(pool, struct message_part_data, 1);
		} else if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
			/* If there was no Mime-Version, forget all
			   the Content-stuff but keep the envelope */
			struct message_part_envelope *kept = pd->envelope;

			i_zero(pd);
			pd->envelope = kept;
		}
		return;
	}

	if (hdr->eoh)
		return;

	in_rfc822 = part->parent != NULL &&
		(part->parent->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0;
	content_hdr = strncasecmp(hdr->name, "Content-", 8) == 0;
	if (!in_rfc822 && !content_hdr) {
		/* nothing in this header is of interest */
		return;
	}

	if (part->data == NULL) {
		/* initialize message part data */
		part->data = p_new(pool, struct message_part_data, 1);
	}
	pd = part->data;

	if (content_hdr) {
		T_BEGIN {
			parse_content_header(pd, pool, hdr);
		} T_END;
	}

	if (in_rfc822) {
		/* message/rfc822, we need the envelope */
		message_part_envelope_parse_from_header(pool, &pd->envelope,
							hdr);
	}
}
529 
message_part_has_content_types(struct message_part * part,const char * const * types)530 bool message_part_has_content_types(struct message_part *part,
531 				    const char *const *types)
532 {
533 	struct message_part_data *data = part->data;
534 	bool ret = TRUE;
535 	const char *const *ptr;
536 	const char *content_type;
537 
538 	i_assert(data != NULL);
539 
540 	if (data->content_type == NULL)
541 		return FALSE;
542 	else if (data->content_subtype == NULL)
543 		content_type = t_strdup_printf("%s/", data->content_type);
544 	else
545 		content_type = t_strdup_printf("%s/%s", data->content_type,
546 							data->content_subtype);
547 	for(ptr = types; *ptr != NULL; ptr++) {
548 		bool exclude = (**ptr == '!');
549 		if (wildcard_match_icase(content_type, (*ptr)+(exclude?1:0)))
550 			ret = !exclude;
551 	}
552 
553 	return ret;
554 }
555 
message_part_has_parameter(struct message_part * part,const char * parameter,bool has_value)556 bool message_part_has_parameter(struct message_part *part, const char *parameter,
557 				bool has_value)
558 {
559 	struct message_part_data *data = part->data;
560 
561 	i_assert(data != NULL);
562 
563 	for (unsigned int i = 0; i < data->content_disposition_params_count; i++) {
564 		const struct message_part_param *param =
565 			&data->content_disposition_params[i];
566 		if (strcasecmp(param->name, parameter) == 0 &&
567 		    (!has_value || *param->value != '\0')) {
568 			return TRUE;
569 		}
570 	}
571 	return FALSE;
572 }
573 
message_part_is_attachment(struct message_part * part,const struct message_part_attachment_settings * set)574 bool message_part_is_attachment(struct message_part *part,
575 				const struct message_part_attachment_settings *set)
576 {
577 	struct message_part_data *data = part->data;
578 
579 	i_assert(data != NULL);
580 
581 	/* see if the content-type is excluded */
582 	if (set->content_type_filter != NULL &&
583 	    !message_part_has_content_types(part, set->content_type_filter))
584 		return FALSE;
585 
586 	/* accept any attachment, or any inlined attachment with filename,
587 	   unless inlined ones are excluded */
588 	if (null_strcasecmp(data->content_disposition, "attachment") == 0 ||
589 	    (!set->exclude_inlined &&
590 	     null_strcasecmp(data->content_disposition, "inline") == 0 &&
591 	     message_part_has_parameter(part, "filename", FALSE)))
592 		return TRUE;
593 	return FALSE;
594 }
595