1 /* Copyright (c) 2014-2018 Dovecot authors, see the included COPYING file */
2
3 #include "lib.h"
4 #include "str.h"
5 #include "wildcard-match.h"
6 #include "array.h"
7 #include "rfc822-parser.h"
8 #include "rfc2231-parser.h"
9 #include "message-address.h"
10 #include "message-header-parser.h"
11
12 #include "message-part-data.h"
13
/* NULL-terminated list of the header names that make up a message envelope.
   The entries appear in the same order as the values of
   enum envelope_field. */
const char *message_part_envelope_headers[] = {
	"Date", "Subject", "From", "Sender", "Reply-To",
	"To", "Cc", "Bcc", "In-Reply-To", "Message-ID",
	NULL
};
19
20 /*
21 *
22 */
23
message_part_data_is_plain_7bit(const struct message_part * part)24 bool message_part_data_is_plain_7bit(const struct message_part *part)
25 {
26 const struct message_part_data *data = part->data;
27
28 i_assert(data != NULL);
29 i_assert(part->parent == NULL);
30
31 /* if content-type is text/xxx we don't have to check any
32 multipart stuff */
33 if ((part->flags & MESSAGE_PART_FLAG_TEXT) == 0)
34 return FALSE;
35 if (part->next != NULL || part->children != NULL)
36 return FALSE; /* shouldn't happen normally.. */
37
38 /* must be text/plain */
39 if (data->content_subtype != NULL &&
40 strcasecmp(data->content_subtype, "plain") != 0)
41 return FALSE;
42
43 /* only allowed parameter is charset=us-ascii, which is also default */
44 if (data->content_type_params_count == 0) {
45 /* charset defaults to us-ascii */
46 } else if (data->content_type_params_count != 1 ||
47 strcasecmp(data->content_type_params[0].name, "charset") != 0 ||
48 strcasecmp(data->content_type_params[0].value,
49 MESSAGE_PART_DEFAULT_CHARSET) != 0)
50 return FALSE;
51
52 if (data->content_id != NULL ||
53 data->content_description != NULL)
54 return FALSE;
55
56 if (data->content_transfer_encoding != NULL &&
57 strcasecmp(data->content_transfer_encoding, "7bit") != 0)
58 return FALSE;
59
60 /* BODYSTRUCTURE checks: */
61 if (data->content_md5 != NULL ||
62 data->content_disposition != NULL ||
63 data->content_language != NULL ||
64 data->content_location != NULL)
65 return FALSE;
66
67 return TRUE;
68 }
69
message_part_data_get_filename(const struct message_part * part,const char ** filename_r)70 bool message_part_data_get_filename(const struct message_part *part,
71 const char **filename_r)
72 {
73 const struct message_part_data *data = part->data;
74 const struct message_part_param *params;
75 unsigned int params_count, i;
76
77 i_assert(data != NULL);
78
79 params = data->content_disposition_params;
80 params_count = data->content_disposition_params_count;
81
82 if (data->content_disposition != NULL &&
83 strcasecmp(data->content_disposition, "attachment") != 0) {
84 return FALSE;
85 }
86 for (i = 0; i < params_count; i++) {
87 if (strcasecmp(params[i].name, "filename") == 0 &&
88 params[i].value != NULL) {
89 *filename_r = params[i].value;
90 return TRUE;
91 }
92 }
93 return FALSE;
94 }
95
96 /*
97 * Header parsing
98 */
99
100 /* Message part envelope */
101
/* Envelope header identifiers, as returned by envelope_get_field(). */
enum envelope_field {
	ENVELOPE_FIELD_DATE = 0,
	ENVELOPE_FIELD_SUBJECT,
	ENVELOPE_FIELD_FROM,
	ENVELOPE_FIELD_SENDER,
	ENVELOPE_FIELD_REPLY_TO,
	ENVELOPE_FIELD_TO,
	ENVELOPE_FIELD_CC,
	ENVELOPE_FIELD_BCC,
	ENVELOPE_FIELD_IN_REPLY_TO,
	ENVELOPE_FIELD_MESSAGE_ID,

	/* the header name is not one of the envelope headers */
	ENVELOPE_FIELD_UNKNOWN
};

/* Maps a header name to its envelope field. The whole name must match,
   ignoring case; anything else yields ENVELOPE_FIELD_UNKNOWN. */
static enum envelope_field
envelope_get_field(const char *name)
{
	const char *candidate;
	enum envelope_field field;

	/* The first character narrows the choice down to a single candidate
	   name, which is then verified with one full comparison. */
	switch (*name) {
	case 'B':
	case 'b':
		candidate = "Bcc";
		field = ENVELOPE_FIELD_BCC;
		break;
	case 'C':
	case 'c':
		candidate = "Cc";
		field = ENVELOPE_FIELD_CC;
		break;
	case 'D':
	case 'd':
		candidate = "Date";
		field = ENVELOPE_FIELD_DATE;
		break;
	case 'F':
	case 'f':
		candidate = "From";
		field = ENVELOPE_FIELD_FROM;
		break;
	case 'I':
	case 'i':
		candidate = "In-reply-to";
		field = ENVELOPE_FIELD_IN_REPLY_TO;
		break;
	case 'M':
	case 'm':
		candidate = "Message-id";
		field = ENVELOPE_FIELD_MESSAGE_ID;
		break;
	case 'R':
	case 'r':
		candidate = "Reply-to";
		field = ENVELOPE_FIELD_REPLY_TO;
		break;
	case 'S':
	case 's':
		/* two envelope headers begin with 'S'; settle the first
		   one here and leave the second as the candidate */
		if (strcasecmp(name, "Subject") == 0)
			return ENVELOPE_FIELD_SUBJECT;
		candidate = "Sender";
		field = ENVELOPE_FIELD_SENDER;
		break;
	case 'T':
	case 't':
		candidate = "To";
		field = ENVELOPE_FIELD_TO;
		break;
	default:
		return ENVELOPE_FIELD_UNKNOWN;
	}

	if (strcasecmp(name, candidate) != 0)
		return ENVELOPE_FIELD_UNKNOWN;
	return field;
}
172
/* Feeds one header line into the envelope at *data.

   *data is allocated from pool on first use, even when hdr is NULL.
   A NULL hdr or a header that is not an envelope header is otherwise
   ignored. For a header that continues on further lines, only
   use_full_value is requested and the line is handled once its full value
   is available.

   Date, Subject, Message-ID and In-Reply-To are stored as strings; a later
   header of the same name replaces the earlier value. The address headers
   are parsed into address lists; repeated headers are appended to the
   existing list. */
void message_part_envelope_parse_from_header(pool_t pool,
	struct message_part_envelope **data,
	struct message_header_line *hdr)
{
	struct message_part_envelope *env;
	struct message_address **tail = NULL, *parsed;
	const char **text = NULL;
	enum envelope_field field;

	if (*data == NULL)
		*data = p_new(pool, struct message_part_envelope, 1);
	if (hdr == NULL)
		return;

	field = envelope_get_field(hdr->name);
	if (field == ENVELOPE_FIELD_UNKNOWN)
		return;

	if (hdr->continues) {
		/* wait for full value */
		hdr->use_full_value = TRUE;
		return;
	}

	env = *data;
	switch (field) {
	/* plain string fields */
	case ENVELOPE_FIELD_DATE:
		text = &env->date;
		break;
	case ENVELOPE_FIELD_SUBJECT:
		text = &env->subject;
		break;
	case ENVELOPE_FIELD_IN_REPLY_TO:
		text = &env->in_reply_to;
		break;
	case ENVELOPE_FIELD_MESSAGE_ID:
		text = &env->message_id;
		break;

	/* address list fields */
	case ENVELOPE_FIELD_FROM:
		tail = &env->from;
		break;
	case ENVELOPE_FIELD_SENDER:
		tail = &env->sender;
		break;
	case ENVELOPE_FIELD_REPLY_TO:
		tail = &env->reply_to;
		break;
	case ENVELOPE_FIELD_TO:
		tail = &env->to;
		break;
	case ENVELOPE_FIELD_CC:
		tail = &env->cc;
		break;
	case ENVELOPE_FIELD_BCC:
		tail = &env->bcc;
		break;

	case ENVELOPE_FIELD_UNKNOWN:
		/* already filtered out above */
		i_unreached();
	}

	if (tail == NULL) {
		if (text != NULL) {
			*text = message_header_strdup(pool, hdr->full_value,
						      hdr->full_value_len);
		}
		return;
	}

	parsed = message_address_parse(pool, hdr->full_value,
				       hdr->full_value_len,
				       UINT_MAX,
				       MESSAGE_ADDRESS_PARSE_FLAG_FILL_MISSING);
	/* Merge multiple headers the same as if they were comma
	   separated in a single line. This is better from security
	   point of view, because attacker could intentionally write
	   addresses in a way that e.g. the first From header is
	   validated while MUA only shows the second From header. */
	for (; *tail != NULL; tail = &(*tail)->next)
		;
	*tail = parsed;
}
254
255 /* Message part data */
256
257 static void
parse_mime_parameters(struct rfc822_parser_context * parser,pool_t pool,const struct message_part_param ** params_r,unsigned int * params_count_r)258 parse_mime_parameters(struct rfc822_parser_context *parser,
259 pool_t pool, const struct message_part_param **params_r,
260 unsigned int *params_count_r)
261 {
262 const char *const *results;
263 struct message_part_param *params;
264 unsigned int params_count, i;
265
266 rfc2231_parse(parser, &results);
267
268 params_count = str_array_length(results);
269 i_assert((params_count % 2) == 0);
270 params_count /= 2;
271
272 if (params_count > 0) {
273 params = p_new(pool, struct message_part_param, params_count);
274 for (i = 0; i < params_count; i++) {
275 params[i].name = p_strdup(pool, results[i*2+0]);
276 params[i].value = p_strdup(pool, results[i*2+1]);
277 }
278 *params_r = params;
279 }
280
281 *params_count_r = params_count;
282 }
283
284 static void
parse_content_type(struct message_part_data * data,pool_t pool,struct message_header_line * hdr)285 parse_content_type(struct message_part_data *data,
286 pool_t pool, struct message_header_line *hdr)
287 {
288 struct rfc822_parser_context parser;
289 string_t *str;
290 const char *value;
291 unsigned int i;
292 int ret;
293
294 rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
295 rfc822_skip_lwsp(&parser);
296
297 str = t_str_new(256);
298 ret = rfc822_parse_content_type(&parser, str);
299
300 /* Save content type and subtype */
301 value = str_c(str);
302 for (i = 0; value[i] != '\0'; i++) {
303 if (value[i] == '/') {
304 data->content_subtype = p_strdup(pool, value + i+1);
305 break;
306 }
307 }
308 str_truncate(str, i);
309 data->content_type = p_strdup(pool, str_c(str));
310 if (data->content_subtype == NULL) {
311 /* The Content-Type is invalid. Don't leave it NULL so that
312 callers can assume that if content_type != NULL,
313 content_subtype != NULL also. */
314 data->content_subtype = p_strdup(pool, "");
315 }
316
317 if (ret < 0) {
318 /* Content-Type is broken, but we wanted to get it as well as
319 we could. Don't try to read the parameters anymore though.
320
321 We don't completely ignore a broken Content-Type, because
322 then it would be written as text/plain. This would cause a
323 mismatch with the message_part's MESSAGE_PART_FLAG_TEXT. */
324 return;
325 }
326
327 parse_mime_parameters(&parser, pool,
328 &data->content_type_params,
329 &data->content_type_params_count);
330 rfc822_parser_deinit(&parser);
331 }
332
333 static void
parse_content_transfer_encoding(struct message_part_data * data,pool_t pool,struct message_header_line * hdr)334 parse_content_transfer_encoding(struct message_part_data *data,
335 pool_t pool, struct message_header_line *hdr)
336 {
337 struct rfc822_parser_context parser;
338 string_t *str;
339
340 rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
341 rfc822_skip_lwsp(&parser);
342
343 str = t_str_new(256);
344 if (rfc822_parse_mime_token(&parser, str) >= 0 &&
345 rfc822_skip_lwsp(&parser) == 0 && str_len(str) > 0) {
346 data->content_transfer_encoding =
347 p_strdup(pool, str_c(str));
348 }
349 rfc822_parser_deinit(&parser);
350 }
351
352 static void
parse_content_disposition(struct message_part_data * data,pool_t pool,struct message_header_line * hdr)353 parse_content_disposition(struct message_part_data *data,
354 pool_t pool, struct message_header_line *hdr)
355 {
356 struct rfc822_parser_context parser;
357 string_t *str;
358
359 rfc822_parser_init(&parser, hdr->full_value, hdr->full_value_len, NULL);
360 rfc822_skip_lwsp(&parser);
361
362 str = t_str_new(256);
363 if (rfc822_parse_mime_token(&parser, str) < 0) {
364 rfc822_parser_deinit(&parser);
365 return;
366 }
367 data->content_disposition = p_strdup(pool, str_c(str));
368
369 parse_mime_parameters(&parser, pool,
370 &data->content_disposition_params,
371 &data->content_disposition_params_count);
372 rfc822_parser_deinit(&parser);
373 }
374
375 static void
parse_content_language(struct message_part_data * data,pool_t pool,const unsigned char * value,size_t value_len)376 parse_content_language(struct message_part_data *data,
377 pool_t pool, const unsigned char *value, size_t value_len)
378 {
379 struct rfc822_parser_context parser;
380 ARRAY_TYPE(const_string) langs;
381 string_t *str;
382
383 /* Language-Header = "Content-Language" ":" 1#Language-tag
384 Language-Tag = Primary-tag *( "-" Subtag )
385 Primary-tag = 1*8ALPHA
386 Subtag = 1*8ALPHA */
387
388 rfc822_parser_init(&parser, value, value_len, NULL);
389
390 t_array_init(&langs, 16);
391 str = t_str_new(128);
392
393 rfc822_skip_lwsp(&parser);
394 while (rfc822_parse_atom(&parser, str) >= 0) {
395 const char *lang = p_strdup(pool, str_c(str));
396
397 array_push_back(&langs, &lang);
398 str_truncate(str, 0);
399
400 if (parser.data >= parser.end || *parser.data != ',')
401 break;
402 parser.data++;
403 rfc822_skip_lwsp(&parser);
404 }
405 rfc822_parser_deinit(&parser);
406
407 if (array_count(&langs) > 0) {
408 array_append_zero(&langs);
409 data->content_language =
410 p_strarray_dup(pool, array_front(&langs));
411 }
412 }
413
414 static void
parse_content_header(struct message_part_data * data,pool_t pool,struct message_header_line * hdr)415 parse_content_header(struct message_part_data *data,
416 pool_t pool, struct message_header_line *hdr)
417 {
418 const char *name = hdr->name + strlen("Content-");
419
420 if (hdr->continues) {
421 hdr->use_full_value = TRUE;
422 return;
423 }
424
425 switch (*name) {
426 case 'i':
427 case 'I':
428 if (strcasecmp(name, "ID") == 0 && data->content_id == NULL)
429 data->content_id =
430 message_header_strdup(pool, hdr->full_value,
431 hdr->full_value_len);
432 break;
433
434 case 'm':
435 case 'M':
436 if (strcasecmp(name, "MD5") == 0 && data->content_md5 == NULL)
437 data->content_md5 =
438 message_header_strdup(pool, hdr->full_value,
439 hdr->full_value_len);
440 break;
441
442 case 't':
443 case 'T':
444 if (strcasecmp(name, "Type") == 0 && data->content_type == NULL)
445 parse_content_type(data, pool, hdr);
446 else if (strcasecmp(name, "Transfer-Encoding") == 0 &&
447 data->content_transfer_encoding == NULL)
448 parse_content_transfer_encoding(data, pool, hdr);
449 break;
450
451 case 'l':
452 case 'L':
453 if (strcasecmp(name, "Language") == 0 &&
454 data->content_language == NULL) {
455 parse_content_language(data, pool,
456 hdr->full_value, hdr->full_value_len);
457 } else if (strcasecmp(name, "Location") == 0 &&
458 data->content_location == NULL) {
459 data->content_location =
460 message_header_strdup(pool, hdr->full_value,
461 hdr->full_value_len);
462 }
463 break;
464
465 case 'd':
466 case 'D':
467 if (strcasecmp(name, "Description") == 0 &&
468 data->content_description == NULL)
469 data->content_description =
470 message_header_strdup(pool, hdr->full_value,
471 hdr->full_value_len);
472 else if (strcasecmp(name, "Disposition") == 0 &&
473 data->content_disposition_params == NULL)
474 parse_content_disposition(data, pool, hdr);
475 break;
476 }
477 }
478
/* Feeds one header line of the given part into part->data.

   hdr == NULL finishes the part: part->data is allocated if it doesn't
   exist yet, and if the part lacks MESSAGE_PART_FLAG_IS_MIME all collected
   data except the envelope is reset.

   Otherwise "Content-*" headers are parsed into part->data. When the
   parent part has MESSAGE_PART_FLAG_MESSAGE_RFC822 set, every header is
   additionally passed to the envelope parser. Headers that are neither are
   ignored, as is a line with hdr->eoh set. */
void message_part_data_parse_from_header(pool_t pool,
	struct message_part *part,
	struct message_header_line *hdr)
{
	struct message_part_data *pd;
	bool is_content, in_rfc822;

	if (hdr == NULL) {
		pd = part->data;
		if (pd == NULL) {
			/* no Content-* headers. add an empty context
			   structure anyway. */
			part->data = p_new(pool, struct message_part_data, 1);
		} else if ((part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) {
			/* If there was no Mime-Version, forget all
			   the Content-stuff, keeping only the envelope */
			struct message_part_envelope *saved = pd->envelope;

			i_zero(pd);
			pd->envelope = saved;
		}
		return;
	}

	if (hdr->eoh)
		return;

	is_content = strncasecmp(hdr->name, "Content-", 8) == 0;
	in_rfc822 = part->parent != NULL &&
		(part->parent->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0;
	if (!is_content && !in_rfc822)
		return;

	if (part->data == NULL) {
		/* initialize message part data */
		part->data = p_new(pool, struct message_part_data, 1);
	}
	pd = part->data;

	if (is_content) {
		T_BEGIN {
			parse_content_header(pd, pool, hdr);
		} T_END;
	}

	if (in_rfc822) {
		/* message/rfc822, we need the envelope */
		message_part_envelope_parse_from_header(pool, &pd->envelope, hdr);
	}
}
529
message_part_has_content_types(struct message_part * part,const char * const * types)530 bool message_part_has_content_types(struct message_part *part,
531 const char *const *types)
532 {
533 struct message_part_data *data = part->data;
534 bool ret = TRUE;
535 const char *const *ptr;
536 const char *content_type;
537
538 i_assert(data != NULL);
539
540 if (data->content_type == NULL)
541 return FALSE;
542 else if (data->content_subtype == NULL)
543 content_type = t_strdup_printf("%s/", data->content_type);
544 else
545 content_type = t_strdup_printf("%s/%s", data->content_type,
546 data->content_subtype);
547 for(ptr = types; *ptr != NULL; ptr++) {
548 bool exclude = (**ptr == '!');
549 if (wildcard_match_icase(content_type, (*ptr)+(exclude?1:0)))
550 ret = !exclude;
551 }
552
553 return ret;
554 }
555
message_part_has_parameter(struct message_part * part,const char * parameter,bool has_value)556 bool message_part_has_parameter(struct message_part *part, const char *parameter,
557 bool has_value)
558 {
559 struct message_part_data *data = part->data;
560
561 i_assert(data != NULL);
562
563 for (unsigned int i = 0; i < data->content_disposition_params_count; i++) {
564 const struct message_part_param *param =
565 &data->content_disposition_params[i];
566 if (strcasecmp(param->name, parameter) == 0 &&
567 (!has_value || *param->value != '\0')) {
568 return TRUE;
569 }
570 }
571 return FALSE;
572 }
573
message_part_is_attachment(struct message_part * part,const struct message_part_attachment_settings * set)574 bool message_part_is_attachment(struct message_part *part,
575 const struct message_part_attachment_settings *set)
576 {
577 struct message_part_data *data = part->data;
578
579 i_assert(data != NULL);
580
581 /* see if the content-type is excluded */
582 if (set->content_type_filter != NULL &&
583 !message_part_has_content_types(part, set->content_type_filter))
584 return FALSE;
585
586 /* accept any attachment, or any inlined attachment with filename,
587 unless inlined ones are excluded */
588 if (null_strcasecmp(data->content_disposition, "attachment") == 0 ||
589 (!set->exclude_inlined &&
590 null_strcasecmp(data->content_disposition, "inline") == 0 &&
591 message_part_has_parameter(part, "filename", FALSE)))
592 return TRUE;
593 return FALSE;
594 }
595