1/*
2 * Copyright 2016 Software Freedom Conservancy Inc.
3 * Copyright 2018 Michael Gratton <mike@vee.net>
4 *
5 * This software is licensed under the GNU Lesser General Public License
6 * (version 2.1 or later).  See the COPYING file in this distribution.
7 */
8
9
10/**
11 * An RFC-822 style email message.
12 *
13 * Unlike {@link Email}, these objects are always a complete
14 * representation of an email message, and contain no information
15 * other than what RFC-822 and its successor RFC documents specify.
16 */
17public class Geary.RFC822.Message : BaseObject, EmailHeaderSet {
18
19
20    /**
21     * Callback for including non-text MIME entities in message bodies.
22     *
23     * This delegate is an optional parameter to the body constructors
24     * that allows callers to process arbitrary non-text, inline MIME
25     * parts.
26     *
27     * This is only called for non-text MIME parts in mixed multipart
28     * sections.  Inline parts referred to by rich text in alternative
29     * or related documents must be located by the caller and
30     * appropriately presented.
31     */
32    public delegate string? InlinePartReplacer(Part part);
33
34
35    private const string HEADER_IN_REPLY_TO = "In-Reply-To";
36    private const string HEADER_REFERENCES = "References";
37    private const string HEADER_MAILER = "X-Mailer";
38    private const string HEADER_BCC = "Bcc";
39
40    /** Options to use when serialising a message in RFC 822 format. */
41    [Flags]
42    public enum RFC822FormatOptions {
43
44        /** Format for RFC 822 in general. */
45        NONE,
46
47        /**
48         * The message should be serialised for transmission via SMTP.
49         *
50         * SMTP imposes both operational and data-format requirements
51         * on RFC 822 style messages. In particular, BCC headers
52         * should not be included since they will expose BCC
53         * recipients, and lines must be dot-stuffed so as to avoid
54         * terminating the message early if a line starting with a `.`
55         * is encountered.
56         *
57         * See [[http://tools.ietf.org/html/rfc5321#section-4.5.2]]
58         */
59        SMTP_FORMAT;
60
61    }
62
63
64    // Internal note: If a header field is added here, it *must* be
65    // set in Message.from_gmime_message(), below.
66
67    /** {@inheritDoc} */
68    public MailboxAddresses? from { get { return this._from; } }
69    private MailboxAddresses? _from  = null;
70
71    /** {@inheritDoc} */
72    public MailboxAddress? sender { get { return this._sender; } }
73    private MailboxAddress? _sender = null;
74
75    /** {@inheritDoc} */
76    public MailboxAddresses? reply_to { get { return this._reply_to; } }
77    private MailboxAddresses? _reply_to = null;
78
79    /** {@inheritDoc} */
80    public MailboxAddresses? to { get { return this._to; } }
81    private MailboxAddresses? _to = null;
82
83    /** {@inheritDoc} */
84    public MailboxAddresses? cc { get { return this._cc; } }
85    private MailboxAddresses? _cc = null;
86
87    /** {@inheritDoc} */
88    public MailboxAddresses? bcc { get { return this._bcc; } }
89    private MailboxAddresses? _bcc = null;
90
91    /** {@inheritDoc} */
92    public MessageID? message_id { get { return this._message_id; } }
93    private MessageID? _message_id = null;
94
95    /** {@inheritDoc} */
96    public MessageIDList? in_reply_to { get { return this._in_reply_to; } }
97    private MessageIDList? _in_reply_to = null;
98
99    /** {@inheritDoc} */
100    public MessageIDList? references { get { return this._references; } }
101    private MessageIDList? _references = null;
102
103    /** {@inheritDoc} */
104    public Subject? subject { get { return this._subject; } }
105    private Subject? _subject = null;
106
107    /** {@inheritDoc} */
108    public Date? date { get { return this._date; } }
109    private Date? _date = null;
110
111    /** Value of the X-Mailer header. */
112    public string? mailer { get; protected set; default = null; }
113
114    // The backing store for this message. Used to access body parts.
115    private GMime.Message message;
116
117
118    public Message(Full full) throws Error {
119        GMime.Parser parser = new GMime.Parser.with_stream(
120            Utils.create_stream_mem(full.buffer)
121        );
122        var message = parser.construct_message(get_parser_options());
123        if (message == null) {
124            throw new Error.INVALID("Unable to parse RFC 822 message");
125        }
126
127        this.from_gmime_message(message);
128    }
129
130    public Message.from_gmime_message(GMime.Message message)
131        throws Error {
132        this.message = message;
133
134        this._from = to_addresses(message.get_from());
135        this._to = to_addresses(message.get_to());
136        this._cc = to_addresses(message.get_cc());
137        this._bcc = to_addresses(message.get_bcc());
138        this._reply_to = to_addresses(message.get_reply_to());
139
140        var sender = (
141            message.get_sender().get_address(0) as GMime.InternetAddressMailbox
142        );
143        if (sender != null) {
144            this._sender = new MailboxAddress.from_gmime(sender);
145        }
146
147        var subject = message.get_subject();
148        if (subject != null) {
149            this._subject = new Subject(subject);
150        }
151
152        // Use a pointer here to work around GNOME/vala#986
153        GLib.DateTime* date = message.get_date();
154        if (date != null) {
155            this._date = new Date(date);
156        }
157
158        var message_id = message.get_message_id();
159        if (message_id != null) {
160            this._message_id = new MessageID(message_id);
161        }
162
163        // Since these headers may be specified multiple times, we
164        // need to iterate over all of them to find them.
165        var headers = message.get_header_list();
166        for (int i = 0; i < headers.get_count(); i++) {
167            var header = headers.get_header_at(i);
168            switch (header.get_name().down()) {
169            case "in-reply-to":
170                this._in_reply_to = append_message_id(
171                    this._in_reply_to, header.get_raw_value()
172                );
173                break;
174
175            case "references":
176                this._references = append_message_id(
177                    this._references, header.get_raw_value()
178                );
179                break;
180
181            default:
182                break;
183            }
184        }
185
186        this.mailer = message.get_header("X-Mailer");
187    }
188
189    public Message.from_buffer(Memory.Buffer full_email)
190        throws Error {
191        this(new Geary.RFC822.Full(full_email));
192    }
193
194    public Message.from_parts(Header header, Text body)
195        throws Error {
196        GMime.StreamCat stream_cat = new GMime.StreamCat();
197
198        if (header.buffer.size != 0) {
199            stream_cat.add_source(new GMime.StreamMem.with_buffer(header.buffer.get_bytes().get_data()));
200        } else {
201            throw new Error.INVALID("Missing header in RFC 822 message");
202        }
203        if (body.buffer.size != 0) {
204            stream_cat.add_source(new GMime.StreamMem.with_buffer(body.buffer.get_bytes().get_data()));
205        }
206
207        GMime.Parser parser = new GMime.Parser.with_stream(stream_cat);
208        var message = parser.construct_message(Geary.RFC822.get_parser_options());
209        if (message == null) {
210            throw new Error.INVALID("Unable to parse RFC 822 message");
211        }
212
213        this.from_gmime_message(message);
214    }
215
216    public async Message.from_composed_email(Geary.ComposedEmail email,
217                                             string? message_id,
218                                             GLib.Cancellable? cancellable)
219        throws Error {
220        this.message = new GMime.Message(true);
221
222        //
223        // Required headers
224
225        this._from = email.from;
226        foreach (RFC822.MailboxAddress mailbox in email.from) {
227            this.message.add_mailbox(FROM, mailbox.name, mailbox.address);
228        }
229
230        this._date = email.date;
231        this.message.set_date(this.date.value);
232
233        // Optional headers
234
235        if (email.to != null) {
236            this._to = email.to;
237            foreach (RFC822.MailboxAddress mailbox in email.to)
238                this.message.add_mailbox(TO, mailbox.name, mailbox.address);
239        }
240
241        if (email.cc != null) {
242            this._cc = email.cc;
243            foreach (RFC822.MailboxAddress mailbox in email.cc)
244                this.message.add_mailbox(CC, mailbox.name, mailbox.address);
245        }
246
247        if (email.bcc != null) {
248            this._bcc = email.bcc;
249            foreach (RFC822.MailboxAddress mailbox in email.bcc)
250                this.message.add_mailbox(BCC, mailbox.name, mailbox.address);
251        }
252
253        if (email.sender != null) {
254            this._sender = email.sender;
255            this.message.add_mailbox(SENDER, this.sender.name, this.sender.address);
256        }
257
258        if (email.reply_to != null) {
259            this._reply_to = email.reply_to;
260            foreach (RFC822.MailboxAddress mailbox in email.reply_to)
261                this.message.add_mailbox(REPLY_TO, mailbox.name, mailbox.address);
262        }
263
264        if (message_id != null) {
265            this._message_id = new MessageID(message_id);
266            this.message.set_message_id(message_id);
267        }
268
269        if (email.in_reply_to != null) {
270            this._in_reply_to = email.in_reply_to;
271            // We could use `this.message.add_mailbox()` in a similar way like
272            // we did for the other headers, but this would require to change
273            // the type of `email.in_reply_to` and `this.in_reply_to` from
274            // `RFC822.MessageIDList` to `RFC822.MailboxAddresses`.
275            this.message.set_header(HEADER_IN_REPLY_TO,
276                                    email.in_reply_to.to_rfc822_string(),
277                                    Geary.RFC822.get_charset());
278        }
279
280        if (email.references != null) {
281            this._references = email.references;
282            this.message.set_header(HEADER_REFERENCES,
283                                    email.references.to_rfc822_string(),
284                                    Geary.RFC822.get_charset());
285        }
286
287        if (email.subject != null) {
288            this._subject = email.subject;
289            this.message.set_subject(email.subject.value,
290                                     Geary.RFC822.get_charset());
291        }
292
293        // User-Agent
294        if (!Geary.String.is_empty(email.mailer)) {
295            this.mailer = email.mailer;
296            this.message.set_header(HEADER_MAILER, email.mailer,
297                                    Geary.RFC822.get_charset());
298        }
299
300        // Build the message's body mime parts
301
302        Gee.List<GMime.Object> body_parts = new Gee.LinkedList<GMime.Object>();
303
304        // Share the body charset between plain and HTML parts, so we
305        // don't need to work it out twice. This doesn't work for the
306        // content encoding however since the HTML encoding may need
307        // to be different, e.g. if it contains lines longer than
308        // allowed by RFC822/SMTP.
309        string? body_charset = null;
310
311        // Body: text format (optional)
312        if (email.body_text != null) {
313            GMime.Part? body_text = null;
314            try {
315                body_text = yield body_data_to_part(
316                    email.body_text.data,
317                    null,
318                    "text/plain",
319                    true,
320                    cancellable
321                );
322            } catch (GLib.Error err) {
323                warning("Error creating text body part: %s", err.message);
324            }
325            if (body_text != null) {
326                body_charset = body_text.get_content_type().get_parameter(
327                    "charset"
328                );
329                body_parts.add(body_text);
330            }
331        }
332
333        // Body: HTML format (also optional)
334        if (email.body_html != null) {
335            const string CID_URL_PREFIX = "cid:";
336            Gee.List<GMime.Object> related_parts =
337                new Gee.LinkedList<GMime.Object>();
338
339            // The files that need to have Content IDs assigned
340            Gee.Map<string,Memory.Buffer> inline_files = new Gee.HashMap<string,Memory.Buffer>();
341            inline_files.set_all(email.inline_files);
342
343            // Create parts for inline images, if any, and updating
344            // the IMG SRC attributes as we go. An inline file is only
345            // included if it is actually referenced by the HTML - it
346            // may have been deleted by the user after being added.
347
348            // First, treat parts that already have Content Ids
349            // assigned
350            foreach (string cid in email.cid_files.keys) {
351                if (email.contains_inline_img_src(CID_URL_PREFIX + cid)) {
352                    GMime.Object? inline_part = null;
353                    try {
354                        inline_part = yield get_buffer_part(
355                            email.cid_files[cid],
356                            GLib.Path.get_basename(cid),
357                            Geary.Mime.DispositionType.INLINE,
358                            cancellable
359                        );
360                    } catch (GLib.Error err) {
361                        warning(
362                            "Error creating CID part %s: %s",
363                            cid,
364                            err.message
365                        );
366                    }
367                    if (inline_part != null) {
368                        inline_part.set_content_id(cid);
369                        related_parts.add(inline_part);
370                    }
371                    // Don't need to assign a CID to this file, so
372                    // don't process it below any further.
373                    inline_files.unset(cid);
374                }
375            }
376
377            // Then, treat parts that need to have Content Id
378            // assigned.
379            if (!inline_files.is_empty) {
380                const string CID_TEMPLATE = "inline_%02u@geary";
381                uint cid_index = 0;
382                foreach (string name in inline_files.keys) {
383                    string cid = "";
384                    do {
385                        cid = CID_TEMPLATE.printf(cid_index++);
386                    } while (email.cid_files.has_key(cid));
387
388                    if (email.replace_inline_img_src(name,
389                                                     CID_URL_PREFIX + cid)) {
390                        GMime.Object? inline_part = null;
391                        try {
392                            inline_part = yield get_buffer_part(
393                                inline_files[name],
394                                GLib.Path.get_basename(name),
395                                Geary.Mime.DispositionType.INLINE,
396                                cancellable
397                            );
398                        } catch (GLib.Error err) {
399                            warning(
400                                "Error creating inline file part %s: %s",
401                                name,
402                                err.message
403                            );
404                        }
405                        if (inline_part != null) {
406                            inline_part.set_content_id(cid);
407                            related_parts.add(inline_part);
408                        }
409                    }
410                }
411            }
412
413            GMime.Object? body_html = null;
414            try {
415                body_html = yield body_data_to_part(
416                    email.body_html.data,
417                    body_charset,
418                    "text/html",
419                    false,
420                    cancellable
421                );
422            } catch (GLib.Error err) {
423                warning("Error creating html body part: %s", err.message);
424            }
425
426            // Assemble the HTML and inline images into a related
427            // part, if needed
428            if (!related_parts.is_empty) {
429                related_parts.insert(0, body_html);
430                GMime.Object? related_part =
431                   coalesce_related(related_parts, "text/html");
432                if (related_part != null)
433                    body_html = related_part;
434            }
435
436            body_parts.add(body_html);
437        }
438
439        // Build the message's main part.
440        Gee.List<GMime.Object> main_parts = new Gee.LinkedList<GMime.Object>();
441        GMime.Object? body_part = coalesce_parts(body_parts, "alternative");
442        if (body_part != null)
443            main_parts.add(body_part);
444
445        Gee.List<GMime.Object> attachment_parts = new Gee.LinkedList<GMime.Object>();
446        foreach (File file in email.attached_files) {
447            GMime.Object? attachment_part = null;
448            try {
449                attachment_part = yield get_file_part(
450                    file,
451                    Geary.Mime.DispositionType.ATTACHMENT,
452                    cancellable
453                );
454            } catch (GLib.Error err) {
455                warning(
456                    "Error creating attachment file part %s: %s",
457                    file.get_path(),
458                    err.message
459                );
460            }
461            if (attachment_part != null) {
462                attachment_parts.add(attachment_part);
463            }
464        }
465        GMime.Object? attachment_part = coalesce_parts(attachment_parts, "mixed");
466        if (attachment_part != null)
467            main_parts.add(attachment_part);
468
469        GMime.Object? main_part = coalesce_parts(main_parts, "mixed");
470        this.message.set_mime_part(main_part);
471    }
472
473    private GMime.Object? coalesce_related(Gee.List<GMime.Object> parts,
474                                           string type) {
475        GMime.Object? part = coalesce_parts(parts, "related");
476        if (parts.size > 1) {
477            part.set_header("Type", type, Geary.RFC822.get_charset());
478        }
479        return part;
480    }
481
482    private GMime.Object? coalesce_parts(Gee.List<GMime.Object> parts, string subtype) {
483        if (parts.size == 0) {
484            return null;
485        } else if (parts.size == 1) {
486            return parts.first();
487        } else {
488            GMime.Multipart multipart = new GMime.Multipart.with_subtype(subtype);
489            foreach (GMime.Object part in parts)
490                multipart.add(part);
491            return multipart;
492        }
493    }
494
495    private async GMime.Part? get_file_part(File file,
496                                            Geary.Mime.DispositionType disposition,
497                                            GLib.Cancellable? cancellable)
498        throws GLib.Error {
499        FileInfo file_info = yield file.query_info_async(
500            FileAttribute.STANDARD_CONTENT_TYPE,
501            FileQueryInfoFlags.NONE
502        );
503
504        GMime.Part part = new GMime.Part();
505        part.set_disposition(disposition.serialize());
506        part.set_filename(file.get_basename());
507
508        GMime.ContentType content_type = GMime.ContentType.parse(
509            Geary.RFC822.get_parser_options(),
510            file_info.get_content_type()
511        );
512        part.set_content_type(content_type);
513
514        // Always use a binary encoding since even when attaching
515        // text/plain parts, the line ending must always be preserved
516        // and this is not possible without a binary encoding. See
517        // https://gitlab.gnome.org/GNOME/geary/-/issues/1001
518        //
519        // TODO: The actual content encoding should be set based on
520        // the IMAP/SMTP server's supported encoding. For example, if
521        // 8-bit or binary is supported, then those should be used
522        // instead of Base64.
523        part.set_content_encoding(BASE64);
524
525        GMime.StreamGIO stream = new GMime.StreamGIO(file);
526        stream.set_owner(false);
527        part.set_content(
528            new GMime.DataWrapper.with_stream(
529                stream, GMime.ContentEncoding.BINARY
530            )
531        );
532
533        return part;
534    }
535
536    /**
537     * Create a GMime part for the provided attachment buffer
538     */
539    private async GMime.Part? get_buffer_part(Memory.Buffer buffer,
540                                              string basename,
541                                              Geary.Mime.DispositionType disposition,
542                                              GLib.Cancellable? cancellable)
543        throws GLib.Error {
544        Mime.ContentType? mime_type = Mime.ContentType.guess_type(
545            basename,
546            buffer
547        );
548
549        if (mime_type == null) {
550            throw new Error.INVALID(
551                _("Could not determine mime type for “%s”.").printf(basename)
552                );
553        }
554
555        GMime.ContentType? content_type = GMime.ContentType.parse(
556            Geary.RFC822.get_parser_options(),
557            mime_type.get_mime_type()
558        );
559
560        if (content_type == null) {
561            throw new Error.INVALID(
562                _("Could not determine content type for mime type “%s” on “%s”.").printf(mime_type.to_string(), basename)
563                );
564        }
565
566        GMime.Part part = new GMime.Part();
567        part.set_disposition(disposition.serialize());
568        part.set_filename(basename);
569        part.set_content_type(content_type);
570
571        // Always use a binary encoding since even when attaching
572        // text/plain parts, the line ending must always be preserved
573        // and this is not possible without a binary encoding. See
574        // https://gitlab.gnome.org/GNOME/geary/-/issues/1001
575        //
576        // TODO: The actual content encoding should be set based on
577        // the IMAP/SMTP server's supported encoding. For example, if
578        // 8-bit or binary is supported, then those should be used
579        // instead of Base64.
580        part.set_content_encoding(BASE64);
581
582        GMime.StreamMem stream = Utils.create_stream_mem(buffer);
583        part.set_content(
584            new GMime.DataWrapper.with_stream(
585                stream, GMime.ContentEncoding.BINARY
586            )
587        );
588
589        return part;
590    }
591
592    /**
593     * Generates a preview from the email's message body.
594     *
595     * If there is no body, the empty string will be returned.
596     */
597    public string get_preview() {
598        TextFormat format = TextFormat.PLAIN;
599        string? preview = null;
600        try {
601            preview = get_plain_body(false, null);
602        } catch (Error e) {
603            try {
604                format = TextFormat.HTML;
605                preview = get_html_body(null);
606            } catch (Error error) {
607                debug("Could not generate message preview: %s\n and: %s",
608                      e.message, error.message);
609            }
610        }
611
612        return (preview != null)
613            ? Geary.RFC822.Utils.to_preview_text(preview, format)
614            : "";
615    }
616
617    public Gee.List<RFC822.MailboxAddress>? get_recipients() {
618        Gee.List<RFC822.MailboxAddress> addrs = new Gee.ArrayList<RFC822.MailboxAddress>();
619
620        if (to != null)
621            addrs.add_all(to.get_all());
622
623        if (cc != null)
624            addrs.add_all(cc.get_all());
625
626        if (bcc != null)
627            addrs.add_all(bcc.get_all());
628
629        return (addrs.size > 0) ? addrs : null;
630    }
631
632    /**
633     * Returns the header of the message.
634     */
635    public Header get_header() {
636        return new Header.from_gmime(this.message);
637    }
638
639    /**
640     * Returns the body of the message.
641     */
642    public Text get_body() {
643        Text? body = null;
644        GMime.Object? gmime = this.message.get_mime_part();
645        if (gmime != null) {
646            var stream = new GMime.StreamMem();
647
648            // GMime doesn't support writing content-only via the
649            // public API, so suppress all headers in the message
650            // instead.
651            GMime.FormatOptions options = Geary.RFC822.get_format_options().clone();
652            GMime.HeaderList headers = message.get_header_list();
653            int count = headers.get_count();
654            for (int i = 0; i < count; i++) {
655                options.add_hidden_header(headers.get_header_at(i).get_name());
656            }
657            gmime.write_to_stream(options, stream);
658            body = new Text.from_gmime(stream);
659        } else {
660            body = new Text(Memory.EmptyBuffer.instance);
661        }
662        return body;
663    }
664
665    /**
666     * Serialises the message using native (i.e. LF) line endings.
667     */
668    public Memory.Buffer get_native_buffer() throws Error {
669        return message_to_memory_buffer(false, NONE);
670    }
671
672    /**
673     * Serialises the message using RFC 822 (i.e. CRLF) line endings.
674     *
675     * Returns the message as a memory buffer suitable for network
676     * transmission and interoperability with other RFC 822 consumers.
677     */
678    public Memory.Buffer get_rfc822_buffer(RFC822FormatOptions options = NONE)
679        throws Error {
680        return message_to_memory_buffer(true, options);
681    }
682
683    /**
684     * Determines if the message has one or display HTML parts.
685     */
686    public bool has_html_body() {
687        return has_body_parts(message.get_mime_part(), "html");
688    }
689
690    /**
691     * Determines if the message has one or plain text display parts.
692     */
693    public bool has_plain_body() {
694        return has_body_parts(message.get_mime_part(), "plain");
695    }
696
697    /**
698     * Determines if the message has any body text/subtype MIME parts.
699     *
700     * A body part is one that would be displayed to the user,
701     * i.e. parts returned by {@link get_html_body} or {@link
702     * get_plain_body}.
703     *
704     * The logic for selecting text nodes here must match that in
705     * construct_body_from_mime_parts.
706     */
707    private bool has_body_parts(GMime.Object node, string text_subtype) {
708        Part part = new Part(node);
709        bool is_matching_part = false;
710
711        if (node is GMime.Multipart) {
712            GMime.Multipart multipart = (GMime.Multipart) node;
713            int count = multipart.get_count();
714            for (int i = 0; i < count && !is_matching_part; i++) {
715                is_matching_part = has_body_parts(
716                    multipart.get_part(i), text_subtype
717                );
718            }
719        } else if (node is GMime.Part) {
720            Mime.DispositionType disposition = Mime.DispositionType.UNSPECIFIED;
721            if (part.content_disposition != null) {
722                disposition = part.content_disposition.disposition_type;
723            }
724
725            is_matching_part = (
726                disposition != Mime.DispositionType.ATTACHMENT &&
727                part.content_type.is_type("text", text_subtype)
728            );
729        }
730        return is_matching_part;
731    }
732
733    /**
734     * This method is the main utility method used by the other body-generating constructors.
735     *
736     * Only text/* MIME parts of the specified subtype are added to body.  If a non-text part is
737     * within a multipart/mixed container, the {@link InlinePartReplacer} is invoked.
738     *
739     * If to_html is true, the text is run through a filter to HTML-ize it.  (Obviously, this
740     * should be false if text/html is being searched for.).
741     *
742     * The final constructed body is stored in the body string.
743     *
744     * The initial call should pass the root of this message and UNSPECIFIED as its container
745     * subtype.
746     *
747     * @return Whether a text part with the desired text_subtype was found
748     */
749    private bool construct_body_from_mime_parts(GMime.Object node,
750                                                Mime.MultipartSubtype container_subtype,
751                                                string text_subtype,
752                                                bool to_html,
753                                                InlinePartReplacer? replacer,
754                                                ref string? body)
755        throws Error {
756        Part part = new Part(node);
757        Mime.ContentType content_type = part.content_type;
758
759        // If this is a multipart, call ourselves recursively on the children
760        GMime.Multipart? multipart = node as GMime.Multipart;
761        if (multipart != null) {
762            Mime.MultipartSubtype this_subtype =
763                Mime.MultipartSubtype.from_content_type(content_type, null);
764
765            bool found_text_subtype = false;
766
767            StringBuilder builder = new StringBuilder();
768            int count = multipart.get_count();
769            for (int i = 0; i < count; ++i) {
770                GMime.Object child = multipart.get_part(i);
771
772                string? child_body = null;
773                found_text_subtype |= construct_body_from_mime_parts(child, this_subtype, text_subtype,
774                    to_html, replacer, ref child_body);
775                if (child_body != null)
776                    builder.append(child_body);
777            }
778
779            if (!String.is_empty(builder.str))
780                body = builder.str;
781
782            return found_text_subtype;
783        }
784
785        Mime.DispositionType disposition = Mime.DispositionType.UNSPECIFIED;
786        if (part.content_disposition != null) {
787            disposition = part.content_disposition.disposition_type;
788        }
789
790        // Process inline leaf parts
791        if (node is GMime.Part &&
792            disposition != Mime.DispositionType.ATTACHMENT) {
793
794            // Assemble body from matching text parts, else use inline
795            // part replacer *only* for inline parts and if in a mixed
796            // multipart where each element is to be presented to the
797            // user as structure dictates; For alternative and
798            // related, the inline part is referred to elsewhere in
799            // the document and it's the callers responsibility to
800            // locate them
801
802            if (content_type.is_type("text", text_subtype)) {
803                body = part.write_to_buffer(
804                    Part.EncodingConversion.UTF8,
805                    to_html ? Part.BodyFormatting.HTML : Part.BodyFormatting.NONE
806                ).to_string();
807            } else if (replacer != null &&
808                       disposition == Mime.DispositionType.INLINE &&
809                       container_subtype == Mime.MultipartSubtype.MIXED) {
810                body = replacer(part);
811            }
812        }
813
814        return body != null;
815    }
816
817    /**
818     * A front-end to construct_body_from_mime_parts() that converts its output parameters into
819     * something that front-facing methods want to return.
820     */
821    private string? internal_get_body(string text_subtype, bool to_html, InlinePartReplacer? replacer)
822        throws Error {
823        string? body = null;
824        if (!construct_body_from_mime_parts(message.get_mime_part(), Mime.MultipartSubtype.UNSPECIFIED,
825            text_subtype, to_html, replacer, ref body)) {
826            throw new Error.NOT_FOUND("Could not find any \"text/%s\" parts", text_subtype);
827        }
828
829        return body;
830    }
831
832    /**
833     * Returns the HTML portion of the message body, if present.
834     *
835     * Recursively walks the MIME structure (depth-first) serializing
836     * all text/html MIME parts of the specified type into a single
837     * UTF-8 string.  Non-text MIME parts inside of multipart/mixed
838     * containers are offered to the {@link InlinePartReplacer}, which
839     * can either return null or return a string that is inserted in
840     * lieu of the MIME part into the final document.  All other MIME
841     * parts are ignored.
842     *
843     * @throws Error.NOT_FOUND if an HTML body is not present.
844     */
845    public string? get_html_body(InlinePartReplacer? replacer) throws Error {
846        return internal_get_body("html", false, replacer);
847    }
848
849    /**
850     * Returns the plaintext portion of the message body, if present.
851     *
852     * Recursively walks the MIME structure (depth-first) serializing
853     * all text/plain MIME parts of the specified type into a single
854     * UTF-8 string.  Non-text MIME parts inside of multipart/mixed
855     * containers are offered to the {@link InlinePartReplacer}, which
856     * can either return null or return a string that is inserted in
857     * lieu of the MIME part into the final document.  All other MIME
858     * parts are ignored.
859     *
860     * The convert_to_html flag indicates if the plaintext body should
861     * be converted into HTML.  Note that the InlinePartReplacer's
862     * output is not converted; it's up to the caller to know what
863     * format to return when invoked.
864     *
865     * @throws Error.NOT_FOUND if a plaintext body is not present.
866     */
867    public string? get_plain_body(bool convert_to_html, InlinePartReplacer? replacer)
868        throws Error {
869        return internal_get_body("plain", convert_to_html, replacer);
870    }
871
872    /**
873     * Return the body as a searchable string.  The body in this case should
874     * include everything visible in the message's body in the client, which
875     * would be only one body part, plus any visible attachments (which can be
876     * disabled by passing false in include_sub_messages).  Note that values
877     * that come out of this function are persisted.
878     */
879    public string? get_searchable_body(bool include_sub_messages = true)
880        throws Error {
881        string? body = null;
882        bool html = false;
883        try {
884            body = get_html_body(null);
885            html = true;
886        } catch (Error e) {
887            try {
888                body = get_plain_body(false, null);
889            } catch (Error e) {
890                // Ignore.
891            }
892        }
893
894        if (body != null && html)
895            body = Geary.HTML.html_to_text(body);
896
897        if (include_sub_messages) {
898            foreach (Message sub_message in get_sub_messages()) {
899                // We index a rough approximation of what a client would be
900                // displaying for each sub-message, including the subject,
901                // recipients, etc.  We can avoid attachments here because
902                // they're recursively picked up in the top-level message,
903                // indexed separately.
904                StringBuilder sub_full = new StringBuilder();
905                if (sub_message.subject != null) {
906                    sub_full.append(sub_message.subject.to_searchable_string());
907                    sub_full.append("\n");
908                }
909                if (sub_message.from != null) {
910                    sub_full.append(sub_message.from.to_searchable_string());
911                    sub_full.append("\n");
912                }
913                string? recipients = sub_message.get_searchable_recipients();
914                if (recipients != null) {
915                    sub_full.append(recipients);
916                    sub_full.append("\n");
917                }
918                // Our top-level get_sub_messages() recursively parses the
919                // whole MIME tree, so when we get the body for a sub-message,
920                // we don't need to invoke it again.
921                string? sub_body = sub_message.get_searchable_body(false);
922                if (sub_body != null)
923                    sub_full.append(sub_body);
924
925                if (sub_full.len > 0) {
926                    if (body == null)
927                        body = "";
928                    body += "\n" + sub_full.str;
929                }
930            }
931        }
932
933        return body;
934    }
935
936    /**
937     * Return the full list of recipients (to, cc, and bcc) as a searchable
938     * string.  Note that values that come out of this function are persisted.
939     */
940    public string? get_searchable_recipients() {
941        string searchable = null;
942        Gee.List<RFC822.MailboxAddress>? recipient_list = get_recipients();
943        if (recipient_list != null) {
944            MailboxAddresses recipients = new MailboxAddresses(recipient_list);
945            searchable = recipients.to_searchable_string();
946        }
947        return searchable;
948    }
949
950    // UNSPECIFIED disposition means "return all Mime parts"
951    internal Gee.List<Part> get_attachments(
952        Mime.DispositionType disposition = Mime.DispositionType.UNSPECIFIED)
953        throws Error {
954        Gee.List<Part> attachments = new Gee.LinkedList<Part>();
955        get_attachments_recursively(attachments, message.get_mime_part(), disposition);
956        return attachments;
957    }
958
959    private MailboxAddresses? to_addresses(GMime.InternetAddressList? list)
960        throws Error {
961        MailboxAddresses? addresses = null;
962        if (list != null && list.length() > 0) {
963            addresses = new MailboxAddresses.from_gmime(list);
964        }
965        return addresses;
966    }
967
968    private MessageIDList? append_message_id(MessageIDList? existing,
969                                            string header_value)
970        throws Error {
971        MessageIDList? ids = existing;
972        if (!String.is_empty_or_whitespace(header_value)) {
973            try {
974                ids = new MessageIDList.from_rfc822_string(header_value);
975                if (existing != null) {
976                    ids = existing.concatenate_list(ids);
977                }
978            } catch (Error err) {
979                // Can't simply throw this since we need to be as lax as
980                // possible when decoding messages. Hence just log it.
981                debug("Error parsing message id list: %s", err.message);
982            }
983        }
984        return ids;
985    }
986
987    private void get_attachments_recursively(Gee.List<Part> attachments,
988                                             GMime.Object root,
989                                             Mime.DispositionType requested_disposition)
990        throws Error {
991        if (root is GMime.Multipart) {
992            GMime.Multipart multipart = (GMime.Multipart) root;
993            int count = multipart.get_count();
994            for (int i = 0; i < count; ++i) {
995                get_attachments_recursively(attachments, multipart.get_part(i), requested_disposition);
996            }
997        } else if (root is GMime.MessagePart) {
998            GMime.MessagePart messagepart = (GMime.MessagePart) root;
999            GMime.Message message = messagepart.get_message();
1000            bool is_unknown;
1001            Mime.DispositionType disposition = Mime.DispositionType.deserialize(root.get_disposition(),
1002                out is_unknown);
1003            if (disposition == Mime.DispositionType.UNSPECIFIED || is_unknown) {
1004                // This is often the case, and we'll treat these as attached
1005                disposition = Mime.DispositionType.ATTACHMENT;
1006            }
1007
1008            if (requested_disposition == Mime.DispositionType.UNSPECIFIED || disposition == requested_disposition) {
1009                GMime.Stream stream = new GMime.StreamMem();
1010                message.write_to_stream(Geary.RFC822.get_format_options(), stream);
1011                GMime.DataWrapper data = new GMime.DataWrapper.with_stream(stream,
1012                    GMime.ContentEncoding.BINARY);  // Equivalent to no encoding
1013                GMime.Part part = new GMime.Part.with_type("message", "rfc822");
1014                part.set_content(data);
1015                part.set_filename((message.get_subject() ?? _("(no subject)")) + ".eml");
1016                attachments.add(new Part(part));
1017            }
1018
1019            get_attachments_recursively(attachments, message.get_mime_part(),
1020                requested_disposition);
1021        } else if (root is GMime.Part) {
1022            Part part = new Part(root);
1023
1024            Mime.DispositionType actual_disposition =
1025                Mime.DispositionType.UNSPECIFIED;
1026            if (part.content_disposition != null) {
1027                actual_disposition = part.content_disposition.disposition_type;
1028            }
1029
1030            if (requested_disposition == Mime.DispositionType.UNSPECIFIED ||
1031                actual_disposition == requested_disposition) {
1032                Mime.ContentType content_type = part.content_type;
1033
1034#if WITH_TNEF_SUPPORT
1035                if (content_type.is_type("application", "vnd.ms-tnef")) {
1036                    GMime.StreamMem stream = new GMime.StreamMem();
1037                    ((GMime.Part) root).get_content().write_to_stream(stream);
1038                    ByteArray tnef_data = stream.get_byte_array();
1039                    Ytnef.TNEFStruct tn = Ytnef.TNEFStruct();
1040                    if (Ytnef.ParseMemory(tnef_data.data, ref tn) == 0) {
1041                        for (unowned Ytnef.Attachment? a = tn.starting_attach.next; a != null; a = a.next) {
1042                            attachments.add(new Part(tnef_attachment_to_gmime_part(a)));
1043                        }
1044                    }
1045                } else
1046#endif // WITH_TNEF_SUPPORT
1047                if (actual_disposition == Mime.DispositionType.ATTACHMENT ||
1048                    (!content_type.is_type("text", "plain") &&
1049                     !content_type.is_type("text", "html"))) {
1050                    // Skip text/plain and text/html parts that are INLINE
1051                    // or UNSPECIFIED, as they will be included in the body
1052                    attachments.add(part);
1053                }
1054            }
1055        }
1056    }
1057
1058#if WITH_TNEF_SUPPORT
1059    private GMime.Part tnef_attachment_to_gmime_part(Ytnef.Attachment a) {
1060        Ytnef.VariableLength* filenameProp = Ytnef.MAPIFindProperty(a.MAPI, Ytnef.PROP_TAG(Ytnef.PropType.STRING8, Ytnef.PropID.ATTACH_LONG_FILENAME));
1061        if (filenameProp == Ytnef.MAPI_UNDEFINED) {
1062            filenameProp = Ytnef.MAPIFindProperty(a.MAPI, Ytnef.PROP_TAG(Ytnef.PropType.STRING8, Ytnef.PropID.DISPLAY_NAME));
1063            if (filenameProp == Ytnef.MAPI_UNDEFINED) {
1064                filenameProp = &a.Title;
1065            }
1066        }
1067        string filename = (string) filenameProp.data;
1068        uint8[] data = Bytes.unref_to_data(new Bytes(a.FileData.data));
1069
1070        GMime.Part part = new GMime.Part.with_type("text", "plain");
1071        part.set_filename(filename);
1072        part.set_content_type(GMime.ContentType.parse(Geary.RFC822.get_parser_options(), GLib.ContentType.guess(filename, data, null)));
1073        part.set_content(new GMime.DataWrapper.with_stream(new GMime.StreamMem.with_buffer(data), GMime.ContentEncoding.BINARY));
1074        return part;
1075    }
1076#endif
1077
1078    public Gee.List<Geary.RFC822.Message> get_sub_messages()
1079        throws Error {
1080        Gee.List<Geary.RFC822.Message> messages = new Gee.ArrayList<Geary.RFC822.Message>();
1081        find_sub_messages(messages, message.get_mime_part());
1082        return messages;
1083    }
1084
1085    private void find_sub_messages(Gee.List<Message> messages,
1086                                   GMime.Object root)
1087        throws Error {
1088        // If this is a multipart container, check each of its children.
1089        GMime.Multipart? multipart = root as GMime.Multipart;
1090        if (multipart != null) {
1091            int count = multipart.get_count();
1092            for (int i = 0; i < count; ++i) {
1093                find_sub_messages(messages, multipart.get_part(i));
1094            }
1095            return;
1096        }
1097
1098        GMime.MessagePart? messagepart = root as GMime.MessagePart;
1099        if (messagepart != null) {
1100            GMime.Message sub_message = messagepart.get_message();
1101            if (sub_message != null) {
1102                messages.add(new Message.from_gmime_message(sub_message));
1103            } else {
1104                warning("Corrupt message, possibly bug 769697");
1105            }
1106        }
1107    }
1108
1109    private Memory.Buffer message_to_memory_buffer(bool encode_lf,
1110                                                   RFC822FormatOptions options)
1111        throws Error {
1112        ByteArray byte_array = new ByteArray();
1113        GMime.StreamMem stream = new GMime.StreamMem.with_byte_array(byte_array);
1114        stream.set_owner(false);
1115
1116        GMime.StreamFilter stream_filter = new GMime.StreamFilter(stream);
1117        if (encode_lf) {
1118            stream_filter.add(new GMime.FilterUnix2Dos(false));
1119        } else {
1120            stream_filter.add(new GMime.FilterDos2Unix(false));
1121        }
1122        if (RFC822FormatOptions.SMTP_FORMAT in options) {
1123            stream_filter.add(new GMime.FilterSmtpData());
1124        }
1125
1126        var format = Geary.RFC822.get_format_options();
1127        if (RFC822FormatOptions.SMTP_FORMAT in options) {
1128            format = format.clone();
1129            format.add_hidden_header("Bcc");
1130        }
1131
1132        if (message.write_to_stream(format, stream_filter) < 0) {
1133            throw new Error.FAILED(
1134                "Unable to write RFC822 message to filter stream"
1135            );
1136        }
1137
1138        if (stream_filter.flush() != 0) {
1139            throw new Error.FAILED(
1140                "Unable to flush RFC822 message to memory stream"
1141            );
1142        }
1143
1144        if (stream.flush() != 0) {
1145            throw new Error.FAILED(
1146                "Unable to flush RFC822 message to memory buffer"
1147            );
1148        }
1149
1150        return new Memory.ByteBuffer.from_byte_array(byte_array);
1151    }
1152
1153    public string to_string() {
1154        return message.to_string(Geary.RFC822.get_format_options());
1155    }
1156
1157    /**
1158     * Returns a MIME part for some body content.
1159     *
1160     * Determining the appropriate body charset and encoding is
1161     * unfortunately a multi-step process that involves reading it
1162     * completely, several times:
1163     *
1164     * 1. Guess the best charset by scanning the complete body.
1165     * 2. Convert the body into the preferred charset, essential
1166     *    to avoid e.g. guessing Base64 encoding for ISO-8859-1
1167     *    because of the 0x0's present in UTF bytes with high-bit
1168     *    chars.
1169     * 3. Determine, given the correctly encoded charset
1170     *    what the appropriate encoding is by scanning the
1171     *    complete, encoded body.
1172     *
1173     * This applies to both text/plain and text/html parts, but we
1174     * don't need to do it repeatedly for each, since HTML is 7-bit
1175     * clean ASCII. So if we have guessed both already for a plain
1176     * text body, it will still apply for any HTML part.
1177     */
1178    private async GMime.Part body_data_to_part(uint8[] content,
1179                                               string? charset,
1180                                               string content_type,
1181                                               bool is_flowed,
1182                                               GLib.Cancellable? cancellable)
1183        throws GLib.Error {
1184        GMime.Stream content_stream = new GMime.StreamMem.with_buffer(content);
1185        if (charset == null) {
1186            charset = yield Utils.get_best_charset(content_stream, cancellable);
1187        }
1188        GMime.StreamFilter filter_stream = new GMime.StreamFilter(content_stream);
1189        filter_stream.add(new GMime.FilterCharset(UTF8_CHARSET, charset));
1190
1191        GMime.ContentEncoding encoding = yield Utils.get_best_encoding(
1192            filter_stream,
1193            GMime.EncodingConstraint.7BIT,
1194            cancellable
1195        );
1196
1197        if (is_flowed && encoding == GMime.ContentEncoding.BASE64) {
1198            // Base64-encoded text needs to have CR's added after LF's
1199            // before encoding, otherwise it breaks format=flowed. See
1200            // Bug 753528.
1201            filter_stream.add(new GMime.FilterUnix2Dos(false));
1202        }
1203
1204        GMime.ContentType complete_type = GMime.ContentType.parse(
1205                                              Geary.RFC822.get_parser_options(),
1206                                              content_type
1207                                          );
1208        complete_type.set_parameter("charset", charset);
1209        if (is_flowed) {
1210            complete_type.set_parameter("format", "flowed");
1211        }
1212
1213        GMime.DataWrapper body = new GMime.DataWrapper.with_stream(
1214            filter_stream, GMime.ContentEncoding.DEFAULT
1215        );
1216
1217        GMime.Part body_part = new GMime.Part.with_type("text", "plain");
1218        body_part.set_content_type(complete_type);
1219        body_part.set_content(body);
1220        body_part.set_content_encoding(encoding);
1221        return body_part;
1222    }
1223
1224}
1225